1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "cfgloop.h"
82 #include "context.h"
83 #include "pass_manager.h"
84 #include "target-globals.h"
85 #include "tree-vectorizer.h"
87 static rtx legitimize_dllimport_symbol (rtx, bool);
88 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
89 static rtx legitimize_pe_coff_symbol (rtx, bool);
91 #ifndef CHECK_STACK_LIMIT
92 #define CHECK_STACK_LIMIT (-1)
93 #endif
95 /* Return index of given mode in mult and division cost tables. */
96 #define MODE_INDEX(mode) \
97 ((mode) == QImode ? 0 \
98 : (mode) == HImode ? 1 \
99 : (mode) == SImode ? 2 \
100 : (mode) == DImode ? 3 \
101 : 4)
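/* Illustrative note: MODE_INDEX selects the per-mode entry of the mult_init[]
   and divide[] arrays in the cost tables below, so the SImode multiply
   start-up cost would be looked up roughly as
       ix86_cost->mult_init[MODE_INDEX (SImode)]
   (assuming the field names declared for struct processor_costs in i386.h);
   any mode other than QI/HI/SI/DImode maps to the trailing "other" slot.  */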
103 /* Processor costs (relative to an add) */
104 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
105 #define COSTS_N_BYTES(N) ((N) * 2)
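/* Worked example of the assumption above: with COSTS_N_INSNS (N) expanding to
   (N) * 4, a one-insn operation costs 4, and with an add taking 2 bytes,
   COSTS_N_BYTES (2) == 4 as well, so the size-tuning table below stays on the
   same scale as the speed-tuning tables and the two can be compared directly.  */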
107 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
109 static stringop_algs ix86_size_memcpy[2] = {
110 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
112 static stringop_algs ix86_size_memset[2] = {
113 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
114 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
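/* A rough guide to reading the stringop_algs initializers in this file,
   based on the struct declared in i386.h: the first field is the algorithm
   for blocks of unknown size, and each following {max, alg, noalign} triple
   applies to known sizes up to max bytes, with -1 meaning "all larger sizes".
   Of the two array elements, the first is apparently used for 32-bit code and
   the second for 64-bit code; 32-bit-only processors simply put
   DUMMY_STRINGOP_ALGS in the 64-bit slot.  */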
116 const
117 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
118 COSTS_N_BYTES (2), /* cost of an add instruction */
119 COSTS_N_BYTES (3), /* cost of a lea instruction */
120 COSTS_N_BYTES (2), /* variable shift costs */
121 COSTS_N_BYTES (3), /* constant shift costs */
122 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
123 COSTS_N_BYTES (3), /* HI */
124 COSTS_N_BYTES (3), /* SI */
125 COSTS_N_BYTES (3), /* DI */
126 COSTS_N_BYTES (5)}, /* other */
127 0, /* cost of multiply per each bit set */
128 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
129 COSTS_N_BYTES (3), /* HI */
130 COSTS_N_BYTES (3), /* SI */
131 COSTS_N_BYTES (3), /* DI */
132 COSTS_N_BYTES (5)}, /* other */
133 COSTS_N_BYTES (3), /* cost of movsx */
134 COSTS_N_BYTES (3), /* cost of movzx */
135 0, /* "large" insn */
136 2, /* MOVE_RATIO */
137 2, /* cost for loading QImode using movzbl */
138 {2, 2, 2}, /* cost of loading integer registers
139 in QImode, HImode and SImode.
140 Relative to reg-reg move (2). */
141 {2, 2, 2}, /* cost of storing integer registers */
142 2, /* cost of reg,reg fld/fst */
143 {2, 2, 2}, /* cost of loading fp registers
144 in SFmode, DFmode and XFmode */
145 {2, 2, 2}, /* cost of storing fp registers
146 in SFmode, DFmode and XFmode */
147 3, /* cost of moving MMX register */
148 {3, 3}, /* cost of loading MMX registers
149 in SImode and DImode */
150 {3, 3}, /* cost of storing MMX registers
151 in SImode and DImode */
152 3, /* cost of moving SSE register */
153 {3, 3, 3}, /* cost of loading SSE registers
154 in SImode, DImode and TImode */
155 {3, 3, 3}, /* cost of storing SSE registers
156 in SImode, DImode and TImode */
157 3, /* MMX or SSE register to integer */
158 0, /* size of l1 cache */
159 0, /* size of l2 cache */
160 0, /* size of prefetch block */
161 0, /* number of parallel prefetches */
162 2, /* Branch cost */
163 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
164 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
165 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
166 COSTS_N_BYTES (2), /* cost of FABS instruction. */
167 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
168 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
169 ix86_size_memcpy,
170 ix86_size_memset,
171 1, /* scalar_stmt_cost. */
172 1, /* scalar load_cost. */
173 1, /* scalar_store_cost. */
174 1, /* vec_stmt_cost. */
175 1, /* vec_to_scalar_cost. */
176 1, /* scalar_to_vec_cost. */
177 1, /* vec_align_load_cost. */
178 1, /* vec_unalign_load_cost. */
179 1, /* vec_store_cost. */
180 1, /* cond_taken_branch_cost. */
181 1, /* cond_not_taken_branch_cost. */
182 };
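/* Roughly speaking (the actual selection logic lives later in this file),
   when optimizing for size ix86_cost is expected to point at ix86_size_cost
   rather than at the tuned processor's table, so operations are charged by
   encoding size (COSTS_N_BYTES) instead of by latency.  */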
184 /* Processor costs (relative to an add) */
185 static stringop_algs i386_memcpy[2] = {
186 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
187 DUMMY_STRINGOP_ALGS};
188 static stringop_algs i386_memset[2] = {
189 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
190 DUMMY_STRINGOP_ALGS};
192 static const
193 struct processor_costs i386_cost = { /* 386 specific costs */
194 COSTS_N_INSNS (1), /* cost of an add instruction */
195 COSTS_N_INSNS (1), /* cost of a lea instruction */
196 COSTS_N_INSNS (3), /* variable shift costs */
197 COSTS_N_INSNS (2), /* constant shift costs */
198 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
199 COSTS_N_INSNS (6), /* HI */
200 COSTS_N_INSNS (6), /* SI */
201 COSTS_N_INSNS (6), /* DI */
202 COSTS_N_INSNS (6)}, /* other */
203 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
204 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
205 COSTS_N_INSNS (23), /* HI */
206 COSTS_N_INSNS (23), /* SI */
207 COSTS_N_INSNS (23), /* DI */
208 COSTS_N_INSNS (23)}, /* other */
209 COSTS_N_INSNS (3), /* cost of movsx */
210 COSTS_N_INSNS (2), /* cost of movzx */
211 15, /* "large" insn */
212 3, /* MOVE_RATIO */
213 4, /* cost for loading QImode using movzbl */
214 {2, 4, 2}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 4, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {8, 8, 8}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {8, 8, 8}, /* cost of storing fp registers
222 in SFmode, DFmode and XFmode */
223 2, /* cost of moving MMX register */
224 {4, 8}, /* cost of loading MMX registers
225 in SImode and DImode */
226 {4, 8}, /* cost of storing MMX registers
227 in SImode and DImode */
228 2, /* cost of moving SSE register */
229 {4, 8, 16}, /* cost of loading SSE registers
230 in SImode, DImode and TImode */
231 {4, 8, 16}, /* cost of storing SSE registers
232 in SImode, DImode and TImode */
233 3, /* MMX or SSE register to integer */
234 0, /* size of l1 cache */
235 0, /* size of l2 cache */
236 0, /* size of prefetch block */
237 0, /* number of parallel prefetches */
238 1, /* Branch cost */
239 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
240 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
241 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
242 COSTS_N_INSNS (22), /* cost of FABS instruction. */
243 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
244 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
245 i386_memcpy,
246 i386_memset,
247 1, /* scalar_stmt_cost. */
248 1, /* scalar load_cost. */
249 1, /* scalar_store_cost. */
250 1, /* vec_stmt_cost. */
251 1, /* vec_to_scalar_cost. */
252 1, /* scalar_to_vec_cost. */
253 1, /* vec_align_load_cost. */
254 2, /* vec_unalign_load_cost. */
255 1, /* vec_store_cost. */
256 3, /* cond_taken_branch_cost. */
257 1, /* cond_not_taken_branch_cost. */
258 };
260 static stringop_algs i486_memcpy[2] = {
261 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
262 DUMMY_STRINGOP_ALGS};
263 static stringop_algs i486_memset[2] = {
264 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
265 DUMMY_STRINGOP_ALGS};
267 static const
268 struct processor_costs i486_cost = { /* 486 specific costs */
269 COSTS_N_INSNS (1), /* cost of an add instruction */
270 COSTS_N_INSNS (1), /* cost of a lea instruction */
271 COSTS_N_INSNS (3), /* variable shift costs */
272 COSTS_N_INSNS (2), /* constant shift costs */
273 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
274 COSTS_N_INSNS (12), /* HI */
275 COSTS_N_INSNS (12), /* SI */
276 COSTS_N_INSNS (12), /* DI */
277 COSTS_N_INSNS (12)}, /* other */
278 1, /* cost of multiply per each bit set */
279 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
280 COSTS_N_INSNS (40), /* HI */
281 COSTS_N_INSNS (40), /* SI */
282 COSTS_N_INSNS (40), /* DI */
283 COSTS_N_INSNS (40)}, /* other */
284 COSTS_N_INSNS (3), /* cost of movsx */
285 COSTS_N_INSNS (2), /* cost of movzx */
286 15, /* "large" insn */
287 3, /* MOVE_RATIO */
288 4, /* cost for loading QImode using movzbl */
289 {2, 4, 2}, /* cost of loading integer registers
290 in QImode, HImode and SImode.
291 Relative to reg-reg move (2). */
292 {2, 4, 2}, /* cost of storing integer registers */
293 2, /* cost of reg,reg fld/fst */
294 {8, 8, 8}, /* cost of loading fp registers
295 in SFmode, DFmode and XFmode */
296 {8, 8, 8}, /* cost of storing fp registers
297 in SFmode, DFmode and XFmode */
298 2, /* cost of moving MMX register */
299 {4, 8}, /* cost of loading MMX registers
300 in SImode and DImode */
301 {4, 8}, /* cost of storing MMX registers
302 in SImode and DImode */
303 2, /* cost of moving SSE register */
304 {4, 8, 16}, /* cost of loading SSE registers
305 in SImode, DImode and TImode */
306 {4, 8, 16}, /* cost of storing SSE registers
307 in SImode, DImode and TImode */
308 3, /* MMX or SSE register to integer */
309 4, /* size of l1 cache. 486 has 8kB cache
310 shared for code and data, so 4kB is
311 not really precise. */
312 4, /* size of l2 cache */
313 0, /* size of prefetch block */
314 0, /* number of parallel prefetches */
315 1, /* Branch cost */
316 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
317 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
318 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
319 COSTS_N_INSNS (3), /* cost of FABS instruction. */
320 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
321 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
322 i486_memcpy,
323 i486_memset,
324 1, /* scalar_stmt_cost. */
325 1, /* scalar load_cost. */
326 1, /* scalar_store_cost. */
327 1, /* vec_stmt_cost. */
328 1, /* vec_to_scalar_cost. */
329 1, /* scalar_to_vec_cost. */
330 1, /* vec_align_load_cost. */
331 2, /* vec_unalign_load_cost. */
332 1, /* vec_store_cost. */
333 3, /* cond_taken_branch_cost. */
334 1, /* cond_not_taken_branch_cost. */
335 };
337 static stringop_algs pentium_memcpy[2] = {
338 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
339 DUMMY_STRINGOP_ALGS};
340 static stringop_algs pentium_memset[2] = {
341 {libcall, {{-1, rep_prefix_4_byte, false}}},
342 DUMMY_STRINGOP_ALGS};
344 static const
345 struct processor_costs pentium_cost = {
346 COSTS_N_INSNS (1), /* cost of an add instruction */
347 COSTS_N_INSNS (1), /* cost of a lea instruction */
348 COSTS_N_INSNS (4), /* variable shift costs */
349 COSTS_N_INSNS (1), /* constant shift costs */
350 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
351 COSTS_N_INSNS (11), /* HI */
352 COSTS_N_INSNS (11), /* SI */
353 COSTS_N_INSNS (11), /* DI */
354 COSTS_N_INSNS (11)}, /* other */
355 0, /* cost of multiply per each bit set */
356 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
357 COSTS_N_INSNS (25), /* HI */
358 COSTS_N_INSNS (25), /* SI */
359 COSTS_N_INSNS (25), /* DI */
360 COSTS_N_INSNS (25)}, /* other */
361 COSTS_N_INSNS (3), /* cost of movsx */
362 COSTS_N_INSNS (2), /* cost of movzx */
363 8, /* "large" insn */
364 6, /* MOVE_RATIO */
365 6, /* cost for loading QImode using movzbl */
366 {2, 4, 2}, /* cost of loading integer registers
367 in QImode, HImode and SImode.
368 Relative to reg-reg move (2). */
369 {2, 4, 2}, /* cost of storing integer registers */
370 2, /* cost of reg,reg fld/fst */
371 {2, 2, 6}, /* cost of loading fp registers
372 in SFmode, DFmode and XFmode */
373 {4, 4, 6}, /* cost of storing fp registers
374 in SFmode, DFmode and XFmode */
375 8, /* cost of moving MMX register */
376 {8, 8}, /* cost of loading MMX registers
377 in SImode and DImode */
378 {8, 8}, /* cost of storing MMX registers
379 in SImode and DImode */
380 2, /* cost of moving SSE register */
381 {4, 8, 16}, /* cost of loading SSE registers
382 in SImode, DImode and TImode */
383 {4, 8, 16}, /* cost of storing SSE registers
384 in SImode, DImode and TImode */
385 3, /* MMX or SSE register to integer */
386 8, /* size of l1 cache. */
387 8, /* size of l2 cache */
388 0, /* size of prefetch block */
389 0, /* number of parallel prefetches */
390 2, /* Branch cost */
391 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
392 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
393 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
394 COSTS_N_INSNS (1), /* cost of FABS instruction. */
395 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
396 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
397 pentium_memcpy,
398 pentium_memset,
399 1, /* scalar_stmt_cost. */
400 1, /* scalar load_cost. */
401 1, /* scalar_store_cost. */
402 1, /* vec_stmt_cost. */
403 1, /* vec_to_scalar_cost. */
404 1, /* scalar_to_vec_cost. */
405 1, /* vec_align_load_cost. */
406 2, /* vec_unalign_load_cost. */
407 1, /* vec_store_cost. */
408 3, /* cond_taken_branch_cost. */
409 1, /* cond_not_taken_branch_cost. */
410 };
412 /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes
413 (we ensure the alignment). For small blocks an inline loop is still a
414 noticeable win; for bigger blocks either rep movsl or rep movsb is
415 the way to go. Rep movsb apparently has a more expensive startup time in the
416 CPU, but after 4K the difference is down in the noise. */
417 static stringop_algs pentiumpro_memcpy[2] = {
418 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
419 {8192, rep_prefix_4_byte, false},
420 {-1, rep_prefix_1_byte, false}}},
421 DUMMY_STRINGOP_ALGS};
422 static stringop_algs pentiumpro_memset[2] = {
423 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
424 {8192, rep_prefix_4_byte, false},
425 {-1, libcall, false}}},
426 DUMMY_STRINGOP_ALGS};
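/* Reading the table above under the layout sketched earlier: blocks up to
   128 bytes use an inline loop, up to 1024 bytes an unrolled loop, up to 8192
   bytes rep movsl (rep_prefix_4_byte), anything larger rep movsb
   (rep_prefix_1_byte), and blocks of unknown size default to rep movsl.  */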
427 static const
428 struct processor_costs pentiumpro_cost = {
429 COSTS_N_INSNS (1), /* cost of an add instruction */
430 COSTS_N_INSNS (1), /* cost of a lea instruction */
431 COSTS_N_INSNS (1), /* variable shift costs */
432 COSTS_N_INSNS (1), /* constant shift costs */
433 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
434 COSTS_N_INSNS (4), /* HI */
435 COSTS_N_INSNS (4), /* SI */
436 COSTS_N_INSNS (4), /* DI */
437 COSTS_N_INSNS (4)}, /* other */
438 0, /* cost of multiply per each bit set */
439 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
440 COSTS_N_INSNS (17), /* HI */
441 COSTS_N_INSNS (17), /* SI */
442 COSTS_N_INSNS (17), /* DI */
443 COSTS_N_INSNS (17)}, /* other */
444 COSTS_N_INSNS (1), /* cost of movsx */
445 COSTS_N_INSNS (1), /* cost of movzx */
446 8, /* "large" insn */
447 6, /* MOVE_RATIO */
448 2, /* cost for loading QImode using movzbl */
449 {4, 4, 4}, /* cost of loading integer registers
450 in QImode, HImode and SImode.
451 Relative to reg-reg move (2). */
452 {2, 2, 2}, /* cost of storing integer registers */
453 2, /* cost of reg,reg fld/fst */
454 {2, 2, 6}, /* cost of loading fp registers
455 in SFmode, DFmode and XFmode */
456 {4, 4, 6}, /* cost of storing fp registers
457 in SFmode, DFmode and XFmode */
458 2, /* cost of moving MMX register */
459 {2, 2}, /* cost of loading MMX registers
460 in SImode and DImode */
461 {2, 2}, /* cost of storing MMX registers
462 in SImode and DImode */
463 2, /* cost of moving SSE register */
464 {2, 2, 8}, /* cost of loading SSE registers
465 in SImode, DImode and TImode */
466 {2, 2, 8}, /* cost of storing SSE registers
467 in SImode, DImode and TImode */
468 3, /* MMX or SSE register to integer */
469 8, /* size of l1 cache. */
470 256, /* size of l2 cache */
471 32, /* size of prefetch block */
472 6, /* number of parallel prefetches */
473 2, /* Branch cost */
474 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
475 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
476 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
477 COSTS_N_INSNS (2), /* cost of FABS instruction. */
478 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
479 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
480 pentiumpro_memcpy,
481 pentiumpro_memset,
482 1, /* scalar_stmt_cost. */
483 1, /* scalar load_cost. */
484 1, /* scalar_store_cost. */
485 1, /* vec_stmt_cost. */
486 1, /* vec_to_scalar_cost. */
487 1, /* scalar_to_vec_cost. */
488 1, /* vec_align_load_cost. */
489 2, /* vec_unalign_load_cost. */
490 1, /* vec_store_cost. */
491 3, /* cond_taken_branch_cost. */
492 1, /* cond_not_taken_branch_cost. */
493 };
495 static stringop_algs geode_memcpy[2] = {
496 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
497 DUMMY_STRINGOP_ALGS};
498 static stringop_algs geode_memset[2] = {
499 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
500 DUMMY_STRINGOP_ALGS};
501 static const
502 struct processor_costs geode_cost = {
503 COSTS_N_INSNS (1), /* cost of an add instruction */
504 COSTS_N_INSNS (1), /* cost of a lea instruction */
505 COSTS_N_INSNS (2), /* variable shift costs */
506 COSTS_N_INSNS (1), /* constant shift costs */
507 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
508 COSTS_N_INSNS (4), /* HI */
509 COSTS_N_INSNS (7), /* SI */
510 COSTS_N_INSNS (7), /* DI */
511 COSTS_N_INSNS (7)}, /* other */
512 0, /* cost of multiply per each bit set */
513 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
514 COSTS_N_INSNS (23), /* HI */
515 COSTS_N_INSNS (39), /* SI */
516 COSTS_N_INSNS (39), /* DI */
517 COSTS_N_INSNS (39)}, /* other */
518 COSTS_N_INSNS (1), /* cost of movsx */
519 COSTS_N_INSNS (1), /* cost of movzx */
520 8, /* "large" insn */
521 4, /* MOVE_RATIO */
522 1, /* cost for loading QImode using movzbl */
523 {1, 1, 1}, /* cost of loading integer registers
524 in QImode, HImode and SImode.
525 Relative to reg-reg move (2). */
526 {1, 1, 1}, /* cost of storing integer registers */
527 1, /* cost of reg,reg fld/fst */
528 {1, 1, 1}, /* cost of loading fp registers
529 in SFmode, DFmode and XFmode */
530 {4, 6, 6}, /* cost of storing fp registers
531 in SFmode, DFmode and XFmode */
533 1, /* cost of moving MMX register */
534 {1, 1}, /* cost of loading MMX registers
535 in SImode and DImode */
536 {1, 1}, /* cost of storing MMX registers
537 in SImode and DImode */
538 1, /* cost of moving SSE register */
539 {1, 1, 1}, /* cost of loading SSE registers
540 in SImode, DImode and TImode */
541 {1, 1, 1}, /* cost of storing SSE registers
542 in SImode, DImode and TImode */
543 1, /* MMX or SSE register to integer */
544 64, /* size of l1 cache. */
545 128, /* size of l2 cache. */
546 32, /* size of prefetch block */
547 1, /* number of parallel prefetches */
548 1, /* Branch cost */
549 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
550 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
551 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
552 COSTS_N_INSNS (1), /* cost of FABS instruction. */
553 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
554 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
555 geode_memcpy,
556 geode_memset,
557 1, /* scalar_stmt_cost. */
558 1, /* scalar load_cost. */
559 1, /* scalar_store_cost. */
560 1, /* vec_stmt_cost. */
561 1, /* vec_to_scalar_cost. */
562 1, /* scalar_to_vec_cost. */
563 1, /* vec_align_load_cost. */
564 2, /* vec_unalign_load_cost. */
565 1, /* vec_store_cost. */
566 3, /* cond_taken_branch_cost. */
567 1, /* cond_not_taken_branch_cost. */
568 };
570 static stringop_algs k6_memcpy[2] = {
571 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
572 DUMMY_STRINGOP_ALGS};
573 static stringop_algs k6_memset[2] = {
574 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
575 DUMMY_STRINGOP_ALGS};
576 static const
577 struct processor_costs k6_cost = {
578 COSTS_N_INSNS (1), /* cost of an add instruction */
579 COSTS_N_INSNS (2), /* cost of a lea instruction */
580 COSTS_N_INSNS (1), /* variable shift costs */
581 COSTS_N_INSNS (1), /* constant shift costs */
582 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
583 COSTS_N_INSNS (3), /* HI */
584 COSTS_N_INSNS (3), /* SI */
585 COSTS_N_INSNS (3), /* DI */
586 COSTS_N_INSNS (3)}, /* other */
587 0, /* cost of multiply per each bit set */
588 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
589 COSTS_N_INSNS (18), /* HI */
590 COSTS_N_INSNS (18), /* SI */
591 COSTS_N_INSNS (18), /* DI */
592 COSTS_N_INSNS (18)}, /* other */
593 COSTS_N_INSNS (2), /* cost of movsx */
594 COSTS_N_INSNS (2), /* cost of movzx */
595 8, /* "large" insn */
596 4, /* MOVE_RATIO */
597 3, /* cost for loading QImode using movzbl */
598 {4, 5, 4}, /* cost of loading integer registers
599 in QImode, HImode and SImode.
600 Relative to reg-reg move (2). */
601 {2, 3, 2}, /* cost of storing integer registers */
602 4, /* cost of reg,reg fld/fst */
603 {6, 6, 6}, /* cost of loading fp registers
604 in SFmode, DFmode and XFmode */
605 {4, 4, 4}, /* cost of storing fp registers
606 in SFmode, DFmode and XFmode */
607 2, /* cost of moving MMX register */
608 {2, 2}, /* cost of loading MMX registers
609 in SImode and DImode */
610 {2, 2}, /* cost of storing MMX registers
611 in SImode and DImode */
612 2, /* cost of moving SSE register */
613 {2, 2, 8}, /* cost of loading SSE registers
614 in SImode, DImode and TImode */
615 {2, 2, 8}, /* cost of storing SSE registers
616 in SImode, DImode and TImode */
617 6, /* MMX or SSE register to integer */
618 32, /* size of l1 cache. */
619 32, /* size of l2 cache. Some models
620 have integrated l2 cache, but
621 optimizing for k6 is not important
622 enough to worry about that. */
623 32, /* size of prefetch block */
624 1, /* number of parallel prefetches */
625 1, /* Branch cost */
626 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
627 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
628 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
629 COSTS_N_INSNS (2), /* cost of FABS instruction. */
630 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
631 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
632 k6_memcpy,
633 k6_memset,
634 1, /* scalar_stmt_cost. */
635 1, /* scalar load_cost. */
636 1, /* scalar_store_cost. */
637 1, /* vec_stmt_cost. */
638 1, /* vec_to_scalar_cost. */
639 1, /* scalar_to_vec_cost. */
640 1, /* vec_align_load_cost. */
641 2, /* vec_unalign_load_cost. */
642 1, /* vec_store_cost. */
643 3, /* cond_taken_branch_cost. */
644 1, /* cond_not_taken_branch_cost. */
645 };
647 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
648 than K8 does. Alignment becomes important after 8 bytes for memcpy and
649 128 bytes for memset. */
650 static stringop_algs athlon_memcpy[2] = {
651 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
652 DUMMY_STRINGOP_ALGS};
653 static stringop_algs athlon_memset[2] = {
654 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
655 DUMMY_STRINGOP_ALGS};
656 static const
657 struct processor_costs athlon_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (5), /* HI */
664 COSTS_N_INSNS (5), /* SI */
665 COSTS_N_INSNS (5), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
676 9, /* MOVE_RATIO */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {4, 4}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 4, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 256, /* size of l2 cache. */
700 64, /* size of prefetch block */
701 6, /* number of parallel prefetches */
702 5, /* Branch cost */
703 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
704 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
705 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
706 COSTS_N_INSNS (2), /* cost of FABS instruction. */
707 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
708 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
709 athlon_memcpy,
710 athlon_memset,
711 1, /* scalar_stmt_cost. */
712 1, /* scalar load_cost. */
713 1, /* scalar_store_cost. */
714 1, /* vec_stmt_cost. */
715 1, /* vec_to_scalar_cost. */
716 1, /* scalar_to_vec_cost. */
717 1, /* vec_align_load_cost. */
718 2, /* vec_unalign_load_cost. */
719 1, /* vec_store_cost. */
720 3, /* cond_taken_branch_cost. */
721 1, /* cond_not_taken_branch_cost. */
722 };
724 /* K8 has an optimized REP instruction for medium sized blocks, but for very
725 small blocks it is better to use a loop. For large blocks, a libcall can
726 do non-temporal accesses and beat inline code considerably. */
727 static stringop_algs k8_memcpy[2] = {
728 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
729 {-1, rep_prefix_4_byte, false}}},
730 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
731 {-1, libcall, false}}}};
732 static stringop_algs k8_memset[2] = {
733 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
734 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
735 {libcall, {{48, unrolled_loop, false},
736 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
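/* Note that, unlike the 32-bit-only tables above, the K8 entries also fill in
   the second (64-bit) element: once 8-byte operations are available, moderate
   blocks switch to rep_prefix_8_byte and very large ones fall back to the
   library call, matching the comment above.  */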
737 static const
738 struct processor_costs k8_cost = {
739 COSTS_N_INSNS (1), /* cost of an add instruction */
740 COSTS_N_INSNS (2), /* cost of a lea instruction */
741 COSTS_N_INSNS (1), /* variable shift costs */
742 COSTS_N_INSNS (1), /* constant shift costs */
743 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
744 COSTS_N_INSNS (4), /* HI */
745 COSTS_N_INSNS (3), /* SI */
746 COSTS_N_INSNS (4), /* DI */
747 COSTS_N_INSNS (5)}, /* other */
748 0, /* cost of multiply per each bit set */
749 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
750 COSTS_N_INSNS (26), /* HI */
751 COSTS_N_INSNS (42), /* SI */
752 COSTS_N_INSNS (74), /* DI */
753 COSTS_N_INSNS (74)}, /* other */
754 COSTS_N_INSNS (1), /* cost of movsx */
755 COSTS_N_INSNS (1), /* cost of movzx */
756 8, /* "large" insn */
757 9, /* MOVE_RATIO */
758 4, /* cost for loading QImode using movzbl */
759 {3, 4, 3}, /* cost of loading integer registers
760 in QImode, HImode and SImode.
761 Relative to reg-reg move (2). */
762 {3, 4, 3}, /* cost of storing integer registers */
763 4, /* cost of reg,reg fld/fst */
764 {4, 4, 12}, /* cost of loading fp registers
765 in SFmode, DFmode and XFmode */
766 {6, 6, 8}, /* cost of storing fp registers
767 in SFmode, DFmode and XFmode */
768 2, /* cost of moving MMX register */
769 {3, 3}, /* cost of loading MMX registers
770 in SImode and DImode */
771 {4, 4}, /* cost of storing MMX registers
772 in SImode and DImode */
773 2, /* cost of moving SSE register */
774 {4, 3, 6}, /* cost of loading SSE registers
775 in SImode, DImode and TImode */
776 {4, 4, 5}, /* cost of storing SSE registers
777 in SImode, DImode and TImode */
778 5, /* MMX or SSE register to integer */
779 64, /* size of l1 cache. */
780 512, /* size of l2 cache. */
781 64, /* size of prefetch block */
782 /* New AMD processors never drop prefetches; if they cannot be performed
783 immediately, they are queued. We set the number of simultaneous prefetches
784 to a large constant to reflect this (it is probably not a good idea not
785 to limit the number of prefetches at all, as their execution also takes some
786 time). */
787 100, /* number of parallel prefetches */
788 3, /* Branch cost */
789 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
790 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
791 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
792 COSTS_N_INSNS (2), /* cost of FABS instruction. */
793 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
794 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
796 k8_memcpy,
797 k8_memset,
798 4, /* scalar_stmt_cost. */
799 2, /* scalar load_cost. */
800 2, /* scalar_store_cost. */
801 5, /* vec_stmt_cost. */
802 0, /* vec_to_scalar_cost. */
803 2, /* scalar_to_vec_cost. */
804 2, /* vec_align_load_cost. */
805 3, /* vec_unalign_load_cost. */
806 3, /* vec_store_cost. */
807 3, /* cond_taken_branch_cost. */
808 2, /* cond_not_taken_branch_cost. */
809 };
811 /* AMDFAM10 has an optimized REP instruction for medium sized blocks, but for
812 very small blocks it is better to use a loop. For large blocks, a libcall can
813 do non-temporal accesses and beat inline code considerably. */
814 static stringop_algs amdfam10_memcpy[2] = {
815 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
816 {-1, rep_prefix_4_byte, false}}},
817 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
818 {-1, libcall, false}}}};
819 static stringop_algs amdfam10_memset[2] = {
820 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
821 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
822 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
823 {-1, libcall, false}}}};
824 struct processor_costs amdfam10_cost = {
825 COSTS_N_INSNS (1), /* cost of an add instruction */
826 COSTS_N_INSNS (2), /* cost of a lea instruction */
827 COSTS_N_INSNS (1), /* variable shift costs */
828 COSTS_N_INSNS (1), /* constant shift costs */
829 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
830 COSTS_N_INSNS (4), /* HI */
831 COSTS_N_INSNS (3), /* SI */
832 COSTS_N_INSNS (4), /* DI */
833 COSTS_N_INSNS (5)}, /* other */
834 0, /* cost of multiply per each bit set */
835 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
836 COSTS_N_INSNS (35), /* HI */
837 COSTS_N_INSNS (51), /* SI */
838 COSTS_N_INSNS (83), /* DI */
839 COSTS_N_INSNS (83)}, /* other */
840 COSTS_N_INSNS (1), /* cost of movsx */
841 COSTS_N_INSNS (1), /* cost of movzx */
842 8, /* "large" insn */
843 9, /* MOVE_RATIO */
844 4, /* cost for loading QImode using movzbl */
845 {3, 4, 3}, /* cost of loading integer registers
846 in QImode, HImode and SImode.
847 Relative to reg-reg move (2). */
848 {3, 4, 3}, /* cost of storing integer registers */
849 4, /* cost of reg,reg fld/fst */
850 {4, 4, 12}, /* cost of loading fp registers
851 in SFmode, DFmode and XFmode */
852 {6, 6, 8}, /* cost of storing fp registers
853 in SFmode, DFmode and XFmode */
854 2, /* cost of moving MMX register */
855 {3, 3}, /* cost of loading MMX registers
856 in SImode and DImode */
857 {4, 4}, /* cost of storing MMX registers
858 in SImode and DImode */
859 2, /* cost of moving SSE register */
860 {4, 4, 3}, /* cost of loading SSE registers
861 in SImode, DImode and TImode */
862 {4, 4, 5}, /* cost of storing SSE registers
863 in SImode, DImode and TImode */
864 3, /* MMX or SSE register to integer */
865 /* On K8:
866 MOVD reg64, xmmreg Double FSTORE 4
867 MOVD reg32, xmmreg Double FSTORE 4
868 On AMDFAM10:
869 MOVD reg64, xmmreg Double FADD 3
870 1/1 1/1
871 MOVD reg32, xmmreg Double FADD 3
872 1/1 1/1 */
873 64, /* size of l1 cache. */
874 512, /* size of l2 cache. */
875 64, /* size of prefetch block */
876 /* New AMD processors never drop prefetches; if they cannot be performed
877 immediately, they are queued. We set the number of simultaneous prefetches
878 to a large constant to reflect this (it is probably not a good idea not
879 to limit the number of prefetches at all, as their execution also takes some
880 time). */
881 100, /* number of parallel prefetches */
882 2, /* Branch cost */
883 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
884 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
885 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
886 COSTS_N_INSNS (2), /* cost of FABS instruction. */
887 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
888 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
890 amdfam10_memcpy,
891 amdfam10_memset,
892 4, /* scalar_stmt_cost. */
893 2, /* scalar load_cost. */
894 2, /* scalar_store_cost. */
895 6, /* vec_stmt_cost. */
896 0, /* vec_to_scalar_cost. */
897 2, /* scalar_to_vec_cost. */
898 2, /* vec_align_load_cost. */
899 2, /* vec_unalign_load_cost. */
900 2, /* vec_store_cost. */
901 2, /* cond_taken_branch_cost. */
902 1, /* cond_not_taken_branch_cost. */
903 };
905 /* BDVER1 has an optimized REP instruction for medium sized blocks, but for
906 very small blocks it is better to use a loop. For large blocks, a libcall
907 can do non-temporal accesses and beat inline code considerably. */
908 static stringop_algs bdver1_memcpy[2] = {
909 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
910 {-1, rep_prefix_4_byte, false}}},
911 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
912 {-1, libcall, false}}}};
913 static stringop_algs bdver1_memset[2] = {
914 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
915 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
916 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
917 {-1, libcall, false}}}};
919 const struct processor_costs bdver1_cost = {
920 COSTS_N_INSNS (1), /* cost of an add instruction */
921 COSTS_N_INSNS (1), /* cost of a lea instruction */
922 COSTS_N_INSNS (1), /* variable shift costs */
923 COSTS_N_INSNS (1), /* constant shift costs */
924 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
925 COSTS_N_INSNS (4), /* HI */
926 COSTS_N_INSNS (4), /* SI */
927 COSTS_N_INSNS (6), /* DI */
928 COSTS_N_INSNS (6)}, /* other */
929 0, /* cost of multiply per each bit set */
930 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
931 COSTS_N_INSNS (35), /* HI */
932 COSTS_N_INSNS (51), /* SI */
933 COSTS_N_INSNS (83), /* DI */
934 COSTS_N_INSNS (83)}, /* other */
935 COSTS_N_INSNS (1), /* cost of movsx */
936 COSTS_N_INSNS (1), /* cost of movzx */
937 8, /* "large" insn */
938 9, /* MOVE_RATIO */
939 4, /* cost for loading QImode using movzbl */
940 {5, 5, 4}, /* cost of loading integer registers
941 in QImode, HImode and SImode.
942 Relative to reg-reg move (2). */
943 {4, 4, 4}, /* cost of storing integer registers */
944 2, /* cost of reg,reg fld/fst */
945 {5, 5, 12}, /* cost of loading fp registers
946 in SFmode, DFmode and XFmode */
947 {4, 4, 8}, /* cost of storing fp registers
948 in SFmode, DFmode and XFmode */
949 2, /* cost of moving MMX register */
950 {4, 4}, /* cost of loading MMX registers
951 in SImode and DImode */
952 {4, 4}, /* cost of storing MMX registers
953 in SImode and DImode */
954 2, /* cost of moving SSE register */
955 {4, 4, 4}, /* cost of loading SSE registers
956 in SImode, DImode and TImode */
957 {4, 4, 4}, /* cost of storing SSE registers
958 in SImode, DImode and TImode */
959 2, /* MMX or SSE register to integer */
960 /* On K8:
961 MOVD reg64, xmmreg Double FSTORE 4
962 MOVD reg32, xmmreg Double FSTORE 4
963 On AMDFAM10:
964 MOVD reg64, xmmreg Double FADD 3
965 1/1 1/1
966 MOVD reg32, xmmreg Double FADD 3
967 1/1 1/1 */
968 16, /* size of l1 cache. */
969 2048, /* size of l2 cache. */
970 64, /* size of prefetch block */
971 /* New AMD processors never drop prefetches; if they cannot be performed
972 immediately, they are queued. We set the number of simultaneous prefetches
973 to a large constant to reflect this (it is probably not a good idea not
974 to limit the number of prefetches at all, as their execution also takes some
975 time). */
976 100, /* number of parallel prefetches */
977 2, /* Branch cost */
978 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
979 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
980 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
981 COSTS_N_INSNS (2), /* cost of FABS instruction. */
982 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
983 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
985 bdver1_memcpy,
986 bdver1_memset,
987 6, /* scalar_stmt_cost. */
988 4, /* scalar load_cost. */
989 4, /* scalar_store_cost. */
990 6, /* vec_stmt_cost. */
991 0, /* vec_to_scalar_cost. */
992 2, /* scalar_to_vec_cost. */
993 4, /* vec_align_load_cost. */
994 4, /* vec_unalign_load_cost. */
995 4, /* vec_store_cost. */
996 2, /* cond_taken_branch_cost. */
997 1, /* cond_not_taken_branch_cost. */
998 };
1000 /* BDVER2 has an optimized REP instruction for medium sized blocks, but for
1001 very small blocks it is better to use a loop. For large blocks, a libcall
1002 can do non-temporal accesses and beat inline code considerably. */
1004 static stringop_algs bdver2_memcpy[2] = {
1005 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1006 {-1, rep_prefix_4_byte, false}}},
1007 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1008 {-1, libcall, false}}}};
1009 static stringop_algs bdver2_memset[2] = {
1010 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1011 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1012 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1013 {-1, libcall, false}}}};
1015 const struct processor_costs bdver2_cost = {
1016 COSTS_N_INSNS (1), /* cost of an add instruction */
1017 COSTS_N_INSNS (1), /* cost of a lea instruction */
1018 COSTS_N_INSNS (1), /* variable shift costs */
1019 COSTS_N_INSNS (1), /* constant shift costs */
1020 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1021 COSTS_N_INSNS (4), /* HI */
1022 COSTS_N_INSNS (4), /* SI */
1023 COSTS_N_INSNS (6), /* DI */
1024 COSTS_N_INSNS (6)}, /* other */
1025 0, /* cost of multiply per each bit set */
1026 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1027 COSTS_N_INSNS (35), /* HI */
1028 COSTS_N_INSNS (51), /* SI */
1029 COSTS_N_INSNS (83), /* DI */
1030 COSTS_N_INSNS (83)}, /* other */
1031 COSTS_N_INSNS (1), /* cost of movsx */
1032 COSTS_N_INSNS (1), /* cost of movzx */
1033 8, /* "large" insn */
1034 9, /* MOVE_RATIO */
1035 4, /* cost for loading QImode using movzbl */
1036 {5, 5, 4}, /* cost of loading integer registers
1037 in QImode, HImode and SImode.
1038 Relative to reg-reg move (2). */
1039 {4, 4, 4}, /* cost of storing integer registers */
1040 2, /* cost of reg,reg fld/fst */
1041 {5, 5, 12}, /* cost of loading fp registers
1042 in SFmode, DFmode and XFmode */
1043 {4, 4, 8}, /* cost of storing fp registers
1044 in SFmode, DFmode and XFmode */
1045 2, /* cost of moving MMX register */
1046 {4, 4}, /* cost of loading MMX registers
1047 in SImode and DImode */
1048 {4, 4}, /* cost of storing MMX registers
1049 in SImode and DImode */
1050 2, /* cost of moving SSE register */
1051 {4, 4, 4}, /* cost of loading SSE registers
1052 in SImode, DImode and TImode */
1053 {4, 4, 4}, /* cost of storing SSE registers
1054 in SImode, DImode and TImode */
1055 2, /* MMX or SSE register to integer */
1056 /* On K8:
1057 MOVD reg64, xmmreg Double FSTORE 4
1058 MOVD reg32, xmmreg Double FSTORE 4
1059 On AMDFAM10:
1060 MOVD reg64, xmmreg Double FADD 3
1061 1/1 1/1
1062 MOVD reg32, xmmreg Double FADD 3
1063 1/1 1/1 */
1064 16, /* size of l1 cache. */
1065 2048, /* size of l2 cache. */
1066 64, /* size of prefetch block */
1067 /* New AMD processors never drop prefetches; if they cannot be performed
1068 immediately, they are queued. We set the number of simultaneous prefetches
1069 to a large constant to reflect this (it is probably not a good idea not
1070 to limit the number of prefetches at all, as their execution also takes some
1071 time). */
1072 100, /* number of parallel prefetches */
1073 2, /* Branch cost */
1074 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1075 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1076 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1077 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1078 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1079 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1081 bdver2_memcpy,
1082 bdver2_memset,
1083 6, /* scalar_stmt_cost. */
1084 4, /* scalar load_cost. */
1085 4, /* scalar_store_cost. */
1086 6, /* vec_stmt_cost. */
1087 0, /* vec_to_scalar_cost. */
1088 2, /* scalar_to_vec_cost. */
1089 4, /* vec_align_load_cost. */
1090 4, /* vec_unalign_load_cost. */
1091 4, /* vec_store_cost. */
1092 2, /* cond_taken_branch_cost. */
1093 1, /* cond_not_taken_branch_cost. */
1094 };
1097 /* BDVER3 has an optimized REP instruction for medium sized blocks, but for
1098 very small blocks it is better to use a loop. For large blocks, a libcall
1099 can do non-temporal accesses and beat inline code considerably. */
1100 static stringop_algs bdver3_memcpy[2] = {
1101 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1102 {-1, rep_prefix_4_byte, false}}},
1103 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1104 {-1, libcall, false}}}};
1105 static stringop_algs bdver3_memset[2] = {
1106 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1107 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1108 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1109 {-1, libcall, false}}}};
1110 struct processor_costs bdver3_cost = {
1111 COSTS_N_INSNS (1), /* cost of an add instruction */
1112 COSTS_N_INSNS (1), /* cost of a lea instruction */
1113 COSTS_N_INSNS (1), /* variable shift costs */
1114 COSTS_N_INSNS (1), /* constant shift costs */
1115 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1116 COSTS_N_INSNS (4), /* HI */
1117 COSTS_N_INSNS (4), /* SI */
1118 COSTS_N_INSNS (6), /* DI */
1119 COSTS_N_INSNS (6)}, /* other */
1120 0, /* cost of multiply per each bit set */
1121 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1122 COSTS_N_INSNS (35), /* HI */
1123 COSTS_N_INSNS (51), /* SI */
1124 COSTS_N_INSNS (83), /* DI */
1125 COSTS_N_INSNS (83)}, /* other */
1126 COSTS_N_INSNS (1), /* cost of movsx */
1127 COSTS_N_INSNS (1), /* cost of movzx */
1128 8, /* "large" insn */
1129 9, /* MOVE_RATIO */
1130 4, /* cost for loading QImode using movzbl */
1131 {5, 5, 4}, /* cost of loading integer registers
1132 in QImode, HImode and SImode.
1133 Relative to reg-reg move (2). */
1134 {4, 4, 4}, /* cost of storing integer registers */
1135 2, /* cost of reg,reg fld/fst */
1136 {5, 5, 12}, /* cost of loading fp registers
1137 in SFmode, DFmode and XFmode */
1138 {4, 4, 8}, /* cost of storing fp registers
1139 in SFmode, DFmode and XFmode */
1140 2, /* cost of moving MMX register */
1141 {4, 4}, /* cost of loading MMX registers
1142 in SImode and DImode */
1143 {4, 4}, /* cost of storing MMX registers
1144 in SImode and DImode */
1145 2, /* cost of moving SSE register */
1146 {4, 4, 4}, /* cost of loading SSE registers
1147 in SImode, DImode and TImode */
1148 {4, 4, 4}, /* cost of storing SSE registers
1149 in SImode, DImode and TImode */
1150 2, /* MMX or SSE register to integer */
1151 16, /* size of l1 cache. */
1152 2048, /* size of l2 cache. */
1153 64, /* size of prefetch block */
1154 /* New AMD processors never drop prefetches; if they cannot be performed
1155 immediately, they are queued. We set the number of simultaneous prefetches
1156 to a large constant to reflect this (it is probably not a good idea not
1157 to limit the number of prefetches at all, as their execution also takes some
1158 time). */
1159 100, /* number of parallel prefetches */
1160 2, /* Branch cost */
1161 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1162 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1163 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1164 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1165 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1166 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1168 bdver3_memcpy,
1169 bdver3_memset,
1170 6, /* scalar_stmt_cost. */
1171 4, /* scalar load_cost. */
1172 4, /* scalar_store_cost. */
1173 6, /* vec_stmt_cost. */
1174 0, /* vec_to_scalar_cost. */
1175 2, /* scalar_to_vec_cost. */
1176 4, /* vec_align_load_cost. */
1177 4, /* vec_unalign_load_cost. */
1178 4, /* vec_store_cost. */
1179 2, /* cond_taken_branch_cost. */
1180 1, /* cond_not_taken_branch_cost. */
1181 };
1183 /* BDVER4 has an optimized REP instruction for medium sized blocks, but for
1184 very small blocks it is better to use a loop. For large blocks, a libcall
1185 can do non-temporal accesses and beat inline code considerably. */
1186 static stringop_algs bdver4_memcpy[2] = {
1187 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1188 {-1, rep_prefix_4_byte, false}}},
1189 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1190 {-1, libcall, false}}}};
1191 static stringop_algs bdver4_memset[2] = {
1192 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1193 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1194 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1195 {-1, libcall, false}}}};
1196 struct processor_costs bdver4_cost = {
1197 COSTS_N_INSNS (1), /* cost of an add instruction */
1198 COSTS_N_INSNS (1), /* cost of a lea instruction */
1199 COSTS_N_INSNS (1), /* variable shift costs */
1200 COSTS_N_INSNS (1), /* constant shift costs */
1201 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1202 COSTS_N_INSNS (4), /* HI */
1203 COSTS_N_INSNS (4), /* SI */
1204 COSTS_N_INSNS (6), /* DI */
1205 COSTS_N_INSNS (6)}, /* other */
1206 0, /* cost of multiply per each bit set */
1207 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1208 COSTS_N_INSNS (35), /* HI */
1209 COSTS_N_INSNS (51), /* SI */
1210 COSTS_N_INSNS (83), /* DI */
1211 COSTS_N_INSNS (83)}, /* other */
1212 COSTS_N_INSNS (1), /* cost of movsx */
1213 COSTS_N_INSNS (1), /* cost of movzx */
1214 8, /* "large" insn */
1215 9, /* MOVE_RATIO */
1216 4, /* cost for loading QImode using movzbl */
1217 {5, 5, 4}, /* cost of loading integer registers
1218 in QImode, HImode and SImode.
1219 Relative to reg-reg move (2). */
1220 {4, 4, 4}, /* cost of storing integer registers */
1221 2, /* cost of reg,reg fld/fst */
1222 {5, 5, 12}, /* cost of loading fp registers
1223 in SFmode, DFmode and XFmode */
1224 {4, 4, 8}, /* cost of storing fp registers
1225 in SFmode, DFmode and XFmode */
1226 2, /* cost of moving MMX register */
1227 {4, 4}, /* cost of loading MMX registers
1228 in SImode and DImode */
1229 {4, 4}, /* cost of storing MMX registers
1230 in SImode and DImode */
1231 2, /* cost of moving SSE register */
1232 {4, 4, 4}, /* cost of loading SSE registers
1233 in SImode, DImode and TImode */
1234 {4, 4, 4}, /* cost of storing SSE registers
1235 in SImode, DImode and TImode */
1236 2, /* MMX or SSE register to integer */
1237 16, /* size of l1 cache. */
1238 2048, /* size of l2 cache. */
1239 64, /* size of prefetch block */
1240 /* New AMD processors never drop prefetches; if they cannot be performed
1241 immediately, they are queued. We set the number of simultaneous prefetches
1242 to a large constant to reflect this (it is probably not a good idea not
1243 to limit the number of prefetches at all, as their execution also takes some
1244 time). */
1245 100, /* number of parallel prefetches */
1246 2, /* Branch cost */
1247 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1248 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1249 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1250 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1251 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1252 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1254 bdver4_memcpy,
1255 bdver4_memset,
1256 6, /* scalar_stmt_cost. */
1257 4, /* scalar load_cost. */
1258 4, /* scalar_store_cost. */
1259 6, /* vec_stmt_cost. */
1260 0, /* vec_to_scalar_cost. */
1261 2, /* scalar_to_vec_cost. */
1262 4, /* vec_align_load_cost. */
1263 4, /* vec_unalign_load_cost. */
1264 4, /* vec_store_cost. */
1265 2, /* cond_taken_branch_cost. */
1266 1, /* cond_not_taken_branch_cost. */
1267 };
1269 /* BTVER1 has an optimized REP instruction for medium sized blocks, but for
1270 very small blocks it is better to use a loop. For large blocks, a libcall can
1271 do non-temporal accesses and beat inline code considerably. */
1272 static stringop_algs btver1_memcpy[2] = {
1273 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1274 {-1, rep_prefix_4_byte, false}}},
1275 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1276 {-1, libcall, false}}}};
1277 static stringop_algs btver1_memset[2] = {
1278 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1279 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1280 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1281 {-1, libcall, false}}}};
1282 const struct processor_costs btver1_cost = {
1283 COSTS_N_INSNS (1), /* cost of an add instruction */
1284 COSTS_N_INSNS (2), /* cost of a lea instruction */
1285 COSTS_N_INSNS (1), /* variable shift costs */
1286 COSTS_N_INSNS (1), /* constant shift costs */
1287 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1288 COSTS_N_INSNS (4), /* HI */
1289 COSTS_N_INSNS (3), /* SI */
1290 COSTS_N_INSNS (4), /* DI */
1291 COSTS_N_INSNS (5)}, /* other */
1292 0, /* cost of multiply per each bit set */
1293 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1294 COSTS_N_INSNS (35), /* HI */
1295 COSTS_N_INSNS (51), /* SI */
1296 COSTS_N_INSNS (83), /* DI */
1297 COSTS_N_INSNS (83)}, /* other */
1298 COSTS_N_INSNS (1), /* cost of movsx */
1299 COSTS_N_INSNS (1), /* cost of movzx */
1300 8, /* "large" insn */
1301 9, /* MOVE_RATIO */
1302 4, /* cost for loading QImode using movzbl */
1303 {3, 4, 3}, /* cost of loading integer registers
1304 in QImode, HImode and SImode.
1305 Relative to reg-reg move (2). */
1306 {3, 4, 3}, /* cost of storing integer registers */
1307 4, /* cost of reg,reg fld/fst */
1308 {4, 4, 12}, /* cost of loading fp registers
1309 in SFmode, DFmode and XFmode */
1310 {6, 6, 8}, /* cost of storing fp registers
1311 in SFmode, DFmode and XFmode */
1312 2, /* cost of moving MMX register */
1313 {3, 3}, /* cost of loading MMX registers
1314 in SImode and DImode */
1315 {4, 4}, /* cost of storing MMX registers
1316 in SImode and DImode */
1317 2, /* cost of moving SSE register */
1318 {4, 4, 3}, /* cost of loading SSE registers
1319 in SImode, DImode and TImode */
1320 {4, 4, 5}, /* cost of storing SSE registers
1321 in SImode, DImode and TImode */
1322 3, /* MMX or SSE register to integer */
1323 /* On K8:
1324 MOVD reg64, xmmreg Double FSTORE 4
1325 MOVD reg32, xmmreg Double FSTORE 4
1326 On AMDFAM10:
1327 MOVD reg64, xmmreg Double FADD 3
1328 1/1 1/1
1329 MOVD reg32, xmmreg Double FADD 3
1330 1/1 1/1 */
1331 32, /* size of l1 cache. */
1332 512, /* size of l2 cache. */
1333 64, /* size of prefetch block */
1334 100, /* number of parallel prefetches */
1335 2, /* Branch cost */
1336 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1337 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1338 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1339 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1340 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1341 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1343 btver1_memcpy,
1344 btver1_memset,
1345 4, /* scalar_stmt_cost. */
1346 2, /* scalar load_cost. */
1347 2, /* scalar_store_cost. */
1348 6, /* vec_stmt_cost. */
1349 0, /* vec_to_scalar_cost. */
1350 2, /* scalar_to_vec_cost. */
1351 2, /* vec_align_load_cost. */
1352 2, /* vec_unalign_load_cost. */
1353 2, /* vec_store_cost. */
1354 2, /* cond_taken_branch_cost. */
1355 1, /* cond_not_taken_branch_cost. */
1356 };
1358 static stringop_algs btver2_memcpy[2] = {
1359 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1360 {-1, rep_prefix_4_byte, false}}},
1361 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1362 {-1, libcall, false}}}};
1363 static stringop_algs btver2_memset[2] = {
1364 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1365 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1366 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1367 {-1, libcall, false}}}};
1368 const struct processor_costs btver2_cost = {
1369 COSTS_N_INSNS (1), /* cost of an add instruction */
1370 COSTS_N_INSNS (2), /* cost of a lea instruction */
1371 COSTS_N_INSNS (1), /* variable shift costs */
1372 COSTS_N_INSNS (1), /* constant shift costs */
1373 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1374 COSTS_N_INSNS (4), /* HI */
1375 COSTS_N_INSNS (3), /* SI */
1376 COSTS_N_INSNS (4), /* DI */
1377 COSTS_N_INSNS (5)}, /* other */
1378 0, /* cost of multiply per each bit set */
1379 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1380 COSTS_N_INSNS (35), /* HI */
1381 COSTS_N_INSNS (51), /* SI */
1382 COSTS_N_INSNS (83), /* DI */
1383 COSTS_N_INSNS (83)}, /* other */
1384 COSTS_N_INSNS (1), /* cost of movsx */
1385 COSTS_N_INSNS (1), /* cost of movzx */
1386 8, /* "large" insn */
1387 9, /* MOVE_RATIO */
1388 4, /* cost for loading QImode using movzbl */
1389 {3, 4, 3}, /* cost of loading integer registers
1390 in QImode, HImode and SImode.
1391 Relative to reg-reg move (2). */
1392 {3, 4, 3}, /* cost of storing integer registers */
1393 4, /* cost of reg,reg fld/fst */
1394 {4, 4, 12}, /* cost of loading fp registers
1395 in SFmode, DFmode and XFmode */
1396 {6, 6, 8}, /* cost of storing fp registers
1397 in SFmode, DFmode and XFmode */
1398 2, /* cost of moving MMX register */
1399 {3, 3}, /* cost of loading MMX registers
1400 in SImode and DImode */
1401 {4, 4}, /* cost of storing MMX registers
1402 in SImode and DImode */
1403 2, /* cost of moving SSE register */
1404 {4, 4, 3}, /* cost of loading SSE registers
1405 in SImode, DImode and TImode */
1406 {4, 4, 5}, /* cost of storing SSE registers
1407 in SImode, DImode and TImode */
1408 3, /* MMX or SSE register to integer */
1409 /* On K8:
1410 MOVD reg64, xmmreg Double FSTORE 4
1411 MOVD reg32, xmmreg Double FSTORE 4
1412 On AMDFAM10:
1413 MOVD reg64, xmmreg Double FADD 3
1414 1/1 1/1
1415 MOVD reg32, xmmreg Double FADD 3
1416 1/1 1/1 */
1417 32, /* size of l1 cache. */
1418 2048, /* size of l2 cache. */
1419 64, /* size of prefetch block */
1420 100, /* number of parallel prefetches */
1421 2, /* Branch cost */
1422 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1423 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1424 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1425 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1426 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1427 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1428 btver2_memcpy,
1429 btver2_memset,
1430 4, /* scalar_stmt_cost. */
1431 2, /* scalar load_cost. */
1432 2, /* scalar_store_cost. */
1433 6, /* vec_stmt_cost. */
1434 0, /* vec_to_scalar_cost. */
1435 2, /* scalar_to_vec_cost. */
1436 2, /* vec_align_load_cost. */
1437 2, /* vec_unalign_load_cost. */
1438 2, /* vec_store_cost. */
1439 2, /* cond_taken_branch_cost. */
1440 1, /* cond_not_taken_branch_cost. */
1443 static stringop_algs pentium4_memcpy[2] = {
1444 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1445 DUMMY_STRINGOP_ALGS};
1446 static stringop_algs pentium4_memset[2] = {
1447 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1448 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1449 DUMMY_STRINGOP_ALGS};
1451 static const
1452 struct processor_costs pentium4_cost = {
1453 COSTS_N_INSNS (1), /* cost of an add instruction */
1454 COSTS_N_INSNS (3), /* cost of a lea instruction */
1455 COSTS_N_INSNS (4), /* variable shift costs */
1456 COSTS_N_INSNS (4), /* constant shift costs */
1457 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1458 COSTS_N_INSNS (15), /* HI */
1459 COSTS_N_INSNS (15), /* SI */
1460 COSTS_N_INSNS (15), /* DI */
1461 COSTS_N_INSNS (15)}, /* other */
1462 0, /* cost of multiply per each bit set */
1463 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1464 COSTS_N_INSNS (56), /* HI */
1465 COSTS_N_INSNS (56), /* SI */
1466 COSTS_N_INSNS (56), /* DI */
1467 COSTS_N_INSNS (56)}, /* other */
1468 COSTS_N_INSNS (1), /* cost of movsx */
1469 COSTS_N_INSNS (1), /* cost of movzx */
1470 16, /* "large" insn */
1471 6, /* MOVE_RATIO */
1472 2, /* cost for loading QImode using movzbl */
1473 {4, 5, 4}, /* cost of loading integer registers
1474 in QImode, HImode and SImode.
1475 Relative to reg-reg move (2). */
1476 {2, 3, 2}, /* cost of storing integer registers */
1477 2, /* cost of reg,reg fld/fst */
1478 {2, 2, 6}, /* cost of loading fp registers
1479 in SFmode, DFmode and XFmode */
1480 {4, 4, 6}, /* cost of storing fp registers
1481 in SFmode, DFmode and XFmode */
1482 2, /* cost of moving MMX register */
1483 {2, 2}, /* cost of loading MMX registers
1484 in SImode and DImode */
1485 {2, 2}, /* cost of storing MMX registers
1486 in SImode and DImode */
1487 12, /* cost of moving SSE register */
1488 {12, 12, 12}, /* cost of loading SSE registers
1489 in SImode, DImode and TImode */
1490 {2, 2, 8}, /* cost of storing SSE registers
1491 in SImode, DImode and TImode */
1492 10, /* MMX or SSE register to integer */
1493 8, /* size of l1 cache. */
1494 256, /* size of l2 cache. */
1495 64, /* size of prefetch block */
1496 6, /* number of parallel prefetches */
1497 2, /* Branch cost */
1498 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1499 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1500 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1501 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1502 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1503 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1504 pentium4_memcpy,
1505 pentium4_memset,
1506 1, /* scalar_stmt_cost. */
1507 1, /* scalar load_cost. */
1508 1, /* scalar_store_cost. */
1509 1, /* vec_stmt_cost. */
1510 1, /* vec_to_scalar_cost. */
1511 1, /* scalar_to_vec_cost. */
1512 1, /* vec_align_load_cost. */
1513 2, /* vec_unalign_load_cost. */
1514 1, /* vec_store_cost. */
1515 3, /* cond_taken_branch_cost. */
1516 1, /* cond_not_taken_branch_cost. */
1519 static stringop_algs nocona_memcpy[2] = {
1520 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1521 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1522 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1524 static stringop_algs nocona_memset[2] = {
1525 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1526 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1527 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1528 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1530 static const
1531 struct processor_costs nocona_cost = {
1532 COSTS_N_INSNS (1), /* cost of an add instruction */
1533 COSTS_N_INSNS (1), /* cost of a lea instruction */
1534 COSTS_N_INSNS (1), /* variable shift costs */
1535 COSTS_N_INSNS (1), /* constant shift costs */
1536 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1537 COSTS_N_INSNS (10), /* HI */
1538 COSTS_N_INSNS (10), /* SI */
1539 COSTS_N_INSNS (10), /* DI */
1540 COSTS_N_INSNS (10)}, /* other */
1541 0, /* cost of multiply per each bit set */
1542 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1543 COSTS_N_INSNS (66), /* HI */
1544 COSTS_N_INSNS (66), /* SI */
1545 COSTS_N_INSNS (66), /* DI */
1546 COSTS_N_INSNS (66)}, /* other */
1547 COSTS_N_INSNS (1), /* cost of movsx */
1548 COSTS_N_INSNS (1), /* cost of movzx */
1549 16, /* "large" insn */
1550 17, /* MOVE_RATIO */
1551 4, /* cost for loading QImode using movzbl */
1552 {4, 4, 4}, /* cost of loading integer registers
1553 in QImode, HImode and SImode.
1554 Relative to reg-reg move (2). */
1555 {4, 4, 4}, /* cost of storing integer registers */
1556 3, /* cost of reg,reg fld/fst */
1557 {12, 12, 12}, /* cost of loading fp registers
1558 in SFmode, DFmode and XFmode */
1559 {4, 4, 4}, /* cost of storing fp registers
1560 in SFmode, DFmode and XFmode */
1561 6, /* cost of moving MMX register */
1562 {12, 12}, /* cost of loading MMX registers
1563 in SImode and DImode */
1564 {12, 12}, /* cost of storing MMX registers
1565 in SImode and DImode */
1566 6, /* cost of moving SSE register */
1567 {12, 12, 12}, /* cost of loading SSE registers
1568 in SImode, DImode and TImode */
1569 {12, 12, 12}, /* cost of storing SSE registers
1570 in SImode, DImode and TImode */
1571 8, /* MMX or SSE register to integer */
1572 8, /* size of l1 cache. */
1573 1024, /* size of l2 cache. */
1574 64, /* size of prefetch block */
1575 8, /* number of parallel prefetches */
1576 1, /* Branch cost */
1577 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1578 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1579 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1580 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1581 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1582 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1583 nocona_memcpy,
1584 nocona_memset,
1585 1, /* scalar_stmt_cost. */
1586 1, /* scalar load_cost. */
1587 1, /* scalar_store_cost. */
1588 1, /* vec_stmt_cost. */
1589 1, /* vec_to_scalar_cost. */
1590 1, /* scalar_to_vec_cost. */
1591 1, /* vec_align_load_cost. */
1592 2, /* vec_unalign_load_cost. */
1593 1, /* vec_store_cost. */
1594 3, /* cond_taken_branch_cost. */
1595 1, /* cond_not_taken_branch_cost. */
1598 static stringop_algs atom_memcpy[2] = {
1599 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1600 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1601 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1602 static stringop_algs atom_memset[2] = {
1603 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1604 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1605 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1606 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1607 static const
1608 struct processor_costs atom_cost = {
1609 COSTS_N_INSNS (1), /* cost of an add instruction */
1610 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1611 COSTS_N_INSNS (1), /* variable shift costs */
1612 COSTS_N_INSNS (1), /* constant shift costs */
1613 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1614 COSTS_N_INSNS (4), /* HI */
1615 COSTS_N_INSNS (3), /* SI */
1616 COSTS_N_INSNS (4), /* DI */
1617 COSTS_N_INSNS (2)}, /* other */
1618 0, /* cost of multiply per each bit set */
1619 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1620 COSTS_N_INSNS (26), /* HI */
1621 COSTS_N_INSNS (42), /* SI */
1622 COSTS_N_INSNS (74), /* DI */
1623 COSTS_N_INSNS (74)}, /* other */
1624 COSTS_N_INSNS (1), /* cost of movsx */
1625 COSTS_N_INSNS (1), /* cost of movzx */
1626 8, /* "large" insn */
1627 17, /* MOVE_RATIO */
1628 4, /* cost for loading QImode using movzbl */
1629 {4, 4, 4}, /* cost of loading integer registers
1630 in QImode, HImode and SImode.
1631 Relative to reg-reg move (2). */
1632 {4, 4, 4}, /* cost of storing integer registers */
1633 4, /* cost of reg,reg fld/fst */
1634 {12, 12, 12}, /* cost of loading fp registers
1635 in SFmode, DFmode and XFmode */
1636 {6, 6, 8}, /* cost of storing fp registers
1637 in SFmode, DFmode and XFmode */
1638 2, /* cost of moving MMX register */
1639 {8, 8}, /* cost of loading MMX registers
1640 in SImode and DImode */
1641 {8, 8}, /* cost of storing MMX registers
1642 in SImode and DImode */
1643 2, /* cost of moving SSE register */
1644 {8, 8, 8}, /* cost of loading SSE registers
1645 in SImode, DImode and TImode */
1646 {8, 8, 8}, /* cost of storing SSE registers
1647 in SImode, DImode and TImode */
1648 5, /* MMX or SSE register to integer */
1649 32, /* size of l1 cache. */
1650 256, /* size of l2 cache. */
1651 64, /* size of prefetch block */
1652 6, /* number of parallel prefetches */
1653 3, /* Branch cost */
1654 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1655 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1656 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1657 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1658 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1659 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1660 atom_memcpy,
1661 atom_memset,
1662 1, /* scalar_stmt_cost. */
1663 1, /* scalar load_cost. */
1664 1, /* scalar_store_cost. */
1665 1, /* vec_stmt_cost. */
1666 1, /* vec_to_scalar_cost. */
1667 1, /* scalar_to_vec_cost. */
1668 1, /* vec_align_load_cost. */
1669 2, /* vec_unalign_load_cost. */
1670 1, /* vec_store_cost. */
1671 3, /* cond_taken_branch_cost. */
1672 1, /* cond_not_taken_branch_cost. */
1675 static stringop_algs slm_memcpy[2] = {
1676 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1677 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1678 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1679 static stringop_algs slm_memset[2] = {
1680 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1681 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1682 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1683 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1684 static const
1685 struct processor_costs slm_cost = {
1686 COSTS_N_INSNS (1), /* cost of an add instruction */
1687 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1688 COSTS_N_INSNS (1), /* variable shift costs */
1689 COSTS_N_INSNS (1), /* constant shift costs */
1690 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1691 COSTS_N_INSNS (3), /* HI */
1692 COSTS_N_INSNS (3), /* SI */
1693 COSTS_N_INSNS (4), /* DI */
1694 COSTS_N_INSNS (2)}, /* other */
1695 0, /* cost of multiply per each bit set */
1696 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1697 COSTS_N_INSNS (26), /* HI */
1698 COSTS_N_INSNS (42), /* SI */
1699 COSTS_N_INSNS (74), /* DI */
1700 COSTS_N_INSNS (74)}, /* other */
1701 COSTS_N_INSNS (1), /* cost of movsx */
1702 COSTS_N_INSNS (1), /* cost of movzx */
1703 8, /* "large" insn */
1704 17, /* MOVE_RATIO */
1705 4, /* cost for loading QImode using movzbl */
1706 {4, 4, 4}, /* cost of loading integer registers
1707 in QImode, HImode and SImode.
1708 Relative to reg-reg move (2). */
1709 {4, 4, 4}, /* cost of storing integer registers */
1710 4, /* cost of reg,reg fld/fst */
1711 {12, 12, 12}, /* cost of loading fp registers
1712 in SFmode, DFmode and XFmode */
1713 {6, 6, 8}, /* cost of storing fp registers
1714 in SFmode, DFmode and XFmode */
1715 2, /* cost of moving MMX register */
1716 {8, 8}, /* cost of loading MMX registers
1717 in SImode and DImode */
1718 {8, 8}, /* cost of storing MMX registers
1719 in SImode and DImode */
1720 2, /* cost of moving SSE register */
1721 {8, 8, 8}, /* cost of loading SSE registers
1722 in SImode, DImode and TImode */
1723 {8, 8, 8}, /* cost of storing SSE registers
1724 in SImode, DImode and TImode */
1725 5, /* MMX or SSE register to integer */
1726 32, /* size of l1 cache. */
1727 256, /* size of l2 cache. */
1728 64, /* size of prefetch block */
1729 6, /* number of parallel prefetches */
1730 3, /* Branch cost */
1731 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1732 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1733 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1734 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1735 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1736 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1737 slm_memcpy,
1738 slm_memset,
1739 1, /* scalar_stmt_cost. */
1740 1, /* scalar load_cost. */
1741 1, /* scalar_store_cost. */
1742 1, /* vec_stmt_cost. */
1743 4, /* vec_to_scalar_cost. */
1744 1, /* scalar_to_vec_cost. */
1745 1, /* vec_align_load_cost. */
1746 2, /* vec_unalign_load_cost. */
1747 1, /* vec_store_cost. */
1748 3, /* cond_taken_branch_cost. */
1749 1, /* cond_not_taken_branch_cost. */
1752 static stringop_algs intel_memcpy[2] = {
1753 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1754 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1755 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1756 static stringop_algs intel_memset[2] = {
1757 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1758 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1759 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1760 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1761 static const
1762 struct processor_costs intel_cost = {
1763 COSTS_N_INSNS (1), /* cost of an add instruction */
1764 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1765 COSTS_N_INSNS (1), /* variable shift costs */
1766 COSTS_N_INSNS (1), /* constant shift costs */
1767 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1768 COSTS_N_INSNS (3), /* HI */
1769 COSTS_N_INSNS (3), /* SI */
1770 COSTS_N_INSNS (4), /* DI */
1771 COSTS_N_INSNS (2)}, /* other */
1772 0, /* cost of multiply per each bit set */
1773 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1774 COSTS_N_INSNS (26), /* HI */
1775 COSTS_N_INSNS (42), /* SI */
1776 COSTS_N_INSNS (74), /* DI */
1777 COSTS_N_INSNS (74)}, /* other */
1778 COSTS_N_INSNS (1), /* cost of movsx */
1779 COSTS_N_INSNS (1), /* cost of movzx */
1780 8, /* "large" insn */
1781 17, /* MOVE_RATIO */
1782 4, /* cost for loading QImode using movzbl */
1783 {4, 4, 4}, /* cost of loading integer registers
1784 in QImode, HImode and SImode.
1785 Relative to reg-reg move (2). */
1786 {4, 4, 4}, /* cost of storing integer registers */
1787 4, /* cost of reg,reg fld/fst */
1788 {12, 12, 12}, /* cost of loading fp registers
1789 in SFmode, DFmode and XFmode */
1790 {6, 6, 8}, /* cost of storing fp registers
1791 in SFmode, DFmode and XFmode */
1792 2, /* cost of moving MMX register */
1793 {8, 8}, /* cost of loading MMX registers
1794 in SImode and DImode */
1795 {8, 8}, /* cost of storing MMX registers
1796 in SImode and DImode */
1797 2, /* cost of moving SSE register */
1798 {8, 8, 8}, /* cost of loading SSE registers
1799 in SImode, DImode and TImode */
1800 {8, 8, 8}, /* cost of storing SSE registers
1801 in SImode, DImode and TImode */
1802 5, /* MMX or SSE register to integer */
1803 32, /* size of l1 cache. */
1804 256, /* size of l2 cache. */
1805 64, /* size of prefetch block */
1806 6, /* number of parallel prefetches */
1807 3, /* Branch cost */
1808 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1809 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1810 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1811 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1812 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1813 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1814 intel_memcpy,
1815 intel_memset,
1816 1, /* scalar_stmt_cost. */
1817 1, /* scalar load_cost. */
1818 1, /* scalar_store_cost. */
1819 1, /* vec_stmt_cost. */
1820 4, /* vec_to_scalar_cost. */
1821 1, /* scalar_to_vec_cost. */
1822 1, /* vec_align_load_cost. */
1823 2, /* vec_unalign_load_cost. */
1824 1, /* vec_store_cost. */
1825 3, /* cond_taken_branch_cost. */
1826 1, /* cond_not_taken_branch_cost. */
1829 /* Generic should produce code tuned for Core-i7 (and newer chips)
1830 and btver1 (and newer chips). */
1832 static stringop_algs generic_memcpy[2] = {
1833 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1834 {-1, libcall, false}}},
1835 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1836 {-1, libcall, false}}}};
1837 static stringop_algs generic_memset[2] = {
1838 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1839 {-1, libcall, false}}},
1840 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1841 {-1, libcall, false}}}};
1842 static const
1843 struct processor_costs generic_cost = {
1844 COSTS_N_INSNS (1), /* cost of an add instruction */
1845 /* On all chips taken into consideration, lea is 2 cycles or more.  With
1846 this cost, however, our current implementation of synth_mult results in
1847 the use of unnecessary temporary registers, causing regressions on several
1848 SPECfp benchmarks. */
1849 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1850 COSTS_N_INSNS (1), /* variable shift costs */
1851 COSTS_N_INSNS (1), /* constant shift costs */
1852 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1853 COSTS_N_INSNS (4), /* HI */
1854 COSTS_N_INSNS (3), /* SI */
1855 COSTS_N_INSNS (4), /* DI */
1856 COSTS_N_INSNS (2)}, /* other */
1857 0, /* cost of multiply per each bit set */
1858 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1859 COSTS_N_INSNS (26), /* HI */
1860 COSTS_N_INSNS (42), /* SI */
1861 COSTS_N_INSNS (74), /* DI */
1862 COSTS_N_INSNS (74)}, /* other */
1863 COSTS_N_INSNS (1), /* cost of movsx */
1864 COSTS_N_INSNS (1), /* cost of movzx */
1865 8, /* "large" insn */
1866 17, /* MOVE_RATIO */
1867 4, /* cost for loading QImode using movzbl */
1868 {4, 4, 4}, /* cost of loading integer registers
1869 in QImode, HImode and SImode.
1870 Relative to reg-reg move (2). */
1871 {4, 4, 4}, /* cost of storing integer registers */
1872 4, /* cost of reg,reg fld/fst */
1873 {12, 12, 12}, /* cost of loading fp registers
1874 in SFmode, DFmode and XFmode */
1875 {6, 6, 8}, /* cost of storing fp registers
1876 in SFmode, DFmode and XFmode */
1877 2, /* cost of moving MMX register */
1878 {8, 8}, /* cost of loading MMX registers
1879 in SImode and DImode */
1880 {8, 8}, /* cost of storing MMX registers
1881 in SImode and DImode */
1882 2, /* cost of moving SSE register */
1883 {8, 8, 8}, /* cost of loading SSE registers
1884 in SImode, DImode and TImode */
1885 {8, 8, 8}, /* cost of storing SSE registers
1886 in SImode, DImode and TImode */
1887 5, /* MMX or SSE register to integer */
1888 32, /* size of l1 cache. */
1889 512, /* size of l2 cache. */
1890 64, /* size of prefetch block */
1891 6, /* number of parallel prefetches */
1892 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1893 value is increased to the perhaps more appropriate value of 5. */
1894 3, /* Branch cost */
1895 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1896 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1897 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1898 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1899 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1900 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1901 generic_memcpy,
1902 generic_memset,
1903 1, /* scalar_stmt_cost. */
1904 1, /* scalar load_cost. */
1905 1, /* scalar_store_cost. */
1906 1, /* vec_stmt_cost. */
1907 1, /* vec_to_scalar_cost. */
1908 1, /* scalar_to_vec_cost. */
1909 1, /* vec_align_load_cost. */
1910 2, /* vec_unalign_load_cost. */
1911 1, /* vec_store_cost. */
1912 3, /* cond_taken_branch_cost. */
1913 1, /* cond_not_taken_branch_cost. */
1916 /* core_cost should produce code tuned for the Core family of CPUs. */
1917 static stringop_algs core_memcpy[2] = {
1918 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1919 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1920 {-1, libcall, false}}}};
1921 static stringop_algs core_memset[2] = {
1922 {libcall, {{6, loop_1_byte, true},
1923 {24, loop, true},
1924 {8192, rep_prefix_4_byte, true},
1925 {-1, libcall, false}}},
1926 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1927 {-1, libcall, false}}}};
1929 static const
1930 struct processor_costs core_cost = {
1931 COSTS_N_INSNS (1), /* cost of an add instruction */
1932 /* On all chips taken into consideration, lea is 2 cycles or more.  With
1933 this cost, however, our current implementation of synth_mult results in
1934 the use of unnecessary temporary registers, causing regressions on several
1935 SPECfp benchmarks. */
1936 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1937 COSTS_N_INSNS (1), /* variable shift costs */
1938 COSTS_N_INSNS (1), /* constant shift costs */
1939 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1940 COSTS_N_INSNS (4), /* HI */
1941 COSTS_N_INSNS (3), /* SI */
1942 COSTS_N_INSNS (4), /* DI */
1943 COSTS_N_INSNS (2)}, /* other */
1944 0, /* cost of multiply per each bit set */
1945 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1946 COSTS_N_INSNS (26), /* HI */
1947 COSTS_N_INSNS (42), /* SI */
1948 COSTS_N_INSNS (74), /* DI */
1949 COSTS_N_INSNS (74)}, /* other */
1950 COSTS_N_INSNS (1), /* cost of movsx */
1951 COSTS_N_INSNS (1), /* cost of movzx */
1952 8, /* "large" insn */
1953 17, /* MOVE_RATIO */
1954 4, /* cost for loading QImode using movzbl */
1955 {4, 4, 4}, /* cost of loading integer registers
1956 in QImode, HImode and SImode.
1957 Relative to reg-reg move (2). */
1958 {4, 4, 4}, /* cost of storing integer registers */
1959 4, /* cost of reg,reg fld/fst */
1960 {12, 12, 12}, /* cost of loading fp registers
1961 in SFmode, DFmode and XFmode */
1962 {6, 6, 8}, /* cost of storing fp registers
1963 in SFmode, DFmode and XFmode */
1964 2, /* cost of moving MMX register */
1965 {8, 8}, /* cost of loading MMX registers
1966 in SImode and DImode */
1967 {8, 8}, /* cost of storing MMX registers
1968 in SImode and DImode */
1969 2, /* cost of moving SSE register */
1970 {8, 8, 8}, /* cost of loading SSE registers
1971 in SImode, DImode and TImode */
1972 {8, 8, 8}, /* cost of storing SSE registers
1973 in SImode, DImode and TImode */
1974 5, /* MMX or SSE register to integer */
1975 64, /* size of l1 cache. */
1976 512, /* size of l2 cache. */
1977 64, /* size of prefetch block */
1978 6, /* number of parallel prefetches */
1979 /* FIXME: perhaps a more appropriate value is 5. */
1980 3, /* Branch cost */
1981 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1982 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1983 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1984 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1985 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1986 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1987 core_memcpy,
1988 core_memset,
1989 1, /* scalar_stmt_cost. */
1990 1, /* scalar load_cost. */
1991 1, /* scalar_store_cost. */
1992 1, /* vec_stmt_cost. */
1993 1, /* vec_to_scalar_cost. */
1994 1, /* scalar_to_vec_cost. */
1995 1, /* vec_align_load_cost. */
1996 2, /* vec_unalign_load_cost. */
1997 1, /* vec_store_cost. */
1998 3, /* cond_taken_branch_cost. */
1999 1, /* cond_not_taken_branch_cost. */
2003 /* Set by -mtune. */
2004 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2006 /* Set by -mtune or -Os. */
2007 const struct processor_costs *ix86_cost = &pentium_cost;
2009 /* Processor feature/optimization bitmasks. */
2010 #define m_386 (1<<PROCESSOR_I386)
2011 #define m_486 (1<<PROCESSOR_I486)
2012 #define m_PENT (1<<PROCESSOR_PENTIUM)
2013 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2014 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2015 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2016 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2017 #define m_CORE2 (1<<PROCESSOR_CORE2)
2018 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2019 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2020 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2021 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2022 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2023 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2024 #define m_INTEL (1<<PROCESSOR_INTEL)
2026 #define m_GEODE (1<<PROCESSOR_GEODE)
2027 #define m_K6 (1<<PROCESSOR_K6)
2028 #define m_K6_GEODE (m_K6 | m_GEODE)
2029 #define m_K8 (1<<PROCESSOR_K8)
2030 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2031 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2032 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2033 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2034 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2035 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2036 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2037 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2038 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2039 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2040 #define m_BTVER (m_BTVER1 | m_BTVER2)
2041 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2043 #define m_GENERIC (1<<PROCESSOR_GENERIC)
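/* Illustration (hypothetical example, not actual x86-tune.def content):
   each DEF_TUNE entry in x86-tune.def pairs a feature name with a selector
   built from the masks above, e.g.

       DEF_TUNE (X86_TUNE_EXAMPLE, "example_feature", m_CORE_ALL | m_GENERIC)

   would enable the feature for core2/nehalem/sandybridge/haswell and for
   generic tuning, because set_ix86_tune_features below tests each selector
   against (1u << ix86_tune).  */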
2045 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2046 #undef DEF_TUNE
2047 #define DEF_TUNE(tune, name, selector) name,
2048 #include "x86-tune.def"
2049 #undef DEF_TUNE
2052 /* Feature tests against the various tunings. */
2053 unsigned char ix86_tune_features[X86_TUNE_LAST];
2055 /* Feature tests against the various tunings used to create ix86_tune_features
2056 based on the processor mask. */
2057 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2058 #undef DEF_TUNE
2059 #define DEF_TUNE(tune, name, selector) selector,
2060 #include "x86-tune.def"
2061 #undef DEF_TUNE
2064 /* Feature tests against the various architecture variations. */
2065 unsigned char ix86_arch_features[X86_ARCH_LAST];
2067 /* Feature tests against the various architecture variations, used to create
2068 ix86_arch_features based on the processor mask. */
2069 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2070 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2071 ~(m_386 | m_486 | m_PENT | m_K6),
2073 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2074 ~m_386,
2076 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2077 ~(m_386 | m_486),
2079 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2080 ~m_386,
2082 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2083 ~m_386,
2086 /* If the average insn count for a single function invocation is
2087 lower than this constant, emit fast (but longer) prologue and
2088 epilogue code. */
2089 #define FAST_PROLOGUE_INSN_COUNT 20
2091 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2092 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2093 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2094 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2096 /* Array of the smallest class containing reg number REGNO, indexed by
2097 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2099 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2101 /* ax, dx, cx, bx */
2102 AREG, DREG, CREG, BREG,
2103 /* si, di, bp, sp */
2104 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2105 /* FP registers */
2106 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2107 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2108 /* arg pointer */
2109 NON_Q_REGS,
2110 /* flags, fpsr, fpcr, frame */
2111 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2112 /* SSE registers */
2113 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2114 SSE_REGS, SSE_REGS,
2115 /* MMX registers */
2116 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2117 MMX_REGS, MMX_REGS,
2118 /* REX registers */
2119 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2120 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2121 /* SSE REX registers */
2122 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2123 SSE_REGS, SSE_REGS,
2124 /* AVX-512 SSE registers */
2125 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 /* Mask registers. */
2130 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2131 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2134 /* The "default" register map used in 32bit mode. */
2136 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2138 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2139 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2140 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2141 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2142 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2143 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2147 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2150 /* The "default" register map used in 64bit mode. */
2152 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2154 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2155 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2156 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2157 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2158 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2159 8,9,10,11,12,13,14,15, /* extended integer registers */
2160 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2161 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2162 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2163 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2166 /* Define the register numbers to be used in Dwarf debugging information.
2167 The SVR4 reference port C compiler uses the following register numbers
2168 in its Dwarf output code:
2169 0 for %eax (gcc regno = 0)
2170 1 for %ecx (gcc regno = 2)
2171 2 for %edx (gcc regno = 1)
2172 3 for %ebx (gcc regno = 3)
2173 4 for %esp (gcc regno = 7)
2174 5 for %ebp (gcc regno = 6)
2175 6 for %esi (gcc regno = 4)
2176 7 for %edi (gcc regno = 5)
2177 The following three DWARF register numbers are never generated by
2178 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2179 believes these numbers have these meanings.
2180 8 for %eip (no gcc equivalent)
2181 9 for %eflags (gcc regno = 17)
2182 10 for %trapno (no gcc equivalent)
2183 It is not at all clear how we should number the FP stack registers
2184 for the x86 architecture. If the version of SDB on x86/svr4 were
2185 a bit less brain dead with respect to floating-point then we would
2186 have a precedent to follow with respect to DWARF register numbers
2187 for x86 FP registers, but the SDB on x86/svr4 is so completely
2188 broken with respect to FP registers that it is hardly worth thinking
2189 of it as something to strive for compatibility with.
2190 The version of x86/svr4 SDB I have at the moment does (partially)
2191 seem to believe that DWARF register number 11 is associated with
2192 the x86 register %st(0), but that's about all. Higher DWARF
2193 register numbers don't seem to be associated with anything in
2194 particular, and even for DWARF regno 11, SDB only seems to under-
2195 stand that it should say that a variable lives in %st(0) (when
2196 asked via an `=' command) if we said it was in DWARF regno 11,
2197 but SDB still prints garbage when asked for the value of the
2198 variable in question (via a `/' command).
2199 (Also note that the labels SDB prints for various FP stack regs
2200 when doing an `x' command are all wrong.)
2201 Note that these problems generally don't affect the native SVR4
2202 C compiler because it doesn't allow the use of -O with -g and
2203 because when it is *not* optimizing, it allocates a memory
2204 location for each floating-point variable, and the memory
2205 location is what gets described in the DWARF AT_location
2206 attribute for the variable in question.
2207 Regardless of the severe mental illness of the x86/svr4 SDB, we
2208 do something sensible here and we use the following DWARF
2209 register numbers. Note that these are all stack-top-relative
2210 numbers.
2211 11 for %st(0) (gcc regno = 8)
2212 12 for %st(1) (gcc regno = 9)
2213 13 for %st(2) (gcc regno = 10)
2214 14 for %st(3) (gcc regno = 11)
2215 15 for %st(4) (gcc regno = 12)
2216 16 for %st(5) (gcc regno = 13)
2217 17 for %st(6) (gcc regno = 14)
2218 18 for %st(7) (gcc regno = 15)
2220 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2222 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2223 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2224 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2225 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2226 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2227 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2230 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2231 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2234 /* Define parameter passing and return registers. */
2236 static int const x86_64_int_parameter_registers[6] =
2238 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2241 static int const x86_64_ms_abi_int_parameter_registers[4] =
2243 CX_REG, DX_REG, R8_REG, R9_REG
2246 static int const x86_64_int_return_registers[4] =
2248 AX_REG, DX_REG, DI_REG, SI_REG
2251 /* Additional registers that are clobbered by SYSV calls. */
2253 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2255 SI_REG, DI_REG,
2256 XMM6_REG, XMM7_REG,
2257 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2258 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2261 /* Define the structure for the machine field in struct function. */
2263 struct GTY(()) stack_local_entry {
2264 unsigned short mode;
2265 unsigned short n;
2266 rtx rtl;
2267 struct stack_local_entry *next;
2270 /* Structure describing stack frame layout.
2271 Stack grows downward:
2273 [arguments]
2274 <- ARG_POINTER
2275 saved pc
2277 saved static chain if ix86_static_chain_on_stack
2279 saved frame pointer if frame_pointer_needed
2280 <- HARD_FRAME_POINTER
2281 [saved regs]
2282 <- regs_save_offset
2283 [padding0]
2285 [saved SSE regs]
2286 <- sse_regs_save_offset
2287 [padding1] |
2288 | <- FRAME_POINTER
2289 [va_arg registers] |
2291 [frame] |
2293 [padding2] | = to_allocate
2294 <- STACK_POINTER
2296 struct ix86_frame
2298 int nsseregs;
2299 int nregs;
2300 int va_arg_size;
2301 int red_zone_size;
2302 int outgoing_arguments_size;
2304 /* The offsets relative to ARG_POINTER. */
2305 HOST_WIDE_INT frame_pointer_offset;
2306 HOST_WIDE_INT hard_frame_pointer_offset;
2307 HOST_WIDE_INT stack_pointer_offset;
2308 HOST_WIDE_INT hfp_save_offset;
2309 HOST_WIDE_INT reg_save_offset;
2310 HOST_WIDE_INT sse_reg_save_offset;
2312 /* When save_regs_using_mov is set, emit prologue using
2313 move instead of push instructions. */
2314 bool save_regs_using_mov;
2317 /* Which cpu are we scheduling for. */
2318 enum attr_cpu ix86_schedule;
2320 /* Which cpu are we optimizing for. */
2321 enum processor_type ix86_tune;
2323 /* Which instruction set architecture to use. */
2324 enum processor_type ix86_arch;
2326 /* True if processor has SSE prefetch instruction. */
2327 unsigned char x86_prefetch_sse;
2329 /* -mstackrealign option */
2330 static const char ix86_force_align_arg_pointer_string[]
2331 = "force_align_arg_pointer";
2333 static rtx (*ix86_gen_leave) (void);
2334 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2335 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2336 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2337 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2338 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2339 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2341 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2342 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2343 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2346 /* Preferred alignment for stack boundary in bits. */
2347 unsigned int ix86_preferred_stack_boundary;
2349 /* Alignment for incoming stack boundary in bits specified at
2350 command line. */
2351 static unsigned int ix86_user_incoming_stack_boundary;
2353 /* Default alignment for incoming stack boundary in bits. */
2354 static unsigned int ix86_default_incoming_stack_boundary;
2356 /* Alignment for incoming stack boundary in bits. */
2357 unsigned int ix86_incoming_stack_boundary;
2359 /* Calling abi specific va_list type nodes. */
2360 static GTY(()) tree sysv_va_list_type_node;
2361 static GTY(()) tree ms_va_list_type_node;
2363 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2364 char internal_label_prefix[16];
2365 int internal_label_prefix_len;
2367 /* Fence to use after loop using movnt. */
2368 tree x86_mfence;
2370 /* Register class used for passing a given 64-bit part of the argument.
2371 These represent the classes documented by the psABI, with the exception
2372 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2373 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2375 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2376 whenever possible (the upper half then contains only padding). */
2377 enum x86_64_reg_class
2379 X86_64_NO_CLASS,
2380 X86_64_INTEGER_CLASS,
2381 X86_64_INTEGERSI_CLASS,
2382 X86_64_SSE_CLASS,
2383 X86_64_SSESF_CLASS,
2384 X86_64_SSEDF_CLASS,
2385 X86_64_SSEUP_CLASS,
2386 X86_64_X87_CLASS,
2387 X86_64_X87UP_CLASS,
2388 X86_64_COMPLEX_X87_CLASS,
2389 X86_64_MEMORY_CLASS
2392 #define MAX_CLASSES 8
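/* Illustration (hypothetical example; the real classification is done by
   classify_argument later in this file): under the psABI rules sketched
   above, an argument such as

       struct example { double d; int i; };

   occupies two eightbytes.  The first (holding D) is classified
   X86_64_SSEDF_CLASS and is moved through an SSE register with a DFmode
   move; the second (holding I, with its upper half only padding) is
   classified X86_64_INTEGERSI_CLASS so that a cheaper SImode move can be
   used.  */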
2394 /* Table of constants used by fldpi, fldln2, etc.... */
2395 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2396 static bool ext_80387_constants_init = 0;
2399 static struct machine_function * ix86_init_machine_status (void);
2400 static rtx ix86_function_value (const_tree, const_tree, bool);
2401 static bool ix86_function_value_regno_p (const unsigned int);
2402 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2403 const_tree);
2404 static rtx ix86_static_chain (const_tree, bool);
2405 static int ix86_function_regparm (const_tree, const_tree);
2406 static void ix86_compute_frame_layout (struct ix86_frame *);
2407 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2408 rtx, rtx, int);
2409 static void ix86_add_new_builtins (HOST_WIDE_INT);
2410 static tree ix86_canonical_va_list_type (tree);
2411 static void predict_jump (int);
2412 static unsigned int split_stack_prologue_scratch_regno (void);
2413 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2415 enum ix86_function_specific_strings
2417 IX86_FUNCTION_SPECIFIC_ARCH,
2418 IX86_FUNCTION_SPECIFIC_TUNE,
2419 IX86_FUNCTION_SPECIFIC_MAX
2422 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2423 const char *, enum fpmath_unit, bool);
2424 static void ix86_function_specific_save (struct cl_target_option *,
2425 struct gcc_options *opts);
2426 static void ix86_function_specific_restore (struct gcc_options *opts,
2427 struct cl_target_option *);
2428 static void ix86_function_specific_print (FILE *, int,
2429 struct cl_target_option *);
2430 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2431 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2432 struct gcc_options *,
2433 struct gcc_options *,
2434 struct gcc_options *);
2435 static bool ix86_can_inline_p (tree, tree);
2436 static void ix86_set_current_function (tree);
2437 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2439 static enum calling_abi ix86_function_abi (const_tree);
2442 #ifndef SUBTARGET32_DEFAULT_CPU
2443 #define SUBTARGET32_DEFAULT_CPU "i386"
2444 #endif
2446 /* Whether -mtune= or -march= were specified */
2447 static int ix86_tune_defaulted;
2448 static int ix86_arch_specified;
2450 /* Vectorization library interface and handlers. */
2451 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2453 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2454 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2456 /* Processor target table, indexed by processor number */
2457 struct ptt
2459 const char *const name; /* processor name */
2460 const struct processor_costs *cost; /* Processor costs */
2461 const int align_loop; /* Default alignments. */
2462 const int align_loop_max_skip;
2463 const int align_jump;
2464 const int align_jump_max_skip;
2465 const int align_func;
2468 /* This table must be in sync with enum processor_type in i386.h. */
2469 static const struct ptt processor_target_table[PROCESSOR_max] =
2471 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2472 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2473 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2474 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2475 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2476 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2477 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2478 {"core2", &core_cost, 16, 10, 16, 10, 16},
2479 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2480 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2481 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2482 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2483 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2484 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2485 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2486 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2487 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2488 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2489 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2490 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2491 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2492 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2493 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2494 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2495 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2498 static bool
2499 gate_insert_vzeroupper (void)
2501 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2504 static unsigned int
2505 rest_of_handle_insert_vzeroupper (void)
2507 int i;
2509 /* vzeroupper instructions are inserted immediately after reload to
2510 account for possible spills from 256bit registers. The pass
2511 reuses the mode switching infrastructure by re-running the mode insertion
2512 pass, so disable entities that have already been processed. */
2513 for (i = 0; i < MAX_386_ENTITIES; i++)
2514 ix86_optimize_mode_switching[i] = 0;
2516 ix86_optimize_mode_switching[AVX_U128] = 1;
2518 /* Call optimize_mode_switching. */
2519 g->get_passes ()->execute_pass_mode_switching ();
2520 return 0;
2523 namespace {
2525 const pass_data pass_data_insert_vzeroupper =
2527 RTL_PASS, /* type */
2528 "vzeroupper", /* name */
2529 OPTGROUP_NONE, /* optinfo_flags */
2530 true, /* has_gate */
2531 true, /* has_execute */
2532 TV_NONE, /* tv_id */
2533 0, /* properties_required */
2534 0, /* properties_provided */
2535 0, /* properties_destroyed */
2536 0, /* todo_flags_start */
2537 ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */
2540 class pass_insert_vzeroupper : public rtl_opt_pass
2542 public:
2543 pass_insert_vzeroupper(gcc::context *ctxt)
2544 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2547 /* opt_pass methods: */
2548 bool gate () { return gate_insert_vzeroupper (); }
2549 unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }
2551 }; // class pass_insert_vzeroupper
2553 } // anon namespace
2555 rtl_opt_pass *
2556 make_pass_insert_vzeroupper (gcc::context *ctxt)
2558 return new pass_insert_vzeroupper (ctxt);
2561 /* Return true if a red-zone is in use. */
2563 static inline bool
2564 ix86_using_red_zone (void)
2566 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2569 /* Return a string that documents the current -m options. The caller is
2570 responsible for freeing the string. */
2572 static char *
2573 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2574 const char *tune, enum fpmath_unit fpmath,
2575 bool add_nl_p)
2577 struct ix86_target_opts
2579 const char *option; /* option string */
2580 HOST_WIDE_INT mask; /* isa mask options */
2583 /* This table is ordered so that options like -msse4.2 that imply
2584 preceding options are matched first. */
2585 static struct ix86_target_opts isa_opts[] =
2587 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2588 { "-mfma", OPTION_MASK_ISA_FMA },
2589 { "-mxop", OPTION_MASK_ISA_XOP },
2590 { "-mlwp", OPTION_MASK_ISA_LWP },
2591 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2592 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2593 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2594 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2595 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2596 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2597 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2598 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2599 { "-msse3", OPTION_MASK_ISA_SSE3 },
2600 { "-msse2", OPTION_MASK_ISA_SSE2 },
2601 { "-msse", OPTION_MASK_ISA_SSE },
2602 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2603 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2604 { "-mmmx", OPTION_MASK_ISA_MMX },
2605 { "-mabm", OPTION_MASK_ISA_ABM },
2606 { "-mbmi", OPTION_MASK_ISA_BMI },
2607 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2608 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2609 { "-mhle", OPTION_MASK_ISA_HLE },
2610 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2611 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2612 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2613 { "-madx", OPTION_MASK_ISA_ADX },
2614 { "-mtbm", OPTION_MASK_ISA_TBM },
2615 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2616 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2617 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2618 { "-maes", OPTION_MASK_ISA_AES },
2619 { "-msha", OPTION_MASK_ISA_SHA },
2620 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2621 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2622 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2623 { "-mf16c", OPTION_MASK_ISA_F16C },
2624 { "-mrtm", OPTION_MASK_ISA_RTM },
2625 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2626 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2627 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2630 /* Flag options. */
2631 static struct ix86_target_opts flag_opts[] =
2633 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2634 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2635 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2636 { "-m80387", MASK_80387 },
2637 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2638 { "-malign-double", MASK_ALIGN_DOUBLE },
2639 { "-mcld", MASK_CLD },
2640 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2641 { "-mieee-fp", MASK_IEEE_FP },
2642 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2643 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2644 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2645 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2646 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2647 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2648 { "-mno-red-zone", MASK_NO_RED_ZONE },
2649 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2650 { "-mrecip", MASK_RECIP },
2651 { "-mrtd", MASK_RTD },
2652 { "-msseregparm", MASK_SSEREGPARM },
2653 { "-mstack-arg-probe", MASK_STACK_PROBE },
2654 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2655 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2656 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2657 { "-mvzeroupper", MASK_VZEROUPPER },
2658 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2659 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2660 { "-mprefer-avx128", MASK_PREFER_AVX128},
2663 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2665 char isa_other[40];
2666 char target_other[40];
2667 unsigned num = 0;
2668 unsigned i, j;
2669 char *ret;
2670 char *ptr;
2671 size_t len;
2672 size_t line_len;
2673 size_t sep_len;
2674 const char *abi;
2676 memset (opts, '\0', sizeof (opts));
2678 /* Add -march= option. */
2679 if (arch)
2681 opts[num][0] = "-march=";
2682 opts[num++][1] = arch;
2685 /* Add -mtune= option. */
2686 if (tune)
2688 opts[num][0] = "-mtune=";
2689 opts[num++][1] = tune;
2692 /* Add -m32/-m64/-mx32. */
2693 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2695 if ((isa & OPTION_MASK_ABI_64) != 0)
2696 abi = "-m64";
2697 else
2698 abi = "-mx32";
2699 isa &= ~ (OPTION_MASK_ISA_64BIT
2700 | OPTION_MASK_ABI_64
2701 | OPTION_MASK_ABI_X32);
2703 else
2704 abi = "-m32";
2705 opts[num++][0] = abi;
2707 /* Pick out the options in isa options. */
2708 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2710 if ((isa & isa_opts[i].mask) != 0)
2712 opts[num++][0] = isa_opts[i].option;
2713 isa &= ~ isa_opts[i].mask;
2717 if (isa && add_nl_p)
2719 opts[num++][0] = isa_other;
2720 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2721 isa);
2724 /* Add flag options. */
2725 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2727 if ((flags & flag_opts[i].mask) != 0)
2729 opts[num++][0] = flag_opts[i].option;
2730 flags &= ~ flag_opts[i].mask;
2734 if (flags && add_nl_p)
2736 opts[num++][0] = target_other;
2737 sprintf (target_other, "(other flags: %#x)", flags);
2740 /* Add -fpmath= option. */
2741 if (fpmath)
2743 opts[num][0] = "-mfpmath=";
2744 switch ((int) fpmath)
2746 case FPMATH_387:
2747 opts[num++][1] = "387";
2748 break;
2750 case FPMATH_SSE:
2751 opts[num++][1] = "sse";
2752 break;
2754 case FPMATH_387 | FPMATH_SSE:
2755 opts[num++][1] = "sse+387";
2756 break;
2758 default:
2759 gcc_unreachable ();
2763 /* Any options? */
2764 if (num == 0)
2765 return NULL;
2767 gcc_assert (num < ARRAY_SIZE (opts));
2769 /* Size the string. */
2770 len = 0;
2771 sep_len = (add_nl_p) ? 3 : 1;
2772 for (i = 0; i < num; i++)
2774 len += sep_len;
2775 for (j = 0; j < 2; j++)
2776 if (opts[i][j])
2777 len += strlen (opts[i][j]);
2780 /* Build the string. */
2781 ret = ptr = (char *) xmalloc (len);
2782 line_len = 0;
2784 for (i = 0; i < num; i++)
2786 size_t len2[2];
2788 for (j = 0; j < 2; j++)
2789 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2791 if (i != 0)
2793 *ptr++ = ' ';
2794 line_len++;
2796 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2798 *ptr++ = '\\';
2799 *ptr++ = '\n';
2800 line_len = 0;
2804 for (j = 0; j < 2; j++)
2805 if (opts[i][j])
2807 memcpy (ptr, opts[i][j], len2[j]);
2808 ptr += len2[j];
2809 line_len += len2[j];
2813 *ptr = '\0';
2814 gcc_assert (ret + len >= ptr);
2816 return ret;
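/* Example of the kind of string built above (illustrative only; the exact
   contents depend on the ISA and flag bits passed in): a plain 64-bit
   compile might yield something like

       "-march=x86-64 -mtune=generic -m64 -msse2 -msse -mmmx -mfxsr -mfpmath=sse"

   i.e. -march=/-mtune= first, then the ABI switch, then one entry per ISA
   and flag bit still set, and finally -mfpmath=.  */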
2819 /* Return true if profiling code should be emitted before the
2820 prologue, and false otherwise.
2821 Note: For x86 with "hotfix" it is sorried. */
2822 static bool
2823 ix86_profile_before_prologue (void)
2825 return flag_fentry != 0;
2828 /* Function that is callable from the debugger to print the current
2829 options. */
2830 void ATTRIBUTE_UNUSED
2831 ix86_debug_options (void)
2833 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2834 ix86_arch_string, ix86_tune_string,
2835 ix86_fpmath, true);
2837 if (opts)
2839 fprintf (stderr, "%s\n\n", opts);
2840 free (opts);
2842 else
2843 fputs ("<no options>\n\n", stderr);
2845 return;
2848 static const char *stringop_alg_names[] = {
2849 #define DEF_ENUM
2850 #define DEF_ALG(alg, name) #name,
2851 #include "stringop.def"
2852 #undef DEF_ENUM
2853 #undef DEF_ALG
2856 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2857 The string is of the following form (or a comma-separated list of such entries):
2859 strategy_alg:max_size:[align|noalign]
2861 where the full size range for the strategy is either [0, max_size] or
2862 [min_size, max_size], in which min_size is the max_size + 1 of the
2863 preceding range. The last size range must have max_size == -1.
2865 Examples:
2868 -mmemcpy-strategy=libcall:-1:noalign
2870 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2874 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2876 This is to tell the compiler to use the following strategy for memset
2877 1) when the expected size is between [1, 16], use rep_8byte strategy;
2878 2) when the size is between [17, 2048], use vector_loop;
2879 3) when the size is > 2048, use libcall. */
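/* Illustration of the memset example above (the algorithm names come from
   stringop.def): ix86_parse_stringop_strategy_string below parses

       rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign

   into three stringop_size_range entries, roughly

       { 16,   rep_8byte,   true  }   -- noalign
       { 2048, vector_loop, false }   -- align
       { -1,   libcall,     true  }   -- noalign

   and then copies them over the size[] array of the currently selected
   ix86_cost->memset (or ->memcpy) table.  */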
2881 struct stringop_size_range
2883 int max;
2884 stringop_alg alg;
2885 bool noalign;
2888 static void
2889 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2891 const struct stringop_algs *default_algs;
2892 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2893 char *curr_range_str, *next_range_str;
2894 int i = 0, n = 0;
2896 if (is_memset)
2897 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2898 else
2899 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2901 curr_range_str = strategy_str;
2905 int maxs;
2906 char alg_name[128];
2907 char align[16];
2908 next_range_str = strchr (curr_range_str, ',');
2909 if (next_range_str)
2910 *next_range_str++ = '\0';
2912 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2913 alg_name, &maxs, align))
2915 error ("wrong arg %s to option %s", curr_range_str,
2916 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2917 return;
2920 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2922 error ("size ranges of option %s should be increasing",
2923 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2924 return;
2927 for (i = 0; i < last_alg; i++)
2928 if (!strcmp (alg_name, stringop_alg_names[i]))
2929 break;
2931 if (i == last_alg)
2933 error ("wrong stringop strategy name %s specified for option %s",
2934 alg_name,
2935 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2936 return;
2939 input_ranges[n].max = maxs;
2940 input_ranges[n].alg = (stringop_alg) i;
2941 if (!strcmp (align, "align"))
2942 input_ranges[n].noalign = false;
2943 else if (!strcmp (align, "noalign"))
2944 input_ranges[n].noalign = true;
2945 else
2947 error ("unknown alignment %s specified for option %s",
2948 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2949 return;
2951 n++;
2952 curr_range_str = next_range_str;
2954 while (curr_range_str);
2956 if (input_ranges[n - 1].max != -1)
2958 error ("the max value for the last size range should be -1"
2959 " for option %s",
2960 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2961 return;
2964 if (n > MAX_STRINGOP_ALGS)
2966 error ("too many size ranges specified in option %s",
2967 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2968 return;
2971 /* Now override the default algs array. */
2972 for (i = 0; i < n; i++)
2974 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2975 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2976 = input_ranges[i].alg;
2977 *const_cast<int *>(&default_algs->size[i].noalign)
2978 = input_ranges[i].noalign;
2983 /* Parse the -mtune-ctrl= option. When DUMP is true,
2984 print the features that are explicitly set. */
2986 static void
2987 parse_mtune_ctrl_str (bool dump)
2989 if (!ix86_tune_ctrl_string)
2990 return;
2992 char *next_feature_string = NULL;
2993 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
2994 char *orig = curr_feature_string;
2995 int i;
2998 bool clear = false;
3000 next_feature_string = strchr (curr_feature_string, ',');
3001 if (next_feature_string)
3002 *next_feature_string++ = '\0';
3003 if (*curr_feature_string == '^')
3005 curr_feature_string++;
3006 clear = true;
3008 for (i = 0; i < X86_TUNE_LAST; i++)
3010 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3012 ix86_tune_features[i] = !clear;
3013 if (dump)
3014 fprintf (stderr, "Explicitly %s feature %s\n",
3015 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3016 break;
3019 if (i == X86_TUNE_LAST)
3020 error ("Unknown parameter to option -mtune-ctrl: %s",
3021 clear ? curr_feature_string - 1 : curr_feature_string);
3022 curr_feature_string = next_feature_string;
3024 while (curr_feature_string);
3025 free (orig);
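/* Usage sketch (the feature names below are placeholders, not real entries
   of x86-tune.def): with the loop above, a command line such as

       -mtune-ctrl=feature_a,^feature_b

   sets ix86_tune_features[] for "feature_a" and clears it for "feature_b",
   since a leading '^' flips CLEAR; any name not found in
   ix86_tune_feature_names[] is reported as an unknown parameter.  */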
3028 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3029 processor type. */
3031 static void
3032 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3034 unsigned int ix86_tune_mask = 1u << ix86_tune;
3035 int i;
3037 for (i = 0; i < X86_TUNE_LAST; ++i)
3039 if (ix86_tune_no_default)
3040 ix86_tune_features[i] = 0;
3041 else
3042 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3045 if (dump)
3047 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3048 for (i = 0; i < X86_TUNE_LAST; i++)
3049 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3050 ix86_tune_features[i] ? "on" : "off");
3053 parse_mtune_ctrl_str (dump);
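/* Example of the mask test above (sketch): each entry of
   initial_ix86_tune_features[] is a bitmask with one bit per processor, so
   with ix86_tune == PROCESSOR_HASWELL the mask is 1u << PROCESSOR_HASWELL
   and feature I defaults to "on" exactly when that bit is present in
   initial_ix86_tune_features[I], unless ix86_tune_no_default cleared it or
   -mtune-ctrl overrides it afterwards.  */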
3057 /* Override various settings based on options. If MAIN_ARGS_P, the
3058 options are from the command line, otherwise they are from
3059 attributes. */
3061 static void
3062 ix86_option_override_internal (bool main_args_p,
3063 struct gcc_options *opts,
3064 struct gcc_options *opts_set)
3066 int i;
3067 unsigned int ix86_arch_mask;
3068 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3069 const char *prefix;
3070 const char *suffix;
3071 const char *sw;
3073 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3074 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3075 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3076 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3077 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3078 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3079 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3080 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3081 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3082 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3083 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3084 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3085 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3086 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3087 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3088 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3089 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3090 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3091 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3092 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3093 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3094 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3095 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3096 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3097 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3098 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3099 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3100 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3101 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3102 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3103 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3104 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3105 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3106 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3107 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3108 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3109 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3110 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3111 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3112 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3113 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3114 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3115 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3116 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3117 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3118 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3120 #define PTA_CORE2 \
3121 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3122 | PTA_CX16 | PTA_FXSR)
3123 #define PTA_NEHALEM \
3124 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3125 #define PTA_WESTMERE \
3126 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3127 #define PTA_SANDYBRIDGE \
3128 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3129 #define PTA_IVYBRIDGE \
3130 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3131 #define PTA_HASWELL \
3132 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3133 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3134 #define PTA_BROADWELL \
3135 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3136 #define PTA_BONNELL \
3137 (PTA_CORE2 | PTA_MOVBE)
3138 #define PTA_SILVERMONT \
3139 (PTA_WESTMERE | PTA_MOVBE)
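/* Illustration (sketch): the composite PTA_* masks above nest, e.g.
   PTA_HASWELL expands to PTA_IVYBRIDGE plus the Haswell-only bits,
   PTA_IVYBRIDGE to PTA_SANDYBRIDGE plus its additions, and so on down to
   PTA_CORE2.  A processor_alias_table entry built from PTA_HASWELL therefore
   also has e.g. PTA_SSE4_2 and PTA_AVX set, so the per-ISA checks further
   below enable those ISAs for -march=haswell without listing them
   explicitly.  */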
3141 /* If this reaches 64, we need to widen the struct pta flags field below. */
3143 static struct pta
3145 const char *const name; /* processor name or nickname. */
3146 const enum processor_type processor;
3147 const enum attr_cpu schedule;
3148 const unsigned HOST_WIDE_INT flags;
3150 const processor_alias_table[] =
3152 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3153 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3154 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3155 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3156 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3157 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3158 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3159 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3160 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3161 PTA_MMX | PTA_SSE | PTA_FXSR},
3162 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3163 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3164 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3165 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3166 PTA_MMX | PTA_SSE | PTA_FXSR},
3167 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3168 PTA_MMX | PTA_SSE | PTA_FXSR},
3169 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3170 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3171 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3172 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3173 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3174 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3175 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3176 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3177 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3178 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3179 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3180 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3181 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3182 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3183 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3184 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3185 PTA_SANDYBRIDGE},
3186 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3187 PTA_SANDYBRIDGE},
3188 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3189 PTA_IVYBRIDGE},
3190 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3191 PTA_IVYBRIDGE},
3192 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3193 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3194 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3195 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3196 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3197 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3198 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3199 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3200 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3201 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3202 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3203 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3204 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3205 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3206 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3207 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3208 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3209 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3210 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3211 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3212 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3213 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3214 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3215 {"x86-64", PROCESSOR_K8, CPU_K8,
3216 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3217 {"k8", PROCESSOR_K8, CPU_K8,
3218 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3219 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3220 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3221 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3222 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3223 {"opteron", PROCESSOR_K8, CPU_K8,
3224 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3225 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3226 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3227 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3228 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3229 {"athlon64", PROCESSOR_K8, CPU_K8,
3230 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3231 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3232 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3233 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3234 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3235 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3236 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3237 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3238 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3239 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3240 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3241 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3242 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3243 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3244 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3245 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3246 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3247 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3248 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3249 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3250 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3251 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3252 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3253 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3254 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3255 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3256 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3257 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3258 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3259 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3260 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3261 | PTA_XSAVEOPT | PTA_FSGSBASE},
3262 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3263 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3264 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3265 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3266 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3267 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3268 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3269 | PTA_MOVBE},
3270 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3271 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3272 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3273 | PTA_FXSR | PTA_XSAVE},
3274 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3275 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3276 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3277 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3278 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3279 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3281 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3282 PTA_64BIT
3283 | PTA_HLE /* flags are only used for -march switch. */ },
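/* Worked example (sketch): with -march=bdver2 the lookup loop further below
   matches the "bdver2" row, so ix86_arch becomes PROCESSOR_BDVER2,
   ix86_schedule becomes CPU_BDVER2, and every PTA_* bit in that row turns on
   the corresponding OPTION_MASK_ISA_* flag, unless the user already set that
   ISA explicitly (tracked in opts->x_ix86_isa_flags_explicit).  */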
3286 /* -mrecip options. */
3287 static struct
3289 const char *string; /* option name */
3290 unsigned int mask; /* mask bits to set */
3292 const recip_options[] =
3294 { "all", RECIP_MASK_ALL },
3295 { "none", RECIP_MASK_NONE },
3296 { "div", RECIP_MASK_DIV },
3297 { "sqrt", RECIP_MASK_SQRT },
3298 { "vec-div", RECIP_MASK_VEC_DIV },
3299 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3302 int const pta_size = ARRAY_SIZE (processor_alias_table);
3304 /* Set up prefix/suffix so the error messages refer to either the command
3305 line argument, or the attribute(target). */
3306 if (main_args_p)
3308 prefix = "-m";
3309 suffix = "";
3310 sw = "switch";
3312 else
3314 prefix = "option(\"";
3315 suffix = "\")";
3316 sw = "attribute";
3319 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3320 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3321 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3322 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3323 #ifdef TARGET_BI_ARCH
3324 else
3326 #if TARGET_BI_ARCH == 1
3327 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3328 is on and OPTION_MASK_ABI_X32 is off. We turn off
3329 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3330 -mx32. */
3331 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3332 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3333 #else
3334 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3335 on and OPTION_MASK_ABI_64 is off. We turn off
3336 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3337 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3338 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3339 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3340 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3341 #endif
3343 #endif
3345 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3347 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3348 OPTION_MASK_ABI_64 for TARGET_X32. */
3349 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3350 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3352 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3353 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3354 | OPTION_MASK_ABI_X32
3355 | OPTION_MASK_ABI_64);
3356 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3358 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3359 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3360 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3361 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3364 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3365 SUBTARGET_OVERRIDE_OPTIONS;
3366 #endif
3368 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3369 SUBSUBTARGET_OVERRIDE_OPTIONS;
3370 #endif
3372 /* -fPIC is the default for x86_64. */
3373 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3374 opts->x_flag_pic = 2;
3376 /* Need to check -mtune=generic first. */
3377 if (opts->x_ix86_tune_string)
3379 /* As special support for cross compilers we read -mtune=native
3380 as -mtune=generic. With native compilers we won't see the
3381 -mtune=native, as it was changed by the driver. */
3382 if (!strcmp (opts->x_ix86_tune_string, "native"))
3384 opts->x_ix86_tune_string = "generic";
3386 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3387 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3388 "%stune=k8%s or %stune=generic%s instead as appropriate",
3389 prefix, suffix, prefix, suffix, prefix, suffix);
3391 else
3393 if (opts->x_ix86_arch_string)
3394 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3395 if (!opts->x_ix86_tune_string)
3397 opts->x_ix86_tune_string
3398 = processor_target_table[TARGET_CPU_DEFAULT].name;
3399 ix86_tune_defaulted = 1;
3402 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3403 or defaulted. We need to use a sensible tune option. */
3404 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3406 opts->x_ix86_tune_string = "generic";
3410 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3411 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3413 /* rep; movq isn't available in 32-bit code. */
3414 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3415 opts->x_ix86_stringop_alg = no_stringop;
3418 if (!opts->x_ix86_arch_string)
3419 opts->x_ix86_arch_string
3420 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3421 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3422 else
3423 ix86_arch_specified = 1;
3425 if (opts_set->x_ix86_pmode)
3427 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3428 && opts->x_ix86_pmode == PMODE_SI)
3429 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3430 && opts->x_ix86_pmode == PMODE_DI))
3431 error ("address mode %qs not supported in the %s bit mode",
3432 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3433 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3435 else
3436 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3437 ? PMODE_DI : PMODE_SI;
3439 if (!opts_set->x_ix86_abi)
3440 opts->x_ix86_abi = DEFAULT_ABI;
3442 /* For targets using the MS ABI, enable ms-extensions if not
3443 explicitly turned off. For non-MS ABI targets we turn this
3444 option off. */
3445 if (!opts_set->x_flag_ms_extensions)
3446 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3448 if (opts_set->x_ix86_cmodel)
3450 switch (opts->x_ix86_cmodel)
3452 case CM_SMALL:
3453 case CM_SMALL_PIC:
3454 if (opts->x_flag_pic)
3455 opts->x_ix86_cmodel = CM_SMALL_PIC;
3456 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3457 error ("code model %qs not supported in the %s bit mode",
3458 "small", "32");
3459 break;
3461 case CM_MEDIUM:
3462 case CM_MEDIUM_PIC:
3463 if (opts->x_flag_pic)
3464 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3465 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3466 error ("code model %qs not supported in the %s bit mode",
3467 "medium", "32");
3468 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3469 error ("code model %qs not supported in x32 mode",
3470 "medium");
3471 break;
3473 case CM_LARGE:
3474 case CM_LARGE_PIC:
3475 if (opts->x_flag_pic)
3476 opts->x_ix86_cmodel = CM_LARGE_PIC;
3477 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3478 error ("code model %qs not supported in the %s bit mode",
3479 "large", "32");
3480 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3481 error ("code model %qs not supported in x32 mode",
3482 "large");
3483 break;
3485 case CM_32:
3486 if (opts->x_flag_pic)
3487 error ("code model %s does not support PIC mode", "32");
3488 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3489 error ("code model %qs not supported in the %s bit mode",
3490 "32", "64");
3491 break;
3493 case CM_KERNEL:
3494 if (opts->x_flag_pic)
3496 error ("code model %s does not support PIC mode", "kernel");
3497 opts->x_ix86_cmodel = CM_32;
3499 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3500 error ("code model %qs not supported in the %s bit mode",
3501 "kernel", "32");
3502 break;
3504 default:
3505 gcc_unreachable ();
3508 else
3510 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3511 use of rip-relative addressing. This eliminates fixups that
3512 would otherwise be needed if this object is to be placed in a
3513 DLL, and is essentially just as efficient as direct addressing. */
3514 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3515 && (TARGET_RDOS || TARGET_PECOFF))
3516 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3517 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3518 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3519 else
3520 opts->x_ix86_cmodel = CM_32;
3522 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3524 error ("-masm=intel not supported in this configuration");
3525 opts->x_ix86_asm_dialect = ASM_ATT;
3527 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3528 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3529 sorry ("%i-bit mode not compiled in",
3530 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3532 for (i = 0; i < pta_size; i++)
3533 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3535 ix86_schedule = processor_alias_table[i].schedule;
3536 ix86_arch = processor_alias_table[i].processor;
3537 /* Default cpu tuning to the architecture. */
3538 ix86_tune = ix86_arch;
3540 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3541 && !(processor_alias_table[i].flags & PTA_64BIT))
3542 error ("CPU you selected does not support x86-64 "
3543 "instruction set");
3545 if (processor_alias_table[i].flags & PTA_MMX
3546 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3547 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3548 if (processor_alias_table[i].flags & PTA_3DNOW
3549 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3550 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3551 if (processor_alias_table[i].flags & PTA_3DNOW_A
3552 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3553 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3554 if (processor_alias_table[i].flags & PTA_SSE
3555 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3556 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3557 if (processor_alias_table[i].flags & PTA_SSE2
3558 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3559 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3560 if (processor_alias_table[i].flags & PTA_SSE3
3561 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3562 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3563 if (processor_alias_table[i].flags & PTA_SSSE3
3564 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3565 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3566 if (processor_alias_table[i].flags & PTA_SSE4_1
3567 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3568 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3569 if (processor_alias_table[i].flags & PTA_SSE4_2
3570 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3571 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3572 if (processor_alias_table[i].flags & PTA_AVX
3573 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3574 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3575 if (processor_alias_table[i].flags & PTA_AVX2
3576 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3577 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3578 if (processor_alias_table[i].flags & PTA_FMA
3579 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3581 if (processor_alias_table[i].flags & PTA_SSE4A
3582 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3583 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3584 if (processor_alias_table[i].flags & PTA_FMA4
3585 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3586 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3587 if (processor_alias_table[i].flags & PTA_XOP
3588 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3589 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3590 if (processor_alias_table[i].flags & PTA_LWP
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3593 if (processor_alias_table[i].flags & PTA_ABM
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3596 if (processor_alias_table[i].flags & PTA_BMI
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3599 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3602 if (processor_alias_table[i].flags & PTA_TBM
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3605 if (processor_alias_table[i].flags & PTA_BMI2
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3608 if (processor_alias_table[i].flags & PTA_CX16
3609 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3610 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3611 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3614 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3615 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3616 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3617 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3618 if (processor_alias_table[i].flags & PTA_MOVBE
3619 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3620 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3621 if (processor_alias_table[i].flags & PTA_AES
3622 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3623 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3624 if (processor_alias_table[i].flags & PTA_SHA
3625 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3626 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3627 if (processor_alias_table[i].flags & PTA_PCLMUL
3628 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3629 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3630 if (processor_alias_table[i].flags & PTA_FSGSBASE
3631 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3632 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3633 if (processor_alias_table[i].flags & PTA_RDRND
3634 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3635 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3636 if (processor_alias_table[i].flags & PTA_F16C
3637 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3638 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3639 if (processor_alias_table[i].flags & PTA_RTM
3640 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3641 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3642 if (processor_alias_table[i].flags & PTA_HLE
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3645 if (processor_alias_table[i].flags & PTA_PRFCHW
3646 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3647 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3648 if (processor_alias_table[i].flags & PTA_RDSEED
3649 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3650 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3651 if (processor_alias_table[i].flags & PTA_ADX
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3654 if (processor_alias_table[i].flags & PTA_FXSR
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3657 if (processor_alias_table[i].flags & PTA_XSAVE
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3660 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3663 if (processor_alias_table[i].flags & PTA_AVX512F
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3666 if (processor_alias_table[i].flags & PTA_AVX512ER
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3669 if (processor_alias_table[i].flags & PTA_AVX512PF
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3672 if (processor_alias_table[i].flags & PTA_AVX512CD
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3675 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3678 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3679 x86_prefetch_sse = true;
3681 break;
3684 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3685 error ("generic CPU can be used only for %stune=%s %s",
3686 prefix, suffix, sw);
3687 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3688 error ("intel CPU can be used only for %stune=%s %s",
3689 prefix, suffix, sw);
3690 else if (i == pta_size)
3691 error ("bad value (%s) for %sarch=%s %s",
3692 opts->x_ix86_arch_string, prefix, suffix, sw);
3694 ix86_arch_mask = 1u << ix86_arch;
3695 for (i = 0; i < X86_ARCH_LAST; ++i)
3696 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3698 for (i = 0; i < pta_size; i++)
3699 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3701 ix86_schedule = processor_alias_table[i].schedule;
3702 ix86_tune = processor_alias_table[i].processor;
3703 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3705 if (!(processor_alias_table[i].flags & PTA_64BIT))
3707 if (ix86_tune_defaulted)
3709 opts->x_ix86_tune_string = "x86-64";
3710 for (i = 0; i < pta_size; i++)
3711 if (! strcmp (opts->x_ix86_tune_string,
3712 processor_alias_table[i].name))
3713 break;
3714 ix86_schedule = processor_alias_table[i].schedule;
3715 ix86_tune = processor_alias_table[i].processor;
3717 else
3718 error ("CPU you selected does not support x86-64 "
3719 "instruction set");
3722 /* Intel CPUs have always interpreted SSE prefetch instructions as
3723 NOPs; so, we can enable SSE prefetch instructions even when
3724 -mtune (rather than -march) points us to a processor that has them.
3725 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3726 higher processors. */
3727 if (TARGET_CMOV
3728 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3729 x86_prefetch_sse = true;
3730 break;
3733 if (ix86_tune_specified && i == pta_size)
3734 error ("bad value (%s) for %stune=%s %s",
3735 opts->x_ix86_tune_string, prefix, suffix, sw);
3737 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3739 #ifndef USE_IX86_FRAME_POINTER
3740 #define USE_IX86_FRAME_POINTER 0
3741 #endif
3743 #ifndef USE_X86_64_FRAME_POINTER
3744 #define USE_X86_64_FRAME_POINTER 0
3745 #endif
3747 /* Set the default values for switches whose default depends on TARGET_64BIT
3748 in case they weren't overwritten by command line options. */
3749 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3751 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3752 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3753 if (opts->x_flag_asynchronous_unwind_tables
3754 && !opts_set->x_flag_unwind_tables
3755 && TARGET_64BIT_MS_ABI)
3756 opts->x_flag_unwind_tables = 1;
3757 if (opts->x_flag_asynchronous_unwind_tables == 2)
3758 opts->x_flag_unwind_tables
3759 = opts->x_flag_asynchronous_unwind_tables = 1;
3760 if (opts->x_flag_pcc_struct_return == 2)
3761 opts->x_flag_pcc_struct_return = 0;
3763 else
3765 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3766 opts->x_flag_omit_frame_pointer
3767 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3768 if (opts->x_flag_asynchronous_unwind_tables == 2)
3769 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3770 if (opts->x_flag_pcc_struct_return == 2)
3771 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3774 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3775 if (opts->x_optimize_size)
3776 ix86_cost = &ix86_size_cost;
3777 else
3778 ix86_cost = ix86_tune_cost;
3780 /* Arrange to set up i386_stack_locals for all functions. */
3781 init_machine_status = ix86_init_machine_status;
3783 /* Validate -mregparm= value. */
3784 if (opts_set->x_ix86_regparm)
3786 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3787 warning (0, "-mregparm is ignored in 64-bit mode");
3788 if (opts->x_ix86_regparm > REGPARM_MAX)
3790 error ("-mregparm=%d is not between 0 and %d",
3791 opts->x_ix86_regparm, REGPARM_MAX);
3792 opts->x_ix86_regparm = 0;
3795 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3796 opts->x_ix86_regparm = REGPARM_MAX;
3798 /* Default align_* from the processor table. */
3799 if (opts->x_align_loops == 0)
3801 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3802 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3804 if (opts->x_align_jumps == 0)
3806 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3807 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3809 if (opts->x_align_functions == 0)
3811 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3814 /* Provide default for -mbranch-cost= value. */
3815 if (!opts_set->x_ix86_branch_cost)
3816 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3818 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3820 opts->x_target_flags
3821 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3823 /* Enable by default the SSE and MMX builtins. Do allow the user to
3824 explicitly disable any of these. In particular, disabling SSE and
3825 MMX for kernel code is extremely useful. */
3826 if (!ix86_arch_specified)
3827 opts->x_ix86_isa_flags
3828 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3829 | TARGET_SUBTARGET64_ISA_DEFAULT)
3830 & ~opts->x_ix86_isa_flags_explicit);
3832 if (TARGET_RTD_P (opts->x_target_flags))
3833 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3835 else
3837 opts->x_target_flags
3838 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3840 if (!ix86_arch_specified)
3841 opts->x_ix86_isa_flags
3842 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3844 /* i386 ABI does not specify red zone. It still makes sense to use it
3845 when the programmer takes care to keep the stack from being destroyed. */
3846 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3847 opts->x_target_flags |= MASK_NO_RED_ZONE;
3850 if (!global_options_set.x_flag_shrink_wrap_frame_pointer)
3851 flag_shrink_wrap_frame_pointer = 1;
3853 /* -fshrink-wrap-frame-pointer is an optimization based on
3854 -fno-omit-frame-pointer mode, so it is only effective when
3855 flag_omit_frame_pointer is false.
3856 Frame pointer shrinkwrap may increase code size, so disable
3857 it when optimize_size is true. */
3858 if (flag_omit_frame_pointer
3859 || optimize == 0
3860 || optimize_size)
3861 flag_shrink_wrap_frame_pointer = 0;
3863 /* Unless -mno-omit-leaf-frame-pointer is explicitly specified,
3864 -fshrink-wrap-frame-pointer enables omitting the leaf frame
3865 pointer by default. */
3866 if (flag_shrink_wrap_frame_pointer
3867 && !(TARGET_OMIT_LEAF_FRAME_POINTER_P (opts_set->x_target_flags)
3868 && !TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags)))
3869 opts->x_target_flags |= MASK_OMIT_LEAF_FRAME_POINTER;
3871 /* Keep nonleaf frame pointers. */
3872 if (opts->x_flag_omit_frame_pointer)
3873 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3875 /* If we're doing fast math, we don't care about comparison order
3876 wrt NaNs. This lets us use a shorter comparison sequence. */
3877 if (opts->x_flag_finite_math_only)
3878 opts->x_target_flags &= ~MASK_IEEE_FP;
3880 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3881 since the insns won't need emulation. */
3882 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3883 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3885 /* Likewise, if the target doesn't have a 387, or we've specified
3886 software floating point, don't use 387 inline intrinsics. */
3887 if (!TARGET_80387_P (opts->x_target_flags))
3888 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3890 /* Turn on MMX builtins for -msse. */
3891 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3892 opts->x_ix86_isa_flags
3893 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3895 /* Enable SSE prefetch. */
3896 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3897 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3898 x86_prefetch_sse = true;
3900 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3901 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3902 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3903 opts->x_ix86_isa_flags
3904 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3906 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3907 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3908 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3909 opts->x_ix86_isa_flags
3910 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3912 /* Enable lzcnt instruction for -mabm. */
3913 if (TARGET_ABM_P (opts->x_ix86_isa_flags))
3914 opts->x_ix86_isa_flags
3915 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3917 /* Validate -mpreferred-stack-boundary= value or default it to
3918 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3919 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3920 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3922 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3923 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3924 int max = (TARGET_SEH ? 4 : 12);
3926 if (opts->x_ix86_preferred_stack_boundary_arg < min
3927 || opts->x_ix86_preferred_stack_boundary_arg > max)
3929 if (min == max)
3930 error ("-mpreferred-stack-boundary is not supported "
3931 "for this target");
3932 else
3933 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3934 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3936 else
3937 ix86_preferred_stack_boundary
3938 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
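/* Worked example (sketch): the -mpreferred-stack-boundary= argument is the
   log2 of the alignment in bytes, converted above as
   (1 << arg) * BITS_PER_UNIT.  E.g. -mpreferred-stack-boundary=4 yields
   (1 << 4) * 8 = 128 bits, i.e. 16-byte stack alignment; for 64-bit code
   with SSE enabled 4 is also the smallest accepted value, and 12 (4096-byte
   alignment) the largest unless TARGET_SEH caps the maximum at 4.  */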
3941 /* Set the default value for -mstackrealign. */
3942 if (opts->x_ix86_force_align_arg_pointer == -1)
3943 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3945 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3947 /* Validate -mincoming-stack-boundary= value or default it to
3948 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3949 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3950 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3952 if (opts->x_ix86_incoming_stack_boundary_arg
3953 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3954 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3955 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3956 opts->x_ix86_incoming_stack_boundary_arg,
3957 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3958 else
3960 ix86_user_incoming_stack_boundary
3961 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3962 ix86_incoming_stack_boundary
3963 = ix86_user_incoming_stack_boundary;
3967 /* Accept -msseregparm only if at least SSE support is enabled. */
3968 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3969 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3970 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3972 if (opts_set->x_ix86_fpmath)
3974 if (opts->x_ix86_fpmath & FPMATH_SSE)
3976 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3978 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3979 opts->x_ix86_fpmath = FPMATH_387;
3981 else if ((opts->x_ix86_fpmath & FPMATH_387)
3982 && !TARGET_80387_P (opts->x_target_flags))
3984 warning (0, "387 instruction set disabled, using SSE arithmetics");
3985 opts->x_ix86_fpmath = FPMATH_SSE;
3989 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3990 -mfpmath=387. The latter is, however, the default on many targets, since
3991 the extra 80-bit precision of temporaries is considered part of the ABI.
3992 Override the default at least for -ffast-math.
3993 TODO: -mfpmath=both seems to produce similarly performing code with
3994 slightly smaller binaries. It is, however, not clear whether register
3995 allocation is ready for this setting.
3996 Also, -mfpmath=387 is overall more compact (about 4-5%) than SSE
3997 codegen. We may switch to 387 with -ffast-math for size-optimized
3998 functions. */
3999 else if (fast_math_flags_set_p (&global_options)
4000 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4001 opts->x_ix86_fpmath = FPMATH_SSE;
4002 else
4003 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4005 /* If the i387 is disabled, then do not return values in it. */
4006 if (!TARGET_80387_P (opts->x_target_flags))
4007 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4009 /* Use external vectorized library in vectorizing intrinsics. */
4010 if (opts_set->x_ix86_veclibabi_type)
4011 switch (opts->x_ix86_veclibabi_type)
4013 case ix86_veclibabi_type_svml:
4014 ix86_veclib_handler = ix86_veclibabi_svml;
4015 break;
4017 case ix86_veclibabi_type_acml:
4018 ix86_veclib_handler = ix86_veclibabi_acml;
4019 break;
4021 default:
4022 gcc_unreachable ();
4025 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4026 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4027 && !opts->x_optimize_size)
4028 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4030 /* If stack probes are required, the space used for large function
4031 arguments on the stack must also be probed, so enable
4032 -maccumulate-outgoing-args so this happens in the prologue. */
4033 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4034 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4036 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4037 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4038 "for correctness", prefix, suffix);
4039 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4042 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4044 char *p;
4045 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4046 p = strchr (internal_label_prefix, 'X');
4047 internal_label_prefix_len = p - internal_label_prefix;
4048 *p = '\0';
4051 /* When a scheduling description is not available, disable the scheduler pass
4052 so it won't slow down compilation and make x87 code slower. */
4053 if (!TARGET_SCHEDULE)
4054 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4056 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4057 ix86_tune_cost->simultaneous_prefetches,
4058 opts->x_param_values,
4059 opts_set->x_param_values);
4060 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4061 ix86_tune_cost->prefetch_block,
4062 opts->x_param_values,
4063 opts_set->x_param_values);
4064 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4065 ix86_tune_cost->l1_cache_size,
4066 opts->x_param_values,
4067 opts_set->x_param_values);
4068 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4069 ix86_tune_cost->l2_cache_size,
4070 opts->x_param_values,
4071 opts_set->x_param_values);
4073 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4074 if (opts->x_flag_prefetch_loop_arrays < 0
4075 && HAVE_prefetch
4076 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4077 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4078 opts->x_flag_prefetch_loop_arrays = 1;
4080 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4081 can be optimized to ap = __builtin_next_arg (0). */
4082 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4083 targetm.expand_builtin_va_start = NULL;
4085 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4087 ix86_gen_leave = gen_leave_rex64;
4088 if (Pmode == DImode)
4090 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4091 ix86_gen_tls_local_dynamic_base_64
4092 = gen_tls_local_dynamic_base_64_di;
4094 else
4096 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4097 ix86_gen_tls_local_dynamic_base_64
4098 = gen_tls_local_dynamic_base_64_si;
4101 else
4102 ix86_gen_leave = gen_leave;
4104 if (Pmode == DImode)
4106 ix86_gen_add3 = gen_adddi3;
4107 ix86_gen_sub3 = gen_subdi3;
4108 ix86_gen_sub3_carry = gen_subdi3_carry;
4109 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4110 ix86_gen_andsp = gen_anddi3;
4111 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4112 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4113 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4114 ix86_gen_monitor = gen_sse3_monitor_di;
4116 else
4118 ix86_gen_add3 = gen_addsi3;
4119 ix86_gen_sub3 = gen_subsi3;
4120 ix86_gen_sub3_carry = gen_subsi3_carry;
4121 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4122 ix86_gen_andsp = gen_andsi3;
4123 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4124 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4125 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4126 ix86_gen_monitor = gen_sse3_monitor_si;
4129 #ifdef USE_IX86_CLD
4130 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4131 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4132 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4133 #endif
4135 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4137 if (opts->x_flag_fentry > 0)
4138 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4139 "with -fpic");
4140 opts->x_flag_fentry = 0;
4142 else if (TARGET_SEH)
4144 if (opts->x_flag_fentry == 0)
4145 sorry ("-mno-fentry isn%'t compatible with SEH");
4146 opts->x_flag_fentry = 1;
4148 else if (opts->x_flag_fentry < 0)
4150 #if defined(PROFILE_BEFORE_PROLOGUE)
4151 opts->x_flag_fentry = 1;
4152 #else
4153 opts->x_flag_fentry = 0;
4154 #endif
4157 /* When not optimizing for size, enable the vzeroupper optimization for
4158 TARGET_AVX with -fexpensive-optimizations, and split 32-byte
4159 AVX unaligned loads/stores. */
4160 if (!opts->x_optimize_size)
4162 if (flag_expensive_optimizations
4163 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4164 opts->x_target_flags |= MASK_VZEROUPPER;
4165 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4166 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4167 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4168 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4169 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4170 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4171 /* Enable 128-bit AVX instruction generation
4172 for the auto-vectorizer. */
4173 if (TARGET_AVX128_OPTIMAL
4174 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4175 opts->x_target_flags |= MASK_PREFER_AVX128;
4178 if (opts->x_ix86_recip_name)
4180 char *p = ASTRDUP (opts->x_ix86_recip_name);
4181 char *q;
4182 unsigned int mask, i;
4183 bool invert;
4185 while ((q = strtok (p, ",")) != NULL)
4187 p = NULL;
4188 if (*q == '!')
4190 invert = true;
4191 q++;
4193 else
4194 invert = false;
4196 if (!strcmp (q, "default"))
4197 mask = RECIP_MASK_ALL;
4198 else
4200 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4201 if (!strcmp (q, recip_options[i].string))
4203 mask = recip_options[i].mask;
4204 break;
4207 if (i == ARRAY_SIZE (recip_options))
4209 error ("unknown option for -mrecip=%s", q);
4210 invert = false;
4211 mask = RECIP_MASK_NONE;
4215 opts->x_recip_mask_explicit |= mask;
4216 if (invert)
4217 opts->x_recip_mask &= ~mask;
4218 else
4219 opts->x_recip_mask |= mask;
4223 if (TARGET_RECIP_P (opts->x_target_flags))
4224 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4225 else if (opts_set->x_target_flags & MASK_RECIP)
4226 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
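/* Worked example (sketch): -mrecip= accepts a comma-separated list of the
   keywords in recip_options[], each optionally negated with '!'.  E.g.
       -mrecip=all,!sqrt
   first ORs RECIP_MASK_ALL into the mask and then clears RECIP_MASK_SQRT,
   leaving reciprocal approximations enabled for div, vec-div and vec-sqrt
   but not for scalar sqrt.  A bare -mrecip (handled just above) turns on
   every bit that was not given explicitly via -mrecip=.  */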
4228 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4229 for 64-bit Bionic. */
4230 if (TARGET_HAS_BIONIC
4231 && !(opts_set->x_target_flags
4232 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4233 opts->x_target_flags |= (TARGET_64BIT
4234 ? MASK_LONG_DOUBLE_128
4235 : MASK_LONG_DOUBLE_64);
4237 /* Only one of them can be active. */
4238 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4239 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4241 /* Save the initial options in case the user does function specific
4242 options. */
4243 if (main_args_p)
4244 target_option_default_node = target_option_current_node
4245 = build_target_option_node (opts);
4247 /* Handle stack protector */
4248 if (!opts_set->x_ix86_stack_protector_guard)
4249 opts->x_ix86_stack_protector_guard
4250 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4252 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4253 if (opts->x_ix86_tune_memcpy_strategy)
4255 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4256 ix86_parse_stringop_strategy_string (str, false);
4257 free (str);
4260 if (opts->x_ix86_tune_memset_strategy)
4262 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4263 ix86_parse_stringop_strategy_string (str, true);
4264 free (str);
4268 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4270 static void
4271 ix86_option_override (void)
4273 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4274 static struct register_pass_info insert_vzeroupper_info
4275 = { pass_insert_vzeroupper, "reload",
4276 1, PASS_POS_INSERT_AFTER
4279 ix86_option_override_internal (true, &global_options, &global_options_set);
4282 /* This needs to be done at start up. It's convenient to do it here. */
4283 register_pass (&insert_vzeroupper_info);
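/* Note (sketch): the register_pass_info above asks for the vzeroupper
   insertion pass to be placed after the first instance of the pass named
   "reload"; the same { pass, reference-pass-name, instance, position }
   layout is what a plugin would hand to register_pass ().  */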
4286 /* Update register usage after having seen the compiler flags. */
4288 static void
4289 ix86_conditional_register_usage (void)
4291 int i, c_mask;
4292 unsigned int j;
4294 /* The PIC register, if it exists, is fixed. */
4295 j = PIC_OFFSET_TABLE_REGNUM;
4296 if (j != INVALID_REGNUM)
4297 fixed_regs[j] = call_used_regs[j] = 1;
4299 /* For 32-bit targets, squash the REX registers. */
4300 if (! TARGET_64BIT)
4302 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4303 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4304 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4305 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4306 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4307 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4310 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4311 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4312 : TARGET_64BIT ? (1 << 2)
4313 : (1 << 1));
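/* Worked example (sketch, hypothetical value): a CALL_USED_REGISTERS entry
   greater than 1 encodes a bitmask of ABIs rather than a plain 0/1.  With
   the c_mask just computed, a hypothetical entry of (1 << 1) | (1 << 2)
   would make the register call-used for 32-bit code and for the 64-bit
   SysV ABI, but call-saved for the 64-bit MS ABI, where only bit 3 is
   selected.  That conversion happens in the loop below.  */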
4315 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4317 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4319 /* Set/reset conditionally defined registers from
4320 CALL_USED_REGISTERS initializer. */
4321 if (call_used_regs[i] > 1)
4322 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4324 /* Calculate registers of CLOBBERED_REGS register set
4325 as call used registers from GENERAL_REGS register set. */
4326 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4327 && call_used_regs[i])
4328 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4331 /* If MMX is disabled, squash the registers. */
4332 if (! TARGET_MMX)
4333 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4334 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4335 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4337 /* If SSE is disabled, squash the registers. */
4338 if (! TARGET_SSE)
4339 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4340 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4341 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4343 /* If the FPU is disabled, squash the registers. */
4344 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4345 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4346 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4347 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4349 /* If AVX512F is disabled, squash the registers. */
4350 if (! TARGET_AVX512F)
4352 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4353 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4355 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4356 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4361 /* Save the current options */
4363 static void
4364 ix86_function_specific_save (struct cl_target_option *ptr,
4365 struct gcc_options *opts)
4367 ptr->arch = ix86_arch;
4368 ptr->schedule = ix86_schedule;
4369 ptr->tune = ix86_tune;
4370 ptr->branch_cost = ix86_branch_cost;
4371 ptr->tune_defaulted = ix86_tune_defaulted;
4372 ptr->arch_specified = ix86_arch_specified;
4373 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4374 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4375 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4376 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4377 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4378 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4379 ptr->x_ix86_abi = opts->x_ix86_abi;
4380 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4381 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4382 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4383 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4384 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4385 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4386 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4387 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4388 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4389 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4390 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4391 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4392 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4393 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4394 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4395 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4396 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4397 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4398 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4399 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4401 /* The fields are char but the variables are not; make sure the
4402 values fit in the fields. */
4403 gcc_assert (ptr->arch == ix86_arch);
4404 gcc_assert (ptr->schedule == ix86_schedule);
4405 gcc_assert (ptr->tune == ix86_tune);
4406 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4409 /* Restore the current options */
4411 static void
4412 ix86_function_specific_restore (struct gcc_options *opts,
4413 struct cl_target_option *ptr)
4415 enum processor_type old_tune = ix86_tune;
4416 enum processor_type old_arch = ix86_arch;
4417 unsigned int ix86_arch_mask;
4418 int i;
4420 /* We don't change -fPIC. */
4421 opts->x_flag_pic = flag_pic;
4423 ix86_arch = (enum processor_type) ptr->arch;
4424 ix86_schedule = (enum attr_cpu) ptr->schedule;
4425 ix86_tune = (enum processor_type) ptr->tune;
4426 opts->x_ix86_branch_cost = ptr->branch_cost;
4427 ix86_tune_defaulted = ptr->tune_defaulted;
4428 ix86_arch_specified = ptr->arch_specified;
4429 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4430 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4431 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4432 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4433 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4434 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4435 opts->x_ix86_abi = ptr->x_ix86_abi;
4436 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4437 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4438 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4439 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4440 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4441 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4442 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4443 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4444 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4445 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4446 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4447 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4448 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4449 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4450 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4451 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4452 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4453 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4454 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4455 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4457 /* Recreate the arch feature tests if the arch changed */
4458 if (old_arch != ix86_arch)
4460 ix86_arch_mask = 1u << ix86_arch;
4461 for (i = 0; i < X86_ARCH_LAST; ++i)
4462 ix86_arch_features[i]
4463 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4466 /* Recreate the tune optimization tests */
4467 if (old_tune != ix86_tune)
4468 set_ix86_tune_features (ix86_tune, false);
4471 /* Print the current options */
4473 static void
4474 ix86_function_specific_print (FILE *file, int indent,
4475 struct cl_target_option *ptr)
4477 char *target_string
4478 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4479 NULL, NULL, ptr->x_ix86_fpmath, false);
4481 gcc_assert (ptr->arch < PROCESSOR_max);
4482 fprintf (file, "%*sarch = %d (%s)\n",
4483 indent, "",
4484 ptr->arch, processor_target_table[ptr->arch].name);
4486 gcc_assert (ptr->tune < PROCESSOR_max);
4487 fprintf (file, "%*stune = %d (%s)\n",
4488 indent, "",
4489 ptr->tune, processor_target_table[ptr->tune].name);
4491 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4493 if (target_string)
4495 fprintf (file, "%*s%s\n", indent, "", target_string);
4496 free (target_string);
4501 /* Inner function to process the attribute((target(...))), take an argument and
4502 set the current options from the argument. If we have a list, recursively go
4503 over the list. */
4505 static bool
4506 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4507 struct gcc_options *opts,
4508 struct gcc_options *opts_set,
4509 struct gcc_options *enum_opts_set)
4511 char *next_optstr;
4512 bool ret = true;
4514 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4515 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4516 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4517 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4518 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4520 enum ix86_opt_type
4522 ix86_opt_unknown,
4523 ix86_opt_yes,
4524 ix86_opt_no,
4525 ix86_opt_str,
4526 ix86_opt_enum,
4527 ix86_opt_isa
4530 static const struct
4532 const char *string;
4533 size_t len;
4534 enum ix86_opt_type type;
4535 int opt;
4536 int mask;
4537 } attrs[] = {
4538 /* isa options */
4539 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4540 IX86_ATTR_ISA ("abm", OPT_mabm),
4541 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4542 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4543 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4544 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4545 IX86_ATTR_ISA ("aes", OPT_maes),
4546 IX86_ATTR_ISA ("sha", OPT_msha),
4547 IX86_ATTR_ISA ("avx", OPT_mavx),
4548 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4549 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4550 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4551 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4552 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4553 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4554 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4555 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4556 IX86_ATTR_ISA ("sse", OPT_msse),
4557 IX86_ATTR_ISA ("sse2", OPT_msse2),
4558 IX86_ATTR_ISA ("sse3", OPT_msse3),
4559 IX86_ATTR_ISA ("sse4", OPT_msse4),
4560 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4561 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4562 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4563 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4564 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4565 IX86_ATTR_ISA ("fma", OPT_mfma),
4566 IX86_ATTR_ISA ("xop", OPT_mxop),
4567 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4568 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4569 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4570 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4571 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4572 IX86_ATTR_ISA ("hle", OPT_mhle),
4573 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4574 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4575 IX86_ATTR_ISA ("adx", OPT_madx),
4576 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4577 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4578 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4579 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4581 /* enum options */
4582 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4584 /* string options */
4585 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4586 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4588 /* flag options */
4589 IX86_ATTR_YES ("cld",
4590 OPT_mcld,
4591 MASK_CLD),
4593 IX86_ATTR_NO ("fancy-math-387",
4594 OPT_mfancy_math_387,
4595 MASK_NO_FANCY_MATH_387),
4597 IX86_ATTR_YES ("ieee-fp",
4598 OPT_mieee_fp,
4599 MASK_IEEE_FP),
4601 IX86_ATTR_YES ("inline-all-stringops",
4602 OPT_minline_all_stringops,
4603 MASK_INLINE_ALL_STRINGOPS),
4605 IX86_ATTR_YES ("inline-stringops-dynamically",
4606 OPT_minline_stringops_dynamically,
4607 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4609 IX86_ATTR_NO ("align-stringops",
4610 OPT_mno_align_stringops,
4611 MASK_NO_ALIGN_STRINGOPS),
4613 IX86_ATTR_YES ("recip",
4614 OPT_mrecip,
4615 MASK_RECIP),
4619 /* If this is a list, recurse to get the options. */
4620 if (TREE_CODE (args) == TREE_LIST)
4622 bool ret = true;
4624 for (; args; args = TREE_CHAIN (args))
4625 if (TREE_VALUE (args)
4626 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4627 p_strings, opts, opts_set,
4628 enum_opts_set))
4629 ret = false;
4631 return ret;
4634 else if (TREE_CODE (args) != STRING_CST)
4636 error ("attribute %<target%> argument not a string");
4637 return false;
4640 /* Handle multiple arguments separated by commas. */
4641 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4643 while (next_optstr && *next_optstr != '\0')
4645 char *p = next_optstr;
4646 char *orig_p = p;
4647 char *comma = strchr (next_optstr, ',');
4648 const char *opt_string;
4649 size_t len, opt_len;
4650 int opt;
4651 bool opt_set_p;
4652 char ch;
4653 unsigned i;
4654 enum ix86_opt_type type = ix86_opt_unknown;
4655 int mask = 0;
4657 if (comma)
4659 *comma = '\0';
4660 len = comma - next_optstr;
4661 next_optstr = comma + 1;
4663 else
4665 len = strlen (p);
4666 next_optstr = NULL;
4669 /* Recognize no-xxx. */
4670 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4672 opt_set_p = false;
4673 p += 3;
4674 len -= 3;
4676 else
4677 opt_set_p = true;
4679 /* Find the option. */
4680 ch = *p;
4681 opt = N_OPTS;
4682 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4684 type = attrs[i].type;
4685 opt_len = attrs[i].len;
4686 if (ch == attrs[i].string[0]
4687 && ((type != ix86_opt_str && type != ix86_opt_enum)
4688 ? len == opt_len
4689 : len > opt_len)
4690 && memcmp (p, attrs[i].string, opt_len) == 0)
4692 opt = attrs[i].opt;
4693 mask = attrs[i].mask;
4694 opt_string = attrs[i].string;
4695 break;
4699 /* Process the option. */
4700 if (opt == N_OPTS)
4702 error ("attribute(target(\"%s\")) is unknown", orig_p);
4703 ret = false;
4706 else if (type == ix86_opt_isa)
4708 struct cl_decoded_option decoded;
4710 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4711 ix86_handle_option (opts, opts_set,
4712 &decoded, input_location);
4715 else if (type == ix86_opt_yes || type == ix86_opt_no)
4717 if (type == ix86_opt_no)
4718 opt_set_p = !opt_set_p;
4720 if (opt_set_p)
4721 opts->x_target_flags |= mask;
4722 else
4723 opts->x_target_flags &= ~mask;
4726 else if (type == ix86_opt_str)
4728 if (p_strings[opt])
4730 error ("option(\"%s\") was already specified", opt_string);
4731 ret = false;
4733 else
4734 p_strings[opt] = xstrdup (p + opt_len);
4737 else if (type == ix86_opt_enum)
4739 bool arg_ok;
4740 int value;
4742 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4743 if (arg_ok)
4744 set_option (opts, enum_opts_set, opt, value,
4745 p + opt_len, DK_UNSPECIFIED, input_location,
4746 global_dc);
4747 else
4749 error ("attribute(target(\"%s\")) is unknown", orig_p);
4750 ret = false;
4754 else
4755 gcc_unreachable ();
4758 return ret;
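/* Illustrative sketch, not part of the original source: the kind of user
   declarations whose attribute strings are parsed by
   ix86_valid_target_attribute_inner_p above.  The function names are
   hypothetical.  */
#if 0
/* "sse4.2" maps to OPT_msse4_2 through the isa table, and "no-avx" goes
   through the "no-" prefix handling and clears the AVX isa flag.  */
extern int crc32_loop (const unsigned char *buf, unsigned long len)
  __attribute__ ((target ("sse4.2,no-avx")));

/* "arch=" and "tune=" are string options and end up in p_strings[].  */
extern int generic_loop (const unsigned char *buf, unsigned long len)
  __attribute__ ((target ("arch=core2,tune=generic")));
#endif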
4761 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4763 tree
4764 ix86_valid_target_attribute_tree (tree args,
4765 struct gcc_options *opts,
4766 struct gcc_options *opts_set)
4768 const char *orig_arch_string = opts->x_ix86_arch_string;
4769 const char *orig_tune_string = opts->x_ix86_tune_string;
4770 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4771 int orig_tune_defaulted = ix86_tune_defaulted;
4772 int orig_arch_specified = ix86_arch_specified;
4773 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4774 tree t = NULL_TREE;
4775 int i;
4776 struct cl_target_option *def
4777 = TREE_TARGET_OPTION (target_option_default_node);
4778 struct gcc_options enum_opts_set;
4780 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4782 /* Process each of the options on the chain. */
4783 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4784 opts_set, &enum_opts_set))
4785 return error_mark_node;
4787 /* If the changed options are different from the default, rerun
4788 ix86_option_override_internal, and then save the options away.
4789 The string options are attribute options, and will be undone
4790 when we copy the save structure. */
4791 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4792 || opts->x_target_flags != def->x_target_flags
4793 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4794 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4795 || enum_opts_set.x_ix86_fpmath)
4797 /* If we are using the default tune= or arch=, undo the string assigned,
4798 and use the default. */
4799 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4800 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4801 else if (!orig_arch_specified)
4802 opts->x_ix86_arch_string = NULL;
4804 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4805 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4806 else if (orig_tune_defaulted)
4807 opts->x_ix86_tune_string = NULL;
4809 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4810 if (enum_opts_set.x_ix86_fpmath)
4811 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4812 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4813 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4815 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4816 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4819 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4820 ix86_option_override_internal (false, opts, opts_set);
4822 /* Add any builtin functions with the new isa if any. */
4823 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4825 /* Save the current options unless we are validating options for
4826 #pragma. */
4827 t = build_target_option_node (opts);
4829 opts->x_ix86_arch_string = orig_arch_string;
4830 opts->x_ix86_tune_string = orig_tune_string;
4831 opts_set->x_ix86_fpmath = orig_fpmath_set;
4833 /* Free up memory allocated to hold the strings */
4834 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4835 free (option_strings[i]);
4838 return t;
4841 /* Hook to validate attribute((target("string"))). */
4843 static bool
4844 ix86_valid_target_attribute_p (tree fndecl,
4845 tree ARG_UNUSED (name),
4846 tree args,
4847 int ARG_UNUSED (flags))
4849 struct gcc_options func_options;
4850 tree new_target, new_optimize;
4851 bool ret = true;
4853 /* attribute((target("default"))) does nothing, beyond
4854 affecting multi-versioning. */
4855 if (TREE_VALUE (args)
4856 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4857 && TREE_CHAIN (args) == NULL_TREE
4858 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4859 return true;
4861 tree old_optimize = build_optimization_node (&global_options);
4863 /* Get the optimization options of the current function. */
4864 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4866 if (!func_optimize)
4867 func_optimize = old_optimize;
4869 /* Init func_options. */
4870 memset (&func_options, 0, sizeof (func_options));
4871 init_options_struct (&func_options, NULL);
4872 lang_hooks.init_options_struct (&func_options);
4874 cl_optimization_restore (&func_options,
4875 TREE_OPTIMIZATION (func_optimize));
4877 /* Initialize func_options to the default before its target options can
4878 be set. */
4879 cl_target_option_restore (&func_options,
4880 TREE_TARGET_OPTION (target_option_default_node));
4882 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4883 &global_options_set);
4885 new_optimize = build_optimization_node (&func_options);
4887 if (new_target == error_mark_node)
4888 ret = false;
4890 else if (fndecl && new_target)
4892 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4894 if (old_optimize != new_optimize)
4895 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4898 return ret;
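/* Illustrative sketch, not part of the original source: the "default"
   string special-cased above is what function multi-versioning uses.  In
   this GCC generation the feature is only wired up for C++; the name below
   is hypothetical.  */
#if 0
__attribute__ ((target ("default"))) int popcount64 (unsigned long long x);
__attribute__ ((target ("popcnt"))) int popcount64 (unsigned long long x);
#endif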
4902 /* Hook to determine if one function can safely inline another. */
4904 static bool
4905 ix86_can_inline_p (tree caller, tree callee)
4907 bool ret = false;
4908 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4909 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4911 /* If callee has no option attributes, then it is ok to inline. */
4912 if (!callee_tree)
4913 ret = true;
4915 /* If caller has no option attributes, but callee does then it is not ok to
4916 inline. */
4917 else if (!caller_tree)
4918 ret = false;
4920 else
4922 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4923 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4925 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
4926 can inline an SSE2 function but an SSE2 function can't inline an SSE4
4927 function. */
4928 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4929 != callee_opts->x_ix86_isa_flags)
4930 ret = false;
4932 /* See if we have the same non-isa options. */
4933 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4934 ret = false;
4936 /* See if arch, tune, etc. are the same. */
4937 else if (caller_opts->arch != callee_opts->arch)
4938 ret = false;
4940 else if (caller_opts->tune != callee_opts->tune)
4941 ret = false;
4943 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4944 ret = false;
4946 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4947 ret = false;
4949 else
4950 ret = true;
4953 return ret;
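/* Illustrative sketch, not part of the original source: the ISA-subset rule
   above in user terms; the function names are hypothetical.  */
#if 0
static inline int add_plain (int a, int b) { return a + b; }

__attribute__ ((target ("avx2")))
int add_avx2 (int a, int b)
{
  return add_plain (a, b);	/* OK: callee has no target attribute.  */
}

__attribute__ ((target ("avx2")))
static inline int add_a2 (int a, int b) { return a + b; }

int add_generic (int a, int b)
{
  return add_a2 (a, b);		/* Not inlined: caller lacks the callee's
				   avx2 isa flag.  */
}
#endif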
4957 /* Remember the last target of ix86_set_current_function. */
4958 static GTY(()) tree ix86_previous_fndecl;
4960 /* Invalidate ix86_previous_fndecl cache. */
4961 void
4962 ix86_reset_previous_fndecl (void)
4964 ix86_previous_fndecl = NULL_TREE;
4967 /* Establish appropriate back-end context for processing the function
4968 FNDECL. The argument might be NULL to indicate processing at top
4969 level, outside of any function scope. */
4970 static void
4971 ix86_set_current_function (tree fndecl)
4973 /* Only change the context if the function changes. This hook is called
4974 several times in the course of compiling a function, and we don't want to
4975 slow things down too much or call target_reinit when it isn't safe. */
4976 if (fndecl && fndecl != ix86_previous_fndecl)
4978 tree old_tree = (ix86_previous_fndecl
4979 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4980 : NULL_TREE);
4982 tree new_tree = (fndecl
4983 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4984 : NULL_TREE);
4986 ix86_previous_fndecl = fndecl;
4987 if (old_tree == new_tree)
4990 else if (new_tree)
4992 cl_target_option_restore (&global_options,
4993 TREE_TARGET_OPTION (new_tree));
4994 if (TREE_TARGET_GLOBALS (new_tree))
4995 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4996 else
4997 TREE_TARGET_GLOBALS (new_tree)
4998 = save_target_globals_default_opts ();
5001 else if (old_tree)
5003 new_tree = target_option_current_node;
5004 cl_target_option_restore (&global_options,
5005 TREE_TARGET_OPTION (new_tree));
5006 if (TREE_TARGET_GLOBALS (new_tree))
5007 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5008 else if (new_tree == target_option_default_node)
5009 restore_target_globals (&default_target_globals);
5010 else
5011 TREE_TARGET_GLOBALS (new_tree)
5012 = save_target_globals_default_opts ();
5018 /* Return true if this goes in large data/bss. */
5020 static bool
5021 ix86_in_large_data_p (tree exp)
5023 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5024 return false;
5026 /* Functions are never large data. */
5027 if (TREE_CODE (exp) == FUNCTION_DECL)
5028 return false;
5030 /* Automatic variables are never large data. */
5031 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5032 return false;
5034 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5036 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
5037 if (strcmp (section, ".ldata") == 0
5038 || strcmp (section, ".lbss") == 0)
5039 return true;
5040 return false;
5042 else
5044 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5046 /* If this is an incomplete type with size 0, then we can't put it
5047 in data because it might be too big when completed. Also,
5048 int_size_in_bytes returns -1 if size can vary or is larger than
5049 an integer in which case also it is safer to assume that it goes in
5050 large data. */
5051 if (size <= 0 || size > ix86_section_threshold)
5052 return true;
5055 return false;
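/* Illustrative sketch, not part of the original source: with
   -mcmodel=medium the predicate above sends objects larger than
   -mlarge-data-threshold (ix86_section_threshold, 0x10000 by default) to
   .ldata/.lbss, while smaller objects stay in the normal sections.  The
   variable names are hypothetical.  */
#if 0
static char big_buffer[1 << 20];	/* above the threshold: .lbss  */
static char small_buffer[64];		/* below the threshold: .bss   */
#endif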
5058 /* Switch to the appropriate section for output of DECL.
5059 DECL is either a `VAR_DECL' node or a constant of some sort.
5060 RELOC indicates whether forming the initial value of DECL requires
5061 link-time relocations. */
5063 ATTRIBUTE_UNUSED static section *
5064 x86_64_elf_select_section (tree decl, int reloc,
5065 unsigned HOST_WIDE_INT align)
5067 if (ix86_in_large_data_p (decl))
5069 const char *sname = NULL;
5070 unsigned int flags = SECTION_WRITE;
5071 switch (categorize_decl_for_section (decl, reloc))
5073 case SECCAT_DATA:
5074 sname = ".ldata";
5075 break;
5076 case SECCAT_DATA_REL:
5077 sname = ".ldata.rel";
5078 break;
5079 case SECCAT_DATA_REL_LOCAL:
5080 sname = ".ldata.rel.local";
5081 break;
5082 case SECCAT_DATA_REL_RO:
5083 sname = ".ldata.rel.ro";
5084 break;
5085 case SECCAT_DATA_REL_RO_LOCAL:
5086 sname = ".ldata.rel.ro.local";
5087 break;
5088 case SECCAT_BSS:
5089 sname = ".lbss";
5090 flags |= SECTION_BSS;
5091 break;
5092 case SECCAT_RODATA:
5093 case SECCAT_RODATA_MERGE_STR:
5094 case SECCAT_RODATA_MERGE_STR_INIT:
5095 case SECCAT_RODATA_MERGE_CONST:
5096 sname = ".lrodata";
5097 flags = 0;
5098 break;
5099 case SECCAT_SRODATA:
5100 case SECCAT_SDATA:
5101 case SECCAT_SBSS:
5102 gcc_unreachable ();
5103 case SECCAT_TEXT:
5104 case SECCAT_TDATA:
5105 case SECCAT_TBSS:
5106 /* We don't split these for the medium model. Place them into
5107 default sections and hope for the best. */
5108 break;
5110 if (sname)
5112 /* We might get called with string constants, but get_named_section
5113 doesn't like them as they are not DECLs. Also, we need to set
5114 flags in that case. */
5115 if (!DECL_P (decl))
5116 return get_section (sname, flags, NULL);
5117 return get_named_section (decl, sname, reloc);
5120 return default_elf_select_section (decl, reloc, align);
5123 /* Select a set of attributes for section NAME based on the properties
5124 of DECL and whether or not RELOC indicates that DECL's initializer
5125 might contain runtime relocations. */
5127 static unsigned int ATTRIBUTE_UNUSED
5128 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5130 unsigned int flags = default_section_type_flags (decl, name, reloc);
5132 if (decl == NULL_TREE
5133 && (strcmp (name, ".ldata.rel.ro") == 0
5134 || strcmp (name, ".ldata.rel.ro.local") == 0))
5135 flags |= SECTION_RELRO;
5137 if (strcmp (name, ".lbss") == 0
5138 || strncmp (name, ".lbss.", 6) == 0
5139 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5140 flags |= SECTION_BSS;
5142 return flags;
5145 /* Build up a unique section name, expressed as a
5146 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5147 RELOC indicates whether the initial value of EXP requires
5148 link-time relocations. */
5150 static void ATTRIBUTE_UNUSED
5151 x86_64_elf_unique_section (tree decl, int reloc)
5153 if (ix86_in_large_data_p (decl))
5155 const char *prefix = NULL;
5156 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5157 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5159 switch (categorize_decl_for_section (decl, reloc))
5161 case SECCAT_DATA:
5162 case SECCAT_DATA_REL:
5163 case SECCAT_DATA_REL_LOCAL:
5164 case SECCAT_DATA_REL_RO:
5165 case SECCAT_DATA_REL_RO_LOCAL:
5166 prefix = one_only ? ".ld" : ".ldata";
5167 break;
5168 case SECCAT_BSS:
5169 prefix = one_only ? ".lb" : ".lbss";
5170 break;
5171 case SECCAT_RODATA:
5172 case SECCAT_RODATA_MERGE_STR:
5173 case SECCAT_RODATA_MERGE_STR_INIT:
5174 case SECCAT_RODATA_MERGE_CONST:
5175 prefix = one_only ? ".lr" : ".lrodata";
5176 break;
5177 case SECCAT_SRODATA:
5178 case SECCAT_SDATA:
5179 case SECCAT_SBSS:
5180 gcc_unreachable ();
5181 case SECCAT_TEXT:
5182 case SECCAT_TDATA:
5183 case SECCAT_TBSS:
5184 /* We don't split these for the medium model. Place them into
5185 default sections and hope for the best. */
5186 break;
5188 if (prefix)
5190 const char *name, *linkonce;
5191 char *string;
5193 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5194 name = targetm.strip_name_encoding (name);
5196 /* If we're using one_only, then there needs to be a .gnu.linkonce
5197 prefix to the section name. */
5198 linkonce = one_only ? ".gnu.linkonce" : "";
5200 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5202 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5203 return;
5206 default_unique_section (decl, reloc);
5209 #ifdef COMMON_ASM_OP
5210 /* This says how to output assembler code to declare an
5211 uninitialized external linkage data object.
5213 For medium model x86-64 we need to use .largecomm opcode for
5214 large objects. */
5215 void
5216 x86_elf_aligned_common (FILE *file,
5217 const char *name, unsigned HOST_WIDE_INT size,
5218 int align)
5220 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5221 && size > (unsigned int)ix86_section_threshold)
5222 fputs ("\t.largecomm\t", file);
5223 else
5224 fputs (COMMON_ASM_OP, file);
5225 assemble_name (file, name);
5226 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5227 size, align / BITS_PER_UNIT);
5229 #endif
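/* Illustrative note, not part of the original source: for -mcmodel=medium
   a common symbol above the section threshold is emitted by the routine
   above roughly as

	.largecomm	big_common,1048576,32

   (name, size in bytes, alignment in bytes), instead of the usual ".comm"
   directive; the exact alignment depends on the data-alignment rules.  */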
5231 /* Utility function for targets to use in implementing
5232 ASM_OUTPUT_ALIGNED_BSS. */
5234 void
5235 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5236 const char *name, unsigned HOST_WIDE_INT size,
5237 int align)
5239 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5240 && size > (unsigned int)ix86_section_threshold)
5241 switch_to_section (get_named_section (decl, ".lbss", 0));
5242 else
5243 switch_to_section (bss_section);
5244 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5245 #ifdef ASM_DECLARE_OBJECT_NAME
5246 last_assemble_variable_decl = decl;
5247 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5248 #else
5249 /* Standard thing is just output label for the object. */
5250 ASM_OUTPUT_LABEL (file, name);
5251 #endif /* ASM_DECLARE_OBJECT_NAME */
5252 ASM_OUTPUT_SKIP (file, size ? size : 1);
5255 /* Decide whether we must probe the stack before any space allocation
5256 on this target. It's essentially TARGET_STACK_PROBE except when
5257 -fstack-check causes the stack to be already probed differently. */
5259 bool
5260 ix86_target_stack_probe (void)
5262 /* Do not probe the stack twice if static stack checking is enabled. */
5263 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5264 return false;
5266 return TARGET_STACK_PROBE;
5269 /* Decide whether we can make a sibling call to a function. DECL is the
5270 declaration of the function being targeted by the call and EXP is the
5271 CALL_EXPR representing the call. */
5273 static bool
5274 ix86_function_ok_for_sibcall (tree decl, tree exp)
5276 tree type, decl_or_type;
5277 rtx a, b;
5279 /* If we are generating position-independent code, we cannot sibcall
5280 optimize any indirect call, or a direct call to a global function,
5281 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5282 if (!TARGET_MACHO
5283 && !TARGET_64BIT
5284 && flag_pic
5285 && (!decl || !targetm.binds_local_p (decl)))
5286 return false;
5288 /* If we need to align the outgoing stack, then sibcalling would
5289 unalign the stack, which may break the called function. */
5290 if (ix86_minimum_incoming_stack_boundary (true)
5291 < PREFERRED_STACK_BOUNDARY)
5292 return false;
5294 if (decl)
5296 decl_or_type = decl;
5297 type = TREE_TYPE (decl);
5299 else
5301 /* We're looking at the CALL_EXPR, we need the type of the function. */
5302 type = CALL_EXPR_FN (exp); /* pointer expression */
5303 type = TREE_TYPE (type); /* pointer type */
5304 type = TREE_TYPE (type); /* function type */
5305 decl_or_type = type;
5308 /* Check that the return value locations are the same. Like
5309 if we are returning floats on the 80387 register stack, we cannot
5310 make a sibcall from a function that doesn't return a float to a
5311 function that does or, conversely, from a function that does return
5312 a float to a function that doesn't; the necessary stack adjustment
5313 would not be executed. This is also the place we notice
5314 differences in the return value ABI. Note that it is ok for one
5315 of the functions to have void return type as long as the return
5316 value of the other is passed in a register. */
5317 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5318 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5319 cfun->decl, false);
5320 if (STACK_REG_P (a) || STACK_REG_P (b))
5322 if (!rtx_equal_p (a, b))
5323 return false;
5325 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5327 else if (!rtx_equal_p (a, b))
5328 return false;
5330 if (TARGET_64BIT)
5332 /* The SYSV ABI has more call-clobbered registers;
5333 disallow sibcalls from MS to SYSV. */
5334 if (cfun->machine->call_abi == MS_ABI
5335 && ix86_function_type_abi (type) == SYSV_ABI)
5336 return false;
5338 else
5340 /* If this call is indirect, we'll need to be able to use a
5341 call-clobbered register for the address of the target function.
5342 Make sure that all such registers are not used for passing
5343 parameters. Note that DLLIMPORT functions are indirect. */
5344 if (!decl
5345 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5347 if (ix86_function_regparm (type, NULL) >= 3)
5349 /* ??? Need to count the actual number of registers to be used,
5350 not the possible number of registers. Fix later. */
5351 return false;
5356 /* Otherwise okay. That also includes certain types of indirect calls. */
5357 return true;
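/* Illustrative sketch, not part of the original source: the return-value
   check above in user terms, assuming 32-bit x87 float returns; the names
   are hypothetical.  On that ABI the callee leaves its result in st(0), so
   turning the call below into a sibcall would leave a value on the x87
   stack that nobody pops.  */
#if 0
extern float produce_float (void);

void consume (void)
{
  produce_float ();	/* not converted into a sibling call */
}
#endif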
5360 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5361 and "sseregparm" calling convention attributes;
5362 arguments as in struct attribute_spec.handler. */
5364 static tree
5365 ix86_handle_cconv_attribute (tree *node, tree name,
5366 tree args,
5367 int flags ATTRIBUTE_UNUSED,
5368 bool *no_add_attrs)
5370 if (TREE_CODE (*node) != FUNCTION_TYPE
5371 && TREE_CODE (*node) != METHOD_TYPE
5372 && TREE_CODE (*node) != FIELD_DECL
5373 && TREE_CODE (*node) != TYPE_DECL)
5375 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5376 name);
5377 *no_add_attrs = true;
5378 return NULL_TREE;
5381 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5382 if (is_attribute_p ("regparm", name))
5384 tree cst;
5386 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5388 error ("fastcall and regparm attributes are not compatible");
5391 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5393 error ("regparam and thiscall attributes are not compatible");
5396 cst = TREE_VALUE (args);
5397 if (TREE_CODE (cst) != INTEGER_CST)
5399 warning (OPT_Wattributes,
5400 "%qE attribute requires an integer constant argument",
5401 name);
5402 *no_add_attrs = true;
5404 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5406 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5407 name, REGPARM_MAX);
5408 *no_add_attrs = true;
5411 return NULL_TREE;
5414 if (TARGET_64BIT)
5416 /* Do not warn when emulating the MS ABI. */
5417 if ((TREE_CODE (*node) != FUNCTION_TYPE
5418 && TREE_CODE (*node) != METHOD_TYPE)
5419 || ix86_function_type_abi (*node) != MS_ABI)
5420 warning (OPT_Wattributes, "%qE attribute ignored",
5421 name);
5422 *no_add_attrs = true;
5423 return NULL_TREE;
5426 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5427 if (is_attribute_p ("fastcall", name))
5429 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5431 error ("fastcall and cdecl attributes are not compatible");
5433 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5435 error ("fastcall and stdcall attributes are not compatible");
5437 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5439 error ("fastcall and regparm attributes are not compatible");
5441 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5443 error ("fastcall and thiscall attributes are not compatible");
5447 /* Can combine stdcall with fastcall (redundant), regparm and
5448 sseregparm. */
5449 else if (is_attribute_p ("stdcall", name))
5451 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5453 error ("stdcall and cdecl attributes are not compatible");
5455 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5457 error ("stdcall and fastcall attributes are not compatible");
5459 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5461 error ("stdcall and thiscall attributes are not compatible");
5465 /* Can combine cdecl with regparm and sseregparm. */
5466 else if (is_attribute_p ("cdecl", name))
5468 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5470 error ("stdcall and cdecl attributes are not compatible");
5472 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5474 error ("fastcall and cdecl attributes are not compatible");
5476 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5478 error ("cdecl and thiscall attributes are not compatible");
5481 else if (is_attribute_p ("thiscall", name))
5483 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5484 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5485 name);
5486 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5488 error ("stdcall and thiscall attributes are not compatible");
5490 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5492 error ("fastcall and thiscall attributes are not compatible");
5494 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5496 error ("cdecl and thiscall attributes are not compatible");
5500 /* Can combine sseregparm with all attributes. */
5502 return NULL_TREE;
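/* Illustrative sketch, not part of the original source: combinations the
   handler above accepts and rejects on 32-bit targets; the names are
   hypothetical.  */
#if 0
extern int __attribute__ ((stdcall, regparm (2))) ok_combo (int a, int b);
extern int __attribute__ ((fastcall, regparm (2))) bad_combo (int a, int b);
/* The second declaration is rejected with
   "fastcall and regparm attributes are not compatible".  */
#endif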
5505 /* The transactional memory builtins are implicitly regparm or fastcall
5506 depending on the ABI. Override the generic do-nothing attribute that
5507 these builtins were declared with, and replace it with one of the two
5508 attributes that we expect elsewhere. */
5510 static tree
5511 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5512 tree args ATTRIBUTE_UNUSED,
5513 int flags, bool *no_add_attrs)
5515 tree alt;
5517 /* In no case do we want to add the placeholder attribute. */
5518 *no_add_attrs = true;
5520 /* The 64-bit ABI is unchanged for transactional memory. */
5521 if (TARGET_64BIT)
5522 return NULL_TREE;
5524 /* ??? Is there a better way to validate 32-bit windows? We have
5525 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5526 if (CHECK_STACK_LIMIT > 0)
5527 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5528 else
5530 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5531 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5533 decl_attributes (node, alt, flags);
5535 return NULL_TREE;
5538 /* This function determines from TYPE the calling-convention. */
5540 unsigned int
5541 ix86_get_callcvt (const_tree type)
5543 unsigned int ret = 0;
5544 bool is_stdarg;
5545 tree attrs;
5547 if (TARGET_64BIT)
5548 return IX86_CALLCVT_CDECL;
5550 attrs = TYPE_ATTRIBUTES (type);
5551 if (attrs != NULL_TREE)
5553 if (lookup_attribute ("cdecl", attrs))
5554 ret |= IX86_CALLCVT_CDECL;
5555 else if (lookup_attribute ("stdcall", attrs))
5556 ret |= IX86_CALLCVT_STDCALL;
5557 else if (lookup_attribute ("fastcall", attrs))
5558 ret |= IX86_CALLCVT_FASTCALL;
5559 else if (lookup_attribute ("thiscall", attrs))
5560 ret |= IX86_CALLCVT_THISCALL;
5562 /* Regparm isn't allowed for thiscall and fastcall. */
5563 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5565 if (lookup_attribute ("regparm", attrs))
5566 ret |= IX86_CALLCVT_REGPARM;
5567 if (lookup_attribute ("sseregparm", attrs))
5568 ret |= IX86_CALLCVT_SSEREGPARM;
5571 if (IX86_BASE_CALLCVT(ret) != 0)
5572 return ret;
5575 is_stdarg = stdarg_p (type);
5576 if (TARGET_RTD && !is_stdarg)
5577 return IX86_CALLCVT_STDCALL | ret;
5579 if (ret != 0
5580 || is_stdarg
5581 || TREE_CODE (type) != METHOD_TYPE
5582 || ix86_function_type_abi (type) != MS_ABI)
5583 return IX86_CALLCVT_CDECL | ret;
5585 return IX86_CALLCVT_THISCALL;
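/* Illustrative sketch, not part of the original source: the TARGET_RTD path
   above makes fixed-argument functions default to the callee-pop (stdcall)
   convention under -mrtd, while variadic functions stay cdecl; the names
   are hypothetical.  */
#if 0
extern int fixed_args (int a, int b);		/* -mrtd: callee pops       */
extern int var_args (const char *fmt, ...);	/* always cdecl: caller pops */
#endif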
5588 /* Return 0 if the attributes for two types are incompatible, 1 if they
5589 are compatible, and 2 if they are nearly compatible (which causes a
5590 warning to be generated). */
5592 static int
5593 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5595 unsigned int ccvt1, ccvt2;
5597 if (TREE_CODE (type1) != FUNCTION_TYPE
5598 && TREE_CODE (type1) != METHOD_TYPE)
5599 return 1;
5601 ccvt1 = ix86_get_callcvt (type1);
5602 ccvt2 = ix86_get_callcvt (type2);
5603 if (ccvt1 != ccvt2)
5604 return 0;
5605 if (ix86_function_regparm (type1, NULL)
5606 != ix86_function_regparm (type2, NULL))
5607 return 0;
5609 return 1;
5612 /* Return the regparm value for a function with the indicated TYPE and DECL.
5613 DECL may be NULL when calling function indirectly
5614 or considering a libcall. */
5616 static int
5617 ix86_function_regparm (const_tree type, const_tree decl)
5619 tree attr;
5620 int regparm;
5621 unsigned int ccvt;
5623 if (TARGET_64BIT)
5624 return (ix86_function_type_abi (type) == SYSV_ABI
5625 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5626 ccvt = ix86_get_callcvt (type);
5627 regparm = ix86_regparm;
5629 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5631 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5632 if (attr)
5634 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5635 return regparm;
5638 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5639 return 2;
5640 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5641 return 1;
5643 /* Use register calling convention for local functions when possible. */
5644 if (decl
5645 && TREE_CODE (decl) == FUNCTION_DECL
5646 /* Caller and callee must agree on the calling convention, so
5647 checking just the global optimize flag here would mean that with
5648 __attribute__((optimize (...))) the caller could use the regparm convention
5649 and the callee not, or vice versa. Instead look at whether the callee
5650 is optimized or not. */
5651 && opt_for_fn (decl, optimize)
5652 && !(profile_flag && !flag_fentry))
5654 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5655 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5656 if (i && i->local && i->can_change_signature)
5658 int local_regparm, globals = 0, regno;
5660 /* Make sure no regparm register is taken by a
5661 fixed register variable. */
5662 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5663 if (fixed_regs[local_regparm])
5664 break;
5666 /* We don't want to use regparm(3) for nested functions as
5667 these use a static chain pointer in the third argument. */
5668 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5669 local_regparm = 2;
5671 /* In 32-bit mode save a register for the split stack. */
5672 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5673 local_regparm = 2;
5675 /* Each fixed register usage increases register pressure,
5676 so fewer registers should be used for argument passing.
5677 This functionality can be overridden by an explicit
5678 regparm value. */
5679 for (regno = AX_REG; regno <= DI_REG; regno++)
5680 if (fixed_regs[regno])
5681 globals++;
5683 local_regparm
5684 = globals < local_regparm ? local_regparm - globals : 0;
5686 if (local_regparm > regparm)
5687 regparm = local_regparm;
5691 return regparm;
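/* Illustrative sketch, not part of the original source: the local-function
   path above lets the 32-bit compiler promote a file-local function whose
   address is never taken to an implicit regparm convention when optimizing,
   unless a static chain, split stack or fixed registers get in the way; the
   name is hypothetical.  */
#if 0
static int scale (int x, int y, int z)	/* may be given up to regparm(3) */
{
  return x * y + z;
}
#endif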
5694 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5695 DFmode (2) arguments in SSE registers for a function with the
5696 indicated TYPE and DECL. DECL may be NULL when calling function
5697 indirectly or considering a libcall. Otherwise return 0. */
5699 static int
5700 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5702 gcc_assert (!TARGET_64BIT);
5704 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5705 by the sseregparm attribute. */
5706 if (TARGET_SSEREGPARM
5707 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5709 if (!TARGET_SSE)
5711 if (warn)
5713 if (decl)
5714 error ("calling %qD with attribute sseregparm without "
5715 "SSE/SSE2 enabled", decl);
5716 else
5717 error ("calling %qT with attribute sseregparm without "
5718 "SSE/SSE2 enabled", type);
5720 return 0;
5723 return 2;
5726 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5727 (and DFmode for SSE2) arguments in SSE registers. */
5728 if (decl && TARGET_SSE_MATH && optimize
5729 && !(profile_flag && !flag_fentry))
5731 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5732 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5733 if (i && i->local && i->can_change_signature)
5734 return TARGET_SSE2 ? 2 : 1;
5737 return 0;
5740 /* Return true if EAX is live at the start of the function. Used by
5741 ix86_expand_prologue to determine if we need special help before
5742 calling allocate_stack_worker. */
5744 static bool
5745 ix86_eax_live_at_start_p (void)
5747 /* Cheat. Don't bother working forward from ix86_function_regparm
5748 to the function type to whether an actual argument is located in
5749 eax. Instead just look at cfg info, which is still close enough
5750 to correct at this point. This gives false positives for broken
5751 functions that might use uninitialized data that happens to be
5752 allocated in eax, but who cares? */
5753 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5756 static bool
5757 ix86_keep_aggregate_return_pointer (tree fntype)
5759 tree attr;
5761 if (!TARGET_64BIT)
5763 attr = lookup_attribute ("callee_pop_aggregate_return",
5764 TYPE_ATTRIBUTES (fntype));
5765 if (attr)
5766 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5768 /* For 32-bit MS-ABI the default is to keep aggregate
5769 return pointer. */
5770 if (ix86_function_type_abi (fntype) == MS_ABI)
5771 return true;
5773 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5776 /* Value is the number of bytes of arguments automatically
5777 popped when returning from a subroutine call.
5778 FUNDECL is the declaration node of the function (as a tree),
5779 FUNTYPE is the data type of the function (as a tree),
5780 or for a library call it is an identifier node for the subroutine name.
5781 SIZE is the number of bytes of arguments passed on the stack.
5783 On the 80386, the RTD insn may be used to pop them if the number
5784 of args is fixed, but if the number is variable then the caller
5785 must pop them all. RTD can't be used for library calls now
5786 because the library is compiled with the Unix compiler.
5787 Use of RTD is a selectable option, since it is incompatible with
5788 standard Unix calling sequences. If the option is not selected,
5789 the caller must always pop the args.
5791 The attribute stdcall is equivalent to RTD on a per module basis. */
5793 static int
5794 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5796 unsigned int ccvt;
5798 /* None of the 64-bit ABIs pop arguments. */
5799 if (TARGET_64BIT)
5800 return 0;
5802 ccvt = ix86_get_callcvt (funtype);
5804 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5805 | IX86_CALLCVT_THISCALL)) != 0
5806 && ! stdarg_p (funtype))
5807 return size;
5809 /* Lose any fake structure return argument if it is passed on the stack. */
5810 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5811 && !ix86_keep_aggregate_return_pointer (funtype))
5813 int nregs = ix86_function_regparm (funtype, fundecl);
5814 if (nregs == 0)
5815 return GET_MODE_SIZE (Pmode);
5818 return 0;
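/* Illustrative sketch, not part of the original source: for the 32-bit
   stdcall declaration below ix86_return_pops_args returns 8 (two int
   arguments on the stack), so the callee returns with "ret $8"; a variadic
   prototype would make it return 0 and leave the popping to the caller.
   The name is hypothetical.  */
#if 0
extern int __attribute__ ((stdcall)) sum2 (int a, int b);
#endif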
5821 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5823 static bool
5824 ix86_legitimate_combined_insn (rtx insn)
5826 /* Check operand constraints in case hard registers were propagated
5827 into insn pattern. This check prevents combine pass from
5828 generating insn patterns with invalid hard register operands.
5829 These invalid insns can eventually confuse reload to error out
5830 with a spill failure. See also PRs 46829 and 46843. */
5831 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5833 int i;
5835 extract_insn (insn);
5836 preprocess_constraints ();
5838 for (i = 0; i < recog_data.n_operands; i++)
5840 rtx op = recog_data.operand[i];
5841 enum machine_mode mode = GET_MODE (op);
5842 struct operand_alternative *op_alt;
5843 int offset = 0;
5844 bool win;
5845 int j;
5847 /* For pre-AVX disallow unaligned loads/stores where the
5848 instructions don't support it. */
5849 if (!TARGET_AVX
5850 && VECTOR_MODE_P (GET_MODE (op))
5851 && misaligned_operand (op, GET_MODE (op)))
5853 int min_align = get_attr_ssememalign (insn);
5854 if (min_align == 0)
5855 return false;
5858 /* A unary operator may be accepted by the predicate, but it
5859 is irrelevant for matching constraints. */
5860 if (UNARY_P (op))
5861 op = XEXP (op, 0);
5863 if (GET_CODE (op) == SUBREG)
5865 if (REG_P (SUBREG_REG (op))
5866 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5867 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5868 GET_MODE (SUBREG_REG (op)),
5869 SUBREG_BYTE (op),
5870 GET_MODE (op));
5871 op = SUBREG_REG (op);
5874 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5875 continue;
5877 op_alt = recog_op_alt[i];
5879 /* Operand has no constraints, anything is OK. */
5880 win = !recog_data.n_alternatives;
5882 for (j = 0; j < recog_data.n_alternatives; j++)
5884 if (op_alt[j].anything_ok
5885 || (op_alt[j].matches != -1
5886 && operands_match_p
5887 (recog_data.operand[i],
5888 recog_data.operand[op_alt[j].matches]))
5889 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5891 win = true;
5892 break;
5896 if (!win)
5897 return false;
5901 return true;
5904 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5906 static unsigned HOST_WIDE_INT
5907 ix86_asan_shadow_offset (void)
5909 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5910 : HOST_WIDE_INT_C (0x7fff8000))
5911 : (HOST_WIDE_INT_1 << 29);
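/* Illustrative note, not part of the original source: the offset above is
   combined with the usual ASan address mapping shadow = (addr >> 3) + offset,
   so on x86-64 Linux (LP64) an application address A is checked through the
   shadow byte at (A >> 3) + 0x7fff8000.  */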
5914 /* Argument support functions. */
5916 /* Return true when register may be used to pass function parameters. */
5917 bool
5918 ix86_function_arg_regno_p (int regno)
5920 int i;
5921 const int *parm_regs;
5923 if (!TARGET_64BIT)
5925 if (TARGET_MACHO)
5926 return (regno < REGPARM_MAX
5927 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5928 else
5929 return (regno < REGPARM_MAX
5930 || (TARGET_MMX && MMX_REGNO_P (regno)
5931 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5932 || (TARGET_SSE && SSE_REGNO_P (regno)
5933 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5936 if (TARGET_SSE && SSE_REGNO_P (regno)
5937 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5938 return true;
5940 /* TODO: The function should depend on current function ABI but
5941 builtins.c would need updating then. Therefore we use the
5942 default ABI. */
5944 /* RAX is used as hidden argument to va_arg functions. */
5945 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5946 return true;
5948 if (ix86_abi == MS_ABI)
5949 parm_regs = x86_64_ms_abi_int_parameter_registers;
5950 else
5951 parm_regs = x86_64_int_parameter_registers;
5952 for (i = 0; i < (ix86_abi == MS_ABI
5953 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5954 if (regno == parm_regs[i])
5955 return true;
5956 return false;
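/* Illustrative note, not part of the original source: the 64-bit integer
   parameter registers scanned above are, in order,
     SYSV ABI: rdi, rsi, rdx, rcx, r8, r9
     MS ABI:   rcx, rdx, r8, r9
   so, e.g., the third integer argument of a SysV function arrives in rdx.  */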
5959 /* Return if we do not know how to pass TYPE solely in registers. */
5961 static bool
5962 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5964 if (must_pass_in_stack_var_size_or_pad (mode, type))
5965 return true;
5967 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5968 The layout_type routine is crafty and tries to trick us into passing
5969 currently unsupported vector types on the stack by using TImode. */
5970 return (!TARGET_64BIT && mode == TImode
5971 && type && TREE_CODE (type) != VECTOR_TYPE);
5974 /* Return the size, in bytes, of the area reserved for arguments passed
5975 in registers for the function represented by FNDECL, depending on the
5976 ABI used. */
5977 int
5978 ix86_reg_parm_stack_space (const_tree fndecl)
5980 enum calling_abi call_abi = SYSV_ABI;
5981 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5982 call_abi = ix86_function_abi (fndecl);
5983 else
5984 call_abi = ix86_function_type_abi (fndecl);
5985 if (TARGET_64BIT && call_abi == MS_ABI)
5986 return 32;
5987 return 0;
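/* Illustrative note, not part of the original source: the 32 bytes returned
   above for the 64-bit MS ABI are the "home area" a caller must reserve on
   the stack for the four register parameters; the SysV ABI reserves no such
   area, hence the 0.  */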
5990 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5991 call abi used. */
5992 enum calling_abi
5993 ix86_function_type_abi (const_tree fntype)
5995 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5997 enum calling_abi abi = ix86_abi;
5998 if (abi == SYSV_ABI)
6000 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6002 if (TARGET_X32)
6004 static bool warned = false;
6005 if (!warned)
6007 error ("X32 does not support ms_abi attribute");
6008 warned = true;
6011 abi = MS_ABI;
6014 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6015 abi = SYSV_ABI;
6016 return abi;
6018 return ix86_abi;
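/* Illustrative sketch, not part of the original source: the per-function ABI
   override recognized above; the names are hypothetical.  */
#if 0
extern int __attribute__ ((ms_abi)) win_style_entry (void *arg);
extern int __attribute__ ((sysv_abi)) posix_style_entry (int argc, char **argv);
#endif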
6021 /* We add this as a workaround in order to use libc_has_function
6022 hook in i386.md. */
6023 bool
6024 ix86_libc_has_function (enum function_class fn_class)
6026 return targetm.libc_has_function (fn_class);
6029 static bool
6030 ix86_function_ms_hook_prologue (const_tree fn)
6032 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6034 if (decl_function_context (fn) != NULL_TREE)
6035 error_at (DECL_SOURCE_LOCATION (fn),
6036 "ms_hook_prologue is not compatible with nested function");
6037 else
6038 return true;
6040 return false;
6043 static enum calling_abi
6044 ix86_function_abi (const_tree fndecl)
6046 if (! fndecl)
6047 return ix86_abi;
6048 return ix86_function_type_abi (TREE_TYPE (fndecl));
6051 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
6052 call abi used. */
6053 enum calling_abi
6054 ix86_cfun_abi (void)
6056 if (! cfun)
6057 return ix86_abi;
6058 return cfun->machine->call_abi;
6061 /* Write the extra assembler code needed to declare a function properly. */
6063 void
6064 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6065 tree decl)
6067 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6069 if (is_ms_hook)
6071 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6072 unsigned int filler_cc = 0xcccccccc;
6074 for (i = 0; i < filler_count; i += 4)
6075 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6078 #ifdef SUBTARGET_ASM_UNWIND_INIT
6079 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6080 #endif
6082 ASM_OUTPUT_LABEL (asm_out_file, fname);
6084 /* Output magic byte marker, if hot-patch attribute is set. */
6085 if (is_ms_hook)
6087 if (TARGET_64BIT)
6089 /* leaq [%rsp + 0], %rsp */
6090 asm_fprintf (asm_out_file, ASM_BYTE
6091 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6093 else
6095 /* movl.s %edi, %edi
6096 push %ebp
6097 movl.s %esp, %ebp */
6098 asm_fprintf (asm_out_file, ASM_BYTE
6099 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6104 /* regclass.c */
6105 extern void init_regs (void);
6107 /* Implementation of call abi switching target hook. Specific to FNDECL
6108 the specific call register sets are set. See also
6109 ix86_conditional_register_usage for more details. */
6110 void
6111 ix86_call_abi_override (const_tree fndecl)
6113 if (fndecl == NULL_TREE)
6114 cfun->machine->call_abi = ix86_abi;
6115 else
6116 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6119 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
6120 expensive re-initialization of init_regs each time we switch function context
6121 since this is needed only during RTL expansion. */
6122 static void
6123 ix86_maybe_switch_abi (void)
6125 if (TARGET_64BIT &&
6126 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6127 reinit_regs ();
6130 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6131 for a call to a function whose data type is FNTYPE.
6132 For a library call, FNTYPE is 0. */
6134 void
6135 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6136 tree fntype, /* tree ptr for function decl */
6137 rtx libname, /* SYMBOL_REF of library name or 0 */
6138 tree fndecl,
6139 int caller)
6141 struct cgraph_local_info *i;
6143 memset (cum, 0, sizeof (*cum));
6145 if (fndecl)
6147 i = cgraph_local_info (fndecl);
6148 cum->call_abi = ix86_function_abi (fndecl);
6150 else
6152 i = NULL;
6153 cum->call_abi = ix86_function_type_abi (fntype);
6156 cum->caller = caller;
6158 /* Set up the number of registers to use for passing arguments. */
6159 cum->nregs = ix86_regparm;
6160 if (TARGET_64BIT)
6162 cum->nregs = (cum->call_abi == SYSV_ABI
6163 ? X86_64_REGPARM_MAX
6164 : X86_64_MS_REGPARM_MAX);
6166 if (TARGET_SSE)
6168 cum->sse_nregs = SSE_REGPARM_MAX;
6169 if (TARGET_64BIT)
6171 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6172 ? X86_64_SSE_REGPARM_MAX
6173 : X86_64_MS_SSE_REGPARM_MAX);
6176 if (TARGET_MMX)
6177 cum->mmx_nregs = MMX_REGPARM_MAX;
6178 cum->warn_avx512f = true;
6179 cum->warn_avx = true;
6180 cum->warn_sse = true;
6181 cum->warn_mmx = true;
6183 /* Because the type might mismatch between caller and callee, we need to
6184 use the actual type of the function for local calls.
6185 FIXME: cgraph_analyze can be told to actually record if the function uses
6186 va_start so for local functions maybe_vaarg can be made aggressive,
6187 helping K&R code.
6188 FIXME: once the type system is fixed, we won't need this code anymore. */
6189 if (i && i->local && i->can_change_signature)
6190 fntype = TREE_TYPE (fndecl);
6191 cum->maybe_vaarg = (fntype
6192 ? (!prototype_p (fntype) || stdarg_p (fntype))
6193 : !libname);
6195 if (!TARGET_64BIT)
6197 /* If there are variable arguments, then we won't pass anything
6198 in registers in 32-bit mode. */
6199 if (stdarg_p (fntype))
6201 cum->nregs = 0;
6202 cum->sse_nregs = 0;
6203 cum->mmx_nregs = 0;
6204 cum->warn_avx512f = false;
6205 cum->warn_avx = false;
6206 cum->warn_sse = false;
6207 cum->warn_mmx = false;
6208 return;
6211 /* Use ecx and edx registers if function has fastcall attribute,
6212 else look for regparm information. */
6213 if (fntype)
6215 unsigned int ccvt = ix86_get_callcvt (fntype);
6216 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6218 cum->nregs = 1;
6219 cum->fastcall = 1; /* Same first register as in fastcall. */
6221 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6223 cum->nregs = 2;
6224 cum->fastcall = 1;
6226 else
6227 cum->nregs = ix86_function_regparm (fntype, fndecl);
6230 /* Set up the number of SSE registers used for passing SFmode
6231 and DFmode arguments. Warn for mismatching ABI. */
6232 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
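/* Illustrative sketch, not part of the original source: for a 32-bit
   fastcall prototype the setup above yields cum->nregs == 2, so the first
   two integer arguments travel in ecx and edx and the rest on the stack;
   the name is hypothetical.  */
#if 0
extern int __attribute__ ((fastcall)) clamp3 (int value, int lo, int hi);
/* value -> %ecx, lo -> %edx, hi -> stack */
#endif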
6236 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6237 But in the case of vector types, it is some vector mode.
6239 When we have only some of our vector isa extensions enabled, then there
6240 are some modes for which vector_mode_supported_p is false. For these
6241 modes, the generic vector support in gcc will choose some non-vector mode
6242 in order to implement the type. By computing the natural mode, we'll
6243 select the proper ABI location for the operand and not depend on whatever
6244 the middle-end decides to do with these vector types.
6246 The middle-end can't deal with vector types > 16 bytes. In this
6247 case, we return the original mode and warn about the ABI change if CUM
6248 isn't NULL.
6250 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6251 available for function return value. */
6253 static enum machine_mode
6254 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6255 bool in_return)
6257 enum machine_mode mode = TYPE_MODE (type);
6259 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6261 HOST_WIDE_INT size = int_size_in_bytes (type);
6262 if ((size == 8 || size == 16 || size == 32 || size == 64)
6263 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6264 && TYPE_VECTOR_SUBPARTS (type) > 1)
6266 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6268 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6269 mode = MIN_MODE_VECTOR_FLOAT;
6270 else
6271 mode = MIN_MODE_VECTOR_INT;
6273 /* Get the mode which has this inner mode and number of units. */
6274 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6275 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6276 && GET_MODE_INNER (mode) == innermode)
6278 if (size == 64 && !TARGET_AVX512F)
6280 static bool warnedavx512f;
6281 static bool warnedavx512f_ret;
6283 if (cum && cum->warn_avx512f && !warnedavx512f)
6285 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6286 "without AVX512F enabled changes the ABI"))
6287 warnedavx512f = true;
6289 else if (in_return && !warnedavx512f_ret)
6291 if (warning (OPT_Wpsabi, "AVX512F vector return "
6292 "without AVX512F enabled changes the ABI"))
6293 warnedavx512f_ret = true;
6296 return TYPE_MODE (type);
6298 else if (size == 32 && !TARGET_AVX)
6300 static bool warnedavx;
6301 static bool warnedavx_ret;
6303 if (cum && cum->warn_avx && !warnedavx)
6305 if (warning (OPT_Wpsabi, "AVX vector argument "
6306 "without AVX enabled changes the ABI"))
6307 warnedavx = true;
6309 else if (in_return && !warnedavx_ret)
6311 if (warning (OPT_Wpsabi, "AVX vector return "
6312 "without AVX enabled changes the ABI"))
6313 warnedavx_ret = true;
6316 return TYPE_MODE (type);
6318 else if (((size == 8 && TARGET_64BIT) || size == 16)
6319 && !TARGET_SSE)
6321 static bool warnedsse;
6322 static bool warnedsse_ret;
6324 if (cum && cum->warn_sse && !warnedsse)
6326 if (warning (OPT_Wpsabi, "SSE vector argument "
6327 "without SSE enabled changes the ABI"))
6328 warnedsse = true;
6330 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6332 if (warning (OPT_Wpsabi, "SSE vector return "
6333 "without SSE enabled changes the ABI"))
6334 warnedsse_ret = true;
6337 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6339 static bool warnedmmx;
6340 static bool warnedmmx_ret;
6342 if (cum && cum->warn_mmx && !warnedmmx)
6344 if (warning (OPT_Wpsabi, "MMX vector argument "
6345 "without MMX enabled changes the ABI"))
6346 warnedmmx = true;
6348 else if (in_return && !warnedmmx_ret)
6350 if (warning (OPT_Wpsabi, "MMX vector return "
6351 "without MMX enabled changes the ABI"))
6352 warnedmmx_ret = true;
6355 return mode;
6358 gcc_unreachable ();
6362 return mode;
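/* Illustrative sketch, not part of the original source: passing a 32-byte
   vector by value without AVX enabled takes the OPT_Wpsabi warning path
   above; the names are hypothetical.  */
#if 0
typedef float v8sf __attribute__ ((vector_size (32)));
extern float hsum (v8sf v);	/* without -mavx: "AVX vector argument
				   without AVX enabled changes the ABI" */
#endif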
6365 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6366 this may not agree with the mode that the type system has chosen for the
6367 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6368 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6370 static rtx
6371 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6372 unsigned int regno)
6374 rtx tmp;
6376 if (orig_mode != BLKmode)
6377 tmp = gen_rtx_REG (orig_mode, regno);
6378 else
6380 tmp = gen_rtx_REG (mode, regno);
6381 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6382 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6385 return tmp;
6388 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6389 of this code is to classify each 8bytes of incoming argument by the register
6390 class and assign registers accordingly. */
6392 /* Return the union class of CLASS1 and CLASS2.
6393 See the x86-64 PS ABI for details. */
6395 static enum x86_64_reg_class
6396 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6398 /* Rule #1: If both classes are equal, this is the resulting class. */
6399 if (class1 == class2)
6400 return class1;
6402 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6403 the other class. */
6404 if (class1 == X86_64_NO_CLASS)
6405 return class2;
6406 if (class2 == X86_64_NO_CLASS)
6407 return class1;
6409 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6410 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6411 return X86_64_MEMORY_CLASS;
6413 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6414 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6415 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6416 return X86_64_INTEGERSI_CLASS;
6417 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6418 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6419 return X86_64_INTEGER_CLASS;
6421 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6422 MEMORY is used. */
6423 if (class1 == X86_64_X87_CLASS
6424 || class1 == X86_64_X87UP_CLASS
6425 || class1 == X86_64_COMPLEX_X87_CLASS
6426 || class2 == X86_64_X87_CLASS
6427 || class2 == X86_64_X87UP_CLASS
6428 || class2 == X86_64_COMPLEX_X87_CLASS)
6429 return X86_64_MEMORY_CLASS;
6431 /* Rule #6: Otherwise class SSE is used. */
6432 return X86_64_SSE_CLASS;
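/* Illustrative examples of the rules above (for exposition only):
   merging X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS yields
   X86_64_INTEGERSI_CLASS (rule #4); merging X86_64_SSE_CLASS with
   X86_64_X87_CLASS yields X86_64_MEMORY_CLASS (rule #5); merging
   X86_64_SSESF_CLASS with X86_64_SSEDF_CLASS yields X86_64_SSE_CLASS
   (rule #6).  */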
6435 /* Classify the argument of type TYPE and mode MODE.
6436 CLASSES will be filled by the register class used to pass each word
6437 of the operand. The number of words is returned. In case the parameter
6438 should be passed in memory, 0 is returned. As a special case for zero
6439 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6441 BIT_OFFSET is used internally for handling records and specifies the
6442 offset of the value in bits, taken modulo 512 to avoid overflow cases.
6444 See the x86-64 PS ABI for details.
6447 static int
6448 classify_argument (enum machine_mode mode, const_tree type,
6449 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6451 HOST_WIDE_INT bytes =
6452 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6453 int words
6454 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6456 /* Variable sized entities are always passed/returned in memory. */
6457 if (bytes < 0)
6458 return 0;
6460 if (mode != VOIDmode
6461 && targetm.calls.must_pass_in_stack (mode, type))
6462 return 0;
6464 if (type && AGGREGATE_TYPE_P (type))
6466 int i;
6467 tree field;
6468 enum x86_64_reg_class subclasses[MAX_CLASSES];
6470 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6471 if (bytes > 64)
6472 return 0;
6474 for (i = 0; i < words; i++)
6475 classes[i] = X86_64_NO_CLASS;
6477 /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
6478 signal the memory class, so handle it as a special case. */
6479 if (!words)
6481 classes[0] = X86_64_NO_CLASS;
6482 return 1;
6485 /* Classify each field of record and merge classes. */
6486 switch (TREE_CODE (type))
6488 case RECORD_TYPE:
6489 /* And now merge the fields of structure. */
6490 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6492 if (TREE_CODE (field) == FIELD_DECL)
6494 int num;
6496 if (TREE_TYPE (field) == error_mark_node)
6497 continue;
6499 /* Bitfields are always classified as integer. Handle them
6500 early, since later code would consider them to be
6501 misaligned integers. */
6502 if (DECL_BIT_FIELD (field))
6504 for (i = (int_bit_position (field)
6505 + (bit_offset % 64)) / 8 / 8;
6506 i < ((int_bit_position (field) + (bit_offset % 64))
6507 + tree_to_shwi (DECL_SIZE (field))
6508 + 63) / 8 / 8; i++)
6509 classes[i] =
6510 merge_classes (X86_64_INTEGER_CLASS,
6511 classes[i]);
6513 else
6515 int pos;
6517 type = TREE_TYPE (field);
6519 /* Flexible array member is ignored. */
6520 if (TYPE_MODE (type) == BLKmode
6521 && TREE_CODE (type) == ARRAY_TYPE
6522 && TYPE_SIZE (type) == NULL_TREE
6523 && TYPE_DOMAIN (type) != NULL_TREE
6524 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6525 == NULL_TREE))
6527 static bool warned;
6529 if (!warned && warn_psabi)
6531 warned = true;
6532 inform (input_location,
6533 "the ABI of passing struct with"
6534 " a flexible array member has"
6535 " changed in GCC 4.4");
6537 continue;
6539 num = classify_argument (TYPE_MODE (type), type,
6540 subclasses,
6541 (int_bit_position (field)
6542 + bit_offset) % 512);
6543 if (!num)
6544 return 0;
6545 pos = (int_bit_position (field)
6546 + (bit_offset % 64)) / 8 / 8;
6547 for (i = 0; i < num && (i + pos) < words; i++)
6548 classes[i + pos] =
6549 merge_classes (subclasses[i], classes[i + pos]);
6553 break;
6555 case ARRAY_TYPE:
6556 /* Arrays are handled as small records. */
6558 int num;
6559 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6560 TREE_TYPE (type), subclasses, bit_offset);
6561 if (!num)
6562 return 0;
6564 /* The partial classes are now full classes. */
6565 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6566 subclasses[0] = X86_64_SSE_CLASS;
6567 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6568 && !((bit_offset % 64) == 0 && bytes == 4))
6569 subclasses[0] = X86_64_INTEGER_CLASS;
6571 for (i = 0; i < words; i++)
6572 classes[i] = subclasses[i % num];
6574 break;
6576 case UNION_TYPE:
6577 case QUAL_UNION_TYPE:
6578 /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
6580 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6582 if (TREE_CODE (field) == FIELD_DECL)
6584 int num;
6586 if (TREE_TYPE (field) == error_mark_node)
6587 continue;
6589 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6590 TREE_TYPE (field), subclasses,
6591 bit_offset);
6592 if (!num)
6593 return 0;
6594 for (i = 0; i < num && i < words; i++)
6595 classes[i] = merge_classes (subclasses[i], classes[i]);
6598 break;
6600 default:
6601 gcc_unreachable ();
6604 if (words > 2)
6606 /* When size > 16 bytes, if the first class isn't
6607 X86_64_SSE_CLASS or any of the other classes isn't
6608 X86_64_SSEUP_CLASS, everything should be passed in
6609 memory. */
6610 if (classes[0] != X86_64_SSE_CLASS)
6611 return 0;
6613 for (i = 1; i < words; i++)
6614 if (classes[i] != X86_64_SSEUP_CLASS)
6615 return 0;
6618 /* Final merger cleanup. */
6619 for (i = 0; i < words; i++)
6621 /* If one class is MEMORY, everything should be passed in
6622 memory. */
6623 if (classes[i] == X86_64_MEMORY_CLASS)
6624 return 0;
6626 /* The X86_64_SSEUP_CLASS should be always preceded by
6627 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6628 if (classes[i] == X86_64_SSEUP_CLASS
6629 && classes[i - 1] != X86_64_SSE_CLASS
6630 && classes[i - 1] != X86_64_SSEUP_CLASS)
6632 /* The first one should never be X86_64_SSEUP_CLASS. */
6633 gcc_assert (i != 0);
6634 classes[i] = X86_64_SSE_CLASS;
6637 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6638 everything should be passed in memory. */
6639 if (classes[i] == X86_64_X87UP_CLASS
6640 && (classes[i - 1] != X86_64_X87_CLASS))
6642 static bool warned;
6644 /* The first one should never be X86_64_X87UP_CLASS. */
6645 gcc_assert (i != 0);
6646 if (!warned && warn_psabi)
6648 warned = true;
6649 inform (input_location,
6650 "the ABI of passing union with long double"
6651 " has changed in GCC 4.4");
6653 return 0;
6656 return words;
6659 /* Compute alignment needed. We align all types to natural boundaries with
6660 exception of XFmode that is aligned to 64bits. */
6661 if (mode != VOIDmode && mode != BLKmode)
6663 int mode_alignment = GET_MODE_BITSIZE (mode);
6665 if (mode == XFmode)
6666 mode_alignment = 128;
6667 else if (mode == XCmode)
6668 mode_alignment = 256;
6669 if (COMPLEX_MODE_P (mode))
6670 mode_alignment /= 2;
6671 /* Misaligned fields are always returned in memory. */
6672 if (bit_offset % mode_alignment)
6673 return 0;
6676 /* For V1xx modes, just use the base mode. */
6677 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6678 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6679 mode = GET_MODE_INNER (mode);
6681 /* Classification of atomic types. */
6682 switch (mode)
6684 case SDmode:
6685 case DDmode:
6686 classes[0] = X86_64_SSE_CLASS;
6687 return 1;
6688 case TDmode:
6689 classes[0] = X86_64_SSE_CLASS;
6690 classes[1] = X86_64_SSEUP_CLASS;
6691 return 2;
6692 case DImode:
6693 case SImode:
6694 case HImode:
6695 case QImode:
6696 case CSImode:
6697 case CHImode:
6698 case CQImode:
6700 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6702 /* Analyze last 128 bits only. */
6703 size = (size - 1) & 0x7f;
6705 if (size < 32)
6707 classes[0] = X86_64_INTEGERSI_CLASS;
6708 return 1;
6710 else if (size < 64)
6712 classes[0] = X86_64_INTEGER_CLASS;
6713 return 1;
6715 else if (size < 64+32)
6717 classes[0] = X86_64_INTEGER_CLASS;
6718 classes[1] = X86_64_INTEGERSI_CLASS;
6719 return 2;
6721 else if (size < 64+64)
6723 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6724 return 2;
6726 else
6727 gcc_unreachable ();
6729 case CDImode:
6730 case TImode:
6731 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6732 return 2;
6733 case COImode:
6734 case OImode:
6735 /* OImode shouldn't be used directly. */
6736 gcc_unreachable ();
6737 case CTImode:
6738 return 0;
6739 case SFmode:
6740 if (!(bit_offset % 64))
6741 classes[0] = X86_64_SSESF_CLASS;
6742 else
6743 classes[0] = X86_64_SSE_CLASS;
6744 return 1;
6745 case DFmode:
6746 classes[0] = X86_64_SSEDF_CLASS;
6747 return 1;
6748 case XFmode:
6749 classes[0] = X86_64_X87_CLASS;
6750 classes[1] = X86_64_X87UP_CLASS;
6751 return 2;
6752 case TFmode:
6753 classes[0] = X86_64_SSE_CLASS;
6754 classes[1] = X86_64_SSEUP_CLASS;
6755 return 2;
6756 case SCmode:
6757 classes[0] = X86_64_SSE_CLASS;
6758 if (!(bit_offset % 64))
6759 return 1;
6760 else
6762 static bool warned;
6764 if (!warned && warn_psabi)
6766 warned = true;
6767 inform (input_location,
6768 "the ABI of passing structure with complex float"
6769 " member has changed in GCC 4.4");
6771 classes[1] = X86_64_SSESF_CLASS;
6772 return 2;
6774 case DCmode:
6775 classes[0] = X86_64_SSEDF_CLASS;
6776 classes[1] = X86_64_SSEDF_CLASS;
6777 return 2;
6778 case XCmode:
6779 classes[0] = X86_64_COMPLEX_X87_CLASS;
6780 return 1;
6781 case TCmode:
6782 /* This mode is larger than 16 bytes. */
6783 return 0;
6784 case V8SFmode:
6785 case V8SImode:
6786 case V32QImode:
6787 case V16HImode:
6788 case V4DFmode:
6789 case V4DImode:
6790 classes[0] = X86_64_SSE_CLASS;
6791 classes[1] = X86_64_SSEUP_CLASS;
6792 classes[2] = X86_64_SSEUP_CLASS;
6793 classes[3] = X86_64_SSEUP_CLASS;
6794 return 4;
6795 case V8DFmode:
6796 case V16SFmode:
6797 case V8DImode:
6798 case V16SImode:
6799 case V32HImode:
6800 case V64QImode:
6801 classes[0] = X86_64_SSE_CLASS;
6802 classes[1] = X86_64_SSEUP_CLASS;
6803 classes[2] = X86_64_SSEUP_CLASS;
6804 classes[3] = X86_64_SSEUP_CLASS;
6805 classes[4] = X86_64_SSEUP_CLASS;
6806 classes[5] = X86_64_SSEUP_CLASS;
6807 classes[6] = X86_64_SSEUP_CLASS;
6808 classes[7] = X86_64_SSEUP_CLASS;
6809 return 8;
6810 case V4SFmode:
6811 case V4SImode:
6812 case V16QImode:
6813 case V8HImode:
6814 case V2DFmode:
6815 case V2DImode:
6816 classes[0] = X86_64_SSE_CLASS;
6817 classes[1] = X86_64_SSEUP_CLASS;
6818 return 2;
6819 case V1TImode:
6820 case V1DImode:
6821 case V2SFmode:
6822 case V2SImode:
6823 case V4HImode:
6824 case V8QImode:
6825 classes[0] = X86_64_SSE_CLASS;
6826 return 1;
6827 case BLKmode:
6828 case VOIDmode:
6829 return 0;
6830 default:
6831 gcc_assert (VECTOR_MODE_P (mode));
6833 if (bytes > 16)
6834 return 0;
6836 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6838 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6839 classes[0] = X86_64_INTEGERSI_CLASS;
6840 else
6841 classes[0] = X86_64_INTEGER_CLASS;
6842 classes[1] = X86_64_INTEGER_CLASS;
6843 return 1 + (bytes > 8);
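/* Worked example (illustrative only): for

     struct s { double d; long l; };

   the record is 16 bytes, i.e. two eightbytes.  The first eightbyte
   contains only the double and is classified X86_64_SSEDF_CLASS; the
   second contains only the long and is classified X86_64_INTEGER_CLASS.
   classify_argument therefore returns 2 with
   classes = { X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS }, and the struct
   ends up split between an SSE register and an integer register (see
   construct_container below).  */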
6847 /* Examine the argument and return set number of register required in each
6848 class. Return 0 iff parameter should be passed in memory. */
6849 static int
6850 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6851 int *int_nregs, int *sse_nregs)
6853 enum x86_64_reg_class regclass[MAX_CLASSES];
6854 int n = classify_argument (mode, type, regclass, 0);
6856 *int_nregs = 0;
6857 *sse_nregs = 0;
6858 if (!n)
6859 return 0;
6860 for (n--; n >= 0; n--)
6861 switch (regclass[n])
6863 case X86_64_INTEGER_CLASS:
6864 case X86_64_INTEGERSI_CLASS:
6865 (*int_nregs)++;
6866 break;
6867 case X86_64_SSE_CLASS:
6868 case X86_64_SSESF_CLASS:
6869 case X86_64_SSEDF_CLASS:
6870 (*sse_nregs)++;
6871 break;
6872 case X86_64_NO_CLASS:
6873 case X86_64_SSEUP_CLASS:
6874 break;
6875 case X86_64_X87_CLASS:
6876 case X86_64_X87UP_CLASS:
6877 if (!in_return)
6878 return 0;
6879 break;
6880 case X86_64_COMPLEX_X87_CLASS:
6881 return in_return ? 2 : 0;
6882 case X86_64_MEMORY_CLASS:
6883 gcc_unreachable ();
6885 return 1;
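/* Illustrative note: for the struct { double d; long l; } example above,
   examine_argument sets *int_nregs = 1 and *sse_nregs = 1.  X87 classes
   are acceptable only for return values, so a long double argument
   (X87 + X87UP) makes this function return 0 and forces the argument
   onto the stack.  */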
6888 /* Construct container for the argument used by GCC interface. See
6889 FUNCTION_ARG for the detailed description. */
6891 static rtx
6892 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6893 const_tree type, int in_return, int nintregs, int nsseregs,
6894 const int *intreg, int sse_regno)
6896 /* The following variables hold the static issued_error state. */
6897 static bool issued_sse_arg_error;
6898 static bool issued_sse_ret_error;
6899 static bool issued_x87_ret_error;
6901 enum machine_mode tmpmode;
6902 int bytes =
6903 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6904 enum x86_64_reg_class regclass[MAX_CLASSES];
6905 int n;
6906 int i;
6907 int nexps = 0;
6908 int needed_sseregs, needed_intregs;
6909 rtx exp[MAX_CLASSES];
6910 rtx ret;
6912 n = classify_argument (mode, type, regclass, 0);
6913 if (!n)
6914 return NULL;
6915 if (!examine_argument (mode, type, in_return, &needed_intregs,
6916 &needed_sseregs))
6917 return NULL;
6918 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6919 return NULL;
6921 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6922 some less clueful developer tries to use floating-point anyway. */
6923 if (needed_sseregs && !TARGET_SSE)
6925 if (in_return)
6927 if (!issued_sse_ret_error)
6929 error ("SSE register return with SSE disabled");
6930 issued_sse_ret_error = true;
6933 else if (!issued_sse_arg_error)
6935 error ("SSE register argument with SSE disabled");
6936 issued_sse_arg_error = true;
6938 return NULL;
6941 /* Likewise, error if the ABI requires us to return values in the
6942 x87 registers and the user specified -mno-80387. */
6943 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6944 for (i = 0; i < n; i++)
6945 if (regclass[i] == X86_64_X87_CLASS
6946 || regclass[i] == X86_64_X87UP_CLASS
6947 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6949 if (!issued_x87_ret_error)
6951 error ("x87 register return with x87 disabled");
6952 issued_x87_ret_error = true;
6954 return NULL;
6957 /* First construct simple cases. Avoid SCmode, since we want to use
6958 single register to pass this type. */
6959 if (n == 1 && mode != SCmode)
6960 switch (regclass[0])
6962 case X86_64_INTEGER_CLASS:
6963 case X86_64_INTEGERSI_CLASS:
6964 return gen_rtx_REG (mode, intreg[0]);
6965 case X86_64_SSE_CLASS:
6966 case X86_64_SSESF_CLASS:
6967 case X86_64_SSEDF_CLASS:
6968 if (mode != BLKmode)
6969 return gen_reg_or_parallel (mode, orig_mode,
6970 SSE_REGNO (sse_regno));
6971 break;
6972 case X86_64_X87_CLASS:
6973 case X86_64_COMPLEX_X87_CLASS:
6974 return gen_rtx_REG (mode, FIRST_STACK_REG);
6975 case X86_64_NO_CLASS:
6976 /* Zero sized array, struct or class. */
6977 return NULL;
6978 default:
6979 gcc_unreachable ();
6981 if (n == 2
6982 && regclass[0] == X86_64_SSE_CLASS
6983 && regclass[1] == X86_64_SSEUP_CLASS
6984 && mode != BLKmode)
6985 return gen_reg_or_parallel (mode, orig_mode,
6986 SSE_REGNO (sse_regno));
6987 if (n == 4
6988 && regclass[0] == X86_64_SSE_CLASS
6989 && regclass[1] == X86_64_SSEUP_CLASS
6990 && regclass[2] == X86_64_SSEUP_CLASS
6991 && regclass[3] == X86_64_SSEUP_CLASS
6992 && mode != BLKmode)
6993 return gen_reg_or_parallel (mode, orig_mode,
6994 SSE_REGNO (sse_regno));
6995 if (n == 8
6996 && regclass[0] == X86_64_SSE_CLASS
6997 && regclass[1] == X86_64_SSEUP_CLASS
6998 && regclass[2] == X86_64_SSEUP_CLASS
6999 && regclass[3] == X86_64_SSEUP_CLASS
7000 && regclass[4] == X86_64_SSEUP_CLASS
7001 && regclass[5] == X86_64_SSEUP_CLASS
7002 && regclass[6] == X86_64_SSEUP_CLASS
7003 && regclass[7] == X86_64_SSEUP_CLASS
7004 && mode != BLKmode)
7005 return gen_reg_or_parallel (mode, orig_mode,
7006 SSE_REGNO (sse_regno));
7007 if (n == 2
7008 && regclass[0] == X86_64_X87_CLASS
7009 && regclass[1] == X86_64_X87UP_CLASS)
7010 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7012 if (n == 2
7013 && regclass[0] == X86_64_INTEGER_CLASS
7014 && regclass[1] == X86_64_INTEGER_CLASS
7015 && (mode == CDImode || mode == TImode)
7016 && intreg[0] + 1 == intreg[1])
7017 return gen_rtx_REG (mode, intreg[0]);
7019 /* Otherwise figure out the entries of the PARALLEL. */
7020 for (i = 0; i < n; i++)
7022 int pos;
7024 switch (regclass[i])
7026 case X86_64_NO_CLASS:
7027 break;
7028 case X86_64_INTEGER_CLASS:
7029 case X86_64_INTEGERSI_CLASS:
7030 /* Merge TImodes on aligned occasions here too. */
7031 if (i * 8 + 8 > bytes)
7032 tmpmode
7033 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7034 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7035 tmpmode = SImode;
7036 else
7037 tmpmode = DImode;
7038 /* We've requested 24 bytes for which we
7039 don't have a mode.  Use DImode. */
7040 if (tmpmode == BLKmode)
7041 tmpmode = DImode;
7042 exp [nexps++]
7043 = gen_rtx_EXPR_LIST (VOIDmode,
7044 gen_rtx_REG (tmpmode, *intreg),
7045 GEN_INT (i*8));
7046 intreg++;
7047 break;
7048 case X86_64_SSESF_CLASS:
7049 exp [nexps++]
7050 = gen_rtx_EXPR_LIST (VOIDmode,
7051 gen_rtx_REG (SFmode,
7052 SSE_REGNO (sse_regno)),
7053 GEN_INT (i*8));
7054 sse_regno++;
7055 break;
7056 case X86_64_SSEDF_CLASS:
7057 exp [nexps++]
7058 = gen_rtx_EXPR_LIST (VOIDmode,
7059 gen_rtx_REG (DFmode,
7060 SSE_REGNO (sse_regno)),
7061 GEN_INT (i*8));
7062 sse_regno++;
7063 break;
7064 case X86_64_SSE_CLASS:
7065 pos = i;
7066 switch (n)
7068 case 1:
7069 tmpmode = DImode;
7070 break;
7071 case 2:
7072 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7074 tmpmode = TImode;
7075 i++;
7077 else
7078 tmpmode = DImode;
7079 break;
7080 case 4:
7081 gcc_assert (i == 0
7082 && regclass[1] == X86_64_SSEUP_CLASS
7083 && regclass[2] == X86_64_SSEUP_CLASS
7084 && regclass[3] == X86_64_SSEUP_CLASS);
7085 tmpmode = OImode;
7086 i += 3;
7087 break;
7088 case 8:
7089 gcc_assert (i == 0
7090 && regclass[1] == X86_64_SSEUP_CLASS
7091 && regclass[2] == X86_64_SSEUP_CLASS
7092 && regclass[3] == X86_64_SSEUP_CLASS
7093 && regclass[4] == X86_64_SSEUP_CLASS
7094 && regclass[5] == X86_64_SSEUP_CLASS
7095 && regclass[6] == X86_64_SSEUP_CLASS
7096 && regclass[7] == X86_64_SSEUP_CLASS);
7097 tmpmode = XImode;
7098 i += 7;
7099 break;
7100 default:
7101 gcc_unreachable ();
7103 exp [nexps++]
7104 = gen_rtx_EXPR_LIST (VOIDmode,
7105 gen_rtx_REG (tmpmode,
7106 SSE_REGNO (sse_regno)),
7107 GEN_INT (pos*8));
7108 sse_regno++;
7109 break;
7110 default:
7111 gcc_unreachable ();
7115 /* Empty aligned struct, union or class. */
7116 if (nexps == 0)
7117 return NULL;
7119 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7120 for (i = 0; i < nexps; i++)
7121 XVECEXP (ret, 0, i) = exp [i];
7122 return ret;
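/* Illustrative note: for struct { double d; long l; } passed as the first
   argument, the container built above is roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)  (const_int 8))])

   i.e. the double travels in %xmm0 and the long in %rdi, with each
   const_int recording the byte offset of that piece within the struct.  */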
7125 /* Update the data in CUM to advance over an argument of mode MODE
7126 and data type TYPE. (TYPE is null for libcalls where that information
7127 may not be available.) */
7129 static void
7130 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7131 const_tree type, HOST_WIDE_INT bytes,
7132 HOST_WIDE_INT words)
7134 switch (mode)
7136 default:
7137 break;
7139 case BLKmode:
7140 if (bytes < 0)
7141 break;
7142 /* FALLTHRU */
7144 case DImode:
7145 case SImode:
7146 case HImode:
7147 case QImode:
7148 cum->words += words;
7149 cum->nregs -= words;
7150 cum->regno += words;
7152 if (cum->nregs <= 0)
7154 cum->nregs = 0;
7155 cum->regno = 0;
7157 break;
7159 case OImode:
7160 /* OImode shouldn't be used directly. */
7161 gcc_unreachable ();
7163 case DFmode:
7164 if (cum->float_in_sse < 2)
7165 break;
7166 case SFmode:
7167 if (cum->float_in_sse < 1)
7168 break;
7169 /* FALLTHRU */
7171 case V8SFmode:
7172 case V8SImode:
7173 case V64QImode:
7174 case V32HImode:
7175 case V16SImode:
7176 case V8DImode:
7177 case V16SFmode:
7178 case V8DFmode:
7179 case V32QImode:
7180 case V16HImode:
7181 case V4DFmode:
7182 case V4DImode:
7183 case TImode:
7184 case V16QImode:
7185 case V8HImode:
7186 case V4SImode:
7187 case V2DImode:
7188 case V4SFmode:
7189 case V2DFmode:
7190 if (!type || !AGGREGATE_TYPE_P (type))
7192 cum->sse_words += words;
7193 cum->sse_nregs -= 1;
7194 cum->sse_regno += 1;
7195 if (cum->sse_nregs <= 0)
7197 cum->sse_nregs = 0;
7198 cum->sse_regno = 0;
7201 break;
7203 case V8QImode:
7204 case V4HImode:
7205 case V2SImode:
7206 case V2SFmode:
7207 case V1TImode:
7208 case V1DImode:
7209 if (!type || !AGGREGATE_TYPE_P (type))
7211 cum->mmx_words += words;
7212 cum->mmx_nregs -= 1;
7213 cum->mmx_regno += 1;
7214 if (cum->mmx_nregs <= 0)
7216 cum->mmx_nregs = 0;
7217 cum->mmx_regno = 0;
7220 break;
7224 static void
7225 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7226 const_tree type, HOST_WIDE_INT words, bool named)
7228 int int_nregs, sse_nregs;
7230 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7231 if (!named && (VALID_AVX512F_REG_MODE (mode)
7232 || VALID_AVX256_REG_MODE (mode)))
7233 return;
7235 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7236 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7238 cum->nregs -= int_nregs;
7239 cum->sse_nregs -= sse_nregs;
7240 cum->regno += int_nregs;
7241 cum->sse_regno += sse_nregs;
7243 else
7245 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7246 cum->words = (cum->words + align - 1) & ~(align - 1);
7247 cum->words += words;
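/* Illustrative note: a call such as f (1, 2.0, s), with
   struct s { double d; long l; }, advances CUM as follows: the int takes
   one GP register, the double one SSE register, and the struct one of
   each, leaving cum->regno == 2 and cum->sse_regno == 2.  An argument
   that does not fit in the remaining registers falls through to the else
   branch and only advances cum->words (the stack offset).  */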
7251 static void
7252 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7253 HOST_WIDE_INT words)
7255 /* Otherwise, this should be passed indirect. */
7256 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7258 cum->words += words;
7259 if (cum->nregs > 0)
7261 cum->nregs -= 1;
7262 cum->regno += 1;
7266 /* Update the data in CUM to advance over an argument of mode MODE and
7267 data type TYPE. (TYPE is null for libcalls where that information
7268 may not be available.) */
7270 static void
7271 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7272 const_tree type, bool named)
7274 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7275 HOST_WIDE_INT bytes, words;
7277 if (mode == BLKmode)
7278 bytes = int_size_in_bytes (type);
7279 else
7280 bytes = GET_MODE_SIZE (mode);
7281 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7283 if (type)
7284 mode = type_natural_mode (type, NULL, false);
7286 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7287 function_arg_advance_ms_64 (cum, bytes, words);
7288 else if (TARGET_64BIT)
7289 function_arg_advance_64 (cum, mode, type, words, named);
7290 else
7291 function_arg_advance_32 (cum, mode, type, bytes, words);
7294 /* Define where to put the arguments to a function.
7295 Value is zero to push the argument on the stack,
7296 or a hard register in which to store the argument.
7298 MODE is the argument's machine mode.
7299 TYPE is the data type of the argument (as a tree).
7300 This is null for libcalls where that information may
7301 not be available.
7302 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7303 the preceding args and about the function being called.
7304 NAMED is nonzero if this argument is a named parameter
7305 (otherwise it is an extra parameter matching an ellipsis). */
7307 static rtx
7308 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7309 enum machine_mode orig_mode, const_tree type,
7310 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7312 /* Avoid the AL settings for the Unix64 ABI. */
7313 if (mode == VOIDmode)
7314 return constm1_rtx;
7316 switch (mode)
7318 default:
7319 break;
7321 case BLKmode:
7322 if (bytes < 0)
7323 break;
7324 /* FALLTHRU */
7325 case DImode:
7326 case SImode:
7327 case HImode:
7328 case QImode:
7329 if (words <= cum->nregs)
7331 int regno = cum->regno;
7333 /* Fastcall allocates the first two DWORD-sized (SImode) or
7334 smaller arguments to ECX and EDX if the argument isn't an
7335 aggregate type.  */
7336 if (cum->fastcall)
7338 if (mode == BLKmode
7339 || mode == DImode
7340 || (type && AGGREGATE_TYPE_P (type)))
7341 break;
7343 /* ECX, not EAX, is the first allocated register. */
7344 if (regno == AX_REG)
7345 regno = CX_REG;
7347 return gen_rtx_REG (mode, regno);
7349 break;
7351 case DFmode:
7352 if (cum->float_in_sse < 2)
7353 break;
7354 case SFmode:
7355 if (cum->float_in_sse < 1)
7356 break;
7357 /* FALLTHRU */
7358 case TImode:
7359 /* In 32bit, we pass TImode in xmm registers. */
7360 case V16QImode:
7361 case V8HImode:
7362 case V4SImode:
7363 case V2DImode:
7364 case V4SFmode:
7365 case V2DFmode:
7366 if (!type || !AGGREGATE_TYPE_P (type))
7368 if (cum->sse_nregs)
7369 return gen_reg_or_parallel (mode, orig_mode,
7370 cum->sse_regno + FIRST_SSE_REG);
7372 break;
7374 case OImode:
7375 case XImode:
7376 /* OImode and XImode shouldn't be used directly. */
7377 gcc_unreachable ();
7379 case V64QImode:
7380 case V32HImode:
7381 case V16SImode:
7382 case V8DImode:
7383 case V16SFmode:
7384 case V8DFmode:
7385 case V8SFmode:
7386 case V8SImode:
7387 case V32QImode:
7388 case V16HImode:
7389 case V4DFmode:
7390 case V4DImode:
7391 if (!type || !AGGREGATE_TYPE_P (type))
7393 if (cum->sse_nregs)
7394 return gen_reg_or_parallel (mode, orig_mode,
7395 cum->sse_regno + FIRST_SSE_REG);
7397 break;
7399 case V8QImode:
7400 case V4HImode:
7401 case V2SImode:
7402 case V2SFmode:
7403 case V1TImode:
7404 case V1DImode:
7405 if (!type || !AGGREGATE_TYPE_P (type))
7407 if (cum->mmx_nregs)
7408 return gen_reg_or_parallel (mode, orig_mode,
7409 cum->mmx_regno + FIRST_MMX_REG);
7411 break;
7414 return NULL_RTX;
7417 static rtx
7418 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7419 enum machine_mode orig_mode, const_tree type, bool named)
7421 /* Handle a hidden AL argument containing the number of registers
7422 for varargs x86-64 functions. */
7423 if (mode == VOIDmode)
7424 return GEN_INT (cum->maybe_vaarg
7425 ? (cum->sse_nregs < 0
7426 ? X86_64_SSE_REGPARM_MAX
7427 : cum->sse_regno)
7428 : -1);
7430 switch (mode)
7432 default:
7433 break;
7435 case V8SFmode:
7436 case V8SImode:
7437 case V32QImode:
7438 case V16HImode:
7439 case V4DFmode:
7440 case V4DImode:
7441 case V16SFmode:
7442 case V16SImode:
7443 case V64QImode:
7444 case V32HImode:
7445 case V8DFmode:
7446 case V8DImode:
7447 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7448 if (!named)
7449 return NULL;
7450 break;
7453 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7454 cum->sse_nregs,
7455 &x86_64_int_parameter_registers [cum->regno],
7456 cum->sse_regno);
7459 static rtx
7460 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7461 enum machine_mode orig_mode, bool named,
7462 HOST_WIDE_INT bytes)
7464 unsigned int regno;
7466 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7467 We use a value of -2 to specify that the current function call uses the MS ABI. */
7468 if (mode == VOIDmode)
7469 return GEN_INT (-2);
7471 /* If we've run out of registers, it goes on the stack. */
7472 if (cum->nregs == 0)
7473 return NULL_RTX;
7475 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7477 /* Only floating point modes are passed in anything but integer regs. */
7478 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7480 if (named)
7481 regno = cum->regno + FIRST_SSE_REG;
7482 else
7484 rtx t1, t2;
7486 /* Unnamed floating parameters are passed in both the
7487 SSE and integer registers. */
7488 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7489 t2 = gen_rtx_REG (mode, regno);
7490 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7491 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7492 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7495 /* Handle aggregate types passed in registers. */
7496 if (orig_mode == BLKmode)
7498 if (bytes > 0 && bytes <= 8)
7499 mode = (bytes > 4 ? DImode : SImode);
7500 if (mode == BLKmode)
7501 mode = DImode;
7504 return gen_reg_or_parallel (mode, orig_mode, regno);
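/* Illustrative note: under the MS 64-bit ABI each parameter, whatever its
   type, consumes one of the four argument slots, so e.g. the third
   parameter of f (int, int, double) is passed in %xmm2 when named.  An
   unnamed double in a varargs call is returned above as a two-element
   PARALLEL so that the caller places the value in both %xmm2 and %r8,
   as the convention requires for va_arg callees.  */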
7507 /* Return where to put the arguments to a function.
7508 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7510 MODE is the argument's machine mode. TYPE is the data type of the
7511 argument. It is null for libcalls where that information may not be
7512 available. CUM gives information about the preceding args and about
7513 the function being called. NAMED is nonzero if this argument is a
7514 named parameter (otherwise it is an extra parameter matching an
7515 ellipsis). */
7517 static rtx
7518 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7519 const_tree type, bool named)
7521 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7522 enum machine_mode mode = omode;
7523 HOST_WIDE_INT bytes, words;
7524 rtx arg;
7526 if (mode == BLKmode)
7527 bytes = int_size_in_bytes (type);
7528 else
7529 bytes = GET_MODE_SIZE (mode);
7530 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7532 /* To simplify the code below, represent vector types with a vector mode
7533 even if MMX/SSE are not active. */
7534 if (type && TREE_CODE (type) == VECTOR_TYPE)
7535 mode = type_natural_mode (type, cum, false);
7537 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7538 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7539 else if (TARGET_64BIT)
7540 arg = function_arg_64 (cum, mode, omode, type, named);
7541 else
7542 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7544 return arg;
7547 /* A C expression that indicates when an argument must be passed by
7548 reference. If nonzero for an argument, a copy of that argument is
7549 made in memory and a pointer to the argument is passed instead of
7550 the argument itself. The pointer is passed in whatever way is
7551 appropriate for passing a pointer to that type. */
7553 static bool
7554 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7555 const_tree type, bool named ATTRIBUTE_UNUSED)
7557 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7559 /* See Windows x64 Software Convention. */
7560 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7562 int msize = (int) GET_MODE_SIZE (mode);
7563 if (type)
7565 /* Arrays are passed by reference. */
7566 if (TREE_CODE (type) == ARRAY_TYPE)
7567 return true;
7569 if (AGGREGATE_TYPE_P (type))
7571 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7572 are passed by reference. */
7573 msize = int_size_in_bytes (type);
7577 /* __m128 is passed by reference. */
7578 switch (msize) {
7579 case 1: case 2: case 4: case 8:
7580 break;
7581 default:
7582 return true;
7585 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7586 return 1;
7588 return 0;
7591 /* Return true when TYPE should be 128bit aligned for 32bit argument
7592 passing ABI. XXX: This function is obsolete and is only used for
7593 checking psABI compatibility with previous versions of GCC. */
7595 static bool
7596 ix86_compat_aligned_value_p (const_tree type)
7598 enum machine_mode mode = TYPE_MODE (type);
7599 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7600 || mode == TDmode
7601 || mode == TFmode
7602 || mode == TCmode)
7603 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7604 return true;
7605 if (TYPE_ALIGN (type) < 128)
7606 return false;
7608 if (AGGREGATE_TYPE_P (type))
7610 /* Walk the aggregates recursively. */
7611 switch (TREE_CODE (type))
7613 case RECORD_TYPE:
7614 case UNION_TYPE:
7615 case QUAL_UNION_TYPE:
7617 tree field;
7619 /* Walk all the structure fields. */
7620 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7622 if (TREE_CODE (field) == FIELD_DECL
7623 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7624 return true;
7626 break;
7629 case ARRAY_TYPE:
7630 /* Just in case some language passes arrays by value. */
7631 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7632 return true;
7633 break;
7635 default:
7636 gcc_unreachable ();
7639 return false;
7642 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7643 XXX: This function is obsolete and is only used for checking psABI
7644 compatibility with previous versions of GCC. */
7646 static unsigned int
7647 ix86_compat_function_arg_boundary (enum machine_mode mode,
7648 const_tree type, unsigned int align)
7650 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7651 natural boundaries. */
7652 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7654 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7655 make an exception for SSE modes since these require 128bit
7656 alignment.
7658 The handling here differs from field_alignment. ICC aligns MMX
7659 arguments to 4 byte boundaries, while structure fields are aligned
7660 to 8 byte boundaries. */
7661 if (!type)
7663 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7664 align = PARM_BOUNDARY;
7666 else
7668 if (!ix86_compat_aligned_value_p (type))
7669 align = PARM_BOUNDARY;
7672 if (align > BIGGEST_ALIGNMENT)
7673 align = BIGGEST_ALIGNMENT;
7674 return align;
7677 /* Return true when TYPE should be 128bit aligned for 32bit argument
7678 passing ABI. */
7680 static bool
7681 ix86_contains_aligned_value_p (const_tree type)
7683 enum machine_mode mode = TYPE_MODE (type);
7685 if (mode == XFmode || mode == XCmode)
7686 return false;
7688 if (TYPE_ALIGN (type) < 128)
7689 return false;
7691 if (AGGREGATE_TYPE_P (type))
7693 /* Walk the aggregates recursively. */
7694 switch (TREE_CODE (type))
7696 case RECORD_TYPE:
7697 case UNION_TYPE:
7698 case QUAL_UNION_TYPE:
7700 tree field;
7702 /* Walk all the structure fields. */
7703 for (field = TYPE_FIELDS (type);
7704 field;
7705 field = DECL_CHAIN (field))
7707 if (TREE_CODE (field) == FIELD_DECL
7708 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7709 return true;
7711 break;
7714 case ARRAY_TYPE:
7715 /* Just in case some language passes arrays by value. */
7716 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7717 return true;
7718 break;
7720 default:
7721 gcc_unreachable ();
7724 else
7725 return TYPE_ALIGN (type) >= 128;
7727 return false;
7730 /* Gives the alignment boundary, in bits, of an argument with the
7731 specified mode and type. */
7733 static unsigned int
7734 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7736 unsigned int align;
7737 if (type)
7739 /* The main variant type is used for the call, so convert
7740 TYPE to its main variant. */
7741 type = TYPE_MAIN_VARIANT (type);
7742 align = TYPE_ALIGN (type);
7744 else
7745 align = GET_MODE_ALIGNMENT (mode);
7746 if (align < PARM_BOUNDARY)
7747 align = PARM_BOUNDARY;
7748 else
7750 static bool warned;
7751 unsigned int saved_align = align;
7753 if (!TARGET_64BIT)
7755 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7756 if (!type)
7758 if (mode == XFmode || mode == XCmode)
7759 align = PARM_BOUNDARY;
7761 else if (!ix86_contains_aligned_value_p (type))
7762 align = PARM_BOUNDARY;
7764 if (align < 128)
7765 align = PARM_BOUNDARY;
7768 if (warn_psabi
7769 && !warned
7770 && align != ix86_compat_function_arg_boundary (mode, type,
7771 saved_align))
7773 warned = true;
7774 inform (input_location,
7775 "The ABI for passing parameters with %d-byte"
7776 " alignment has changed in GCC 4.6",
7777 align / BITS_PER_UNIT);
7781 return align;
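/* Illustrative note: on i386 a plain double argument keeps the default
   PARM_BOUNDARY (32-bit) alignment, while a 16-byte vector such as
   __m128 is aligned to 128 bits.  The -Wpsabi note above is emitted when
   this result differs from what ix86_compat_function_arg_boundary would
   have produced in releases before GCC 4.6.  */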
7784 /* Return true if N is a possible register number of function value. */
7786 static bool
7787 ix86_function_value_regno_p (const unsigned int regno)
7789 switch (regno)
7791 case AX_REG:
7792 case DX_REG:
7793 return true;
7794 case DI_REG:
7795 case SI_REG:
7796 return TARGET_64BIT && ix86_abi != MS_ABI;
7798 /* Complex values are returned in %st(0)/%st(1) pair. */
7799 case ST0_REG:
7800 case ST1_REG:
7801 /* TODO: This should depend on the current function's ABI, but
7802 builtins.c would then need updating.  Therefore we use the
7803 default ABI. */
7804 if (TARGET_64BIT && ix86_abi == MS_ABI)
7805 return false;
7806 return TARGET_FLOAT_RETURNS_IN_80387;
7808 /* Complex values are returned in %xmm0/%xmm1 pair. */
7809 case XMM0_REG:
7810 case XMM1_REG:
7811 return TARGET_SSE;
7813 case MM0_REG:
7814 if (TARGET_MACHO || TARGET_64BIT)
7815 return false;
7816 return TARGET_MMX;
7819 return false;
7822 /* Define how to find the value returned by a function.
7823 VALTYPE is the data type of the value (as a tree).
7824 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7825 otherwise, FUNC is 0. */
7827 static rtx
7828 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7829 const_tree fntype, const_tree fn)
7831 unsigned int regno;
7833 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7834 we normally prevent this case when mmx is not available. However
7835 some ABIs may require the result to be returned like DImode. */
7836 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7837 regno = FIRST_MMX_REG;
7839 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7840 we prevent this case when sse is not available. However some ABIs
7841 may require the result to be returned like integer TImode. */
7842 else if (mode == TImode
7843 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7844 regno = FIRST_SSE_REG;
7846 /* 32-byte vector modes in %ymm0. */
7847 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7848 regno = FIRST_SSE_REG;
7850 /* 64-byte vector modes in %zmm0. */
7851 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7852 regno = FIRST_SSE_REG;
7854 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7855 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7856 regno = FIRST_FLOAT_REG;
7857 else
7858 /* Most things go in %eax. */
7859 regno = AX_REG;
7861 /* Override FP return register with %xmm0 for local functions when
7862 SSE math is enabled or for functions with sseregparm attribute. */
7863 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7865 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7866 if ((sse_level >= 1 && mode == SFmode)
7867 || (sse_level == 2 && mode == DFmode))
7868 regno = FIRST_SSE_REG;
7871 /* OImode shouldn't be used directly. */
7872 gcc_assert (mode != OImode);
7874 return gen_rtx_REG (orig_mode, regno);
7877 static rtx
7878 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7879 const_tree valtype)
7881 rtx ret;
7883 /* Handle libcalls, which don't provide a type node. */
7884 if (valtype == NULL)
7886 unsigned int regno;
7888 switch (mode)
7890 case SFmode:
7891 case SCmode:
7892 case DFmode:
7893 case DCmode:
7894 case TFmode:
7895 case SDmode:
7896 case DDmode:
7897 case TDmode:
7898 regno = FIRST_SSE_REG;
7899 break;
7900 case XFmode:
7901 case XCmode:
7902 regno = FIRST_FLOAT_REG;
7903 break;
7904 case TCmode:
7905 return NULL;
7906 default:
7907 regno = AX_REG;
7910 return gen_rtx_REG (mode, regno);
7912 else if (POINTER_TYPE_P (valtype))
7914 /* Pointers are always returned in word_mode. */
7915 mode = word_mode;
7918 ret = construct_container (mode, orig_mode, valtype, 1,
7919 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7920 x86_64_int_return_registers, 0);
7922 /* For zero sized structures, construct_container returns NULL, but we
7923 need to keep rest of compiler happy by returning meaningful value. */
7924 if (!ret)
7925 ret = gen_rtx_REG (orig_mode, AX_REG);
7927 return ret;
7930 static rtx
7931 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7932 const_tree valtype)
7934 unsigned int regno = AX_REG;
7936 if (TARGET_SSE)
7938 switch (GET_MODE_SIZE (mode))
7940 case 16:
7941 if (valtype != NULL_TREE
7942 && !VECTOR_INTEGER_TYPE_P (valtype)
7944 && !INTEGRAL_TYPE_P (valtype)
7945 && !VECTOR_FLOAT_TYPE_P (valtype))
7946 break;
7947 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7948 && !COMPLEX_MODE_P (mode))
7949 regno = FIRST_SSE_REG;
7950 break;
7951 case 8:
7952 case 4:
7953 if (mode == SFmode || mode == DFmode)
7954 regno = FIRST_SSE_REG;
7955 break;
7956 default:
7957 break;
7960 return gen_rtx_REG (orig_mode, regno);
7963 static rtx
7964 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7965 enum machine_mode orig_mode, enum machine_mode mode)
7967 const_tree fn, fntype;
7969 fn = NULL_TREE;
7970 if (fntype_or_decl && DECL_P (fntype_or_decl))
7971 fn = fntype_or_decl;
7972 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7974 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7975 return function_value_ms_64 (orig_mode, mode, valtype);
7976 else if (TARGET_64BIT)
7977 return function_value_64 (orig_mode, mode, valtype);
7978 else
7979 return function_value_32 (orig_mode, mode, fntype, fn);
7982 static rtx
7983 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7984 bool outgoing ATTRIBUTE_UNUSED)
7986 enum machine_mode mode, orig_mode;
7988 orig_mode = TYPE_MODE (valtype);
7989 mode = type_natural_mode (valtype, NULL, true);
7990 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7993 /* Pointer function arguments and return values are promoted to
7994 word_mode. */
7996 static enum machine_mode
7997 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7998 int *punsignedp, const_tree fntype,
7999 int for_return)
8001 if (type != NULL_TREE && POINTER_TYPE_P (type))
8003 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8004 return word_mode;
8006 return default_promote_function_mode (type, mode, punsignedp, fntype,
8007 for_return);
8010 /* Return true if a structure, union or array with MODE containing FIELD
8011 should be accessed using BLKmode. */
8013 static bool
8014 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8016 /* Union with XFmode must be in BLKmode. */
8017 return (mode == XFmode
8018 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8019 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8023 ix86_libcall_value (enum machine_mode mode)
8025 return ix86_function_value_1 (NULL, NULL, mode, mode);
8028 /* Return true iff type is returned in memory. */
8030 static bool ATTRIBUTE_UNUSED
8031 return_in_memory_32 (const_tree type, enum machine_mode mode)
8033 HOST_WIDE_INT size;
8035 if (mode == BLKmode)
8036 return true;
8038 size = int_size_in_bytes (type);
8040 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8041 return false;
8043 if (VECTOR_MODE_P (mode) || mode == TImode)
8045 /* User-created vectors small enough to fit in EAX. */
8046 if (size < 8)
8047 return false;
8049 /* MMX/3dNow values are returned in MM0,
8050 except when it doesn't exist or the ABI prescribes otherwise. */
8051 if (size == 8)
8052 return !TARGET_MMX || TARGET_VECT8_RETURNS;
8054 /* SSE values are returned in XMM0, except when it doesn't exist. */
8055 if (size == 16)
8056 return !TARGET_SSE;
8058 /* AVX values are returned in YMM0, except when it doesn't exist. */
8059 if (size == 32)
8060 return !TARGET_AVX;
8062 /* AVX512F values are returned in ZMM0, except when it doesn't exist. */
8063 if (size == 64)
8064 return !TARGET_AVX512F;
8067 if (mode == XFmode)
8068 return false;
8070 if (size > 12)
8071 return true;
8073 /* OImode shouldn't be used directly. */
8074 gcc_assert (mode != OImode);
8076 return false;
8079 static bool ATTRIBUTE_UNUSED
8080 return_in_memory_64 (const_tree type, enum machine_mode mode)
8082 int needed_intregs, needed_sseregs;
8083 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
8086 static bool ATTRIBUTE_UNUSED
8087 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
8089 HOST_WIDE_INT size = int_size_in_bytes (type);
8091 /* __m128 is returned in xmm0. */
8092 if ((!type || VECTOR_INTEGER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
8093 || VECTOR_FLOAT_TYPE_P (type))
8094 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8095 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
8096 return false;
8098 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
8099 return size != 1 && size != 2 && size != 4 && size != 8;
8102 static bool
8103 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8105 #ifdef SUBTARGET_RETURN_IN_MEMORY
8106 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8107 #else
8108 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8110 if (TARGET_64BIT)
8112 if (ix86_function_type_abi (fntype) == MS_ABI)
8113 return return_in_memory_ms_64 (type, mode);
8114 else
8115 return return_in_memory_64 (type, mode);
8117 else
8118 return return_in_memory_32 (type, mode);
8119 #endif
8123 /* Create the va_list data type. */
8125 /* Returns the calling-convention-specific va_list data type.
8126 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8128 static tree
8129 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8131 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8133 /* For i386 we use a plain pointer to the argument area. */
8134 if (!TARGET_64BIT || abi == MS_ABI)
8135 return build_pointer_type (char_type_node);
8137 record = lang_hooks.types.make_type (RECORD_TYPE);
8138 type_decl = build_decl (BUILTINS_LOCATION,
8139 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8141 f_gpr = build_decl (BUILTINS_LOCATION,
8142 FIELD_DECL, get_identifier ("gp_offset"),
8143 unsigned_type_node);
8144 f_fpr = build_decl (BUILTINS_LOCATION,
8145 FIELD_DECL, get_identifier ("fp_offset"),
8146 unsigned_type_node);
8147 f_ovf = build_decl (BUILTINS_LOCATION,
8148 FIELD_DECL, get_identifier ("overflow_arg_area"),
8149 ptr_type_node);
8150 f_sav = build_decl (BUILTINS_LOCATION,
8151 FIELD_DECL, get_identifier ("reg_save_area"),
8152 ptr_type_node);
8154 va_list_gpr_counter_field = f_gpr;
8155 va_list_fpr_counter_field = f_fpr;
8157 DECL_FIELD_CONTEXT (f_gpr) = record;
8158 DECL_FIELD_CONTEXT (f_fpr) = record;
8159 DECL_FIELD_CONTEXT (f_ovf) = record;
8160 DECL_FIELD_CONTEXT (f_sav) = record;
8162 TYPE_STUB_DECL (record) = type_decl;
8163 TYPE_NAME (record) = type_decl;
8164 TYPE_FIELDS (record) = f_gpr;
8165 DECL_CHAIN (f_gpr) = f_fpr;
8166 DECL_CHAIN (f_fpr) = f_ovf;
8167 DECL_CHAIN (f_ovf) = f_sav;
8169 layout_type (record);
8171 /* The correct type is an array type of one element. */
8172 return build_array_type (record, build_index_type (size_zero_node));
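/* For reference, the record built above matches the va_list layout that
   the SysV x86-64 psABI describes in C terms (shown only as an
   illustration; the compiler constructs the type through the tree
   machinery rather than from this declaration):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */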
8175 /* Setup the builtin va_list data type and for 64-bit the additional
8176 calling convention specific va_list data types. */
8178 static tree
8179 ix86_build_builtin_va_list (void)
8181 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8183 /* Initialize abi specific va_list builtin types. */
8184 if (TARGET_64BIT)
8186 tree t;
8187 if (ix86_abi == MS_ABI)
8189 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8190 if (TREE_CODE (t) != RECORD_TYPE)
8191 t = build_variant_type_copy (t);
8192 sysv_va_list_type_node = t;
8194 else
8196 t = ret;
8197 if (TREE_CODE (t) != RECORD_TYPE)
8198 t = build_variant_type_copy (t);
8199 sysv_va_list_type_node = t;
8201 if (ix86_abi != MS_ABI)
8203 t = ix86_build_builtin_va_list_abi (MS_ABI);
8204 if (TREE_CODE (t) != RECORD_TYPE)
8205 t = build_variant_type_copy (t);
8206 ms_va_list_type_node = t;
8208 else
8210 t = ret;
8211 if (TREE_CODE (t) != RECORD_TYPE)
8212 t = build_variant_type_copy (t);
8213 ms_va_list_type_node = t;
8217 return ret;
8220 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8222 static void
8223 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8225 rtx save_area, mem;
8226 alias_set_type set;
8227 int i, max;
8229 /* GPR size of varargs save area. */
8230 if (cfun->va_list_gpr_size)
8231 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8232 else
8233 ix86_varargs_gpr_size = 0;
8235 /* FPR size of varargs save area. We don't need it if we don't pass
8236 anything in SSE registers. */
8237 if (TARGET_SSE && cfun->va_list_fpr_size)
8238 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8239 else
8240 ix86_varargs_fpr_size = 0;
8242 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8243 return;
8245 save_area = frame_pointer_rtx;
8246 set = get_varargs_alias_set ();
8248 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8249 if (max > X86_64_REGPARM_MAX)
8250 max = X86_64_REGPARM_MAX;
8252 for (i = cum->regno; i < max; i++)
8254 mem = gen_rtx_MEM (word_mode,
8255 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8256 MEM_NOTRAP_P (mem) = 1;
8257 set_mem_alias_set (mem, set);
8258 emit_move_insn (mem,
8259 gen_rtx_REG (word_mode,
8260 x86_64_int_parameter_registers[i]));
8263 if (ix86_varargs_fpr_size)
8265 enum machine_mode smode;
8266 rtx label, test;
8268 /* Now emit code to save SSE registers.  The AX parameter contains the number
8269 of SSE parameter registers used to call this function, though all we
8270 actually check here is the zero/non-zero status. */
8272 label = gen_label_rtx ();
8273 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8274 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8275 label));
8277 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8278 we used movdqa (i.e. TImode) instead? Perhaps even better would
8279 be if we could determine the real mode of the data, via a hook
8280 into pass_stdarg. Ignore all that for now. */
8281 smode = V4SFmode;
8282 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8283 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8285 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8286 if (max > X86_64_SSE_REGPARM_MAX)
8287 max = X86_64_SSE_REGPARM_MAX;
8289 for (i = cum->sse_regno; i < max; ++i)
8291 mem = plus_constant (Pmode, save_area,
8292 i * 16 + ix86_varargs_gpr_size);
8293 mem = gen_rtx_MEM (smode, mem);
8294 MEM_NOTRAP_P (mem) = 1;
8295 set_mem_alias_set (mem, set);
8296 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8298 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8301 emit_label (label);
8305 static void
8306 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8308 alias_set_type set = get_varargs_alias_set ();
8309 int i;
8311 /* Reset to zero, as a sysv va_arg might have been used
8312 before. */
8313 ix86_varargs_gpr_size = 0;
8314 ix86_varargs_fpr_size = 0;
8316 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8318 rtx reg, mem;
8320 mem = gen_rtx_MEM (Pmode,
8321 plus_constant (Pmode, virtual_incoming_args_rtx,
8322 i * UNITS_PER_WORD));
8323 MEM_NOTRAP_P (mem) = 1;
8324 set_mem_alias_set (mem, set);
8326 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8327 emit_move_insn (mem, reg);
8331 static void
8332 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8333 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8334 int no_rtl)
8336 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8337 CUMULATIVE_ARGS next_cum;
8338 tree fntype;
8340 /* This argument doesn't appear to be used anymore, which is good,
8341 because the old code here didn't suppress rtl generation. */
8342 gcc_assert (!no_rtl);
8344 if (!TARGET_64BIT)
8345 return;
8347 fntype = TREE_TYPE (current_function_decl);
8349 /* For varargs, we do not want to skip the dummy va_dcl argument.
8350 For stdargs, we do want to skip the last named argument. */
8351 next_cum = *cum;
8352 if (stdarg_p (fntype))
8353 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8354 true);
8356 if (cum->call_abi == MS_ABI)
8357 setup_incoming_varargs_ms_64 (&next_cum);
8358 else
8359 setup_incoming_varargs_64 (&next_cum);
8362 /* Checks if TYPE is of kind va_list char *. */
8364 static bool
8365 is_va_list_char_pointer (tree type)
8367 tree canonic;
8369 /* For 32-bit it is always true. */
8370 if (!TARGET_64BIT)
8371 return true;
8372 canonic = ix86_canonical_va_list_type (type);
8373 return (canonic == ms_va_list_type_node
8374 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8377 /* Implement va_start. */
8379 static void
8380 ix86_va_start (tree valist, rtx nextarg)
8382 HOST_WIDE_INT words, n_gpr, n_fpr;
8383 tree f_gpr, f_fpr, f_ovf, f_sav;
8384 tree gpr, fpr, ovf, sav, t;
8385 tree type;
8386 rtx ovf_rtx;
8388 if (flag_split_stack
8389 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8391 unsigned int scratch_regno;
8393 /* When we are splitting the stack, we can't refer to the stack
8394 arguments using internal_arg_pointer, because they may be on
8395 the old stack. The split stack prologue will arrange to
8396 leave a pointer to the old stack arguments in a scratch
8397 register, which we here copy to a pseudo-register. The split
8398 stack prologue can't set the pseudo-register directly because
8399 it (the prologue) runs before any registers have been saved. */
8401 scratch_regno = split_stack_prologue_scratch_regno ();
8402 if (scratch_regno != INVALID_REGNUM)
8404 rtx reg, seq;
8406 reg = gen_reg_rtx (Pmode);
8407 cfun->machine->split_stack_varargs_pointer = reg;
8409 start_sequence ();
8410 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8411 seq = get_insns ();
8412 end_sequence ();
8414 push_topmost_sequence ();
8415 emit_insn_after (seq, entry_of_function ());
8416 pop_topmost_sequence ();
8420 /* Only 64bit target needs something special. */
8421 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8423 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8424 std_expand_builtin_va_start (valist, nextarg);
8425 else
8427 rtx va_r, next;
8429 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8430 next = expand_binop (ptr_mode, add_optab,
8431 cfun->machine->split_stack_varargs_pointer,
8432 crtl->args.arg_offset_rtx,
8433 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8434 convert_move (va_r, next, 0);
8436 return;
8439 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8440 f_fpr = DECL_CHAIN (f_gpr);
8441 f_ovf = DECL_CHAIN (f_fpr);
8442 f_sav = DECL_CHAIN (f_ovf);
8444 valist = build_simple_mem_ref (valist);
8445 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8446 /* The following should be folded into the MEM_REF offset. */
8447 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8448 f_gpr, NULL_TREE);
8449 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8450 f_fpr, NULL_TREE);
8451 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8452 f_ovf, NULL_TREE);
8453 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8454 f_sav, NULL_TREE);
8456 /* Count number of gp and fp argument registers used. */
8457 words = crtl->args.info.words;
8458 n_gpr = crtl->args.info.regno;
8459 n_fpr = crtl->args.info.sse_regno;
8461 if (cfun->va_list_gpr_size)
8463 type = TREE_TYPE (gpr);
8464 t = build2 (MODIFY_EXPR, type,
8465 gpr, build_int_cst (type, n_gpr * 8));
8466 TREE_SIDE_EFFECTS (t) = 1;
8467 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8470 if (TARGET_SSE && cfun->va_list_fpr_size)
8472 type = TREE_TYPE (fpr);
8473 t = build2 (MODIFY_EXPR, type, fpr,
8474 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8475 TREE_SIDE_EFFECTS (t) = 1;
8476 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8479 /* Find the overflow area. */
8480 type = TREE_TYPE (ovf);
8481 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8482 ovf_rtx = crtl->args.internal_arg_pointer;
8483 else
8484 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8485 t = make_tree (type, ovf_rtx);
8486 if (words != 0)
8487 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8488 t = build2 (MODIFY_EXPR, type, ovf, t);
8489 TREE_SIDE_EFFECTS (t) = 1;
8490 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8492 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8494 /* Find the register save area.
8495 The function prologue saves it right above the stack frame. */
8496 type = TREE_TYPE (sav);
8497 t = make_tree (type, frame_pointer_rtx);
8498 if (!ix86_varargs_gpr_size)
8499 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8500 t = build2 (MODIFY_EXPR, type, sav, t);
8501 TREE_SIDE_EFFECTS (t) = 1;
8502 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
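/* Illustrative note: after the expansion above, va_start in a function
   whose named arguments consumed two GP registers and one SSE register
   leaves gp_offset == 16, fp_offset == 48 + 16 == 64,
   overflow_arg_area pointing just past any named stack arguments, and
   reg_save_area pointing at the block spilled by
   setup_incoming_varargs_64.  */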
8506 /* Implement va_arg. */
8508 static tree
8509 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8510 gimple_seq *post_p)
8512 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8513 tree f_gpr, f_fpr, f_ovf, f_sav;
8514 tree gpr, fpr, ovf, sav, t;
8515 int size, rsize;
8516 tree lab_false, lab_over = NULL_TREE;
8517 tree addr, t2;
8518 rtx container;
8519 int indirect_p = 0;
8520 tree ptrtype;
8521 enum machine_mode nat_mode;
8522 unsigned int arg_boundary;
8524 /* Only 64bit target needs something special. */
8525 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8526 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8528 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8529 f_fpr = DECL_CHAIN (f_gpr);
8530 f_ovf = DECL_CHAIN (f_fpr);
8531 f_sav = DECL_CHAIN (f_ovf);
8533 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8534 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8535 valist = build_va_arg_indirect_ref (valist);
8536 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8537 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8538 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8540 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8541 if (indirect_p)
8542 type = build_pointer_type (type);
8543 size = int_size_in_bytes (type);
8544 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
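/* For example, with UNITS_PER_WORD == 8 a 12-byte argument gives size == 12
   and rsize == 2; the overflow-area pointer below is always advanced by
   whole words.  */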
8546 nat_mode = type_natural_mode (type, NULL, false);
8547 switch (nat_mode)
8549 case V8SFmode:
8550 case V8SImode:
8551 case V32QImode:
8552 case V16HImode:
8553 case V4DFmode:
8554 case V4DImode:
8555 case V16SFmode:
8556 case V16SImode:
8557 case V64QImode:
8558 case V32HImode:
8559 case V8DFmode:
8560 case V8DImode:
8561 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
8562 if (!TARGET_64BIT_MS_ABI)
8564 container = NULL;
8565 break;
8568 default:
8569 container = construct_container (nat_mode, TYPE_MODE (type),
8570 type, 0, X86_64_REGPARM_MAX,
8571 X86_64_SSE_REGPARM_MAX, intreg,
8572 0);
8573 break;
8576 /* Pull the value out of the saved registers. */
8578 addr = create_tmp_var (ptr_type_node, "addr");
8580 if (container)
8582 int needed_intregs, needed_sseregs;
8583 bool need_temp;
8584 tree int_addr, sse_addr;
8586 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8587 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8589 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8591 need_temp = (!REG_P (container)
8592 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8593 || TYPE_ALIGN (type) > 128));
8595 /* If we are passing a structure, verify that it forms a consecutive block
8596 in the register save area. If not, we need to do moves. */
8597 if (!need_temp && !REG_P (container))
8599 /* Verify that all registers are strictly consecutive */
8600 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8602 int i;
8604 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8606 rtx slot = XVECEXP (container, 0, i);
8607 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8608 || INTVAL (XEXP (slot, 1)) != i * 16)
8609 need_temp = 1;
8612 else
8614 int i;
8616 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8618 rtx slot = XVECEXP (container, 0, i);
8619 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8620 || INTVAL (XEXP (slot, 1)) != i * 8)
8621 need_temp = 1;
8625 if (!need_temp)
8627 int_addr = addr;
8628 sse_addr = addr;
8630 else
8632 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8633 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8636 /* First ensure that we fit completely in registers. */
8637 if (needed_intregs)
8639 t = build_int_cst (TREE_TYPE (gpr),
8640 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8641 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8642 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8643 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8644 gimplify_and_add (t, pre_p);
8646 if (needed_sseregs)
8648 t = build_int_cst (TREE_TYPE (fpr),
8649 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8650 + X86_64_REGPARM_MAX * 8);
8651 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8652 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8653 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8654 gimplify_and_add (t, pre_p);
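/* Worked example of the fit checks above: an argument needing two integer
   registers takes the lab_false (stack) path once gp_offset >=
   (6 - 2 + 1) * 8 == 40, i.e. when fewer than two of the six 8-byte slots
   remain.  The SSE check is analogous, with 16-byte slots starting at
   offset 48.  */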
8657 /* Compute index to start of area used for integer regs. */
8658 if (needed_intregs)
8660 /* int_addr = gpr + sav; */
8661 t = fold_build_pointer_plus (sav, gpr);
8662 gimplify_assign (int_addr, t, pre_p);
8664 if (needed_sseregs)
8666 /* sse_addr = fpr + sav; */
8667 t = fold_build_pointer_plus (sav, fpr);
8668 gimplify_assign (sse_addr, t, pre_p);
8670 if (need_temp)
8672 int i, prev_size = 0;
8673 tree temp = create_tmp_var (type, "va_arg_tmp");
8675 /* addr = &temp; */
8676 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8677 gimplify_assign (addr, t, pre_p);
8679 for (i = 0; i < XVECLEN (container, 0); i++)
8681 rtx slot = XVECEXP (container, 0, i);
8682 rtx reg = XEXP (slot, 0);
8683 enum machine_mode mode = GET_MODE (reg);
8684 tree piece_type;
8685 tree addr_type;
8686 tree daddr_type;
8687 tree src_addr, src;
8688 int src_offset;
8689 tree dest_addr, dest;
8690 int cur_size = GET_MODE_SIZE (mode);
8692 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8693 prev_size = INTVAL (XEXP (slot, 1));
8694 if (prev_size + cur_size > size)
8696 cur_size = size - prev_size;
8697 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8698 if (mode == BLKmode)
8699 mode = QImode;
8701 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8702 if (mode == GET_MODE (reg))
8703 addr_type = build_pointer_type (piece_type);
8704 else
8705 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8706 true);
8707 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8708 true);
8710 if (SSE_REGNO_P (REGNO (reg)))
8712 src_addr = sse_addr;
8713 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8715 else
8717 src_addr = int_addr;
8718 src_offset = REGNO (reg) * 8;
8720 src_addr = fold_convert (addr_type, src_addr);
8721 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8723 dest_addr = fold_convert (daddr_type, addr);
8724 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8725 if (cur_size == GET_MODE_SIZE (mode))
8727 src = build_va_arg_indirect_ref (src_addr);
8728 dest = build_va_arg_indirect_ref (dest_addr);
8730 gimplify_assign (dest, src, pre_p);
8732 else
8734 tree copy
8735 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8736 3, dest_addr, src_addr,
8737 size_int (cur_size));
8738 gimplify_and_add (copy, pre_p);
8740 prev_size += cur_size;
8744 if (needed_intregs)
8746 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8747 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8748 gimplify_assign (gpr, t, pre_p);
8751 if (needed_sseregs)
8753 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8754 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8755 gimplify_assign (fpr, t, pre_p);
8758 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8760 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8763 /* ... otherwise out of the overflow area. */
8765 /* When we align a parameter on the stack for the caller, if the parameter
8766 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8767 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
8768 with the caller. */
8769 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8770 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8771 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8773 /* Care for on-stack alignment if needed. */
8774 if (arg_boundary <= 64 || size == 0)
8775 t = ovf;
8776 else
8778 HOST_WIDE_INT align = arg_boundary / 8;
8779 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8780 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8781 build_int_cst (TREE_TYPE (t), -align));
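/* This is the usual round-up-to-a-multiple idiom; for example, with a
   32-byte aligned argument (align == 32) and ovf == 0x58, the result is
   (0x58 + 31) & -32 == 0x60.  */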
8784 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8785 gimplify_assign (addr, t, pre_p);
8787 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8788 gimplify_assign (unshare_expr (ovf), t, pre_p);
8790 if (container)
8791 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8793 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8794 addr = fold_convert (ptrtype, addr);
8796 if (indirect_p)
8797 addr = build_va_arg_indirect_ref (addr);
8798 return build_va_arg_indirect_ref (addr);
8801 /* Return true if OPNUM's MEM should be matched
8802 in movabs* patterns. */
8804 bool
8805 ix86_check_movabs (rtx insn, int opnum)
8807 rtx set, mem;
8809 set = PATTERN (insn);
8810 if (GET_CODE (set) == PARALLEL)
8811 set = XVECEXP (set, 0, 0);
8812 gcc_assert (GET_CODE (set) == SET);
8813 mem = XEXP (set, opnum);
8814 while (GET_CODE (mem) == SUBREG)
8815 mem = SUBREG_REG (mem);
8816 gcc_assert (MEM_P (mem));
8817 return volatile_ok || !MEM_VOLATILE_P (mem);
8820 /* Initialize the table of extra 80387 mathematical constants. */
8822 static void
8823 init_ext_80387_constants (void)
8825 static const char * cst[5] =
8827 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8828 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8829 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8830 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8831 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8833 int i;
8835 for (i = 0; i < 5; i++)
8837 real_from_string (&ext_80387_constants_table[i], cst[i]);
8838 /* Ensure each constant is rounded to XFmode precision. */
8839 real_convert (&ext_80387_constants_table[i],
8840 XFmode, &ext_80387_constants_table[i]);
8843 ext_80387_constants_init = 1;
8846 /* Return non-zero if the constant is something that
8847 can be loaded with a special instruction. */
8849 int
8850 standard_80387_constant_p (rtx x)
8852 enum machine_mode mode = GET_MODE (x);
8854 REAL_VALUE_TYPE r;
8856 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8857 return -1;
8859 if (x == CONST0_RTX (mode))
8860 return 1;
8861 if (x == CONST1_RTX (mode))
8862 return 2;
8864 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8866 /* For XFmode constants, try to find a special 80387 instruction when
8867 optimizing for size or on those CPUs that benefit from them. */
8868 if (mode == XFmode
8869 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8871 int i;
8873 if (! ext_80387_constants_init)
8874 init_ext_80387_constants ();
8876 for (i = 0; i < 5; i++)
8877 if (real_identical (&r, &ext_80387_constants_table[i]))
8878 return i + 3;
8881 /* Load of the constant -0.0 or -1.0 will be split as
8882 fldz;fchs or fld1;fchs sequence. */
8883 if (real_isnegzero (&r))
8884 return 8;
8885 if (real_identical (&r, &dconstm1))
8886 return 9;
8888 return 0;
8891 /* Return the opcode of the special instruction to be used to load
8892 the constant X. */
8894 const char *
8895 standard_80387_constant_opcode (rtx x)
8897 switch (standard_80387_constant_p (x))
8899 case 1:
8900 return "fldz";
8901 case 2:
8902 return "fld1";
8903 case 3:
8904 return "fldlg2";
8905 case 4:
8906 return "fldln2";
8907 case 5:
8908 return "fldl2e";
8909 case 6:
8910 return "fldl2t";
8911 case 7:
8912 return "fldpi";
8913 case 8:
8914 case 9:
8915 return "#";
8916 default:
8917 gcc_unreachable ();
8921 /* Return the CONST_DOUBLE representing the 80387 constant that is
8922 loaded by the specified special instruction. The argument IDX
8923 matches the return value from standard_80387_constant_p. */
8925 rtx
8926 standard_80387_constant_rtx (int idx)
8928 int i;
8930 if (! ext_80387_constants_init)
8931 init_ext_80387_constants ();
8933 switch (idx)
8935 case 3:
8936 case 4:
8937 case 5:
8938 case 6:
8939 case 7:
8940 i = idx - 3;
8941 break;
8943 default:
8944 gcc_unreachable ();
8947 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8948 XFmode);
8951 /* Return 1 if X is all 0s and 2 if X is all 1s
8952 in a supported SSE/AVX vector mode. */
8954 int
8955 standard_sse_constant_p (rtx x)
8957 enum machine_mode mode = GET_MODE (x);
8959 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8960 return 1;
8961 if (vector_all_ones_operand (x, mode))
8962 switch (mode)
8964 case V16QImode:
8965 case V8HImode:
8966 case V4SImode:
8967 case V2DImode:
8968 if (TARGET_SSE2)
8969 return 2;
8970 case V32QImode:
8971 case V16HImode:
8972 case V8SImode:
8973 case V4DImode:
8974 if (TARGET_AVX2)
8975 return 2;
8976 case V64QImode:
8977 case V32HImode:
8978 case V16SImode:
8979 case V8DImode:
8980 if (TARGET_AVX512F)
8981 return 2;
8982 default:
8983 break;
8986 return 0;
8989 /* Return the opcode of the special instruction to be used to load
8990 the constant X. */
8992 const char *
8993 standard_sse_constant_opcode (rtx insn, rtx x)
8995 switch (standard_sse_constant_p (x))
8997 case 1:
8998 switch (get_attr_mode (insn))
9000 case MODE_XI:
9001 case MODE_V16SF:
9002 return "vpxord\t%g0, %g0, %g0";
9003 case MODE_V8DF:
9004 return "vpxorq\t%g0, %g0, %g0";
9005 case MODE_TI:
9006 return "%vpxor\t%0, %d0";
9007 case MODE_V2DF:
9008 return "%vxorpd\t%0, %d0";
9009 case MODE_V4SF:
9010 return "%vxorps\t%0, %d0";
9012 case MODE_OI:
9013 return "vpxor\t%x0, %x0, %x0";
9014 case MODE_V4DF:
9015 return "vxorpd\t%x0, %x0, %x0";
9016 case MODE_V8SF:
9017 return "vxorps\t%x0, %x0, %x0";
9019 default:
9020 break;
9023 case 2:
9024 if (get_attr_mode (insn) == MODE_XI
9025 || get_attr_mode (insn) == MODE_V8DF
9026 || get_attr_mode (insn) == MODE_V16SF)
9027 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9028 if (TARGET_AVX)
9029 return "vpcmpeqd\t%0, %0, %0";
9030 else
9031 return "pcmpeqd\t%0, %0";
9033 default:
9034 break;
9036 gcc_unreachable ();
9039 /* Returns true if OP contains a symbol reference */
9041 bool
9042 symbolic_reference_mentioned_p (rtx op)
9044 const char *fmt;
9045 int i;
9047 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9048 return true;
9050 fmt = GET_RTX_FORMAT (GET_CODE (op));
9051 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9053 if (fmt[i] == 'E')
9055 int j;
9057 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9058 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9059 return true;
9062 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9063 return true;
9066 return false;
9069 /* Return true if it is appropriate to emit `ret' instructions in the
9070 body of a function. Do this only if the epilogue is simple, needing a
9071 couple of insns. Prior to reloading, we can't tell how many registers
9072 must be saved, so return false then. Return false if there is no frame
9073 marker to de-allocate. */
9075 bool
9076 ix86_can_use_return_insn_p (void)
9078 struct ix86_frame frame;
9080 if (! reload_completed || frame_pointer_needed)
9081 return 0;
9083 /* Don't allow more than 32k pop, since that's all we can do
9084 with one instruction. */
9085 if (crtl->args.pops_args && crtl->args.size >= 32768)
9086 return 0;
9088 ix86_compute_frame_layout (&frame);
9089 return (frame.stack_pointer_offset == UNITS_PER_WORD
9090 && (frame.nregs + frame.nsseregs) == 0);
9093 /* Value should be nonzero if functions must have frame pointers.
9094 Zero means the frame pointer need not be set up (and parms may
9095 be accessed via the stack pointer) in functions that seem suitable. */
9097 static bool
9098 ix86_frame_pointer_required (void)
9100 /* If we accessed previous frames, then the generated code expects
9101 to be able to access the saved ebp value in our frame. */
9102 if (cfun->machine->accesses_prev_frame)
9103 return true;
9105 /* Several x86 OSes need a frame pointer for other reasons,
9106 usually pertaining to setjmp. */
9107 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9108 return true;
9110 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
9111 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9112 return true;
9114 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
9115 stack allocation is 4GB. */
9116 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9117 return true;
9119 if (crtl->profile && !flag_fentry)
9120 return true;
9122 return false;
9125 /* Return true if the frame pointer of the function could be omitted. */
9127 static bool
9128 ix86_can_omit_leaf_frame_pointer (void)
9130 return TARGET_OMIT_LEAF_FRAME_POINTER
9131 && (crtl->is_leaf
9132 && !ix86_current_function_calls_tls_descriptor);
9135 /* Record that the current function accesses previous call frames. */
9137 void
9138 ix86_setup_frame_addresses (void)
9140 cfun->machine->accesses_prev_frame = 1;
9143 #ifndef USE_HIDDEN_LINKONCE
9144 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9145 # define USE_HIDDEN_LINKONCE 1
9146 # else
9147 # define USE_HIDDEN_LINKONCE 0
9148 # endif
9149 #endif
9151 static int pic_labels_used;
9153 /* Fills in the label name that should be used for a pc thunk for
9154 the given register. */
9156 static void
9157 get_pc_thunk_name (char name[32], unsigned int regno)
9159 gcc_assert (!TARGET_64BIT);
9161 if (USE_HIDDEN_LINKONCE)
9162 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9163 else
9164 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9168 /* This function generates the pc thunks used for -fpic; each thunk loads
9169 its register with the return address of the caller and then returns. */
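/* For example, the thunk emitted for %ebx is essentially

	__x86.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   and output_set_got below pairs it with "call __x86.get_pc_thunk.bx"
   followed by an add of the GOT base symbol, turning the return address
   into the PIC base.  */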
9171 static void
9172 ix86_code_end (void)
9174 rtx xops[2];
9175 int regno;
9177 for (regno = AX_REG; regno <= SP_REG; regno++)
9179 char name[32];
9180 tree decl;
9182 if (!(pic_labels_used & (1 << regno)))
9183 continue;
9185 get_pc_thunk_name (name, regno);
9187 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9188 get_identifier (name),
9189 build_function_type_list (void_type_node, NULL_TREE));
9190 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9191 NULL_TREE, void_type_node);
9192 TREE_PUBLIC (decl) = 1;
9193 TREE_STATIC (decl) = 1;
9194 DECL_IGNORED_P (decl) = 1;
9196 #if TARGET_MACHO
9197 if (TARGET_MACHO)
9199 switch_to_section (darwin_sections[text_coal_section]);
9200 fputs ("\t.weak_definition\t", asm_out_file);
9201 assemble_name (asm_out_file, name);
9202 fputs ("\n\t.private_extern\t", asm_out_file);
9203 assemble_name (asm_out_file, name);
9204 putc ('\n', asm_out_file);
9205 ASM_OUTPUT_LABEL (asm_out_file, name);
9206 DECL_WEAK (decl) = 1;
9208 else
9209 #endif
9210 if (USE_HIDDEN_LINKONCE)
9212 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
9214 targetm.asm_out.unique_section (decl, 0);
9215 switch_to_section (get_named_section (decl, NULL, 0));
9217 targetm.asm_out.globalize_label (asm_out_file, name);
9218 fputs ("\t.hidden\t", asm_out_file);
9219 assemble_name (asm_out_file, name);
9220 putc ('\n', asm_out_file);
9221 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9223 else
9225 switch_to_section (text_section);
9226 ASM_OUTPUT_LABEL (asm_out_file, name);
9229 DECL_INITIAL (decl) = make_node (BLOCK);
9230 current_function_decl = decl;
9231 init_function_start (decl);
9232 first_function_block_is_cold = false;
9233 /* Make sure unwind info is emitted for the thunk if needed. */
9234 final_start_function (emit_barrier (), asm_out_file, 1);
9236 /* Pad stack IP move with 4 instructions (two NOPs count
9237 as one instruction). */
9238 if (TARGET_PAD_SHORT_FUNCTION)
9240 int i = 8;
9242 while (i--)
9243 fputs ("\tnop\n", asm_out_file);
9246 xops[0] = gen_rtx_REG (Pmode, regno);
9247 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9248 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9249 fputs ("\tret\n", asm_out_file);
9250 final_end_function ();
9251 init_insn_lengths ();
9252 free_after_compilation (cfun);
9253 set_cfun (NULL);
9254 current_function_decl = NULL;
9257 if (flag_split_stack)
9258 file_end_indicate_split_stack ();
9261 /* Emit code for the SET_GOT patterns. */
9263 const char *
9264 output_set_got (rtx dest, rtx label)
9266 rtx xops[3];
9268 xops[0] = dest;
9270 if (TARGET_VXWORKS_RTP && flag_pic)
9272 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9273 xops[2] = gen_rtx_MEM (Pmode,
9274 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9275 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9277 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9278 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9279 an unadorned address. */
9280 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9281 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9282 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9283 return "";
9286 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9288 if (!flag_pic)
9290 if (TARGET_MACHO)
9291 /* We don't need a pic base, we're not producing pic. */
9292 gcc_unreachable ();
9294 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9295 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9296 targetm.asm_out.internal_label (asm_out_file, "L",
9297 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9299 else
9301 char name[32];
9302 get_pc_thunk_name (name, REGNO (dest));
9303 pic_labels_used |= 1 << REGNO (dest);
9305 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9306 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9307 output_asm_insn ("call\t%X2", xops);
9309 #if TARGET_MACHO
9310 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9311 This is what will be referenced by the Mach-O PIC subsystem. */
9312 if (machopic_should_output_picbase_label () || !label)
9313 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9315 /* When we are restoring the pic base at the site of a nonlocal label,
9316 and we decided to emit the pic base above, we will still output a
9317 local label used for calculating the correction offset (even though
9318 the offset will be 0 in that case). */
9319 if (label)
9320 targetm.asm_out.internal_label (asm_out_file, "L",
9321 CODE_LABEL_NUMBER (label));
9322 #endif
9325 if (!TARGET_MACHO)
9326 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9328 return "";
9331 /* Generate a "push" pattern for input ARG. */
9333 static rtx
9334 gen_push (rtx arg)
9336 struct machine_function *m = cfun->machine;
9338 if (m->fs.cfa_reg == stack_pointer_rtx)
9339 m->fs.cfa_offset += UNITS_PER_WORD;
9340 m->fs.sp_offset += UNITS_PER_WORD;
9342 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9343 arg = gen_rtx_REG (word_mode, REGNO (arg));
9345 return gen_rtx_SET (VOIDmode,
9346 gen_rtx_MEM (word_mode,
9347 gen_rtx_PRE_DEC (Pmode,
9348 stack_pointer_rtx)),
9349 arg);
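/* A rough sketch of the RTL built above for a 64-bit register push:

	(set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI ...))

   which the backend matches as a plain push instruction.  */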
9352 /* Generate a "pop" pattern for input ARG. */
9354 static rtx
9355 gen_pop (rtx arg)
9357 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9358 arg = gen_rtx_REG (word_mode, REGNO (arg));
9360 return gen_rtx_SET (VOIDmode,
9361 arg,
9362 gen_rtx_MEM (word_mode,
9363 gen_rtx_POST_INC (Pmode,
9364 stack_pointer_rtx)));
9367 /* Return >= 0 if there is an unused call-clobbered register available
9368 for the entire function. */
9370 static unsigned int
9371 ix86_select_alt_pic_regnum (void)
9373 if (crtl->is_leaf
9374 && !crtl->profile
9375 && !ix86_current_function_calls_tls_descriptor)
9377 int i, drap;
9378 /* Can't use the same register for both PIC and DRAP. */
9379 if (crtl->drap_reg)
9380 drap = REGNO (crtl->drap_reg);
9381 else
9382 drap = -1;
9383 for (i = 2; i >= 0; --i)
9384 if (i != drap && !df_regs_ever_live_p (i))
9385 return i;
9388 return INVALID_REGNUM;
9391 /* Return TRUE if we need to save REGNO. */
9393 static bool
9394 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9396 if (pic_offset_table_rtx
9397 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9398 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9399 || crtl->profile
9400 || crtl->calls_eh_return
9401 || crtl->uses_const_pool
9402 || cfun->has_nonlocal_label))
9403 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9405 if (crtl->calls_eh_return && maybe_eh_return)
9407 unsigned i;
9408 for (i = 0; ; i++)
9410 unsigned test = EH_RETURN_DATA_REGNO (i);
9411 if (test == INVALID_REGNUM)
9412 break;
9413 if (test == regno)
9414 return true;
9418 if (crtl->drap_reg
9419 && regno == REGNO (crtl->drap_reg)
9420 && !cfun->machine->no_drap_save_restore)
9421 return true;
9423 return (df_regs_ever_live_p (regno)
9424 && !call_used_regs[regno]
9425 && !fixed_regs[regno]
9426 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9429 /* Return the number of saved general purpose registers. */
9431 static int
9432 ix86_nsaved_regs (void)
9434 int nregs = 0;
9435 int regno;
9437 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9438 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9439 nregs ++;
9440 return nregs;
9443 /* Return the number of saved SSE registers. */
9445 static int
9446 ix86_nsaved_sseregs (void)
9448 int nregs = 0;
9449 int regno;
9451 if (!TARGET_64BIT_MS_ABI)
9452 return 0;
9453 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9454 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9455 nregs ++;
9456 return nregs;
9459 /* Given FROM and TO register numbers, say whether this elimination is
9460 allowed. If stack alignment is needed, we can only replace argument
9461 pointer with hard frame pointer, or replace frame pointer with stack
9462 pointer. Otherwise, frame pointer elimination is automatically
9463 handled and all other eliminations are valid. */
9465 static bool
9466 ix86_can_eliminate (const int from, const int to)
9468 if (stack_realign_fp)
9469 return ((from == ARG_POINTER_REGNUM
9470 && to == HARD_FRAME_POINTER_REGNUM)
9471 || (from == FRAME_POINTER_REGNUM
9472 && to == STACK_POINTER_REGNUM));
9473 else
9474 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9477 /* Return the offset between two registers, one to be eliminated, and the other
9478 its replacement, at the start of a routine. */
9480 HOST_WIDE_INT
9481 ix86_initial_elimination_offset (int from, int to)
9483 struct ix86_frame frame;
9484 ix86_compute_frame_layout (&frame);
9486 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9487 return frame.hard_frame_pointer_offset;
9488 else if (from == FRAME_POINTER_REGNUM
9489 && to == HARD_FRAME_POINTER_REGNUM)
9490 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9491 else
9493 gcc_assert (to == STACK_POINTER_REGNUM);
9495 if (from == ARG_POINTER_REGNUM)
9496 return frame.stack_pointer_offset;
9498 gcc_assert (from == FRAME_POINTER_REGNUM);
9499 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9503 /* In a dynamically-aligned function, we can't know the offset from
9504 stack pointer to frame pointer, so we must ensure that setjmp
9505 eliminates fp against the hard fp (%ebp) rather than trying to
9506 index from %esp up to the top of the frame across a gap that is
9507 of unknown (at compile-time) size. */
9508 static rtx
9509 ix86_builtin_setjmp_frame_value (void)
9511 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9514 /* When using -fsplit-stack, the allocation routines set a field in
9515 the TCB to the bottom of the stack plus this much space, measured
9516 in bytes. */
9518 #define SPLIT_STACK_AVAILABLE 256
9520 /* Fill structure ix86_frame about frame of currently computed function. */
9522 static void
9523 ix86_compute_frame_layout (struct ix86_frame *frame)
9525 unsigned HOST_WIDE_INT stack_alignment_needed;
9526 HOST_WIDE_INT offset;
9527 unsigned HOST_WIDE_INT preferred_alignment;
9528 HOST_WIDE_INT size = get_frame_size ();
9529 HOST_WIDE_INT to_allocate;
9531 frame->nregs = ix86_nsaved_regs ();
9532 frame->nsseregs = ix86_nsaved_sseregs ();
9534 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9535 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9537 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9538 except for function prologues and leaf functions. */
9539 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9540 && (!crtl->is_leaf || cfun->calls_alloca != 0
9541 || ix86_current_function_calls_tls_descriptor))
9543 preferred_alignment = 16;
9544 stack_alignment_needed = 16;
9545 crtl->preferred_stack_boundary = 128;
9546 crtl->stack_alignment_needed = 128;
9549 gcc_assert (!size || stack_alignment_needed);
9550 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9551 gcc_assert (preferred_alignment <= stack_alignment_needed);
9553 /* For SEH we have to limit the amount of code movement into the prologue.
9554 At present we do this via a BLOCKAGE, at which point there's very little
9555 scheduling that can be done, which means that there's very little point
9556 in doing anything except PUSHs. */
9557 if (TARGET_SEH)
9558 cfun->machine->use_fast_prologue_epilogue = false;
9560 /* During the reload iteration the number of saved registers can change.
9561 Recompute the value as needed. Do not recompute when the number of registers
9562 didn't change, as reload makes multiple calls to this function and does not
9563 expect the decision to change within a single iteration. */
9564 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9565 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9567 int count = frame->nregs;
9568 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9570 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9572 /* The fast prologue uses move instead of push to save registers. This
9573 is significantly longer, but also executes faster as modern hardware
9574 can execute the moves in parallel, but can't do that for push/pop.
9576 Be careful about choosing which prologue to emit: when the function takes
9577 many instructions to execute we may use the slow version, as well as when
9578 the function is known to be outside a hot spot (this is known with
9579 feedback only). Weight the size of the function by the number of registers
9580 to save, as it is cheap to use one or two push instructions but very
9581 slow to use many of them. */
9582 if (count)
9583 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9584 if (node->frequency < NODE_FREQUENCY_NORMAL
9585 || (flag_branch_probabilities
9586 && node->frequency < NODE_FREQUENCY_HOT))
9587 cfun->machine->use_fast_prologue_epilogue = false;
9588 else
9589 cfun->machine->use_fast_prologue_epilogue
9590 = !expensive_function_p (count);
9593 frame->save_regs_using_mov
9594 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9595 /* If static stack checking is enabled and done with probes,
9596 the registers need to be saved before allocating the frame. */
9597 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9599 /* Skip return address. */
9600 offset = UNITS_PER_WORD;
9602 /* Skip pushed static chain. */
9603 if (ix86_static_chain_on_stack)
9604 offset += UNITS_PER_WORD;
9606 /* Skip saved base pointer. */
9607 if (frame_pointer_needed || frame_pointer_partially_needed)
9608 offset += UNITS_PER_WORD;
9609 frame->hfp_save_offset = offset;
9611 /* The traditional frame pointer location is at the top of the frame. */
9612 frame->hard_frame_pointer_offset = offset;
9614 /* Register save area */
9615 offset += frame->nregs * UNITS_PER_WORD;
9616 frame->reg_save_offset = offset;
9618 /* On SEH target, registers are pushed just before the frame pointer
9619 location. */
9620 if (TARGET_SEH)
9621 frame->hard_frame_pointer_offset = offset;
9623 /* Align and set SSE register save area. */
9624 if (frame->nsseregs)
9626 /* The only ABI that has saved SSE registers (Win64) also has a
9627 16-byte aligned default stack, and thus we don't need to be
9628 within the re-aligned local stack frame to save them. */
9629 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9630 offset = (offset + 16 - 1) & -16;
9631 offset += frame->nsseregs * 16;
9633 frame->sse_reg_save_offset = offset;
9635 /* The re-aligned stack starts here. Values before this point are not
9636 directly comparable with values below this point. In order to make
9637 sure that no value happens to be the same before and after, force
9638 the alignment computation below to add a non-zero value. */
9639 if (stack_realign_fp)
9640 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
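/* Unlike the usual "+ align - 1" round-up, adding the full alignment makes
   the result strictly greater than the incoming offset; e.g. offset == 64
   with stack_alignment_needed == 32 yields (64 + 32) & -32 == 96.  */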
9642 /* Va-arg area */
9643 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9644 offset += frame->va_arg_size;
9646 /* Align start of frame for local function. */
9647 if (stack_realign_fp
9648 || offset != frame->sse_reg_save_offset
9649 || size != 0
9650 || !crtl->is_leaf
9651 || cfun->calls_alloca
9652 || ix86_current_function_calls_tls_descriptor)
9653 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9655 /* Frame pointer points here. */
9656 frame->frame_pointer_offset = offset;
9658 offset += size;
9660 /* Add the outgoing arguments area. This can be skipped if we eliminated
9661 all the function calls as dead code.
9662 Skipping is however impossible when the function calls alloca. The alloca
9663 expander assumes that the last crtl->outgoing_args_size bytes
9664 of the stack frame are unused. */
9665 if (ACCUMULATE_OUTGOING_ARGS
9666 && (!crtl->is_leaf || cfun->calls_alloca
9667 || ix86_current_function_calls_tls_descriptor))
9669 offset += crtl->outgoing_args_size;
9670 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9672 else
9673 frame->outgoing_arguments_size = 0;
9675 /* Align stack boundary. Only needed if we're calling another function
9676 or using alloca. */
9677 if (!crtl->is_leaf || cfun->calls_alloca
9678 || ix86_current_function_calls_tls_descriptor)
9679 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9681 /* We've reached end of stack frame. */
9682 frame->stack_pointer_offset = offset;
9684 /* Size prologue needs to allocate. */
9685 to_allocate = offset - frame->sse_reg_save_offset;
9687 if ((!to_allocate && frame->nregs <= 1)
9688 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9689 frame->save_regs_using_mov = false;
9691 if (ix86_using_red_zone ()
9692 && crtl->sp_is_unchanging
9693 && crtl->is_leaf
9694 && !ix86_current_function_calls_tls_descriptor)
9696 frame->red_zone_size = to_allocate;
9697 if (frame->save_regs_using_mov)
9698 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9699 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9700 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9702 else
9703 frame->red_zone_size = 0;
9704 frame->stack_pointer_offset -= frame->red_zone_size;
9706 /* The SEH frame pointer location is near the bottom of the frame.
9707 This is enforced by the fact that the difference between the
9708 stack pointer and the frame pointer is limited to 240 bytes in
9709 the unwind data structure. */
9710 if (TARGET_SEH)
9712 HOST_WIDE_INT diff;
9714 /* If we can leave the frame pointer where it is, do so. Also, returns
9715 the establisher frame for __builtin_frame_address (0). */
9716 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9717 if (diff <= SEH_MAX_FRAME_SIZE
9718 && (diff > 240 || (diff & 15) != 0)
9719 && !crtl->accesses_prior_frames)
9721 /* Ideally we'd determine what portion of the local stack frame
9722 (within the constraint of the lowest 240) is most heavily used.
9723 But without that complication, simply bias the frame pointer
9724 by 128 bytes so as to maximize the amount of the local stack
9725 frame that is addressable with 8-bit offsets. */
9726 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
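/* Rough sketch of the layout computed above, going down from the return
   address: optional pushed static chain, saved frame pointer
   (hard_frame_pointer_offset), general register save area (reg_save_offset),
   16-byte aligned SSE save area (sse_reg_save_offset), va_arg register save
   area, local variables (frame_pointer_offset), and the outgoing argument
   block ending at stack_pointer_offset.  */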
9731 /* This is semi-inlined memory_address_length, but simplified
9732 since we know that we're always dealing with reg+offset, and
9733 to avoid having to create and discard all that rtl. */
9735 static inline int
9736 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9738 int len = 4;
9740 if (offset == 0)
9742 /* EBP and R13 cannot be encoded without an offset. */
9743 len = (regno == BP_REG || regno == R13_REG);
9745 else if (IN_RANGE (offset, -128, 127))
9746 len = 1;
9748 /* ESP and R12 must be encoded with a SIB byte. */
9749 if (regno == SP_REG || regno == R12_REG)
9750 len++;
9752 return len;
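/* Examples of the encoding lengths returned: 0(%eax) needs no displacement
   (len 0), 0(%ebp) still needs a disp8 of zero (len 1), -8(%esp) needs a SIB
   byte plus a disp8 (len 2), and 0x1000(%ebx) needs a disp32 (len 4).  */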
9755 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9756 The valid base registers are taken from CFUN->MACHINE->FS. */
9758 static rtx
9759 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9761 const struct machine_function *m = cfun->machine;
9762 rtx base_reg = NULL;
9763 HOST_WIDE_INT base_offset = 0;
9765 if (m->use_fast_prologue_epilogue)
9767 /* Choose the base register most likely to allow the most scheduling
9768 opportunities. Generally FP is valid throughout the function,
9769 while DRAP must be reloaded within the epilogue. But choose either
9770 over the SP due to increased encoding size. */
9772 if (m->fs.fp_valid)
9774 base_reg = hard_frame_pointer_rtx;
9775 base_offset = m->fs.fp_offset - cfa_offset;
9777 else if (m->fs.drap_valid)
9779 base_reg = crtl->drap_reg;
9780 base_offset = 0 - cfa_offset;
9782 else if (m->fs.sp_valid)
9784 base_reg = stack_pointer_rtx;
9785 base_offset = m->fs.sp_offset - cfa_offset;
9788 else
9790 HOST_WIDE_INT toffset;
9791 int len = 16, tlen;
9793 /* Choose the base register with the smallest address encoding.
9794 With a tie, choose FP > DRAP > SP. */
9795 if (m->fs.sp_valid)
9797 base_reg = stack_pointer_rtx;
9798 base_offset = m->fs.sp_offset - cfa_offset;
9799 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9801 if (m->fs.drap_valid)
9803 toffset = 0 - cfa_offset;
9804 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9805 if (tlen <= len)
9807 base_reg = crtl->drap_reg;
9808 base_offset = toffset;
9809 len = tlen;
9812 if (m->fs.fp_valid)
9814 toffset = m->fs.fp_offset - cfa_offset;
9815 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9816 if (tlen <= len)
9818 base_reg = hard_frame_pointer_rtx;
9819 base_offset = toffset;
9820 len = tlen;
9824 gcc_assert (base_reg != NULL);
9826 return plus_constant (Pmode, base_reg, base_offset);
9829 /* Emit code to save registers in the prologue. */
9831 static void
9832 ix86_emit_save_regs (void)
9834 unsigned int regno;
9835 rtx insn;
9837 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9838 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9840 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9841 RTX_FRAME_RELATED_P (insn) = 1;
9845 /* Emit a single register save at CFA - CFA_OFFSET. */
9847 static void
9848 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9849 HOST_WIDE_INT cfa_offset)
9851 struct machine_function *m = cfun->machine;
9852 rtx reg = gen_rtx_REG (mode, regno);
9853 rtx mem, addr, base, insn;
9855 addr = choose_baseaddr (cfa_offset);
9856 mem = gen_frame_mem (mode, addr);
9858 /* For SSE saves, we need to indicate the 128-bit alignment. */
9859 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9861 insn = emit_move_insn (mem, reg);
9862 RTX_FRAME_RELATED_P (insn) = 1;
9864 base = addr;
9865 if (GET_CODE (base) == PLUS)
9866 base = XEXP (base, 0);
9867 gcc_checking_assert (REG_P (base));
9869 /* When saving registers into a re-aligned local stack frame, avoid
9870 any tricky guessing by dwarf2out. */
9871 if (m->fs.realigned)
9873 gcc_checking_assert (stack_realign_drap);
9875 if (regno == REGNO (crtl->drap_reg))
9877 /* A bit of a hack. We force the DRAP register to be saved in
9878 the re-aligned stack frame, which provides us with a copy
9879 of the CFA that will last past the prologue. Install it. */
9880 gcc_checking_assert (cfun->machine->fs.fp_valid);
9881 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9882 cfun->machine->fs.fp_offset - cfa_offset);
9883 mem = gen_rtx_MEM (mode, addr);
9884 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9886 else
9888 /* The frame pointer is a stable reference within the
9889 aligned frame. Use it. */
9890 gcc_checking_assert (cfun->machine->fs.fp_valid);
9891 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9892 cfun->machine->fs.fp_offset - cfa_offset);
9893 mem = gen_rtx_MEM (mode, addr);
9894 add_reg_note (insn, REG_CFA_EXPRESSION,
9895 gen_rtx_SET (VOIDmode, mem, reg));
9899 /* The memory may not be relative to the current CFA register,
9900 which means that we may need to generate a new pattern for
9901 use by the unwind info. */
9902 else if (base != m->fs.cfa_reg)
9904 addr = plus_constant (Pmode, m->fs.cfa_reg,
9905 m->fs.cfa_offset - cfa_offset);
9906 mem = gen_rtx_MEM (mode, addr);
9907 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9911 /* Emit code to save registers using MOV insns.
9912 First register is stored at CFA - CFA_OFFSET. */
9913 static void
9914 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9916 unsigned int regno;
9918 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9919 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9921 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9922 cfa_offset -= UNITS_PER_WORD;
9926 /* Emit code to save SSE registers using MOV insns.
9927 First register is stored at CFA - CFA_OFFSET. */
9928 static void
9929 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9931 unsigned int regno;
9933 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9934 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9936 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9937 cfa_offset -= 16;
9941 static GTY(()) rtx queued_cfa_restores;
9943 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9944 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9945 Don't add the note if the previously saved value will be left untouched
9946 within stack red-zone till return, as unwinders can find the same value
9947 in the register and on the stack. */
9949 static void
9950 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9952 if (!crtl->shrink_wrapped
9953 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9954 return;
9956 if (insn)
9958 add_reg_note (insn, REG_CFA_RESTORE, reg);
9959 RTX_FRAME_RELATED_P (insn) = 1;
9961 else
9962 queued_cfa_restores
9963 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9966 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9968 static void
9969 ix86_add_queued_cfa_restore_notes (rtx insn)
9971 rtx last;
9972 if (!queued_cfa_restores)
9973 return;
9974 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9976 XEXP (last, 1) = REG_NOTES (insn);
9977 REG_NOTES (insn) = queued_cfa_restores;
9978 queued_cfa_restores = NULL_RTX;
9979 RTX_FRAME_RELATED_P (insn) = 1;
9982 /* Expand prologue or epilogue stack adjustment.
9983 The pattern exists to put a dependency on all ebp-based memory accesses.
9984 STYLE should be negative if instructions should be marked as frame related,
9985 zero if the %r11 register is live and cannot be freely used, and positive
9986 otherwise. */
9988 static void
9989 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9990 int style, bool set_cfa)
9992 struct machine_function *m = cfun->machine;
9993 rtx insn;
9994 bool add_frame_related_expr = false;
9996 if (Pmode == SImode)
9997 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9998 else if (x86_64_immediate_operand (offset, DImode))
9999 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10000 else
10002 rtx tmp;
10003 /* r11 is used by indirect sibcall return as well, set before the
10004 epilogue and used after the epilogue. */
10005 if (style)
10006 tmp = gen_rtx_REG (DImode, R11_REG);
10007 else
10009 gcc_assert (src != hard_frame_pointer_rtx
10010 && dest != hard_frame_pointer_rtx);
10011 tmp = hard_frame_pointer_rtx;
10013 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10014 if (style < 0)
10015 add_frame_related_expr = true;
10017 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10020 insn = emit_insn (insn);
10021 if (style >= 0)
10022 ix86_add_queued_cfa_restore_notes (insn);
10024 if (set_cfa)
10026 rtx r;
10028 gcc_assert (m->fs.cfa_reg == src);
10029 m->fs.cfa_offset += INTVAL (offset);
10030 m->fs.cfa_reg = dest;
10032 r = gen_rtx_PLUS (Pmode, src, offset);
10033 r = gen_rtx_SET (VOIDmode, dest, r);
10034 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10035 RTX_FRAME_RELATED_P (insn) = 1;
10037 else if (style < 0)
10039 RTX_FRAME_RELATED_P (insn) = 1;
10040 if (add_frame_related_expr)
10042 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10043 r = gen_rtx_SET (VOIDmode, dest, r);
10044 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10048 if (dest == stack_pointer_rtx)
10050 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10051 bool valid = m->fs.sp_valid;
10053 if (src == hard_frame_pointer_rtx)
10055 valid = m->fs.fp_valid;
10056 ooffset = m->fs.fp_offset;
10058 else if (src == crtl->drap_reg)
10060 valid = m->fs.drap_valid;
10061 ooffset = 0;
10063 else
10065 /* Else there are two possibilities: SP itself, which we set
10066 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10067 taken care of by hand along the eh_return path. */
10068 gcc_checking_assert (src == stack_pointer_rtx
10069 || offset == const0_rtx);
10072 m->fs.sp_offset = ooffset - INTVAL (offset);
10073 m->fs.sp_valid = valid;
10077 /* Find an available register to be used as the dynamic realign argument
10078 pointer register. Such a register will be written in the prologue and
10079 used at the beginning of the body, so it must not be
10080 1. a parameter passing register.
10081 2. the GOT pointer.
10082 We reuse the static-chain register if it is available. Otherwise, we
10083 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10084 shorter encoding.
10086 Return: the regno of the chosen register. */
10088 static unsigned int
10089 find_drap_reg (void)
10091 tree decl = cfun->decl;
10093 if (TARGET_64BIT)
10095 /* Use R13 for a nested function or a function that needs a static chain.
10096 Since a function with a tail call may use any caller-saved
10097 register in the epilogue, DRAP must not use a caller-saved
10098 register in such a case. */
10099 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10100 return R13_REG;
10102 return R10_REG;
10104 else
10106 /* Use DI for a nested function or a function that needs a static chain.
10107 Since a function with a tail call may use any caller-saved
10108 register in the epilogue, DRAP must not use a caller-saved
10109 register in such a case. */
10110 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10111 return DI_REG;
10113 /* Reuse static chain register if it isn't used for parameter
10114 passing. */
10115 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10117 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10118 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10119 return CX_REG;
10121 return DI_REG;
10125 /* Return minimum incoming stack alignment. */
10127 static unsigned int
10128 ix86_minimum_incoming_stack_boundary (bool sibcall)
10130 unsigned int incoming_stack_boundary;
10132 /* Prefer the one specified at command line. */
10133 if (ix86_user_incoming_stack_boundary)
10134 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10135 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10136 if -mstackrealign is used, this is not a sibcall check, and the
10137 estimated stack alignment is 128 bits. */
10138 else if (!sibcall
10139 && !TARGET_64BIT
10140 && ix86_force_align_arg_pointer
10141 && crtl->stack_alignment_estimated == 128)
10142 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10143 else
10144 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10146 /* Incoming stack alignment can be changed on individual functions
10147 via force_align_arg_pointer attribute. We use the smallest
10148 incoming stack boundary. */
10149 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10150 && lookup_attribute (ix86_force_align_arg_pointer_string,
10151 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10152 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10154 /* The incoming stack frame has to be aligned at least at
10155 parm_stack_boundary. */
10156 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10157 incoming_stack_boundary = crtl->parm_stack_boundary;
10159 /* The stack at the entrance of main is aligned by the runtime. We use the
10160 smallest incoming stack boundary. */
10161 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10162 && DECL_NAME (current_function_decl)
10163 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10164 && DECL_FILE_SCOPE_P (current_function_decl))
10165 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10167 return incoming_stack_boundary;
10170 /* Update incoming stack boundary and estimated stack alignment. */
10172 static void
10173 ix86_update_stack_boundary (void)
10175 ix86_incoming_stack_boundary
10176 = ix86_minimum_incoming_stack_boundary (false);
10178 /* x86_64 varargs need 16-byte stack alignment for the register save
10179 area. */
10180 if (TARGET_64BIT
10181 && cfun->stdarg
10182 && crtl->stack_alignment_estimated < 128)
10183 crtl->stack_alignment_estimated = 128;
10186 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10187 needed or an rtx for DRAP otherwise. */
10189 static rtx
10190 ix86_get_drap_rtx (void)
10192 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10193 crtl->need_drap = true;
10195 if (stack_realign_drap)
10197 /* Assign DRAP to vDRAP and return vDRAP. */
10198 unsigned int regno = find_drap_reg ();
10199 rtx drap_vreg;
10200 rtx arg_ptr;
10201 rtx seq, insn;
10203 arg_ptr = gen_rtx_REG (Pmode, regno);
10204 crtl->drap_reg = arg_ptr;
10206 start_sequence ();
10207 drap_vreg = copy_to_reg (arg_ptr);
10208 seq = get_insns ();
10209 end_sequence ();
10211 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10212 if (!optimize)
10214 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10215 RTX_FRAME_RELATED_P (insn) = 1;
10217 return drap_vreg;
10219 else
10220 return NULL;
10223 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10225 static rtx
10226 ix86_internal_arg_pointer (void)
10228 return virtual_incoming_args_rtx;
10231 struct scratch_reg {
10232 rtx reg;
10233 bool saved;
10236 /* Return a short-lived scratch register for use on function entry.
10237 In 32-bit mode, it is valid only after the registers are saved
10238 in the prologue. This register must be released by means of
10239 release_scratch_register_on_entry once it is dead. */
10241 static void
10242 get_scratch_register_on_entry (struct scratch_reg *sr)
10244 int regno;
10246 sr->saved = false;
10248 if (TARGET_64BIT)
10250 /* We always use R11 in 64-bit mode. */
10251 regno = R11_REG;
10253 else
10255 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10256 bool fastcall_p
10257 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10258 bool thiscall_p
10259 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10260 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10261 int regparm = ix86_function_regparm (fntype, decl);
10262 int drap_regno
10263 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10265 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10266 for the static chain register. */
10267 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10268 && drap_regno != AX_REG)
10269 regno = AX_REG;
10270 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10271 for the static chain register. */
10272 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10273 regno = AX_REG;
10274 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10275 regno = DX_REG;
10276 /* ecx is the static chain register. */
10277 else if (regparm < 3 && !fastcall_p && !thiscall_p
10278 && !static_chain_p
10279 && drap_regno != CX_REG)
10280 regno = CX_REG;
10281 else if (ix86_save_reg (BX_REG, true))
10282 regno = BX_REG;
10283 /* esi is the static chain register. */
10284 else if (!(regparm == 3 && static_chain_p)
10285 && ix86_save_reg (SI_REG, true))
10286 regno = SI_REG;
10287 else if (ix86_save_reg (DI_REG, true))
10288 regno = DI_REG;
10289 else
10291 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10292 sr->saved = true;
10296 sr->reg = gen_rtx_REG (Pmode, regno);
10297 if (sr->saved)
10299 rtx insn = emit_insn (gen_push (sr->reg));
10300 RTX_FRAME_RELATED_P (insn) = 1;
10304 /* Release a scratch register obtained from the preceding function. */
10306 static void
10307 release_scratch_register_on_entry (struct scratch_reg *sr)
10309 if (sr->saved)
10311 struct machine_function *m = cfun->machine;
10312 rtx x, insn = emit_insn (gen_pop (sr->reg));
10314 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10315 RTX_FRAME_RELATED_P (insn) = 1;
10316 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10317 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10318 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10319 m->fs.sp_offset -= UNITS_PER_WORD;
10323 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10325 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10327 static void
10328 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10330 /* We skip the probe for the first interval + a small dope of 4 words and
10331 probe that many bytes past the specified size to maintain a protection
10332 area at the bottom of the stack. */
10333 const int dope = 4 * UNITS_PER_WORD;
10334 rtx size_rtx = GEN_INT (size), last;
10336 /* See if we have a constant small number of probes to generate. If so,
10337 that's the easy case. The run-time loop is made up of 11 insns in the
10338 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10339 for n # of intervals. */
10340 if (size <= 5 * PROBE_INTERVAL)
10342 HOST_WIDE_INT i, adjust;
10343 bool first_probe = true;
10345 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10346 values of N from 1 until it exceeds SIZE. If only one probe is
10347 needed, this will not generate any code. Then adjust and probe
10348 to PROBE_INTERVAL + SIZE. */
10349 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10351 if (first_probe)
10353 adjust = 2 * PROBE_INTERVAL + dope;
10354 first_probe = false;
10356 else
10357 adjust = PROBE_INTERVAL;
10359 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10360 plus_constant (Pmode, stack_pointer_rtx,
10361 -adjust)));
10362 emit_stack_probe (stack_pointer_rtx);
10365 if (first_probe)
10366 adjust = size + PROBE_INTERVAL + dope;
10367 else
10368 adjust = size + PROBE_INTERVAL - i;
10370 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10371 plus_constant (Pmode, stack_pointer_rtx,
10372 -adjust)));
10373 emit_stack_probe (stack_pointer_rtx);
10375 /* Adjust back to account for the additional first interval. */
10376 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10377 plus_constant (Pmode, stack_pointer_rtx,
10378 PROBE_INTERVAL + dope)));
10381 /* Otherwise, do the same as above, but in a loop. Note that we must be
10382 extra careful with variables wrapping around because we might be at
10383 the very top (or the very bottom) of the address space and we have
10384 to be able to handle this case properly; in particular, we use an
10385 equality test for the loop condition. */
10386 else
10388 HOST_WIDE_INT rounded_size;
10389 struct scratch_reg sr;
10391 get_scratch_register_on_entry (&sr);
10394 /* Step 1: round SIZE to the previous multiple of the interval. */
10396 rounded_size = size & -PROBE_INTERVAL;
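/* Round SIZE down to a multiple of the probe interval; e.g. with the default
   4096-byte interval and size == 10000, rounded_size == 8192 and the
   remaining 1808 bytes are handled by the final adjust-and-probe in step 4.  */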
10399 /* Step 2: compute initial and final value of the loop counter. */
10401 /* SP = SP_0 + PROBE_INTERVAL. */
10402 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10403 plus_constant (Pmode, stack_pointer_rtx,
10404 - (PROBE_INTERVAL + dope))));
10406 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10407 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10408 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10409 gen_rtx_PLUS (Pmode, sr.reg,
10410 stack_pointer_rtx)));
10413 /* Step 3: the loop
10415 while (SP != LAST_ADDR)
10417 SP = SP + PROBE_INTERVAL
10418 probe at SP
10421 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10422 values of N from 1 until it is equal to ROUNDED_SIZE. */
10424 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10427 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10428 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10430 if (size != rounded_size)
10432 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10433 plus_constant (Pmode, stack_pointer_rtx,
10434 rounded_size - size)));
10435 emit_stack_probe (stack_pointer_rtx);
10438 /* Adjust back to account for the additional first interval. */
10439 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10440 plus_constant (Pmode, stack_pointer_rtx,
10441 PROBE_INTERVAL + dope)));
10443 release_scratch_register_on_entry (&sr);
10446 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10448 /* Even if the stack pointer isn't the CFA register, we need to correctly
10449 describe the adjustments made to it, in particular differentiate the
10450 frame-related ones from the frame-unrelated ones. */
10451 if (size > 0)
10453 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10454 XVECEXP (expr, 0, 0)
10455 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10456 plus_constant (Pmode, stack_pointer_rtx, -size));
10457 XVECEXP (expr, 0, 1)
10458 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10459 plus_constant (Pmode, stack_pointer_rtx,
10460 PROBE_INTERVAL + dope + size));
10461 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10462 RTX_FRAME_RELATED_P (last) = 1;
10464 cfun->machine->fs.sp_offset += size;
10467 /* Make sure nothing is scheduled before we are done. */
10468 emit_insn (gen_blockage ());
10471 /* Adjust the stack pointer up to REG while probing it. */
10473 const char *
10474 output_adjust_stack_and_probe (rtx reg)
10476 static int labelno = 0;
10477 char loop_lab[32], end_lab[32];
10478 rtx xops[2];
10480 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10481 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10483 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10485 /* Jump to END_LAB if SP == LAST_ADDR. */
10486 xops[0] = stack_pointer_rtx;
10487 xops[1] = reg;
10488 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10489 fputs ("\tje\t", asm_out_file);
10490 assemble_name_raw (asm_out_file, end_lab);
10491 fputc ('\n', asm_out_file);
10493 /* SP = SP + PROBE_INTERVAL. */
10494 xops[1] = GEN_INT (PROBE_INTERVAL);
10495 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10497 /* Probe at SP. */
10498 xops[1] = const0_rtx;
10499 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10501 fprintf (asm_out_file, "\tjmp\t");
10502 assemble_name_raw (asm_out_file, loop_lab);
10503 fputc ('\n', asm_out_file);
10505 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10507 return "";
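/* For reference, a rough sketch of the AT&T-syntax loop the templates above
   produce on a 64-bit target, assuming a 4096-byte PROBE_INTERVAL; the
   register and label names are illustrative only:

       .LPSRL0:
               cmpq    %r11, %rsp        # SP == LAST_ADDR ?
               je      .LPSRE0
               subq    $4096, %rsp       # advance SP one interval (stack grows down)
               orq     $0, (%rsp)        # probe the new page
               jmp     .LPSRL0
       .LPSRE0:
   */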
10510 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10511 inclusive. These are offsets from the current stack pointer. */
10513 static void
10514 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10516 /* See if we have a constant small number of probes to generate. If so,
10517 that's the easy case. The run-time loop is made up of 7 insns in the
10518 generic case while the compile-time loop is made up of n insns for n #
10519 of intervals. */
10520 if (size <= 7 * PROBE_INTERVAL)
10522 HOST_WIDE_INT i;
10524 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10525 it exceeds SIZE. If only one probe is needed, this will not
10526 generate any code. Then probe at FIRST + SIZE. */
10527 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10528 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10529 -(first + i)));
10531 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10532 -(first + size)));
10535 /* Otherwise, do the same as above, but in a loop. Note that we must be
10536 extra careful with variables wrapping around because we might be at
10537 the very top (or the very bottom) of the address space and we have
10538 to be able to handle this case properly; in particular, we use an
10539 equality test for the loop condition. */
10540 else
10542 HOST_WIDE_INT rounded_size, last;
10543 struct scratch_reg sr;
10545 get_scratch_register_on_entry (&sr);
10548 /* Step 1: round SIZE to the previous multiple of the interval. */
10550 rounded_size = size & -PROBE_INTERVAL;
10553 /* Step 2: compute initial and final value of the loop counter. */
10555 /* TEST_OFFSET = FIRST. */
10556 emit_move_insn (sr.reg, GEN_INT (-first));
10558 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10559 last = first + rounded_size;
10562 /* Step 3: the loop
10564 while (TEST_ADDR != LAST_ADDR)
10566 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10567 probe at TEST_ADDR
10570 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10571 until it is equal to ROUNDED_SIZE. */
10573 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10576 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10577 that SIZE is equal to ROUNDED_SIZE. */
10579 if (size != rounded_size)
10580 emit_stack_probe (plus_constant (Pmode,
10581 gen_rtx_PLUS (Pmode,
10582 stack_pointer_rtx,
10583 sr.reg),
10584 rounded_size - size));
10586 release_scratch_register_on_entry (&sr);
10589 /* Make sure nothing is scheduled before we are done. */
10590 emit_insn (gen_blockage ());
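/* Worked example (editor's sketch, illustrative values, 4096-byte
   PROBE_INTERVAL assumed): for first == 8192 and size == 12288 the constant
   case above emits probes at sp - 12288 and sp - 16384 (FIRST + N *
   PROBE_INTERVAL for N = 1, 2) and then a final probe at sp - 20480
   (FIRST + SIZE).  */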
10593 /* Probe a range of stack addresses from REG to END, inclusive. These are
10594 offsets from the current stack pointer. */
10596 const char *
10597 output_probe_stack_range (rtx reg, rtx end)
10599 static int labelno = 0;
10600 char loop_lab[32], end_lab[32];
10601 rtx xops[3];
10603 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10604 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10606 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10608 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10609 xops[0] = reg;
10610 xops[1] = end;
10611 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10612 fputs ("\tje\t", asm_out_file);
10613 assemble_name_raw (asm_out_file, end_lab);
10614 fputc ('\n', asm_out_file);
10616 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10617 xops[1] = GEN_INT (PROBE_INTERVAL);
10618 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10620 /* Probe at TEST_ADDR. */
10621 xops[0] = stack_pointer_rtx;
10622 xops[1] = reg;
10623 xops[2] = const0_rtx;
10624 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10626 fprintf (asm_out_file, "\tjmp\t");
10627 assemble_name_raw (asm_out_file, loop_lab);
10628 fputc ('\n', asm_out_file);
10630 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10632 return "";
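/* Likewise, a sketch of the loop emitted by the templates above for the
   probe-only variant (illustrative registers and labels, 4096-byte interval
   and 64-bit target assumed); the probe addresses SP plus the test offset
   held in the scratch register rather than SP itself:

       .LPSRL1:
               cmpq    %rax, %r11        # TEST_OFFSET == LAST_OFFSET ?
               je      .LPSRE1
               subq    $4096, %r11       # advance one interval
               orq     $0, (%rsp,%r11)   # probe SP + TEST_OFFSET
               jmp     .LPSRL1
       .LPSRE1:
   */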
10635 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10636 to be generated in correct form. */
10637 static void
10638 ix86_finalize_stack_realign_flags (void)
10640 /* Check if stack realign is really needed after reload, and
10641 store the result in cfun. */
10642 unsigned int incoming_stack_boundary
10643 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10644 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10645 unsigned int stack_realign = (incoming_stack_boundary
10646 < (crtl->is_leaf
10647 ? crtl->max_used_stack_slot_alignment
10648 : crtl->stack_alignment_needed));
10650 if (crtl->stack_realign_finalized)
10652 /* After stack_realign_needed is finalized, we can no longer
10653 change it. */
10654 gcc_assert (crtl->stack_realign_needed == stack_realign);
10655 return;
10658 /* If the only reason for frame_pointer_needed is that we conservatively
10659 assumed stack realignment might be needed, but in the end nothing that
10660 needed the stack alignment had been spilled, clear frame_pointer_needed
10661 and say we don't need stack realignment. */
10662 if (stack_realign
10663 && frame_pointer_needed
10664 && crtl->is_leaf
10665 && flag_omit_frame_pointer
10666 && crtl->sp_is_unchanging
10667 && !ix86_current_function_calls_tls_descriptor
10668 && !crtl->accesses_prior_frames
10669 && !cfun->calls_alloca
10670 && !crtl->calls_eh_return
10671 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10672 && !ix86_frame_pointer_required ()
10673 && get_frame_size () == 0
10674 && ix86_nsaved_sseregs () == 0
10675 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10677 HARD_REG_SET set_up_by_prologue, prologue_used;
10678 basic_block bb;
10680 CLEAR_HARD_REG_SET (prologue_used);
10681 CLEAR_HARD_REG_SET (set_up_by_prologue);
10682 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10683 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10684 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10685 HARD_FRAME_POINTER_REGNUM);
10686 FOR_EACH_BB_FN (bb, cfun)
10688 rtx insn;
10689 FOR_BB_INSNS (bb, insn)
10690 if (NONDEBUG_INSN_P (insn)
10691 && requires_stack_frame_p (insn, prologue_used,
10692 set_up_by_prologue))
10694 crtl->stack_realign_needed = stack_realign;
10695 crtl->stack_realign_finalized = true;
10696 return;
10700 /* If drap has been set, but it actually isn't live at the start
10701 of the function, there is no reason to set it up. */
10702 if (crtl->drap_reg)
10704 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10705 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10707 crtl->drap_reg = NULL_RTX;
10708 crtl->need_drap = false;
10711 else
10712 cfun->machine->no_drap_save_restore = true;
10714 frame_pointer_needed = false;
10715 stack_realign = false;
10716 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10717 crtl->stack_alignment_needed = incoming_stack_boundary;
10718 crtl->stack_alignment_estimated = incoming_stack_boundary;
10719 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10720 crtl->preferred_stack_boundary = incoming_stack_boundary;
10721 df_finish_pass (true);
10722 df_scan_alloc (NULL);
10723 df_scan_blocks ();
10724 df_compute_regs_ever_live (true);
10725 df_analyze ();
10728 crtl->stack_realign_needed = stack_realign;
10729 crtl->stack_realign_finalized = true;
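/* Editor's note: the early exit above typically triggers for small leaf
   functions that keep everything in registers, e.g. (illustrative only)

       int add3 (int a, int b, int c) { return a + b + c; }

   compiled with -fomit-frame-pointer: nothing needs more than the incoming
   alignment, so both stack realignment and the frame pointer are dropped.  */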
10732 /* Expand the prologue into a bunch of separate insns. */
10734 void
10735 ix86_expand_prologue (void)
10737 struct machine_function *m = cfun->machine;
10738 rtx insn, t;
10739 bool pic_reg_used;
10740 struct ix86_frame frame;
10741 HOST_WIDE_INT allocate;
10742 bool int_registers_saved;
10743 bool sse_registers_saved;
10745 ix86_finalize_stack_realign_flags ();
10747 /* DRAP should not coexist with stack_realign_fp */
10748 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10750 memset (&m->fs, 0, sizeof (m->fs));
10752 /* Initialize CFA state for before the prologue. */
10753 m->fs.cfa_reg = stack_pointer_rtx;
10754 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10756 /* Track SP offset to the CFA. We continue tracking this after we've
10757 swapped the CFA register away from SP. In the case of re-alignment
10758 this is fudged; we're interested in offsets within the local frame. */
10759 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10760 m->fs.sp_valid = true;
10762 ix86_compute_frame_layout (&frame);
10764 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10766 /* We should have already generated an error for any use of
10767 ms_hook on a nested function. */
10768 gcc_checking_assert (!ix86_static_chain_on_stack);
10770 /* Check if profiling is active and we shall use the profiling-before-prologue
10771 variant. If so, sorry. */
10772 if (crtl->profile && flag_fentry != 0)
10773 sorry ("ms_hook_prologue attribute isn%'t compatible "
10774 "with -mfentry for 32-bit");
10776 /* In ix86_asm_output_function_label we emitted:
10777 8b ff movl.s %edi,%edi
10778 55 push %ebp
10779 8b ec movl.s %esp,%ebp
10781 This matches the hookable function prologue in Win32 API
10782 functions in Microsoft Windows XP Service Pack 2 and newer.
10783 Wine uses this to enable Windows apps to hook the Win32 API
10784 functions provided by Wine.
10786 What that means is that we've already set up the frame pointer. */
10788 if (frame_pointer_needed
10789 && !(crtl->drap_reg && crtl->stack_realign_needed))
10791 rtx push, mov;
10793 /* We've decided to use the frame pointer already set up.
10794 Describe this to the unwinder by pretending that both
10795 push and mov insns happen right here.
10797 Putting the unwind info here at the end of the ms_hook
10798 is done so that we can make absolutely certain we get
10799 the required byte sequence at the start of the function,
10800 rather than relying on an assembler that can produce
10801 the exact encoding required.
10803 However it does mean (in the unpatched case) that we have
10804 a 1 insn window where the asynchronous unwind info is
10805 incorrect. However, if we placed the unwind info at
10806 its correct location we would have incorrect unwind info
10807 in the patched case. Which is probably all moot since
10808 I don't expect Wine generates dwarf2 unwind info for the
10809 system libraries that use this feature. */
10811 insn = emit_insn (gen_blockage ());
10813 push = gen_push (hard_frame_pointer_rtx);
10814 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10815 stack_pointer_rtx);
10816 RTX_FRAME_RELATED_P (push) = 1;
10817 RTX_FRAME_RELATED_P (mov) = 1;
10819 RTX_FRAME_RELATED_P (insn) = 1;
10820 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10821 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10823 /* Note that gen_push incremented m->fs.cfa_offset, even
10824 though we didn't emit the push insn here. */
10825 m->fs.cfa_reg = hard_frame_pointer_rtx;
10826 m->fs.fp_offset = m->fs.cfa_offset;
10827 m->fs.fp_valid = true;
10829 else
10831 /* The frame pointer is not needed so pop %ebp again.
10832 This leaves us with a pristine state. */
10833 emit_insn (gen_pop (hard_frame_pointer_rtx));
10837 /* The first insn of a function that accepts its static chain on the
10838 stack is to push the register that would be filled in by a direct
10839 call. This insn will be skipped by the trampoline. */
10840 else if (ix86_static_chain_on_stack)
10842 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10843 emit_insn (gen_blockage ());
10845 /* We don't want to interpret this push insn as a register save,
10846 only as a stack adjustment. The real copy of the register as
10847 a save will be done later, if needed. */
10848 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10849 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10850 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10851 RTX_FRAME_RELATED_P (insn) = 1;
10854 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10855 DRAP is needed and stack realignment is really needed after reload. */
10856 if (stack_realign_drap)
10858 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10860 /* Only need to push the parameter pointer reg if it is call-saved. */
10861 if (!call_used_regs[REGNO (crtl->drap_reg)])
10863 /* Push arg pointer reg */
10864 insn = emit_insn (gen_push (crtl->drap_reg));
10865 RTX_FRAME_RELATED_P (insn) = 1;
10868 /* Grab the argument pointer. */
10869 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10870 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10871 RTX_FRAME_RELATED_P (insn) = 1;
10872 m->fs.cfa_reg = crtl->drap_reg;
10873 m->fs.cfa_offset = 0;
10875 /* Align the stack. */
10876 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10877 stack_pointer_rtx,
10878 GEN_INT (-align_bytes)));
10879 RTX_FRAME_RELATED_P (insn) = 1;
10881 /* Replicate the return address on the stack so that the return
10882 address can be reached via the (argp - 1) slot. This is needed
10883 to implement macro RETURN_ADDR_RTX and intrinsic function
10884 expand_builtin_return_addr etc. */
10885 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10886 t = gen_frame_mem (word_mode, t);
10887 insn = emit_insn (gen_push (t));
10888 RTX_FRAME_RELATED_P (insn) = 1;
10890 /* For the purposes of frame and register save area addressing,
10891 we've started over with a new frame. */
10892 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10893 m->fs.realigned = true;
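/* Editor's sketch of what this DRAP block typically expands to for a 32-bit
   function realigning to 16 bytes with the DRAP in %ecx (the registers and
   offsets are illustrative and depend on the actual frame layout):

       leal    4(%esp), %ecx      # DRAP = incoming argument pointer
       andl    $-16, %esp         # align the stack
       pushl   -4(%ecx)           # replicate the return address
   */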
10896 int_registers_saved = (frame.nregs == 0);
10897 sse_registers_saved = (frame.nsseregs == 0);
10899 if (frame_pointer_needed && !m->fs.fp_valid)
10901 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10902 slower on all targets. Also sdb doesn't like it. */
10903 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10904 RTX_FRAME_RELATED_P (insn) = 1;
10906 /* Push registers now, before setting the frame pointer
10907 on SEH target. */
10908 if (!int_registers_saved
10909 && TARGET_SEH
10910 && !frame.save_regs_using_mov)
10912 ix86_emit_save_regs ();
10913 int_registers_saved = true;
10914 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10917 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10919 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10920 RTX_FRAME_RELATED_P (insn) = 1;
10922 if (m->fs.cfa_reg == stack_pointer_rtx)
10923 m->fs.cfa_reg = hard_frame_pointer_rtx;
10924 m->fs.fp_offset = m->fs.sp_offset;
10925 m->fs.fp_valid = true;
10928 else if (frame_pointer_partially_needed)
10930 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10931 RTX_FRAME_RELATED_P (insn) = 1;
10932 if (fpset_needed_in_prologue)
10934 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10935 /* Using sp as cfa_reg would require more .cfi_def_cfa_offset directives
10936 for the pushes in the prologue, so use fp as cfa_reg to reduce
10937 .eh_frame size when possible. */
10938 if (!any_fp_def)
10940 RTX_FRAME_RELATED_P (insn) = 1;
10941 if (m->fs.cfa_reg == stack_pointer_rtx)
10942 m->fs.cfa_reg = hard_frame_pointer_rtx;
10943 m->fs.fp_offset = m->fs.sp_offset;
10944 m->fs.fp_valid = true;
10949 if (!int_registers_saved)
10951 /* If saving registers via PUSH, do so now. */
10952 if (!frame.save_regs_using_mov)
10954 ix86_emit_save_regs ();
10955 int_registers_saved = true;
10956 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10959 /* When using red zone we may start register saving before allocating
10960 the stack frame saving one cycle of the prologue. However, avoid
10961 doing this if we have to probe the stack; at least on x86_64 the
10962 stack probe can turn into a call that clobbers a red zone location. */
10963 else if (ix86_using_red_zone ()
10964 && (! TARGET_STACK_PROBE
10965 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10967 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10968 int_registers_saved = true;
10972 if (stack_realign_fp)
10974 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10975 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10977 /* The computation of the size of the re-aligned stack frame means
10978 that we must allocate the size of the register save area before
10979 performing the actual alignment. Otherwise we cannot guarantee
10980 that there's enough storage above the realignment point. */
10981 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10982 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10983 GEN_INT (m->fs.sp_offset
10984 - frame.sse_reg_save_offset),
10985 -1, false);
10987 /* Align the stack. */
10988 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10989 stack_pointer_rtx,
10990 GEN_INT (-align_bytes)));
10992 /* For the purposes of register save area addressing, the stack
10993 pointer is no longer valid. As for the value of sp_offset,
10994 see ix86_compute_frame_layout, which we need to match in order
10995 to pass verification of stack_pointer_offset at the end. */
10996 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10997 m->fs.sp_valid = false;
11000 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11002 if (flag_stack_usage_info)
11004 /* We start to count from ARG_POINTER. */
11005 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11007 /* If it was realigned, take into account the fake frame. */
11008 if (stack_realign_drap)
11010 if (ix86_static_chain_on_stack)
11011 stack_size += UNITS_PER_WORD;
11013 if (!call_used_regs[REGNO (crtl->drap_reg)])
11014 stack_size += UNITS_PER_WORD;
11016 /* This over-estimates by 1 minimal-stack-alignment-unit but
11017 mitigates that by counting in the new return address slot. */
11018 current_function_dynamic_stack_size
11019 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11022 current_function_static_stack_size = stack_size;
11025 /* On SEH target with very large frame size, allocate an area to save
11026 SSE registers (as the very large allocation won't be described). */
11027 if (TARGET_SEH
11028 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11029 && !sse_registers_saved)
11031 HOST_WIDE_INT sse_size =
11032 frame.sse_reg_save_offset - frame.reg_save_offset;
11034 gcc_assert (int_registers_saved);
11036 /* No need to do stack checking as the area will be immediately
11037 written. */
11038 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11039 GEN_INT (-sse_size), -1,
11040 m->fs.cfa_reg == stack_pointer_rtx);
11041 allocate -= sse_size;
11042 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11043 sse_registers_saved = true;
11046 /* The stack has already been decremented by the instruction calling us
11047 so probe if the size is non-negative to preserve the protection area. */
11048 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11050 /* We expect the registers to be saved when probes are used. */
11051 gcc_assert (int_registers_saved);
11053 if (STACK_CHECK_MOVING_SP)
11055 if (!(crtl->is_leaf && !cfun->calls_alloca
11056 && allocate <= PROBE_INTERVAL))
11058 ix86_adjust_stack_and_probe (allocate);
11059 allocate = 0;
11062 else
11064 HOST_WIDE_INT size = allocate;
11066 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11067 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11069 if (TARGET_STACK_PROBE)
11071 if (crtl->is_leaf && !cfun->calls_alloca)
11073 if (size > PROBE_INTERVAL)
11074 ix86_emit_probe_stack_range (0, size);
11076 else
11077 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11079 else
11081 if (crtl->is_leaf && !cfun->calls_alloca)
11083 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11084 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11085 size - STACK_CHECK_PROTECT);
11087 else
11088 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11093 if (allocate == 0)
11095 else if (!ix86_target_stack_probe ()
11096 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11098 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11099 GEN_INT (-allocate), -1,
11100 m->fs.cfa_reg == stack_pointer_rtx);
11102 else
11104 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11105 rtx r10 = NULL;
11106 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11107 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11108 bool eax_live = ix86_eax_live_at_start_p ();
11109 bool r10_live = false;
11111 if (TARGET_64BIT)
11112 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11114 if (eax_live)
11116 insn = emit_insn (gen_push (eax));
11117 allocate -= UNITS_PER_WORD;
11118 /* Note that SEH directives need to continue tracking the stack
11119 pointer even after the frame pointer has been set up. */
11120 if (sp_is_cfa_reg || TARGET_SEH)
11122 if (sp_is_cfa_reg)
11123 m->fs.cfa_offset += UNITS_PER_WORD;
11124 RTX_FRAME_RELATED_P (insn) = 1;
11125 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11126 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11127 plus_constant (Pmode, stack_pointer_rtx,
11128 -UNITS_PER_WORD)));
11132 if (r10_live)
11134 r10 = gen_rtx_REG (Pmode, R10_REG);
11135 insn = emit_insn (gen_push (r10));
11136 allocate -= UNITS_PER_WORD;
11137 if (sp_is_cfa_reg || TARGET_SEH)
11139 if (sp_is_cfa_reg)
11140 m->fs.cfa_offset += UNITS_PER_WORD;
11141 RTX_FRAME_RELATED_P (insn) = 1;
11142 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11143 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11144 plus_constant (Pmode, stack_pointer_rtx,
11145 -UNITS_PER_WORD)));
11149 emit_move_insn (eax, GEN_INT (allocate));
11150 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11152 /* Use the fact that AX still contains ALLOCATE. */
11153 adjust_stack_insn = (Pmode == DImode
11154 ? gen_pro_epilogue_adjust_stack_di_sub
11155 : gen_pro_epilogue_adjust_stack_si_sub);
11157 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11158 stack_pointer_rtx, eax));
11160 if (sp_is_cfa_reg || TARGET_SEH)
11162 if (sp_is_cfa_reg)
11163 m->fs.cfa_offset += allocate;
11164 RTX_FRAME_RELATED_P (insn) = 1;
11165 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11166 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11167 plus_constant (Pmode, stack_pointer_rtx,
11168 -allocate)));
11170 m->fs.sp_offset += allocate;
11172 /* Use stack_pointer_rtx for relative addressing so that code
11173 works for realigned stack, too. */
11174 if (r10_live && eax_live)
11176 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11177 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11178 gen_frame_mem (word_mode, t));
11179 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11180 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11181 gen_frame_mem (word_mode, t));
11183 else if (eax_live || r10_live)
11185 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11186 emit_move_insn (gen_rtx_REG (word_mode,
11187 (eax_live ? AX_REG : R10_REG)),
11188 gen_frame_mem (word_mode, t));
11191 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
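/* Editor's sketch of the probed large-allocation path above, for a 64-bit
   frame in which both %rax and %r10 happen to be live; ALLOCATE is the
   remaining allocation after the two saves and the probe helper is
   target-specific (illustrative only):

       pushq   %rax                    # preserve live registers
       pushq   %r10
       movq    $ALLOCATE, %rax
       call    <stack-probe helper>    # touches each page of the area
       subq    %rax, %rsp              # the actual allocation
       movq    (%rsp,%rax), %r10       # reload the saved registers from
       movq    8(%rsp,%rax), %rax      # just above the new frame
   */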
11193 /* If we haven't already set up the frame pointer, do so now. */
11194 if (frame_pointer_needed && !m->fs.fp_valid)
11196 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11197 GEN_INT (frame.stack_pointer_offset
11198 - frame.hard_frame_pointer_offset));
11199 insn = emit_insn (insn);
11200 RTX_FRAME_RELATED_P (insn) = 1;
11201 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11203 if (m->fs.cfa_reg == stack_pointer_rtx)
11204 m->fs.cfa_reg = hard_frame_pointer_rtx;
11205 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11206 m->fs.fp_valid = true;
11209 if (!int_registers_saved)
11210 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11211 if (!sse_registers_saved)
11212 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11214 pic_reg_used = false;
11215 /* We don't use pic-register for pe-coff target. */
11216 if (pic_offset_table_rtx
11217 && !TARGET_PECOFF
11218 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11219 || crtl->profile))
11221 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11223 if (alt_pic_reg_used != INVALID_REGNUM)
11224 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11226 pic_reg_used = true;
11229 if (pic_reg_used)
11231 if (TARGET_64BIT)
11233 if (ix86_cmodel == CM_LARGE_PIC)
11235 rtx label, tmp_reg;
11237 gcc_assert (Pmode == DImode);
11238 label = gen_label_rtx ();
11239 emit_label (label);
11240 LABEL_PRESERVE_P (label) = 1;
11241 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11242 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11243 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11244 label));
11245 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11246 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11247 pic_offset_table_rtx, tmp_reg));
11249 else
11250 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11252 else
11254 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11255 RTX_FRAME_RELATED_P (insn) = 1;
11256 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11260 /* In the pic_reg_used case, make sure that the got load isn't deleted
11261 when mcount needs it. Blockage to avoid call movement across mcount
11262 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11263 note. */
11264 if (crtl->profile && !flag_fentry && pic_reg_used)
11265 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11267 if (crtl->drap_reg && !crtl->stack_realign_needed)
11269 /* vDRAP is set up, but after reload it turns out stack realignment
11270 isn't necessary; emit prologue code to set up DRAP
11271 without the stack realignment adjustment. */
11272 t = choose_baseaddr (0);
11273 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11276 /* Prevent instructions from being scheduled into register save push
11277 sequence when access to the redzone area is done through frame pointer.
11278 The offset between the frame pointer and the stack pointer is calculated
11279 relative to the value of the stack pointer at the end of the function
11280 prologue, and moving instructions that access redzone area via frame
11281 pointer inside push sequence violates this assumption. */
11282 if (frame_pointer_needed && frame.red_zone_size)
11283 emit_insn (gen_memory_blockage ());
11285 /* Emit cld instruction if stringops are used in the function. */
11286 if (TARGET_CLD && ix86_current_function_needs_cld)
11287 emit_insn (gen_cld ());
11289 /* SEH requires that the prologue end within 256 bytes of the start of
11290 the function. Prevent instruction schedules that would extend that.
11291 Further, prevent alloca modifications to the stack pointer from being
11292 combined with prologue modifications. */
11293 if (TARGET_SEH)
11294 emit_insn (gen_prologue_use (stack_pointer_rtx));
11297 /* Get frame pointer setting insn based on frame state. */
11298 static rtx
11299 ix86_set_fp_insn ()
11301 rtx r, seq;
11302 struct ix86_frame frame;
11303 HOST_WIDE_INT offset;
11305 ix86_compute_frame_layout (&frame);
11306 gcc_assert (frame_pointer_partially_needed);
11307 offset = frame.stack_pointer_offset - frame.hard_frame_pointer_offset;
11309 if (TARGET_64BIT && (offset > 0x7fffffff))
11311 r = gen_rtx_SET (DImode, hard_frame_pointer_rtx, GEN_INT (offset));
11312 emit_insn (r);
11313 r = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, stack_pointer_rtx);
11314 r = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, r);
11316 else
11318 r = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset));
11319 r = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, r);
11321 emit_insn (r);
11322 return r;
11325 /* Emit code to restore REG using a POP insn. */
11327 static void
11328 ix86_emit_restore_reg_using_pop (rtx reg)
11330 struct machine_function *m = cfun->machine;
11331 rtx insn = emit_insn (gen_pop (reg));
11333 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11334 m->fs.sp_offset -= UNITS_PER_WORD;
11336 if (m->fs.cfa_reg == crtl->drap_reg
11337 && REGNO (reg) == REGNO (crtl->drap_reg))
11339 /* Previously we'd represented the CFA as an expression
11340 like *(%ebp - 8). We've just popped that value from
11341 the stack, which means we need to reset the CFA to
11342 the drap register. This will remain until we restore
11343 the stack pointer. */
11344 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11345 RTX_FRAME_RELATED_P (insn) = 1;
11347 /* This means that the DRAP register is valid for addressing too. */
11348 m->fs.drap_valid = true;
11349 return;
11352 if (m->fs.cfa_reg == stack_pointer_rtx)
11354 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11355 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11356 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11357 RTX_FRAME_RELATED_P (insn) = 1;
11359 m->fs.cfa_offset -= UNITS_PER_WORD;
11362 /* When the frame pointer is the CFA, and we pop it, we are
11363 swapping back to the stack pointer as the CFA. This happens
11364 for stack frames that don't allocate other data, so we assume
11365 the stack pointer is now pointing at the return address, i.e.
11366 the function entry state, which makes the offset be 1 word. */
11367 if (reg == hard_frame_pointer_rtx)
11369 m->fs.fp_valid = false;
11370 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11372 m->fs.cfa_reg = stack_pointer_rtx;
11373 m->fs.cfa_offset -= UNITS_PER_WORD;
11375 add_reg_note (insn, REG_CFA_DEF_CFA,
11376 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11377 GEN_INT (m->fs.cfa_offset)));
11378 RTX_FRAME_RELATED_P (insn) = 1;
11383 /* Emit code to restore saved registers using POP insns. */
11385 static void
11386 ix86_emit_restore_regs_using_pop (void)
11388 unsigned int regno;
11390 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11391 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11392 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11395 /* Emit code and notes for the LEAVE instruction. */
11397 static void
11398 ix86_emit_leave (void)
11400 struct machine_function *m = cfun->machine;
11401 rtx insn = emit_insn (ix86_gen_leave ());
11403 ix86_add_queued_cfa_restore_notes (insn);
11405 gcc_assert (m->fs.fp_valid);
11406 m->fs.sp_valid = true;
11407 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11408 m->fs.fp_valid = false;
11410 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11412 m->fs.cfa_reg = stack_pointer_rtx;
11413 m->fs.cfa_offset = m->fs.sp_offset;
11415 add_reg_note (insn, REG_CFA_DEF_CFA,
11416 plus_constant (Pmode, stack_pointer_rtx,
11417 m->fs.sp_offset));
11418 RTX_FRAME_RELATED_P (insn) = 1;
11420 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11421 m->fs.fp_offset);
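/* Editor's note: `leave' behaves like `mov %ebp, %esp' followed by
   `pop %ebp' (or the 64-bit equivalent), which is why sp_valid, sp_offset
   and fp_valid are updated as above.  */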
11424 /* Emit code to restore saved registers using MOV insns.
11425 First register is restored from CFA - CFA_OFFSET. */
11426 static void
11427 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11428 bool maybe_eh_return)
11430 struct machine_function *m = cfun->machine;
11431 unsigned int regno;
11433 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11434 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11436 rtx reg = gen_rtx_REG (word_mode, regno);
11437 rtx insn, mem;
11439 mem = choose_baseaddr (cfa_offset);
11440 mem = gen_frame_mem (word_mode, mem);
11441 insn = emit_move_insn (reg, mem);
11443 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11445 /* Previously we'd represented the CFA as an expression
11446 like *(%ebp - 8). We've just popped that value from
11447 the stack, which means we need to reset the CFA to
11448 the drap register. This will remain until we restore
11449 the stack pointer. */
11450 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11451 RTX_FRAME_RELATED_P (insn) = 1;
11453 /* This means that the DRAP register is valid for addressing. */
11454 m->fs.drap_valid = true;
11456 else
11457 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11459 cfa_offset -= UNITS_PER_WORD;
11463 /* Emit code to restore saved registers using MOV insns.
11464 First register is restored from CFA - CFA_OFFSET. */
11465 static void
11466 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11467 bool maybe_eh_return)
11469 unsigned int regno;
11471 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11472 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11474 rtx reg = gen_rtx_REG (V4SFmode, regno);
11475 rtx mem;
11477 mem = choose_baseaddr (cfa_offset);
11478 mem = gen_rtx_MEM (V4SFmode, mem);
11479 set_mem_align (mem, 128);
11480 emit_move_insn (reg, mem);
11482 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11484 cfa_offset -= 16;
11488 /* Restore function stack, frame, and registers. */
11490 void
11491 ix86_expand_epilogue (int style)
11493 struct machine_function *m = cfun->machine;
11494 struct machine_frame_state frame_state_save = m->fs;
11495 struct ix86_frame frame;
11496 bool restore_regs_via_mov;
11497 bool using_drap;
11499 ix86_finalize_stack_realign_flags ();
11500 ix86_compute_frame_layout (&frame);
11502 m->fs.sp_valid = (!frame_pointer_needed
11503 || (crtl->sp_is_unchanging
11504 && !stack_realign_fp));
11505 gcc_assert (!m->fs.sp_valid
11506 || m->fs.sp_offset == frame.stack_pointer_offset);
11508 /* The FP must be valid if the frame pointer is present. */
11509 if (!frame_pointer_partially_needed)
11510 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11511 else
11512 gcc_assert (!(any_fp_def && m->fs.fp_valid));
11514 gcc_assert (!m->fs.fp_valid
11515 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11517 /* We must have *some* valid pointer to the stack frame. */
11518 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11520 /* The DRAP is never valid at this point. */
11521 gcc_assert (!m->fs.drap_valid);
11523 /* See the comment about red zone and frame
11524 pointer usage in ix86_expand_prologue. */
11525 if (frame_pointer_needed && frame.red_zone_size)
11526 emit_insn (gen_memory_blockage ());
11528 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11529 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11531 /* Determine the CFA offset of the end of the red-zone. */
11532 m->fs.red_zone_offset = 0;
11533 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11535 /* The red-zone begins below the return address. */
11536 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11538 /* When the register save area is in the aligned portion of
11539 the stack, determine the maximum runtime displacement that
11540 matches up with the aligned frame. */
11541 if (stack_realign_drap)
11542 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11543 + UNITS_PER_WORD);
11546 /* Special care must be taken for the normal return case of a function
11547 using eh_return: the eax and edx registers are marked as saved, but
11548 not restored along this path. Adjust the save location to match. */
11549 if (crtl->calls_eh_return && style != 2)
11550 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11552 /* EH_RETURN requires the use of moves to function properly. */
11553 if (crtl->calls_eh_return)
11554 restore_regs_via_mov = true;
11555 /* SEH requires the use of pops to identify the epilogue. */
11556 else if (TARGET_SEH)
11557 restore_regs_via_mov = false;
11558 /* If we're only restoring one register and sp is not valid, then
11559 use a move instruction to restore the register, since it's
11560 less work than reloading sp and popping the register. */
11561 else if (!m->fs.sp_valid && frame.nregs <= 1)
11562 restore_regs_via_mov = true;
11563 else if (TARGET_EPILOGUE_USING_MOVE
11564 && cfun->machine->use_fast_prologue_epilogue
11565 && (frame.nregs > 1
11566 || m->fs.sp_offset != frame.reg_save_offset))
11567 restore_regs_via_mov = true;
11568 else if (frame_pointer_needed
11569 && !frame.nregs
11570 && m->fs.sp_offset != frame.reg_save_offset)
11571 restore_regs_via_mov = true;
11572 else if (frame_pointer_needed
11573 && TARGET_USE_LEAVE
11574 && cfun->machine->use_fast_prologue_epilogue
11575 && frame.nregs == 1)
11576 restore_regs_via_mov = true;
11577 else
11578 restore_regs_via_mov = false;
11580 if (restore_regs_via_mov || frame.nsseregs)
11582 /* Ensure that the entire register save area is addressable via
11583 the stack pointer, if we will restore via sp. */
11584 if (TARGET_64BIT
11585 && m->fs.sp_offset > 0x7fffffff
11586 && !(m->fs.fp_valid || m->fs.drap_valid)
11587 && (frame.nsseregs + frame.nregs) != 0)
11589 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11590 GEN_INT (m->fs.sp_offset
11591 - frame.sse_reg_save_offset),
11592 style,
11593 m->fs.cfa_reg == stack_pointer_rtx);
11597 /* If there are any SSE registers to restore, then we have to do it
11598 via moves, since there's obviously no pop for SSE regs. */
11599 if (frame.nsseregs)
11600 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11601 style == 2);
11603 if (restore_regs_via_mov)
11605 rtx t;
11607 if (frame.nregs)
11608 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11610 /* eh_return epilogues need %ecx added to the stack pointer. */
11611 if (style == 2)
11613 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11615 /* Stack align doesn't work with eh_return. */
11616 gcc_assert (!stack_realign_drap);
11617 /* Neither do regparm nested functions. */
11618 gcc_assert (!ix86_static_chain_on_stack);
11620 if (frame_pointer_needed)
11622 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11623 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11624 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11626 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11627 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11629 /* Note that we use SA as a temporary CFA, as the return
11630 address is at the proper place relative to it. We
11631 pretend this happens at the FP restore insn because
11632 prior to this insn the FP would be stored at the wrong
11633 offset relative to SA, and after this insn we have no
11634 other reasonable register to use for the CFA. We don't
11635 bother resetting the CFA to the SP for the duration of
11636 the return insn. */
11637 add_reg_note (insn, REG_CFA_DEF_CFA,
11638 plus_constant (Pmode, sa, UNITS_PER_WORD));
11639 ix86_add_queued_cfa_restore_notes (insn);
11640 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11641 RTX_FRAME_RELATED_P (insn) = 1;
11643 m->fs.cfa_reg = sa;
11644 m->fs.cfa_offset = UNITS_PER_WORD;
11645 m->fs.fp_valid = false;
11647 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11648 const0_rtx, style, false);
11650 else
11652 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11653 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11654 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11655 ix86_add_queued_cfa_restore_notes (insn);
11657 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11658 if (m->fs.cfa_offset != UNITS_PER_WORD)
11660 m->fs.cfa_offset = UNITS_PER_WORD;
11661 add_reg_note (insn, REG_CFA_DEF_CFA,
11662 plus_constant (Pmode, stack_pointer_rtx,
11663 UNITS_PER_WORD));
11664 RTX_FRAME_RELATED_P (insn) = 1;
11667 m->fs.sp_offset = UNITS_PER_WORD;
11668 m->fs.sp_valid = true;
11671 else
11673 /* SEH requires that the function end with (1) a stack adjustment
11674 if necessary, (2) a sequence of pops, and (3) a return or
11675 jump instruction. Prevent insns from the function body from
11676 being scheduled into this sequence. */
11677 if (TARGET_SEH)
11679 /* Prevent a catch region from being adjacent to the standard
11680 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11681 several other flags that would be interesting to test are
11682 set up yet. */
11683 if (flag_non_call_exceptions)
11684 emit_insn (gen_nops (const1_rtx));
11685 else
11686 emit_insn (gen_blockage ());
11689 /* First step is to deallocate the stack frame so that we can
11690 pop the registers. Also do it on SEH target for very large
11691 frame as the emitted instructions aren't allowed by the ABI in
11692 epilogues. */
11693 if (!m->fs.sp_valid
11694 || (TARGET_SEH
11695 && (m->fs.sp_offset - frame.reg_save_offset
11696 >= SEH_MAX_FRAME_SIZE)))
11698 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11699 GEN_INT (m->fs.fp_offset
11700 - frame.reg_save_offset),
11701 style, false);
11703 else if (m->fs.sp_offset != frame.reg_save_offset)
11705 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11706 GEN_INT (m->fs.sp_offset
11707 - frame.reg_save_offset),
11708 style,
11709 m->fs.cfa_reg == stack_pointer_rtx);
11712 ix86_emit_restore_regs_using_pop ();
11715 /* If we used a frame pointer and haven't already got rid of it,
11716 then do so now. */
11717 if (m->fs.fp_valid || frame_pointer_partially_needed)
11719 /* If the stack pointer is valid and pointing at the frame
11720 pointer store address, then we only need a pop. */
11721 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11722 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11723 /* Leave results in shorter dependency chains on CPUs that are
11724 able to grok it fast. */
11725 else if (m->fs.fp_valid
11726 && (TARGET_USE_LEAVE
11727 || optimize_function_for_size_p (cfun)
11728 || !cfun->machine->use_fast_prologue_epilogue))
11729 ix86_emit_leave ();
11730 else
11732 rtx dest, offset;
11733 dest = (m->fs.fp_valid) ? hard_frame_pointer_rtx : stack_pointer_rtx;
11734 offset = (m->fs.fp_valid) ? const0_rtx :
11735 GEN_INT (m->fs.sp_offset - frame.hfp_save_offset);
11736 pro_epilogue_adjust_stack (stack_pointer_rtx,
11737 dest,
11738 offset, style, !using_drap);
11739 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11743 if (using_drap)
11745 int param_ptr_offset = UNITS_PER_WORD;
11746 rtx insn;
11748 gcc_assert (stack_realign_drap);
11750 if (ix86_static_chain_on_stack)
11751 param_ptr_offset += UNITS_PER_WORD;
11752 if (!call_used_regs[REGNO (crtl->drap_reg)])
11753 param_ptr_offset += UNITS_PER_WORD;
11755 insn = emit_insn (gen_rtx_SET
11756 (VOIDmode, stack_pointer_rtx,
11757 gen_rtx_PLUS (Pmode,
11758 crtl->drap_reg,
11759 GEN_INT (-param_ptr_offset))));
11760 m->fs.cfa_reg = stack_pointer_rtx;
11761 m->fs.cfa_offset = param_ptr_offset;
11762 m->fs.sp_offset = param_ptr_offset;
11763 m->fs.realigned = false;
11765 add_reg_note (insn, REG_CFA_DEF_CFA,
11766 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11767 GEN_INT (param_ptr_offset)));
11768 RTX_FRAME_RELATED_P (insn) = 1;
11770 if (!call_used_regs[REGNO (crtl->drap_reg)])
11771 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11774 /* At this point the stack pointer must be valid, and we must have
11775 restored all of the registers. We may not have deallocated the
11776 entire stack frame. We've delayed this until now because it may
11777 be possible to merge the local stack deallocation with the
11778 deallocation forced by ix86_static_chain_on_stack. */
11779 gcc_assert (m->fs.sp_valid);
11780 gcc_assert (!m->fs.fp_valid);
11781 gcc_assert (!m->fs.realigned);
11782 if (m->fs.sp_offset != UNITS_PER_WORD)
11784 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11785 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11786 style, true);
11788 else
11789 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11791 /* Sibcall epilogues don't want a return instruction. */
11792 if (style == 0)
11794 m->fs = frame_state_save;
11795 return;
11798 if (crtl->args.pops_args && crtl->args.size)
11800 rtx popc = GEN_INT (crtl->args.pops_args);
11802 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11803 address, do explicit add, and jump indirectly to the caller. */
11805 if (crtl->args.pops_args >= 65536)
11807 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11808 rtx insn;
11810 /* There is no "pascal" calling convention in any 64bit ABI. */
11811 gcc_assert (!TARGET_64BIT);
11813 insn = emit_insn (gen_pop (ecx));
11814 m->fs.cfa_offset -= UNITS_PER_WORD;
11815 m->fs.sp_offset -= UNITS_PER_WORD;
11817 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11818 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11819 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11820 add_reg_note (insn, REG_CFA_REGISTER,
11821 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11822 RTX_FRAME_RELATED_P (insn) = 1;
11824 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11825 popc, -1, true);
11826 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11828 else
11829 emit_jump_insn (gen_simple_return_pop_internal (popc));
11831 else
11832 emit_jump_insn (gen_simple_return_internal ());
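/* Editor's sketch of the >= 64K pops_args path above on 32-bit targets
   (N is the caller-popped argument size; illustrative only):

       popl    %ecx            # pull the return address off the stack
       addl    $N, %esp        # pop the oversized argument area
       jmp     *%ecx           # return to the caller indirectly
   */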
11834 /* Restore the state back to the state from the prologue,
11835 so that it's correct for the next epilogue. */
11836 m->fs = frame_state_save;
11840 /* True if the current function should be patched with nops at prologue and
11841 returns. */
11842 static bool patch_current_function_p = false;
11844 static inline bool
11845 has_attribute (const char* attribute_name)
11847 return lookup_attribute (attribute_name,
11848 DECL_ATTRIBUTES (current_function_decl)) != NULL;
11851 /* Return true if we patch the current function. By default a function
11852 is patched if it has loops or if the number of insns is greater than
11853 patch_functions_min_instructions (number of insns roughly translates
11854 to number of instructions). */
11856 static bool
11857 check_should_patch_current_function (void)
11859 int num_insns = 0;
11860 rtx insn;
11861 const char *func_name = NULL;
11862 struct loops *loops;
11863 int num_loops = 0;
11864 int min_functions_instructions;
11866 /* If a function has an attribute forcing patching on or off, do as it
11867 indicates. */
11868 if (has_attribute ("always_patch_for_instrumentation"))
11869 return true;
11870 else if (has_attribute ("never_patch_for_instrumentation"))
11871 return false;
11873 /* Patch the function if it has at least a loop. */
11874 if (!patch_functions_ignore_loops)
11876 if (DECL_STRUCT_FUNCTION (current_function_decl)->cfg)
11878 loops = flow_loops_find (NULL);
11879 num_loops = loops->larray->length();
11880 /* FIXME - Deallocating the loop causes a seg-fault. */
11881 #if 0
11882 flow_loops_free (loops);
11883 #endif
11884 /* We are not concerned with the function body as a loop. */
11885 if (num_loops > 1)
11886 return true;
11890 /* Otherwise, check if the function has more than patch_functions_min_instructions. */
11892 /* Borrowed this code from rest_of_handle_final() in final.c. */
11893 func_name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
11894 if (!patch_functions_dont_always_patch_main &&
11895 func_name &&
11896 strcmp("main", func_name) == 0)
11897 return true;
11899 min_functions_instructions =
11900 PARAM_VALUE (PARAM_FUNCTION_PATCH_MIN_INSTRUCTIONS);
11901 if (min_functions_instructions > 0)
11903 /* Calculate the number of instructions in this function and only emit
11904 function patch for instrumentation if it is greater than
11905 patch_functions_min_instructions. */
11906 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11908 if (NONDEBUG_INSN_P (insn))
11909 ++num_insns;
11911 if (num_insns < min_functions_instructions)
11912 return false;
11915 return true;
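/* Editor's note: a function can force the decision either way with the
   attributes checked above, e.g. (illustrative declarations):

       void slow_path (void) __attribute__ ((always_patch_for_instrumentation));
       void fast_path (void) __attribute__ ((never_patch_for_instrumentation));
   */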
11918 /* Emit the 11-byte patch space for the function prologue for functions that
11919 qualify. */
11921 static void
11922 ix86_output_function_prologue (FILE *file,
11923 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11925 /* Only for 64-bit target. */
11926 if (TARGET_64BIT && patch_functions_for_instrumentation)
11928 patch_current_function_p = check_should_patch_current_function();
11929 /* Emit the two-byte instruction 'jmp .+11' (0xeb 0x09) followed by 9 nop
11930 bytes, giving an 11-byte patch area. */
11931 ix86_output_function_nops_prologue_epilogue (
11932 file,
11933 FUNCTION_PATCH_PROLOGUE_SECTION,
11934 ASM_BYTE"0xeb,0x09",
11939 /* Emit the nop bytes at function prologue or return (including tail call
11940 jumps). The number of nop bytes generated is at least 8.
11941 Also emits a section named SECTION_NAME, which is a backpointer section
11942 holding the addresses of the nop bytes in the text section.
11943 SECTION_NAME is either '_function_patch_prologue' or
11944 '_function_patch_epilogue'. The backpointer section can be used to navigate
11945 through all the function entry and exit points which are patched with nops.
11946 PRE_INSTRUCTIONS are the instructions, if any, at the start of the nop byte
11947 sequence. NUM_REMAINING_NOPS is the number of nop bytes to fill,
11948 excluding the number of bytes in PRE_INSTRUCTIONS.
11949 Returns true if the function was patched, false otherwise. */
11951 bool
11952 ix86_output_function_nops_prologue_epilogue (FILE *file,
11953 const char *section_name,
11954 const char *pre_instructions,
11955 int num_remaining_nops)
11957 static int labelno = 0;
11958 char label[32], section_label[32];
11959 section *section = NULL;
11960 int num_actual_nops = num_remaining_nops - sizeof(void *);
11961 unsigned int section_flags = SECTION_RELRO;
11962 char *section_name_comdat = NULL;
11963 const char *decl_section_name = NULL;
11964 const char *func_name = NULL;
11965 char *section_name_function_sections = NULL;
11966 size_t len;
11968 gcc_assert (num_remaining_nops >= 0);
11970 if (!patch_current_function_p)
11971 return false;
11973 ASM_GENERATE_INTERNAL_LABEL (label, "LFPEL", labelno);
11974 ASM_GENERATE_INTERNAL_LABEL (section_label, "LFPESL", labelno++);
11976 /* Align the start of nops to 2-byte boundary so that the 2-byte jump
11977 instruction can be patched atomically at run time. */
11978 ASM_OUTPUT_ALIGN (file, 1);
11980 /* Emit nop bytes. They look like the following:
11981 $LFPEL0:
11982 <pre_instruction>
11983 0x90 (repeated num_actual_nops times)
11984 .quad $LFPESL0 - .
11985 followed by section 'section_name' which contains the address
11986 of instruction at 'label'.
11988 ASM_OUTPUT_INTERNAL_LABEL (file, label);
11989 if (pre_instructions)
11990 fprintf (file, "%s\n", pre_instructions);
11992 while (num_actual_nops-- > 0)
11993 asm_fprintf (file, ASM_BYTE"0x90\n");
11995 fprintf (file, ASM_QUAD);
11996 /* Output "section_label - ." for the relative address of the entry in
11997 the section 'section_name'. */
11998 assemble_name_raw (file, section_label);
11999 fprintf (file, " - .");
12000 fprintf (file, "\n");
12002 /* Emit the backpointer section. For functions belonging to comdat group,
12003 we emit a different section named '<section_name>.foo' where 'foo' is
12004 the name of the comdat section. This section is later renamed to
12005 '<section_name>' by ix86_elf_asm_named_section().
12006 We emit a unique section name for the back pointer section for comdat
12007 functions because otherwise the 'get_section' call may return an existing
12008 non-comdat section with the same name, leading to references from
12009 non-comdat section to comdat functions.
12011 if (current_function_decl != NULL_TREE &&
12012 DECL_ONE_ONLY (current_function_decl) &&
12013 HAVE_COMDAT_GROUP)
12015 decl_section_name =
12016 TREE_STRING_POINTER (DECL_SECTION_NAME (current_function_decl));
12017 len = strlen (decl_section_name) + strlen (section_name) + 2;
12018 section_name_comdat = (char *) alloca (len);
12019 sprintf (section_name_comdat, "%s.%s", section_name, decl_section_name);
12020 section_name = section_name_comdat;
12021 section_flags |= SECTION_LINKONCE;
12023 else if (flag_function_sections)
12025 func_name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
12026 if (func_name)
12028 len = strlen (func_name) + strlen (section_name) + 2;
12029 section_name_function_sections = (char *) alloca (len);
12030 sprintf (section_name_function_sections, "%s.%s", section_name,
12031 func_name);
12032 section_name = section_name_function_sections;
12035 section = get_section (section_name, section_flags, current_function_decl);
12036 switch_to_section (section);
12037 /* Align the section to 8-byte boundary. */
12038 ASM_OUTPUT_ALIGN (file, 3);
12040 /* Emit address of the start of nop bytes in the section:
12041 $LFPESP0:
12042 .quad $LFPEL0
12044 ASM_OUTPUT_INTERNAL_LABEL (file, section_label);
12045 fprintf(file, ASM_QUAD);
12046 assemble_name_raw (file, label);
12047 fprintf (file, "\n");
12049 /* Switching back to text section. */
12050 switch_to_section (current_function_section ());
12051 return true;
12054 /* Strips the characters after '_function_patch_prologue' or
12055 '_function_patch_epilogue' and emits the section. */
12057 static void
12058 ix86_elf_asm_named_section (const char *name, unsigned int flags,
12059 tree decl)
12061 const char *section_name = name;
12062 if (!flag_function_sections && HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
12064 const int prologue_section_name_length =
12065 sizeof(FUNCTION_PATCH_PROLOGUE_SECTION) - 1;
12066 const int epilogue_section_name_length =
12067 sizeof(FUNCTION_PATCH_EPILOGUE_SECTION) - 1;
12069 if (strncmp (name, FUNCTION_PATCH_PROLOGUE_SECTION,
12070 prologue_section_name_length) == 0)
12071 section_name = FUNCTION_PATCH_PROLOGUE_SECTION;
12072 else if (strncmp (name, FUNCTION_PATCH_EPILOGUE_SECTION,
12073 epilogue_section_name_length) == 0)
12074 section_name = FUNCTION_PATCH_EPILOGUE_SECTION;
12076 default_elf_asm_named_section (section_name, flags, decl);
12079 /* Reset from the function's potential modifications. */
12081 static void
12082 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
12083 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
12085 if (pic_offset_table_rtx)
12086 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12087 #if TARGET_MACHO
12088 /* Mach-O doesn't support labels at the end of objects, so if
12089 it looks like we might want one, insert a NOP. */
12091 rtx insn = get_last_insn ();
12092 rtx deleted_debug_label = NULL_RTX;
12093 while (insn
12094 && NOTE_P (insn)
12095 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12097 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12098 notes only, instead set their CODE_LABEL_NUMBER to -1,
12099 otherwise there would be code generation differences
12100 in between -g and -g0. */
12101 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12102 deleted_debug_label = insn;
12103 insn = PREV_INSN (insn);
12105 if (insn
12106 && (LABEL_P (insn)
12107 || (NOTE_P (insn)
12108 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12109 fputs ("\tnop\n", file);
12110 else if (deleted_debug_label)
12111 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12112 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12113 CODE_LABEL_NUMBER (insn) = -1;
12115 #endif
12119 /* Return a scratch register to use in the split stack prologue. The
12120 split stack prologue is used for -fsplit-stack. It is the first
12121 instructions in the function, even before the regular prologue.
12122 The scratch register can be any caller-saved register which is not
12123 used for parameters or for the static chain. */
12125 static unsigned int
12126 split_stack_prologue_scratch_regno (void)
12128 if (TARGET_64BIT)
12129 return R11_REG;
12130 else
12132 bool is_fastcall, is_thiscall;
12133 int regparm;
12135 is_fastcall = (lookup_attribute ("fastcall",
12136 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12137 != NULL);
12138 is_thiscall = (lookup_attribute ("thiscall",
12139 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12140 != NULL);
12141 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12143 if (is_fastcall)
12145 if (DECL_STATIC_CHAIN (cfun->decl))
12147 sorry ("-fsplit-stack does not support fastcall with "
12148 "nested function");
12149 return INVALID_REGNUM;
12151 return AX_REG;
12153 else if (is_thiscall)
12155 if (!DECL_STATIC_CHAIN (cfun->decl))
12156 return DX_REG;
12157 return AX_REG;
12159 else if (regparm < 3)
12161 if (!DECL_STATIC_CHAIN (cfun->decl))
12162 return CX_REG;
12163 else
12165 if (regparm >= 2)
12167 sorry ("-fsplit-stack does not support 2 register "
12168 "parameters for a nested function");
12169 return INVALID_REGNUM;
12171 return DX_REG;
12174 else
12176 /* FIXME: We could make this work by pushing a register
12177 around the addition and comparison. */
12178 sorry ("-fsplit-stack does not support 3 register parameters");
12179 return INVALID_REGNUM;
12184 /* A SYMBOL_REF for the function which allocates new stackspace for
12185 -fsplit-stack. */
12187 static GTY(()) rtx split_stack_fn;
12189 /* A SYMBOL_REF for the more stack function when using the large
12190 model. */
12192 static GTY(()) rtx split_stack_fn_large;
12194 /* Handle -fsplit-stack. These are the first instructions in the
12195 function, even before the regular prologue. */
12197 void
12198 ix86_expand_split_stack_prologue (void)
12200 struct ix86_frame frame;
12201 HOST_WIDE_INT allocate;
12202 unsigned HOST_WIDE_INT args_size;
12203 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12204 rtx scratch_reg = NULL_RTX;
12205 rtx varargs_label = NULL_RTX;
12206 rtx fn;
12208 gcc_assert (flag_split_stack && reload_completed);
12210 ix86_finalize_stack_realign_flags ();
12211 ix86_compute_frame_layout (&frame);
12212 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12214 /* This is the label we will branch to if we have enough stack
12215 space. We expect the basic block reordering pass to reverse this
12216 branch if optimizing, so that we branch in the unlikely case. */
12217 label = gen_label_rtx ();
12219 /* We need to compare the stack pointer minus the frame size with
12220 the stack boundary in the TCB. The stack boundary always gives
12221 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12222 can compare directly. Otherwise we need to do an addition. */
12224 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12225 UNSPEC_STACK_CHECK);
12226 limit = gen_rtx_CONST (Pmode, limit);
12227 limit = gen_rtx_MEM (Pmode, limit);
12228 if (allocate < SPLIT_STACK_AVAILABLE)
12229 current = stack_pointer_rtx;
12230 else
12232 unsigned int scratch_regno;
12233 rtx offset;
12235 /* We need a scratch register to hold the stack pointer minus
12236 the required frame size. Since this is the very start of the
12237 function, the scratch register can be any caller-saved
12238 register which is not used for parameters. */
12239 offset = GEN_INT (- allocate);
12240 scratch_regno = split_stack_prologue_scratch_regno ();
12241 if (scratch_regno == INVALID_REGNUM)
12242 return;
12243 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12244 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12246 /* We don't use ix86_gen_add3 in this case because it will
12247 want to split to lea, but when not optimizing the insn
12248 will not be split after this point. */
12249 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12250 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12251 offset)));
12253 else
12255 emit_move_insn (scratch_reg, offset);
12256 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12257 stack_pointer_rtx));
12259 current = scratch_reg;
12262 ix86_expand_branch (GEU, current, limit, label);
12263 jump_insn = get_last_insn ();
12264 JUMP_LABEL (jump_insn) = label;
12266 /* Mark the jump as very likely to be taken. */
12267 add_int_reg_note (jump_insn, REG_BR_PROB,
12268 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12270 if (split_stack_fn == NULL_RTX)
12271 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12272 fn = split_stack_fn;
12274 /* Get more stack space. We pass in the desired stack space and the
12275 size of the arguments to copy to the new stack. In 32-bit mode
12276 we push the parameters; __morestack will return on a new stack
12277 anyhow. In 64-bit mode we pass the parameters in r10 and
12278 r11. */
12279 allocate_rtx = GEN_INT (allocate);
12280 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12281 call_fusage = NULL_RTX;
12282 if (TARGET_64BIT)
12284 rtx reg10, reg11;
12286 reg10 = gen_rtx_REG (Pmode, R10_REG);
12287 reg11 = gen_rtx_REG (Pmode, R11_REG);
12289 /* If this function uses a static chain, it will be in %r10.
12290 Preserve it across the call to __morestack. */
12291 if (DECL_STATIC_CHAIN (cfun->decl))
12293 rtx rax;
12295 rax = gen_rtx_REG (word_mode, AX_REG);
12296 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12297 use_reg (&call_fusage, rax);
12300 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12301 && !TARGET_PECOFF)
12303 HOST_WIDE_INT argval;
12305 gcc_assert (Pmode == DImode);
12306 /* When using the large model we need to load the address
12307 into a register, and we've run out of registers. So we
12308 switch to a different calling convention, and we call a
12309 different function: __morestack_large. We pass the
12310 argument size in the upper 32 bits of r10 and pass the
12311 frame size in the lower 32 bits. */
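/* (Illustrative example: with args_size == 0x20 and allocate == 0x1000,
   the packed value computed below is 0x0000002000001000, i.e. args_size
   in the upper and the frame size in the lower 32 bits of %r10.)  */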
12312 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12313 gcc_assert ((args_size & 0xffffffff) == args_size);
12315 if (split_stack_fn_large == NULL_RTX)
12316 split_stack_fn_large =
12317 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12319 if (ix86_cmodel == CM_LARGE_PIC)
12321 rtx label, x;
12323 label = gen_label_rtx ();
12324 emit_label (label);
12325 LABEL_PRESERVE_P (label) = 1;
12326 emit_insn (gen_set_rip_rex64 (reg10, label));
12327 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12328 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12329 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12330 UNSPEC_GOT);
12331 x = gen_rtx_CONST (Pmode, x);
12332 emit_move_insn (reg11, x);
12333 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12334 x = gen_const_mem (Pmode, x);
12335 emit_move_insn (reg11, x);
12337 else
12338 emit_move_insn (reg11, split_stack_fn_large);
12340 fn = reg11;
12342 argval = ((args_size << 16) << 16) + allocate;
12343 emit_move_insn (reg10, GEN_INT (argval));
12345 else
12347 emit_move_insn (reg10, allocate_rtx);
12348 emit_move_insn (reg11, GEN_INT (args_size));
12349 use_reg (&call_fusage, reg11);
12352 use_reg (&call_fusage, reg10);
12354 else
12356 emit_insn (gen_push (GEN_INT (args_size)));
12357 emit_insn (gen_push (allocate_rtx));
12359 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12360 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12361 NULL_RTX, false);
12362 add_function_usage_to (call_insn, call_fusage);
12364 /* In order to make call/return prediction work right, we now need
12365 to execute a return instruction. See
12366 libgcc/config/i386/morestack.S for the details on how this works.
12368 For flow purposes gcc must not see this as a return
12369 instruction--we need control flow to continue at the subsequent
12370 label. Therefore, we use an unspec. */
12371 gcc_assert (crtl->args.pops_args < 65536);
12372 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12374 /* If we are in 64-bit mode and this function uses a static chain,
12375 we saved %r10 in %rax before calling __morestack. */
12376 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12377 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12378 gen_rtx_REG (word_mode, AX_REG));
12380 /* If this function calls va_start, we need to store a pointer to
12381 the arguments on the old stack, because they may not have been
12382 all copied to the new stack. At this point the old stack can be
12383 found at the frame pointer value used by __morestack, because
12384 __morestack has set that up before calling back to us. Here we
12385 store that pointer in a scratch register, and in
12386 ix86_expand_prologue we store the scratch register in a stack
12387 slot. */
12388 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12390 unsigned int scratch_regno;
12391 rtx frame_reg;
12392 int words;
12394 scratch_regno = split_stack_prologue_scratch_regno ();
12395 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12396 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12398 /* 64-bit:
12399 fp -> old fp value
12400 return address within this function
12401 return address of caller of this function
12402 stack arguments
12403 So we add three words to get to the stack arguments.
12405 32-bit:
12406 fp -> old fp value
12407 return address within this function
12408 first argument to __morestack
12409 second argument to __morestack
12410 return address of caller of this function
12411 stack arguments
12412 So we add five words to get to the stack arguments. */
12414 words = TARGET_64BIT ? 3 : 5;
12415 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12416 gen_rtx_PLUS (Pmode, frame_reg,
12417 GEN_INT (words * UNITS_PER_WORD))));
12419 varargs_label = gen_label_rtx ();
12420 emit_jump_insn (gen_jump (varargs_label));
12421 JUMP_LABEL (get_last_insn ()) = varargs_label;
12423 emit_barrier ();
12426 emit_label (label);
12427 LABEL_NUSES (label) = 1;
12429 /* If this function calls va_start, we now have to set the scratch
12430 register for the case where we do not call __morestack. In this
12431 case we need to set it based on the stack pointer. */
12432 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12434 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12435 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12436 GEN_INT (UNITS_PER_WORD))));
12438 emit_label (varargs_label);
12439 LABEL_NUSES (varargs_label) = 1;
12443 /* We may have to tell the dataflow pass that the split stack prologue
12444 is initializing a scratch register. */
12446 static void
12447 ix86_live_on_entry (bitmap regs)
12449 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12451 gcc_assert (flag_split_stack);
12452 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12456 /* Extract the parts of an RTL expression that is a valid memory address
12457 for an instruction. Return 0 if the structure of the address is
12458 grossly off. Return -1 if the address contains ASHIFT, so it is not
12459 strictly valid, but is still used for computing the length of an lea instruction. */
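/* (Illustrative example: (plus (plus (mult (reg A) (const_int 4)) (reg B))
   (const_int 8)) decomposes into base = B, index = A, scale = 4, disp = 8,
   seg = SEG_DEFAULT, and the function returns 1.)  */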
12462 ix86_decompose_address (rtx addr, struct ix86_address *out)
12464 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12465 rtx base_reg, index_reg;
12466 HOST_WIDE_INT scale = 1;
12467 rtx scale_rtx = NULL_RTX;
12468 rtx tmp;
12469 int retval = 1;
12470 enum ix86_address_seg seg = SEG_DEFAULT;
12472 /* Allow zero-extended SImode addresses;
12473 they will be emitted with the addr32 prefix. */
12474 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12476 if (GET_CODE (addr) == ZERO_EXTEND
12477 && GET_MODE (XEXP (addr, 0)) == SImode)
12479 addr = XEXP (addr, 0);
12480 if (CONST_INT_P (addr))
12481 return 0;
12483 else if (GET_CODE (addr) == AND
12484 && const_32bit_mask (XEXP (addr, 1), DImode))
12486 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12487 if (addr == NULL_RTX)
12488 return 0;
12490 if (CONST_INT_P (addr))
12491 return 0;
12495 /* Allow SImode subregs of DImode addresses;
12496 they will be emitted with the addr32 prefix. */
12497 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12499 if (GET_CODE (addr) == SUBREG
12500 && GET_MODE (SUBREG_REG (addr)) == DImode)
12502 addr = SUBREG_REG (addr);
12503 if (CONST_INT_P (addr))
12504 return 0;
12508 if (REG_P (addr))
12509 base = addr;
12510 else if (GET_CODE (addr) == SUBREG)
12512 if (REG_P (SUBREG_REG (addr)))
12513 base = addr;
12514 else
12515 return 0;
12517 else if (GET_CODE (addr) == PLUS)
12519 rtx addends[4], op;
12520 int n = 0, i;
12522 op = addr;
12525 if (n >= 4)
12526 return 0;
12527 addends[n++] = XEXP (op, 1);
12528 op = XEXP (op, 0);
12530 while (GET_CODE (op) == PLUS);
12531 if (n >= 4)
12532 return 0;
12533 addends[n] = op;
12535 for (i = n; i >= 0; --i)
12537 op = addends[i];
12538 switch (GET_CODE (op))
12540 case MULT:
12541 if (index)
12542 return 0;
12543 index = XEXP (op, 0);
12544 scale_rtx = XEXP (op, 1);
12545 break;
12547 case ASHIFT:
12548 if (index)
12549 return 0;
12550 index = XEXP (op, 0);
12551 tmp = XEXP (op, 1);
12552 if (!CONST_INT_P (tmp))
12553 return 0;
12554 scale = INTVAL (tmp);
12555 if ((unsigned HOST_WIDE_INT) scale > 3)
12556 return 0;
12557 scale = 1 << scale;
12558 break;
12560 case ZERO_EXTEND:
12561 op = XEXP (op, 0);
12562 if (GET_CODE (op) != UNSPEC)
12563 return 0;
12564 /* FALLTHRU */
12566 case UNSPEC:
12567 if (XINT (op, 1) == UNSPEC_TP
12568 && TARGET_TLS_DIRECT_SEG_REFS
12569 && seg == SEG_DEFAULT)
12570 seg = DEFAULT_TLS_SEG_REG;
12571 else
12572 return 0;
12573 break;
12575 case SUBREG:
12576 if (!REG_P (SUBREG_REG (op)))
12577 return 0;
12578 /* FALLTHRU */
12580 case REG:
12581 if (!base)
12582 base = op;
12583 else if (!index)
12584 index = op;
12585 else
12586 return 0;
12587 break;
12589 case CONST:
12590 case CONST_INT:
12591 case SYMBOL_REF:
12592 case LABEL_REF:
12593 if (disp)
12594 return 0;
12595 disp = op;
12596 break;
12598 default:
12599 return 0;
12603 else if (GET_CODE (addr) == MULT)
12605 index = XEXP (addr, 0); /* index*scale */
12606 scale_rtx = XEXP (addr, 1);
12608 else if (GET_CODE (addr) == ASHIFT)
12610 /* We're called for lea too, which implements ashift on occasion. */
12611 index = XEXP (addr, 0);
12612 tmp = XEXP (addr, 1);
12613 if (!CONST_INT_P (tmp))
12614 return 0;
12615 scale = INTVAL (tmp);
12616 if ((unsigned HOST_WIDE_INT) scale > 3)
12617 return 0;
12618 scale = 1 << scale;
12619 retval = -1;
12621 else
12622 disp = addr; /* displacement */
12624 if (index)
12626 if (REG_P (index))
12628 else if (GET_CODE (index) == SUBREG
12629 && REG_P (SUBREG_REG (index)))
12631 else
12632 return 0;
12635 /* Extract the integral value of scale. */
12636 if (scale_rtx)
12638 if (!CONST_INT_P (scale_rtx))
12639 return 0;
12640 scale = INTVAL (scale_rtx);
12643 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12644 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12646 /* Avoid useless 0 displacement. */
12647 if (disp == const0_rtx && (base || index))
12648 disp = NULL_RTX;
12650 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
12651 if (base_reg && index_reg && scale == 1
12652 && (index_reg == arg_pointer_rtx
12653 || index_reg == frame_pointer_rtx
12654 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12656 rtx tmp;
12657 tmp = base, base = index, index = tmp;
12658 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12661 /* Special case: %ebp cannot be encoded as a base without a displacement.
12662 Similarly %r13. */
12663 if (!disp
12664 && base_reg
12665 && (base_reg == hard_frame_pointer_rtx
12666 || base_reg == frame_pointer_rtx
12667 || base_reg == arg_pointer_rtx
12668 || (REG_P (base_reg)
12669 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12670 || REGNO (base_reg) == R13_REG))))
12671 disp = const0_rtx;
12673 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
12674 Avoid this by transforming it to [%esi+0].
12675 Reload calls address legitimization without cfun defined, so we need
12676 to test cfun for being non-NULL. */
12677 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12678 && base_reg && !index_reg && !disp
12679 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12680 disp = const0_rtx;
12682 /* Special case: encode reg+reg instead of reg*2. */
12683 if (!base && index && scale == 2)
12684 base = index, base_reg = index_reg, scale = 1;
12686 /* Special case: scaling cannot be encoded without base or displacement. */
12687 if (!base && !disp && index && scale != 1)
12688 disp = const0_rtx;
12690 out->base = base;
12691 out->index = index;
12692 out->disp = disp;
12693 out->scale = scale;
12694 out->seg = seg;
12696 return retval;
12699 /* Return cost of the memory address x.
12700 For i386, it is better to use a complex address than let gcc copy
12701 the address into a reg and make a new pseudo. But not if the address
12702 requires two regs - that would mean more pseudos with longer
12703 lifetimes. */
12704 static int
12705 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12706 addr_space_t as ATTRIBUTE_UNUSED,
12707 bool speed ATTRIBUTE_UNUSED)
12709 struct ix86_address parts;
12710 int cost = 1;
12711 int ok = ix86_decompose_address (x, &parts);
12713 gcc_assert (ok);
12715 if (parts.base && GET_CODE (parts.base) == SUBREG)
12716 parts.base = SUBREG_REG (parts.base);
12717 if (parts.index && GET_CODE (parts.index) == SUBREG)
12718 parts.index = SUBREG_REG (parts.index);
12720 /* Attempt to minimize number of registers in the address. */
12721 if ((parts.base
12722 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12723 || (parts.index
12724 && (!REG_P (parts.index)
12725 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12726 cost++;
12728 if (parts.base
12729 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12730 && parts.index
12731 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12732 && parts.base != parts.index)
12733 cost++;
12735 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12736 since its predecode logic can't detect the length of instructions
12737 and decoding degenerates to vector decoded. Increase the cost of such
12738 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12739 to split such addresses or even refuse them altogether.
12741 The following addressing modes are affected:
12742 [base+scale*index]
12743 [scale*index+disp]
12744 [base+index]
12746 The first and last case may be avoidable by explicitly coding the zero into
12747 the memory address, but I don't have an AMD-K6 machine handy to check this
12748 theory. */
12750 if (TARGET_K6
12751 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12752 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12753 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12754 cost += 10;
12756 return cost;
12759 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12760 this is used to form addresses to local data when -fPIC is in
12761 use. */
12763 static bool
12764 darwin_local_data_pic (rtx disp)
12766 return (GET_CODE (disp) == UNSPEC
12767 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12770 /* Determine if a given RTX is a valid constant. We already know this
12771 satisfies CONSTANT_P. */
12773 static bool
12774 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12776 switch (GET_CODE (x))
12778 case CONST:
12779 x = XEXP (x, 0);
12781 if (GET_CODE (x) == PLUS)
12783 if (!CONST_INT_P (XEXP (x, 1)))
12784 return false;
12785 x = XEXP (x, 0);
12788 if (TARGET_MACHO && darwin_local_data_pic (x))
12789 return true;
12791 /* Only some unspecs are valid as "constants". */
12792 if (GET_CODE (x) == UNSPEC)
12793 switch (XINT (x, 1))
12795 case UNSPEC_GOT:
12796 case UNSPEC_GOTOFF:
12797 case UNSPEC_PLTOFF:
12798 return TARGET_64BIT;
12799 case UNSPEC_TPOFF:
12800 case UNSPEC_NTPOFF:
12801 x = XVECEXP (x, 0, 0);
12802 return (GET_CODE (x) == SYMBOL_REF
12803 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12804 case UNSPEC_DTPOFF:
12805 x = XVECEXP (x, 0, 0);
12806 return (GET_CODE (x) == SYMBOL_REF
12807 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12808 default:
12809 return false;
12812 /* We must have drilled down to a symbol. */
12813 if (GET_CODE (x) == LABEL_REF)
12814 return true;
12815 if (GET_CODE (x) != SYMBOL_REF)
12816 return false;
12817 /* FALLTHRU */
12819 case SYMBOL_REF:
12820 /* TLS symbols are never valid. */
12821 if (SYMBOL_REF_TLS_MODEL (x))
12822 return false;
12824 /* DLLIMPORT symbols are never valid. */
12825 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12826 && SYMBOL_REF_DLLIMPORT_P (x))
12827 return false;
12829 #if TARGET_MACHO
12830 /* mdynamic-no-pic */
12831 if (MACHO_DYNAMIC_NO_PIC_P)
12832 return machopic_symbol_defined_p (x);
12833 #endif
12834 break;
12836 case CONST_DOUBLE:
12837 if (GET_MODE (x) == TImode
12838 && x != CONST0_RTX (TImode)
12839 && !TARGET_64BIT)
12840 return false;
12841 break;
12843 case CONST_VECTOR:
12844 if (!standard_sse_constant_p (x))
12845 return false;
12847 default:
12848 break;
12851 /* Otherwise we handle everything else in the move patterns. */
12852 return true;
12855 /* Determine if it's legal to put X into the constant pool. This
12856 is not possible for the address of thread-local symbols, which
12857 is checked above. */
12859 static bool
12860 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12862 /* We can always put integral constants and vectors in memory. */
12863 switch (GET_CODE (x))
12865 case CONST_INT:
12866 case CONST_DOUBLE:
12867 case CONST_VECTOR:
12868 return false;
12870 default:
12871 break;
12873 return !ix86_legitimate_constant_p (mode, x);
12876 /* Nonzero if the symbol is marked as dllimport or as a stub-variable,
12877 otherwise zero. */
12879 static bool
12880 is_imported_p (rtx x)
12882 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12883 || GET_CODE (x) != SYMBOL_REF)
12884 return false;
12886 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12890 /* Nonzero if the constant value X is a legitimate general operand
12891 when generating PIC code. It is given that flag_pic is on and
12892 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12894 bool
12895 legitimate_pic_operand_p (rtx x)
12897 rtx inner;
12899 switch (GET_CODE (x))
12901 case CONST:
12902 inner = XEXP (x, 0);
12903 if (GET_CODE (inner) == PLUS
12904 && CONST_INT_P (XEXP (inner, 1)))
12905 inner = XEXP (inner, 0);
12907 /* Only some unspecs are valid as "constants". */
12908 if (GET_CODE (inner) == UNSPEC)
12909 switch (XINT (inner, 1))
12911 case UNSPEC_GOT:
12912 case UNSPEC_GOTOFF:
12913 case UNSPEC_PLTOFF:
12914 return TARGET_64BIT;
12915 case UNSPEC_TPOFF:
12916 x = XVECEXP (inner, 0, 0);
12917 return (GET_CODE (x) == SYMBOL_REF
12918 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12919 case UNSPEC_MACHOPIC_OFFSET:
12920 return legitimate_pic_address_disp_p (x);
12921 default:
12922 return false;
12924 /* FALLTHRU */
12926 case SYMBOL_REF:
12927 case LABEL_REF:
12928 return legitimate_pic_address_disp_p (x);
12930 default:
12931 return true;
12935 /* Determine if a given CONST RTX is a valid memory displacement
12936 in PIC mode. */
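/* (For instance, in 32-bit PIC code a displacement of the form
   (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)) is accepted by the
   checks below, provided gotoff_operand accepts the symbol.)  */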
12938 bool
12939 legitimate_pic_address_disp_p (rtx disp)
12941 bool saw_plus;
12943 /* In 64bit mode we can allow direct addresses of symbols and labels
12944 when they are not dynamic symbols. */
12945 if (TARGET_64BIT)
12947 rtx op0 = disp, op1;
12949 switch (GET_CODE (disp))
12951 case LABEL_REF:
12952 return true;
12954 case CONST:
12955 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12956 break;
12957 op0 = XEXP (XEXP (disp, 0), 0);
12958 op1 = XEXP (XEXP (disp, 0), 1);
12959 if (!CONST_INT_P (op1)
12960 || INTVAL (op1) >= 16*1024*1024
12961 || INTVAL (op1) < -16*1024*1024)
12962 break;
12963 if (GET_CODE (op0) == LABEL_REF)
12964 return true;
12965 if (GET_CODE (op0) == CONST
12966 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12967 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12968 return true;
12969 if (GET_CODE (op0) == UNSPEC
12970 && XINT (op0, 1) == UNSPEC_PCREL)
12971 return true;
12972 if (GET_CODE (op0) != SYMBOL_REF)
12973 break;
12974 /* FALLTHRU */
12976 case SYMBOL_REF:
12977 /* TLS references should always be enclosed in UNSPEC.
12978 A dllimported symbol always needs to be resolved. */
12979 if (SYMBOL_REF_TLS_MODEL (op0)
12980 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12981 return false;
12983 if (TARGET_PECOFF)
12985 if (is_imported_p (op0))
12986 return true;
12988 if (SYMBOL_REF_FAR_ADDR_P (op0)
12989 || !SYMBOL_REF_LOCAL_P (op0))
12990 break;
12992 /* Function symbols need to be resolved only for
12993 the large model.
12994 For the small model we don't need to resolve anything
12995 here. */
12996 if ((ix86_cmodel != CM_LARGE_PIC
12997 && SYMBOL_REF_FUNCTION_P (op0))
12998 || ix86_cmodel == CM_SMALL_PIC)
12999 return true;
13000 /* Non-external symbols don't need to be resolved for
13001 the large and medium models. */
13002 if ((ix86_cmodel == CM_LARGE_PIC
13003 || ix86_cmodel == CM_MEDIUM_PIC)
13004 && !SYMBOL_REF_EXTERNAL_P (op0))
13005 return true;
13007 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13008 && (SYMBOL_REF_LOCAL_P (op0)
13009 || (HAVE_LD_PIE_COPYRELOC
13010 && flag_pie
13011 && !(SYMBOL_REF_WEAK (op0)
13012 /* TODO: Temporary fix for weak defined symbols. Weak defined
13013 symbols in an executable cannot be overridden even with
13014 a non-weak symbol in a shared library.
13015 Revert after the fix is checked in here:
13016 http://gcc.gnu.org/ml/gcc-patches/2015-02/msg00366.html */
13017 && SYMBOL_REF_EXTERNAL_P (op0))
13018 && !SYMBOL_REF_FUNCTION_P (op0)))
13019 && ix86_cmodel != CM_LARGE_PIC)
13020 return true;
13021 break;
13023 default:
13024 break;
13027 if (GET_CODE (disp) != CONST)
13028 return false;
13029 disp = XEXP (disp, 0);
13031 if (TARGET_64BIT)
13033 /* It is not safe to allow PLUS expressions here; the allowed distance of
13034 GOT table entries is limited. We should not need these anyway. */
13035 if (GET_CODE (disp) != UNSPEC
13036 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13037 && XINT (disp, 1) != UNSPEC_GOTOFF
13038 && XINT (disp, 1) != UNSPEC_PCREL
13039 && XINT (disp, 1) != UNSPEC_PLTOFF))
13040 return false;
13042 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13043 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13044 return false;
13045 return true;
13048 saw_plus = false;
13049 if (GET_CODE (disp) == PLUS)
13051 if (!CONST_INT_P (XEXP (disp, 1)))
13052 return false;
13053 disp = XEXP (disp, 0);
13054 saw_plus = true;
13057 if (TARGET_MACHO && darwin_local_data_pic (disp))
13058 return true;
13060 if (GET_CODE (disp) != UNSPEC)
13061 return false;
13063 switch (XINT (disp, 1))
13065 case UNSPEC_GOT:
13066 if (saw_plus)
13067 return false;
13068 /* We need to check for both symbols and labels because VxWorks loads
13069 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13070 details. */
13071 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13072 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13073 case UNSPEC_GOTOFF:
13074 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13075 While the ABI also specifies a 32bit relocation, we don't produce it in
13076 the small PIC model at all. */
13077 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13078 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13079 && !TARGET_64BIT)
13080 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13081 return false;
13082 case UNSPEC_GOTTPOFF:
13083 case UNSPEC_GOTNTPOFF:
13084 case UNSPEC_INDNTPOFF:
13085 if (saw_plus)
13086 return false;
13087 disp = XVECEXP (disp, 0, 0);
13088 return (GET_CODE (disp) == SYMBOL_REF
13089 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13090 case UNSPEC_NTPOFF:
13091 disp = XVECEXP (disp, 0, 0);
13092 return (GET_CODE (disp) == SYMBOL_REF
13093 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13094 case UNSPEC_DTPOFF:
13095 disp = XVECEXP (disp, 0, 0);
13096 return (GET_CODE (disp) == SYMBOL_REF
13097 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13100 return false;
13103 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Reload the parts of
13104 the input X that are not valid, if any.
13105 Return true if the calling macro should goto WIN,
13106 false if it should not. */
13108 bool
13109 ix86_legitimize_reload_address (rtx x,
13110 enum machine_mode mode ATTRIBUTE_UNUSED,
13111 int opnum, int type,
13112 int ind_levels ATTRIBUTE_UNUSED)
13114 /* Reload can generate:
13116 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13117 (reg:DI 97))
13118 (reg:DI 2 cx))
13120 This RTX is rejected by ix86_legitimate_address_p due to
13121 non-strictness of base register 97. Following this rejection,
13122 reload pushes all three components into separate registers,
13123 creating an invalid memory address RTX.
13125 The following code reloads only the invalid part of the
13126 memory address RTX. */
13128 if (GET_CODE (x) == PLUS
13129 && REG_P (XEXP (x, 1))
13130 && GET_CODE (XEXP (x, 0)) == PLUS
13131 && REG_P (XEXP (XEXP (x, 0), 1)))
13133 rtx base, index;
13134 bool something_reloaded = false;
13136 base = XEXP (XEXP (x, 0), 1);
13137 if (!REG_OK_FOR_BASE_STRICT_P (base))
13139 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13140 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13141 opnum, (enum reload_type) type);
13142 something_reloaded = true;
13145 index = XEXP (x, 1);
13146 if (!REG_OK_FOR_INDEX_STRICT_P (index))
13148 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13149 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13150 opnum, (enum reload_type) type);
13151 something_reloaded = true;
13154 gcc_assert (something_reloaded);
13155 return true;
13158 return false;
13161 /* Determine if OP is a suitable RTX for an address register.
13162 Return the naked register if a register or a register subreg is
13163 found, otherwise return NULL_RTX. */
13165 static rtx
13166 ix86_validate_address_register (rtx op)
13168 enum machine_mode mode = GET_MODE (op);
13170 /* Only SImode or DImode registers can form the address. */
13171 if (mode != SImode && mode != DImode)
13172 return NULL_RTX;
13174 if (REG_P (op))
13175 return op;
13176 else if (GET_CODE (op) == SUBREG)
13178 rtx reg = SUBREG_REG (op);
13180 if (!REG_P (reg))
13181 return NULL_RTX;
13183 mode = GET_MODE (reg);
13185 /* Don't allow SUBREGs that span more than a word. It can
13186 lead to spill failures when the register is one word out
13187 of a two word structure. */
13188 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13189 return NULL_RTX;
13191 /* Allow only SUBREGs of non-eliminable hard registers. */
13192 if (register_no_elim_operand (reg, mode))
13193 return reg;
13196 /* Op is not a register. */
13197 return NULL_RTX;
13200 /* Recognizes RTL expressions that are valid memory addresses for an
13201 instruction. The MODE argument is the machine mode for the MEM
13202 expression that wants to use this address.
13204 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13205 convert common non-canonical forms to canonical form so that they will
13206 be recognized. */
13208 static bool
13209 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
13210 rtx addr, bool strict)
13212 struct ix86_address parts;
13213 rtx base, index, disp;
13214 HOST_WIDE_INT scale;
13215 enum ix86_address_seg seg;
13217 if (ix86_decompose_address (addr, &parts) <= 0)
13218 /* Decomposition failed. */
13219 return false;
13221 base = parts.base;
13222 index = parts.index;
13223 disp = parts.disp;
13224 scale = parts.scale;
13225 seg = parts.seg;
13227 /* Validate base register. */
13228 if (base)
13230 rtx reg = ix86_validate_address_register (base);
13232 if (reg == NULL_RTX)
13233 return false;
13235 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13236 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13237 /* Base is not valid. */
13238 return false;
13241 /* Validate index register. */
13242 if (index)
13244 rtx reg = ix86_validate_address_register (index);
13246 if (reg == NULL_RTX)
13247 return false;
13249 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13250 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13251 /* Index is not valid. */
13252 return false;
13255 /* Index and base should have the same mode. */
13256 if (base && index
13257 && GET_MODE (base) != GET_MODE (index))
13258 return false;
13260 /* Address override works only on the (%reg) part of %fs:(%reg). */
13261 if (seg != SEG_DEFAULT
13262 && ((base && GET_MODE (base) != word_mode)
13263 || (index && GET_MODE (index) != word_mode)))
13264 return false;
13266 /* Validate scale factor. */
13267 if (scale != 1)
13269 if (!index)
13270 /* Scale without index. */
13271 return false;
13273 if (scale != 2 && scale != 4 && scale != 8)
13274 /* Scale is not a valid multiplier. */
13275 return false;
13278 /* Validate displacement. */
13279 if (disp)
13281 if (GET_CODE (disp) == CONST
13282 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13283 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13284 switch (XINT (XEXP (disp, 0), 1))
13286 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13287 used. While the ABI also specifies 32bit relocations, we don't produce
13288 them at all and use IP-relative addressing instead. */
13289 case UNSPEC_GOT:
13290 case UNSPEC_GOTOFF:
13291 gcc_assert (flag_pic);
13292 if (!TARGET_64BIT)
13293 goto is_legitimate_pic;
13295 /* 64bit address unspec. */
13296 return false;
13298 case UNSPEC_GOTPCREL:
13299 case UNSPEC_PCREL:
13300 gcc_assert (flag_pic);
13301 goto is_legitimate_pic;
13303 case UNSPEC_GOTTPOFF:
13304 case UNSPEC_GOTNTPOFF:
13305 case UNSPEC_INDNTPOFF:
13306 case UNSPEC_NTPOFF:
13307 case UNSPEC_DTPOFF:
13308 break;
13310 case UNSPEC_STACK_CHECK:
13311 gcc_assert (flag_split_stack);
13312 break;
13314 default:
13315 /* Invalid address unspec. */
13316 return false;
13319 else if (SYMBOLIC_CONST (disp)
13320 && (flag_pic
13321 || (TARGET_MACHO
13322 #if TARGET_MACHO
13323 && MACHOPIC_INDIRECT
13324 && !machopic_operand_p (disp)
13325 #endif
13329 is_legitimate_pic:
13330 if (TARGET_64BIT && (index || base))
13332 /* foo@dtpoff(%rX) is ok. */
13333 if (GET_CODE (disp) != CONST
13334 || GET_CODE (XEXP (disp, 0)) != PLUS
13335 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13336 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13337 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13338 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13339 /* Non-constant pic memory reference. */
13340 return false;
13342 else if ((!TARGET_MACHO || flag_pic)
13343 && ! legitimate_pic_address_disp_p (disp))
13344 /* Displacement is an invalid pic construct. */
13345 return false;
13346 #if TARGET_MACHO
13347 else if (MACHO_DYNAMIC_NO_PIC_P
13348 && !ix86_legitimate_constant_p (Pmode, disp))
13349 /* displacement must be referenced via non_lazy_pointer */
13350 return false;
13351 #endif
13353 /* This code used to verify that a symbolic pic displacement
13354 includes the pic_offset_table_rtx register.
13356 While this is a good idea, unfortunately these constructs may
13357 be created by the "adds using lea" optimization for incorrect
13358 code like:
13360 int a;
13361 int foo(int i)
13363 return *(&a+i);
13366 This code is nonsensical, but results in addressing the
13367 GOT table with pic_offset_table_rtx as base. We can't
13368 easily refuse it, since it gets matched by the
13369 "addsi3" pattern, which later gets split to lea in
13370 case the output register differs from the input. While this
13371 could be handled by a separate addsi pattern for this case
13372 that never results in lea, disabling this test seems to be
13373 the easier and correct fix for the crash. */
13375 else if (GET_CODE (disp) != LABEL_REF
13376 && !CONST_INT_P (disp)
13377 && (GET_CODE (disp) != CONST
13378 || !ix86_legitimate_constant_p (Pmode, disp))
13379 && (GET_CODE (disp) != SYMBOL_REF
13380 || !ix86_legitimate_constant_p (Pmode, disp)))
13381 /* Displacement is not constant. */
13382 return false;
13383 else if (TARGET_64BIT
13384 && !x86_64_immediate_operand (disp, VOIDmode))
13385 /* Displacement is out of range. */
13386 return false;
13387 /* In x32 mode, constant addresses are sign extended to 64bit, so
13388 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13389 else if (TARGET_X32 && !(index || base)
13390 && CONST_INT_P (disp)
13391 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13392 return false;
13395 /* Everything looks valid. */
13396 return true;
13399 /* Determine if a given RTX is a valid constant address. */
13401 bool
13402 constant_address_p (rtx x)
13404 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13407 /* Return a unique alias set for the GOT. */
13409 static alias_set_type
13410 ix86_GOT_alias_set (void)
13412 static alias_set_type set = -1;
13413 if (set == -1)
13414 set = new_alias_set ();
13415 return set;
13418 /* Return a legitimate reference for ORIG (an address) using the
13419 register REG. If REG is 0, a new pseudo is generated.
13421 There are two types of references that must be handled:
13423 1. Global data references must load the address from the GOT, via
13424 the PIC reg. An insn is emitted to do this load, and the reg is
13425 returned.
13427 2. Static data references, constant pool addresses, and code labels
13428 compute the address as an offset from the GOT, whose base is in
13429 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13430 differentiate them from global data objects. The returned
13431 address is the PIC reg + an unspec constant.
13433 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13434 reg also appears in the address. */
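/* (Illustration of case 1: for a global symbol in 32-bit PIC code the
   load emitted below has the shape
   (set (reg) (mem (plus pic_offset_table_rtx
   (const (unspec [(symbol_ref "sym")] UNSPEC_GOT))))).)  */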
13436 static rtx
13437 legitimize_pic_address (rtx orig, rtx reg)
13439 rtx addr = orig;
13440 rtx new_rtx = orig;
13442 #if TARGET_MACHO
13443 if (TARGET_MACHO && !TARGET_64BIT)
13445 if (reg == 0)
13446 reg = gen_reg_rtx (Pmode);
13447 /* Use the generic Mach-O PIC machinery. */
13448 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13450 #endif
13452 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13454 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13455 if (tmp)
13456 return tmp;
13459 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13460 new_rtx = addr;
13461 else if (TARGET_64BIT && !TARGET_PECOFF
13462 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13464 rtx tmpreg;
13465 /* This symbol may be referenced via a displacement from the PIC
13466 base address (@GOTOFF). */
13468 if (reload_in_progress)
13469 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13470 if (GET_CODE (addr) == CONST)
13471 addr = XEXP (addr, 0);
13472 if (GET_CODE (addr) == PLUS)
13474 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13475 UNSPEC_GOTOFF);
13476 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13478 else
13479 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13480 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13481 if (!reg)
13482 tmpreg = gen_reg_rtx (Pmode);
13483 else
13484 tmpreg = reg;
13485 emit_move_insn (tmpreg, new_rtx);
13487 if (reg != 0)
13489 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13490 tmpreg, 1, OPTAB_DIRECT);
13491 new_rtx = reg;
13493 else
13494 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13496 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13498 /* This symbol may be referenced via a displacement from the PIC
13499 base address (@GOTOFF). */
13501 if (reload_in_progress)
13502 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13503 if (GET_CODE (addr) == CONST)
13504 addr = XEXP (addr, 0);
13505 if (GET_CODE (addr) == PLUS)
13507 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13508 UNSPEC_GOTOFF);
13509 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13511 else
13512 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13513 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13514 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13516 if (reg != 0)
13518 emit_move_insn (reg, new_rtx);
13519 new_rtx = reg;
13522 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13523 /* We can't use @GOTOFF for text labels on VxWorks;
13524 see gotoff_operand. */
13525 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13527 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13528 if (tmp)
13529 return tmp;
13531 /* For x64 PE-COFF there is no GOT table, so we use the address
13532 directly. */
13533 if (TARGET_64BIT && TARGET_PECOFF)
13535 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13536 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13538 if (reg == 0)
13539 reg = gen_reg_rtx (Pmode);
13540 emit_move_insn (reg, new_rtx);
13541 new_rtx = reg;
13543 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13545 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13546 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13547 new_rtx = gen_const_mem (Pmode, new_rtx);
13548 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13550 if (reg == 0)
13551 reg = gen_reg_rtx (Pmode);
13552 /* Use gen_movsi directly, otherwise the address is loaded
13553 into a register for CSE. We don't want to CSE these addresses;
13554 instead we CSE addresses from the GOT table, so skip this. */
13555 emit_insn (gen_movsi (reg, new_rtx));
13556 new_rtx = reg;
13558 else
13560 /* This symbol must be referenced via a load from the
13561 Global Offset Table (@GOT). */
13563 if (reload_in_progress)
13564 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13565 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13566 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13567 if (TARGET_64BIT)
13568 new_rtx = force_reg (Pmode, new_rtx);
13569 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13570 new_rtx = gen_const_mem (Pmode, new_rtx);
13571 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13573 if (reg == 0)
13574 reg = gen_reg_rtx (Pmode);
13575 emit_move_insn (reg, new_rtx);
13576 new_rtx = reg;
13579 else
13581 if (CONST_INT_P (addr)
13582 && !x86_64_immediate_operand (addr, VOIDmode))
13584 if (reg)
13586 emit_move_insn (reg, addr);
13587 new_rtx = reg;
13589 else
13590 new_rtx = force_reg (Pmode, addr);
13592 else if (GET_CODE (addr) == CONST)
13594 addr = XEXP (addr, 0);
13596 /* We must match stuff we generated before. Assume the only
13597 unspecs that can get here are ours. Not that we could do
13598 anything with them anyway.... */
13599 if (GET_CODE (addr) == UNSPEC
13600 || (GET_CODE (addr) == PLUS
13601 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13602 return orig;
13603 gcc_assert (GET_CODE (addr) == PLUS);
13605 if (GET_CODE (addr) == PLUS)
13607 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13609 /* Check first to see if this is a constant offset from a @GOTOFF
13610 symbol reference. */
13611 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13612 && CONST_INT_P (op1))
13614 if (!TARGET_64BIT)
13616 if (reload_in_progress)
13617 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13618 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13619 UNSPEC_GOTOFF);
13620 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13621 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13622 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13624 if (reg != 0)
13626 emit_move_insn (reg, new_rtx);
13627 new_rtx = reg;
13630 else
13632 if (INTVAL (op1) < -16*1024*1024
13633 || INTVAL (op1) >= 16*1024*1024)
13635 if (!x86_64_immediate_operand (op1, Pmode))
13636 op1 = force_reg (Pmode, op1);
13637 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13641 else
13643 rtx base = legitimize_pic_address (op0, reg);
13644 enum machine_mode mode = GET_MODE (base);
13645 new_rtx
13646 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13648 if (CONST_INT_P (new_rtx))
13650 if (INTVAL (new_rtx) < -16*1024*1024
13651 || INTVAL (new_rtx) >= 16*1024*1024)
13653 if (!x86_64_immediate_operand (new_rtx, mode))
13654 new_rtx = force_reg (mode, new_rtx);
13655 new_rtx
13656 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13658 else
13659 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13661 else
13663 if (GET_CODE (new_rtx) == PLUS
13664 && CONSTANT_P (XEXP (new_rtx, 1)))
13666 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13667 new_rtx = XEXP (new_rtx, 1);
13669 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13674 return new_rtx;
13677 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13679 static rtx
13680 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13682 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13684 if (GET_MODE (tp) != tp_mode)
13686 gcc_assert (GET_MODE (tp) == SImode);
13687 gcc_assert (tp_mode == DImode);
13689 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13692 if (to_reg)
13693 tp = copy_to_mode_reg (tp_mode, tp);
13695 return tp;
13698 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13700 static GTY(()) rtx ix86_tls_symbol;
13702 static rtx
13703 ix86_tls_get_addr (void)
13705 if (!ix86_tls_symbol)
13707 const char *sym
13708 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13709 ? "___tls_get_addr" : "__tls_get_addr");
13711 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13714 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13716 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13717 UNSPEC_PLTOFF);
13718 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13719 gen_rtx_CONST (Pmode, unspec));
13722 return ix86_tls_symbol;
13725 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13727 static GTY(()) rtx ix86_tls_module_base_symbol;
13730 ix86_tls_module_base (void)
13732 if (!ix86_tls_module_base_symbol)
13734 ix86_tls_module_base_symbol
13735 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13737 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13738 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13741 return ix86_tls_module_base_symbol;
13744 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13745 false if we expect this to be used for a memory address and true if
13746 we expect to load the address into a register. */
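/* (Sketch of the local-exec case handled below: with GNU TLS the returned
   address has the shape
   (plus <thread pointer> (const (unspec [(symbol_ref "x")] UNSPEC_NTPOFF))),
   i.e. a %fs/%gs-relative reference to the variable's TP offset.)  */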
13748 static rtx
13749 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13751 rtx dest, base, off;
13752 rtx pic = NULL_RTX, tp = NULL_RTX;
13753 enum machine_mode tp_mode = Pmode;
13754 int type;
13756 /* Fall back to the global dynamic model if the toolchain cannot support
13757 local dynamic. */
13758 if (TARGET_SUN_TLS && !TARGET_64BIT
13759 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13760 && model == TLS_MODEL_LOCAL_DYNAMIC)
13761 model = TLS_MODEL_GLOBAL_DYNAMIC;
13763 switch (model)
13765 case TLS_MODEL_GLOBAL_DYNAMIC:
13766 dest = gen_reg_rtx (Pmode);
13768 if (!TARGET_64BIT)
13770 if (flag_pic && !TARGET_PECOFF)
13771 pic = pic_offset_table_rtx;
13772 else
13774 pic = gen_reg_rtx (Pmode);
13775 emit_insn (gen_set_got (pic));
13779 if (TARGET_GNU2_TLS)
13781 if (TARGET_64BIT)
13782 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13783 else
13784 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13786 tp = get_thread_pointer (Pmode, true);
13787 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13789 if (GET_MODE (x) != Pmode)
13790 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13792 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13794 else
13796 rtx caddr = ix86_tls_get_addr ();
13798 if (TARGET_64BIT)
13800 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13801 rtx insns;
13803 start_sequence ();
13804 emit_call_insn
13805 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13806 insns = get_insns ();
13807 end_sequence ();
13809 if (GET_MODE (x) != Pmode)
13810 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13812 RTL_CONST_CALL_P (insns) = 1;
13813 emit_libcall_block (insns, dest, rax, x);
13815 else
13816 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13818 break;
13820 case TLS_MODEL_LOCAL_DYNAMIC:
13821 base = gen_reg_rtx (Pmode);
13823 if (!TARGET_64BIT)
13825 if (flag_pic)
13826 pic = pic_offset_table_rtx;
13827 else
13829 pic = gen_reg_rtx (Pmode);
13830 emit_insn (gen_set_got (pic));
13834 if (TARGET_GNU2_TLS)
13836 rtx tmp = ix86_tls_module_base ();
13838 if (TARGET_64BIT)
13839 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13840 else
13841 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13843 tp = get_thread_pointer (Pmode, true);
13844 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13845 gen_rtx_MINUS (Pmode, tmp, tp));
13847 else
13849 rtx caddr = ix86_tls_get_addr ();
13851 if (TARGET_64BIT)
13853 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13854 rtx insns, eqv;
13856 start_sequence ();
13857 emit_call_insn
13858 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13859 insns = get_insns ();
13860 end_sequence ();
13862 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13863 share the LD_BASE result with other LD model accesses. */
13864 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13865 UNSPEC_TLS_LD_BASE);
13867 RTL_CONST_CALL_P (insns) = 1;
13868 emit_libcall_block (insns, base, rax, eqv);
13870 else
13871 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13874 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13875 off = gen_rtx_CONST (Pmode, off);
13877 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13879 if (TARGET_GNU2_TLS)
13881 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13883 if (GET_MODE (x) != Pmode)
13884 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13886 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13888 break;
13890 case TLS_MODEL_INITIAL_EXEC:
13891 if (TARGET_64BIT)
13893 if (TARGET_SUN_TLS && !TARGET_X32)
13895 /* The Sun linker took the AMD64 TLS spec literally
13896 and can only handle %rax as destination of the
13897 initial executable code sequence. */
13899 dest = gen_reg_rtx (DImode);
13900 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13901 return dest;
13904 /* Generate DImode references to avoid %fs:(%reg32)
13905 problems and linker IE->LE relaxation bug. */
13906 tp_mode = DImode;
13907 pic = NULL;
13908 type = UNSPEC_GOTNTPOFF;
13910 else if (flag_pic)
13912 if (reload_in_progress)
13913 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13914 pic = pic_offset_table_rtx;
13915 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13917 else if (!TARGET_ANY_GNU_TLS)
13919 pic = gen_reg_rtx (Pmode);
13920 emit_insn (gen_set_got (pic));
13921 type = UNSPEC_GOTTPOFF;
13923 else
13925 pic = NULL;
13926 type = UNSPEC_INDNTPOFF;
13929 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13930 off = gen_rtx_CONST (tp_mode, off);
13931 if (pic)
13932 off = gen_rtx_PLUS (tp_mode, pic, off);
13933 off = gen_const_mem (tp_mode, off);
13934 set_mem_alias_set (off, ix86_GOT_alias_set ());
13936 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13938 base = get_thread_pointer (tp_mode,
13939 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13940 off = force_reg (tp_mode, off);
13941 return gen_rtx_PLUS (tp_mode, base, off);
13943 else
13945 base = get_thread_pointer (Pmode, true);
13946 dest = gen_reg_rtx (Pmode);
13947 emit_insn (ix86_gen_sub3 (dest, base, off));
13949 break;
13951 case TLS_MODEL_LOCAL_EXEC:
13952 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13953 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13954 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13955 off = gen_rtx_CONST (Pmode, off);
13957 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13959 base = get_thread_pointer (Pmode,
13960 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13961 return gen_rtx_PLUS (Pmode, base, off);
13963 else
13965 base = get_thread_pointer (Pmode, true);
13966 dest = gen_reg_rtx (Pmode);
13967 emit_insn (ix86_gen_sub3 (dest, base, off));
13969 break;
13971 default:
13972 gcc_unreachable ();
13975 return dest;
13978 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13979 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13980 unique refptr-DECL symbol corresponding to symbol DECL. */
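/* (For example, a decl named "foo" yields the dllimport symbol "__imp_foo"
   or "__imp__foo" depending on the user label prefix, and the refptr
   symbol ".refptr.foo" or "refptr.foo"; see the prefix selection below.)  */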
13982 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13983 htab_t dllimport_map;
13985 static tree
13986 get_dllimport_decl (tree decl, bool beimport)
13988 struct tree_map *h, in;
13989 void **loc;
13990 const char *name;
13991 const char *prefix;
13992 size_t namelen, prefixlen;
13993 char *imp_name;
13994 tree to;
13995 rtx rtl;
13997 if (!dllimport_map)
13998 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
14000 in.hash = htab_hash_pointer (decl);
14001 in.base.from = decl;
14002 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
14003 h = (struct tree_map *) *loc;
14004 if (h)
14005 return h->to;
14007 *loc = h = ggc_alloc_tree_map ();
14008 h->hash = in.hash;
14009 h->base.from = decl;
14010 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14011 VAR_DECL, NULL, ptr_type_node);
14012 DECL_ARTIFICIAL (to) = 1;
14013 DECL_IGNORED_P (to) = 1;
14014 DECL_EXTERNAL (to) = 1;
14015 TREE_READONLY (to) = 1;
14017 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14018 name = targetm.strip_name_encoding (name);
14019 if (beimport)
14020 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14021 ? "*__imp_" : "*__imp__";
14022 else
14023 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14024 namelen = strlen (name);
14025 prefixlen = strlen (prefix);
14026 imp_name = (char *) alloca (namelen + prefixlen + 1);
14027 memcpy (imp_name, prefix, prefixlen);
14028 memcpy (imp_name + prefixlen, name, namelen + 1);
14030 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14031 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14032 SET_SYMBOL_REF_DECL (rtl, to);
14033 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14034 if (!beimport)
14036 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14037 #ifdef SUB_TARGET_RECORD_STUB
14038 SUB_TARGET_RECORD_STUB (name);
14039 #endif
14042 rtl = gen_const_mem (Pmode, rtl);
14043 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14045 SET_DECL_RTL (to, rtl);
14046 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14048 return to;
14051 /* Expand SYMBOL into its corresponding far-address symbol.
14052 WANT_REG is true if we require the result be a register. */
14054 static rtx
14055 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14057 tree imp_decl;
14058 rtx x;
14060 gcc_assert (SYMBOL_REF_DECL (symbol));
14061 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14063 x = DECL_RTL (imp_decl);
14064 if (want_reg)
14065 x = force_reg (Pmode, x);
14066 return x;
14069 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14070 true if we require the result be a register. */
14072 static rtx
14073 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14075 tree imp_decl;
14076 rtx x;
14078 gcc_assert (SYMBOL_REF_DECL (symbol));
14079 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14081 x = DECL_RTL (imp_decl);
14082 if (want_reg)
14083 x = force_reg (Pmode, x);
14084 return x;
14087 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
14088 is true if we require the result be a register. */
14090 static rtx
14091 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14093 if (!TARGET_PECOFF)
14094 return NULL_RTX;
14096 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14098 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14099 return legitimize_dllimport_symbol (addr, inreg);
14100 if (GET_CODE (addr) == CONST
14101 && GET_CODE (XEXP (addr, 0)) == PLUS
14102 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14103 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14105 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14106 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14110 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14111 return NULL_RTX;
14112 if (GET_CODE (addr) == SYMBOL_REF
14113 && !is_imported_p (addr)
14114 && SYMBOL_REF_EXTERNAL_P (addr)
14115 && SYMBOL_REF_DECL (addr))
14116 return legitimize_pe_coff_extern_decl (addr, inreg);
14118 if (GET_CODE (addr) == CONST
14119 && GET_CODE (XEXP (addr, 0)) == PLUS
14120 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14121 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14122 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14123 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14125 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14126 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14128 return NULL_RTX;
14131 /* Try machine-dependent ways of modifying an illegitimate address
14132 to be legitimate. If we find one, return the new, valid address.
14133 This macro is used in only one place: `memory_address' in explow.c.
14135 OLDX is the address as it was before break_out_memory_refs was called.
14136 In some cases it is useful to look at this to decide what needs to be done.
14138 It is always safe for this macro to do nothing. It exists to recognize
14139 opportunities to optimize the output.
14141 For the 80386, we handle X+REG by loading X into a register R and
14142 using R+REG. R will go in a general reg and indexing will be used.
14143 However, if REG is a broken-out memory address or multiplication,
14144 nothing needs to be done because REG can certainly go in a general reg.
14146 When -fpic is used, special handling is needed for symbolic references.
14147 See comments by legitimize_pic_address in i386.c for details. */
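/* (For instance, the canonicalization below rewrites
   (plus (ashift (reg) (const_int 2)) (reg)) into
   (plus (mult (reg) (const_int 4)) (reg)) so that the address matches the
   canonical base + index*scale form.)  */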
14149 static rtx
14150 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
14151 enum machine_mode mode)
14153 int changed = 0;
14154 unsigned log;
14156 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14157 if (log)
14158 return legitimize_tls_address (x, (enum tls_model) log, false);
14159 if (GET_CODE (x) == CONST
14160 && GET_CODE (XEXP (x, 0)) == PLUS
14161 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14162 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14164 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14165 (enum tls_model) log, false);
14166 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14169 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14171 rtx tmp = legitimize_pe_coff_symbol (x, true);
14172 if (tmp)
14173 return tmp;
14176 if (flag_pic && SYMBOLIC_CONST (x))
14177 return legitimize_pic_address (x, 0);
14179 #if TARGET_MACHO
14180 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14181 return machopic_indirect_data_reference (x, 0);
14182 #endif
14184 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
14185 if (GET_CODE (x) == ASHIFT
14186 && CONST_INT_P (XEXP (x, 1))
14187 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14189 changed = 1;
14190 log = INTVAL (XEXP (x, 1));
14191 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14192 GEN_INT (1 << log));
14195 if (GET_CODE (x) == PLUS)
14197 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14199 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14200 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14201 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14203 changed = 1;
14204 log = INTVAL (XEXP (XEXP (x, 0), 1));
14205 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14206 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14207 GEN_INT (1 << log));
14210 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14211 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14212 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14214 changed = 1;
14215 log = INTVAL (XEXP (XEXP (x, 1), 1));
14216 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14217 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14218 GEN_INT (1 << log));
14221 /* Put multiply first if it isn't already. */
14222 if (GET_CODE (XEXP (x, 1)) == MULT)
14224 rtx tmp = XEXP (x, 0);
14225 XEXP (x, 0) = XEXP (x, 1);
14226 XEXP (x, 1) = tmp;
14227 changed = 1;
14230 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14231 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14232 created by virtual register instantiation, register elimination, and
14233 similar optimizations. */
14234 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14236 changed = 1;
14237 x = gen_rtx_PLUS (Pmode,
14238 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14239 XEXP (XEXP (x, 1), 0)),
14240 XEXP (XEXP (x, 1), 1));
14243 /* Canonicalize
14244 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14245 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14246 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14247 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14248 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14249 && CONSTANT_P (XEXP (x, 1)))
14251 rtx constant;
14252 rtx other = NULL_RTX;
14254 if (CONST_INT_P (XEXP (x, 1)))
14256 constant = XEXP (x, 1);
14257 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14259 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14261 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14262 other = XEXP (x, 1);
14264 else
14265 constant = 0;
14267 if (constant)
14269 changed = 1;
14270 x = gen_rtx_PLUS (Pmode,
14271 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14272 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14273 plus_constant (Pmode, other,
14274 INTVAL (constant)));
14278 if (changed && ix86_legitimate_address_p (mode, x, false))
14279 return x;
14281 if (GET_CODE (XEXP (x, 0)) == MULT)
14283 changed = 1;
14284 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14287 if (GET_CODE (XEXP (x, 1)) == MULT)
14289 changed = 1;
14290 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14293 if (changed
14294 && REG_P (XEXP (x, 1))
14295 && REG_P (XEXP (x, 0)))
14296 return x;
14298 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14300 changed = 1;
14301 x = legitimize_pic_address (x, 0);
14304 if (changed && ix86_legitimate_address_p (mode, x, false))
14305 return x;
14307 if (REG_P (XEXP (x, 0)))
14309 rtx temp = gen_reg_rtx (Pmode);
14310 rtx val = force_operand (XEXP (x, 1), temp);
14311 if (val != temp)
14313 val = convert_to_mode (Pmode, val, 1);
14314 emit_move_insn (temp, val);
14317 XEXP (x, 1) = temp;
14318 return x;
14321 else if (REG_P (XEXP (x, 1)))
14323 rtx temp = gen_reg_rtx (Pmode);
14324 rtx val = force_operand (XEXP (x, 0), temp);
14325 if (val != temp)
14327 val = convert_to_mode (Pmode, val, 1);
14328 emit_move_insn (temp, val);
14331 XEXP (x, 0) = temp;
14332 return x;
14336 return x;
14339 /* Print an integer constant expression in assembler syntax. Addition
14340 and subtraction are the only arithmetic that may appear in these
14341 expressions. FILE is the stdio stream to write to, X is the rtx, and
14342 CODE is the operand print code from the output string. */
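/* As a hedged illustration for a typical ELF target: a symbol wrapped in
   (unspec [foo] UNSPEC_GOTOFF) is printed as "foo@GOTOFF", and with CODE
   'P' a non-local symbol is followed by "@PLT", matching the cases in the
   switch below.  */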
14344 static void
14345 output_pic_addr_const (FILE *file, rtx x, int code)
14347 char buf[256];
14349 switch (GET_CODE (x))
14351 case PC:
14352 gcc_assert (flag_pic);
14353 putc ('.', file);
14354 break;
14356 case SYMBOL_REF:
14357 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14358 output_addr_const (file, x);
14359 else
14361 const char *name = XSTR (x, 0);
14363 /* Mark the decl as referenced so that cgraph will
14364 output the function. */
14365 if (SYMBOL_REF_DECL (x))
14366 mark_decl_referenced (SYMBOL_REF_DECL (x));
14368 #if TARGET_MACHO
14369 if (MACHOPIC_INDIRECT
14370 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14371 name = machopic_indirection_name (x, /*stub_p=*/true);
14372 #endif
14373 assemble_name (file, name);
14375 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14376 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14377 fputs ("@PLT", file);
14378 break;
14380 case LABEL_REF:
14381 x = XEXP (x, 0);
14382 /* FALLTHRU */
14383 case CODE_LABEL:
14384 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14385 assemble_name (asm_out_file, buf);
14386 break;
14388 case CONST_INT:
14389 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14390 break;
14392 case CONST:
14393 /* This used to output parentheses around the expression,
14394 but that does not work on the 386 (either ATT or BSD assembler). */
14395 output_pic_addr_const (file, XEXP (x, 0), code);
14396 break;
14398 case CONST_DOUBLE:
14399 if (GET_MODE (x) == VOIDmode)
14401 /* We can use %d if the number is <32 bits and positive. */
14402 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14403 fprintf (file, "0x%lx%08lx",
14404 (unsigned long) CONST_DOUBLE_HIGH (x),
14405 (unsigned long) CONST_DOUBLE_LOW (x));
14406 else
14407 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14409 else
14410 /* We can't handle floating point constants;
14411 TARGET_PRINT_OPERAND must handle them. */
14412 output_operand_lossage ("floating constant misused");
14413 break;
14415 case PLUS:
14416 /* Some assemblers need integer constants to appear first. */
14417 if (CONST_INT_P (XEXP (x, 0)))
14419 output_pic_addr_const (file, XEXP (x, 0), code);
14420 putc ('+', file);
14421 output_pic_addr_const (file, XEXP (x, 1), code);
14423 else
14425 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14426 output_pic_addr_const (file, XEXP (x, 1), code);
14427 putc ('+', file);
14428 output_pic_addr_const (file, XEXP (x, 0), code);
14430 break;
14432 case MINUS:
14433 if (!TARGET_MACHO)
14434 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14435 output_pic_addr_const (file, XEXP (x, 0), code);
14436 putc ('-', file);
14437 output_pic_addr_const (file, XEXP (x, 1), code);
14438 if (!TARGET_MACHO)
14439 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14440 break;
14442 case UNSPEC:
14443 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14445 bool f = i386_asm_output_addr_const_extra (file, x);
14446 gcc_assert (f);
14447 break;
14450 gcc_assert (XVECLEN (x, 0) == 1);
14451 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14452 switch (XINT (x, 1))
14454 case UNSPEC_GOT:
14455 fputs ("@GOT", file);
14456 break;
14457 case UNSPEC_GOTOFF:
14458 fputs ("@GOTOFF", file);
14459 break;
14460 case UNSPEC_PLTOFF:
14461 fputs ("@PLTOFF", file);
14462 break;
14463 case UNSPEC_PCREL:
14464 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14465 "(%rip)" : "[rip]", file);
14466 break;
14467 case UNSPEC_GOTPCREL:
14468 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14469 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14470 break;
14471 case UNSPEC_GOTTPOFF:
14472 /* FIXME: This might be @TPOFF in Sun ld too. */
14473 fputs ("@gottpoff", file);
14474 break;
14475 case UNSPEC_TPOFF:
14476 fputs ("@tpoff", file);
14477 break;
14478 case UNSPEC_NTPOFF:
14479 if (TARGET_64BIT)
14480 fputs ("@tpoff", file);
14481 else
14482 fputs ("@ntpoff", file);
14483 break;
14484 case UNSPEC_DTPOFF:
14485 fputs ("@dtpoff", file);
14486 break;
14487 case UNSPEC_GOTNTPOFF:
14488 if (TARGET_64BIT)
14489 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14490 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14491 else
14492 fputs ("@gotntpoff", file);
14493 break;
14494 case UNSPEC_INDNTPOFF:
14495 fputs ("@indntpoff", file);
14496 break;
14497 #if TARGET_MACHO
14498 case UNSPEC_MACHOPIC_OFFSET:
14499 putc ('-', file);
14500 machopic_output_function_base_name (file);
14501 break;
14502 #endif
14503 default:
14504 output_operand_lossage ("invalid UNSPEC as operand");
14505 break;
14507 break;
14509 default:
14510 output_operand_lossage ("invalid expression as operand");
14514 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14515 We need to emit DTP-relative relocations. */
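/* For instance, assuming ASM_LONG is ".long" as on typical ELF targets,
   a 4-byte request for symbol foo is emitted as ".long foo@dtpoff" and an
   8-byte request as ".long foo@dtpoff, 0", per the code below.  */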
14517 static void ATTRIBUTE_UNUSED
14518 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14520 fputs (ASM_LONG, file);
14521 output_addr_const (file, x);
14522 fputs ("@dtpoff", file);
14523 switch (size)
14525 case 4:
14526 break;
14527 case 8:
14528 fputs (", 0", file);
14529 break;
14530 default:
14531 gcc_unreachable ();
14535 /* Return true if X is a representation of the PIC register. This copes
14536 with calls from ix86_find_base_term, where the register might have
14537 been replaced by a cselib value. */
14539 static bool
14540 ix86_pic_register_p (rtx x)
14542 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14543 return (pic_offset_table_rtx
14544 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14545 else
14546 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14549 /* Helper function for ix86_delegitimize_address.
14550 Attempt to delegitimize TLS local-exec accesses. */
14552 static rtx
14553 ix86_delegitimize_tls_address (rtx orig_x)
14555 rtx x = orig_x, unspec;
14556 struct ix86_address addr;
14558 if (!TARGET_TLS_DIRECT_SEG_REFS)
14559 return orig_x;
14560 if (MEM_P (x))
14561 x = XEXP (x, 0);
14562 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14563 return orig_x;
14564 if (ix86_decompose_address (x, &addr) == 0
14565 || addr.seg != DEFAULT_TLS_SEG_REG
14566 || addr.disp == NULL_RTX
14567 || GET_CODE (addr.disp) != CONST)
14568 return orig_x;
14569 unspec = XEXP (addr.disp, 0);
14570 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14571 unspec = XEXP (unspec, 0);
14572 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14573 return orig_x;
14574 x = XVECEXP (unspec, 0, 0);
14575 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14576 if (unspec != XEXP (addr.disp, 0))
14577 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14578 if (addr.index)
14580 rtx idx = addr.index;
14581 if (addr.scale != 1)
14582 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14583 x = gen_rtx_PLUS (Pmode, idx, x);
14585 if (addr.base)
14586 x = gen_rtx_PLUS (Pmode, addr.base, x);
14587 if (MEM_P (orig_x))
14588 x = replace_equiv_address_nv (orig_x, x);
14589 return x;
14592 /* In the name of slightly smaller debug output, and to cater to
14593 general assembler lossage, recognize PIC+GOTOFF and turn it back
14594 into a direct symbol reference.
14596 On Darwin, this is necessary to avoid a crash, because Darwin
14597 has a different PIC label for each routine but the DWARF debugging
14598 information is not associated with any particular routine, so it's
14599 necessary to remove references to the PIC label from RTL stored by
14600 the DWARF output code. */
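/* A hedged sketch of the transformation: an address of the form
   (plus pic_register (const (unspec [foo] UNSPEC_GOTOFF))) is turned back
   into the bare symbol foo (plus any constant addend), so the debug info
   refers to the symbol instead of the PIC register arithmetic.  */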
14602 static rtx
14603 ix86_delegitimize_address (rtx x)
14605 rtx orig_x = delegitimize_mem_from_attrs (x);
14606 /* addend is NULL or some rtx if x is something+GOTOFF where
14607 something doesn't include the PIC register. */
14608 rtx addend = NULL_RTX;
14609 /* reg_addend is NULL or a multiple of some register. */
14610 rtx reg_addend = NULL_RTX;
14611 /* const_addend is NULL or a const_int. */
14612 rtx const_addend = NULL_RTX;
14613 /* This is the result, or NULL. */
14614 rtx result = NULL_RTX;
14616 x = orig_x;
14618 if (MEM_P (x))
14619 x = XEXP (x, 0);
14621 if (TARGET_64BIT)
14623 if (GET_CODE (x) == CONST
14624 && GET_CODE (XEXP (x, 0)) == PLUS
14625 && GET_MODE (XEXP (x, 0)) == Pmode
14626 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14627 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14628 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14630 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14631 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14632 if (MEM_P (orig_x))
14633 x = replace_equiv_address_nv (orig_x, x);
14634 return x;
14637 if (GET_CODE (x) == CONST
14638 && GET_CODE (XEXP (x, 0)) == UNSPEC
14639 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14640 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14641 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14643 x = XVECEXP (XEXP (x, 0), 0, 0);
14644 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14646 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14647 GET_MODE (x), 0);
14648 if (x == NULL_RTX)
14649 return orig_x;
14651 return x;
14654 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14655 return ix86_delegitimize_tls_address (orig_x);
14657 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14658 and -mcmodel=medium -fpic. */
14661 if (GET_CODE (x) != PLUS
14662 || GET_CODE (XEXP (x, 1)) != CONST)
14663 return ix86_delegitimize_tls_address (orig_x);
14665 if (ix86_pic_register_p (XEXP (x, 0)))
14666 /* %ebx + GOT/GOTOFF */
14668 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14670 /* %ebx + %reg * scale + GOT/GOTOFF */
14671 reg_addend = XEXP (x, 0);
14672 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14673 reg_addend = XEXP (reg_addend, 1);
14674 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14675 reg_addend = XEXP (reg_addend, 0);
14676 else
14678 reg_addend = NULL_RTX;
14679 addend = XEXP (x, 0);
14682 else
14683 addend = XEXP (x, 0);
14685 x = XEXP (XEXP (x, 1), 0);
14686 if (GET_CODE (x) == PLUS
14687 && CONST_INT_P (XEXP (x, 1)))
14689 const_addend = XEXP (x, 1);
14690 x = XEXP (x, 0);
14693 if (GET_CODE (x) == UNSPEC
14694 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14695 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14696 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14697 && !MEM_P (orig_x) && !addend)))
14698 result = XVECEXP (x, 0, 0);
14700 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14701 && !MEM_P (orig_x))
14702 result = XVECEXP (x, 0, 0);
14704 if (! result)
14705 return ix86_delegitimize_tls_address (orig_x);
14707 if (const_addend)
14708 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14709 if (reg_addend)
14710 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14711 if (addend)
14713 /* If the rest of original X doesn't involve the PIC register, add
14714 addend and subtract pic_offset_table_rtx. This can happen e.g.
14715 for code like:
14716 leal (%ebx, %ecx, 4), %ecx
14718 movl foo@GOTOFF(%ecx), %edx
14719 in which case we return (%ecx - %ebx) + foo. */
14720 if (pic_offset_table_rtx)
14721 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14722 pic_offset_table_rtx),
14723 result);
14724 else
14725 return orig_x;
14727 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14729 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14730 if (result == NULL_RTX)
14731 return orig_x;
14733 return result;
14736 /* If X is a machine specific address (i.e. a symbol or label being
14737 referenced as a displacement from the GOT implemented using an
14738 UNSPEC), then return the base term. Otherwise return X. */
14741 ix86_find_base_term (rtx x)
14743 rtx term;
14745 if (TARGET_64BIT)
14747 if (GET_CODE (x) != CONST)
14748 return x;
14749 term = XEXP (x, 0);
14750 if (GET_CODE (term) == PLUS
14751 && (CONST_INT_P (XEXP (term, 1))
14752 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14753 term = XEXP (term, 0);
14754 if (GET_CODE (term) != UNSPEC
14755 || (XINT (term, 1) != UNSPEC_GOTPCREL
14756 && XINT (term, 1) != UNSPEC_PCREL))
14757 return x;
14759 return XVECEXP (term, 0, 0);
14762 return ix86_delegitimize_address (x);
14765 static void
14766 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14767 bool fp, FILE *file)
14769 const char *suffix;
14771 if (mode == CCFPmode || mode == CCFPUmode)
14773 code = ix86_fp_compare_code_to_integer (code);
14774 mode = CCmode;
14776 if (reverse)
14777 code = reverse_condition (code);
14779 switch (code)
14781 case EQ:
14782 switch (mode)
14784 case CCAmode:
14785 suffix = "a";
14786 break;
14788 case CCCmode:
14789 suffix = "c";
14790 break;
14792 case CCOmode:
14793 suffix = "o";
14794 break;
14796 case CCSmode:
14797 suffix = "s";
14798 break;
14800 default:
14801 suffix = "e";
14803 break;
14804 case NE:
14805 switch (mode)
14807 case CCAmode:
14808 suffix = "na";
14809 break;
14811 case CCCmode:
14812 suffix = "nc";
14813 break;
14815 case CCOmode:
14816 suffix = "no";
14817 break;
14819 case CCSmode:
14820 suffix = "ns";
14821 break;
14823 default:
14824 suffix = "ne";
14826 break;
14827 case GT:
14828 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14829 suffix = "g";
14830 break;
14831 case GTU:
14832 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14833 Those same assemblers have the same but opposite lossage on cmov. */
14834 if (mode == CCmode)
14835 suffix = fp ? "nbe" : "a";
14836 else
14837 gcc_unreachable ();
14838 break;
14839 case LT:
14840 switch (mode)
14842 case CCNOmode:
14843 case CCGOCmode:
14844 suffix = "s";
14845 break;
14847 case CCmode:
14848 case CCGCmode:
14849 suffix = "l";
14850 break;
14852 default:
14853 gcc_unreachable ();
14855 break;
14856 case LTU:
14857 if (mode == CCmode)
14858 suffix = "b";
14859 else if (mode == CCCmode)
14860 suffix = fp ? "b" : "c";
14861 else
14862 gcc_unreachable ();
14863 break;
14864 case GE:
14865 switch (mode)
14867 case CCNOmode:
14868 case CCGOCmode:
14869 suffix = "ns";
14870 break;
14872 case CCmode:
14873 case CCGCmode:
14874 suffix = "ge";
14875 break;
14877 default:
14878 gcc_unreachable ();
14880 break;
14881 case GEU:
14882 if (mode == CCmode)
14883 suffix = "nb";
14884 else if (mode == CCCmode)
14885 suffix = fp ? "nb" : "nc";
14886 else
14887 gcc_unreachable ();
14888 break;
14889 case LE:
14890 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14891 suffix = "le";
14892 break;
14893 case LEU:
14894 if (mode == CCmode)
14895 suffix = "be";
14896 else
14897 gcc_unreachable ();
14898 break;
14899 case UNORDERED:
14900 suffix = fp ? "u" : "p";
14901 break;
14902 case ORDERED:
14903 suffix = fp ? "nu" : "np";
14904 break;
14905 default:
14906 gcc_unreachable ();
14908 fputs (suffix, file);
14911 /* Print the name of register X to FILE based on its machine mode and number.
14912 If CODE is 'w', pretend the mode is HImode.
14913 If CODE is 'b', pretend the mode is QImode.
14914 If CODE is 'k', pretend the mode is SImode.
14915 If CODE is 'q', pretend the mode is DImode.
14916 If CODE is 'x', pretend the mode is V4SFmode.
14917 If CODE is 't', pretend the mode is V8SFmode.
14918 If CODE is 'g', pretend the mode is V16SFmode.
14919 If CODE is 'h', pretend the reg is the 'high' byte register.
14920 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14921 If CODE is 'd', duplicate the operand for AVX instruction.
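/* Illustrative examples, assuming AT&T syntax and the %eax rtx: CODE 'b'
   prints "al", 'h' prints "ah", 'w' prints "ax", 'k' prints "eax" and,
   on a 64-bit target, 'q' prints "rax"; the '%' prefix is emitted
   separately in the function below.  */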
14924 void
14925 print_reg (rtx x, int code, FILE *file)
14927 const char *reg;
14928 unsigned int regno;
14929 bool duplicated = code == 'd' && TARGET_AVX;
14931 if (ASSEMBLER_DIALECT == ASM_ATT)
14932 putc ('%', file);
14934 if (x == pc_rtx)
14936 gcc_assert (TARGET_64BIT);
14937 fputs ("rip", file);
14938 return;
14941 regno = true_regnum (x);
14942 gcc_assert (regno != ARG_POINTER_REGNUM
14943 && regno != FRAME_POINTER_REGNUM
14944 && regno != FLAGS_REG
14945 && regno != FPSR_REG
14946 && regno != FPCR_REG);
14948 if (code == 'w' || MMX_REG_P (x))
14949 code = 2;
14950 else if (code == 'b')
14951 code = 1;
14952 else if (code == 'k')
14953 code = 4;
14954 else if (code == 'q')
14955 code = 8;
14956 else if (code == 'y')
14957 code = 3;
14958 else if (code == 'h')
14959 code = 0;
14960 else if (code == 'x')
14961 code = 16;
14962 else if (code == 't')
14963 code = 32;
14964 else if (code == 'g')
14965 code = 64;
14966 else
14967 code = GET_MODE_SIZE (GET_MODE (x));
14969 /* Irritatingly, AMD extended registers use a different naming convention
14970 from the normal registers: "r%d[bwd]". */
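/* For example (a sketch): register r8 is printed as "r8b", "r8w", "r8d"
   or plain "r8" for operand sizes 1, 2, 4 and 8 respectively, per the
   switch below.  */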
14971 if (REX_INT_REGNO_P (regno))
14973 gcc_assert (TARGET_64BIT);
14974 putc ('r', file);
14975 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14976 switch (code)
14978 case 0:
14979 error ("extended registers have no high halves");
14980 break;
14981 case 1:
14982 putc ('b', file);
14983 break;
14984 case 2:
14985 putc ('w', file);
14986 break;
14987 case 4:
14988 putc ('d', file);
14989 break;
14990 case 8:
14991 /* no suffix */
14992 break;
14993 default:
14994 error ("unsupported operand size for extended register");
14995 break;
14997 return;
15000 reg = NULL;
15001 switch (code)
15003 case 3:
15004 if (STACK_TOP_P (x))
15006 reg = "st(0)";
15007 break;
15009 /* FALLTHRU */
15010 case 8:
15011 case 4:
15012 case 12:
15013 if (! ANY_FP_REG_P (x))
15014 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15015 /* FALLTHRU */
15016 case 16:
15017 case 2:
15018 normal:
15019 reg = hi_reg_name[regno];
15020 break;
15021 case 1:
15022 if (regno >= ARRAY_SIZE (qi_reg_name))
15023 goto normal;
15024 reg = qi_reg_name[regno];
15025 break;
15026 case 0:
15027 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15028 goto normal;
15029 reg = qi_high_reg_name[regno];
15030 break;
15031 case 32:
15032 if (SSE_REG_P (x))
15034 gcc_assert (!duplicated);
15035 putc ('y', file);
15036 fputs (hi_reg_name[regno] + 1, file);
15037 return;
15039 case 64:
15040 if (SSE_REG_P (x))
15042 gcc_assert (!duplicated);
15043 putc ('z', file);
15044 fputs (hi_reg_name[REGNO (x)] + 1, file);
15045 return;
15047 break;
15048 default:
15049 gcc_unreachable ();
15052 fputs (reg, file);
15053 if (duplicated)
15055 if (ASSEMBLER_DIALECT == ASM_ATT)
15056 fprintf (file, ", %%%s", reg);
15057 else
15058 fprintf (file, ", %s", reg);
15062 /* Locate some local-dynamic symbol still in use by this function
15063 so that we can print its name in some tls_local_dynamic_base
15064 pattern. */
15066 static int
15067 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
15069 rtx x = *px;
15071 if (GET_CODE (x) == SYMBOL_REF
15072 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
15074 cfun->machine->some_ld_name = XSTR (x, 0);
15075 return 1;
15078 return 0;
15081 static const char *
15082 get_some_local_dynamic_name (void)
15084 rtx insn;
15086 if (cfun->machine->some_ld_name)
15087 return cfun->machine->some_ld_name;
15089 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
15090 if (NONDEBUG_INSN_P (insn)
15091 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
15092 return cfun->machine->some_ld_name;
15094 return NULL;
15097 /* Meaning of CODE:
15098 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15099 C -- print opcode suffix for set/cmov insn.
15100 c -- like C, but print reversed condition
15101 F,f -- likewise, but for floating-point.
15102 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15103 otherwise nothing
15104 R -- print embedded rounding and sae.
15105 r -- print only sae.
15106 z -- print the opcode suffix for the size of the current operand.
15107 Z -- likewise, with special suffixes for x87 instructions.
15108 * -- print a star (in certain assembler syntax)
15109 A -- print an absolute memory reference.
15110 E -- print address with DImode register names if TARGET_64BIT.
15111 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15112 s -- print a shift double count, followed by the assembler's argument
15113 delimiter.
15114 b -- print the QImode name of the register for the indicated operand.
15115 %b0 would print %al if operands[0] is reg 0.
15116 w -- likewise, print the HImode name of the register.
15117 k -- likewise, print the SImode name of the register.
15118 q -- likewise, print the DImode name of the register.
15119 x -- likewise, print the V4SFmode name of the register.
15120 t -- likewise, print the V8SFmode name of the register.
15121 g -- likewise, print the V16SFmode name of the register.
15122 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15123 y -- print "st(0)" instead of "st" as a register.
15124 d -- print duplicated register operand for AVX instruction.
15125 D -- print condition for SSE cmp instruction.
15126 P -- if PIC, print an @PLT suffix.
15127 p -- print raw symbol name.
15128 X -- don't print any sort of PIC '@' suffix for a symbol.
15129 & -- print some in-use local-dynamic symbol name.
15130 H -- print a memory address offset by 8; used for sse high-parts
15131 Y -- print condition for XOP pcom* instruction.
15132 + -- print a branch hint as 'cs' or 'ds' prefix
15133 ; -- print a semicolon (after prefixes due to bug in older gas).
15134 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15135 @ -- print a segment register of thread base pointer load
15136 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
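/* A hedged usage sketch (the template is hypothetical, not quoted from
   i386.md): in an output template such as "add%z0\t%1, %0", the 'z' code
   expands to the size suffix of operand 0 ("l" for an SImode operand in
   AT&T syntax), while "%k1" would print the SImode name of register
   operand 1, as handled by the code below.  */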
15139 void
15140 ix86_print_operand (FILE *file, rtx x, int code)
15142 if (code)
15144 switch (code)
15146 case 'A':
15147 switch (ASSEMBLER_DIALECT)
15149 case ASM_ATT:
15150 putc ('*', file);
15151 break;
15153 case ASM_INTEL:
15154 /* Intel syntax. For absolute addresses, registers should not
15155 be surrounded by brackets. */
15156 if (!REG_P (x))
15158 putc ('[', file);
15159 ix86_print_operand (file, x, 0);
15160 putc (']', file);
15161 return;
15163 break;
15165 default:
15166 gcc_unreachable ();
15169 ix86_print_operand (file, x, 0);
15170 return;
15172 case 'E':
15173 /* Wrap address in an UNSPEC to declare special handling. */
15174 if (TARGET_64BIT)
15175 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15177 output_address (x);
15178 return;
15180 case 'L':
15181 if (ASSEMBLER_DIALECT == ASM_ATT)
15182 putc ('l', file);
15183 return;
15185 case 'W':
15186 if (ASSEMBLER_DIALECT == ASM_ATT)
15187 putc ('w', file);
15188 return;
15190 case 'B':
15191 if (ASSEMBLER_DIALECT == ASM_ATT)
15192 putc ('b', file);
15193 return;
15195 case 'Q':
15196 if (ASSEMBLER_DIALECT == ASM_ATT)
15197 putc ('l', file);
15198 return;
15200 case 'S':
15201 if (ASSEMBLER_DIALECT == ASM_ATT)
15202 putc ('s', file);
15203 return;
15205 case 'T':
15206 if (ASSEMBLER_DIALECT == ASM_ATT)
15207 putc ('t', file);
15208 return;
15210 case 'O':
15211 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15212 if (ASSEMBLER_DIALECT != ASM_ATT)
15213 return;
15215 switch (GET_MODE_SIZE (GET_MODE (x)))
15217 case 2:
15218 putc ('w', file);
15219 break;
15221 case 4:
15222 putc ('l', file);
15223 break;
15225 case 8:
15226 putc ('q', file);
15227 break;
15229 default:
15230 output_operand_lossage
15231 ("invalid operand size for operand code 'O'");
15232 return;
15235 putc ('.', file);
15236 #endif
15237 return;
15239 case 'z':
15240 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15242 /* Opcodes don't get size suffixes if using Intel syntax. */
15243 if (ASSEMBLER_DIALECT == ASM_INTEL)
15244 return;
15246 switch (GET_MODE_SIZE (GET_MODE (x)))
15248 case 1:
15249 putc ('b', file);
15250 return;
15252 case 2:
15253 putc ('w', file);
15254 return;
15256 case 4:
15257 putc ('l', file);
15258 return;
15260 case 8:
15261 putc ('q', file);
15262 return;
15264 default:
15265 output_operand_lossage
15266 ("invalid operand size for operand code 'z'");
15267 return;
15271 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15272 warning
15273 (0, "non-integer operand used with operand code 'z'");
15274 /* FALLTHRU */
15276 case 'Z':
15277 /* 387 opcodes don't get size suffixes if using Intel syntax. */
15278 if (ASSEMBLER_DIALECT == ASM_INTEL)
15279 return;
15281 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15283 switch (GET_MODE_SIZE (GET_MODE (x)))
15285 case 2:
15286 #ifdef HAVE_AS_IX86_FILDS
15287 putc ('s', file);
15288 #endif
15289 return;
15291 case 4:
15292 putc ('l', file);
15293 return;
15295 case 8:
15296 #ifdef HAVE_AS_IX86_FILDQ
15297 putc ('q', file);
15298 #else
15299 fputs ("ll", file);
15300 #endif
15301 return;
15303 default:
15304 break;
15307 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15309 /* 387 opcodes don't get size suffixes
15310 if the operands are registers. */
15311 if (STACK_REG_P (x))
15312 return;
15314 switch (GET_MODE_SIZE (GET_MODE (x)))
15316 case 4:
15317 putc ('s', file);
15318 return;
15320 case 8:
15321 putc ('l', file);
15322 return;
15324 case 12:
15325 case 16:
15326 putc ('t', file);
15327 return;
15329 default:
15330 break;
15333 else
15335 output_operand_lossage
15336 ("invalid operand type used with operand code 'Z'");
15337 return;
15340 output_operand_lossage
15341 ("invalid operand size for operand code 'Z'");
15342 return;
15344 case 'd':
15345 case 'b':
15346 case 'w':
15347 case 'k':
15348 case 'q':
15349 case 'h':
15350 case 't':
15351 case 'g':
15352 case 'y':
15353 case 'x':
15354 case 'X':
15355 case 'P':
15356 case 'p':
15357 break;
15359 case 's':
15360 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15362 ix86_print_operand (file, x, 0);
15363 fputs (", ", file);
15365 return;
15367 case 'Y':
15368 switch (GET_CODE (x))
15370 case NE:
15371 fputs ("neq", file);
15372 break;
15373 case EQ:
15374 fputs ("eq", file);
15375 break;
15376 case GE:
15377 case GEU:
15378 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15379 break;
15380 case GT:
15381 case GTU:
15382 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15383 break;
15384 case LE:
15385 case LEU:
15386 fputs ("le", file);
15387 break;
15388 case LT:
15389 case LTU:
15390 fputs ("lt", file);
15391 break;
15392 case UNORDERED:
15393 fputs ("unord", file);
15394 break;
15395 case ORDERED:
15396 fputs ("ord", file);
15397 break;
15398 case UNEQ:
15399 fputs ("ueq", file);
15400 break;
15401 case UNGE:
15402 fputs ("nlt", file);
15403 break;
15404 case UNGT:
15405 fputs ("nle", file);
15406 break;
15407 case UNLE:
15408 fputs ("ule", file);
15409 break;
15410 case UNLT:
15411 fputs ("ult", file);
15412 break;
15413 case LTGT:
15414 fputs ("une", file);
15415 break;
15416 default:
15417 output_operand_lossage ("operand is not a condition code, "
15418 "invalid operand code 'Y'");
15419 return;
15421 return;
15423 case 'D':
15424 /* A little bit of brain damage here. The SSE compare instructions
15425 use completely different names for the comparisons than the fp
15426 conditional moves do. */
15427 switch (GET_CODE (x))
15429 case UNEQ:
15430 if (TARGET_AVX)
15432 fputs ("eq_us", file);
15433 break;
15435 case EQ:
15436 fputs ("eq", file);
15437 break;
15438 case UNLT:
15439 if (TARGET_AVX)
15441 fputs ("nge", file);
15442 break;
15444 case LT:
15445 fputs ("lt", file);
15446 break;
15447 case UNLE:
15448 if (TARGET_AVX)
15450 fputs ("ngt", file);
15451 break;
15453 case LE:
15454 fputs ("le", file);
15455 break;
15456 case UNORDERED:
15457 fputs ("unord", file);
15458 break;
15459 case LTGT:
15460 if (TARGET_AVX)
15462 fputs ("neq_oq", file);
15463 break;
15465 case NE:
15466 fputs ("neq", file);
15467 break;
15468 case GE:
15469 if (TARGET_AVX)
15471 fputs ("ge", file);
15472 break;
15474 case UNGE:
15475 fputs ("nlt", file);
15476 break;
15477 case GT:
15478 if (TARGET_AVX)
15480 fputs ("gt", file);
15481 break;
15483 case UNGT:
15484 fputs ("nle", file);
15485 break;
15486 case ORDERED:
15487 fputs ("ord", file);
15488 break;
15489 default:
15490 output_operand_lossage ("operand is not a condition code, "
15491 "invalid operand code 'D'");
15492 return;
15494 return;
15496 case 'F':
15497 case 'f':
15498 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15499 if (ASSEMBLER_DIALECT == ASM_ATT)
15500 putc ('.', file);
15501 #endif
15503 case 'C':
15504 case 'c':
15505 if (!COMPARISON_P (x))
15507 output_operand_lossage ("operand is not a condition code, "
15508 "invalid operand code '%c'", code);
15509 return;
15511 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15512 code == 'c' || code == 'f',
15513 code == 'F' || code == 'f',
15514 file);
15515 return;
15517 case 'H':
15518 if (!offsettable_memref_p (x))
15520 output_operand_lossage ("operand is not an offsettable memory "
15521 "reference, invalid operand code 'H'");
15522 return;
15524 /* It doesn't actually matter what mode we use here, as we're
15525 only going to use this for printing. */
15526 x = adjust_address_nv (x, DImode, 8);
15527 /* Output 'qword ptr' for intel assembler dialect. */
15528 if (ASSEMBLER_DIALECT == ASM_INTEL)
15529 code = 'q';
15530 break;
15532 case 'K':
15533 gcc_assert (CONST_INT_P (x));
15535 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15536 #ifdef HAVE_AS_IX86_HLE
15537 fputs ("xacquire ", file);
15538 #else
15539 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15540 #endif
15541 else if (INTVAL (x) & IX86_HLE_RELEASE)
15542 #ifdef HAVE_AS_IX86_HLE
15543 fputs ("xrelease ", file);
15544 #else
15545 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15546 #endif
15547 /* We do not want to print the value of the operand. */
15548 return;
15550 case 'N':
15551 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15552 fputs ("{z}", file);
15553 return;
15555 case 'r':
15556 gcc_assert (CONST_INT_P (x));
15557 gcc_assert (INTVAL (x) == ROUND_SAE);
15559 if (ASSEMBLER_DIALECT == ASM_INTEL)
15560 fputs (", ", file);
15562 fputs ("{sae}", file);
15564 if (ASSEMBLER_DIALECT == ASM_ATT)
15565 fputs (", ", file);
15567 return;
15569 case 'R':
15570 gcc_assert (CONST_INT_P (x));
15572 if (ASSEMBLER_DIALECT == ASM_INTEL)
15573 fputs (", ", file);
15575 switch (INTVAL (x))
15577 case ROUND_NEAREST_INT | ROUND_SAE:
15578 fputs ("{rn-sae}", file);
15579 break;
15580 case ROUND_NEG_INF | ROUND_SAE:
15581 fputs ("{rd-sae}", file);
15582 break;
15583 case ROUND_POS_INF | ROUND_SAE:
15584 fputs ("{ru-sae}", file);
15585 break;
15586 case ROUND_ZERO | ROUND_SAE:
15587 fputs ("{rz-sae}", file);
15588 break;
15589 default:
15590 gcc_unreachable ();
15593 if (ASSEMBLER_DIALECT == ASM_ATT)
15594 fputs (", ", file);
15596 return;
15598 case '*':
15599 if (ASSEMBLER_DIALECT == ASM_ATT)
15600 putc ('*', file);
15601 return;
15603 case '&':
15605 const char *name = get_some_local_dynamic_name ();
15606 if (name == NULL)
15607 output_operand_lossage ("'%%&' used without any "
15608 "local dynamic TLS references");
15609 else
15610 assemble_name (file, name);
15611 return;
15614 case '+':
15616 rtx x;
15618 if (!optimize
15619 || optimize_function_for_size_p (cfun)
15620 || !TARGET_BRANCH_PREDICTION_HINTS)
15621 return;
15623 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15624 if (x)
15626 int pred_val = XINT (x, 0);
15628 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15629 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15631 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15632 bool cputaken
15633 = final_forward_branch_p (current_output_insn) == 0;
15635 /* Emit hints only in the case where the default branch prediction
15636 heuristics would fail. */
15637 if (taken != cputaken)
15639 /* We use 3e (DS) prefix for taken branches and
15640 2e (CS) prefix for not taken branches. */
15641 if (taken)
15642 fputs ("ds ; ", file);
15643 else
15644 fputs ("cs ; ", file);
15648 return;
15651 case ';':
15652 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15653 putc (';', file);
15654 #endif
15655 return;
15657 case '@':
15658 if (ASSEMBLER_DIALECT == ASM_ATT)
15659 putc ('%', file);
15661 /* The kernel uses a different segment register for performance
15662 reasons; a system call would not have to trash the userspace
15663 segment register, which would be expensive. */
15664 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15665 fputs ("fs", file);
15666 else
15667 fputs ("gs", file);
15668 return;
15670 case '~':
15671 putc (TARGET_AVX2 ? 'i' : 'f', file);
15672 return;
15674 case '^':
15675 if (TARGET_64BIT && Pmode != word_mode)
15676 fputs ("addr32 ", file);
15677 return;
15679 default:
15680 output_operand_lossage ("invalid operand code '%c'", code);
15684 if (REG_P (x))
15685 print_reg (x, code, file);
15687 else if (MEM_P (x))
15689 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15690 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15691 && GET_MODE (x) != BLKmode)
15693 const char * size;
15694 switch (GET_MODE_SIZE (GET_MODE (x)))
15696 case 1: size = "BYTE"; break;
15697 case 2: size = "WORD"; break;
15698 case 4: size = "DWORD"; break;
15699 case 8: size = "QWORD"; break;
15700 case 12: size = "TBYTE"; break;
15701 case 16:
15702 if (GET_MODE (x) == XFmode)
15703 size = "TBYTE";
15704 else
15705 size = "XMMWORD";
15706 break;
15707 case 32: size = "YMMWORD"; break;
15708 case 64: size = "ZMMWORD"; break;
15709 default:
15710 gcc_unreachable ();
15713 /* Check for explicit size override (codes 'b', 'w', 'k',
15714 'q' and 'x') */
15715 if (code == 'b')
15716 size = "BYTE";
15717 else if (code == 'w')
15718 size = "WORD";
15719 else if (code == 'k')
15720 size = "DWORD";
15721 else if (code == 'q')
15722 size = "QWORD";
15723 else if (code == 'x')
15724 size = "XMMWORD";
15726 fputs (size, file);
15727 fputs (" PTR ", file);
15730 x = XEXP (x, 0);
15731 /* Avoid (%rip) for call operands. */
15732 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15733 && !CONST_INT_P (x))
15734 output_addr_const (file, x);
15735 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15736 output_operand_lossage ("invalid constraints for operand");
15737 else
15738 output_address (x);
15741 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15743 REAL_VALUE_TYPE r;
15744 long l;
15746 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15747 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15749 if (ASSEMBLER_DIALECT == ASM_ATT)
15750 putc ('$', file);
15751 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15752 if (code == 'q')
15753 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15754 (unsigned long long) (int) l);
15755 else
15756 fprintf (file, "0x%08x", (unsigned int) l);
15759 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15761 REAL_VALUE_TYPE r;
15762 long l[2];
15764 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15765 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15767 if (ASSEMBLER_DIALECT == ASM_ATT)
15768 putc ('$', file);
15769 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15772 /* These float cases don't actually occur as immediate operands. */
15773 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15775 char dstr[30];
15777 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15778 fputs (dstr, file);
15781 else
15783 /* We have patterns that allow zero sets of memory, for instance.
15784 In 64-bit mode, we should probably support all 8-byte vectors,
15785 since we can in fact encode that into an immediate. */
15786 if (GET_CODE (x) == CONST_VECTOR)
15788 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15789 x = const0_rtx;
15792 if (code != 'P' && code != 'p')
15794 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15796 if (ASSEMBLER_DIALECT == ASM_ATT)
15797 putc ('$', file);
15799 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15800 || GET_CODE (x) == LABEL_REF)
15802 if (ASSEMBLER_DIALECT == ASM_ATT)
15803 putc ('$', file);
15804 else
15805 fputs ("OFFSET FLAT:", file);
15808 if (CONST_INT_P (x))
15809 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15810 else if (flag_pic || MACHOPIC_INDIRECT)
15811 output_pic_addr_const (file, x, code);
15812 else
15813 output_addr_const (file, x);
15817 static bool
15818 ix86_print_operand_punct_valid_p (unsigned char code)
15820 return (code == '@' || code == '*' || code == '+' || code == '&'
15821 || code == ';' || code == '~' || code == '^');
15824 /* Print a memory operand whose address is ADDR. */
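/* A hedged example of the output: for base %ebp, index %eax, scale 4 and
   displacement -4, the code below prints "-4(%ebp,%eax,4)" in AT&T syntax
   and "[ebp-4+eax*4]" in Intel syntax.  */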
15826 static void
15827 ix86_print_operand_address (FILE *file, rtx addr)
15829 struct ix86_address parts;
15830 rtx base, index, disp;
15831 int scale;
15832 int ok;
15833 bool vsib = false;
15834 int code = 0;
15836 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15838 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15839 gcc_assert (parts.index == NULL_RTX);
15840 parts.index = XVECEXP (addr, 0, 1);
15841 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15842 addr = XVECEXP (addr, 0, 0);
15843 vsib = true;
15845 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15847 gcc_assert (TARGET_64BIT);
15848 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15849 code = 'q';
15851 else
15852 ok = ix86_decompose_address (addr, &parts);
15854 gcc_assert (ok);
15856 base = parts.base;
15857 index = parts.index;
15858 disp = parts.disp;
15859 scale = parts.scale;
15861 switch (parts.seg)
15863 case SEG_DEFAULT:
15864 break;
15865 case SEG_FS:
15866 case SEG_GS:
15867 if (ASSEMBLER_DIALECT == ASM_ATT)
15868 putc ('%', file);
15869 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15870 break;
15871 default:
15872 gcc_unreachable ();
15875 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15876 if (TARGET_64BIT && !base && !index)
15878 rtx symbol = disp;
15880 if (GET_CODE (disp) == CONST
15881 && GET_CODE (XEXP (disp, 0)) == PLUS
15882 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15883 symbol = XEXP (XEXP (disp, 0), 0);
15885 if (GET_CODE (symbol) == LABEL_REF
15886 || (GET_CODE (symbol) == SYMBOL_REF
15887 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15888 base = pc_rtx;
15890 if (!base && !index)
15892 /* A displacement-only address requires special attention. */
15894 if (CONST_INT_P (disp))
15896 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15897 fputs ("ds:", file);
15898 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15900 else if (flag_pic)
15901 output_pic_addr_const (file, disp, 0);
15902 else
15903 output_addr_const (file, disp);
15905 else
15907 /* Print SImode register names to force addr32 prefix. */
15908 if (SImode_address_operand (addr, VOIDmode))
15910 #ifdef ENABLE_CHECKING
15911 gcc_assert (TARGET_64BIT);
15912 switch (GET_CODE (addr))
15914 case SUBREG:
15915 gcc_assert (GET_MODE (addr) == SImode);
15916 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15917 break;
15918 case ZERO_EXTEND:
15919 case AND:
15920 gcc_assert (GET_MODE (addr) == DImode);
15921 break;
15922 default:
15923 gcc_unreachable ();
15925 #endif
15926 gcc_assert (!code);
15927 code = 'k';
15929 else if (code == 0
15930 && TARGET_X32
15931 && disp
15932 && CONST_INT_P (disp)
15933 && INTVAL (disp) < -16*1024*1024)
15935 /* X32 runs in 64-bit mode, where displacement, DISP, in
15936 address DISP(%r64), is encoded as 32-bit immediate sign-
15937 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15938 address is %r64 + 0xffffffffbffffd00. When %r64 <
15939 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15940 which is invalid for x32. The correct address is %r64
15941 - 0x40000300 == 0xf7ffdd64. To properly encode
15942 -0x40000300(%r64) for x32, we zero-extend negative
15943 displacement by forcing addr32 prefix which truncates
15944 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15945 zero-extend all negative displacements, including -1(%rsp).
15946 However, for small negative displacements, sign-extension
15947 won't cause overflow. We only zero-extend negative
15948 displacements if they < -16*1024*1024, which is also used
15949 to check legitimate address displacements for PIC. */
15950 code = 'k';
15953 if (ASSEMBLER_DIALECT == ASM_ATT)
15955 if (disp)
15957 if (flag_pic)
15958 output_pic_addr_const (file, disp, 0);
15959 else if (GET_CODE (disp) == LABEL_REF)
15960 output_asm_label (disp);
15961 else
15962 output_addr_const (file, disp);
15965 putc ('(', file);
15966 if (base)
15967 print_reg (base, code, file);
15968 if (index)
15970 putc (',', file);
15971 print_reg (index, vsib ? 0 : code, file);
15972 if (scale != 1 || vsib)
15973 fprintf (file, ",%d", scale);
15975 putc (')', file);
15977 else
15979 rtx offset = NULL_RTX;
15981 if (disp)
15983 /* Pull out the offset of a symbol; print any symbol itself. */
15984 if (GET_CODE (disp) == CONST
15985 && GET_CODE (XEXP (disp, 0)) == PLUS
15986 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15988 offset = XEXP (XEXP (disp, 0), 1);
15989 disp = gen_rtx_CONST (VOIDmode,
15990 XEXP (XEXP (disp, 0), 0));
15993 if (flag_pic)
15994 output_pic_addr_const (file, disp, 0);
15995 else if (GET_CODE (disp) == LABEL_REF)
15996 output_asm_label (disp);
15997 else if (CONST_INT_P (disp))
15998 offset = disp;
15999 else
16000 output_addr_const (file, disp);
16003 putc ('[', file);
16004 if (base)
16006 print_reg (base, code, file);
16007 if (offset)
16009 if (INTVAL (offset) >= 0)
16010 putc ('+', file);
16011 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16014 else if (offset)
16015 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16016 else
16017 putc ('0', file);
16019 if (index)
16021 putc ('+', file);
16022 print_reg (index, vsib ? 0 : code, file);
16023 if (scale != 1 || vsib)
16024 fprintf (file, "*%d", scale);
16026 putc (']', file);
16031 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16033 static bool
16034 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16036 rtx op;
16038 if (GET_CODE (x) != UNSPEC)
16039 return false;
16041 op = XVECEXP (x, 0, 0);
16042 switch (XINT (x, 1))
16044 case UNSPEC_GOTTPOFF:
16045 output_addr_const (file, op);
16046 /* FIXME: This might be @TPOFF in Sun ld. */
16047 fputs ("@gottpoff", file);
16048 break;
16049 case UNSPEC_TPOFF:
16050 output_addr_const (file, op);
16051 fputs ("@tpoff", file);
16052 break;
16053 case UNSPEC_NTPOFF:
16054 output_addr_const (file, op);
16055 if (TARGET_64BIT)
16056 fputs ("@tpoff", file);
16057 else
16058 fputs ("@ntpoff", file);
16059 break;
16060 case UNSPEC_DTPOFF:
16061 output_addr_const (file, op);
16062 fputs ("@dtpoff", file);
16063 break;
16064 case UNSPEC_GOTNTPOFF:
16065 output_addr_const (file, op);
16066 if (TARGET_64BIT)
16067 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16068 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16069 else
16070 fputs ("@gotntpoff", file);
16071 break;
16072 case UNSPEC_INDNTPOFF:
16073 output_addr_const (file, op);
16074 fputs ("@indntpoff", file);
16075 break;
16076 #if TARGET_MACHO
16077 case UNSPEC_MACHOPIC_OFFSET:
16078 output_addr_const (file, op);
16079 putc ('-', file);
16080 machopic_output_function_base_name (file);
16081 break;
16082 #endif
16084 case UNSPEC_STACK_CHECK:
16086 int offset;
16088 gcc_assert (flag_split_stack);
16090 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16091 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16092 #else
16093 gcc_unreachable ();
16094 #endif
16096 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16098 break;
16100 default:
16101 return false;
16104 return true;
16107 /* Split one or more double-mode RTL references into pairs of half-mode
16108 references. The RTL can be REG, offsettable MEM, integer constant, or
16109 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16110 split and "num" is its length. lo_half and hi_half are output arrays
16111 that parallel "operands". */
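/* A worked sketch: splitting the DImode constant 0x100000002 on this
   little-endian target yields lo_half = (const_int 2) and
   hi_half = (const_int 1), while a DImode MEM is split into two SImode
   MEMs at byte offsets 0 and 4, as done below.  */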
16113 void
16114 split_double_mode (enum machine_mode mode, rtx operands[],
16115 int num, rtx lo_half[], rtx hi_half[])
16117 enum machine_mode half_mode;
16118 unsigned int byte;
16120 switch (mode)
16122 case TImode:
16123 half_mode = DImode;
16124 break;
16125 case DImode:
16126 half_mode = SImode;
16127 break;
16128 default:
16129 gcc_unreachable ();
16132 byte = GET_MODE_SIZE (half_mode);
16134 while (num--)
16136 rtx op = operands[num];
16138 /* simplify_subreg refuses to split volatile memory addresses,
16139 but we still have to handle them. */
16140 if (MEM_P (op))
16142 lo_half[num] = adjust_address (op, half_mode, 0);
16143 hi_half[num] = adjust_address (op, half_mode, byte);
16145 else
16147 lo_half[num] = simplify_gen_subreg (half_mode, op,
16148 GET_MODE (op) == VOIDmode
16149 ? mode : GET_MODE (op), 0);
16150 hi_half[num] = simplify_gen_subreg (half_mode, op,
16151 GET_MODE (op) == VOIDmode
16152 ? mode : GET_MODE (op), byte);
16157 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16158 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16159 is the expression of the binary operation. The output may either be
16160 emitted here, or returned to the caller, like all output_* functions.
16162 There is no guarantee that the operands are the same mode, as they
16163 might be within FLOAT or FLOAT_EXTEND expressions. */
16165 #ifndef SYSV386_COMPAT
16166 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16167 wants to fix the assemblers because that causes incompatibility
16168 with gcc. No-one wants to fix gcc because that causes
16169 incompatibility with assemblers... You can use the option of
16170 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16171 #define SYSV386_COMPAT 1
16172 #endif
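/* A hedged example of the strings built below: for an AVX SFmode addition
   the routine returns "vaddss\t{%2, %1, %0|%0, %1, %2}", and for the x87
   case where operands[2] dies and operands[0] is not st(0) it returns
   "faddp\t{%2, %0|%0, %2}".  */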
16174 const char *
16175 output_387_binary_op (rtx insn, rtx *operands)
16177 static char buf[40];
16178 const char *p;
16179 const char *ssep;
16180 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16182 #ifdef ENABLE_CHECKING
16183 /* Even if we do not want to check the inputs, this documents the input
16184 constraints, which helps in understanding the following code. */
16185 if (STACK_REG_P (operands[0])
16186 && ((REG_P (operands[1])
16187 && REGNO (operands[0]) == REGNO (operands[1])
16188 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16189 || (REG_P (operands[2])
16190 && REGNO (operands[0]) == REGNO (operands[2])
16191 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16192 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16193 ; /* ok */
16194 else
16195 gcc_assert (is_sse);
16196 #endif
16198 switch (GET_CODE (operands[3]))
16200 case PLUS:
16201 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16202 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16203 p = "fiadd";
16204 else
16205 p = "fadd";
16206 ssep = "vadd";
16207 break;
16209 case MINUS:
16210 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16211 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16212 p = "fisub";
16213 else
16214 p = "fsub";
16215 ssep = "vsub";
16216 break;
16218 case MULT:
16219 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16220 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16221 p = "fimul";
16222 else
16223 p = "fmul";
16224 ssep = "vmul";
16225 break;
16227 case DIV:
16228 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16229 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16230 p = "fidiv";
16231 else
16232 p = "fdiv";
16233 ssep = "vdiv";
16234 break;
16236 default:
16237 gcc_unreachable ();
16240 if (is_sse)
16242 if (TARGET_AVX)
16244 strcpy (buf, ssep);
16245 if (GET_MODE (operands[0]) == SFmode)
16246 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16247 else
16248 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16250 else
16252 strcpy (buf, ssep + 1);
16253 if (GET_MODE (operands[0]) == SFmode)
16254 strcat (buf, "ss\t{%2, %0|%0, %2}");
16255 else
16256 strcat (buf, "sd\t{%2, %0|%0, %2}");
16258 return buf;
16260 strcpy (buf, p);
16262 switch (GET_CODE (operands[3]))
16264 case MULT:
16265 case PLUS:
16266 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16268 rtx temp = operands[2];
16269 operands[2] = operands[1];
16270 operands[1] = temp;
16273 /* We know operands[0] == operands[1]. */
16275 if (MEM_P (operands[2]))
16277 p = "%Z2\t%2";
16278 break;
16281 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16283 if (STACK_TOP_P (operands[0]))
16284 /* How is it that we are storing to a dead operand[2]?
16285 Well, presumably operands[1] is dead too. We can't
16286 store the result to st(0) as st(0) gets popped on this
16287 instruction. Instead store to operands[2] (which I
16288 think has to be st(1)). st(1) will be popped later.
16289 gcc <= 2.8.1 didn't have this check and generated
16290 assembly code that the Unixware assembler rejected. */
16291 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16292 else
16293 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16294 break;
16297 if (STACK_TOP_P (operands[0]))
16298 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16299 else
16300 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16301 break;
16303 case MINUS:
16304 case DIV:
16305 if (MEM_P (operands[1]))
16307 p = "r%Z1\t%1";
16308 break;
16311 if (MEM_P (operands[2]))
16313 p = "%Z2\t%2";
16314 break;
16317 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16319 #if SYSV386_COMPAT
16320 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16321 derived assemblers, confusingly reverse the direction of
16322 the operation for fsub{r} and fdiv{r} when the
16323 destination register is not st(0). The Intel assembler
16324 doesn't have this brain damage. Read !SYSV386_COMPAT to
16325 figure out what the hardware really does. */
16326 if (STACK_TOP_P (operands[0]))
16327 p = "{p\t%0, %2|rp\t%2, %0}";
16328 else
16329 p = "{rp\t%2, %0|p\t%0, %2}";
16330 #else
16331 if (STACK_TOP_P (operands[0]))
16332 /* As above for fmul/fadd, we can't store to st(0). */
16333 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16334 else
16335 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16336 #endif
16337 break;
16340 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16342 #if SYSV386_COMPAT
16343 if (STACK_TOP_P (operands[0]))
16344 p = "{rp\t%0, %1|p\t%1, %0}";
16345 else
16346 p = "{p\t%1, %0|rp\t%0, %1}";
16347 #else
16348 if (STACK_TOP_P (operands[0]))
16349 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16350 else
16351 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16352 #endif
16353 break;
16356 if (STACK_TOP_P (operands[0]))
16358 if (STACK_TOP_P (operands[1]))
16359 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16360 else
16361 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16362 break;
16364 else if (STACK_TOP_P (operands[1]))
16366 #if SYSV386_COMPAT
16367 p = "{\t%1, %0|r\t%0, %1}";
16368 #else
16369 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16370 #endif
16372 else
16374 #if SYSV386_COMPAT
16375 p = "{r\t%2, %0|\t%0, %2}";
16376 #else
16377 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16378 #endif
16380 break;
16382 default:
16383 gcc_unreachable ();
16386 strcat (buf, p);
16387 return buf;
16390 /* Check if a 256bit AVX register is referenced inside of EXP. */
16392 static int
16393 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
16395 rtx exp = *pexp;
16397 if (GET_CODE (exp) == SUBREG)
16398 exp = SUBREG_REG (exp);
16400 if (REG_P (exp)
16401 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16402 return 1;
16404 return 0;
16407 /* Return needed mode for entity in optimize_mode_switching pass. */
16409 static int
16410 ix86_avx_u128_mode_needed (rtx insn)
16412 if (CALL_P (insn))
16414 rtx link;
16416 /* Needed mode is set to AVX_U128_CLEAN if there are
16417 no 256bit modes used in function arguments. */
16418 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16419 link;
16420 link = XEXP (link, 1))
16422 if (GET_CODE (XEXP (link, 0)) == USE)
16424 rtx arg = XEXP (XEXP (link, 0), 0);
16426 if (ix86_check_avx256_register (&arg, NULL))
16427 return AVX_U128_DIRTY;
16431 return AVX_U128_CLEAN;
16434 /* Require DIRTY mode if a 256bit AVX register is referenced. The hardware
16435 changes state only when a 256bit register is written to, but we need
16436 to prevent the compiler from moving the optimal insertion point above
16437 an eventual read from a 256bit register. */
16438 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16439 return AVX_U128_DIRTY;
16441 return AVX_U128_ANY;
16444 /* Return mode that i387 must be switched into
16445 prior to the execution of insn. */
16447 static int
16448 ix86_i387_mode_needed (int entity, rtx insn)
16450 enum attr_i387_cw mode;
16452 /* The mode UNINITIALIZED is used to store the control word after a
16453 function call or ASM pattern. The mode ANY specifies that the function
16454 has no requirements on the control word and makes no changes in the
16455 bits we are interested in. */
16457 if (CALL_P (insn)
16458 || (NONJUMP_INSN_P (insn)
16459 && (asm_noperands (PATTERN (insn)) >= 0
16460 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16461 return I387_CW_UNINITIALIZED;
16463 if (recog_memoized (insn) < 0)
16464 return I387_CW_ANY;
16466 mode = get_attr_i387_cw (insn);
16468 switch (entity)
16470 case I387_TRUNC:
16471 if (mode == I387_CW_TRUNC)
16472 return mode;
16473 break;
16475 case I387_FLOOR:
16476 if (mode == I387_CW_FLOOR)
16477 return mode;
16478 break;
16480 case I387_CEIL:
16481 if (mode == I387_CW_CEIL)
16482 return mode;
16483 break;
16485 case I387_MASK_PM:
16486 if (mode == I387_CW_MASK_PM)
16487 return mode;
16488 break;
16490 default:
16491 gcc_unreachable ();
16494 return I387_CW_ANY;
16497 /* Return mode that entity must be switched into
16498 prior to the execution of insn. */
16501 ix86_mode_needed (int entity, rtx insn)
16503 switch (entity)
16505 case AVX_U128:
16506 return ix86_avx_u128_mode_needed (insn);
16507 case I387_TRUNC:
16508 case I387_FLOOR:
16509 case I387_CEIL:
16510 case I387_MASK_PM:
16511 return ix86_i387_mode_needed (entity, insn);
16512 default:
16513 gcc_unreachable ();
16515 return 0;
16518 /* Check if a 256bit AVX register is referenced in stores. */
16520 static void
16521 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16523 if (ix86_check_avx256_register (&dest, NULL))
16525 bool *used = (bool *) data;
16526 *used = true;
16530 /* Calculate mode of upper 128bit AVX registers after the insn. */
16532 static int
16533 ix86_avx_u128_mode_after (int mode, rtx insn)
16535 rtx pat = PATTERN (insn);
16537 if (vzeroupper_operation (pat, VOIDmode)
16538 || vzeroall_operation (pat, VOIDmode))
16539 return AVX_U128_CLEAN;
16541 /* We know that state is clean after CALL insn if there are no
16542 256bit registers used in the function return register. */
16543 if (CALL_P (insn))
16545 bool avx_reg256_found = false;
16546 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16548 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16551 /* Otherwise, return current mode. Remember that if insn
16552 references AVX 256bit registers, the mode was already changed
16553 to DIRTY from MODE_NEEDED. */
16554 return mode;
16557 /* Return the mode that an insn results in. */
16560 ix86_mode_after (int entity, int mode, rtx insn)
16562 switch (entity)
16564 case AVX_U128:
16565 return ix86_avx_u128_mode_after (mode, insn);
16566 case I387_TRUNC:
16567 case I387_FLOOR:
16568 case I387_CEIL:
16569 case I387_MASK_PM:
16570 return mode;
16571 default:
16572 gcc_unreachable ();
16576 static int
16577 ix86_avx_u128_mode_entry (void)
16579 tree arg;
16581 /* Entry mode is set to AVX_U128_DIRTY if there are
16582 256bit modes used in function arguments. */
16583 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16584 arg = TREE_CHAIN (arg))
16586 rtx incoming = DECL_INCOMING_RTL (arg);
16588 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16589 return AVX_U128_DIRTY;
16592 return AVX_U128_CLEAN;
16595 /* Return a mode that ENTITY is assumed to be
16596 switched to at function entry. */
16599 ix86_mode_entry (int entity)
16601 switch (entity)
16603 case AVX_U128:
16604 return ix86_avx_u128_mode_entry ();
16605 case I387_TRUNC:
16606 case I387_FLOOR:
16607 case I387_CEIL:
16608 case I387_MASK_PM:
16609 return I387_CW_ANY;
16610 default:
16611 gcc_unreachable ();
16615 static int
16616 ix86_avx_u128_mode_exit (void)
16618 rtx reg = crtl->return_rtx;
16620 /* Exit mode is set to AVX_U128_DIRTY if there are
16621 256bit modes used in the function return register. */
16622 if (reg && ix86_check_avx256_register (&reg, NULL))
16623 return AVX_U128_DIRTY;
16625 return AVX_U128_CLEAN;
16628 /* Return a mode that ENTITY is assumed to be
16629 switched to at function exit. */
16632 ix86_mode_exit (int entity)
16634 switch (entity)
16636 case AVX_U128:
16637 return ix86_avx_u128_mode_exit ();
16638 case I387_TRUNC:
16639 case I387_FLOOR:
16640 case I387_CEIL:
16641 case I387_MASK_PM:
16642 return I387_CW_ANY;
16643 default:
16644 gcc_unreachable ();
16648 /* Output code to initialize control word copies used by trunc?f?i and
16649 rounding patterns. CURRENT_MODE is set to current control word,
16650 while NEW_MODE is set to new control word. */
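/* Editorial note (not in the original sources): bits 11:10 of the x87
   control word select the rounding mode (00 = to nearest, 01 = down,
   10 = up, 11 = toward zero) and bit 5 masks the precision exception,
   which is why the masks used below are 0x0c00, 0x0400, 0x0800 and 0x0020.  */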
16652 static void
16653 emit_i387_cw_initialization (int mode)
16655 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16656 rtx new_mode;
16658 enum ix86_stack_slot slot;
16660 rtx reg = gen_reg_rtx (HImode);
16662 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16663 emit_move_insn (reg, copy_rtx (stored_mode));
16665 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16666 || optimize_insn_for_size_p ())
16668 switch (mode)
16670 case I387_CW_TRUNC:
16671 /* round toward zero (truncate) */
16672 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16673 slot = SLOT_CW_TRUNC;
16674 break;
16676 case I387_CW_FLOOR:
16677 /* round down toward -oo */
16678 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16679 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16680 slot = SLOT_CW_FLOOR;
16681 break;
16683 case I387_CW_CEIL:
16684 /* round up toward +oo */
16685 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16686 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16687 slot = SLOT_CW_CEIL;
16688 break;
16690 case I387_CW_MASK_PM:
16691 /* mask precision exception for nearbyint() */
16692 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16693 slot = SLOT_CW_MASK_PM;
16694 break;
16696 default:
16697 gcc_unreachable ();
16700 else
16702 switch (mode)
16704 case I387_CW_TRUNC:
16705 /* round toward zero (truncate) */
16706 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16707 slot = SLOT_CW_TRUNC;
16708 break;
16710 case I387_CW_FLOOR:
16711 /* round down toward -oo */
16712 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16713 slot = SLOT_CW_FLOOR;
16714 break;
16716 case I387_CW_CEIL:
16717 /* round up toward +oo */
16718 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16719 slot = SLOT_CW_CEIL;
16720 break;
16722 case I387_CW_MASK_PM:
16723 /* mask precision exception for nearbyint() */
16724 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16725 slot = SLOT_CW_MASK_PM;
16726 break;
16728 default:
16729 gcc_unreachable ();
16733 gcc_assert (slot < MAX_386_STACK_LOCALS);
16735 new_mode = assign_386_stack_local (HImode, slot);
16736 emit_move_insn (new_mode, reg);
16739 /* Emit vzeroupper. */
16741 void
16742 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16744 int i;
16746 /* Cancel automatic vzeroupper insertion if there are
16747 live call-saved SSE registers at the insertion point. */
16749 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16750 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16751 return;
16753 if (TARGET_64BIT)
16754 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16755 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16756 return;
16758 emit_insn (gen_avx_vzeroupper ());
16761 /* Generate one or more insns to set ENTITY to MODE. */
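/* Editorial note (not in the original sources): this is the mode-switching
   "emit" hook; for AVX_U128 it emits a vzeroupper when entering the CLEAN
   state, and for the i387 entities it materializes the required control-word
   value in a stack slot (the fldcw itself is emitted by the insn patterns).  */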
16763 void
16764 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
16766 switch (entity)
16768 case AVX_U128:
16769 if (mode == AVX_U128_CLEAN)
16770 ix86_avx_emit_vzeroupper (regs_live);
16771 break;
16772 case I387_TRUNC:
16773 case I387_FLOOR:
16774 case I387_CEIL:
16775 case I387_MASK_PM:
16776 if (mode != I387_CW_ANY
16777 && mode != I387_CW_UNINITIALIZED)
16778 emit_i387_cw_initialization (mode);
16779 break;
16780 default:
16781 gcc_unreachable ();
16785 /* Output code for INSN to convert a float to a signed int. OPERANDS
16786 are the insn operands. The output may be [HSD]Imode and the input
16787 operand may be [SDX]Fmode. */
16789 const char *
16790 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16792 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16793 int dimode_p = GET_MODE (operands[0]) == DImode;
16794 int round_mode = get_attr_i387_cw (insn);
16796 /* Jump through a hoop or two for DImode, since the hardware has no
16797 non-popping instruction. We used to do this a different way, but
16798 that was somewhat fragile and broke with post-reload splitters. */
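/* Editorial gloss, not from the original sources: the "fld %y1" below pushes
   a copy of the operand, so the popping fistp/fisttp consumes that copy and
   the original value stays on the stack when it is still live (i.e. when the
   top of the stack does not die here).  */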
16799 if ((dimode_p || fisttp) && !stack_top_dies)
16800 output_asm_insn ("fld\t%y1", operands);
16802 gcc_assert (STACK_TOP_P (operands[1]));
16803 gcc_assert (MEM_P (operands[0]));
16804 gcc_assert (GET_MODE (operands[1]) != TFmode);
16806 if (fisttp)
16807 output_asm_insn ("fisttp%Z0\t%0", operands);
16808 else
16810 if (round_mode != I387_CW_ANY)
16811 output_asm_insn ("fldcw\t%3", operands);
16812 if (stack_top_dies || dimode_p)
16813 output_asm_insn ("fistp%Z0\t%0", operands);
16814 else
16815 output_asm_insn ("fist%Z0\t%0", operands);
16816 if (round_mode != I387_CW_ANY)
16817 output_asm_insn ("fldcw\t%2", operands);
16820 return "";
16823 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16824 have the values zero or one, indicates the ffreep insn's operand
16825 from the OPERANDS array. */
16827 static const char *
16828 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16830 if (TARGET_USE_FFREEP)
16831 #ifdef HAVE_AS_IX86_FFREEP
16832 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16833 #else
16835 static char retval[32];
16836 int regno = REGNO (operands[opno]);
16838 gcc_assert (STACK_REGNO_P (regno));
16840 regno -= FIRST_STACK_REG;
16842 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16843 return retval;
16845 #endif
16847 return opno ? "fstp\t%y1" : "fstp\t%y0";
16851 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16852 should be used. UNORDERED_P is true when fucom should be used. */
16854 const char *
16855 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16857 int stack_top_dies;
16858 rtx cmp_op0, cmp_op1;
16859 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16861 if (eflags_p)
16863 cmp_op0 = operands[0];
16864 cmp_op1 = operands[1];
16866 else
16868 cmp_op0 = operands[1];
16869 cmp_op1 = operands[2];
16872 if (is_sse)
16874 if (GET_MODE (operands[0]) == SFmode)
16875 if (unordered_p)
16876 return "%vucomiss\t{%1, %0|%0, %1}";
16877 else
16878 return "%vcomiss\t{%1, %0|%0, %1}";
16879 else
16880 if (unordered_p)
16881 return "%vucomisd\t{%1, %0|%0, %1}";
16882 else
16883 return "%vcomisd\t{%1, %0|%0, %1}";
16886 gcc_assert (STACK_TOP_P (cmp_op0));
16888 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16890 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16892 if (stack_top_dies)
16894 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16895 return output_387_ffreep (operands, 1);
16897 else
16898 return "ftst\n\tfnstsw\t%0";
16901 if (STACK_REG_P (cmp_op1)
16902 && stack_top_dies
16903 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16904 && REGNO (cmp_op1) != FIRST_STACK_REG)
16906 /* If the top of the 387 stack dies, and the other operand
16907 is also a stack register that dies, then this must be a
16908 `fcompp' float compare. */
16910 if (eflags_p)
16912 /* There is no double popping fcomi variant. Fortunately,
16913 eflags is immune from the fstp's cc clobbering. */
16914 if (unordered_p)
16915 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16916 else
16917 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16918 return output_387_ffreep (operands, 0);
16920 else
16922 if (unordered_p)
16923 return "fucompp\n\tfnstsw\t%0";
16924 else
16925 return "fcompp\n\tfnstsw\t%0";
16928 else
16930 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16932 static const char * const alt[16] =
16934 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16935 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16936 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16937 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16939 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16940 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16941 NULL,
16942 NULL,
16944 "fcomi\t{%y1, %0|%0, %y1}",
16945 "fcomip\t{%y1, %0|%0, %y1}",
16946 "fucomi\t{%y1, %0|%0, %y1}",
16947 "fucomip\t{%y1, %0|%0, %y1}",
16949 NULL,
16950 NULL,
16951 NULL,
16952 NULL
16955 int mask;
16956 const char *ret;
16958 mask = eflags_p << 3;
16959 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16960 mask |= unordered_p << 1;
16961 mask |= stack_top_dies;
16963 gcc_assert (mask < 16);
16964 ret = alt[mask];
16965 gcc_assert (ret);
16967 return ret;
16971 void
16972 ix86_output_addr_vec_elt (FILE *file, int value)
16974 const char *directive = ASM_LONG;
16976 #ifdef ASM_QUAD
16977 if (TARGET_LP64)
16978 directive = ASM_QUAD;
16979 #else
16980 gcc_assert (!TARGET_64BIT);
16981 #endif
16983 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16986 void
16987 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16989 const char *directive = ASM_LONG;
16991 #ifdef ASM_QUAD
16992 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16993 directive = ASM_QUAD;
16994 #else
16995 gcc_assert (!TARGET_64BIT);
16996 #endif
16997 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16998 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16999 fprintf (file, "%s%s%d-%s%d\n",
17000 directive, LPREFIX, value, LPREFIX, rel);
17001 else if (HAVE_AS_GOTOFF_IN_DATA)
17002 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17003 #if TARGET_MACHO
17004 else if (TARGET_MACHO)
17006 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17007 machopic_output_function_base_name (file);
17008 putc ('\n', file);
17010 #endif
17011 else
17012 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17013 GOT_SYMBOL_NAME, LPREFIX, value);
17016 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17017 for the target. */
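/* Editorial note (not in the original sources): "xor reg, reg" is shorter
   and breaks dependencies but clobbers EFLAGS, hence the explicit CLOBBER
   added below; "mov $0, reg" is longer but leaves the flags untouched.  */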
17019 void
17020 ix86_expand_clear (rtx dest)
17022 rtx tmp;
17024 /* We play register width games, which are only valid after reload. */
17025 gcc_assert (reload_completed);
17027 /* Avoid HImode and its attendant prefix byte. */
17028 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17029 dest = gen_rtx_REG (SImode, REGNO (dest));
17030 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
17032 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
17033 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
17035 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17036 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17039 emit_insn (tmp);
17042 /* X is an unchanging MEM. If it is a constant pool reference, return
17043 the constant pool rtx, else NULL. */
17046 maybe_get_pool_constant (rtx x)
17048 x = ix86_delegitimize_address (XEXP (x, 0));
17050 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17051 return get_pool_constant (x);
17053 return NULL_RTX;
17056 void
17057 ix86_expand_move (enum machine_mode mode, rtx operands[])
17059 rtx op0, op1;
17060 enum tls_model model;
17062 op0 = operands[0];
17063 op1 = operands[1];
17065 if (GET_CODE (op1) == SYMBOL_REF)
17067 rtx tmp;
17069 model = SYMBOL_REF_TLS_MODEL (op1);
17070 if (model)
17072 op1 = legitimize_tls_address (op1, model, true);
17073 op1 = force_operand (op1, op0);
17074 if (op1 == op0)
17075 return;
17076 op1 = convert_to_mode (mode, op1, 1);
17078 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17079 op1 = tmp;
17081 else if (GET_CODE (op1) == CONST
17082 && GET_CODE (XEXP (op1, 0)) == PLUS
17083 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17085 rtx addend = XEXP (XEXP (op1, 0), 1);
17086 rtx symbol = XEXP (XEXP (op1, 0), 0);
17087 rtx tmp;
17089 model = SYMBOL_REF_TLS_MODEL (symbol);
17090 if (model)
17091 tmp = legitimize_tls_address (symbol, model, true);
17092 else
17093 tmp = legitimize_pe_coff_symbol (symbol, true);
17095 if (tmp)
17097 tmp = force_operand (tmp, NULL);
17098 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17099 op0, 1, OPTAB_DIRECT);
17100 if (tmp == op0)
17101 return;
17102 op1 = convert_to_mode (mode, tmp, 1);
17106 if ((flag_pic || MACHOPIC_INDIRECT)
17107 && symbolic_operand (op1, mode))
17109 if (TARGET_MACHO && !TARGET_64BIT)
17111 #if TARGET_MACHO
17112 /* dynamic-no-pic */
17113 if (MACHOPIC_INDIRECT)
17115 rtx temp = ((reload_in_progress
17116 || ((op0 && REG_P (op0))
17117 && mode == Pmode))
17118 ? op0 : gen_reg_rtx (Pmode));
17119 op1 = machopic_indirect_data_reference (op1, temp);
17120 if (MACHOPIC_PURE)
17121 op1 = machopic_legitimize_pic_address (op1, mode,
17122 temp == op1 ? 0 : temp);
17124 if (op0 != op1 && GET_CODE (op0) != MEM)
17126 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17127 emit_insn (insn);
17128 return;
17130 if (GET_CODE (op0) == MEM)
17131 op1 = force_reg (Pmode, op1);
17132 else
17134 rtx temp = op0;
17135 if (GET_CODE (temp) != REG)
17136 temp = gen_reg_rtx (Pmode);
17137 temp = legitimize_pic_address (op1, temp);
17138 if (temp == op0)
17139 return;
17140 op1 = temp;
17142 /* dynamic-no-pic */
17143 #endif
17145 else
17147 if (MEM_P (op0))
17148 op1 = force_reg (mode, op1);
17149 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17151 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17152 op1 = legitimize_pic_address (op1, reg);
17153 if (op0 == op1)
17154 return;
17155 op1 = convert_to_mode (mode, op1, 1);
17159 else
17161 if (MEM_P (op0)
17162 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17163 || !push_operand (op0, mode))
17164 && MEM_P (op1))
17165 op1 = force_reg (mode, op1);
17167 if (push_operand (op0, mode)
17168 && ! general_no_elim_operand (op1, mode))
17169 op1 = copy_to_mode_reg (mode, op1);
17171 /* Force large constants in 64bit compilation into a register
17172 to get them CSEed. */
17173 if (can_create_pseudo_p ()
17174 && (mode == DImode) && TARGET_64BIT
17175 && immediate_operand (op1, mode)
17176 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17177 && !register_operand (op0, mode)
17178 && optimize)
17179 op1 = copy_to_mode_reg (mode, op1);
17181 if (can_create_pseudo_p ()
17182 && FLOAT_MODE_P (mode)
17183 && GET_CODE (op1) == CONST_DOUBLE)
17185 /* If we are loading a floating point constant to a register,
17186 force the value to memory now, since we'll get better code
17187 out of the back end. */
17189 op1 = validize_mem (force_const_mem (mode, op1));
17190 if (!register_operand (op0, mode))
17192 rtx temp = gen_reg_rtx (mode);
17193 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17194 emit_move_insn (op0, temp);
17195 return;
17200 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17203 void
17204 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
17206 rtx op0 = operands[0], op1 = operands[1];
17207 unsigned int align = GET_MODE_ALIGNMENT (mode);
17209 if (push_operand (op0, VOIDmode))
17210 op0 = emit_move_resolve_push (mode, op0);
17212 /* Force constants other than zero into memory. We do not know how
17213 the instructions used to build constants modify the upper 64 bits
17214 of the register; once we have that information we may be able
17215 to handle some of them more efficiently. */
17216 if (can_create_pseudo_p ()
17217 && register_operand (op0, mode)
17218 && (CONSTANT_P (op1)
17219 || (GET_CODE (op1) == SUBREG
17220 && CONSTANT_P (SUBREG_REG (op1))))
17221 && !standard_sse_constant_p (op1))
17222 op1 = validize_mem (force_const_mem (mode, op1));
17224 /* We need to check memory alignment for SSE mode since an attribute
17225 can make operands unaligned. */
17226 if (can_create_pseudo_p ()
17227 && SSE_REG_MODE_P (mode)
17228 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17229 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17231 rtx tmp[2];
17233 /* ix86_expand_vector_move_misalign() does not like constants ... */
17234 if (CONSTANT_P (op1)
17235 || (GET_CODE (op1) == SUBREG
17236 && CONSTANT_P (SUBREG_REG (op1))))
17237 op1 = validize_mem (force_const_mem (mode, op1));
17239 /* ... nor both arguments in memory. */
17240 if (!register_operand (op0, mode)
17241 && !register_operand (op1, mode))
17242 op1 = force_reg (mode, op1);
17244 tmp[0] = op0; tmp[1] = op1;
17245 ix86_expand_vector_move_misalign (mode, tmp);
17246 return;
17249 /* Make operand1 a register if it isn't already. */
17250 if (can_create_pseudo_p ()
17251 && !register_operand (op0, mode)
17252 && !register_operand (op1, mode))
17254 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17255 return;
17258 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17261 /* Split 32-byte AVX unaligned load and store if needed. */
17263 static void
17264 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17266 rtx m;
17267 rtx (*extract) (rtx, rtx, rtx);
17268 rtx (*load_unaligned) (rtx, rtx);
17269 rtx (*store_unaligned) (rtx, rtx);
17270 enum machine_mode mode;
17272 switch (GET_MODE (op0))
17274 default:
17275 gcc_unreachable ();
17276 case V32QImode:
17277 extract = gen_avx_vextractf128v32qi;
17278 load_unaligned = gen_avx_loaddquv32qi;
17279 store_unaligned = gen_avx_storedquv32qi;
17280 mode = V16QImode;
17281 break;
17282 case V8SFmode:
17283 extract = gen_avx_vextractf128v8sf;
17284 load_unaligned = gen_avx_loadups256;
17285 store_unaligned = gen_avx_storeups256;
17286 mode = V4SFmode;
17287 break;
17288 case V4DFmode:
17289 extract = gen_avx_vextractf128v4df;
17290 load_unaligned = gen_avx_loadupd256;
17291 store_unaligned = gen_avx_storeupd256;
17292 mode = V2DFmode;
17293 break;
17296 if (MEM_P (op1))
17298 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17300 rtx r = gen_reg_rtx (mode);
17301 m = adjust_address (op1, mode, 0);
17302 emit_move_insn (r, m);
17303 m = adjust_address (op1, mode, 16);
17304 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17305 emit_move_insn (op0, r);
17307 /* Normal *mov<mode>_internal pattern will handle
17308 unaligned loads just fine if misaligned_operand
17309 is true, and without the UNSPEC it can be combined
17310 with arithmetic instructions. */
17311 else if (misaligned_operand (op1, GET_MODE (op1)))
17312 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17313 else
17314 emit_insn (load_unaligned (op0, op1));
17316 else if (MEM_P (op0))
17318 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17320 m = adjust_address (op0, mode, 0);
17321 emit_insn (extract (m, op1, const0_rtx));
17322 m = adjust_address (op0, mode, 16);
17323 emit_insn (extract (m, op1, const1_rtx));
17325 else
17326 emit_insn (store_unaligned (op0, op1));
17328 else
17329 gcc_unreachable ();
17332 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17333 straight to ix86_expand_vector_move. */
17334 /* Code generation for scalar reg-reg moves of single and double precision data:
17335 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17336 movaps reg, reg
17337 else
17338 movss reg, reg
17339 if (x86_sse_partial_reg_dependency == true)
17340 movapd reg, reg
17341 else
17342 movsd reg, reg
17344 Code generation for scalar loads of double precision data:
17345 if (x86_sse_split_regs == true)
17346 movlpd mem, reg (gas syntax)
17347 else
17348 movsd mem, reg
17350 Code generation for unaligned packed loads of single precision data
17351 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17352 if (x86_sse_unaligned_move_optimal)
17353 movups mem, reg
17355 if (x86_sse_partial_reg_dependency == true)
17357 xorps reg, reg
17358 movlps mem, reg
17359 movhps mem+8, reg
17361 else
17363 movlps mem, reg
17364 movhps mem+8, reg
17367 Code generation for unaligned packed loads of double precision data
17368 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17369 if (x86_sse_unaligned_move_optimal)
17370 movupd mem, reg
17372 if (x86_sse_split_regs == true)
17374 movlpd mem, reg
17375 movhpd mem+8, reg
17377 else
17379 movsd mem, reg
17380 movhpd mem+8, reg
17384 void
17385 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17387 rtx op0, op1, orig_op0 = NULL_RTX, m;
17388 rtx (*load_unaligned) (rtx, rtx);
17389 rtx (*store_unaligned) (rtx, rtx);
17391 op0 = operands[0];
17392 op1 = operands[1];
17394 if (GET_MODE_SIZE (mode) == 64)
17396 switch (GET_MODE_CLASS (mode))
17398 case MODE_VECTOR_INT:
17399 case MODE_INT:
17400 if (GET_MODE (op0) != V16SImode)
17402 if (!MEM_P (op0))
17404 orig_op0 = op0;
17405 op0 = gen_reg_rtx (V16SImode);
17407 else
17408 op0 = gen_lowpart (V16SImode, op0);
17410 op1 = gen_lowpart (V16SImode, op1);
17411 /* FALLTHRU */
17413 case MODE_VECTOR_FLOAT:
17414 switch (GET_MODE (op0))
17416 default:
17417 gcc_unreachable ();
17418 case V16SImode:
17419 load_unaligned = gen_avx512f_loaddquv16si;
17420 store_unaligned = gen_avx512f_storedquv16si;
17421 break;
17422 case V16SFmode:
17423 load_unaligned = gen_avx512f_loadups512;
17424 store_unaligned = gen_avx512f_storeups512;
17425 break;
17426 case V8DFmode:
17427 load_unaligned = gen_avx512f_loadupd512;
17428 store_unaligned = gen_avx512f_storeupd512;
17429 break;
17432 if (MEM_P (op1))
17433 emit_insn (load_unaligned (op0, op1));
17434 else if (MEM_P (op0))
17435 emit_insn (store_unaligned (op0, op1));
17436 else
17437 gcc_unreachable ();
17438 if (orig_op0)
17439 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17440 break;
17442 default:
17443 gcc_unreachable ();
17446 return;
17449 if (TARGET_AVX
17450 && GET_MODE_SIZE (mode) == 32)
17452 switch (GET_MODE_CLASS (mode))
17454 case MODE_VECTOR_INT:
17455 case MODE_INT:
17456 if (GET_MODE (op0) != V32QImode)
17458 if (!MEM_P (op0))
17460 orig_op0 = op0;
17461 op0 = gen_reg_rtx (V32QImode);
17463 else
17464 op0 = gen_lowpart (V32QImode, op0);
17466 op1 = gen_lowpart (V32QImode, op1);
17467 /* FALLTHRU */
17469 case MODE_VECTOR_FLOAT:
17470 ix86_avx256_split_vector_move_misalign (op0, op1);
17471 if (orig_op0)
17472 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17473 break;
17475 default:
17476 gcc_unreachable ();
17479 return;
17482 if (MEM_P (op1))
17484 /* Normal *mov<mode>_internal pattern will handle
17485 unaligned loads just fine if misaligned_operand
17486 is true, and without the UNSPEC it can be combined
17487 with arithmetic instructions. */
17488 if (TARGET_AVX
17489 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17490 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17491 && misaligned_operand (op1, GET_MODE (op1)))
17492 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17493 /* ??? If we have typed data, then it would appear that using
17494 movdqu is the only way to get unaligned data loaded with
17495 integer type. */
17496 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17498 if (GET_MODE (op0) != V16QImode)
17500 orig_op0 = op0;
17501 op0 = gen_reg_rtx (V16QImode);
17503 op1 = gen_lowpart (V16QImode, op1);
17504 /* We will eventually emit movups based on insn attributes. */
17505 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17506 if (orig_op0)
17507 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17509 else if (TARGET_SSE2 && mode == V2DFmode)
17511 rtx zero;
17513 if (TARGET_AVX
17514 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17515 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17516 || optimize_insn_for_size_p ())
17518 /* We will eventually emit movups based on insn attributes. */
17519 emit_insn (gen_sse2_loadupd (op0, op1));
17520 return;
17523 /* When SSE registers are split into halves, we can avoid
17524 writing to the top half twice. */
17525 if (TARGET_SSE_SPLIT_REGS)
17527 emit_clobber (op0);
17528 zero = op0;
17530 else
17532 /* ??? Not sure about the best option for the Intel chips.
17533 The following would seem to satisfy; the register is
17534 entirely cleared, breaking the dependency chain. We
17535 then store to the upper half, with a dependency depth
17536 of one. A rumor has it that Intel recommends two movsd
17537 followed by an unpacklpd, but this is unconfirmed. And
17538 given that the dependency depth of the unpacklpd would
17539 still be one, I'm not sure why this would be better. */
17540 zero = CONST0_RTX (V2DFmode);
17543 m = adjust_address (op1, DFmode, 0);
17544 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17545 m = adjust_address (op1, DFmode, 8);
17546 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17548 else
17550 rtx t;
17552 if (TARGET_AVX
17553 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17554 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17555 || optimize_insn_for_size_p ())
17557 if (GET_MODE (op0) != V4SFmode)
17559 orig_op0 = op0;
17560 op0 = gen_reg_rtx (V4SFmode);
17562 op1 = gen_lowpart (V4SFmode, op1);
17563 emit_insn (gen_sse_loadups (op0, op1));
17564 if (orig_op0)
17565 emit_move_insn (orig_op0,
17566 gen_lowpart (GET_MODE (orig_op0), op0));
17567 return;
17570 if (mode != V4SFmode)
17571 t = gen_reg_rtx (V4SFmode);
17572 else
17573 t = op0;
17575 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17576 emit_move_insn (t, CONST0_RTX (V4SFmode));
17577 else
17578 emit_clobber (t);
17580 m = adjust_address (op1, V2SFmode, 0);
17581 emit_insn (gen_sse_loadlps (t, t, m));
17582 m = adjust_address (op1, V2SFmode, 8);
17583 emit_insn (gen_sse_loadhps (t, t, m));
17584 if (mode != V4SFmode)
17585 emit_move_insn (op0, gen_lowpart (mode, t));
17588 else if (MEM_P (op0))
17590 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17592 op0 = gen_lowpart (V16QImode, op0);
17593 op1 = gen_lowpart (V16QImode, op1);
17594 /* We will eventually emit movups based on insn attributes. */
17595 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17597 else if (TARGET_SSE2 && mode == V2DFmode)
17599 if (TARGET_AVX
17600 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17601 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17602 || optimize_insn_for_size_p ())
17603 /* We will eventually emit movups based on insn attributes. */
17604 emit_insn (gen_sse2_storeupd (op0, op1));
17605 else
17607 m = adjust_address (op0, DFmode, 0);
17608 emit_insn (gen_sse2_storelpd (m, op1));
17609 m = adjust_address (op0, DFmode, 8);
17610 emit_insn (gen_sse2_storehpd (m, op1));
17613 else
17615 if (mode != V4SFmode)
17616 op1 = gen_lowpart (V4SFmode, op1);
17618 if (TARGET_AVX
17619 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17620 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17621 || optimize_insn_for_size_p ())
17623 op0 = gen_lowpart (V4SFmode, op0);
17624 emit_insn (gen_sse_storeups (op0, op1));
17626 else
17628 m = adjust_address (op0, V2SFmode, 0);
17629 emit_insn (gen_sse_storelps (m, op1));
17630 m = adjust_address (op0, V2SFmode, 8);
17631 emit_insn (gen_sse_storehps (m, op1));
17635 else
17636 gcc_unreachable ();
17639 /* Helper function of ix86_fixup_binary_operands to canonicalize
17640 operand order. Returns true if the operands should be swapped. */
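/* Editorial example (not in the original sources): for "dst = src1 + src2"
   with dst == src2, swapping the sources lets src1 match dst, so the
   two-address form "dst = dst OP src" can be used without an extra move.  */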
17642 static bool
17643 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17644 rtx operands[])
17646 rtx dst = operands[0];
17647 rtx src1 = operands[1];
17648 rtx src2 = operands[2];
17650 /* If the operation is not commutative, we can't do anything. */
17651 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17652 return false;
17654 /* Highest priority is that src1 should match dst. */
17655 if (rtx_equal_p (dst, src1))
17656 return false;
17657 if (rtx_equal_p (dst, src2))
17658 return true;
17660 /* Next highest priority is that immediate constants come second. */
17661 if (immediate_operand (src2, mode))
17662 return false;
17663 if (immediate_operand (src1, mode))
17664 return true;
17666 /* Lowest priority is that memory references should come second. */
17667 if (MEM_P (src2))
17668 return false;
17669 if (MEM_P (src1))
17670 return true;
17672 return false;
17676 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17677 destination to use for the operation. If different from the true
17678 destination in operands[0], a copy operation will be required. */
17681 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17682 rtx operands[])
17684 rtx dst = operands[0];
17685 rtx src1 = operands[1];
17686 rtx src2 = operands[2];
17688 /* Canonicalize operand order. */
17689 if (ix86_swap_binary_operands_p (code, mode, operands))
17691 rtx temp;
17693 /* It is invalid to swap operands of different modes. */
17694 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17696 temp = src1;
17697 src1 = src2;
17698 src2 = temp;
17701 /* Both source operands cannot be in memory. */
17702 if (MEM_P (src1) && MEM_P (src2))
17704 /* Optimization: Only read from memory once. */
17705 if (rtx_equal_p (src1, src2))
17707 src2 = force_reg (mode, src2);
17708 src1 = src2;
17710 else if (rtx_equal_p (dst, src1))
17711 src2 = force_reg (mode, src2);
17712 else
17713 src1 = force_reg (mode, src1);
17716 /* If the destination is memory, and we do not have matching source
17717 operands, do things in registers. */
17718 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17719 dst = gen_reg_rtx (mode);
17721 /* Source 1 cannot be a constant. */
17722 if (CONSTANT_P (src1))
17723 src1 = force_reg (mode, src1);
17725 /* Source 1 cannot be a non-matching memory. */
17726 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17727 src1 = force_reg (mode, src1);
17729 /* Improve address combine. */
17730 if (code == PLUS
17731 && GET_MODE_CLASS (mode) == MODE_INT
17732 && MEM_P (src2))
17733 src2 = force_reg (mode, src2);
17735 operands[1] = src1;
17736 operands[2] = src2;
17737 return dst;
17740 /* Similarly, but assume that the destination has already been
17741 set up properly. */
17743 void
17744 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17745 enum machine_mode mode, rtx operands[])
17747 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17748 gcc_assert (dst == operands[0]);
17751 /* Attempt to expand a binary operator. Make the expansion closer to the
17752 actual machine than just general_operand, which will allow 3 separate
17753 memory references (one output, two inputs) in a single insn. */
17755 void
17756 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17757 rtx operands[])
17759 rtx src1, src2, dst, op, clob;
17761 dst = ix86_fixup_binary_operands (code, mode, operands);
17762 src1 = operands[1];
17763 src2 = operands[2];
17765 /* Emit the instruction. */
17767 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17768 if (reload_in_progress)
17770 /* Reload doesn't know about the flags register, and doesn't know that
17771 it doesn't want to clobber it. We can only do this with PLUS. */
17772 gcc_assert (code == PLUS);
17773 emit_insn (op);
17775 else if (reload_completed
17776 && code == PLUS
17777 && !rtx_equal_p (dst, src1))
17779 /* This is going to be an LEA; avoid splitting it later. */
17780 emit_insn (op);
17782 else
17784 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17785 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17788 /* Fix up the destination if needed. */
17789 if (dst != operands[0])
17790 emit_move_insn (operands[0], dst);
17793 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17794 the given OPERANDS. */
17796 void
17797 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17798 rtx operands[])
17800 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17801 if (GET_CODE (operands[1]) == SUBREG)
17803 op1 = operands[1];
17804 op2 = operands[2];
17806 else if (GET_CODE (operands[2]) == SUBREG)
17808 op1 = operands[2];
17809 op2 = operands[1];
17811 /* Optimize (__m128i) d | (__m128i) e and similar code
17812 when d and e are float vectors into a float vector logical
17813 insn. In C/C++, without using intrinsics, there is no other way
17814 to express a vector logical operation on float vectors than
17815 to cast them temporarily to integer vectors. */
17816 if (op1
17817 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17818 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17819 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17820 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17821 && SUBREG_BYTE (op1) == 0
17822 && (GET_CODE (op2) == CONST_VECTOR
17823 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17824 && SUBREG_BYTE (op2) == 0))
17825 && can_create_pseudo_p ())
17827 rtx dst;
17828 switch (GET_MODE (SUBREG_REG (op1)))
17830 case V4SFmode:
17831 case V8SFmode:
17832 case V2DFmode:
17833 case V4DFmode:
17834 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17835 if (GET_CODE (op2) == CONST_VECTOR)
17837 op2 = gen_lowpart (GET_MODE (dst), op2);
17838 op2 = force_reg (GET_MODE (dst), op2);
17840 else
17842 op1 = operands[1];
17843 op2 = SUBREG_REG (operands[2]);
17844 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17845 op2 = force_reg (GET_MODE (dst), op2);
17847 op1 = SUBREG_REG (op1);
17848 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17849 op1 = force_reg (GET_MODE (dst), op1);
17850 emit_insn (gen_rtx_SET (VOIDmode, dst,
17851 gen_rtx_fmt_ee (code, GET_MODE (dst),
17852 op1, op2)));
17853 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17854 return;
17855 default:
17856 break;
17859 if (!nonimmediate_operand (operands[1], mode))
17860 operands[1] = force_reg (mode, operands[1]);
17861 if (!nonimmediate_operand (operands[2], mode))
17862 operands[2] = force_reg (mode, operands[2]);
17863 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17864 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17865 gen_rtx_fmt_ee (code, mode, operands[1],
17866 operands[2])));
17869 /* Return TRUE or FALSE depending on whether the binary operator meets the
17870 appropriate constraints. */
17872 bool
17873 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17874 rtx operands[3])
17876 rtx dst = operands[0];
17877 rtx src1 = operands[1];
17878 rtx src2 = operands[2];
17880 /* Both source operands cannot be in memory. */
17881 if (MEM_P (src1) && MEM_P (src2))
17882 return false;
17884 /* Canonicalize operand order for commutative operators. */
17885 if (ix86_swap_binary_operands_p (code, mode, operands))
17887 rtx temp = src1;
17888 src1 = src2;
17889 src2 = temp;
17892 /* If the destination is memory, we must have a matching source operand. */
17893 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17894 return false;
17896 /* Source 1 cannot be a constant. */
17897 if (CONSTANT_P (src1))
17898 return false;
17900 /* Source 1 cannot be a non-matching memory. */
17901 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17902 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17903 return (code == AND
17904 && (mode == HImode
17905 || mode == SImode
17906 || (TARGET_64BIT && mode == DImode))
17907 && satisfies_constraint_L (src2));
17909 return true;
17912 /* Attempt to expand a unary operator. Make the expansion closer to the
17913 actual machine than just general_operand, which will allow 2 separate
17914 memory references (one output, one input) in a single insn. */
17916 void
17917 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17918 rtx operands[])
17920 int matching_memory;
17921 rtx src, dst, op, clob;
17923 dst = operands[0];
17924 src = operands[1];
17926 /* If the destination is memory, and we do not have matching source
17927 operands, do things in registers. */
17928 matching_memory = 0;
17929 if (MEM_P (dst))
17931 if (rtx_equal_p (dst, src))
17932 matching_memory = 1;
17933 else
17934 dst = gen_reg_rtx (mode);
17937 /* When source operand is memory, destination must match. */
17938 if (MEM_P (src) && !matching_memory)
17939 src = force_reg (mode, src);
17941 /* Emit the instruction. */
17943 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17944 if (reload_in_progress || code == NOT)
17946 /* Reload doesn't know about the flags register, and doesn't know that
17947 it doesn't want to clobber it. */
17948 gcc_assert (code == NOT);
17949 emit_insn (op);
17951 else
17953 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17954 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17957 /* Fix up the destination if needed. */
17958 if (dst != operands[0])
17959 emit_move_insn (operands[0], dst);
17962 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17963 divisor are within the range [0-255]. */
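/* Editorial sketch of the emitted sequence (not in the original sources):
     or    dividend, divisor -> scratch
     test  $-0x100, scratch      ; do both operands fit in 8 bits?
     jz    .Lqimode
     <full 32/64-bit div>        ; slow path
     jmp   .Lend
   .Lqimode:
     <8-bit unsigned divide>     ; fast path, quotient in AL, remainder in AH
   .Lend:  */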
17965 void
17966 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17967 bool signed_p)
17969 rtx end_label, qimode_label;
17970 rtx insn, div, mod;
17971 rtx scratch, tmp0, tmp1, tmp2;
17972 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17973 rtx (*gen_zero_extend) (rtx, rtx);
17974 rtx (*gen_test_ccno_1) (rtx, rtx);
17976 switch (mode)
17978 case SImode:
17979 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17980 gen_test_ccno_1 = gen_testsi_ccno_1;
17981 gen_zero_extend = gen_zero_extendqisi2;
17982 break;
17983 case DImode:
17984 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17985 gen_test_ccno_1 = gen_testdi_ccno_1;
17986 gen_zero_extend = gen_zero_extendqidi2;
17987 break;
17988 default:
17989 gcc_unreachable ();
17992 end_label = gen_label_rtx ();
17993 qimode_label = gen_label_rtx ();
17995 scratch = gen_reg_rtx (mode);
17997 /* Use 8bit unsigned divmod if dividend and divisor are within
17998 the range [0-255]. */
17999 emit_move_insn (scratch, operands[2]);
18000 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18001 scratch, 1, OPTAB_DIRECT);
18002 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18003 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18004 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18005 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18006 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18007 pc_rtx);
18008 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18009 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18010 JUMP_LABEL (insn) = qimode_label;
18012 /* Generate the original signed/unsigned divmod. */
18013 div = gen_divmod4_1 (operands[0], operands[1],
18014 operands[2], operands[3]);
18015 emit_insn (div);
18017 /* Branch to the end. */
18018 emit_jump_insn (gen_jump (end_label));
18019 emit_barrier ();
18021 /* Generate 8bit unsigned divide. */
18022 emit_label (qimode_label);
18023 /* Don't use operands[0] for the result of the 8bit divide since not all
18024 registers support QImode ZERO_EXTRACT. */
18025 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18026 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18027 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18028 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18030 if (signed_p)
18032 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18033 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18035 else
18037 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18038 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18041 /* Extract remainder from AH. */
18042 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18043 if (REG_P (operands[1]))
18044 insn = emit_move_insn (operands[1], tmp1);
18045 else
18047 /* Need a new scratch register since the old one has result
18048 of 8bit divide. */
18049 scratch = gen_reg_rtx (mode);
18050 emit_move_insn (scratch, tmp1);
18051 insn = emit_move_insn (operands[1], scratch);
18053 set_unique_reg_note (insn, REG_EQUAL, mod);
18055 /* Zero extend quotient from AL. */
18056 tmp1 = gen_lowpart (QImode, tmp0);
18057 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18058 set_unique_reg_note (insn, REG_EQUAL, div);
18060 emit_label (end_label);
18063 /* Whether it is OK to emit CFI directives when emitting asm code. */
18065 bool
18066 ix86_emit_cfi ()
18068 return dwarf2out_do_cfi_asm ();
18071 #define LEA_MAX_STALL (3)
18072 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18074 /* Increase the given DISTANCE in half-cycles according to
18075 dependencies between the PREV and NEXT instructions.
18076 Add 1 half-cycle if there is no dependency and
18077 go to the next cycle if there is some dependency. */
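/* Editorial note (not in the original sources): with this scheme two
   independent back-to-back insns add one half-cycle each, while a dependent
   pair rounds the distance up to the next whole cycle and then adds a full
   cycle (distance + (distance & 1) + 2).  */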
18079 static unsigned int
18080 increase_distance (rtx prev, rtx next, unsigned int distance)
18082 df_ref *use_rec;
18083 df_ref *def_rec;
18085 if (!prev || !next)
18086 return distance + (distance & 1) + 2;
18088 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18089 return distance + 1;
18091 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
18092 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
18093 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
18094 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
18095 return distance + (distance & 1) + 2;
18097 return distance + 1;
18100 /* Function checks if instruction INSN defines register number
18101 REGNO1 or REGNO2. */
18103 static bool
18104 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18105 rtx insn)
18107 df_ref *def_rec;
18109 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
18110 if (DF_REF_REG_DEF_P (*def_rec)
18111 && !DF_REF_IS_ARTIFICIAL (*def_rec)
18112 && (regno1 == DF_REF_REGNO (*def_rec)
18113 || regno2 == DF_REF_REGNO (*def_rec)))
18115 return true;
18118 return false;
18121 /* Function checks if instruction INSN uses register number
18122 REGNO as part of an address expression. */
18124 static bool
18125 insn_uses_reg_mem (unsigned int regno, rtx insn)
18127 df_ref *use_rec;
18129 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
18130 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
18131 return true;
18133 return false;
18136 /* Search backward for a non-AGU definition of register number REGNO1
18137 or register number REGNO2 in the basic block, starting from instruction
18138 START, up to the head of the basic block or instruction INSN.
18140 Set *FOUND to true if a definition was found
18141 and to false otherwise.
18143 Distance in half-cycles between START and found instruction or head
18144 of BB is added to DISTANCE and returned. */
18146 static int
18147 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18148 rtx insn, int distance,
18149 rtx start, bool *found)
18151 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18152 rtx prev = start;
18153 rtx next = NULL;
18155 *found = false;
18157 while (prev
18158 && prev != insn
18159 && distance < LEA_SEARCH_THRESHOLD)
18161 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18163 distance = increase_distance (prev, next, distance);
18164 if (insn_defines_reg (regno1, regno2, prev))
18166 if (recog_memoized (prev) < 0
18167 || get_attr_type (prev) != TYPE_LEA)
18169 *found = true;
18170 return distance;
18174 next = prev;
18176 if (prev == BB_HEAD (bb))
18177 break;
18179 prev = PREV_INSN (prev);
18182 return distance;
18185 /* Search backward for non-agu definition of register number REGNO1
18186 or register number REGNO2 in INSN's basic block until
18187 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18188 2. Reach a neighbouring BB's boundary, or
18189 3. Reach an AGU definition.
18190 Returns the distance between the non-agu definition point and INSN.
18191 If no definition point, returns -1. */
18193 static int
18194 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18195 rtx insn)
18197 basic_block bb = BLOCK_FOR_INSN (insn);
18198 int distance = 0;
18199 bool found = false;
18201 if (insn != BB_HEAD (bb))
18202 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18203 distance, PREV_INSN (insn),
18204 &found);
18206 if (!found && distance < LEA_SEARCH_THRESHOLD)
18208 edge e;
18209 edge_iterator ei;
18210 bool simple_loop = false;
18212 FOR_EACH_EDGE (e, ei, bb->preds)
18213 if (e->src == bb)
18215 simple_loop = true;
18216 break;
18219 if (simple_loop)
18220 distance = distance_non_agu_define_in_bb (regno1, regno2,
18221 insn, distance,
18222 BB_END (bb), &found);
18223 else
18225 int shortest_dist = -1;
18226 bool found_in_bb = false;
18228 FOR_EACH_EDGE (e, ei, bb->preds)
18230 int bb_dist
18231 = distance_non_agu_define_in_bb (regno1, regno2,
18232 insn, distance,
18233 BB_END (e->src),
18234 &found_in_bb);
18235 if (found_in_bb)
18237 if (shortest_dist < 0)
18238 shortest_dist = bb_dist;
18239 else if (bb_dist > 0)
18240 shortest_dist = MIN (bb_dist, shortest_dist);
18242 found = true;
18246 distance = shortest_dist;
18250 /* get_attr_type may modify recog data. We want to make sure
18251 that recog data is valid for instruction INSN, on which
18252 distance_non_agu_define is called. INSN is unchanged here. */
18253 extract_insn_cached (insn);
18255 if (!found)
18256 return -1;
18258 return distance >> 1;
18261 /* Return the distance in half-cycles between INSN and the next
18262 insn that uses register number REGNO in a memory address, added
18263 to DISTANCE. Return -1 if REGNO is set.
18265 Put true value into *FOUND if register usage was found and
18266 false otherwise.
18267 Put true value into *REDEFINED if register redefinition was
18268 found and false otherwise. */
18270 static int
18271 distance_agu_use_in_bb (unsigned int regno,
18272 rtx insn, int distance, rtx start,
18273 bool *found, bool *redefined)
18275 basic_block bb = NULL;
18276 rtx next = start;
18277 rtx prev = NULL;
18279 *found = false;
18280 *redefined = false;
18282 if (start != NULL_RTX)
18284 bb = BLOCK_FOR_INSN (start);
18285 if (start != BB_HEAD (bb))
18286 /* If insn and start belong to the same bb, set prev to insn,
18287 so the call to increase_distance will increase the distance
18288 between insns by 1. */
18289 prev = insn;
18292 while (next
18293 && next != insn
18294 && distance < LEA_SEARCH_THRESHOLD)
18296 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18298 distance = increase_distance(prev, next, distance);
18299 if (insn_uses_reg_mem (regno, next))
18301 /* Return DISTANCE if OP0 is used in memory
18302 address in NEXT. */
18303 *found = true;
18304 return distance;
18307 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18309 /* Return -1 if OP0 is set in NEXT. */
18310 *redefined = true;
18311 return -1;
18314 prev = next;
18317 if (next == BB_END (bb))
18318 break;
18320 next = NEXT_INSN (next);
18323 return distance;
18326 /* Return the distance between INSN and the next insn that uses
18327 register number REGNO0 in a memory address. Return -1 if no such
18328 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18330 static int
18331 distance_agu_use (unsigned int regno0, rtx insn)
18333 basic_block bb = BLOCK_FOR_INSN (insn);
18334 int distance = 0;
18335 bool found = false;
18336 bool redefined = false;
18338 if (insn != BB_END (bb))
18339 distance = distance_agu_use_in_bb (regno0, insn, distance,
18340 NEXT_INSN (insn),
18341 &found, &redefined);
18343 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18345 edge e;
18346 edge_iterator ei;
18347 bool simple_loop = false;
18349 FOR_EACH_EDGE (e, ei, bb->succs)
18350 if (e->dest == bb)
18352 simple_loop = true;
18353 break;
18356 if (simple_loop)
18357 distance = distance_agu_use_in_bb (regno0, insn,
18358 distance, BB_HEAD (bb),
18359 &found, &redefined);
18360 else
18362 int shortest_dist = -1;
18363 bool found_in_bb = false;
18364 bool redefined_in_bb = false;
18366 FOR_EACH_EDGE (e, ei, bb->succs)
18368 int bb_dist
18369 = distance_agu_use_in_bb (regno0, insn,
18370 distance, BB_HEAD (e->dest),
18371 &found_in_bb, &redefined_in_bb);
18372 if (found_in_bb)
18374 if (shortest_dist < 0)
18375 shortest_dist = bb_dist;
18376 else if (bb_dist > 0)
18377 shortest_dist = MIN (bb_dist, shortest_dist);
18379 found = true;
18383 distance = shortest_dist;
18387 if (!found || redefined)
18388 return -1;
18390 return distance >> 1;
18393 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
18394 there is a dilemma of choosing LEA or ADD.
18395 Negative value: ADD is preferred over LEA
18396 Zero: Neutral
18397 Positive value: LEA is preferred over ADD */
18398 #define IX86_LEA_PRIORITY 0
18400 /* Return true if using lea INSN has a performance advantage
18401 over a sequence of instructions. The instruction sequence has
18402 SPLIT_COST cycles higher latency than the lea latency. */
18404 static bool
18405 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18406 unsigned int regno2, int split_cost, bool has_scale)
18408 int dist_define, dist_use;
18410 /* For Silvermont, if using a 2-source or 3-source LEA for
18411 non-destructive destination purposes, or due to wanting the
18412 ability to use SCALE, the use of LEA is justified. */
18413 if (TARGET_SILVERMONT || TARGET_INTEL)
18415 if (has_scale)
18416 return true;
18417 if (split_cost < 1)
18418 return false;
18419 if (regno0 == regno1 || regno0 == regno2)
18420 return false;
18421 return true;
18424 dist_define = distance_non_agu_define (regno1, regno2, insn);
18425 dist_use = distance_agu_use (regno0, insn);
18427 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18429 /* If there is no non-AGU operand definition, no AGU
18430 operand usage, and the split cost is 0, then both the lea
18431 and non-lea variants have the same priority. Currently
18432 we prefer lea for 64-bit code and non-lea for 32-bit
18433 code. */
18434 if (dist_use < 0 && split_cost == 0)
18435 return TARGET_64BIT || IX86_LEA_PRIORITY;
18436 else
18437 return true;
18440 /* With a longer definition distance, lea is preferable.
18441 Here we adjust it to take into account the splitting cost and
18442 the lea priority. */
18443 dist_define += split_cost + IX86_LEA_PRIORITY;
18445 /* If there is no use in a memory address then we just check
18446 that the split cost exceeds the AGU stall. */
18447 if (dist_use < 0)
18448 return dist_define > LEA_MAX_STALL;
18450 /* If this insn has both a backward non-AGU dependence and a forward
18451 AGU dependence, the one with the shorter distance takes effect. */
18452 return dist_define >= dist_use;
18455 /* Return true if it is legal to clobber flags by INSN and
18456 false otherwise. */
18458 static bool
18459 ix86_ok_to_clobber_flags (rtx insn)
18461 basic_block bb = BLOCK_FOR_INSN (insn);
18462 df_ref *use;
18463 bitmap live;
18465 while (insn)
18467 if (NONDEBUG_INSN_P (insn))
18469 for (use = DF_INSN_USES (insn); *use; use++)
18470 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
18471 return false;
18473 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18474 return true;
18477 if (insn == BB_END (bb))
18478 break;
18480 insn = NEXT_INSN (insn);
18483 live = df_get_live_out(bb);
18484 return !REGNO_REG_SET_P (live, FLAGS_REG);
18487 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18488 move and add to avoid AGU stalls. */
18490 bool
18491 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18493 unsigned int regno0, regno1, regno2;
18495 /* Check if we need to optimize. */
18496 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18497 return false;
18499 /* Check that it is correct to split here. */
18500 if (!ix86_ok_to_clobber_flags(insn))
18501 return false;
18503 regno0 = true_regnum (operands[0]);
18504 regno1 = true_regnum (operands[1]);
18505 regno2 = true_regnum (operands[2]);
18507 /* We need to split only adds with non destructive
18508 destination operand. */
18509 if (regno0 == regno1 || regno0 == regno2)
18510 return false;
18511 else
18512 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18515 /* Return true if we should emit lea instruction instead of mov
18516 instruction. */
18518 bool
18519 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18521 unsigned int regno0, regno1;
18523 /* Check if we need to optimize. */
18524 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18525 return false;
18527 /* Use lea for reg to reg moves only. */
18528 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18529 return false;
18531 regno0 = true_regnum (operands[0]);
18532 regno1 = true_regnum (operands[1]);
18534 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18537 /* Return true if we need to split lea into a sequence of
18538 instructions to avoid AGU stalls. */
18540 bool
18541 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18543 unsigned int regno0, regno1, regno2;
18544 int split_cost;
18545 struct ix86_address parts;
18546 int ok;
18548 /* Check we need to optimize. */
18549 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18550 return false;
18552 /* The "at least two components" test below might not catch simple
18553 move or zero extension insns if parts.base is non-NULL and parts.disp
18554 is const0_rtx as the only components in the address, e.g. if the
18555 register is %rbp or %r13. As this test is much cheaper and moves or
18556 zero extensions are the common case, do this check first. */
18557 if (REG_P (operands[1])
18558 || (SImode_address_operand (operands[1], VOIDmode)
18559 && REG_P (XEXP (operands[1], 0))))
18560 return false;
18562 /* Check if it is OK to split here. */
18563 if (!ix86_ok_to_clobber_flags (insn))
18564 return false;
18566 ok = ix86_decompose_address (operands[1], &parts);
18567 gcc_assert (ok);
18569 /* There should be at least two components in the address. */
18570 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18571 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18572 return false;
18574 /* We should not split into add if a non-legitimate PIC
18575 operand is used as the displacement. */
18576 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18577 return false;
18579 regno0 = true_regnum (operands[0]);
18580 regno1 = INVALID_REGNUM;
18581 regno2 = INVALID_REGNUM;
18583 if (parts.base)
18584 regno1 = true_regnum (parts.base);
18585 if (parts.index)
18586 regno2 = true_regnum (parts.index);
18588 split_cost = 0;
18590 /* Compute how many cycles we will add to the execution time
18591 if we split the lea into a sequence of instructions. */
18592 if (parts.base || parts.index)
18594 /* Have to use a mov instruction if the non-destructive
18595 destination form is used. */
18596 if (regno1 != regno0 && regno2 != regno0)
18597 split_cost += 1;
18599 /* Have to add index to base if both exist. */
18600 if (parts.base && parts.index)
18601 split_cost += 1;
18603 /* Have to use shift and adds if scale is 2 or greater. */
18604 if (parts.scale > 1)
18606 if (regno0 != regno1)
18607 split_cost += 1;
18608 else if (regno2 == regno0)
18609 split_cost += 4;
18610 else
18611 split_cost += parts.scale;
18614 /* Have to use an add instruction with an immediate if
18615 disp is nonzero. */
18616 if (parts.disp && parts.disp != const0_rtx)
18617 split_cost += 1;
18619 /* Subtract the price of lea. */
18620 split_cost -= 1;
18623 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18624 parts.scale > 1);
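/* As a rough example of the cost computed above, splitting
   "lea 4(%rbx,%rcx,2), %rax" expands to a mov, a shift, and two adds,
   i.e. four instructions in place of one, so split_cost ends up as 3;
   ix86_lea_outperforms then weighs that against the extra latency of
   the lea on the AGU.  */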
18627 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18628 matches destination. RTX includes clobber of FLAGS_REG. */
18630 static void
18631 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18632 rtx dst, rtx src)
18634 rtx op, clob;
18636 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18637 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18639 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
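/* For instance, ix86_emit_binop (PLUS, SImode, dst, src) emits roughly
     (parallel [(set dst (plus:SI dst src))
                (clobber (reg:CC flags))])
   i.e. the destructive two-address form together with the flags clobber
   that the ALU patterns expect.  */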
18642 /* Return true if the definition of REGNO1 is closer to INSN than that of REGNO2. */
18644 static bool
18645 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18647 rtx prev = insn;
18648 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18650 if (insn == start)
18651 return false;
18652 while (prev && prev != start)
18654 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18656 prev = PREV_INSN (prev);
18657 continue;
18659 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18660 return true;
18661 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18662 return false;
18663 prev = PREV_INSN (prev);
18666 /* None of the regs is defined in the bb. */
18667 return false;
18670 /* Split lea instructions into a sequence of instructions
18671 which are executed on the ALU to avoid AGU stalls.
18672 It is assumed that clobbering the flags register is allowed
18673 at the lea position. */
18675 void
18676 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18678 unsigned int regno0, regno1, regno2;
18679 struct ix86_address parts;
18680 rtx target, tmp;
18681 int ok, adds;
18683 ok = ix86_decompose_address (operands[1], &parts);
18684 gcc_assert (ok);
18686 target = gen_lowpart (mode, operands[0]);
18688 regno0 = true_regnum (target);
18689 regno1 = INVALID_REGNUM;
18690 regno2 = INVALID_REGNUM;
18692 if (parts.base)
18694 parts.base = gen_lowpart (mode, parts.base);
18695 regno1 = true_regnum (parts.base);
18698 if (parts.index)
18700 parts.index = gen_lowpart (mode, parts.index);
18701 regno2 = true_regnum (parts.index);
18704 if (parts.disp)
18705 parts.disp = gen_lowpart (mode, parts.disp);
18707 if (parts.scale > 1)
18709 /* Case r1 = r1 + ... */
18710 if (regno1 == regno0)
18712 /* If we have the case r1 = r1 + C * r2 then we
18713 would have to use multiplication, which is very
18714 expensive. Assume the cost model is wrong if we
18715 get such a case here. */
18716 gcc_assert (regno2 != regno0);
18718 for (adds = parts.scale; adds > 0; adds--)
18719 ix86_emit_binop (PLUS, mode, target, parts.index);
18721 else
18723 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18724 if (regno0 != regno2)
18725 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18727 /* Use shift for scaling. */
18728 ix86_emit_binop (ASHIFT, mode, target,
18729 GEN_INT (exact_log2 (parts.scale)));
18731 if (parts.base)
18732 ix86_emit_binop (PLUS, mode, target, parts.base);
18734 if (parts.disp && parts.disp != const0_rtx)
18735 ix86_emit_binop (PLUS, mode, target, parts.disp);
18738 else if (!parts.base && !parts.index)
18740 gcc_assert (parts.disp);
18741 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18743 else
18745 if (!parts.base)
18747 if (regno0 != regno2)
18748 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18750 else if (!parts.index)
18752 if (regno0 != regno1)
18753 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18755 else
18757 if (regno0 == regno1)
18758 tmp = parts.index;
18759 else if (regno0 == regno2)
18760 tmp = parts.base;
18761 else
18763 rtx tmp1;
18765 /* Find better operand for SET instruction, depending
18766 on which definition is farther from the insn. */
18767 if (find_nearest_reg_def (insn, regno1, regno2))
18768 tmp = parts.index, tmp1 = parts.base;
18769 else
18770 tmp = parts.base, tmp1 = parts.index;
18772 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18774 if (parts.disp && parts.disp != const0_rtx)
18775 ix86_emit_binop (PLUS, mode, target, parts.disp);
18777 ix86_emit_binop (PLUS, mode, target, tmp1);
18778 return;
18781 ix86_emit_binop (PLUS, mode, target, tmp);
18784 if (parts.disp && parts.disp != const0_rtx)
18785 ix86_emit_binop (PLUS, mode, target, parts.disp);
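/* For instance, "lea 8(%rbx,%rcx,4), %rax" may be split into
       mov %rcx, %rax
       shl $2, %rax
       add %rbx, %rax
       add $8, %rax
   while "lea (%rax,%rcx,1), %rax" degenerates to a single
   "add %rcx, %rax".  */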
18789 /* Return true if it is OK to optimize an ADD operation to an LEA
18790 operation to avoid flag register consumption. For most processors,
18791 ADD is faster than LEA. For processors like BONNELL, if the
18792 destination register of the LEA holds an actual address which will be
18793 used soon, LEA is better; otherwise ADD is better. */
18795 bool
18796 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18798 unsigned int regno0 = true_regnum (operands[0]);
18799 unsigned int regno1 = true_regnum (operands[1]);
18800 unsigned int regno2 = true_regnum (operands[2]);
18802 /* If a = b + c with a != b and a != c, we must use the lea form. */
18803 if (regno0 != regno1 && regno0 != regno2)
18804 return true;
18806 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18807 return false;
18809 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18812 /* Return true if destination reg of SET_BODY is shift count of
18813 USE_BODY. */
18815 static bool
18816 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18818 rtx set_dest;
18819 rtx shift_rtx;
18820 int i;
18822 /* Retrieve destination of SET_BODY. */
18823 switch (GET_CODE (set_body))
18825 case SET:
18826 set_dest = SET_DEST (set_body);
18827 if (!set_dest || !REG_P (set_dest))
18828 return false;
18829 break;
18830 case PARALLEL:
18831 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18832 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18833 use_body))
18834 return true;
18835 default:
18836 return false;
18837 break;
18840 /* Retrieve shift count of USE_BODY. */
18841 switch (GET_CODE (use_body))
18843 case SET:
18844 shift_rtx = XEXP (use_body, 1);
18845 break;
18846 case PARALLEL:
18847 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18848 if (ix86_dep_by_shift_count_body (set_body,
18849 XVECEXP (use_body, 0, i)))
18850 return true;
18851 default:
18852 return false;
18853 break;
18856 if (shift_rtx
18857 && (GET_CODE (shift_rtx) == ASHIFT
18858 || GET_CODE (shift_rtx) == LSHIFTRT
18859 || GET_CODE (shift_rtx) == ASHIFTRT
18860 || GET_CODE (shift_rtx) == ROTATE
18861 || GET_CODE (shift_rtx) == ROTATERT))
18863 rtx shift_count = XEXP (shift_rtx, 1);
18865 /* Return true if shift count is dest of SET_BODY. */
18866 if (REG_P (shift_count))
18868 /* Add this check since it can be invoked before register
18869 allocation by the pre-reload scheduler. */
18870 if (reload_completed
18871 && true_regnum (set_dest) == true_regnum (shift_count))
18872 return true;
18873 else if (REGNO (set_dest) == REGNO (shift_count))
18874 return true;
18878 return false;
18881 /* Return true if destination reg of SET_INSN is shift count of
18882 USE_INSN. */
18884 bool
18885 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18887 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18888 PATTERN (use_insn));
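/* Illustration of the dependency detected here: if SET_INSN writes %cl
   (say "mov $3, %cl") and USE_INSN is a shift whose count is %cl
   (say "shl %cl, %eax"), then the destination of the set is the shift
   count of the use and the function returns true.  */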
18891 /* Return TRUE or FALSE depending on whether the unary operator meets the
18892 appropriate constraints. */
18894 bool
18895 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18896 enum machine_mode mode ATTRIBUTE_UNUSED,
18897 rtx operands[2])
18899 /* If one of operands is memory, source and destination must match. */
18900 if ((MEM_P (operands[0])
18901 || MEM_P (operands[1]))
18902 && ! rtx_equal_p (operands[0], operands[1]))
18903 return false;
18904 return true;
18907 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18908 are ok, keeping in mind the possible movddup alternative. */
18910 bool
18911 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18913 if (MEM_P (operands[0]))
18914 return rtx_equal_p (operands[0], operands[1 + high]);
18915 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18916 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18917 return true;
18920 /* Post-reload splitter for converting an SF or DFmode value in an
18921 SSE register into an unsigned SImode. */
18923 void
18924 ix86_split_convert_uns_si_sse (rtx operands[])
18926 enum machine_mode vecmode;
18927 rtx value, large, zero_or_two31, input, two31, x;
18929 large = operands[1];
18930 zero_or_two31 = operands[2];
18931 input = operands[3];
18932 two31 = operands[4];
18933 vecmode = GET_MODE (large);
18934 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18936 /* Load up the value into the low element. We must ensure that the other
18937 elements are valid floats -- zero is the easiest such value. */
18938 if (MEM_P (input))
18940 if (vecmode == V4SFmode)
18941 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18942 else
18943 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18945 else
18947 input = gen_rtx_REG (vecmode, REGNO (input));
18948 emit_move_insn (value, CONST0_RTX (vecmode));
18949 if (vecmode == V4SFmode)
18950 emit_insn (gen_sse_movss (value, value, input));
18951 else
18952 emit_insn (gen_sse2_movsd (value, value, input));
18955 emit_move_insn (large, two31);
18956 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18958 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18959 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18961 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18962 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18964 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18965 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18967 large = gen_rtx_REG (V4SImode, REGNO (large));
18968 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18970 x = gen_rtx_REG (V4SImode, REGNO (value));
18971 if (vecmode == V4SFmode)
18972 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18973 else
18974 emit_insn (gen_sse2_cvttpd2dq (x, value));
18975 value = x;
18977 emit_insn (gen_xorv4si3 (value, value, large));
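/* Sketch of the algorithm above for a single value: inputs below 0x1p31
   convert directly; for a larger input such as 3000000000.0 the code
   subtracts 2^31 (giving 852516352.0), does the signed cvtt conversion,
   and xors the result with 0x80000000, which yields 3000000000 again as
   an unsigned SImode bit pattern.  */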
18980 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18981 Expects the 64-bit DImode to be supplied in a pair of integral
18982 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18983 -mfpmath=sse, !optimize_size only. */
18985 void
18986 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18988 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18989 rtx int_xmm, fp_xmm;
18990 rtx biases, exponents;
18991 rtx x;
18993 int_xmm = gen_reg_rtx (V4SImode);
18994 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18995 emit_insn (gen_movdi_to_sse (int_xmm, input));
18996 else if (TARGET_SSE_SPLIT_REGS)
18998 emit_clobber (int_xmm);
18999 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19001 else
19003 x = gen_reg_rtx (V2DImode);
19004 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19005 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19008 x = gen_rtx_CONST_VECTOR (V4SImode,
19009 gen_rtvec (4, GEN_INT (0x43300000UL),
19010 GEN_INT (0x45300000UL),
19011 const0_rtx, const0_rtx));
19012 exponents = validize_mem (force_const_mem (V4SImode, x));
19014 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19015 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19017 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19018 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19019 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19020 (0x1.0p84 + double(fp_value_hi_xmm)).
19021 Note these exponents differ by 32. */
19023 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19025 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19026 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19027 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19028 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19029 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19030 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19031 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19032 biases = validize_mem (force_const_mem (V2DFmode, biases));
19033 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19035 /* Add the upper and lower DFmode values together. */
19036 if (TARGET_SSE3)
19037 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19038 else
19040 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19041 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19042 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19045 ix86_expand_vector_extract (false, target, fp_xmm, 0);
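/* Worked example of the bias trick: for the input 0x100000005 (2^32 + 5)
   the low word 5 is packed under exponent word 0x43300000 and the high
   word 1 under 0x45300000, giving the doubles 0x1.0p52 + 5 and
   0x1.0p84 + 0x1.0p32.  Subtracting the two biases leaves 5.0 and
   4294967296.0, and adding the halves produces 4294967301.0.  */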
19048 /* Not used, but eases macroization of patterns. */
19049 void
19050 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
19051 rtx input ATTRIBUTE_UNUSED)
19053 gcc_unreachable ();
19056 /* Convert an unsigned SImode value into a DFmode. Only currently used
19057 for SSE, but applicable anywhere. */
19059 void
19060 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19062 REAL_VALUE_TYPE TWO31r;
19063 rtx x, fp;
19065 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19066 NULL, 1, OPTAB_DIRECT);
19068 fp = gen_reg_rtx (DFmode);
19069 emit_insn (gen_floatsidf2 (fp, x));
19071 real_ldexp (&TWO31r, &dconst1, 31);
19072 x = const_double_from_real_value (TWO31r, DFmode);
19074 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19075 if (x != target)
19076 emit_move_insn (target, x);
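/* The PLUS with -2147483648 above is a wrapping SImode add, so an
   unsigned input u becomes the signed value u - 2^31; after the signed
   int->double conversion, adding 0x1.0p31 back recovers u exactly,
   e.g. 4294967295 -> 2147483647 -> 2147483647.0 -> 4294967295.0.  */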
19079 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19080 32-bit mode; otherwise we have a direct convert instruction. */
19082 void
19083 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19085 REAL_VALUE_TYPE TWO32r;
19086 rtx fp_lo, fp_hi, x;
19088 fp_lo = gen_reg_rtx (DFmode);
19089 fp_hi = gen_reg_rtx (DFmode);
19091 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19093 real_ldexp (&TWO32r, &dconst1, 32);
19094 x = const_double_from_real_value (TWO32r, DFmode);
19095 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19097 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19099 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19100 0, OPTAB_DIRECT);
19101 if (x != target)
19102 emit_move_insn (target, x);
19105 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19106 For x86_32, -mfpmath=sse, !optimize_size only. */
19107 void
19108 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19110 REAL_VALUE_TYPE ONE16r;
19111 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19113 real_ldexp (&ONE16r, &dconst1, 16);
19114 x = const_double_from_real_value (ONE16r, SFmode);
19115 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
19116 NULL, 0, OPTAB_DIRECT);
19117 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
19118 NULL, 0, OPTAB_DIRECT);
19119 fp_hi = gen_reg_rtx (SFmode);
19120 fp_lo = gen_reg_rtx (SFmode);
19121 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19122 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19123 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19124 0, OPTAB_DIRECT);
19125 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19126 0, OPTAB_DIRECT);
19127 if (!rtx_equal_p (target, fp_hi))
19128 emit_move_insn (target, fp_hi);
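/* In other words the result is computed as
     (float) (u >> 16) * 0x1.0p16 + (float) (u & 0xffff)
   where both halves fit in 16 bits and therefore convert exactly;
   e.g. u = 0x30005 gives 3.0f * 65536.0f + 5.0f = 196613.0f.  */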
19131 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19132 a vector of unsigned ints VAL to vector of floats TARGET. */
19134 void
19135 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19137 rtx tmp[8];
19138 REAL_VALUE_TYPE TWO16r;
19139 enum machine_mode intmode = GET_MODE (val);
19140 enum machine_mode fltmode = GET_MODE (target);
19141 rtx (*cvt) (rtx, rtx);
19143 if (intmode == V4SImode)
19144 cvt = gen_floatv4siv4sf2;
19145 else
19146 cvt = gen_floatv8siv8sf2;
19147 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19148 tmp[0] = force_reg (intmode, tmp[0]);
19149 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19150 OPTAB_DIRECT);
19151 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19152 NULL_RTX, 1, OPTAB_DIRECT);
19153 tmp[3] = gen_reg_rtx (fltmode);
19154 emit_insn (cvt (tmp[3], tmp[1]));
19155 tmp[4] = gen_reg_rtx (fltmode);
19156 emit_insn (cvt (tmp[4], tmp[2]));
19157 real_ldexp (&TWO16r, &dconst1, 16);
19158 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19159 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19160 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19161 OPTAB_DIRECT);
19162 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19163 OPTAB_DIRECT);
19164 if (tmp[7] != target)
19165 emit_move_insn (target, tmp[7]);
19168 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19169 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19170 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19171 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19174 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19176 REAL_VALUE_TYPE TWO31r;
19177 rtx two31r, tmp[4];
19178 enum machine_mode mode = GET_MODE (val);
19179 enum machine_mode scalarmode = GET_MODE_INNER (mode);
19180 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19181 rtx (*cmp) (rtx, rtx, rtx, rtx);
19182 int i;
19184 for (i = 0; i < 3; i++)
19185 tmp[i] = gen_reg_rtx (mode);
19186 real_ldexp (&TWO31r, &dconst1, 31);
19187 two31r = const_double_from_real_value (TWO31r, scalarmode);
19188 two31r = ix86_build_const_vector (mode, 1, two31r);
19189 two31r = force_reg (mode, two31r);
19190 switch (mode)
19192 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19193 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19194 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19195 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19196 default: gcc_unreachable ();
19198 tmp[3] = gen_rtx_LE (mode, two31r, val);
19199 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19200 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19201 0, OPTAB_DIRECT);
19202 if (intmode == V4SImode || TARGET_AVX2)
19203 *xorp = expand_simple_binop (intmode, ASHIFT,
19204 gen_lowpart (intmode, tmp[0]),
19205 GEN_INT (31), NULL_RTX, 0,
19206 OPTAB_DIRECT);
19207 else
19209 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19210 two31 = ix86_build_const_vector (intmode, 1, two31);
19211 *xorp = expand_simple_binop (intmode, AND,
19212 gen_lowpart (intmode, tmp[0]),
19213 two31, NULL_RTX, 0,
19214 OPTAB_DIRECT);
19216 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19217 0, OPTAB_DIRECT);
19220 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19221 then replicate the value for all elements of the vector
19222 register. */
19225 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
19227 int i, n_elt;
19228 rtvec v;
19229 enum machine_mode scalar_mode;
19231 switch (mode)
19233 case V64QImode:
19234 case V32QImode:
19235 case V16QImode:
19236 case V32HImode:
19237 case V16HImode:
19238 case V8HImode:
19239 case V16SImode:
19240 case V8SImode:
19241 case V4SImode:
19242 case V8DImode:
19243 case V4DImode:
19244 case V2DImode:
19245 gcc_assert (vect);
19246 case V16SFmode:
19247 case V8SFmode:
19248 case V4SFmode:
19249 case V8DFmode:
19250 case V4DFmode:
19251 case V2DFmode:
19252 n_elt = GET_MODE_NUNITS (mode);
19253 v = rtvec_alloc (n_elt);
19254 scalar_mode = GET_MODE_INNER (mode);
19256 RTVEC_ELT (v, 0) = value;
19258 for (i = 1; i < n_elt; ++i)
19259 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19261 return gen_rtx_CONST_VECTOR (mode, v);
19263 default:
19264 gcc_unreachable ();
19268 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19269 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19270 for an SSE register. If VECT is true, then replicate the mask for
19271 all elements of the vector register. If INVERT is true, then create
19272 a mask excluding the sign bit. */
19275 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
19277 enum machine_mode vec_mode, imode;
19278 HOST_WIDE_INT hi, lo;
19279 int shift = 63;
19280 rtx v;
19281 rtx mask;
19283 /* Find the sign bit, sign extended to 2*HWI. */
19284 switch (mode)
19286 case V16SImode:
19287 case V16SFmode:
19288 case V8SImode:
19289 case V4SImode:
19290 case V8SFmode:
19291 case V4SFmode:
19292 vec_mode = mode;
19293 mode = GET_MODE_INNER (mode);
19294 imode = SImode;
19295 lo = 0x80000000, hi = lo < 0;
19296 break;
19298 case V8DImode:
19299 case V4DImode:
19300 case V2DImode:
19301 case V8DFmode:
19302 case V4DFmode:
19303 case V2DFmode:
19304 vec_mode = mode;
19305 mode = GET_MODE_INNER (mode);
19306 imode = DImode;
19307 if (HOST_BITS_PER_WIDE_INT >= 64)
19308 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19309 else
19310 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19311 break;
19313 case TImode:
19314 case TFmode:
19315 vec_mode = VOIDmode;
19316 if (HOST_BITS_PER_WIDE_INT >= 64)
19318 imode = TImode;
19319 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19321 else
19323 rtvec vec;
19325 imode = DImode;
19326 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19328 if (invert)
19330 lo = ~lo, hi = ~hi;
19331 v = constm1_rtx;
19333 else
19334 v = const0_rtx;
19336 mask = immed_double_const (lo, hi, imode);
19338 vec = gen_rtvec (2, v, mask);
19339 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19340 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19342 return v;
19344 break;
19346 default:
19347 gcc_unreachable ();
19350 if (invert)
19351 lo = ~lo, hi = ~hi;
19353 /* Force this value into the low part of a fp vector constant. */
19354 mask = immed_double_const (lo, hi, imode);
19355 mask = gen_lowpart (mode, mask);
19357 if (vec_mode == VOIDmode)
19358 return force_reg (mode, mask);
19360 v = ix86_build_const_vector (vec_mode, vect, mask);
19361 return force_reg (vec_mode, v);
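/* For example, for V4SFmode with VECT set this builds the constant
   { -0.0, -0.0, -0.0, -0.0 } (each element 0x80000000); with INVERT
   set each element is 0x7fffffff instead, i.e. a mask that clears the
   sign bit rather than isolating it.  */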
19364 /* Generate code for floating point ABS or NEG. */
19366 void
19367 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19368 rtx operands[])
19370 rtx mask, set, dst, src;
19371 bool use_sse = false;
19372 bool vector_mode = VECTOR_MODE_P (mode);
19373 enum machine_mode vmode = mode;
19375 if (vector_mode)
19376 use_sse = true;
19377 else if (mode == TFmode)
19378 use_sse = true;
19379 else if (TARGET_SSE_MATH)
19381 use_sse = SSE_FLOAT_MODE_P (mode);
19382 if (mode == SFmode)
19383 vmode = V4SFmode;
19384 else if (mode == DFmode)
19385 vmode = V2DFmode;
19388 /* NEG and ABS performed with SSE use bitwise mask operations.
19389 Create the appropriate mask now. */
19390 if (use_sse)
19391 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19392 else
19393 mask = NULL_RTX;
19395 dst = operands[0];
19396 src = operands[1];
19398 set = gen_rtx_fmt_e (code, mode, src);
19399 set = gen_rtx_SET (VOIDmode, dst, set);
19401 if (mask)
19403 rtx use, clob;
19404 rtvec par;
19406 use = gen_rtx_USE (VOIDmode, mask);
19407 if (vector_mode)
19408 par = gen_rtvec (2, set, use);
19409 else
19411 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19412 par = gen_rtvec (3, set, use, clob);
19414 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19416 else
19417 emit_insn (set);
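/* Conceptually, abs(x) clears the sign bit (an AND with the inverted
   mask built above for ABS) and neg(x) flips it (an XOR with the
   sign-bit mask); the mask is emitted as a USE here so that it is
   still available when the insn is later split into that bitwise
   form.  */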
19420 /* Expand a copysign operation. Special case operand 0 being a constant. */
19422 void
19423 ix86_expand_copysign (rtx operands[])
19425 enum machine_mode mode, vmode;
19426 rtx dest, op0, op1, mask, nmask;
19428 dest = operands[0];
19429 op0 = operands[1];
19430 op1 = operands[2];
19432 mode = GET_MODE (dest);
19434 if (mode == SFmode)
19435 vmode = V4SFmode;
19436 else if (mode == DFmode)
19437 vmode = V2DFmode;
19438 else
19439 vmode = mode;
19441 if (GET_CODE (op0) == CONST_DOUBLE)
19443 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19445 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19446 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19448 if (mode == SFmode || mode == DFmode)
19450 if (op0 == CONST0_RTX (mode))
19451 op0 = CONST0_RTX (vmode);
19452 else
19454 rtx v = ix86_build_const_vector (vmode, false, op0);
19456 op0 = force_reg (vmode, v);
19459 else if (op0 != CONST0_RTX (mode))
19460 op0 = force_reg (mode, op0);
19462 mask = ix86_build_signbit_mask (vmode, 0, 0);
19464 if (mode == SFmode)
19465 copysign_insn = gen_copysignsf3_const;
19466 else if (mode == DFmode)
19467 copysign_insn = gen_copysigndf3_const;
19468 else
19469 copysign_insn = gen_copysigntf3_const;
19471 emit_insn (copysign_insn (dest, op0, op1, mask));
19473 else
19475 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19477 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19478 mask = ix86_build_signbit_mask (vmode, 0, 0);
19480 if (mode == SFmode)
19481 copysign_insn = gen_copysignsf3_var;
19482 else if (mode == DFmode)
19483 copysign_insn = gen_copysigndf3_var;
19484 else
19485 copysign_insn = gen_copysigntf3_var;
19487 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19491 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19492 be a constant, and so has already been expanded into a vector constant. */
19494 void
19495 ix86_split_copysign_const (rtx operands[])
19497 enum machine_mode mode, vmode;
19498 rtx dest, op0, mask, x;
19500 dest = operands[0];
19501 op0 = operands[1];
19502 mask = operands[3];
19504 mode = GET_MODE (dest);
19505 vmode = GET_MODE (mask);
19507 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19508 x = gen_rtx_AND (vmode, dest, mask);
19509 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19511 if (op0 != CONST0_RTX (vmode))
19513 x = gen_rtx_IOR (vmode, dest, op0);
19514 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19518 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19519 so we have to do two masks. */
19521 void
19522 ix86_split_copysign_var (rtx operands[])
19524 enum machine_mode mode, vmode;
19525 rtx dest, scratch, op0, op1, mask, nmask, x;
19527 dest = operands[0];
19528 scratch = operands[1];
19529 op0 = operands[2];
19530 op1 = operands[3];
19531 nmask = operands[4];
19532 mask = operands[5];
19534 mode = GET_MODE (dest);
19535 vmode = GET_MODE (mask);
19537 if (rtx_equal_p (op0, op1))
19539 /* Shouldn't happen often (it's useless, obviously), but when it does
19540 we'd generate incorrect code if we continue below. */
19541 emit_move_insn (dest, op0);
19542 return;
19545 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19547 gcc_assert (REGNO (op1) == REGNO (scratch));
19549 x = gen_rtx_AND (vmode, scratch, mask);
19550 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19552 dest = mask;
19553 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19554 x = gen_rtx_NOT (vmode, dest);
19555 x = gen_rtx_AND (vmode, x, op0);
19556 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19558 else
19560 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19562 x = gen_rtx_AND (vmode, scratch, mask);
19564 else /* alternative 2,4 */
19566 gcc_assert (REGNO (mask) == REGNO (scratch));
19567 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19568 x = gen_rtx_AND (vmode, scratch, op1);
19570 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19572 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19574 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19575 x = gen_rtx_AND (vmode, dest, nmask);
19577 else /* alternative 3,4 */
19579 gcc_assert (REGNO (nmask) == REGNO (dest));
19580 dest = nmask;
19581 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19582 x = gen_rtx_AND (vmode, dest, op0);
19584 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19587 x = gen_rtx_IOR (vmode, dest, scratch);
19588 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19591 /* Return TRUE or FALSE depending on whether the first SET in INSN
19592 has source and destination with matching CC modes, and that the
19593 CC mode is at least as constrained as REQ_MODE. */
19595 bool
19596 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19598 rtx set;
19599 enum machine_mode set_mode;
19601 set = PATTERN (insn);
19602 if (GET_CODE (set) == PARALLEL)
19603 set = XVECEXP (set, 0, 0);
19604 gcc_assert (GET_CODE (set) == SET);
19605 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19607 set_mode = GET_MODE (SET_DEST (set));
19608 switch (set_mode)
19610 case CCNOmode:
19611 if (req_mode != CCNOmode
19612 && (req_mode != CCmode
19613 || XEXP (SET_SRC (set), 1) != const0_rtx))
19614 return false;
19615 break;
19616 case CCmode:
19617 if (req_mode == CCGCmode)
19618 return false;
19619 /* FALLTHRU */
19620 case CCGCmode:
19621 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19622 return false;
19623 /* FALLTHRU */
19624 case CCGOCmode:
19625 if (req_mode == CCZmode)
19626 return false;
19627 /* FALLTHRU */
19628 case CCZmode:
19629 break;
19631 case CCAmode:
19632 case CCCmode:
19633 case CCOmode:
19634 case CCSmode:
19635 if (set_mode != req_mode)
19636 return false;
19637 break;
19639 default:
19640 gcc_unreachable ();
19643 return GET_MODE (SET_SRC (set)) == set_mode;
19646 /* Generate insn patterns to do an integer compare of OPERANDS. */
19648 static rtx
19649 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19651 enum machine_mode cmpmode;
19652 rtx tmp, flags;
19654 cmpmode = SELECT_CC_MODE (code, op0, op1);
19655 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19657 /* This is very simple, but making the interface the same as in the
19658 FP case makes the rest of the code easier. */
19659 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19660 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19662 /* Return the test that should be put into the flags user, i.e.
19663 the bcc, scc, or cmov instruction. */
19664 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19667 /* Figure out whether to use ordered or unordered fp comparisons.
19668 Return the appropriate mode to use. */
19670 enum machine_mode
19671 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19673 /* ??? In order to make all comparisons reversible, we do all comparisons
19674 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19675 all forms trapping and nontrapping comparisons, we can make inequality
19676 comparisons trapping again, since it results in better code when using
19677 FCOM based compares. */
19678 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19681 enum machine_mode
19682 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19684 enum machine_mode mode = GET_MODE (op0);
19686 if (SCALAR_FLOAT_MODE_P (mode))
19688 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19689 return ix86_fp_compare_mode (code);
19692 switch (code)
19694 /* Only zero flag is needed. */
19695 case EQ: /* ZF=0 */
19696 case NE: /* ZF!=0 */
19697 return CCZmode;
19698 /* Codes needing carry flag. */
19699 case GEU: /* CF=0 */
19700 case LTU: /* CF=1 */
19701 /* Detect overflow checks. They need just the carry flag. */
19702 if (GET_CODE (op0) == PLUS
19703 && rtx_equal_p (op1, XEXP (op0, 0)))
19704 return CCCmode;
19705 else
19706 return CCmode;
19707 case GTU: /* CF=0 & ZF=0 */
19708 case LEU: /* CF=1 | ZF=1 */
19709 return CCmode;
19710 /* Codes possibly doable only with sign flag when
19711 comparing against zero. */
19712 case GE: /* SF=OF or SF=0 */
19713 case LT: /* SF<>OF or SF=1 */
19714 if (op1 == const0_rtx)
19715 return CCGOCmode;
19716 else
19717 /* For other cases Carry flag is not required. */
19718 return CCGCmode;
19719 /* Codes doable only with the sign flag when comparing
19720 against zero, but we lack a jump instruction for it,
19721 so we need to use relational tests against overflow,
19722 which thus needs to be zero. */
19723 case GT: /* ZF=0 & SF=OF */
19724 case LE: /* ZF=1 | SF<>OF */
19725 if (op1 == const0_rtx)
19726 return CCNOmode;
19727 else
19728 return CCGCmode;
19729 /* The strcmp pattern does (use flags), and combine may ask us for the
19730 proper mode. */
19731 case USE:
19732 return CCmode;
19733 default:
19734 gcc_unreachable ();
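/* A few examples of the mapping above: "a == b" only needs ZF and gets
   CCZmode; an unsigned "a < b" needs CF and gets CCmode (or CCCmode for
   the overflow-check idiom "a + b < a"); a signed "a < 0" can be decided
   from the sign flag and gets CCGOCmode.  */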
19738 /* Return the fixed registers used for condition codes. */
19740 static bool
19741 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19743 *p1 = FLAGS_REG;
19744 *p2 = FPSR_REG;
19745 return true;
19748 /* If two condition code modes are compatible, return a condition code
19749 mode which is compatible with both. Otherwise, return
19750 VOIDmode. */
19752 static enum machine_mode
19753 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19755 if (m1 == m2)
19756 return m1;
19758 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19759 return VOIDmode;
19761 if ((m1 == CCGCmode && m2 == CCGOCmode)
19762 || (m1 == CCGOCmode && m2 == CCGCmode))
19763 return CCGCmode;
19765 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19766 return m2;
19767 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19768 return m1;
19770 switch (m1)
19772 default:
19773 gcc_unreachable ();
19775 case CCmode:
19776 case CCGCmode:
19777 case CCGOCmode:
19778 case CCNOmode:
19779 case CCAmode:
19780 case CCCmode:
19781 case CCOmode:
19782 case CCSmode:
19783 case CCZmode:
19784 switch (m2)
19786 default:
19787 return VOIDmode;
19789 case CCmode:
19790 case CCGCmode:
19791 case CCGOCmode:
19792 case CCNOmode:
19793 case CCAmode:
19794 case CCCmode:
19795 case CCOmode:
19796 case CCSmode:
19797 case CCZmode:
19798 return CCmode;
19801 case CCFPmode:
19802 case CCFPUmode:
19803 /* These are only compatible with themselves, which we already
19804 checked above. */
19805 return VOIDmode;
19810 /* Return a comparison we can do and that it is equivalent to
19811 swap_condition (code) apart possibly from orderedness.
19812 But, never change orderedness if TARGET_IEEE_FP, returning
19813 UNKNOWN in that case if necessary. */
19815 static enum rtx_code
19816 ix86_fp_swap_condition (enum rtx_code code)
19818 switch (code)
19820 case GT: /* GTU - CF=0 & ZF=0 */
19821 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19822 case GE: /* GEU - CF=0 */
19823 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19824 case UNLT: /* LTU - CF=1 */
19825 return TARGET_IEEE_FP ? UNKNOWN : GT;
19826 case UNLE: /* LEU - CF=1 | ZF=1 */
19827 return TARGET_IEEE_FP ? UNKNOWN : GE;
19828 default:
19829 return swap_condition (code);
19833 /* Return the cost of comparison CODE using the best strategy for performance.
19834 All of the following functions use the number of instructions as the cost metric.
19835 In the future this should be tweaked to compute bytes for optimize_size and
19836 take into account the performance of various instructions on various CPUs. */
19838 static int
19839 ix86_fp_comparison_cost (enum rtx_code code)
19841 int arith_cost;
19843 /* The cost of code using bit-twiddling on %ah. */
19844 switch (code)
19846 case UNLE:
19847 case UNLT:
19848 case LTGT:
19849 case GT:
19850 case GE:
19851 case UNORDERED:
19852 case ORDERED:
19853 case UNEQ:
19854 arith_cost = 4;
19855 break;
19856 case LT:
19857 case NE:
19858 case EQ:
19859 case UNGE:
19860 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19861 break;
19862 case LE:
19863 case UNGT:
19864 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19865 break;
19866 default:
19867 gcc_unreachable ();
19870 switch (ix86_fp_comparison_strategy (code))
19872 case IX86_FPCMP_COMI:
19873 return arith_cost > 4 ? 3 : 2;
19874 case IX86_FPCMP_SAHF:
19875 return arith_cost > 4 ? 4 : 3;
19876 default:
19877 return arith_cost;
19881 /* Return the strategy to use for floating-point comparisons. We assume that
19882 fcomi is always preferable where available, since that is also true when
19883 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19885 enum ix86_fpcmp_strategy
19886 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19888 /* Do fcomi/sahf based test when profitable. */
19890 if (TARGET_CMOVE)
19891 return IX86_FPCMP_COMI;
19893 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19894 return IX86_FPCMP_SAHF;
19896 return IX86_FPCMP_ARITH;
19899 /* Swap, force into registers, or otherwise massage the two operands
19900 to a fp comparison. The operands are updated in place; the new
19901 comparison code is returned. */
19903 static enum rtx_code
19904 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19906 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19907 rtx op0 = *pop0, op1 = *pop1;
19908 enum machine_mode op_mode = GET_MODE (op0);
19909 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19911 /* All of the unordered compare instructions only work on registers.
19912 The same is true of the fcomi compare instructions. The XFmode
19913 compare instructions require registers except when comparing
19914 against zero or when converting operand 1 from fixed point to
19915 floating point. */
19917 if (!is_sse
19918 && (fpcmp_mode == CCFPUmode
19919 || (op_mode == XFmode
19920 && ! (standard_80387_constant_p (op0) == 1
19921 || standard_80387_constant_p (op1) == 1)
19922 && GET_CODE (op1) != FLOAT)
19923 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19925 op0 = force_reg (op_mode, op0);
19926 op1 = force_reg (op_mode, op1);
19928 else
19930 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19931 things around if they appear profitable, otherwise force op0
19932 into a register. */
19934 if (standard_80387_constant_p (op0) == 0
19935 || (MEM_P (op0)
19936 && ! (standard_80387_constant_p (op1) == 0
19937 || MEM_P (op1))))
19939 enum rtx_code new_code = ix86_fp_swap_condition (code);
19940 if (new_code != UNKNOWN)
19942 rtx tmp;
19943 tmp = op0, op0 = op1, op1 = tmp;
19944 code = new_code;
19948 if (!REG_P (op0))
19949 op0 = force_reg (op_mode, op0);
19951 if (CONSTANT_P (op1))
19953 int tmp = standard_80387_constant_p (op1);
19954 if (tmp == 0)
19955 op1 = validize_mem (force_const_mem (op_mode, op1));
19956 else if (tmp == 1)
19958 if (TARGET_CMOVE)
19959 op1 = force_reg (op_mode, op1);
19961 else
19962 op1 = force_reg (op_mode, op1);
19966 /* Try to rearrange the comparison to make it cheaper. */
19967 if (ix86_fp_comparison_cost (code)
19968 > ix86_fp_comparison_cost (swap_condition (code))
19969 && (REG_P (op1) || can_create_pseudo_p ()))
19971 rtx tmp;
19972 tmp = op0, op0 = op1, op1 = tmp;
19973 code = swap_condition (code);
19974 if (!REG_P (op0))
19975 op0 = force_reg (op_mode, op0);
19978 *pop0 = op0;
19979 *pop1 = op1;
19980 return code;
19983 /* Convert comparison codes we use to represent FP comparison to integer
19984 code that will result in proper branch. Return UNKNOWN if no such code
19985 is available. */
19987 enum rtx_code
19988 ix86_fp_compare_code_to_integer (enum rtx_code code)
19990 switch (code)
19992 case GT:
19993 return GTU;
19994 case GE:
19995 return GEU;
19996 case ORDERED:
19997 case UNORDERED:
19998 return code;
19999 break;
20000 case UNEQ:
20001 return EQ;
20002 break;
20003 case UNLT:
20004 return LTU;
20005 break;
20006 case UNLE:
20007 return LEU;
20008 break;
20009 case LTGT:
20010 return NE;
20011 break;
20012 default:
20013 return UNKNOWN;
20017 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20019 static rtx
20020 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20022 enum machine_mode fpcmp_mode, intcmp_mode;
20023 rtx tmp, tmp2;
20025 fpcmp_mode = ix86_fp_compare_mode (code);
20026 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20028 /* Do fcomi/sahf based test when profitable. */
20029 switch (ix86_fp_comparison_strategy (code))
20031 case IX86_FPCMP_COMI:
20032 intcmp_mode = fpcmp_mode;
20033 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20034 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20035 tmp);
20036 emit_insn (tmp);
20037 break;
20039 case IX86_FPCMP_SAHF:
20040 intcmp_mode = fpcmp_mode;
20041 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20042 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20043 tmp);
20045 if (!scratch)
20046 scratch = gen_reg_rtx (HImode);
20047 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20048 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20049 break;
20051 case IX86_FPCMP_ARITH:
20052 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20053 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20054 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20055 if (!scratch)
20056 scratch = gen_reg_rtx (HImode);
20057 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20059 /* In the unordered case, we have to check C2 for NaN's, which
20060 doesn't happen to work out to anything nice combination-wise.
20061 So do some bit twiddling on the value we've got in AH to come
20062 up with an appropriate set of condition codes. */
20064 intcmp_mode = CCNOmode;
20065 switch (code)
20067 case GT:
20068 case UNGT:
20069 if (code == GT || !TARGET_IEEE_FP)
20071 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20072 code = EQ;
20074 else
20076 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20077 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20078 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20079 intcmp_mode = CCmode;
20080 code = GEU;
20082 break;
20083 case LT:
20084 case UNLT:
20085 if (code == LT && TARGET_IEEE_FP)
20087 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20088 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20089 intcmp_mode = CCmode;
20090 code = EQ;
20092 else
20094 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20095 code = NE;
20097 break;
20098 case GE:
20099 case UNGE:
20100 if (code == GE || !TARGET_IEEE_FP)
20102 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20103 code = EQ;
20105 else
20107 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20108 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20109 code = NE;
20111 break;
20112 case LE:
20113 case UNLE:
20114 if (code == LE && TARGET_IEEE_FP)
20116 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20117 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20118 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20119 intcmp_mode = CCmode;
20120 code = LTU;
20122 else
20124 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20125 code = NE;
20127 break;
20128 case EQ:
20129 case UNEQ:
20130 if (code == EQ && TARGET_IEEE_FP)
20132 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20133 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20134 intcmp_mode = CCmode;
20135 code = EQ;
20137 else
20139 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20140 code = NE;
20142 break;
20143 case NE:
20144 case LTGT:
20145 if (code == NE && TARGET_IEEE_FP)
20147 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20148 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20149 GEN_INT (0x40)));
20150 code = NE;
20152 else
20154 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20155 code = EQ;
20157 break;
20159 case UNORDERED:
20160 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20161 code = NE;
20162 break;
20163 case ORDERED:
20164 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20165 code = EQ;
20166 break;
20168 default:
20169 gcc_unreachable ();
20171 break;
20173 default:
20174 gcc_unreachable ();
20177 /* Return the test that should be put into the flags user, i.e.
20178 the bcc, scc, or cmov instruction. */
20179 return gen_rtx_fmt_ee (code, VOIDmode,
20180 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20181 const0_rtx);
20184 static rtx
20185 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20187 rtx ret;
20189 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20190 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20192 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20194 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20195 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20197 else
20198 ret = ix86_expand_int_compare (code, op0, op1);
20200 return ret;
20203 void
20204 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20206 enum machine_mode mode = GET_MODE (op0);
20207 rtx tmp;
20209 switch (mode)
20211 case SFmode:
20212 case DFmode:
20213 case XFmode:
20214 case QImode:
20215 case HImode:
20216 case SImode:
20217 simple:
20218 tmp = ix86_expand_compare (code, op0, op1);
20219 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20220 gen_rtx_LABEL_REF (VOIDmode, label),
20221 pc_rtx);
20222 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20223 return;
20225 case DImode:
20226 if (TARGET_64BIT)
20227 goto simple;
20228 case TImode:
20229 /* Expand DImode branch into multiple compare+branch. */
20231 rtx lo[2], hi[2], label2;
20232 enum rtx_code code1, code2, code3;
20233 enum machine_mode submode;
20235 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20237 tmp = op0, op0 = op1, op1 = tmp;
20238 code = swap_condition (code);
20241 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20242 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20244 submode = mode == DImode ? SImode : DImode;
20246 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20247 avoid two branches. This costs one extra insn, so disable when
20248 optimizing for size. */
20250 if ((code == EQ || code == NE)
20251 && (!optimize_insn_for_size_p ()
20252 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20254 rtx xor0, xor1;
20256 xor1 = hi[0];
20257 if (hi[1] != const0_rtx)
20258 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20259 NULL_RTX, 0, OPTAB_WIDEN);
20261 xor0 = lo[0];
20262 if (lo[1] != const0_rtx)
20263 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20264 NULL_RTX, 0, OPTAB_WIDEN);
20266 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20267 NULL_RTX, 0, OPTAB_WIDEN);
20269 ix86_expand_branch (code, tmp, const0_rtx, label);
20270 return;
20273 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20274 op1 is a constant and the low word is zero, then we can just
20275 examine the high word. Similarly for low word -1 and
20276 less-or-equal-than or greater-than. */
20278 if (CONST_INT_P (hi[1]))
20279 switch (code)
20281 case LT: case LTU: case GE: case GEU:
20282 if (lo[1] == const0_rtx)
20284 ix86_expand_branch (code, hi[0], hi[1], label);
20285 return;
20287 break;
20288 case LE: case LEU: case GT: case GTU:
20289 if (lo[1] == constm1_rtx)
20291 ix86_expand_branch (code, hi[0], hi[1], label);
20292 return;
20294 break;
20295 default:
20296 break;
20299 /* Otherwise, we need two or three jumps. */
20301 label2 = gen_label_rtx ();
20303 code1 = code;
20304 code2 = swap_condition (code);
20305 code3 = unsigned_condition (code);
20307 switch (code)
20309 case LT: case GT: case LTU: case GTU:
20310 break;
20312 case LE: code1 = LT; code2 = GT; break;
20313 case GE: code1 = GT; code2 = LT; break;
20314 case LEU: code1 = LTU; code2 = GTU; break;
20315 case GEU: code1 = GTU; code2 = LTU; break;
20317 case EQ: code1 = UNKNOWN; code2 = NE; break;
20318 case NE: code2 = UNKNOWN; break;
20320 default:
20321 gcc_unreachable ();
20325 * a < b =>
20326 * if (hi(a) < hi(b)) goto true;
20327 * if (hi(a) > hi(b)) goto false;
20328 * if (lo(a) < lo(b)) goto true;
20329 * false:
20332 if (code1 != UNKNOWN)
20333 ix86_expand_branch (code1, hi[0], hi[1], label);
20334 if (code2 != UNKNOWN)
20335 ix86_expand_branch (code2, hi[0], hi[1], label2);
20337 ix86_expand_branch (code3, lo[0], lo[1], label);
20339 if (code2 != UNKNOWN)
20340 emit_label (label2);
20341 return;
20344 default:
20345 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20346 goto simple;
20350 /* Split branch based on floating point condition. */
20351 void
20352 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20353 rtx target1, rtx target2, rtx tmp)
20355 rtx condition;
20356 rtx i;
20358 if (target2 != pc_rtx)
20360 rtx tmp = target2;
20361 code = reverse_condition_maybe_unordered (code);
20362 target2 = target1;
20363 target1 = tmp;
20366 condition = ix86_expand_fp_compare (code, op1, op2,
20367 tmp);
20369 i = emit_jump_insn (gen_rtx_SET
20370 (VOIDmode, pc_rtx,
20371 gen_rtx_IF_THEN_ELSE (VOIDmode,
20372 condition, target1, target2)));
20373 if (split_branch_probability >= 0)
20374 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20377 void
20378 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20380 rtx ret;
20382 gcc_assert (GET_MODE (dest) == QImode);
20384 ret = ix86_expand_compare (code, op0, op1);
20385 PUT_MODE (ret, QImode);
20386 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20389 /* Expand comparison setting or clearing carry flag. Return true when
20390 successful and set pop for the operation. */
20391 static bool
20392 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20394 enum machine_mode mode =
20395 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20397 /* Do not handle double-mode compares that go through special path. */
20398 if (mode == (TARGET_64BIT ? TImode : DImode))
20399 return false;
20401 if (SCALAR_FLOAT_MODE_P (mode))
20403 rtx compare_op, compare_seq;
20405 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20407 /* Shortcut: the following common codes never translate
20408 into carry flag compares. */
20409 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20410 || code == ORDERED || code == UNORDERED)
20411 return false;
20413 /* These comparisons require zero flag; swap operands so they won't. */
20414 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20415 && !TARGET_IEEE_FP)
20417 rtx tmp = op0;
20418 op0 = op1;
20419 op1 = tmp;
20420 code = swap_condition (code);
20423 /* Try to expand the comparison and verify that we end up with
20424 a carry flag based comparison. This fails to be true only when
20425 we decide to expand the comparison using arithmetic, which is not
20426 a common scenario. */
20427 start_sequence ();
20428 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20429 compare_seq = get_insns ();
20430 end_sequence ();
20432 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20433 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20434 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20435 else
20436 code = GET_CODE (compare_op);
20438 if (code != LTU && code != GEU)
20439 return false;
20441 emit_insn (compare_seq);
20442 *pop = compare_op;
20443 return true;
20446 if (!INTEGRAL_MODE_P (mode))
20447 return false;
20449 switch (code)
20451 case LTU:
20452 case GEU:
20453 break;
20455 /* Convert a==0 into (unsigned)a<1. */
20456 case EQ:
20457 case NE:
20458 if (op1 != const0_rtx)
20459 return false;
20460 op1 = const1_rtx;
20461 code = (code == EQ ? LTU : GEU);
20462 break;
20464 /* Convert a>b into b<a or a>=b+1. */
20465 case GTU:
20466 case LEU:
20467 if (CONST_INT_P (op1))
20469 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20470 /* Bail out on overflow. We could still swap the operands, but that
20471 would force loading the constant into a register. */
20472 if (op1 == const0_rtx
20473 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20474 return false;
20475 code = (code == GTU ? GEU : LTU);
20477 else
20479 rtx tmp = op1;
20480 op1 = op0;
20481 op0 = tmp;
20482 code = (code == GTU ? LTU : GEU);
20484 break;
20486 /* Convert a>=0 into (unsigned)a<0x80000000. */
20487 case LT:
20488 case GE:
20489 if (mode == DImode || op1 != const0_rtx)
20490 return false;
20491 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20492 code = (code == LT ? GEU : LTU);
20493 break;
20494 case LE:
20495 case GT:
20496 if (mode == DImode || op1 != constm1_rtx)
20497 return false;
20498 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20499 code = (code == LE ? GEU : LTU);
20500 break;
20502 default:
20503 return false;
20505 /* Swapping operands may cause a constant to appear as the first operand. */
20506 if (!nonimmediate_operand (op0, VOIDmode))
20508 if (!can_create_pseudo_p ())
20509 return false;
20510 op0 = force_reg (mode, op0);
20512 *pop = ix86_expand_compare (code, op0, op1);
20513 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20514 return true;
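/* For example, "x == 0" is rewritten above as "(unsigned) x < 1", so the
   resulting compare sets the carry flag exactly when x is zero and the
   sbb-based sequences in ix86_expand_int_movcc below can consume it.  */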
20517 bool
20518 ix86_expand_int_movcc (rtx operands[])
20520 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20521 rtx compare_seq, compare_op;
20522 enum machine_mode mode = GET_MODE (operands[0]);
20523 bool sign_bit_compare_p = false;
20524 rtx op0 = XEXP (operands[1], 0);
20525 rtx op1 = XEXP (operands[1], 1);
20527 if (GET_MODE (op0) == TImode
20528 || (GET_MODE (op0) == DImode
20529 && !TARGET_64BIT))
20530 return false;
20532 start_sequence ();
20533 compare_op = ix86_expand_compare (code, op0, op1);
20534 compare_seq = get_insns ();
20535 end_sequence ();
20537 compare_code = GET_CODE (compare_op);
20539 if ((op1 == const0_rtx && (code == GE || code == LT))
20540 || (op1 == constm1_rtx && (code == GT || code == LE)))
20541 sign_bit_compare_p = true;
20543 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20544 HImode insns, we'd be swallowed in word prefix ops. */
20546 if ((mode != HImode || TARGET_FAST_PREFIX)
20547 && (mode != (TARGET_64BIT ? TImode : DImode))
20548 && CONST_INT_P (operands[2])
20549 && CONST_INT_P (operands[3]))
20551 rtx out = operands[0];
20552 HOST_WIDE_INT ct = INTVAL (operands[2]);
20553 HOST_WIDE_INT cf = INTVAL (operands[3]);
20554 HOST_WIDE_INT diff;
20556 diff = ct - cf;
20557 /* Sign bit compares are better done using shifts than by using
20558 sbb. */
20559 if (sign_bit_compare_p
20560 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20562 /* Detect overlap between destination and compare sources. */
20563 rtx tmp = out;
20565 if (!sign_bit_compare_p)
20567 rtx flags;
20568 bool fpcmp = false;
20570 compare_code = GET_CODE (compare_op);
20572 flags = XEXP (compare_op, 0);
20574 if (GET_MODE (flags) == CCFPmode
20575 || GET_MODE (flags) == CCFPUmode)
20577 fpcmp = true;
20578 compare_code
20579 = ix86_fp_compare_code_to_integer (compare_code);
20582 /* To simplify rest of code, restrict to the GEU case. */
20583 if (compare_code == LTU)
20585 HOST_WIDE_INT tmp = ct;
20586 ct = cf;
20587 cf = tmp;
20588 compare_code = reverse_condition (compare_code);
20589 code = reverse_condition (code);
20591 else
20593 if (fpcmp)
20594 PUT_CODE (compare_op,
20595 reverse_condition_maybe_unordered
20596 (GET_CODE (compare_op)));
20597 else
20598 PUT_CODE (compare_op,
20599 reverse_condition (GET_CODE (compare_op)));
20601 diff = ct - cf;
20603 if (reg_overlap_mentioned_p (out, op0)
20604 || reg_overlap_mentioned_p (out, op1))
20605 tmp = gen_reg_rtx (mode);
20607 if (mode == DImode)
20608 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20609 else
20610 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20611 flags, compare_op));
20613 else
20615 if (code == GT || code == GE)
20616 code = reverse_condition (code);
20617 else
20619 HOST_WIDE_INT tmp = ct;
20620 ct = cf;
20621 cf = tmp;
20622 diff = ct - cf;
20624 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20627 if (diff == 1)
20630 * cmpl op0,op1
20631 * sbbl dest,dest
20632 * [addl dest, ct]
20634 * Size 5 - 8.
20636 if (ct)
20637 tmp = expand_simple_binop (mode, PLUS,
20638 tmp, GEN_INT (ct),
20639 copy_rtx (tmp), 1, OPTAB_DIRECT);
20641 else if (cf == -1)
20644 * cmpl op0,op1
20645 * sbbl dest,dest
20646 * orl $ct, dest
20648 * Size 8.
20650 tmp = expand_simple_binop (mode, IOR,
20651 tmp, GEN_INT (ct),
20652 copy_rtx (tmp), 1, OPTAB_DIRECT);
20654 else if (diff == -1 && ct)
20657 * cmpl op0,op1
20658 * sbbl dest,dest
20659 * notl dest
20660 * [addl dest, cf]
20662 * Size 8 - 11.
20664 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20665 if (cf)
20666 tmp = expand_simple_binop (mode, PLUS,
20667 copy_rtx (tmp), GEN_INT (cf),
20668 copy_rtx (tmp), 1, OPTAB_DIRECT);
20670 else
	  /*
20673	   * cmpl op0,op1
20674	   * sbbl dest,dest
20675	   * [notl dest]
20676	   * andl cf - ct, dest
20677	   * [addl dest, ct]
20679	   * Size 8 - 11.
	   */
20682 if (cf == 0)
20684 cf = ct;
20685 ct = 0;
20686 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20689 tmp = expand_simple_binop (mode, AND,
20690 copy_rtx (tmp),
20691 gen_int_mode (cf - ct, mode),
20692 copy_rtx (tmp), 1, OPTAB_DIRECT);
20693 if (ct)
20694 tmp = expand_simple_binop (mode, PLUS,
20695 copy_rtx (tmp), GEN_INT (ct),
20696 copy_rtx (tmp), 1, OPTAB_DIRECT);
20699 if (!rtx_equal_p (tmp, out))
20700 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20702 return true;
20705 if (diff < 0)
20707 enum machine_mode cmp_mode = GET_MODE (op0);
20709 HOST_WIDE_INT tmp;
20710 tmp = ct, ct = cf, cf = tmp;
20711 diff = -diff;
20713 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20715 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20717	      /* We may be reversing an unordered compare to a normal compare, which
20718	         is not valid in general (we may convert a non-trapping condition
20719	         to a trapping one); however, on i386 we currently emit all
20720	         comparisons unordered.  */
20721 compare_code = reverse_condition_maybe_unordered (compare_code);
20722 code = reverse_condition_maybe_unordered (code);
20724 else
20726 compare_code = reverse_condition (compare_code);
20727 code = reverse_condition (code);
20731 compare_code = UNKNOWN;
20732 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20733 && CONST_INT_P (op1))
20735 if (op1 == const0_rtx
20736 && (code == LT || code == GE))
20737 compare_code = code;
20738 else if (op1 == constm1_rtx)
20740 if (code == LE)
20741 compare_code = LT;
20742 else if (code == GT)
20743 compare_code = GE;
20747 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20748 if (compare_code != UNKNOWN
20749 && GET_MODE (op0) == GET_MODE (out)
20750 && (cf == -1 || ct == -1))
20752 /* If lea code below could be used, only optimize
20753 if it results in a 2 insn sequence. */
20755 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20756 || diff == 3 || diff == 5 || diff == 9)
20757 || (compare_code == LT && ct == -1)
20758 || (compare_code == GE && cf == -1))
	      /*
20761	       * notl op1	(if necessary)
20762	       * sarl $31, op1
20763	       * orl cf, op1
	       */
20765 if (ct != -1)
20767 cf = ct;
20768 ct = -1;
20769 code = reverse_condition (code);
20772 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20774 out = expand_simple_binop (mode, IOR,
20775 out, GEN_INT (cf),
20776 out, 1, OPTAB_DIRECT);
20777 if (out != operands[0])
20778 emit_move_insn (operands[0], out);
20780 return true;
20785 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20786 || diff == 3 || diff == 5 || diff == 9)
20787 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20788 && (mode != DImode
20789 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
	  /*
20792	   * xorl dest,dest
20793	   * cmpl op1,op2
20794	   * setcc dest
20795	   * lea cf(dest*(ct-cf)),dest
20797	   * Size 14.
20799	   * This also catches the degenerate setcc-only case.
	   */
20802 rtx tmp;
20803 int nops;
20805 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
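	  /* OUT is now 0 or 1.  The address arithmetic built below computes
	     cf + OUT * diff, i.e. CF when the condition is false and
	     CT (== CF + DIFF) when it is true, which a single lea can encode
	     for the DIFF values tested above.  */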
20807 nops = 0;
20808 /* On x86_64 the lea instruction operates on Pmode, so we need
20809	     to get the arithmetic done in the proper mode to match.  */
20810 if (diff == 1)
20811 tmp = copy_rtx (out);
20812 else
20814 rtx out1;
20815 out1 = copy_rtx (out);
20816 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20817 nops++;
20818 if (diff & 1)
20820 tmp = gen_rtx_PLUS (mode, tmp, out1);
20821 nops++;
20824 if (cf != 0)
20826 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20827 nops++;
20829 if (!rtx_equal_p (tmp, out))
20831 if (nops == 1)
20832 out = force_operand (tmp, copy_rtx (out));
20833 else
20834 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20836 if (!rtx_equal_p (out, operands[0]))
20837 emit_move_insn (operands[0], copy_rtx (out));
20839 return true;
      /*
20843       * General case:                  Jumpful:
20844       *   xorl dest,dest               cmpl op1, op2
20845       *   cmpl op1, op2                movl ct, dest
20846       *   setcc dest                   jcc 1f
20847       *   decl dest                    movl cf, dest
20848       *   andl (cf-ct),dest            1:
20849       *   addl ct,dest
20851       * Size 20.                       Size 14.
20853       * This is reasonably steep, but branch mispredict costs are
20854       * high on modern CPUs, so consider failing only if optimizing
20855       * for space.
       */
20858 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20859 && BRANCH_COST (optimize_insn_for_speed_p (),
20860 false) >= 2)
20862 if (cf == 0)
20864 enum machine_mode cmp_mode = GET_MODE (op0);
20866 cf = ct;
20867 ct = 0;
20869 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20871 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20873		  /* We may be reversing an unordered compare to a normal compare,
20874		     which is not valid in general (we may convert a non-trapping
20875		     condition to a trapping one); however, on i386 we currently
20876		     emit all comparisons unordered.  */
20877 code = reverse_condition_maybe_unordered (code);
20879 else
20881 code = reverse_condition (code);
20882 if (compare_code != UNKNOWN)
20883 compare_code = reverse_condition (compare_code);
20887 if (compare_code != UNKNOWN)
20889 /* notl op1 (if needed)
20890 sarl $31, op1
20891 andl (cf-ct), op1
20892 addl ct, op1
20894 For x < 0 (resp. x <= -1) there will be no notl,
20895 so if possible swap the constants to get rid of the
20896 complement.
20897 True/false will be -1/0 while code below (store flag
20898 followed by decrement) is 0/-1, so the constants need
20899 to be exchanged once more. */
20901 if (compare_code == GE || !cf)
20903 code = reverse_condition (code);
20904 compare_code = LT;
20906 else
20908 HOST_WIDE_INT tmp = cf;
20909 cf = ct;
20910 ct = tmp;
20913 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20915 else
20917 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20919 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20920 constm1_rtx,
20921 copy_rtx (out), 1, OPTAB_DIRECT);
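	  /* OUT now holds a 0 / all-ones mask (the condition and constants
	     were adjusted above so that both paths line up); AND it with
	     CF - CT and add CT, giving CT for the zero case and CF for the
	     all-ones case.  */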
20924 out = expand_simple_binop (mode, AND, copy_rtx (out),
20925 gen_int_mode (cf - ct, mode),
20926 copy_rtx (out), 1, OPTAB_DIRECT);
20927 if (ct)
20928 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20929 copy_rtx (out), 1, OPTAB_DIRECT);
20930 if (!rtx_equal_p (out, operands[0]))
20931 emit_move_insn (operands[0], copy_rtx (out));
20933 return true;
20937 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20939 /* Try a few things more with specific constants and a variable. */
20941 optab op;
20942 rtx var, orig_out, out, tmp;
20944 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20945 return false;
20947	      /* If one of the two operands is an interesting constant, load that
20948		 constant using the code above and mask the variable in with a
		 logical operation.  */
20950 if (CONST_INT_P (operands[2]))
20952 var = operands[3];
20953 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20954 operands[3] = constm1_rtx, op = and_optab;
20955 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20956 operands[3] = const0_rtx, op = ior_optab;
20957 else
20958 return false;
20960 else if (CONST_INT_P (operands[3]))
20962 var = operands[2];
20963 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20964 operands[2] = constm1_rtx, op = and_optab;
20965 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
20966 operands[2] = const0_rtx, op = ior_optab;
20967 else
20968 return false;
20970 else
20971 return false;
20973 orig_out = operands[0];
20974 tmp = gen_reg_rtx (mode);
20975 operands[0] = tmp;
20977 /* Recurse to get the constant loaded. */
20978 if (ix86_expand_int_movcc (operands) == 0)
20979 return false;
20981 /* Mask in the interesting variable. */
20982 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20983 OPTAB_WIDEN);
20984 if (!rtx_equal_p (out, orig_out))
20985 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20987 return true;
  /*
20991   * For comparison with above,
20993   * movl cf,dest
20994   * movl ct,tmp
20995   * cmpl op1,op2
20996   * cmovcc tmp,dest
20998   * Size 15.
   */
21001 if (! nonimmediate_operand (operands[2], mode))
21002 operands[2] = force_reg (mode, operands[2]);
21003 if (! nonimmediate_operand (operands[3], mode))
21004 operands[3] = force_reg (mode, operands[3]);
21006 if (! register_operand (operands[2], VOIDmode)
21007 && (mode == QImode
21008 || ! register_operand (operands[3], VOIDmode)))
21009 operands[2] = force_reg (mode, operands[2]);
21011 if (mode == QImode
21012 && ! register_operand (operands[3], VOIDmode))
21013 operands[3] = force_reg (mode, operands[3]);
21015 emit_insn (compare_seq);
21016 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21017 gen_rtx_IF_THEN_ELSE (mode,
21018 compare_op, operands[2],
21019 operands[3])));
21020 return true;
21023 /* Swap, force into registers, or otherwise massage the two operands
21024	   to an SSE comparison with a mask result.  Thus we differ a bit from
21025 ix86_prepare_fp_compare_args which expects to produce a flags result.
21027 The DEST operand exists to help determine whether to commute commutative
21028 operators. The POP0/POP1 operands are updated in place. The new
21029 comparison code is returned, or UNKNOWN if not implementable. */
21031 static enum rtx_code
21032 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21033 rtx *pop0, rtx *pop1)
21035 rtx tmp;
21037 switch (code)
21039 case LTGT:
21040 case UNEQ:
21041 /* AVX supports all the needed comparisons. */
21042 if (TARGET_AVX)
21043 break;
21044 /* We have no LTGT as an operator. We could implement it with
21045 NE & ORDERED, but this requires an extra temporary. It's
21046 not clear that it's worth it. */
21047 return UNKNOWN;
21049 case LT:
21050 case LE:
21051 case UNGT:
21052 case UNGE:
21053 /* These are supported directly. */
21054 break;
21056 case EQ:
21057 case NE:
21058 case UNORDERED:
21059 case ORDERED:
21060 /* AVX has 3 operand comparisons, no need to swap anything. */
21061 if (TARGET_AVX)
21062 break;
21063 /* For commutative operators, try to canonicalize the destination
21064 operand to be first in the comparison - this helps reload to
21065 avoid extra moves. */
21066 if (!dest || !rtx_equal_p (dest, *pop1))
21067 break;
21068 /* FALLTHRU */
21070 case GE:
21071 case GT:
21072 case UNLE:
21073 case UNLT:
21074 /* These are not supported directly before AVX, and furthermore
21075 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21076 comparison operands to transform into something that is
21077 supported. */
21078 tmp = *pop0;
21079 *pop0 = *pop1;
21080 *pop1 = tmp;
21081 code = swap_condition (code);
21082 break;
21084 default:
21085 gcc_unreachable ();
21088 return code;
21091 /* Detect conditional moves that exactly match min/max operational
21092 semantics. Note that this is IEEE safe, as long as we don't
21093 interchange the operands.
21095 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21096 and TRUE if the operation is successful and instructions are emitted. */
21098 static bool
21099 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21100 rtx cmp_op1, rtx if_true, rtx if_false)
21102 enum machine_mode mode;
21103 bool is_min;
21104 rtx tmp;
21106 if (code == LT)
21108 else if (code == UNGE)
21110 tmp = if_true;
21111 if_true = if_false;
21112 if_false = tmp;
21114 else
21115 return false;
21117 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21118 is_min = true;
21119 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21120 is_min = false;
21121 else
21122 return false;
21124 mode = GET_MODE (dest);
21126 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21127 but MODE may be a vector mode and thus not appropriate. */
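      /* The hardware min/max instructions are not symmetric in their operands
	 (the second operand is returned when either one is a NaN, and
	 +0.0/-0.0 are distinguished only by operand order), so when NaNs or
	 signed zeros may matter the operation is kept as an order-preserving
	 UNSPEC rather than the generic SMIN/SMAX codes.  */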
21128 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21130 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21131 rtvec v;
21133 if_true = force_reg (mode, if_true);
21134 v = gen_rtvec (2, if_true, if_false);
21135 tmp = gen_rtx_UNSPEC (mode, v, u);
21137 else
21139 code = is_min ? SMIN : SMAX;
21140 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21143 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21144 return true;
21147 /* Expand an sse vector comparison. Return the register with the result. */
21149 static rtx
21150 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21151 rtx op_true, rtx op_false)
21153 enum machine_mode mode = GET_MODE (dest);
21154 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21156	  /* In the general case the result of the comparison can differ from the
	     mode of the operands.  */
21157 enum machine_mode cmp_mode;
21159 /* In AVX512F the result of comparison is an integer mask. */
21160 bool maskcmp = false;
21161 rtx x;
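  /* A comparison of 64-byte (512-bit) operands under AVX512F produces a mask
     register with one bit per element, so the result mode is the integer mode
     holding GET_MODE_NUNITS bits rather than a vector mode.  */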
21163 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21165 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21166 gcc_assert (cmp_mode != BLKmode);
21168 maskcmp = true;
21170 else
21171 cmp_mode = cmp_ops_mode;
21174 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21175 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21176 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21178 if (optimize
21179 || reg_overlap_mentioned_p (dest, op_true)
21180 || reg_overlap_mentioned_p (dest, op_false))
21181 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21183 /* Compare patterns for int modes are unspec in AVX512F only. */
21184 if (maskcmp && (code == GT || code == EQ))
21186 rtx (*gen)(rtx, rtx, rtx);
21188 switch (cmp_ops_mode)
21190 case V16SImode:
21191 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21192 break;
21193 case V8DImode:
21194 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21195 break;
21196 default:
21197 gen = NULL;
21200 if (gen)
21202 emit_insn (gen (dest, cmp_op0, cmp_op1));
21203 return dest;
21206 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21208 if (cmp_mode != mode && !maskcmp)
21210 x = force_reg (cmp_ops_mode, x);
21211 convert_move (dest, x, false);
21213 else
21214 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21216 return dest;
21219 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21220 operations. This is used for both scalar and vector conditional moves. */
21222 static void
21223 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21225 enum machine_mode mode = GET_MODE (dest);
21226 enum machine_mode cmpmode = GET_MODE (cmp);
21228 /* In AVX512F the result of comparison is an integer mask. */
21229 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21231 rtx t2, t3, x;
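  /* The generic fallback at the bottom computes
     DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE); the cases below shortcut
     this when one arm is all zeros or all ones, or when a blend or XOP
     conditional-move instruction is available.  */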
21233 if (vector_all_ones_operand (op_true, mode)
21234 && rtx_equal_p (op_false, CONST0_RTX (mode))
21235 && !maskcmp)
21237 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21239 else if (op_false == CONST0_RTX (mode)
21240 && !maskcmp)
21242 op_true = force_reg (mode, op_true);
21243 x = gen_rtx_AND (mode, cmp, op_true);
21244 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21246 else if (op_true == CONST0_RTX (mode)
21247 && !maskcmp)
21249 op_false = force_reg (mode, op_false);
21250 x = gen_rtx_NOT (mode, cmp);
21251 x = gen_rtx_AND (mode, x, op_false);
21252 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21254 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21255 && !maskcmp)
21257 op_false = force_reg (mode, op_false);
21258 x = gen_rtx_IOR (mode, cmp, op_false);
21259 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21261 else if (TARGET_XOP
21262 && !maskcmp)
21264 op_true = force_reg (mode, op_true);
21266 if (!nonimmediate_operand (op_false, mode))
21267 op_false = force_reg (mode, op_false);
21269 emit_insn (gen_rtx_SET (mode, dest,
21270 gen_rtx_IF_THEN_ELSE (mode, cmp,
21271 op_true,
21272 op_false)));
21274 else
21276 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21277 rtx d = dest;
21279 if (!nonimmediate_operand (op_true, mode))
21280 op_true = force_reg (mode, op_true);
21282 op_false = force_reg (mode, op_false);
21284 switch (mode)
21286 case V4SFmode:
21287 if (TARGET_SSE4_1)
21288 gen = gen_sse4_1_blendvps;
21289 break;
21290 case V2DFmode:
21291 if (TARGET_SSE4_1)
21292 gen = gen_sse4_1_blendvpd;
21293 break;
21294 case V16QImode:
21295 case V8HImode:
21296 case V4SImode:
21297 case V2DImode:
21298 if (TARGET_SSE4_1)
21300 gen = gen_sse4_1_pblendvb;
21301 if (mode != V16QImode)
21302 d = gen_reg_rtx (V16QImode);
21303 op_false = gen_lowpart (V16QImode, op_false);
21304 op_true = gen_lowpart (V16QImode, op_true);
21305 cmp = gen_lowpart (V16QImode, cmp);
21307 break;
21308 case V8SFmode:
21309 if (TARGET_AVX)
21310 gen = gen_avx_blendvps256;
21311 break;
21312 case V4DFmode:
21313 if (TARGET_AVX)
21314 gen = gen_avx_blendvpd256;
21315 break;
21316 case V32QImode:
21317 case V16HImode:
21318 case V8SImode:
21319 case V4DImode:
21320 if (TARGET_AVX2)
21322 gen = gen_avx2_pblendvb;
21323 if (mode != V32QImode)
21324 d = gen_reg_rtx (V32QImode);
21325 op_false = gen_lowpart (V32QImode, op_false);
21326 op_true = gen_lowpart (V32QImode, op_true);
21327 cmp = gen_lowpart (V32QImode, cmp);
21329 break;
21331 case V16SImode:
21332 gen = gen_avx512f_blendmv16si;
21333 break;
21334 case V8DImode:
21335 gen = gen_avx512f_blendmv8di;
21336 break;
21337 case V8DFmode:
21338 gen = gen_avx512f_blendmv8df;
21339 break;
21340 case V16SFmode:
21341 gen = gen_avx512f_blendmv16sf;
21342 break;
21344 default:
21345 break;
21348 if (gen != NULL)
21350 emit_insn (gen (d, op_false, op_true, cmp));
21351 if (d != dest)
21352 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21354 else
21356 op_true = force_reg (mode, op_true);
21358 t2 = gen_reg_rtx (mode);
21359 if (optimize)
21360 t3 = gen_reg_rtx (mode);
21361 else
21362 t3 = dest;
21364 x = gen_rtx_AND (mode, op_true, cmp);
21365 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21367 x = gen_rtx_NOT (mode, cmp);
21368 x = gen_rtx_AND (mode, x, op_false);
21369 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21371 x = gen_rtx_IOR (mode, t3, t2);
21372 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21377 /* Expand a floating-point conditional move. Return true if successful. */
21379 bool
21380 ix86_expand_fp_movcc (rtx operands[])
21382 enum machine_mode mode = GET_MODE (operands[0]);
21383 enum rtx_code code = GET_CODE (operands[1]);
21384 rtx tmp, compare_op;
21385 rtx op0 = XEXP (operands[1], 0);
21386 rtx op1 = XEXP (operands[1], 1);
21388 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21390 enum machine_mode cmode;
21392	      /* Since we have no cmove for SSE registers, don't force bad register
21393		 allocation just to gain access to it.  Deny the movcc when the
21394		 comparison mode doesn't match the move mode.  */
21395 cmode = GET_MODE (op0);
21396 if (cmode == VOIDmode)
21397 cmode = GET_MODE (op1);
21398 if (cmode != mode)
21399 return false;
21401 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21402 if (code == UNKNOWN)
21403 return false;
21405 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21406 operands[2], operands[3]))
21407 return true;
21409 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21410 operands[2], operands[3]);
21411 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21412 return true;
21415 if (GET_MODE (op0) == TImode
21416 || (GET_MODE (op0) == DImode
21417 && !TARGET_64BIT))
21418 return false;
21420 /* The floating point conditional move instructions don't directly
21421 support conditions resulting from a signed integer comparison. */
21423 compare_op = ix86_expand_compare (code, op0, op1);
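  /* If the condition cannot be tested by fcmov directly, compute it into a
     QImode register with setcc and conditionally move on that register being
     nonzero instead.  */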
21424 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21426 tmp = gen_reg_rtx (QImode);
21427 ix86_expand_setcc (tmp, code, op0, op1);
21429 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21432 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21433 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21434 operands[2], operands[3])));
21436 return true;
21439 /* Expand a floating-point vector conditional move; a vcond operation
21440 rather than a movcc operation. */
21442 bool
21443 ix86_expand_fp_vcond (rtx operands[])
21445 enum rtx_code code = GET_CODE (operands[3]);
21446 rtx cmp;
21448 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21449 &operands[4], &operands[5]);
21450 if (code == UNKNOWN)
21452 rtx temp;
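      /* LTGT and UNEQ have no direct SSE comparison; build them from two
	 compares, as ORDERED & NE and UNORDERED | EQ respectively, combined
	 below.  */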
21453 switch (GET_CODE (operands[3]))
21455 case LTGT:
21456 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21457 operands[5], operands[0], operands[0]);
21458 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21459 operands[5], operands[1], operands[2]);
21460 code = AND;
21461 break;
21462 case UNEQ:
21463 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21464 operands[5], operands[0], operands[0]);
21465 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21466 operands[5], operands[1], operands[2]);
21467 code = IOR;
21468 break;
21469 default:
21470 gcc_unreachable ();
21472 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21473 OPTAB_DIRECT);
21474 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21475 return true;
21478 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21479 operands[5], operands[1], operands[2]))
21480 return true;
21482 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21483 operands[1], operands[2]);
21484 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21485 return true;
21488 /* Expand a signed/unsigned integral vector conditional move. */
21490 bool
21491 ix86_expand_int_vcond (rtx operands[])
21493 enum machine_mode data_mode = GET_MODE (operands[0]);
21494 enum machine_mode mode = GET_MODE (operands[4]);
21495 enum rtx_code code = GET_CODE (operands[3]);
21496 bool negate = false;
21497 rtx x, cop0, cop1;
21499 cop0 = operands[4];
21500 cop1 = operands[5];
21502 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21503 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21504 if ((code == LT || code == GE)
21505 && data_mode == mode
21506 && cop1 == CONST0_RTX (mode)
21507 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21508 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21509 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21510 && (GET_MODE_SIZE (data_mode) == 16
21511 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21513 rtx negop = operands[2 - (code == LT)];
21514 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21515 if (negop == CONST1_RTX (data_mode))
21517 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21518 operands[0], 1, OPTAB_DIRECT);
21519 if (res != operands[0])
21520 emit_move_insn (operands[0], res);
21521 return true;
21523 else if (GET_MODE_INNER (data_mode) != DImode
21524 && vector_all_ones_operand (negop, data_mode))
21526 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21527 operands[0], 0, OPTAB_DIRECT);
21528 if (res != operands[0])
21529 emit_move_insn (operands[0], res);
21530 return true;
21534 if (!nonimmediate_operand (cop1, mode))
21535 cop1 = force_reg (mode, cop1);
21536 if (!general_operand (operands[1], data_mode))
21537 operands[1] = force_reg (data_mode, operands[1]);
21538 if (!general_operand (operands[2], data_mode))
21539 operands[2] = force_reg (data_mode, operands[2]);
21541 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21542 if (TARGET_XOP
21543 && (mode == V16QImode || mode == V8HImode
21544 || mode == V4SImode || mode == V2DImode))
21546 else
21548 /* Canonicalize the comparison to EQ, GT, GTU. */
21549 switch (code)
21551 case EQ:
21552 case GT:
21553 case GTU:
21554 break;
21556 case NE:
21557 case LE:
21558 case LEU:
21559 code = reverse_condition (code);
21560 negate = true;
21561 break;
21563 case GE:
21564 case GEU:
21565 code = reverse_condition (code);
21566 negate = true;
21567 /* FALLTHRU */
21569 case LT:
21570 case LTU:
21571 code = swap_condition (code);
21572 x = cop0, cop0 = cop1, cop1 = x;
21573 break;
21575 default:
21576 gcc_unreachable ();
21579 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21580 if (mode == V2DImode)
21582 switch (code)
21584 case EQ:
21585 /* SSE4.1 supports EQ. */
21586 if (!TARGET_SSE4_1)
21587 return false;
21588 break;
21590 case GT:
21591 case GTU:
21592 /* SSE4.2 supports GT/GTU. */
21593 if (!TARGET_SSE4_2)
21594 return false;
21595 break;
21597 default:
21598 gcc_unreachable ();
21602 /* Unsigned parallel compare is not supported by the hardware.
21603 Play some tricks to turn this into a signed comparison
21604 against 0. */
21605 if (code == GTU)
21607 cop0 = force_reg (mode, cop0);
21609 switch (mode)
21611 case V16SImode:
21612 case V8DImode:
21613 case V8SImode:
21614 case V4DImode:
21615 case V4SImode:
21616 case V2DImode:
21618 rtx t1, t2, mask;
21619 rtx (*gen_sub3) (rtx, rtx, rtx);
21621 switch (mode)
21623 case V16SImode: gen_sub3 = gen_subv16si3; break;
21624 case V8DImode: gen_sub3 = gen_subv8di3; break;
21625 case V8SImode: gen_sub3 = gen_subv8si3; break;
21626 case V4DImode: gen_sub3 = gen_subv4di3; break;
21627 case V4SImode: gen_sub3 = gen_subv4si3; break;
21628 case V2DImode: gen_sub3 = gen_subv2di3; break;
21629 default:
21630 gcc_unreachable ();
21632 /* Subtract (-(INT MAX) - 1) from both operands to make
21633 them signed. */
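	    /* E.g. for V4SImode the mask is { 0x80000000, ... }; after the
	       subtraction, a >u b holds exactly when
	       (a - 0x80000000) >s (b - 0x80000000), so a signed GT compare
	       gives the unsigned result.  */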
21634 mask = ix86_build_signbit_mask (mode, true, false);
21635 t1 = gen_reg_rtx (mode);
21636 emit_insn (gen_sub3 (t1, cop0, mask));
21638 t2 = gen_reg_rtx (mode);
21639 emit_insn (gen_sub3 (t2, cop1, mask));
21641 cop0 = t1;
21642 cop1 = t2;
21643 code = GT;
21645 break;
21647 case V32QImode:
21648 case V16HImode:
21649 case V16QImode:
21650 case V8HImode:
21651 /* Perform a parallel unsigned saturating subtraction. */
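	  /* a >u b fails exactly when the saturating difference a -us b is
	     zero, so compare the difference against zero with EQ and flip
	     NEGATE to recover GTU.  */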
21652 x = gen_reg_rtx (mode);
21653 emit_insn (gen_rtx_SET (VOIDmode, x,
21654 gen_rtx_US_MINUS (mode, cop0, cop1)));
21656 cop0 = x;
21657 cop1 = CONST0_RTX (mode);
21658 code = EQ;
21659 negate = !negate;
21660 break;
21662 default:
21663 gcc_unreachable ();
21668 /* Allow the comparison to be done in one mode, but the movcc to
21669 happen in another mode. */
21670 if (data_mode == mode)
21672 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21673 operands[1+negate], operands[2-negate]);
21675 else
21677 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21678 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21679 operands[1+negate], operands[2-negate]);
21680 if (GET_MODE (x) == mode)
21681 x = gen_lowpart (data_mode, x);
21684 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21685 operands[2-negate]);
21686 return true;
21689 static bool
21690 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21692 enum machine_mode mode = GET_MODE (op0);
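  /* The vpermi2 patterns permute elements taken from the concatenation of
     OP0 and OP1 under control of MASK, implementing a two-operand variable
     shuffle directly.  */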
21693 switch (mode)
21695 case V16SImode:
21696 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21697 force_reg (V16SImode, mask),
21698 op1));
21699 return true;
21700 case V16SFmode:
21701 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21702 force_reg (V16SImode, mask),
21703 op1));
21704 return true;
21705 case V8DImode:
21706 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21707 force_reg (V8DImode, mask), op1));
21708 return true;
21709 case V8DFmode:
21710 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21711 force_reg (V8DImode, mask), op1));
21712 return true;
21713 default:
21714 return false;
21718 /* Expand a variable vector permutation. */
21720 void
21721 ix86_expand_vec_perm (rtx operands[])
21723 rtx target = operands[0];
21724 rtx op0 = operands[1];
21725 rtx op1 = operands[2];
21726 rtx mask = operands[3];
21727 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21728 enum machine_mode mode = GET_MODE (op0);
21729 enum machine_mode maskmode = GET_MODE (mask);
21730 int w, e, i;
21731 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21733 /* Number of elements in the vector. */
21734 w = GET_MODE_NUNITS (mode);
21735 e = GET_MODE_UNIT_SIZE (mode);
21736 gcc_assert (w <= 64);
21738 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21739 return;
21741 if (TARGET_AVX2)
21743 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21745	      /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21746		 a constant shuffle operand.  With a tiny bit of effort we can
21747		 use VPERMD instead.  A re-interpretation stall for V4DFmode is
21748		 unfortunate but there's no avoiding it.
21749		 Similarly for V16HImode we don't have instructions for variable
21750		 shuffling, while for V32QImode we can, after preparing suitable
21751		 masks, use vpshufb; vpshufb; vpermq; vpor.  */
21753 if (mode == V16HImode)
21755 maskmode = mode = V32QImode;
21756 w = 32;
21757 e = 1;
21759 else
21761 maskmode = mode = V8SImode;
21762 w = 8;
21763 e = 4;
21765 t1 = gen_reg_rtx (maskmode);
21767 /* Replicate the low bits of the V4DImode mask into V8SImode:
21768 mask = { A B C D }
21769 t1 = { A A B B C C D D }. */
21770 for (i = 0; i < w / 2; ++i)
21771 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21772 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21773 vt = force_reg (maskmode, vt);
21774 mask = gen_lowpart (maskmode, mask);
21775 if (maskmode == V8SImode)
21776 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21777 else
21778 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21780	      /* Multiply the shuffle indices by two.  */
21781 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21782 OPTAB_DIRECT);
21784	      /* Add one to the odd shuffle indices:
21785 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21786 for (i = 0; i < w / 2; ++i)
21788 vec[i * 2] = const0_rtx;
21789 vec[i * 2 + 1] = const1_rtx;
21791 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21792 vt = validize_mem (force_const_mem (maskmode, vt));
21793 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21794 OPTAB_DIRECT);
21796 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21797 operands[3] = mask = t1;
21798 target = gen_reg_rtx (mode);
21799 op0 = gen_lowpart (mode, op0);
21800 op1 = gen_lowpart (mode, op1);
21803 switch (mode)
21805 case V8SImode:
21806 /* The VPERMD and VPERMPS instructions already properly ignore
21807 the high bits of the shuffle elements. No need for us to
21808 perform an AND ourselves. */
21809 if (one_operand_shuffle)
21811 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21812 if (target != operands[0])
21813 emit_move_insn (operands[0],
21814 gen_lowpart (GET_MODE (operands[0]), target));
21816 else
21818 t1 = gen_reg_rtx (V8SImode);
21819 t2 = gen_reg_rtx (V8SImode);
21820 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21821 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21822 goto merge_two;
21824 return;
21826 case V8SFmode:
21827 mask = gen_lowpart (V8SImode, mask);
21828 if (one_operand_shuffle)
21829 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21830 else
21832 t1 = gen_reg_rtx (V8SFmode);
21833 t2 = gen_reg_rtx (V8SFmode);
21834 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21835 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21836 goto merge_two;
21838 return;
21840 case V4SImode:
21841 /* By combining the two 128-bit input vectors into one 256-bit
21842 input vector, we can use VPERMD and VPERMPS for the full
21843 two-operand shuffle. */
21844 t1 = gen_reg_rtx (V8SImode);
21845 t2 = gen_reg_rtx (V8SImode);
21846 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21847 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21848 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21849 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21850 return;
21852 case V4SFmode:
21853 t1 = gen_reg_rtx (V8SFmode);
21854 t2 = gen_reg_rtx (V8SImode);
21855 mask = gen_lowpart (V4SImode, mask);
21856 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21857 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21858 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21859 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21860 return;
21862 case V32QImode:
21863 t1 = gen_reg_rtx (V32QImode);
21864 t2 = gen_reg_rtx (V32QImode);
21865 t3 = gen_reg_rtx (V32QImode);
21866 vt2 = GEN_INT (-128);
21867 for (i = 0; i < 32; i++)
21868 vec[i] = vt2;
21869 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21870 vt = force_reg (V32QImode, vt);
21871 for (i = 0; i < 32; i++)
21872 vec[i] = i < 16 ? vt2 : const0_rtx;
21873 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21874 vt2 = force_reg (V32QImode, vt2);
21875 /* From mask create two adjusted masks, which contain the same
21876 bits as mask in the low 7 bits of each vector element.
21877 The first mask will have the most significant bit clear
21878 if it requests element from the same 128-bit lane
21879 and MSB set if it requests element from the other 128-bit lane.
21880 The second mask will have the opposite values of the MSB,
21881 and additionally will have its 128-bit lanes swapped.
21882 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21883 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21884 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21885 stands for other 12 bytes. */
21886	      /* The bit that says whether an element comes from the same lane or
21887		 from the other lane is bit 4, so shift it up by 3 to the MSB
		 position.  */
21888 t5 = gen_reg_rtx (V4DImode);
21889 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21890 GEN_INT (3)));
21891 /* Clear MSB bits from the mask just in case it had them set. */
21892 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21893 /* After this t1 will have MSB set for elements from other lane. */
21894 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21895 /* Clear bits other than MSB. */
21896 emit_insn (gen_andv32qi3 (t1, t1, vt));
21897 /* Or in the lower bits from mask into t3. */
21898 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21899 /* And invert MSB bits in t1, so MSB is set for elements from the same
21900 lane. */
21901 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21902 /* Swap 128-bit lanes in t3. */
21903 t6 = gen_reg_rtx (V4DImode);
21904 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21905 const2_rtx, GEN_INT (3),
21906 const0_rtx, const1_rtx));
21907 /* And or in the lower bits from mask into t1. */
21908 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21909 if (one_operand_shuffle)
21911		  /* Each of these shuffles will put 0s in places where an
21912		     element from the other 128-bit lane is needed, and otherwise
21913		     will shuffle in the requested value.  */
21914 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21915 gen_lowpart (V32QImode, t6)));
21916 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21917 /* For t3 the 128-bit lanes are swapped again. */
21918 t7 = gen_reg_rtx (V4DImode);
21919 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21920 const2_rtx, GEN_INT (3),
21921 const0_rtx, const1_rtx));
21922 /* And oring both together leads to the result. */
21923 emit_insn (gen_iorv32qi3 (target, t1,
21924 gen_lowpart (V32QImode, t7)));
21925 if (target != operands[0])
21926 emit_move_insn (operands[0],
21927 gen_lowpart (GET_MODE (operands[0]), target));
21928 return;
21931 t4 = gen_reg_rtx (V32QImode);
21932	      /* Similar to the one_operand_shuffle code above,
21933		 just repeated twice, once for each operand.  The merge_two:
21934		 code below will merge the two results together.  */
21935 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21936 gen_lowpart (V32QImode, t6)));
21937 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21938 gen_lowpart (V32QImode, t6)));
21939 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21940 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21941 t7 = gen_reg_rtx (V4DImode);
21942 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21943 const2_rtx, GEN_INT (3),
21944 const0_rtx, const1_rtx));
21945 t8 = gen_reg_rtx (V4DImode);
21946 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21947 const2_rtx, GEN_INT (3),
21948 const0_rtx, const1_rtx));
21949 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21950 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21951 t1 = t4;
21952 t2 = t3;
21953 goto merge_two;
21955 default:
21956 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21957 break;
21961 if (TARGET_XOP)
21963 /* The XOP VPPERM insn supports three inputs. By ignoring the
21964 one_operand_shuffle special case, we avoid creating another
21965 set of constant vectors in memory. */
21966 one_operand_shuffle = false;
21968 /* mask = mask & {2*w-1, ...} */
21969 vt = GEN_INT (2*w - 1);
21971 else
21973 /* mask = mask & {w-1, ...} */
21974 vt = GEN_INT (w - 1);
21977 for (i = 0; i < w; i++)
21978 vec[i] = vt;
21979 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21980 mask = expand_simple_binop (maskmode, AND, mask, vt,
21981 NULL_RTX, 0, OPTAB_DIRECT);
21983 /* For non-QImode operations, convert the word permutation control
21984 into a byte permutation control. */
21985 if (mode != V16QImode)
21987 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21988 GEN_INT (exact_log2 (e)),
21989 NULL_RTX, 0, OPTAB_DIRECT);
21991 /* Convert mask to vector of chars. */
21992 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21994 /* Replicate each of the input bytes into byte positions:
21995 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21996 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21997 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21998 for (i = 0; i < 16; ++i)
21999 vec[i] = GEN_INT (i/e * e);
22000 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22001 vt = validize_mem (force_const_mem (V16QImode, vt));
22002 if (TARGET_XOP)
22003 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22004 else
22005 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22007 /* Convert it into the byte positions by doing
22008 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22009 for (i = 0; i < 16; ++i)
22010 vec[i] = GEN_INT (i % e);
22011 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22012 vt = validize_mem (force_const_mem (V16QImode, vt));
22013 emit_insn (gen_addv16qi3 (mask, mask, vt));
22016 /* The actual shuffle operations all operate on V16QImode. */
22017 op0 = gen_lowpart (V16QImode, op0);
22018 op1 = gen_lowpart (V16QImode, op1);
22020 if (TARGET_XOP)
22022 if (GET_MODE (target) != V16QImode)
22023 target = gen_reg_rtx (V16QImode);
22024 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22025 if (target != operands[0])
22026 emit_move_insn (operands[0],
22027 gen_lowpart (GET_MODE (operands[0]), target));
22029 else if (one_operand_shuffle)
22031 if (GET_MODE (target) != V16QImode)
22032 target = gen_reg_rtx (V16QImode);
22033 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22034 if (target != operands[0])
22035 emit_move_insn (operands[0],
22036 gen_lowpart (GET_MODE (operands[0]), target));
22038 else
22040 rtx xops[6];
22041 bool ok;
22043 /* Shuffle the two input vectors independently. */
22044 t1 = gen_reg_rtx (V16QImode);
22045 t2 = gen_reg_rtx (V16QImode);
22046 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22047 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22049 merge_two:
22050 /* Then merge them together. The key is whether any given control
22051 element contained a bit set that indicates the second word. */
22052 mask = operands[3];
22053 vt = GEN_INT (w);
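      /* Bit W of each control element selects which of the two shuffled
	 inputs the element came from; mask it out and test it for equality
	 against W so that ix86_expand_int_vcond below can blend T1 and T2
	 accordingly.  */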
22054 if (maskmode == V2DImode && !TARGET_SSE4_1)
22056	      /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
22057		 more shuffle to convert the V2DI input mask into a V4SI
22058		 input mask, at which point the masking done by
22059		 ix86_expand_int_vcond will work as desired.  */
22060 rtx t3 = gen_reg_rtx (V4SImode);
22061 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22062 const0_rtx, const0_rtx,
22063 const2_rtx, const2_rtx));
22064 mask = t3;
22065 maskmode = V4SImode;
22066 e = w = 4;
22069 for (i = 0; i < w; i++)
22070 vec[i] = vt;
22071 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22072 vt = force_reg (maskmode, vt);
22073 mask = expand_simple_binop (maskmode, AND, mask, vt,
22074 NULL_RTX, 0, OPTAB_DIRECT);
22076 if (GET_MODE (target) != mode)
22077 target = gen_reg_rtx (mode);
22078 xops[0] = target;
22079 xops[1] = gen_lowpart (mode, t2);
22080 xops[2] = gen_lowpart (mode, t1);
22081 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22082 xops[4] = mask;
22083 xops[5] = vt;
22084 ok = ix86_expand_int_vcond (xops);
22085 gcc_assert (ok);
22086 if (target != operands[0])
22087 emit_move_insn (operands[0],
22088 gen_lowpart (GET_MODE (operands[0]), target));
22092 /* Unpack SRC into the next wider integer vector type.  UNSIGNED_P is
22093 true if we should do zero extension, else sign extension. HIGH_P is
22094 true if we want the N/2 high elements, else the low elements. */
22096 void
22097 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22099 enum machine_mode imode = GET_MODE (src);
22100 rtx tmp;
22102 if (TARGET_SSE4_1)
22104 rtx (*unpack)(rtx, rtx);
22105 rtx (*extract)(rtx, rtx) = NULL;
22106 enum machine_mode halfmode = BLKmode;
22108 switch (imode)
22110 case V32QImode:
22111 if (unsigned_p)
22112 unpack = gen_avx2_zero_extendv16qiv16hi2;
22113 else
22114 unpack = gen_avx2_sign_extendv16qiv16hi2;
22115 halfmode = V16QImode;
22116 extract
22117 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22118 break;
22119 case V32HImode:
22120 if (unsigned_p)
22121 unpack = gen_avx512f_zero_extendv16hiv16si2;
22122 else
22123 unpack = gen_avx512f_sign_extendv16hiv16si2;
22124 halfmode = V16HImode;
22125 extract
22126 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22127 break;
22128 case V16HImode:
22129 if (unsigned_p)
22130 unpack = gen_avx2_zero_extendv8hiv8si2;
22131 else
22132 unpack = gen_avx2_sign_extendv8hiv8si2;
22133 halfmode = V8HImode;
22134 extract
22135 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22136 break;
22137 case V16SImode:
22138 if (unsigned_p)
22139 unpack = gen_avx512f_zero_extendv8siv8di2;
22140 else
22141 unpack = gen_avx512f_sign_extendv8siv8di2;
22142 halfmode = V8SImode;
22143 extract
22144 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22145 break;
22146 case V8SImode:
22147 if (unsigned_p)
22148 unpack = gen_avx2_zero_extendv4siv4di2;
22149 else
22150 unpack = gen_avx2_sign_extendv4siv4di2;
22151 halfmode = V4SImode;
22152 extract
22153 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22154 break;
22155 case V16QImode:
22156 if (unsigned_p)
22157 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22158 else
22159 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22160 break;
22161 case V8HImode:
22162 if (unsigned_p)
22163 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22164 else
22165 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22166 break;
22167 case V4SImode:
22168 if (unsigned_p)
22169 unpack = gen_sse4_1_zero_extendv2siv2di2;
22170 else
22171 unpack = gen_sse4_1_sign_extendv2siv2di2;
22172 break;
22173 default:
22174 gcc_unreachable ();
22177 if (GET_MODE_SIZE (imode) >= 32)
22179 tmp = gen_reg_rtx (halfmode);
22180 emit_insn (extract (tmp, src));
22182 else if (high_p)
22184 /* Shift higher 8 bytes to lower 8 bytes. */
22185 tmp = gen_reg_rtx (V1TImode);
22186 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22187 GEN_INT (64)));
22188 tmp = gen_lowpart (imode, tmp);
22190 else
22191 tmp = src;
22193 emit_insn (unpack (dest, tmp));
22195 else
22197 rtx (*unpack)(rtx, rtx, rtx);
22199 switch (imode)
22201 case V16QImode:
22202 if (high_p)
22203 unpack = gen_vec_interleave_highv16qi;
22204 else
22205 unpack = gen_vec_interleave_lowv16qi;
22206 break;
22207 case V8HImode:
22208 if (high_p)
22209 unpack = gen_vec_interleave_highv8hi;
22210 else
22211 unpack = gen_vec_interleave_lowv8hi;
22212 break;
22213 case V4SImode:
22214 if (high_p)
22215 unpack = gen_vec_interleave_highv4si;
22216 else
22217 unpack = gen_vec_interleave_lowv4si;
22218 break;
22219 default:
22220 gcc_unreachable ();
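      /* Interleaving with zero performs the zero extension; for sign
	 extension, interleave with a mask that is all ones exactly where SRC
	 is negative, obtained from the 0 > SRC comparison below.  */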
22223 if (unsigned_p)
22224 tmp = force_reg (imode, CONST0_RTX (imode));
22225 else
22226 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22227 src, pc_rtx, pc_rtx);
22229 rtx tmp2 = gen_reg_rtx (imode);
22230 emit_insn (unpack (tmp2, src, tmp));
22231 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22235 /* Expand a conditional increment or decrement using adc/sbb instructions.
22236 The default case using setcc followed by the conditional move can be
22237 done by generic code. */
22238 bool
22239 ix86_expand_int_addcc (rtx operands[])
22241 enum rtx_code code = GET_CODE (operands[1]);
22242 rtx flags;
22243 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22244 rtx compare_op;
22245 rtx val = const0_rtx;
22246 bool fpcmp = false;
22247 enum machine_mode mode;
22248 rtx op0 = XEXP (operands[1], 0);
22249 rtx op1 = XEXP (operands[1], 1);
22251 if (operands[3] != const1_rtx
22252 && operands[3] != constm1_rtx)
22253 return false;
22254 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22255 return false;
22256 code = GET_CODE (compare_op);
22258 flags = XEXP (compare_op, 0);
22260 if (GET_MODE (flags) == CCFPmode
22261 || GET_MODE (flags) == CCFPUmode)
22263 fpcmp = true;
22264 code = ix86_fp_compare_code_to_integer (code);
22267 if (code != LTU)
22269 val = constm1_rtx;
22270 if (fpcmp)
22271 PUT_CODE (compare_op,
22272 reverse_condition_maybe_unordered
22273 (GET_CODE (compare_op)));
22274 else
22275 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22278 mode = GET_MODE (operands[0]);
22280 /* Construct either adc or sbb insn. */
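  /* The add*3_carry patterns compute SRC + VAL + carry and the sub*3_carry
     patterns SRC - VAL - carry, so with VAL of 0 or -1 the conditional
     increment or decrement is expressed entirely through the carry (or its
     complement) produced by the compare above.  */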
22281 if ((code == LTU) == (operands[3] == constm1_rtx))
22283 switch (mode)
22285 case QImode:
22286 insn = gen_subqi3_carry;
22287 break;
22288 case HImode:
22289 insn = gen_subhi3_carry;
22290 break;
22291 case SImode:
22292 insn = gen_subsi3_carry;
22293 break;
22294 case DImode:
22295 insn = gen_subdi3_carry;
22296 break;
22297 default:
22298 gcc_unreachable ();
22301 else
22303 switch (mode)
22305 case QImode:
22306 insn = gen_addqi3_carry;
22307 break;
22308 case HImode:
22309 insn = gen_addhi3_carry;
22310 break;
22311 case SImode:
22312 insn = gen_addsi3_carry;
22313 break;
22314 case DImode:
22315 insn = gen_adddi3_carry;
22316 break;
22317 default:
22318 gcc_unreachable ();
22321 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22323 return true;
22327 /* Split OPERAND into half-mode parts stored in PARTS.  Similar to
22328    split_double_mode, but works for floating point parameters and
22329    non-offsettable memories.  For pushes, it returns just stack offsets;
22330    the values will be saved in the right order.  At most four parts are
      generated.  */
22332 static int
22333 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22335 int size;
22337 if (!TARGET_64BIT)
22338     size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22339 else
22340 size = (GET_MODE_SIZE (mode) + 4) / 8;
22342 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22343 gcc_assert (size >= 2 && size <= 4);
22345 /* Optimize constant pool reference to immediates. This is used by fp
22346 moves, that force all constants to memory to allow combining. */
22347 if (MEM_P (operand) && MEM_READONLY_P (operand))
22349 rtx tmp = maybe_get_pool_constant (operand);
22350 if (tmp)
22351 operand = tmp;
22354 if (MEM_P (operand) && !offsettable_memref_p (operand))
22356	      /* The only non-offsettable memories we handle are pushes.  */
22357 int ok = push_operand (operand, VOIDmode);
22359 gcc_assert (ok);
22361 operand = copy_rtx (operand);
22362 PUT_MODE (operand, word_mode);
22363 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22364 return size;
22367 if (GET_CODE (operand) == CONST_VECTOR)
22369 enum machine_mode imode = int_mode_for_mode (mode);
22370 /* Caution: if we looked through a constant pool memory above,
22371 the operand may actually have a different mode now. That's
22372 ok, since we want to pun this all the way back to an integer. */
22373 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22374 gcc_assert (operand != NULL);
22375 mode = imode;
22378 if (!TARGET_64BIT)
22380 if (mode == DImode)
22381 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22382 else
22384 int i;
22386 if (REG_P (operand))
22388 gcc_assert (reload_completed);
22389 for (i = 0; i < size; i++)
22390 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22392 else if (offsettable_memref_p (operand))
22394 operand = adjust_address (operand, SImode, 0);
22395 parts[0] = operand;
22396 for (i = 1; i < size; i++)
22397 parts[i] = adjust_address (operand, SImode, 4 * i);
22399 else if (GET_CODE (operand) == CONST_DOUBLE)
22401 REAL_VALUE_TYPE r;
22402 long l[4];
22404 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22405 switch (mode)
22407 case TFmode:
22408 real_to_target (l, &r, mode);
22409 parts[3] = gen_int_mode (l[3], SImode);
22410 parts[2] = gen_int_mode (l[2], SImode);
22411 break;
22412 case XFmode:
22413 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22414 long double may not be 80-bit. */
22415 real_to_target (l, &r, mode);
22416 parts[2] = gen_int_mode (l[2], SImode);
22417 break;
22418 case DFmode:
22419 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22420 break;
22421 default:
22422 gcc_unreachable ();
22424 parts[1] = gen_int_mode (l[1], SImode);
22425 parts[0] = gen_int_mode (l[0], SImode);
22427 else
22428 gcc_unreachable ();
22431 else
22433 if (mode == TImode)
22434 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22435 if (mode == XFmode || mode == TFmode)
22437 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22438 if (REG_P (operand))
22440 gcc_assert (reload_completed);
22441 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22442 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22444 else if (offsettable_memref_p (operand))
22446 operand = adjust_address (operand, DImode, 0);
22447 parts[0] = operand;
22448 parts[1] = adjust_address (operand, upper_mode, 8);
22450 else if (GET_CODE (operand) == CONST_DOUBLE)
22452 REAL_VALUE_TYPE r;
22453 long l[4];
22455 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22456 real_to_target (l, &r, mode);
22458 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22459 if (HOST_BITS_PER_WIDE_INT >= 64)
22460 parts[0]
22461 = gen_int_mode
22462 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22463 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22464 DImode);
22465 else
22466 parts[0] = immed_double_const (l[0], l[1], DImode);
22468 if (upper_mode == SImode)
22469 parts[1] = gen_int_mode (l[2], SImode);
22470 else if (HOST_BITS_PER_WIDE_INT >= 64)
22471 parts[1]
22472 = gen_int_mode
22473 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22474 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22475 DImode);
22476 else
22477 parts[1] = immed_double_const (l[2], l[3], DImode);
22479 else
22480 gcc_unreachable ();
22484 return size;
22487 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22488    The value is split into parts: operands 2 onward receive the
22489    destination parts and operands 6 onward the source parts, in the
22490    correct order, before the moves are emitted.  */
22492 void
22493 ix86_split_long_move (rtx operands[])
22495 rtx part[2][4];
22496 int nparts, i, j;
22497 int push = 0;
22498 int collisions = 0;
22499 enum machine_mode mode = GET_MODE (operands[0]);
22500 bool collisionparts[4];
22502	  /* The DFmode expanders may ask us to move a double.
22503	     For a 64-bit target this is a single move.  By hiding that fact
22504	     here we simplify the i386.md splitters.  */
22505 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22507 /* Optimize constant pool reference to immediates. This is used by
22508 fp moves, that force all constants to memory to allow combining. */
22510 if (MEM_P (operands[1])
22511 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22512 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22513 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22514 if (push_operand (operands[0], VOIDmode))
22516 operands[0] = copy_rtx (operands[0]);
22517 PUT_MODE (operands[0], word_mode);
22519 else
22520 operands[0] = gen_lowpart (DImode, operands[0]);
22521 operands[1] = gen_lowpart (DImode, operands[1]);
22522 emit_move_insn (operands[0], operands[1]);
22523 return;
22526 /* The only non-offsettable memory we handle is push. */
22527 if (push_operand (operands[0], VOIDmode))
22528 push = 1;
22529 else
22530 gcc_assert (!MEM_P (operands[0])
22531 || offsettable_memref_p (operands[0]));
22533 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22534 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22536 /* When emitting push, take care for source operands on the stack. */
22537 if (push && MEM_P (operands[1])
22538 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22540 rtx src_base = XEXP (part[1][nparts - 1], 0);
22542 /* Compensate for the stack decrement by 4. */
22543 if (!TARGET_64BIT && nparts == 3
22544 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22545 src_base = plus_constant (Pmode, src_base, 4);
22547 /* src_base refers to the stack pointer and is
22548 automatically decreased by emitted push. */
22549 for (i = 0; i < nparts; i++)
22550 part[1][i] = change_address (part[1][i],
22551 GET_MODE (part[1][i]), src_base);
22554 /* We need to do copy in the right order in case an address register
22555 of the source overlaps the destination. */
22556 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22558 rtx tmp;
22560 for (i = 0; i < nparts; i++)
22562 collisionparts[i]
22563 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22564 if (collisionparts[i])
22565 collisions++;
22568 /* Collision in the middle part can be handled by reordering. */
22569 if (collisions == 1 && nparts == 3 && collisionparts [1])
22571 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22572 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22574 else if (collisions == 1
22575 && nparts == 4
22576 && (collisionparts [1] || collisionparts [2]))
22578 if (collisionparts [1])
22580 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22581 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22583 else
22585 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22586 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22590 /* If there are more collisions, we can't handle it by reordering.
22591 Do an lea to the last part and use only one colliding move. */
22592 else if (collisions > 1)
22594 rtx base;
22596 collisions = 1;
22598 base = part[0][nparts - 1];
22600 /* Handle the case when the last part isn't valid for lea.
22601 Happens in 64-bit mode storing the 12-byte XFmode. */
22602 if (GET_MODE (base) != Pmode)
22603 base = gen_rtx_REG (Pmode, REGNO (base));
22605 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22606 part[1][0] = replace_equiv_address (part[1][0], base);
22607 for (i = 1; i < nparts; i++)
22609 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22610 part[1][i] = replace_equiv_address (part[1][i], tmp);
22615 if (push)
22617 if (!TARGET_64BIT)
22619 if (nparts == 3)
22621 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22622 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22623 stack_pointer_rtx, GEN_INT (-4)));
22624 emit_move_insn (part[0][2], part[1][2]);
22626 else if (nparts == 4)
22628 emit_move_insn (part[0][3], part[1][3]);
22629 emit_move_insn (part[0][2], part[1][2]);
22632 else
22634	      /* In 64-bit mode we don't have a 32-bit push available.  If this is a
22635		 register, that is OK - we will just use the larger counterpart.  We also
22636		 retype memory - this comes from an attempt to avoid the REX prefix on
22637		 moving the second half of a TFmode value.  */
22638 if (GET_MODE (part[1][1]) == SImode)
22640 switch (GET_CODE (part[1][1]))
22642 case MEM:
22643 part[1][1] = adjust_address (part[1][1], DImode, 0);
22644 break;
22646 case REG:
22647 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22648 break;
22650 default:
22651 gcc_unreachable ();
22654 if (GET_MODE (part[1][0]) == SImode)
22655 part[1][0] = part[1][1];
22658 emit_move_insn (part[0][1], part[1][1]);
22659 emit_move_insn (part[0][0], part[1][0]);
22660 return;
22663 /* Choose correct order to not overwrite the source before it is copied. */
22664 if ((REG_P (part[0][0])
22665 && REG_P (part[1][1])
22666 && (REGNO (part[0][0]) == REGNO (part[1][1])
22667 || (nparts == 3
22668 && REGNO (part[0][0]) == REGNO (part[1][2]))
22669 || (nparts == 4
22670 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22671 || (collisions > 0
22672 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22674 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22676 operands[2 + i] = part[0][j];
22677 operands[6 + i] = part[1][j];
22680 else
22682 for (i = 0; i < nparts; i++)
22684 operands[2 + i] = part[0][i];
22685 operands[6 + i] = part[1][i];
22689 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22690 if (optimize_insn_for_size_p ())
22692 for (j = 0; j < nparts - 1; j++)
22693 if (CONST_INT_P (operands[6 + j])
22694 && operands[6 + j] != const0_rtx
22695 && REG_P (operands[2 + j]))
22696 for (i = j; i < nparts - 1; i++)
22697 if (CONST_INT_P (operands[7 + i])
22698 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22699 operands[7 + i] = operands[2 + j];
22702 for (i = 0; i < nparts; i++)
22703 emit_move_insn (operands[2 + i], operands[6 + i]);
22705 return;
22708 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22709 left shift by a constant, either using a single shift or
22710 a sequence of add instructions. */
22712 static void
22713 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22715 rtx (*insn)(rtx, rtx, rtx);
22717 if (count == 1
22718 || (count * ix86_cost->add <= ix86_cost->shift_const
22719 && !optimize_insn_for_size_p ()))
22721 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22722 while (count-- > 0)
22723 emit_insn (insn (operand, operand, operand));
22725 else
22727 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22728 emit_insn (insn (operand, operand, GEN_INT (count)));
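/* Illustrative note (not part of the original source): for a DImode shift
   split on ia32, OPERAND here is one SImode half, which is why the DImode
   case selects the SImode add/shift generators.  On a CPU where adds are
   cheap relative to a constant shift, a left shift of that half by 2 would
   be emitted as two self-additions, roughly:

       addl %eax, %eax
       addl %eax, %eax

   while a larger count falls back to a single "sall $count, %eax".  The
   register name is only an example.  */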
22732 void
22733 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22735 rtx (*gen_ashl3)(rtx, rtx, rtx);
22736 rtx (*gen_shld)(rtx, rtx, rtx);
22737 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22739 rtx low[2], high[2];
22740 int count;
22742 if (CONST_INT_P (operands[2]))
22744 split_double_mode (mode, operands, 2, low, high);
22745 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22747 if (count >= half_width)
22749 emit_move_insn (high[0], low[1]);
22750 emit_move_insn (low[0], const0_rtx);
22752 if (count > half_width)
22753 ix86_expand_ashl_const (high[0], count - half_width, mode);
22755 else
22757 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22759 if (!rtx_equal_p (operands[0], operands[1]))
22760 emit_move_insn (operands[0], operands[1]);
22762 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22763 ix86_expand_ashl_const (low[0], count, mode);
22765 return;
22768 split_double_mode (mode, operands, 1, low, high);
22770 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22772 if (operands[1] == const1_rtx)
22774 /* Assuming we've chosen QImode-capable registers, 1 << N
22775 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22776 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22778 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22780 ix86_expand_clear (low[0]);
22781 ix86_expand_clear (high[0]);
22782 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22784 d = gen_lowpart (QImode, low[0]);
22785 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22786 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22787 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22789 d = gen_lowpart (QImode, high[0]);
22790 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22791 s = gen_rtx_NE (QImode, flags, const0_rtx);
22792 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22795 /* Otherwise, we can get the same results by manually performing
22796 a bit extract operation on bit 5/6, and then performing the two
22797 shifts. The two methods of getting 0/1 into low/high are exactly
22798 the same size. Avoiding the shift in the bit extract case helps
22799 pentium4 a bit; no one else seems to care much either way. */
22800 else
22802 enum machine_mode half_mode;
22803 rtx (*gen_lshr3)(rtx, rtx, rtx);
22804 rtx (*gen_and3)(rtx, rtx, rtx);
22805 rtx (*gen_xor3)(rtx, rtx, rtx);
22806 HOST_WIDE_INT bits;
22807 rtx x;
22809 if (mode == DImode)
22811 half_mode = SImode;
22812 gen_lshr3 = gen_lshrsi3;
22813 gen_and3 = gen_andsi3;
22814 gen_xor3 = gen_xorsi3;
22815 bits = 5;
22817 else
22819 half_mode = DImode;
22820 gen_lshr3 = gen_lshrdi3;
22821 gen_and3 = gen_anddi3;
22822 gen_xor3 = gen_xordi3;
22823 bits = 6;
22826 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22827 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22828 else
22829 x = gen_lowpart (half_mode, operands[2]);
22830 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22832 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22833 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22834 emit_move_insn (low[0], high[0]);
22835 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22838 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22839 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22840 return;
22843 if (operands[1] == constm1_rtx)
22845 /* For -1 << N, we can avoid the shld instruction, because we
22846 know that we're shifting 0...31/63 ones into a -1. */
22847 emit_move_insn (low[0], constm1_rtx);
22848 if (optimize_insn_for_size_p ())
22849 emit_move_insn (high[0], low[0]);
22850 else
22851 emit_move_insn (high[0], constm1_rtx);
22853 else
22855 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22857 if (!rtx_equal_p (operands[0], operands[1]))
22858 emit_move_insn (operands[0], operands[1]);
22860 split_double_mode (mode, operands, 1, low, high);
22861 emit_insn (gen_shld (high[0], low[0], operands[2]));
22864 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22866 if (TARGET_CMOVE && scratch)
22868 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22869 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22871 ix86_expand_clear (scratch);
22872 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22874 else
22876 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22877 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22879 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
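/* Illustrative sketch (not emitted literally): for a DImode left shift on
   ia32 with a variable count, the sequence produced above is roughly

       shld %cl, %eax, %edx   ; high = (high << count) | (low >> (32 - count))
       sall %cl, %eax         ; low <<= count
       ; then, if bit 5 of the count is set (count >= 32), high = low and
       ; low = 0, done either with cmov against a cleared scratch register
       ; or, without cmove, with a short conditional branch.

   Register names are examples only; the actual operands come from
   split_double_mode above.  */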
22883 void
22884 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22886 rtx (*gen_ashr3)(rtx, rtx, rtx)
22887 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22888 rtx (*gen_shrd)(rtx, rtx, rtx);
22889 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22891 rtx low[2], high[2];
22892 int count;
22894 if (CONST_INT_P (operands[2]))
22896 split_double_mode (mode, operands, 2, low, high);
22897 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22899 if (count == GET_MODE_BITSIZE (mode) - 1)
22901 emit_move_insn (high[0], high[1]);
22902 emit_insn (gen_ashr3 (high[0], high[0],
22903 GEN_INT (half_width - 1)));
22904 emit_move_insn (low[0], high[0]);
22907 else if (count >= half_width)
22909 emit_move_insn (low[0], high[1]);
22910 emit_move_insn (high[0], low[0]);
22911 emit_insn (gen_ashr3 (high[0], high[0],
22912 GEN_INT (half_width - 1)));
22914 if (count > half_width)
22915 emit_insn (gen_ashr3 (low[0], low[0],
22916 GEN_INT (count - half_width)));
22918 else
22920 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22922 if (!rtx_equal_p (operands[0], operands[1]))
22923 emit_move_insn (operands[0], operands[1]);
22925 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22926 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22929 else
22931 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22933 if (!rtx_equal_p (operands[0], operands[1]))
22934 emit_move_insn (operands[0], operands[1]);
22936 split_double_mode (mode, operands, 1, low, high);
22938 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22939 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22941 if (TARGET_CMOVE && scratch)
22943 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22944 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22946 emit_move_insn (scratch, high[0]);
22947 emit_insn (gen_ashr3 (scratch, scratch,
22948 GEN_INT (half_width - 1)));
22949 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22950 scratch));
22952 else
22954 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22955 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22957 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22962 void
22963 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22965 rtx (*gen_lshr3)(rtx, rtx, rtx)
22966 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22967 rtx (*gen_shrd)(rtx, rtx, rtx);
22968 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22970 rtx low[2], high[2];
22971 int count;
22973 if (CONST_INT_P (operands[2]))
22975 split_double_mode (mode, operands, 2, low, high);
22976 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22978 if (count >= half_width)
22980 emit_move_insn (low[0], high[1]);
22981 ix86_expand_clear (high[0]);
22983 if (count > half_width)
22984 emit_insn (gen_lshr3 (low[0], low[0],
22985 GEN_INT (count - half_width)));
22987 else
22989 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22991 if (!rtx_equal_p (operands[0], operands[1]))
22992 emit_move_insn (operands[0], operands[1]);
22994 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22995 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22998 else
23000 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23002 if (!rtx_equal_p (operands[0], operands[1]))
23003 emit_move_insn (operands[0], operands[1]);
23005 split_double_mode (mode, operands, 1, low, high);
23007 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23008 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23010 if (TARGET_CMOVE && scratch)
23012 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23013 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23015 ix86_expand_clear (scratch);
23016 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23017 scratch));
23019 else
23021 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23022 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23024 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23029 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
23030 static void
23031 predict_jump (int prob)
23033 rtx insn = get_last_insn ();
23034 gcc_assert (JUMP_P (insn));
23035 add_int_reg_note (insn, REG_BR_PROB, prob);
23038 /* Helper function for the string operations below. Test whether VARIABLE
23039 has the VALUE bits set; if they are all clear, jump to the returned label. */
23040 static rtx
23041 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23043 rtx label = gen_label_rtx ();
23044 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23045 if (GET_MODE (variable) == DImode)
23046 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23047 else
23048 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23049 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23050 1, label);
23051 if (epilogue)
23052 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23053 else
23054 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23055 return label;
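/* Illustrative example (not part of the original source): a call such as
   ix86_expand_aligntest (count, 4, true) expands to roughly

       tmp = count & 4;
       if (tmp == 0) goto label;

   so the code the caller emits before placing LABEL runs only when that
   bit of COUNT is set, i.e. when a 4-byte chunk still has to be handled.  */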
23058 /* Adjust COUNTER by the VALUE. */
23059 static void
23060 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23062 rtx (*gen_add)(rtx, rtx, rtx)
23063 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23065 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23068 /* Zero extend possibly SImode EXP to Pmode register. */
23069 static rtx
23070 ix86_zero_extend_to_Pmode (rtx exp)
23072 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23075 /* Divide COUNTREG by SCALE. */
23076 static rtx
23077 scale_counter (rtx countreg, int scale)
23079 rtx sc;
23081 if (scale == 1)
23082 return countreg;
23083 if (CONST_INT_P (countreg))
23084 return GEN_INT (INTVAL (countreg) / scale);
23085 gcc_assert (REG_P (countreg));
23087 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23088 GEN_INT (exact_log2 (scale)),
23089 NULL, 1, OPTAB_DIRECT);
23090 return sc;
23093 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23094 DImode for constant loop counts. */
23096 static enum machine_mode
23097 counter_mode (rtx count_exp)
23099 if (GET_MODE (count_exp) != VOIDmode)
23100 return GET_MODE (count_exp);
23101 if (!CONST_INT_P (count_exp))
23102 return Pmode;
23103 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23104 return DImode;
23105 return SImode;
23108 /* Copy the address to a Pmode register. This is used for x32 to
23109 truncate DImode TLS address to a SImode register. */
23111 static rtx
23112 ix86_copy_addr_to_reg (rtx addr)
23114 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23115 return copy_addr_to_reg (addr);
23116 else
23118 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23119 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
23123 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by SRCPTR
23124 to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size is COUNT,
23125 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
23126 memory by VALUE (supposed to be in MODE).
23128 The size is rounded down to a whole number of chunks moved at once.
23129 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
23132 static void
23133 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23134 rtx destptr, rtx srcptr, rtx value,
23135 rtx count, enum machine_mode mode, int unroll,
23136 int expected_size, bool issetmem)
23138 rtx out_label, top_label, iter, tmp;
23139 enum machine_mode iter_mode = counter_mode (count);
23140 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23141 rtx piece_size = GEN_INT (piece_size_n);
23142 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23143 rtx size;
23144 int i;
23146 top_label = gen_label_rtx ();
23147 out_label = gen_label_rtx ();
23148 iter = gen_reg_rtx (iter_mode);
23150 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23151 NULL, 1, OPTAB_DIRECT);
23152 /* Those two should combine. */
23153 if (piece_size == const1_rtx)
23155 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23156 true, out_label);
23157 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23159 emit_move_insn (iter, const0_rtx);
23161 emit_label (top_label);
23163 tmp = convert_modes (Pmode, iter_mode, iter, true);
23165 /* This assert could be relaxed; in that case we'd need to compute
23166 the smallest power of two containing PIECE_SIZE_N and pass it to
23167 offset_address. */
23168 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23169 destmem = offset_address (destmem, tmp, piece_size_n);
23170 destmem = adjust_address (destmem, mode, 0);
23172 if (!issetmem)
23174 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23175 srcmem = adjust_address (srcmem, mode, 0);
23177 /* When unrolling for chips that reorder memory reads and writes,
23178 we can save registers by using a single temporary.
23179 Also, using 4 temporaries is overkill in 32bit mode. */
23180 if (!TARGET_64BIT && 0)
23182 for (i = 0; i < unroll; i++)
23184 if (i)
23186 destmem =
23187 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23188 srcmem =
23189 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23191 emit_move_insn (destmem, srcmem);
23194 else
23196 rtx tmpreg[4];
23197 gcc_assert (unroll <= 4);
23198 for (i = 0; i < unroll; i++)
23200 tmpreg[i] = gen_reg_rtx (mode);
23201 if (i)
23203 srcmem =
23204 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23206 emit_move_insn (tmpreg[i], srcmem);
23208 for (i = 0; i < unroll; i++)
23210 if (i)
23212 destmem =
23213 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23215 emit_move_insn (destmem, tmpreg[i]);
23219 else
23220 for (i = 0; i < unroll; i++)
23222 if (i)
23223 destmem =
23224 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23225 emit_move_insn (destmem, value);
23228 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23229 true, OPTAB_LIB_WIDEN);
23230 if (tmp != iter)
23231 emit_move_insn (iter, tmp);
23233 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23234 true, top_label);
23235 if (expected_size != -1)
23237 expected_size /= GET_MODE_SIZE (mode) * unroll;
23238 if (expected_size == 0)
23239 predict_jump (0);
23240 else if (expected_size > REG_BR_PROB_BASE)
23241 predict_jump (REG_BR_PROB_BASE - 1);
23242 else
23243 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23245 else
23246 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23247 iter = ix86_zero_extend_to_Pmode (iter);
23248 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23249 true, OPTAB_LIB_WIDEN);
23250 if (tmp != destptr)
23251 emit_move_insn (destptr, tmp);
23252 if (!issetmem)
23254 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23255 true, OPTAB_LIB_WIDEN);
23256 if (tmp != srcptr)
23257 emit_move_insn (srcptr, tmp);
23259 emit_label (out_label);
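/* Illustrative sketch of the code generated above (pseudo-C, assuming a
   plain memcpy; the zero-size guard is emitted only when the piece size
   is a single byte):

       size = count & ~(piece_size - 1);
       if (size == 0) goto out;
       iter = 0;
       do
         {
           copy piece_size bytes from src + iter to dest + iter
             (UNROLL individual MODE-sized moves through temporaries);
           iter += piece_size;
         }
       while (iter < size);
       dest += iter; src += iter;
     out:;                                                                  */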
23262 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23263 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23264 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23265 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23266 ORIG_VALUE is the original value passed to memset to fill the memory with.
23267 Other arguments have the same meaning as for the previous function. */
23269 static void
23270 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23271 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23272 rtx count,
23273 enum machine_mode mode, bool issetmem)
23275 rtx destexp;
23276 rtx srcexp;
23277 rtx countreg;
23278 HOST_WIDE_INT rounded_count;
23280 /* If possible, it is shorter to use rep movs.
23281 TODO: Maybe it is better to move this logic to decide_alg. */
23282 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23283 && (!issetmem || orig_value == const0_rtx))
23284 mode = SImode;
23286 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23287 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23289 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23290 GET_MODE_SIZE (mode)));
23291 if (mode != QImode)
23293 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23294 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23295 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23297 else
23298 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23299 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23301 rounded_count = (INTVAL (count)
23302 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23303 destmem = shallow_copy_rtx (destmem);
23304 set_mem_size (destmem, rounded_count);
23306 else if (MEM_SIZE_KNOWN_P (destmem))
23307 clear_mem_size (destmem);
23309 if (issetmem)
23311 value = force_reg (mode, gen_lowpart (mode, value));
23312 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23314 else
23316 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23317 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23318 if (mode != QImode)
23320 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23321 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23322 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23324 else
23325 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23326 if (CONST_INT_P (count))
23328 rounded_count = (INTVAL (count)
23329 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23330 srcmem = shallow_copy_rtx (srcmem);
23331 set_mem_size (srcmem, rounded_count);
23333 else
23335 if (MEM_SIZE_KNOWN_P (srcmem))
23336 clear_mem_size (srcmem);
23338 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23339 destexp, srcexp));
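/* Illustrative note (not part of the original source): for a plain memcpy
   of COUNT bytes expanded with SImode chunks, the insn built above is in
   effect

       ecx = count >> 2;
       rep movsd;

   with DESTEXP/SRCEXP describing the final pointer values
   (ptr + (count >> 2) * 4) so that the rtl makes the side effects on the
   pointer registers explicit.  Register names follow the usual rep-prefix
   conventions and are examples only.  */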
23343 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23344 DESTMEM.
23345 SRCMEM is passed by pointer so it can be updated on return.
23346 The return value is the updated DESTMEM. */
23347 static rtx
23348 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23349 HOST_WIDE_INT size_to_move)
23351 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23352 enum insn_code code;
23353 enum machine_mode move_mode;
23354 int piece_size, i;
23356 /* Find the widest mode in which we could perform moves.
23357 Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
23358 it until a move of that size is supported. */
23359 piece_size = 1 << floor_log2 (size_to_move);
23360 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23361 code = optab_handler (mov_optab, move_mode);
23362 while (code == CODE_FOR_nothing && piece_size > 1)
23364 piece_size >>= 1;
23365 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23366 code = optab_handler (mov_optab, move_mode);
23369 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23370 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23371 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23373 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23374 move_mode = mode_for_vector (word_mode, nunits);
23375 code = optab_handler (mov_optab, move_mode);
23376 if (code == CODE_FOR_nothing)
23378 move_mode = word_mode;
23379 piece_size = GET_MODE_SIZE (move_mode);
23380 code = optab_handler (mov_optab, move_mode);
23383 gcc_assert (code != CODE_FOR_nothing);
23385 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23386 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23388 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23389 gcc_assert (size_to_move % piece_size == 0);
23390 adjust = GEN_INT (piece_size);
23391 for (i = 0; i < size_to_move; i += piece_size)
23393 /* We move from memory to memory, so we'll need to do it via
23394 a temporary register. */
23395 tempreg = gen_reg_rtx (move_mode);
23396 emit_insn (GEN_FCN (code) (tempreg, src));
23397 emit_insn (GEN_FCN (code) (dst, tempreg));
23399 emit_move_insn (destptr,
23400 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23401 emit_move_insn (srcptr,
23402 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23404 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23405 piece_size);
23406 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23407 piece_size);
23410 /* Update DST and SRC rtx. */
23411 *srcmem = src;
23412 return dst;
23415 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23416 static void
23417 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23418 rtx destptr, rtx srcptr, rtx count, int max_size)
23420 rtx src, dest;
23421 if (CONST_INT_P (count))
23423 HOST_WIDE_INT countval = INTVAL (count);
23424 HOST_WIDE_INT epilogue_size = countval % max_size;
23425 int i;
23427 /* For now MAX_SIZE should be a power of 2. This assert could be
23428 relaxed, but it'll require a bit more complicated epilogue
23429 expanding. */
23430 gcc_assert ((max_size & (max_size - 1)) == 0);
23431 for (i = max_size; i >= 1; i >>= 1)
23433 if (epilogue_size & i)
23434 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23436 return;
23438 if (max_size > 8)
23440 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23441 count, 1, OPTAB_DIRECT);
23442 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23443 count, QImode, 1, 4, false);
23444 return;
23447 /* When single-instruction stringops are available, we can cheaply advance the
23448 dest and src pointers. Otherwise we save code size by maintaining an offset
23449 (zero is readily available from the preceding rep operation) and using x86
23450 addressing modes. */
23451 if (TARGET_SINGLE_STRINGOP)
23453 if (max_size > 4)
23455 rtx label = ix86_expand_aligntest (count, 4, true);
23456 src = change_address (srcmem, SImode, srcptr);
23457 dest = change_address (destmem, SImode, destptr);
23458 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23459 emit_label (label);
23460 LABEL_NUSES (label) = 1;
23462 if (max_size > 2)
23464 rtx label = ix86_expand_aligntest (count, 2, true);
23465 src = change_address (srcmem, HImode, srcptr);
23466 dest = change_address (destmem, HImode, destptr);
23467 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23468 emit_label (label);
23469 LABEL_NUSES (label) = 1;
23471 if (max_size > 1)
23473 rtx label = ix86_expand_aligntest (count, 1, true);
23474 src = change_address (srcmem, QImode, srcptr);
23475 dest = change_address (destmem, QImode, destptr);
23476 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23477 emit_label (label);
23478 LABEL_NUSES (label) = 1;
23481 else
23483 rtx offset = force_reg (Pmode, const0_rtx);
23484 rtx tmp;
23486 if (max_size > 4)
23488 rtx label = ix86_expand_aligntest (count, 4, true);
23489 src = change_address (srcmem, SImode, srcptr);
23490 dest = change_address (destmem, SImode, destptr);
23491 emit_move_insn (dest, src);
23492 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23493 true, OPTAB_LIB_WIDEN);
23494 if (tmp != offset)
23495 emit_move_insn (offset, tmp);
23496 emit_label (label);
23497 LABEL_NUSES (label) = 1;
23499 if (max_size > 2)
23501 rtx label = ix86_expand_aligntest (count, 2, true);
23502 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23503 src = change_address (srcmem, HImode, tmp);
23504 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23505 dest = change_address (destmem, HImode, tmp);
23506 emit_move_insn (dest, src);
23507 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23508 true, OPTAB_LIB_WIDEN);
23509 if (tmp != offset)
23510 emit_move_insn (offset, tmp);
23511 emit_label (label);
23512 LABEL_NUSES (label) = 1;
23514 if (max_size > 1)
23516 rtx label = ix86_expand_aligntest (count, 1, true);
23517 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23518 src = change_address (srcmem, QImode, tmp);
23519 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23520 dest = change_address (destmem, QImode, tmp);
23521 emit_move_insn (dest, src);
23522 emit_label (label);
23523 LABEL_NUSES (label) = 1;
23528 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23529 with value PROMOTED_VAL.
23530 The return value is the updated DESTMEM. */
23532 static rtx
23533 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23534 HOST_WIDE_INT size_to_move)
23536 rtx dst = destmem, adjust;
23537 enum insn_code code;
23538 enum machine_mode move_mode;
23539 int piece_size, i;
23541 /* Find the widest mode in which we could perform moves.
23542 Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
23543 it until a move of that size is supported. */
23544 move_mode = GET_MODE (promoted_val);
23545 if (move_mode == VOIDmode)
23546 move_mode = QImode;
23547 if (size_to_move < GET_MODE_SIZE (move_mode))
23549 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23550 promoted_val = gen_lowpart (move_mode, promoted_val);
23552 piece_size = GET_MODE_SIZE (move_mode);
23553 code = optab_handler (mov_optab, move_mode);
23554 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23556 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23558 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23559 gcc_assert (size_to_move % piece_size == 0);
23560 adjust = GEN_INT (piece_size);
23561 for (i = 0; i < size_to_move; i += piece_size)
23563 if (piece_size <= GET_MODE_SIZE (word_mode))
23565 emit_insn (gen_strset (destptr, dst, promoted_val));
23566 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23567 piece_size);
23568 continue;
23571 emit_insn (GEN_FCN (code) (dst, promoted_val));
23573 emit_move_insn (destptr,
23574 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23576 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23577 piece_size);
23580 /* Update DST rtx. */
23581 return dst;
23583 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23584 static void
23585 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23586 rtx count, int max_size)
23588 count =
23589 expand_simple_binop (counter_mode (count), AND, count,
23590 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23591 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23592 gen_lowpart (QImode, value), count, QImode,
23593 1, max_size / 2, true);
23596 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23597 static void
23598 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23599 rtx count, int max_size)
23601 rtx dest;
23603 if (CONST_INT_P (count))
23605 HOST_WIDE_INT countval = INTVAL (count);
23606 HOST_WIDE_INT epilogue_size = countval % max_size;
23607 int i;
23609 /* For now MAX_SIZE should be a power of 2. This assert could be
23610 relaxed, but it'll require a bit more complicated epilogue
23611 expanding. */
23612 gcc_assert ((max_size & (max_size - 1)) == 0);
23613 for (i = max_size; i >= 1; i >>= 1)
23615 if (epilogue_size & i)
23617 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23618 destmem = emit_memset (destmem, destptr, vec_value, i);
23619 else
23620 destmem = emit_memset (destmem, destptr, value, i);
23623 return;
23625 if (max_size > 32)
23627 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23628 return;
23630 if (max_size > 16)
23632 rtx label = ix86_expand_aligntest (count, 16, true);
23633 if (TARGET_64BIT)
23635 dest = change_address (destmem, DImode, destptr);
23636 emit_insn (gen_strset (destptr, dest, value));
23637 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23638 emit_insn (gen_strset (destptr, dest, value));
23640 else
23642 dest = change_address (destmem, SImode, destptr);
23643 emit_insn (gen_strset (destptr, dest, value));
23644 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23645 emit_insn (gen_strset (destptr, dest, value));
23646 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23647 emit_insn (gen_strset (destptr, dest, value));
23648 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23649 emit_insn (gen_strset (destptr, dest, value));
23651 emit_label (label);
23652 LABEL_NUSES (label) = 1;
23654 if (max_size > 8)
23656 rtx label = ix86_expand_aligntest (count, 8, true);
23657 if (TARGET_64BIT)
23659 dest = change_address (destmem, DImode, destptr);
23660 emit_insn (gen_strset (destptr, dest, value));
23662 else
23664 dest = change_address (destmem, SImode, destptr);
23665 emit_insn (gen_strset (destptr, dest, value));
23666 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23667 emit_insn (gen_strset (destptr, dest, value));
23669 emit_label (label);
23670 LABEL_NUSES (label) = 1;
23672 if (max_size > 4)
23674 rtx label = ix86_expand_aligntest (count, 4, true);
23675 dest = change_address (destmem, SImode, destptr);
23676 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23677 emit_label (label);
23678 LABEL_NUSES (label) = 1;
23680 if (max_size > 2)
23682 rtx label = ix86_expand_aligntest (count, 2, true);
23683 dest = change_address (destmem, HImode, destptr);
23684 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23685 emit_label (label);
23686 LABEL_NUSES (label) = 1;
23688 if (max_size > 1)
23690 rtx label = ix86_expand_aligntest (count, 1, true);
23691 dest = change_address (destmem, QImode, destptr);
23692 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23693 emit_label (label);
23694 LABEL_NUSES (label) = 1;
23698 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM, or store enough into
23699 DESTMEM, to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23700 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23701 ignored.
23702 The return value is the updated DESTMEM. */
23703 static rtx
23704 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23705 rtx destptr, rtx srcptr, rtx value,
23706 rtx vec_value, rtx count, int align,
23707 int desired_alignment, bool issetmem)
23709 int i;
23710 for (i = 1; i < desired_alignment; i <<= 1)
23712 if (align <= i)
23714 rtx label = ix86_expand_aligntest (destptr, i, false);
23715 if (issetmem)
23717 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23718 destmem = emit_memset (destmem, destptr, vec_value, i);
23719 else
23720 destmem = emit_memset (destmem, destptr, value, i);
23722 else
23723 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23724 ix86_adjust_counter (count, i);
23725 emit_label (label);
23726 LABEL_NUSES (label) = 1;
23727 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23730 return destmem;
23733 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23734 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23735 and jump to DONE_LABEL. */
23736 static void
23737 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23738 rtx destptr, rtx srcptr,
23739 rtx value, rtx vec_value,
23740 rtx count, int size,
23741 rtx done_label, bool issetmem)
23743 rtx label = ix86_expand_aligntest (count, size, false);
23744 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23745 rtx modesize;
23746 int n;
23748 /* If we do not have vector value to copy, we must reduce size. */
23749 if (issetmem)
23751 if (!vec_value)
23753 if (GET_MODE (value) == VOIDmode && size > 8)
23754 mode = Pmode;
23755 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23756 mode = GET_MODE (value);
23758 else
23759 mode = GET_MODE (vec_value), value = vec_value;
23761 else
23763 /* Choose appropriate vector mode. */
23764 if (size >= 32)
23765 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23766 else if (size >= 16)
23767 mode = TARGET_SSE ? V16QImode : DImode;
23768 srcmem = change_address (srcmem, mode, srcptr);
23770 destmem = change_address (destmem, mode, destptr);
23771 modesize = GEN_INT (GET_MODE_SIZE (mode));
23772 gcc_assert (GET_MODE_SIZE (mode) <= size);
23773 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23775 if (issetmem)
23776 emit_move_insn (destmem, gen_lowpart (mode, value));
23777 else
23779 emit_move_insn (destmem, srcmem);
23780 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23782 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23785 destmem = offset_address (destmem, count, 1);
23786 destmem = offset_address (destmem, GEN_INT (-2 * size),
23787 GET_MODE_SIZE (mode));
23788 if (!issetmem)
23790 srcmem = offset_address (srcmem, count, 1);
23791 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23792 GET_MODE_SIZE (mode));
23794 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23796 if (issetmem)
23797 emit_move_insn (destmem, gen_lowpart (mode, value));
23798 else
23800 emit_move_insn (destmem, srcmem);
23801 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23803 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23805 emit_jump_insn (gen_jump (done_label));
23806 emit_barrier ();
23808 emit_label (label);
23809 LABEL_NUSES (label) = 1;
23812 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
23813 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23814 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
23815 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23816 DONE_LABEL is a label after the whole copying sequence. The label is created
23817 on demand if *DONE_LABEL is NULL.
23818 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for the new
23819 bounds after the initial copies.
23821 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23822 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23823 we will dispatch to a library call for large blocks.
23825 In pseudocode we do:
23827 if (COUNT < SIZE)
23829 Assume that SIZE is 4. Bigger sizes are handled analogously
23830 if (COUNT & 4)
23832 copy 4 bytes from SRCPTR to DESTPTR
23833 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23834 goto done_label
23836 if (!COUNT)
23837 goto done_label;
23838 copy 1 byte from SRCPTR to DESTPTR
23839 if (COUNT & 2)
23841 copy 2 bytes from SRCPTR to DESTPTR
23842 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23845 else
23847 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23848 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE
23850 OLD_DESTPTR = DESTPTR;
23851 Align DESTPTR up to DESIRED_ALIGN
23852 SRCPTR += DESTPTR - OLD_DESTPTR
23853 COUNT -= DESTPTR - OLD_DESTPTR
23854 if (DYNAMIC_CHECK)
23855 Round COUNT down to multiple of SIZE
23856 << optional caller supplied zero size guard is here >>
23857 << optional caller supplied dynamic check is here >>
23858 << caller supplied main copy loop is here >>
23860 done_label:
23862 static void
23863 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23864 rtx *destptr, rtx *srcptr,
23865 enum machine_mode mode,
23866 rtx value, rtx vec_value,
23867 rtx *count,
23868 rtx *done_label,
23869 int size,
23870 int desired_align,
23871 int align,
23872 unsigned HOST_WIDE_INT *min_size,
23873 bool dynamic_check,
23874 bool issetmem)
23876 rtx loop_label = NULL, label;
23877 int n;
23878 rtx modesize;
23879 int prolog_size = 0;
23880 rtx mode_value;
23882 /* Choose the proper value to copy. */
23883 if (issetmem && VECTOR_MODE_P (mode))
23884 mode_value = vec_value;
23885 else
23886 mode_value = value;
23887 gcc_assert (GET_MODE_SIZE (mode) <= size);
23889 /* See if block is big or small, handle small blocks. */
23890 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23892 int size2 = size;
23893 loop_label = gen_label_rtx ();
23895 if (!*done_label)
23896 *done_label = gen_label_rtx ();
23898 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23899 1, loop_label);
23900 size2 >>= 1;
23902 /* Handle sizes > 3. */
23903 for (;size2 > 2; size2 >>= 1)
23904 expand_small_movmem_or_setmem (destmem, srcmem,
23905 *destptr, *srcptr,
23906 value, vec_value,
23907 *count,
23908 size2, *done_label, issetmem);
23909 /* Nothing to copy? Jump to DONE_LABEL if so */
23910 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23911 1, *done_label);
23913 /* Do a byte copy. */
23914 destmem = change_address (destmem, QImode, *destptr);
23915 if (issetmem)
23916 emit_move_insn (destmem, gen_lowpart (QImode, value));
23917 else
23919 srcmem = change_address (srcmem, QImode, *srcptr);
23920 emit_move_insn (destmem, srcmem);
23923 /* Handle sizes 2 and 3. */
23924 label = ix86_expand_aligntest (*count, 2, false);
23925 destmem = change_address (destmem, HImode, *destptr);
23926 destmem = offset_address (destmem, *count, 1);
23927 destmem = offset_address (destmem, GEN_INT (-2), 2);
23928 if (issetmem)
23929 emit_move_insn (destmem, gen_lowpart (HImode, value));
23930 else
23932 srcmem = change_address (srcmem, HImode, *srcptr);
23933 srcmem = offset_address (srcmem, *count, 1);
23934 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23935 emit_move_insn (destmem, srcmem);
23938 emit_label (label);
23939 LABEL_NUSES (label) = 1;
23940 emit_jump_insn (gen_jump (*done_label));
23941 emit_barrier ();
23943 else
23944 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23945 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23947 /* Start memcpy for COUNT >= SIZE. */
23948 if (loop_label)
23950 emit_label (loop_label);
23951 LABEL_NUSES (loop_label) = 1;
23954 /* Copy first desired_align bytes. */
23955 if (!issetmem)
23956 srcmem = change_address (srcmem, mode, *srcptr);
23957 destmem = change_address (destmem, mode, *destptr);
23958 modesize = GEN_INT (GET_MODE_SIZE (mode));
23959 for (n = 0; prolog_size < desired_align - align; n++)
23961 if (issetmem)
23962 emit_move_insn (destmem, mode_value);
23963 else
23965 emit_move_insn (destmem, srcmem);
23966 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23968 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23969 prolog_size += GET_MODE_SIZE (mode);
23973 /* Copy last SIZE bytes. */
23974 destmem = offset_address (destmem, *count, 1);
23975 destmem = offset_address (destmem,
23976 GEN_INT (-size - prolog_size),
23978 if (issetmem)
23979 emit_move_insn (destmem, mode_value);
23980 else
23982 srcmem = offset_address (srcmem, *count, 1);
23983 srcmem = offset_address (srcmem,
23984 GEN_INT (-size - prolog_size),
23986 emit_move_insn (destmem, srcmem);
23988 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23990 destmem = offset_address (destmem, modesize, 1);
23991 if (issetmem)
23992 emit_move_insn (destmem, mode_value);
23993 else
23995 srcmem = offset_address (srcmem, modesize, 1);
23996 emit_move_insn (destmem, srcmem);
24000 /* Align destination. */
24001 if (desired_align > 1 && desired_align > align)
24003 rtx saveddest = *destptr;
24005 gcc_assert (desired_align <= size);
24006 /* Align destptr up, place it to new register. */
24007 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24008 GEN_INT (prolog_size),
24009 NULL_RTX, 1, OPTAB_DIRECT);
24010 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24011 GEN_INT (-desired_align),
24012 *destptr, 1, OPTAB_DIRECT);
24013 /* See how many bytes we skipped. */
24014 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24015 *destptr,
24016 saveddest, 1, OPTAB_DIRECT);
24017 /* Adjust srcptr and count. */
24018 if (!issetmem)
24019 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
24020 *srcptr, 1, OPTAB_DIRECT);
24021 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24022 saveddest, *count, 1, OPTAB_DIRECT);
24023 /* We copied at most size + prolog_size. */
24024 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24025 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24026 else
24027 *min_size = 0;
24029 /* Our loops always round down the block size, but for dispatch to the library
24030 we need the precise value. */
24031 if (dynamic_check)
24032 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24033 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24035 else
24037 gcc_assert (prolog_size == 0);
24038 /* Decrease count, so we won't end up copying last word twice. */
24039 if (!CONST_INT_P (*count))
24040 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24041 constm1_rtx, *count, 1, OPTAB_DIRECT);
24042 else
24043 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24044 if (*min_size)
24045 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24050 /* This function is like the previous one, except here we know how many bytes
24051 need to be copied. That allows us to update alignment not only of DST, which
24052 is returned, but also of SRC, which is passed as a pointer for that
24053 reason. */
24054 static rtx
24055 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24056 rtx srcreg, rtx value, rtx vec_value,
24057 int desired_align, int align_bytes,
24058 bool issetmem)
24060 rtx src = NULL;
24061 rtx orig_dst = dst;
24062 rtx orig_src = NULL;
24063 int piece_size = 1;
24064 int copied_bytes = 0;
24066 if (!issetmem)
24068 gcc_assert (srcp != NULL);
24069 src = *srcp;
24070 orig_src = src;
24073 for (piece_size = 1;
24074 piece_size <= desired_align && copied_bytes < align_bytes;
24075 piece_size <<= 1)
24077 if (align_bytes & piece_size)
24079 if (issetmem)
24081 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24082 dst = emit_memset (dst, destreg, vec_value, piece_size);
24083 else
24084 dst = emit_memset (dst, destreg, value, piece_size);
24086 else
24087 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24088 copied_bytes += piece_size;
24091 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24092 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24093 if (MEM_SIZE_KNOWN_P (orig_dst))
24094 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24096 if (!issetmem)
24098 int src_align_bytes = get_mem_align_offset (src, desired_align
24099 * BITS_PER_UNIT);
24100 if (src_align_bytes >= 0)
24101 src_align_bytes = desired_align - src_align_bytes;
24102 if (src_align_bytes >= 0)
24104 unsigned int src_align;
24105 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24107 if ((src_align_bytes & (src_align - 1))
24108 == (align_bytes & (src_align - 1)))
24109 break;
24111 if (src_align > (unsigned int) desired_align)
24112 src_align = desired_align;
24113 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24114 set_mem_align (src, src_align * BITS_PER_UNIT);
24116 if (MEM_SIZE_KNOWN_P (orig_src))
24117 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24118 *srcp = src;
24121 return dst;
24124 /* Return true if ALG can be used in current context.
24125 Assume we expand memset if MEMSET is true. */
24126 static bool
24127 alg_usable_p (enum stringop_alg alg, bool memset)
24129 if (alg == no_stringop)
24130 return false;
24131 if (alg == vector_loop)
24132 return TARGET_SSE || TARGET_AVX;
24133 /* Algorithms using the rep prefix want at least edi and ecx;
24134 additionally, memset wants eax and memcpy wants esi. Don't
24135 consider such algorithms if the user has appropriated those
24136 registers for their own purposes. */
24137 if (alg == rep_prefix_1_byte
24138 || alg == rep_prefix_4_byte
24139 || alg == rep_prefix_8_byte)
24140 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24141 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24142 return true;
24145 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24146 static enum stringop_alg
24147 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24148 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24149 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24151 const struct stringop_algs * algs;
24152 bool optimize_for_speed;
24153 int max = 0;
24154 const struct processor_costs *cost;
24155 int i;
24156 bool any_alg_usable_p = false;
24158 *noalign = false;
24159 *dynamic_check = -1;
24161 /* Even if the string operation call is cold, we still might spend a lot
24162 of time processing large blocks. */
24163 if (optimize_function_for_size_p (cfun)
24164 || (optimize_insn_for_size_p ()
24165 && (max_size < 256
24166 || (expected_size != -1 && expected_size < 256))))
24167 optimize_for_speed = false;
24168 else
24169 optimize_for_speed = true;
24171 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24172 if (memset)
24173 algs = &cost->memset[TARGET_64BIT != 0];
24174 else
24175 algs = &cost->memcpy[TARGET_64BIT != 0];
24177 /* See maximal size for user defined algorithm. */
24178 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24180 enum stringop_alg candidate = algs->size[i].alg;
24181 bool usable = alg_usable_p (candidate, memset);
24182 any_alg_usable_p |= usable;
24184 if (candidate != libcall && candidate && usable)
24185 max = algs->size[i].max;
24188 /* If the expected size is not known but the max size is small enough
24189 that the inline version is a win, set the expected size into
24190 the range. */
24191 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24192 && expected_size == -1)
24193 expected_size = min_size / 2 + max_size / 2;
24195 /* If the user specified the algorithm, honor it if possible. */
24196 if (ix86_stringop_alg != no_stringop
24197 && alg_usable_p (ix86_stringop_alg, memset))
24198 return ix86_stringop_alg;
24199 /* rep; movq or rep; movl is the smallest variant. */
24200 else if (!optimize_for_speed)
24202 *noalign = true;
24203 if (!count || (count & 3) || (memset && !zero_memset))
24204 return alg_usable_p (rep_prefix_1_byte, memset)
24205 ? rep_prefix_1_byte : loop_1_byte;
24206 else
24207 return alg_usable_p (rep_prefix_4_byte, memset)
24208 ? rep_prefix_4_byte : loop;
24210 /* Very tiny blocks are best handled via the loop; REP is expensive to
24211 set up. */
24212 else if (expected_size != -1 && expected_size < 4)
24213 return loop_1_byte;
24214 else if (expected_size != -1)
24216 enum stringop_alg alg = libcall;
24217 bool alg_noalign = false;
24218 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24220 /* We get here if the algorithms that were not libcall-based
24221 were rep-prefix based and we are unable to use rep prefixes
24222 based on global register usage. Break out of the loop and
24223 use the heuristic below. */
24224 if (algs->size[i].max == 0)
24225 break;
24226 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24228 enum stringop_alg candidate = algs->size[i].alg;
24230 if (candidate != libcall && alg_usable_p (candidate, memset))
24232 alg = candidate;
24233 alg_noalign = algs->size[i].noalign;
24235 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24236 last non-libcall inline algorithm. */
24237 if (TARGET_INLINE_ALL_STRINGOPS)
24239 /* When the current size is best to be copied by a libcall,
24240 but we are still forced to inline, run the heuristic below
24241 that will pick code for medium sized blocks. */
24242 if (alg != libcall)
24244 *noalign = alg_noalign;
24245 return alg;
24247 else if (!any_alg_usable_p)
24248 break;
24250 else if (alg_usable_p (candidate, memset))
24252 *noalign = algs->size[i].noalign;
24253 return candidate;
24258 /* When asked to inline the call anyway, try to pick a meaningful choice.
24259 We look for the maximal size of block that is faster to copy by hand and
24260 take blocks of at most that size, guessing that the average size will
24261 be roughly half of the block.
24263 If this turns out to be bad, we might simply specify the preferred
24264 choice in ix86_costs. */
24265 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24266 && (algs->unknown_size == libcall
24267 || !alg_usable_p (algs->unknown_size, memset)))
24269 enum stringop_alg alg;
24271 /* If there aren't any usable algorithms, then recursing on
24272 smaller sizes isn't going to find anything. Just return the
24273 simple byte-at-a-time copy loop. */
24274 if (!any_alg_usable_p)
24276 /* Pick something reasonable. */
24277 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24278 *dynamic_check = 128;
24279 return loop_1_byte;
24281 if (max <= 0)
24282 max = 4096;
24283 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24284 zero_memset, dynamic_check, noalign);
24285 gcc_assert (*dynamic_check == -1);
24286 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24287 *dynamic_check = max;
24288 else
24289 gcc_assert (alg != libcall);
24290 return alg;
24292 return (alg_usable_p (algs->unknown_size, memset)
24293 ? algs->unknown_size : libcall);
24296 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24297 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24298 static int
24299 decide_alignment (int align,
24300 enum stringop_alg alg,
24301 int expected_size,
24302 enum machine_mode move_mode)
24304 int desired_align = 0;
24306 gcc_assert (alg != no_stringop);
24308 if (alg == libcall)
24309 return 0;
24310 if (move_mode == VOIDmode)
24311 return 0;
24313 desired_align = GET_MODE_SIZE (move_mode);
24314 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
24315 copying a whole cache line at once. */
24316 if (TARGET_PENTIUMPRO
24317 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24318 desired_align = 8;
24320 if (optimize_size)
24321 desired_align = 1;
24322 if (desired_align < align)
24323 desired_align = align;
24324 if (expected_size != -1 && expected_size < 4)
24325 desired_align = align;
24327 return desired_align;
24331 /* Helper function for memset. For a QImode value 0xXY produce
24332 0xXYXYXYXY of the width specified by MODE. This is essentially
24333 a * 0x01010101, but we can do slightly better than
24334 synth_mult by unwinding the sequence by hand on CPUs with
24335 a slow multiply. */
24336 static rtx
24337 promote_duplicated_reg (enum machine_mode mode, rtx val)
24339 enum machine_mode valmode = GET_MODE (val);
24340 rtx tmp;
24341 int nops = mode == DImode ? 3 : 2;
24343 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24344 if (val == const0_rtx)
24345 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24346 if (CONST_INT_P (val))
24348 HOST_WIDE_INT v = INTVAL (val) & 255;
24350 v |= v << 8;
24351 v |= v << 16;
24352 if (mode == DImode)
24353 v |= (v << 16) << 16;
24354 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24357 if (valmode == VOIDmode)
24358 valmode = QImode;
24359 if (valmode != QImode)
24360 val = gen_lowpart (QImode, val);
24361 if (mode == QImode)
24362 return val;
24363 if (!TARGET_PARTIAL_REG_STALL)
24364 nops--;
24365 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24366 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24367 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24368 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24370 rtx reg = convert_modes (mode, QImode, val, true);
24371 tmp = promote_duplicated_reg (mode, const1_rtx);
24372 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24373 OPTAB_DIRECT);
24375 else
24377 rtx reg = convert_modes (mode, QImode, val, true);
24379 if (!TARGET_PARTIAL_REG_STALL)
24380 if (mode == SImode)
24381 emit_insn (gen_movsi_insv_1 (reg, reg));
24382 else
24383 emit_insn (gen_movdi_insv_1 (reg, reg));
24384 else
24386 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24387 NULL, 1, OPTAB_DIRECT);
24388 reg =
24389 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24391 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24392 NULL, 1, OPTAB_DIRECT);
24393 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24394 if (mode == SImode)
24395 return reg;
24396 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24397 NULL, 1, OPTAB_DIRECT);
24398 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24399 return reg;
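/* Illustrative example (not part of the original source): for SImode and a
   non-constant byte value V, the slow-multiply path above builds the
   replicated word roughly as

       reg = zero_extend (V);
       reg |= reg << 8;    (or a movsi_insv_1 insert when partial-register
                            stalls are not a concern)
       reg |= reg << 16;

   yielding 0xVVVVVVVV; DImode additionally ORs in reg << 32.  CPUs with a
   fast multiply instead multiply the byte by a register holding
   0x01010101.  */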
24403 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
24404 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue raising
24405 alignment from ALIGN to DESIRED_ALIGN. */
24406 static rtx
24407 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24408 int align)
24410 rtx promoted_val;
24412 if (TARGET_64BIT
24413 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24414 promoted_val = promote_duplicated_reg (DImode, val);
24415 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24416 promoted_val = promote_duplicated_reg (SImode, val);
24417 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24418 promoted_val = promote_duplicated_reg (HImode, val);
24419 else
24420 promoted_val = val;
24422 return promoted_val;
24425 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24426 operations when profitable. The code depends upon architecture, block size
24427 and alignment, but always has one of the following overall structures:
24429 Aligned move sequence:
24431 1) Prologue guard: Conditional that jumps up to epilogues for small
24432 blocks that can be handled by epilogue alone. This is faster
24433 but also needed for correctness, since the prologue assumes the block
24434 is larger than the desired alignment.
24436 Optional dynamic check for size and libcall for large
24437 blocks is emitted here too, with -minline-stringops-dynamically.
24439 2) Prologue: copy first few bytes in order to get destination
24440 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24441 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24442 copied. We emit either a jump tree on power of two sized
24443 blocks, or a byte loop.
24445 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24446 with specified algorithm.
24448 4) Epilogue: code copying tail of the block that is too small to be
24449 handled by main body (or up to size guarded by prologue guard).
24451 Misaligned move sequence
24453 1) misaligned move prologue/epilogue containing:
24454 a) Prologue handling small memory blocks and jumping to done_label
24455 (skipped if blocks are known to be large enough)
24456 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24457 needed by single possibly misaligned move
24458 (skipped if alignment is not needed)
24459 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24461 2) Zero size guard dispatching to done_label, if needed
24463 3) dispatch to library call, if needed,
24465 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24466 with specified algorithm. */
24467 bool
24468 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24469 rtx align_exp, rtx expected_align_exp,
24470 rtx expected_size_exp, rtx min_size_exp,
24471 rtx max_size_exp, rtx probable_max_size_exp,
24472 bool issetmem)
24474 rtx destreg;
24475 rtx srcreg = NULL;
24476 rtx label = NULL;
24477 rtx tmp;
24478 rtx jump_around_label = NULL;
24479 HOST_WIDE_INT align = 1;
24480 unsigned HOST_WIDE_INT count = 0;
24481 HOST_WIDE_INT expected_size = -1;
24482 int size_needed = 0, epilogue_size_needed;
24483 int desired_align = 0, align_bytes = 0;
24484 enum stringop_alg alg;
24485 rtx promoted_val = NULL;
24486 rtx vec_promoted_val = NULL;
24487 bool force_loopy_epilogue = false;
24488 int dynamic_check;
24489 bool need_zero_guard = false;
24490 bool noalign;
24491 enum machine_mode move_mode = VOIDmode;
24492 int unroll_factor = 1;
24493 /* TODO: Once value ranges are available, fill in proper data. */
24494 unsigned HOST_WIDE_INT min_size = 0;
24495 unsigned HOST_WIDE_INT max_size = -1;
24496 unsigned HOST_WIDE_INT probable_max_size = -1;
24497 bool misaligned_prologue_used = false;
24499 if (CONST_INT_P (align_exp))
24500 align = INTVAL (align_exp);
24501 /* i386 can do misaligned access at a reasonably increased cost. */
24502 if (CONST_INT_P (expected_align_exp)
24503 && INTVAL (expected_align_exp) > align)
24504 align = INTVAL (expected_align_exp);
24505 /* ALIGN is the minimum of destination and source alignment, but we care here
24506 just about destination alignment. */
24507 else if (!issetmem
24508 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24509 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24511 if (CONST_INT_P (count_exp))
24513 min_size = max_size = probable_max_size = count = expected_size
24514 = INTVAL (count_exp);
24515 /* When COUNT is 0, there is nothing to do. */
24516 if (!count)
24517 return true;
24519 else
24521 if (min_size_exp)
24522 min_size = INTVAL (min_size_exp);
24523 if (max_size_exp)
24524 max_size = INTVAL (max_size_exp);
24525 if (probable_max_size_exp)
24526 probable_max_size = INTVAL (probable_max_size_exp);
24527 if (CONST_INT_P (expected_size_exp))
24528 expected_size = INTVAL (expected_size_exp);
24531 /* Make sure we don't need to care about overflow later on. */
24532 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24533 return false;
24535 /* Step 0: Decide on preferred algorithm, desired alignment and
24536 size of chunks to be copied by main loop. */
24537 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24538 issetmem,
24539 issetmem && val_exp == const0_rtx,
24540 &dynamic_check, &noalign);
24541 if (alg == libcall)
24542 return false;
24543 gcc_assert (alg != no_stringop);
24545 /* For now the vector version of memset is generated only for memory zeroing, as
24546 creating the promoted vector value is very cheap in this case. */
24547 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24548 alg = unrolled_loop;
24550 if (!count)
24551 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24552 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24553 if (!issetmem)
24554 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24556 unroll_factor = 1;
24557 move_mode = word_mode;
24558 switch (alg)
24560 case libcall:
24561 case no_stringop:
24562 case last_alg:
24563 gcc_unreachable ();
24564 case loop_1_byte:
24565 need_zero_guard = true;
24566 move_mode = QImode;
24567 break;
24568 case loop:
24569 need_zero_guard = true;
24570 break;
24571 case unrolled_loop:
24572 need_zero_guard = true;
24573 unroll_factor = (TARGET_64BIT ? 4 : 2);
24574 break;
24575 case vector_loop:
24576 need_zero_guard = true;
24577 unroll_factor = 4;
24578 /* Find the widest supported mode. */
24579 move_mode = word_mode;
24580 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24581 != CODE_FOR_nothing)
24582 move_mode = GET_MODE_WIDER_MODE (move_mode);
24584 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24585 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24586 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24588 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24589 move_mode = mode_for_vector (word_mode, nunits);
24590 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24591 move_mode = word_mode;
24593 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24594 break;
24595 case rep_prefix_8_byte:
24596 move_mode = DImode;
24597 break;
24598 case rep_prefix_4_byte:
24599 move_mode = SImode;
24600 break;
24601 case rep_prefix_1_byte:
24602 move_mode = QImode;
24603 break;
24605 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24606 epilogue_size_needed = size_needed;
24608 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24609 if (!TARGET_ALIGN_STRINGOPS || noalign)
24610 align = desired_align;
24612 /* Step 1: Prologue guard. */
24614 /* Alignment code needs count to be in register. */
24615 if (CONST_INT_P (count_exp) && desired_align > align)
24617 if (INTVAL (count_exp) > desired_align
24618 && INTVAL (count_exp) > size_needed)
24620 align_bytes
24621 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24622 if (align_bytes <= 0)
24623 align_bytes = 0;
24624 else
24625 align_bytes = desired_align - align_bytes;
24627 if (align_bytes == 0)
24628 count_exp = force_reg (counter_mode (count_exp), count_exp);
24630 gcc_assert (desired_align >= 1 && align >= 1);
24632 /* Misaligned move sequences handle both prologue and epilogue at once.
24633 Default code generation results in smaller code for large alignments
24634 and also avoids redundant work when sizes are known precisely. */
24635 misaligned_prologue_used
24636 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24637 && MAX (desired_align, epilogue_size_needed) <= 32
24638 && desired_align <= epilogue_size_needed
24639 && ((desired_align > align && !align_bytes)
24640 || (!count && epilogue_size_needed > 1)));
24642 /* Do the cheap promotion to allow better CSE across the
24643 main loop and epilogue (i.e. one load of the big constant in
24644 front of all the code).
24645 For now the misaligned move sequences do not have a fast path
24646 without broadcasting. */
24647 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24649 if (alg == vector_loop)
24651 gcc_assert (val_exp == const0_rtx);
24652 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24653 promoted_val = promote_duplicated_reg_to_size (val_exp,
24654 GET_MODE_SIZE (word_mode),
24655 desired_align, align);
24657 else
24659 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24660 desired_align, align);
24663 /* Misaligned move sequences handle both prologues and epilogues at once.
24664 Default code generation results in smaller code for large alignments and
24665 also avoids redundant work when sizes are known precisely. */
24666 if (misaligned_prologue_used)
24668 /* The misaligned move prologue handles small blocks by itself. */
24669 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24670 (dst, src, &destreg, &srcreg,
24671 move_mode, promoted_val, vec_promoted_val,
24672 &count_exp,
24673 &jump_around_label,
24674 desired_align < align
24675 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24676 desired_align, align, &min_size, dynamic_check, issetmem);
24677 if (!issetmem)
24678 src = change_address (src, BLKmode, srcreg);
24679 dst = change_address (dst, BLKmode, destreg);
24680 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24681 epilogue_size_needed = 0;
24682 if (need_zero_guard && !min_size)
24684 /* It is possible that we copied enough so the main loop will not
24685 execute. */
24686 gcc_assert (size_needed > 1);
24687 if (jump_around_label == NULL_RTX)
24688 jump_around_label = gen_label_rtx ();
24689 emit_cmp_and_jump_insns (count_exp,
24690 GEN_INT (size_needed),
24691 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24692 if (expected_size == -1
24693 || expected_size < (desired_align - align) / 2 + size_needed)
24694 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24695 else
24696 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24699 /* Ensure that alignment prologue won't copy past end of block. */
24700 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24702 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24703 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24704 Make sure it is power of 2. */
24705 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
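/* For example (illustration, not in the original source): with size_needed
   == 16 and desired_align - align == 3 this gives MAX (15, 3) = 15, which
   the line above rounds up to the next power of two, 1 << (3 + 1) = 16, so
   the prologue guard keeps blocks shorter than 16 bytes out of the main
   loop and the alignment prologue.  */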
24707 /* To improve performance of small blocks, we jump around the VAL
24708 promoting code. This means that if the promoted VAL is not constant,
24709 we might not use it in the epilogue and have to use the byte
24710 loop variant. */
24711 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24712 force_loopy_epilogue = true;
24713 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24714 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24716 /* If main algorithm works on QImode, no epilogue is needed.
24717 For small sizes just don't align anything. */
24718 if (size_needed == 1)
24719 desired_align = align;
24720 else
24721 goto epilogue;
24723 else if (!count
24724 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24726 label = gen_label_rtx ();
24727 emit_cmp_and_jump_insns (count_exp,
24728 GEN_INT (epilogue_size_needed),
24729 LTU, 0, counter_mode (count_exp), 1, label);
24730 if (expected_size == -1 || expected_size < epilogue_size_needed)
24731 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24732 else
24733 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24737 /* Emit code to decide on runtime whether library call or inline should be
24738 used. */
24739 if (dynamic_check != -1)
24741 if (!issetmem && CONST_INT_P (count_exp))
24743 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24745 emit_block_move_via_libcall (dst, src, count_exp, false);
24746 count_exp = const0_rtx;
24747 goto epilogue;
24750 else
24752 rtx hot_label = gen_label_rtx ();
24753 if (jump_around_label == NULL_RTX)
24754 jump_around_label = gen_label_rtx ();
24755 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24756 LEU, 0, counter_mode (count_exp),
24757 1, hot_label);
24758 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24759 if (issetmem)
24760 set_storage_via_libcall (dst, count_exp, val_exp, false);
24761 else
24762 emit_block_move_via_libcall (dst, src, count_exp, false);
24763 emit_jump (jump_around_label);
24764 emit_label (hot_label);
24768 /* Step 2: Alignment prologue. */
24769 /* Do the expensive promotion once we branched off the small blocks. */
24770 if (issetmem && !promoted_val)
24771 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24772 desired_align, align);
24774 if (desired_align > align && !misaligned_prologue_used)
24776 if (align_bytes == 0)
24778 /* Except for the first move in the prologue, we no longer know
24779 the constant offset in the aliasing info. It doesn't seem worth
24780 the pain to maintain it for the first move, so throw away
24781 the info early. */
24782 dst = change_address (dst, BLKmode, destreg);
24783 if (!issetmem)
24784 src = change_address (src, BLKmode, srcreg);
24785 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24786 promoted_val, vec_promoted_val,
24787 count_exp, align, desired_align,
24788 issetmem);
24789 /* At most desired_align - align bytes are copied. */
24790 if (min_size < (unsigned)(desired_align - align))
24791 min_size = 0;
24792 else
24793 min_size -= desired_align - align;
24795 else
24797 /* If we know how many bytes need to be stored before dst is
24798 sufficiently aligned, maintain aliasing info accurately. */
24799 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24800 srcreg,
24801 promoted_val,
24802 vec_promoted_val,
24803 desired_align,
24804 align_bytes,
24805 issetmem);
24807 count_exp = plus_constant (counter_mode (count_exp),
24808 count_exp, -align_bytes);
24809 count -= align_bytes;
24810 min_size -= align_bytes;
24811 max_size -= align_bytes;
24813 if (need_zero_guard
24814 && !min_size
24815 && (count < (unsigned HOST_WIDE_INT) size_needed
24816 || (align_bytes == 0
24817 && count < ((unsigned HOST_WIDE_INT) size_needed
24818 + desired_align - align))))
24820 /* It is possible that we copied enough so the main loop will not
24821 execute. */
24822 gcc_assert (size_needed > 1);
24823 if (label == NULL_RTX)
24824 label = gen_label_rtx ();
24825 emit_cmp_and_jump_insns (count_exp,
24826 GEN_INT (size_needed),
24827 LTU, 0, counter_mode (count_exp), 1, label);
24828 if (expected_size == -1
24829 || expected_size < (desired_align - align) / 2 + size_needed)
24830 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24831 else
24832 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24835 if (label && size_needed == 1)
24837 emit_label (label);
24838 LABEL_NUSES (label) = 1;
24839 label = NULL;
24840 epilogue_size_needed = 1;
24841 if (issetmem)
24842 promoted_val = val_exp;
24844 else if (label == NULL_RTX && !misaligned_prologue_used)
24845 epilogue_size_needed = size_needed;
24847 /* Step 3: Main loop. */
24849 switch (alg)
24851 case libcall:
24852 case no_stringop:
24853 case last_alg:
24854 gcc_unreachable ();
24855 case loop_1_byte:
24856 case loop:
24857 case unrolled_loop:
24858 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24859 count_exp, move_mode, unroll_factor,
24860 expected_size, issetmem);
24861 break;
24862 case vector_loop:
24863 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24864 vec_promoted_val, count_exp, move_mode,
24865 unroll_factor, expected_size, issetmem);
24866 break;
24867 case rep_prefix_8_byte:
24868 case rep_prefix_4_byte:
24869 case rep_prefix_1_byte:
24870 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24871 val_exp, count_exp, move_mode, issetmem);
24872 break;
24874 /* Adjust properly the offset of src and dest memory for aliasing. */
24875 if (CONST_INT_P (count_exp))
24877 if (!issetmem)
24878 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24879 (count / size_needed) * size_needed);
24880 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24881 (count / size_needed) * size_needed);
24883 else
24885 if (!issetmem)
24886 src = change_address (src, BLKmode, srcreg);
24887 dst = change_address (dst, BLKmode, destreg);
24890 /* Step 4: Epilogue to copy the remaining bytes. */
24891 epilogue:
24892 if (label)
24894 /* When the main loop is done, COUNT_EXP might hold the original count,
24895 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
24896 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
24897 bytes. Compensate if needed. */
24899 if (size_needed < epilogue_size_needed)
24901 tmp =
24902 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24903 GEN_INT (size_needed - 1), count_exp, 1,
24904 OPTAB_DIRECT);
24905 if (tmp != count_exp)
24906 emit_move_insn (count_exp, tmp);
24908 emit_label (label);
24909 LABEL_NUSES (label) = 1;
24912 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24914 if (force_loopy_epilogue)
24915 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24916 epilogue_size_needed);
24917 else
24919 if (issetmem)
24920 expand_setmem_epilogue (dst, destreg, promoted_val,
24921 vec_promoted_val, count_exp,
24922 epilogue_size_needed);
24923 else
24924 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24925 epilogue_size_needed);
24928 if (jump_around_label)
24929 emit_label (jump_around_label);
24930 return true;
24934 /* Expand the appropriate insns for doing strlen if not just doing
24935 repnz; scasb
24937 out = result, initialized with the start address
24938 align_rtx = alignment of the address.
24939 scratch = scratch register, initialized with the start address when
24940 not aligned, otherwise undefined
24942 This is just the body. It needs the initializations mentioned above and
24943 some address computation at the end. These things are done in i386.md. */
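/* Rough sketch of the emitted sequence (illustration only, not from the
   original source): compare up to three leading bytes against zero one at a
   time until OUT is 4-byte aligned, then loop loading 32 bits at a time and
   testing them for a zero byte with the bit trick used below, and finally
   adjust OUT so it points exactly at the terminating zero byte.  */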
24945 static void
24946 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24948 int align;
24949 rtx tmp;
24950 rtx align_2_label = NULL_RTX;
24951 rtx align_3_label = NULL_RTX;
24952 rtx align_4_label = gen_label_rtx ();
24953 rtx end_0_label = gen_label_rtx ();
24954 rtx mem;
24955 rtx tmpreg = gen_reg_rtx (SImode);
24956 rtx scratch = gen_reg_rtx (SImode);
24957 rtx cmp;
24959 align = 0;
24960 if (CONST_INT_P (align_rtx))
24961 align = INTVAL (align_rtx);
24963 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24965 /* Is there a known alignment and is it less than 4? */
24966 if (align < 4)
24968 rtx scratch1 = gen_reg_rtx (Pmode);
24969 emit_move_insn (scratch1, out);
24970 /* Is there a known alignment and is it not 2? */
24971 if (align != 2)
24973 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24974 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24976 /* Leave just the 3 lower bits. */
24977 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24978 NULL_RTX, 0, OPTAB_WIDEN);
24980 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24981 Pmode, 1, align_4_label);
24982 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24983 Pmode, 1, align_2_label);
24984 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24985 Pmode, 1, align_3_label);
24987 else
24989 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24990 check if it is aligned to 4 bytes. */
24992 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24993 NULL_RTX, 0, OPTAB_WIDEN);
24995 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24996 Pmode, 1, align_4_label);
24999 mem = change_address (src, QImode, out);
25001 /* Now compare the bytes. */
25003 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
25004 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25005 QImode, 1, end_0_label);
25007 /* Increment the address. */
25008 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25010 /* Not needed with an alignment of 2 */
25011 if (align != 2)
25013 emit_label (align_2_label);
25015 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25016 end_0_label);
25018 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25020 emit_label (align_3_label);
25023 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25024 end_0_label);
25026 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25029 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25030 align this loop: it only makes programs bigger and does not help
25031 speed. */
25032 emit_label (align_4_label);
25034 mem = change_address (src, SImode, out);
25035 emit_move_insn (scratch, mem);
25036 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25038 /* This formula yields a nonzero result iff one of the bytes is zero.
25039 This saves three branches inside the loop and many cycles. */
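/* In C terms the test emitted below is (illustration, not from the original
   source):

       ((x - 0x01010101) & ~x & 0x80808080) != 0

   which is nonzero exactly when some byte of x is zero.  E.g. for
   x = 0x12003456 it yields 0x00800000, flagging the zero byte, while for
   x = 0x11223344 it yields 0.  */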
25041 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25042 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25043 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25044 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25045 gen_int_mode (0x80808080, SImode)));
25046 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25047 align_4_label);
25049 if (TARGET_CMOVE)
25051 rtx reg = gen_reg_rtx (SImode);
25052 rtx reg2 = gen_reg_rtx (Pmode);
25053 emit_move_insn (reg, tmpreg);
25054 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25056 /* If zero is not in the first two bytes, move two bytes forward. */
25057 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25058 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25059 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25060 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25061 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25062 reg,
25063 tmpreg)));
25064 /* Emit lea manually to avoid clobbering of flags. */
25065 emit_insn (gen_rtx_SET (SImode, reg2,
25066 gen_rtx_PLUS (Pmode, out, const2_rtx)));
25068 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25069 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25070 emit_insn (gen_rtx_SET (VOIDmode, out,
25071 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25072 reg2,
25073 out)));
25075 else
25077 rtx end_2_label = gen_label_rtx ();
25078 /* Is zero in the first two bytes? */
25080 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25081 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25082 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25083 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25084 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25085 pc_rtx);
25086 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25087 JUMP_LABEL (tmp) = end_2_label;
25089 /* Not in the first two. Move two bytes forward. */
25090 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25091 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25093 emit_label (end_2_label);
25097 /* Avoid branch in fixing the byte. */
25098 tmpreg = gen_lowpart (QImode, tmpreg);
25099 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25100 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25101 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25102 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25104 emit_label (end_0_label);
25107 /* Expand strlen. */
25109 bool
25110 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25112 rtx addr, scratch1, scratch2, scratch3, scratch4;
25114 /* The generic case of the strlen expander is long. Avoid expanding it
25115 unless TARGET_INLINE_ALL_STRINGOPS. */
25117 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25118 && !TARGET_INLINE_ALL_STRINGOPS
25119 && !optimize_insn_for_size_p ()
25120 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25121 return false;
25123 addr = force_reg (Pmode, XEXP (src, 0));
25124 scratch1 = gen_reg_rtx (Pmode);
25126 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25127 && !optimize_insn_for_size_p ())
25129 /* Well it seems that some optimizer does not combine a call like
25130 foo(strlen(bar), strlen(bar));
25131 when the move and the subtraction are done here. It does calculate
25132 the length just once when these instructions are done inside
25133 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25134 often used and I use one fewer register for the lifetime of
25135 output_strlen_unroll(), this is better. */
25137 emit_move_insn (out, addr);
25139 ix86_expand_strlensi_unroll_1 (out, src, align);
25141 /* strlensi_unroll_1 returns the address of the zero at the end of
25142 the string, like memchr(), so compute the length by subtracting
25143 the start address. */
25144 emit_insn (ix86_gen_sub3 (out, out, addr));
25146 else
25148 rtx unspec;
25150 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25151 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25152 return false;
25154 scratch2 = gen_reg_rtx (Pmode);
25155 scratch3 = gen_reg_rtx (Pmode);
25156 scratch4 = force_reg (Pmode, constm1_rtx);
25158 emit_move_insn (scratch3, addr);
25159 eoschar = force_reg (QImode, eoschar);
25161 src = replace_equiv_address_nv (src, scratch3);
25163 /* If .md starts supporting :P, this can be done in .md. */
25164 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25165 scratch4), UNSPEC_SCAS);
25166 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25167 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25168 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25170 return true;
25173 /* For a given symbol (function), construct code to compute the address of its
25174 PLT entry in the large x86-64 PIC model. */
25175 static rtx
25176 construct_plt_address (rtx symbol)
25178 rtx tmp, unspec;
25180 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25181 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25182 gcc_assert (Pmode == DImode);
25184 tmp = gen_reg_rtx (Pmode);
25185 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25187 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25188 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25189 return tmp;
25193 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25194 rtx callarg2,
25195 rtx pop, bool sibcall)
25197 unsigned int const cregs_size
25198 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25199 rtx vec[3 + cregs_size];
25200 rtx use = NULL, call;
25201 unsigned int vec_len = 0;
25203 if (pop == const0_rtx)
25204 pop = NULL;
25205 gcc_assert (!TARGET_64BIT || !pop);
25207 if (TARGET_MACHO && !TARGET_64BIT)
25209 #if TARGET_MACHO
25210 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25211 fnaddr = machopic_indirect_call_target (fnaddr);
25212 #endif
25214 else
25216 /* Static functions and indirect calls don't need the pic register. Also,
25217 check if the PLT was explicitly avoided via -fno-plt or the "noplt" attribute,
25218 making it an indirect call. */
25219 if (flag_pic
25220 && (!TARGET_64BIT
25221 || (ix86_cmodel == CM_LARGE_PIC
25222 && DEFAULT_ABI != MS_ABI))
25223 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25224 && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
25225 && flag_plt
25226 && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
25227 || !lookup_attribute ("noplt",
25228 DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
25229 use_reg (&use, pic_offset_table_rtx);
25232 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
25234 rtx al = gen_rtx_REG (QImode, AX_REG);
25235 emit_move_insn (al, callarg2);
25236 use_reg (&use, al);
25239 if (ix86_cmodel == CM_LARGE_PIC
25240 && !TARGET_PECOFF
25241 && MEM_P (fnaddr)
25242 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25243 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25244 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25245 else if (sibcall
25246 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25247 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25249 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25250 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25253 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25254 if (retval)
25255 call = gen_rtx_SET (VOIDmode, retval, call);
25256 vec[vec_len++] = call;
25258 if (pop)
25260 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25261 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25262 vec[vec_len++] = pop;
25265 if (TARGET_64BIT_MS_ABI
25266 && (!callarg2 || INTVAL (callarg2) != -2))
25268 unsigned i;
25270 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
25271 UNSPEC_MS_TO_SYSV_CALL);
25273 for (i = 0; i < cregs_size; i++)
25275 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25276 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25278 vec[vec_len++]
25279 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
25283 if (vec_len > 1)
25284 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25285 call = emit_call_insn (call);
25286 if (use)
25287 CALL_INSN_FUNCTION_USAGE (call) = use;
25289 return call;
25292 /* Return true if the function being called was marked with attribute "noplt"
25293 or using -fno-plt and we are compiling for non-PIC and x86_64. We need to
25294 handle the non-PIC case in the backend because there is no easy interface
25295 for the front-end to force non-PLT calls to use the GOT. This is currently
25296 used only with 64-bit ELF targets to call the function marked "noplt"
25297 indirectly. */
25299 static bool
25300 ix86_nopic_noplt_attribute_p (rtx call_op)
25302 if (flag_pic || ix86_cmodel == CM_LARGE
25303 || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
25304 || SYMBOL_REF_LOCAL_P (call_op))
25305 return false;
25307 tree symbol_decl = SYMBOL_REF_DECL (call_op);
25309 if (!flag_plt
25310 || (symbol_decl != NULL_TREE
25311 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
25312 return true;
25314 return false;
25317 /* Output the assembly for a call instruction. */
25319 const char *
25320 ix86_output_call_insn (rtx insn, rtx call_op)
25322 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25323 bool seh_nop_p = false;
25324 const char *xasm;
25326 if (SIBLING_CALL_P (insn))
25328 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
25329 xasm = "jmp\t*%p0@GOTPCREL(%%rip)";
25330 else if (direct_p)
25331 xasm = "jmp\t%P0";
25332 /* SEH epilogue detection requires the indirect branch case
25333 to include REX.W. */
25334 else if (TARGET_SEH)
25335 xasm = "rex.W jmp %A0";
25336 else
25337 xasm = "jmp\t%A0";
25339 /* Just before the sibling call, add 11 bytes of nops to patch the function
25340 exit: 2 bytes for 'jmp 09' and the remaining 9 bytes. */
25341 if (TARGET_64BIT && patch_functions_for_instrumentation)
25342 ix86_output_function_nops_prologue_epilogue (
25343 asm_out_file,
25344 FUNCTION_PATCH_EPILOGUE_SECTION,
25345 ASM_BYTE"0xeb, 0x09",
25348 output_asm_insn (xasm, &call_op);
25349 return "";
25352 /* SEH unwinding can require an extra nop to be emitted in several
25353 circumstances. Determine if we have one of those. */
25354 if (TARGET_SEH)
25356 rtx i;
25358 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25360 /* If we get to another real insn, we don't need the nop. */
25361 if (INSN_P (i))
25362 break;
25364 /* If we get to the epilogue note, prevent a catch region from
25365 being adjacent to the standard epilogue sequence. If non-
25366 call-exceptions, we'll have done this during epilogue emission. */
25367 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25368 && !flag_non_call_exceptions
25369 && !can_throw_internal (insn))
25371 seh_nop_p = true;
25372 break;
25376 /* If we didn't find a real insn following the call, prevent the
25377 unwinder from looking into the next function. */
25378 if (i == NULL)
25379 seh_nop_p = true;
25382 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
25383 xasm = "call\t*%p0@GOTPCREL(%%rip)";
25384 else if (direct_p)
25385 xasm = "call\t%P0";
25386 else
25387 xasm = "call\t%A0";
25389 output_asm_insn (xasm, &call_op);
25391 if (seh_nop_p)
25392 return "nop";
25394 return "";
25397 /* Clear stack slot assignments remembered from previous functions.
25398 This is called from INIT_EXPANDERS once before RTL is emitted for each
25399 function. */
25401 static struct machine_function *
25402 ix86_init_machine_status (void)
25404 struct machine_function *f;
25406 f = ggc_alloc_cleared_machine_function ();
25407 f->use_fast_prologue_epilogue_nregs = -1;
25408 f->call_abi = ix86_abi;
25410 return f;
25413 /* Return a MEM corresponding to a stack slot with mode MODE.
25414 Allocate a new slot if necessary.
25416 The RTL for a function can have several slots available: N is
25417 which slot to use. */
25420 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25422 struct stack_local_entry *s;
25424 gcc_assert (n < MAX_386_STACK_LOCALS);
25426 for (s = ix86_stack_locals; s; s = s->next)
25427 if (s->mode == mode && s->n == n)
25428 return validize_mem (copy_rtx (s->rtl));
25430 s = ggc_alloc_stack_local_entry ();
25431 s->n = n;
25432 s->mode = mode;
25433 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25435 s->next = ix86_stack_locals;
25436 ix86_stack_locals = s;
25437 return validize_mem (s->rtl);
25440 static void
25441 ix86_instantiate_decls (void)
25443 struct stack_local_entry *s;
25445 for (s = ix86_stack_locals; s; s = s->next)
25446 if (s->rtl != NULL_RTX)
25447 instantiate_decl_rtl (s->rtl);
25450 /* Check whether x86 address PARTS is a pc-relative address. */
25452 static bool
25453 rip_relative_addr_p (struct ix86_address *parts)
25455 rtx base, index, disp;
25457 base = parts->base;
25458 index = parts->index;
25459 disp = parts->disp;
25461 if (disp && !base && !index)
25463 if (TARGET_64BIT)
25465 rtx symbol = disp;
25467 if (GET_CODE (disp) == CONST)
25468 symbol = XEXP (disp, 0);
25469 if (GET_CODE (symbol) == PLUS
25470 && CONST_INT_P (XEXP (symbol, 1)))
25471 symbol = XEXP (symbol, 0);
25473 if (GET_CODE (symbol) == LABEL_REF
25474 || (GET_CODE (symbol) == SYMBOL_REF
25475 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25476 || (GET_CODE (symbol) == UNSPEC
25477 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25478 || XINT (symbol, 1) == UNSPEC_PCREL
25479 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25480 return true;
25483 return false;
25486 /* Calculate the length of the memory address in the instruction encoding.
25487 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25488 or other prefixes. We never generate addr32 prefix for LEA insn. */
25491 memory_address_length (rtx addr, bool lea)
25493 struct ix86_address parts;
25494 rtx base, index, disp;
25495 int len;
25496 int ok;
25498 if (GET_CODE (addr) == PRE_DEC
25499 || GET_CODE (addr) == POST_INC
25500 || GET_CODE (addr) == PRE_MODIFY
25501 || GET_CODE (addr) == POST_MODIFY)
25502 return 0;
25504 ok = ix86_decompose_address (addr, &parts);
25505 gcc_assert (ok);
25507 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25509 /* If this is not LEA instruction, add the length of addr32 prefix. */
25510 if (TARGET_64BIT && !lea
25511 && (SImode_address_operand (addr, VOIDmode)
25512 || (parts.base && GET_MODE (parts.base) == SImode)
25513 || (parts.index && GET_MODE (parts.index) == SImode)))
25514 len++;
25516 base = parts.base;
25517 index = parts.index;
25518 disp = parts.disp;
25520 if (base && GET_CODE (base) == SUBREG)
25521 base = SUBREG_REG (base);
25522 if (index && GET_CODE (index) == SUBREG)
25523 index = SUBREG_REG (index);
25525 gcc_assert (base == NULL_RTX || REG_P (base));
25526 gcc_assert (index == NULL_RTX || REG_P (index));
25528 /* Rule of thumb:
25529 - esp as the base always wants an index,
25530 - ebp as the base always wants a displacement,
25531 - r12 as the base always wants an index,
25532 - r13 as the base always wants a displacement. */
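/* For illustration (not part of the original source): a plain (%ecx) operand
   adds nothing here, while (%esp) needs a SIB byte and (%ebp) needs a disp8,
   so both add one byte; likewise (%r12) and (%r13) in 64-bit code.  */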
25534 /* Register Indirect. */
25535 if (base && !index && !disp)
25537 /* esp (for its index) and ebp (for its displacement) need
25538 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25539 code. */
25540 if (base == arg_pointer_rtx
25541 || base == frame_pointer_rtx
25542 || REGNO (base) == SP_REG
25543 || REGNO (base) == BP_REG
25544 || REGNO (base) == R12_REG
25545 || REGNO (base) == R13_REG)
25546 len++;
25549 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25550 is not disp32, but disp32(%rip), so for disp32
25551 SIB byte is needed, unless print_operand_address
25552 optimizes it into disp32(%rip) or (%rip) is implied
25553 by UNSPEC. */
25554 else if (disp && !base && !index)
25556 len += 4;
25557 if (rip_relative_addr_p (&parts))
25558 len++;
25560 else
25562 /* Find the length of the displacement constant. */
25563 if (disp)
25565 if (base && satisfies_constraint_K (disp))
25566 len += 1;
25567 else
25568 len += 4;
25570 /* ebp always wants a displacement. Similarly r13. */
25571 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25572 len++;
25574 /* An index requires the two-byte modrm form.... */
25575 if (index
25576 /* ...like esp (or r12), which always wants an index. */
25577 || base == arg_pointer_rtx
25578 || base == frame_pointer_rtx
25579 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25580 len++;
25583 return len;
25586 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
25587 is set, expect that the insn has an 8-bit immediate alternative. */
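/* For illustration (not part of the original source): with SHORTFORM set, an
   immediate such as 100 fits the signed 8-bit range -128..127 and counts as
   1 byte, while 1000 falls through to the mode-sized encoding and counts as
   4 bytes in SImode (and also 4 in DImode, since DImode immediates are
   sign-extended 32-bit values).  */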
25589 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25591 int len = 0;
25592 int i;
25593 extract_insn_cached (insn);
25594 for (i = recog_data.n_operands - 1; i >= 0; --i)
25595 if (CONSTANT_P (recog_data.operand[i]))
25597 enum attr_mode mode = get_attr_mode (insn);
25599 gcc_assert (!len);
25600 if (shortform && CONST_INT_P (recog_data.operand[i]))
25602 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25603 switch (mode)
25605 case MODE_QI:
25606 len = 1;
25607 continue;
25608 case MODE_HI:
25609 ival = trunc_int_for_mode (ival, HImode);
25610 break;
25611 case MODE_SI:
25612 ival = trunc_int_for_mode (ival, SImode);
25613 break;
25614 default:
25615 break;
25617 if (IN_RANGE (ival, -128, 127))
25619 len = 1;
25620 continue;
25623 switch (mode)
25625 case MODE_QI:
25626 len = 1;
25627 break;
25628 case MODE_HI:
25629 len = 2;
25630 break;
25631 case MODE_SI:
25632 len = 4;
25633 break;
25634 /* Immediates for DImode instructions are encoded
25635 as 32-bit sign-extended values. */
25636 case MODE_DI:
25637 len = 4;
25638 break;
25639 default:
25640 fatal_insn ("unknown insn mode", insn);
25643 return len;
25646 /* Compute default value for "length_address" attribute. */
25648 ix86_attr_length_address_default (rtx insn)
25650 int i;
25652 if (get_attr_type (insn) == TYPE_LEA)
25654 rtx set = PATTERN (insn), addr;
25656 if (GET_CODE (set) == PARALLEL)
25657 set = XVECEXP (set, 0, 0);
25659 gcc_assert (GET_CODE (set) == SET);
25661 addr = SET_SRC (set);
25663 return memory_address_length (addr, true);
25666 extract_insn_cached (insn);
25667 for (i = recog_data.n_operands - 1; i >= 0; --i)
25668 if (MEM_P (recog_data.operand[i]))
25670 constrain_operands_cached (reload_completed);
25671 if (which_alternative != -1)
25673 const char *constraints = recog_data.constraints[i];
25674 int alt = which_alternative;
25676 while (*constraints == '=' || *constraints == '+')
25677 constraints++;
25678 while (alt-- > 0)
25679 while (*constraints++ != ',')
25681 /* Skip ignored operands. */
25682 if (*constraints == 'X')
25683 continue;
25685 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25687 return 0;
25690 /* Compute default value for "length_vex" attribute. It includes
25691 2 or 3 byte VEX prefix and 1 opcode byte. */
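/* For illustration (not part of the original source): a VEX insn with a 0f
   opcode whose operands need neither REX.W, REX.X nor REX.B is counted as
   2 + 1 bytes below, while a DImode general-register operand or a memory
   operand mentioning an extended register (r8-r15, xmm8-xmm15) forces the
   3-byte prefix and is counted as 3 + 1.  */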
25694 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25696 int i;
25698 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit requires
25699 the 3-byte VEX prefix. */
25700 if (!has_0f_opcode || has_vex_w)
25701 return 3 + 1;
25703 /* We can always use 2 byte VEX prefix in 32bit. */
25704 if (!TARGET_64BIT)
25705 return 2 + 1;
25707 extract_insn_cached (insn);
25709 for (i = recog_data.n_operands - 1; i >= 0; --i)
25710 if (REG_P (recog_data.operand[i]))
25712 /* REX.W bit uses 3 byte VEX prefix. */
25713 if (GET_MODE (recog_data.operand[i]) == DImode
25714 && GENERAL_REG_P (recog_data.operand[i]))
25715 return 3 + 1;
25717 else
25719 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25720 if (MEM_P (recog_data.operand[i])
25721 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25722 return 3 + 1;
25725 return 2 + 1;
25728 /* Return the maximum number of instructions a cpu can issue. */
25730 static int
25731 ix86_issue_rate (void)
25733 switch (ix86_tune)
25735 case PROCESSOR_PENTIUM:
25736 case PROCESSOR_BONNELL:
25737 case PROCESSOR_SILVERMONT:
25738 case PROCESSOR_INTEL:
25739 case PROCESSOR_K6:
25740 case PROCESSOR_BTVER2:
25741 case PROCESSOR_PENTIUM4:
25742 case PROCESSOR_NOCONA:
25743 return 2;
25745 case PROCESSOR_PENTIUMPRO:
25746 case PROCESSOR_ATHLON:
25747 case PROCESSOR_K8:
25748 case PROCESSOR_AMDFAM10:
25749 case PROCESSOR_GENERIC:
25750 case PROCESSOR_BTVER1:
25751 return 3;
25753 case PROCESSOR_BDVER1:
25754 case PROCESSOR_BDVER2:
25755 case PROCESSOR_BDVER3:
25756 case PROCESSOR_BDVER4:
25757 case PROCESSOR_CORE2:
25758 case PROCESSOR_NEHALEM:
25759 case PROCESSOR_SANDYBRIDGE:
25760 case PROCESSOR_HASWELL:
25761 return 4;
25763 default:
25764 return 1;
25768 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
25769 by DEP_INSN and nothing else set by DEP_INSN. */
25771 static bool
25772 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25774 rtx set, set2;
25776 /* Simplify the test for uninteresting insns. */
25777 if (insn_type != TYPE_SETCC
25778 && insn_type != TYPE_ICMOV
25779 && insn_type != TYPE_FCMOV
25780 && insn_type != TYPE_IBR)
25781 return false;
25783 if ((set = single_set (dep_insn)) != 0)
25785 set = SET_DEST (set);
25786 set2 = NULL_RTX;
25788 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25789 && XVECLEN (PATTERN (dep_insn), 0) == 2
25790 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25791 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25793 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25794 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25796 else
25797 return false;
25799 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25800 return false;
25802 /* This test is true if the dependent insn reads the flags but
25803 not any other potentially set register. */
25804 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25805 return false;
25807 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25808 return false;
25810 return true;
25813 /* Return true iff USE_INSN has a memory address with operands set by
25814 SET_INSN. */
25816 bool
25817 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25819 int i;
25820 extract_insn_cached (use_insn);
25821 for (i = recog_data.n_operands - 1; i >= 0; --i)
25822 if (MEM_P (recog_data.operand[i]))
25824 rtx addr = XEXP (recog_data.operand[i], 0);
25825 return modified_in_p (addr, set_insn) != 0;
25827 return false;
25830 /* Helper function for exact_store_load_dependency.
25831 Return true if addr is found in insn. */
25832 static bool
25833 exact_dependency_1 (rtx addr, rtx insn)
25835 enum rtx_code code;
25836 const char *format_ptr;
25837 int i, j;
25839 code = GET_CODE (insn);
25840 switch (code)
25842 case MEM:
25843 if (rtx_equal_p (addr, insn))
25844 return true;
25845 break;
25846 case REG:
25847 CASE_CONST_ANY:
25848 case SYMBOL_REF:
25849 case CODE_LABEL:
25850 case PC:
25851 case CC0:
25852 case EXPR_LIST:
25853 return false;
25854 default:
25855 break;
25858 format_ptr = GET_RTX_FORMAT (code);
25859 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25861 switch (*format_ptr++)
25863 case 'e':
25864 if (exact_dependency_1 (addr, XEXP (insn, i)))
25865 return true;
25866 break;
25867 case 'E':
25868 for (j = 0; j < XVECLEN (insn, i); j++)
25869 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25870 return true;
25871 break;
25874 return false;
25877 /* Return true if there exists an exact dependency between the store and the
25878 load, i.e. the same memory address is used in them. */
25879 static bool
25880 exact_store_load_dependency (rtx store, rtx load)
25882 rtx set1, set2;
25884 set1 = single_set (store);
25885 if (!set1)
25886 return false;
25887 if (!MEM_P (SET_DEST (set1)))
25888 return false;
25889 set2 = single_set (load);
25890 if (!set2)
25891 return false;
25892 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25893 return true;
25894 return false;
25897 static int
25898 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25900 enum attr_type insn_type, dep_insn_type;
25901 enum attr_memory memory;
25902 rtx set, set2;
25903 int dep_insn_code_number;
25905 /* Anti and output dependencies have zero cost on all CPUs. */
25906 if (REG_NOTE_KIND (link) != 0)
25907 return 0;
25909 dep_insn_code_number = recog_memoized (dep_insn);
25911 /* If we can't recognize the insns, we can't really do anything. */
25912 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25913 return cost;
25915 insn_type = get_attr_type (insn);
25916 dep_insn_type = get_attr_type (dep_insn);
25918 switch (ix86_tune)
25920 case PROCESSOR_PENTIUM:
25921 /* Address Generation Interlock adds a cycle of latency. */
25922 if (insn_type == TYPE_LEA)
25924 rtx addr = PATTERN (insn);
25926 if (GET_CODE (addr) == PARALLEL)
25927 addr = XVECEXP (addr, 0, 0);
25929 gcc_assert (GET_CODE (addr) == SET);
25931 addr = SET_SRC (addr);
25932 if (modified_in_p (addr, dep_insn))
25933 cost += 1;
25935 else if (ix86_agi_dependent (dep_insn, insn))
25936 cost += 1;
25938 /* ??? Compares pair with jump/setcc. */
25939 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25940 cost = 0;
25942 /* Floating point stores require value to be ready one cycle earlier. */
25943 if (insn_type == TYPE_FMOV
25944 && get_attr_memory (insn) == MEMORY_STORE
25945 && !ix86_agi_dependent (dep_insn, insn))
25946 cost += 1;
25947 break;
25949 case PROCESSOR_PENTIUMPRO:
25950 /* INT->FP conversion is expensive. */
25951 if (get_attr_fp_int_src (dep_insn))
25952 cost += 5;
25954 /* There is one cycle extra latency between an FP op and a store. */
25955 if (insn_type == TYPE_FMOV
25956 && (set = single_set (dep_insn)) != NULL_RTX
25957 && (set2 = single_set (insn)) != NULL_RTX
25958 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25959 && MEM_P (SET_DEST (set2)))
25960 cost += 1;
25962 memory = get_attr_memory (insn);
25964 /* Show ability of reorder buffer to hide latency of load by executing
25965 in parallel with previous instruction in case
25966 previous instruction is not needed to compute the address. */
25967 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25968 && !ix86_agi_dependent (dep_insn, insn))
25970 /* Claim moves to take one cycle, as the core can issue one load
25971 at a time and the next load can start a cycle later. */
25972 if (dep_insn_type == TYPE_IMOV
25973 || dep_insn_type == TYPE_FMOV)
25974 cost = 1;
25975 else if (cost > 1)
25976 cost--;
25978 break;
25980 case PROCESSOR_K6:
25981 /* The esp dependency is resolved before
25982 the instruction is really finished. */
25983 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25984 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25985 return 1;
25987 /* INT->FP conversion is expensive. */
25988 if (get_attr_fp_int_src (dep_insn))
25989 cost += 5;
25991 memory = get_attr_memory (insn);
25993 /* Show ability of reorder buffer to hide latency of load by executing
25994 in parallel with previous instruction in case
25995 previous instruction is not needed to compute the address. */
25996 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25997 && !ix86_agi_dependent (dep_insn, insn))
25999 /* Claim moves to take one cycle, as the core can issue one load
26000 at a time and the next load can start a cycle later. */
26001 if (dep_insn_type == TYPE_IMOV
26002 || dep_insn_type == TYPE_FMOV)
26003 cost = 1;
26004 else if (cost > 2)
26005 cost -= 2;
26006 else
26007 cost = 1;
26009 break;
26011 case PROCESSOR_AMDFAM10:
26012 case PROCESSOR_BDVER1:
26013 case PROCESSOR_BDVER2:
26014 case PROCESSOR_BDVER3:
26015 case PROCESSOR_BDVER4:
26016 case PROCESSOR_BTVER1:
26017 case PROCESSOR_BTVER2:
26018 case PROCESSOR_GENERIC:
26019 /* The stack engine allows push and pop instructions to execute in parallel. */
26020 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26021 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26022 return 0;
26023 /* FALLTHRU */
26025 case PROCESSOR_ATHLON:
26026 case PROCESSOR_K8:
26027 memory = get_attr_memory (insn);
26029 /* Show ability of reorder buffer to hide latency of load by executing
26030 in parallel with previous instruction in case
26031 previous instruction is not needed to compute the address. */
26032 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26033 && !ix86_agi_dependent (dep_insn, insn))
26035 enum attr_unit unit = get_attr_unit (insn);
26036 int loadcost = 3;
26038 /* Because of the difference between the length of integer and
26039 floating unit pipeline preparation stages, the memory operands
26040 for floating point are cheaper.
26042 ??? For Athlon the difference is most probably 2. */
26043 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26044 loadcost = 3;
26045 else
26046 loadcost = TARGET_ATHLON ? 2 : 0;
26048 if (cost >= loadcost)
26049 cost -= loadcost;
26050 else
26051 cost = 0;
26053 break;
26055 case PROCESSOR_CORE2:
26056 case PROCESSOR_NEHALEM:
26057 case PROCESSOR_SANDYBRIDGE:
26058 case PROCESSOR_HASWELL:
26059 /* The stack engine allows push and pop instructions to execute in parallel. */
26060 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26061 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26062 return 0;
26064 memory = get_attr_memory (insn);
26066 /* Show ability of reorder buffer to hide latency of load by executing
26067 in parallel with previous instruction in case
26068 previous instruction is not needed to compute the address. */
26069 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26070 && !ix86_agi_dependent (dep_insn, insn))
26072 if (cost >= 4)
26073 cost -= 4;
26074 else
26075 cost = 0;
26077 break;
26079 case PROCESSOR_SILVERMONT:
26080 case PROCESSOR_INTEL:
26081 if (!reload_completed)
26082 return cost;
26084 /* Increase cost of integer loads. */
26085 memory = get_attr_memory (dep_insn);
26086 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26088 enum attr_unit unit = get_attr_unit (dep_insn);
26089 if (unit == UNIT_INTEGER && cost == 1)
26091 if (memory == MEMORY_LOAD)
26092 cost = 3;
26093 else
26095 /* Increase cost of ld/st for short int types only
26096 because of store forwarding issue. */
26097 rtx set = single_set (dep_insn);
26098 if (set && (GET_MODE (SET_DEST (set)) == QImode
26099 || GET_MODE (SET_DEST (set)) == HImode))
26101 /* Increase cost of store/load insn if exact
26102 dependence exists and it is load insn. */
26103 enum attr_memory insn_memory = get_attr_memory (insn);
26104 if (insn_memory == MEMORY_LOAD
26105 && exact_store_load_dependency (dep_insn, insn))
26106 cost = 3;
26112 default:
26113 break;
26116 return cost;
26119 /* How many alternative schedules to try. This should be as wide as the
26120 scheduling freedom in the DFA, but no wider. Making this value too
26121 large results in extra work for the scheduler. */
26123 static int
26124 ia32_multipass_dfa_lookahead (void)
26126 switch (ix86_tune)
26128 case PROCESSOR_PENTIUM:
26129 return 2;
26131 case PROCESSOR_PENTIUMPRO:
26132 case PROCESSOR_K6:
26133 return 1;
26135 case PROCESSOR_BDVER1:
26136 case PROCESSOR_BDVER2:
26137 case PROCESSOR_BDVER3:
26138 case PROCESSOR_BDVER4:
26139 /* We use lookahead value 4 for BD both before and after reload
26140 schedules. The plan is to use value 8 for -O3. */
26141 return 4;
26143 case PROCESSOR_CORE2:
26144 case PROCESSOR_NEHALEM:
26145 case PROCESSOR_SANDYBRIDGE:
26146 case PROCESSOR_HASWELL:
26147 case PROCESSOR_BONNELL:
26148 case PROCESSOR_SILVERMONT:
26149 case PROCESSOR_INTEL:
26150 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26151 as the number of instructions that can be executed in a cycle, i.e.,
26152 issue_rate. I wonder why tuning for many CPUs does not do this. */
26153 if (reload_completed)
26154 return ix86_issue_rate ();
26155 /* Don't use lookahead for pre-reload schedule to save compile time. */
26156 return 0;
26158 default:
26159 return 0;
26163 /* Return true if target platform supports macro-fusion. */
26165 static bool
26166 ix86_macro_fusion_p ()
26168 return TARGET_FUSE_CMP_AND_BRANCH;
26171 /* Check whether the current microarchitecture supports macro fusion
26172 for the insn pair "CONDGEN + CONDJMP". Refer to the
26173 "Intel Architectures Optimization Reference Manual". */
26175 static bool
26176 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
26178 rtx src, dest;
26179 rtx single_set = single_set (condgen);
26180 enum rtx_code ccode;
26181 rtx compare_set = NULL_RTX, test_if, cond;
26182 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26184 if (get_attr_type (condgen) != TYPE_TEST
26185 && get_attr_type (condgen) != TYPE_ICMP
26186 && get_attr_type (condgen) != TYPE_INCDEC
26187 && get_attr_type (condgen) != TYPE_ALU)
26188 return false;
26190 if (single_set == NULL_RTX
26191 && !TARGET_FUSE_ALU_AND_BRANCH)
26192 return false;
26194 if (single_set != NULL_RTX)
26195 compare_set = single_set;
26196 else
26198 int i;
26199 rtx pat = PATTERN (condgen);
26200 for (i = 0; i < XVECLEN (pat, 0); i++)
26201 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26203 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26204 if (GET_CODE (set_src) == COMPARE)
26205 compare_set = XVECEXP (pat, 0, i);
26206 else
26207 alu_set = XVECEXP (pat, 0, i);
26210 if (compare_set == NULL_RTX)
26211 return false;
26212 src = SET_SRC (compare_set);
26213 if (GET_CODE (src) != COMPARE)
26214 return false;
26216 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26217 supported. */
26218 if ((MEM_P (XEXP (src, 0))
26219 && CONST_INT_P (XEXP (src, 1)))
26220 || (MEM_P (XEXP (src, 1))
26221 && CONST_INT_P (XEXP (src, 0))))
26222 return false;
26224 /* No fusion for RIP-relative address. */
26225 if (MEM_P (XEXP (src, 0)))
26226 addr = XEXP (XEXP (src, 0), 0);
26227 else if (MEM_P (XEXP (src, 1)))
26228 addr = XEXP (XEXP (src, 1), 0);
26230 if (addr) {
26231 ix86_address parts;
26232 int ok = ix86_decompose_address (addr, &parts);
26233 gcc_assert (ok);
26235 if (rip_relative_addr_p (&parts))
26236 return false;
26239 test_if = SET_SRC (pc_set (condjmp));
26240 cond = XEXP (test_if, 0);
26241 ccode = GET_CODE (cond);
26242 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26243 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26244 && (ccode == GE
26245 || ccode == GT
26246 || ccode == LE
26247 || ccode == LT))
26248 return false;
26250 /* Return true for TYPE_TEST and TYPE_ICMP. */
26251 if (get_attr_type (condgen) == TYPE_TEST
26252 || get_attr_type (condgen) == TYPE_ICMP)
26253 return true;
26255 /* The following handles the case of macro-fusion for alu + jmp. */
26256 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26257 return false;
26259 /* No fusion for alu op with memory destination operand. */
26260 dest = SET_DEST (alu_set);
26261 if (MEM_P (dest))
26262 return false;
26264 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26265 supported. */
26266 if (get_attr_type (condgen) == TYPE_INCDEC
26267 && (ccode == GEU
26268 || ccode == GTU
26269 || ccode == LEU
26270 || ccode == LTU))
26271 return false;
26273 return true;
26276 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
26277 execution. It is applied if
26278 (1) an IMUL instruction is on the top of the list;
26279 (2) there exists exactly one producer of an independent IMUL instruction in
26280 the ready list.
26281 Return the index of the IMUL producer if it was found and -1 otherwise. */
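/* For illustration (not part of the original source): if an IMUL sits on top
   of the ready list and somewhere below there is an instruction that is the
   sole producer of another, independent IMUL, the caller moves that producer
   to the top so the second multiply becomes ready quickly and the two can
   flow through Atom's pipelined IMUL unit back to back.  */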
26282 static int
26283 do_reorder_for_imul (rtx *ready, int n_ready)
26285 rtx insn, set, insn1, insn2;
26286 sd_iterator_def sd_it;
26287 dep_t dep;
26288 int index = -1;
26289 int i;
26291 if (!TARGET_BONNELL)
26292 return index;
26294 /* Check that IMUL instruction is on the top of ready list. */
26295 insn = ready[n_ready - 1];
26296 set = single_set (insn);
26297 if (!set)
26298 return index;
26299 if (!(GET_CODE (SET_SRC (set)) == MULT
26300 && GET_MODE (SET_SRC (set)) == SImode))
26301 return index;
26303 /* Search for producer of independent IMUL instruction. */
26304 for (i = n_ready - 2; i >= 0; i--)
26306 insn = ready[i];
26307 if (!NONDEBUG_INSN_P (insn))
26308 continue;
26309 /* Skip IMUL instruction. */
26310 insn2 = PATTERN (insn);
26311 if (GET_CODE (insn2) == PARALLEL)
26312 insn2 = XVECEXP (insn2, 0, 0);
26313 if (GET_CODE (insn2) == SET
26314 && GET_CODE (SET_SRC (insn2)) == MULT
26315 && GET_MODE (SET_SRC (insn2)) == SImode)
26316 continue;
26318 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26320 rtx con;
26321 con = DEP_CON (dep);
26322 if (!NONDEBUG_INSN_P (con))
26323 continue;
26324 insn1 = PATTERN (con);
26325 if (GET_CODE (insn1) == PARALLEL)
26326 insn1 = XVECEXP (insn1, 0, 0);
26328 if (GET_CODE (insn1) == SET
26329 && GET_CODE (SET_SRC (insn1)) == MULT
26330 && GET_MODE (SET_SRC (insn1)) == SImode)
26332 sd_iterator_def sd_it1;
26333 dep_t dep1;
26334 /* Check if there is no other dependee for IMUL. */
26335 index = i;
26336 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26338 rtx pro;
26339 pro = DEP_PRO (dep1);
26340 if (!NONDEBUG_INSN_P (pro))
26341 continue;
26342 if (pro != insn)
26343 index = -1;
26345 if (index >= 0)
26346 break;
26349 if (index >= 0)
26350 break;
26352 return index;
26355 /* Try to find the best candidate on the top of the ready list if two insns
26356 have the same priority - a candidate is best if its dependees were
26357 scheduled earlier. Applied for Silvermont only.
26358 Return true if the top 2 insns must be interchanged. */
26359 static bool
26360 swap_top_of_ready_list (rtx *ready, int n_ready)
26362 rtx top = ready[n_ready - 1];
26363 rtx next = ready[n_ready - 2];
26364 rtx set;
26365 sd_iterator_def sd_it;
26366 dep_t dep;
26367 int clock1 = -1;
26368 int clock2 = -1;
26369 #define INSN_TICK(INSN) (HID (INSN)->tick)
26371 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26372 return false;
26374 if (!NONDEBUG_INSN_P (top))
26375 return false;
26376 if (!NONJUMP_INSN_P (top))
26377 return false;
26378 if (!NONDEBUG_INSN_P (next))
26379 return false;
26380 if (!NONJUMP_INSN_P (next))
26381 return false;
26382 set = single_set (top);
26383 if (!set)
26384 return false;
26385 set = single_set (next);
26386 if (!set)
26387 return false;
26389 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26391 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26392 return false;
26393 /* Determine the winner more precisely. */
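/* CLOCK1 and CLOCK2 become the latest issue tick among the producers of TOP
and NEXT respectively; the insn whose producers finished earlier wins. */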
26394 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26396 rtx pro;
26397 pro = DEP_PRO (dep);
26398 if (!NONDEBUG_INSN_P (pro))
26399 continue;
26400 if (INSN_TICK (pro) > clock1)
26401 clock1 = INSN_TICK (pro);
26403 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26405 rtx pro;
26406 pro = DEP_PRO (dep);
26407 if (!NONDEBUG_INSN_P (pro))
26408 continue;
26409 if (INSN_TICK (pro) > clock2)
26410 clock2 = INSN_TICK (pro);
26413 if (clock1 == clock2)
26415 /* Determine winner - load must win. */
26416 enum attr_memory memory1, memory2;
26417 memory1 = get_attr_memory (top);
26418 memory2 = get_attr_memory (next);
26419 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26420 return true;
26422 return (bool) (clock2 < clock1);
26424 return false;
26425 #undef INSN_TICK
26428 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26429 Return the issue rate. */
26430 static int
26431 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26432 int clock_var)
26434 int issue_rate = -1;
26435 int n_ready = *pn_ready;
26436 int i;
26437 rtx insn;
26438 int index = -1;
26440 /* Set up issue rate. */
26441 issue_rate = ix86_issue_rate ();
26443 /* Do reordering for BONNELL/SILVERMONT only. */
26444 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26445 return issue_rate;
26447 /* Nothing to do if ready list contains only 1 instruction. */
26448 if (n_ready <= 1)
26449 return issue_rate;
26451 /* Do reordering for the post-reload scheduler only. */
26452 if (!reload_completed)
26453 return issue_rate;
26455 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26457 if (sched_verbose > 1)
26458 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26459 INSN_UID (ready[index]));
26461 /* Put IMUL producer (ready[index]) at the top of ready list. */
26462 insn = ready[index];
26463 for (i = index; i < n_ready - 1; i++)
26464 ready[i] = ready[i + 1];
26465 ready[n_ready - 1] = insn;
26466 return issue_rate;
26468 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26470 if (sched_verbose > 1)
26471 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26472 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26473 /* Swap 2 top elements of ready list. */
26474 insn = ready[n_ready - 1];
26475 ready[n_ready - 1] = ready[n_ready - 2];
26476 ready[n_ready - 2] = insn;
26478 return issue_rate;
26481 static bool
26482 ix86_class_likely_spilled_p (reg_class_t);
26484 /* Return true if the lhs of INSN is a HW function argument register; set
26485 IS_SPILLED to true if it is a likely-spilled HW register. */
26486 static bool
26487 insn_is_function_arg (rtx insn, bool* is_spilled)
26489 rtx dst;
26491 if (!NONDEBUG_INSN_P (insn))
26492 return false;
26493 /* Call instructions are not movable; ignore them. */
26494 if (CALL_P (insn))
26495 return false;
26496 insn = PATTERN (insn);
26497 if (GET_CODE (insn) == PARALLEL)
26498 insn = XVECEXP (insn, 0, 0);
26499 if (GET_CODE (insn) != SET)
26500 return false;
26501 dst = SET_DEST (insn);
26502 if (REG_P (dst) && HARD_REGISTER_P (dst)
26503 && ix86_function_arg_regno_p (REGNO (dst)))
26505 /* Is it a likely-spilled HW register? */
26506 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26507 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26508 *is_spilled = true;
26509 return true;
26511 return false;
26514 /* Add output dependencies for a chain of adjacent function arguments, but
26515 only if there is a move to a likely-spilled HW register. Return the first
26516 argument if at least one dependence was added, or NULL otherwise. */
26517 static rtx
26518 add_parameter_dependencies (rtx call, rtx head)
26520 rtx insn;
26521 rtx last = call;
26522 rtx first_arg = NULL;
26523 bool is_spilled = false;
26525 head = PREV_INSN (head);
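/* Step back from HEAD so that the scan below can also examine the original
HEAD insn before giving up. */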
26527 /* Find the argument-passing instruction nearest to the call. */
26528 while (true)
26530 last = PREV_INSN (last);
26531 if (last == head)
26532 return NULL;
26533 if (!NONDEBUG_INSN_P (last))
26534 continue;
26535 if (insn_is_function_arg (last, &is_spilled))
26536 break;
26537 return NULL;
26540 first_arg = last;
26541 while (true)
26543 insn = PREV_INSN (last);
26544 if (!INSN_P (insn))
26545 break;
26546 if (insn == head)
26547 break;
26548 if (!NONDEBUG_INSN_P (insn))
26550 last = insn;
26551 continue;
26553 if (insn_is_function_arg (insn, &is_spilled))
26555 /* Add an output dependence between two function arguments if the chain
26556 of output arguments contains likely-spilled HW registers. */
26557 if (is_spilled)
26558 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26559 first_arg = last = insn;
26561 else
26562 break;
26564 if (!is_spilled)
26565 return NULL;
26566 return first_arg;
26569 /* Add output or anti dependency from insn to first_arg to restrict its code
26570 motion. */
26571 static void
26572 avoid_func_arg_motion (rtx first_arg, rtx insn)
26574 rtx set;
26575 rtx tmp;
26577 set = single_set (insn);
26578 if (!set)
26579 return;
26580 tmp = SET_DEST (set);
26581 if (REG_P (tmp))
26583 /* Add output dependency to the first function argument. */
26584 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26585 return;
26587 /* Add anti dependency. */
26588 add_dependence (first_arg, insn, REG_DEP_ANTI);
26591 /* Avoid cross block motion of function argument through adding dependency
26592 from the first non-jump instruction in bb. */
26593 static void
26594 add_dependee_for_func_arg (rtx arg, basic_block bb)
26596 rtx insn = BB_END (bb);
26598 while (insn)
26600 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26602 rtx set = single_set (insn);
26603 if (set)
26605 avoid_func_arg_motion (arg, insn);
26606 return;
26609 if (insn == BB_HEAD (bb))
26610 return;
26611 insn = PREV_INSN (insn);
26615 /* Hook for pre-reload schedule - avoid motion of function arguments
26616 passed in likely spilled HW registers. */
26617 static void
26618 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26620 rtx insn;
26621 rtx first_arg = NULL;
26622 if (reload_completed)
26623 return;
26624 while (head != tail && DEBUG_INSN_P (head))
26625 head = NEXT_INSN (head);
26626 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26627 if (INSN_P (insn) && CALL_P (insn))
26629 first_arg = add_parameter_dependencies (insn, head);
26630 if (first_arg)
26632 /* Add a dependee for the first argument to predecessors, but only if the
26633 region contains more than one block. */
26634 basic_block bb = BLOCK_FOR_INSN (insn);
26635 int rgn = CONTAINING_RGN (bb->index);
26636 int nr_blks = RGN_NR_BLOCKS (rgn);
26637 /* Skip trivial regions and region head blocks that can have
26638 predecessors outside of region. */
26639 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26641 edge e;
26642 edge_iterator ei;
26644 /* Regions are SCCs with the exception of selective
26645 scheduling with pipelining of outer blocks enabled.
26646 So also check that immediate predecessors of a non-head
26647 block are in the same region. */
26648 FOR_EACH_EDGE (e, ei, bb->preds)
26650 /* Avoid creating loop-carried dependencies by using the
26651 topological ordering of the region. */
26652 if (rgn == CONTAINING_RGN (e->src->index)
26653 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26654 add_dependee_for_func_arg (first_arg, e->src);
26657 insn = first_arg;
26658 if (insn == head)
26659 break;
26662 else if (first_arg)
26663 avoid_func_arg_motion (first_arg, insn);
26666 /* Hook for the pre-reload scheduler - set the priority of moves from likely
26667 spilled HW registers to the maximum, to schedule them as soon as possible.
26668 These are moves from function argument registers at the top of the function
26669 entry and moves from function return value registers after a call. */
26670 static int
26671 ix86_adjust_priority (rtx insn, int priority)
26673 rtx set;
26675 if (reload_completed)
26676 return priority;
26678 if (!NONDEBUG_INSN_P (insn))
26679 return priority;
26681 set = single_set (insn);
26682 if (set)
26684 rtx tmp = SET_SRC (set);
26685 if (REG_P (tmp)
26686 && HARD_REGISTER_P (tmp)
26687 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26688 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26689 return current_sched_info->sched_max_insns_priority;
26692 return priority;
26695 /* Model the decoder of Core 2/i7.
26696 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26697 track the instruction fetch block boundaries and make sure that long
26698 (9+ byte) instructions are assigned to decoder D0. */
26700 /* Maximum length of an insn that can be handled by
26701 a secondary decoder unit. '8' for Core 2/i7. */
26702 static int core2i7_secondary_decoder_max_insn_size;
26704 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26705 '16' for Core 2/i7. */
26706 static int core2i7_ifetch_block_size;
26708 /* Maximum number of instructions decoder can handle per cycle.
26709 '6' for Core 2/i7. */
26710 static int core2i7_ifetch_block_max_insns;
26712 typedef struct ix86_first_cycle_multipass_data_ *
26713 ix86_first_cycle_multipass_data_t;
26714 typedef const struct ix86_first_cycle_multipass_data_ *
26715 const_ix86_first_cycle_multipass_data_t;
26717 /* A variable to store target state across calls to max_issue within
26718 one cycle. */
26719 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26720 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26722 /* Initialize DATA. */
26723 static void
26724 core2i7_first_cycle_multipass_init (void *_data)
26726 ix86_first_cycle_multipass_data_t data
26727 = (ix86_first_cycle_multipass_data_t) _data;
26729 data->ifetch_block_len = 0;
26730 data->ifetch_block_n_insns = 0;
26731 data->ready_try_change = NULL;
26732 data->ready_try_change_size = 0;
26735 /* Advancing the cycle; reset ifetch block counts. */
26736 static void
26737 core2i7_dfa_post_advance_cycle (void)
26739 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26741 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26743 data->ifetch_block_len = 0;
26744 data->ifetch_block_n_insns = 0;
26747 static int min_insn_size (rtx);
26749 /* Filter out insns from ready_try that the core will not be able to issue
26750 on the current cycle due to decoder restrictions. */
26751 static void
26752 core2i7_first_cycle_multipass_filter_ready_try
26753 (const_ix86_first_cycle_multipass_data_t data,
26754 char *ready_try, int n_ready, bool first_cycle_insn_p)
26756 while (n_ready--)
26758 rtx insn;
26759 int insn_size;
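/* A nonzero ready_try entry means the insn is already excluded from issue
on this cycle. */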
26761 if (ready_try[n_ready])
26762 continue;
26764 insn = get_ready_element (n_ready);
26765 insn_size = min_insn_size (insn);
26767 if (/* If this insn is too long for a secondary decoder ... */
26768 (!first_cycle_insn_p
26769 && insn_size > core2i7_secondary_decoder_max_insn_size)
26770 /* ... or it would not fit into the ifetch block ... */
26771 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26772 /* ... or the decoder is full already ... */
26773 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26774 /* ... mask the insn out. */
26776 ready_try[n_ready] = 1;
26778 if (data->ready_try_change)
26779 bitmap_set_bit (data->ready_try_change, n_ready);
26784 /* Prepare for a new round of multipass lookahead scheduling. */
26785 static void
26786 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
26787 bool first_cycle_insn_p)
26789 ix86_first_cycle_multipass_data_t data
26790 = (ix86_first_cycle_multipass_data_t) _data;
26791 const_ix86_first_cycle_multipass_data_t prev_data
26792 = ix86_first_cycle_multipass_data;
26794 /* Restore the state from the end of the previous round. */
26795 data->ifetch_block_len = prev_data->ifetch_block_len;
26796 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26798 /* Filter instructions that cannot be issued on current cycle due to
26799 decoder restrictions. */
26800 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26801 first_cycle_insn_p);
26804 /* INSN is being issued in current solution. Account for its impact on
26805 the decoder model. */
26806 static void
26807 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
26808 rtx insn, const void *_prev_data)
26810 ix86_first_cycle_multipass_data_t data
26811 = (ix86_first_cycle_multipass_data_t) _data;
26812 const_ix86_first_cycle_multipass_data_t prev_data
26813 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26815 int insn_size = min_insn_size (insn);
26817 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26818 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26819 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26820 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26822 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26823 if (!data->ready_try_change)
26825 data->ready_try_change = sbitmap_alloc (n_ready);
26826 data->ready_try_change_size = n_ready;
26828 else if (data->ready_try_change_size < n_ready)
26830 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26831 n_ready, 0);
26832 data->ready_try_change_size = n_ready;
26834 bitmap_clear (data->ready_try_change);
26836 /* Filter out insns from ready_try that the core will not be able to issue
26837 on the current cycle due to decoder restrictions. */
26838 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26839 false);
26842 /* Revert the effect on ready_try. */
26843 static void
26844 core2i7_first_cycle_multipass_backtrack (const void *_data,
26845 char *ready_try,
26846 int n_ready ATTRIBUTE_UNUSED)
26848 const_ix86_first_cycle_multipass_data_t data
26849 = (const_ix86_first_cycle_multipass_data_t) _data;
26850 unsigned int i = 0;
26851 sbitmap_iterator sbi;
26853 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26854 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26856 ready_try[i] = 0;
26860 /* Save the result of multipass lookahead scheduling for the next round. */
26861 static void
26862 core2i7_first_cycle_multipass_end (const void *_data)
26864 const_ix86_first_cycle_multipass_data_t data
26865 = (const_ix86_first_cycle_multipass_data_t) _data;
26866 ix86_first_cycle_multipass_data_t next_data
26867 = ix86_first_cycle_multipass_data;
26869 if (data != NULL)
26871 next_data->ifetch_block_len = data->ifetch_block_len;
26872 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26876 /* Deallocate target data. */
26877 static void
26878 core2i7_first_cycle_multipass_fini (void *_data)
26880 ix86_first_cycle_multipass_data_t data
26881 = (ix86_first_cycle_multipass_data_t) _data;
26883 if (data->ready_try_change)
26885 sbitmap_free (data->ready_try_change);
26886 data->ready_try_change = NULL;
26887 data->ready_try_change_size = 0;
26891 /* Prepare for scheduling pass. */
26892 static void
26893 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26894 int verbose ATTRIBUTE_UNUSED,
26895 int max_uid ATTRIBUTE_UNUSED)
26897 /* Install scheduling hooks for current CPU. Some of these hooks are used
26898 in time-critical parts of the scheduler, so we only set them up when
26899 they are actually used. */
26900 switch (ix86_tune)
26902 case PROCESSOR_CORE2:
26903 case PROCESSOR_NEHALEM:
26904 case PROCESSOR_SANDYBRIDGE:
26905 case PROCESSOR_HASWELL:
26906 /* Do not perform multipass scheduling for pre-reload schedule
26907 to save compile time. */
26908 if (reload_completed)
26910 targetm.sched.dfa_post_advance_cycle
26911 = core2i7_dfa_post_advance_cycle;
26912 targetm.sched.first_cycle_multipass_init
26913 = core2i7_first_cycle_multipass_init;
26914 targetm.sched.first_cycle_multipass_begin
26915 = core2i7_first_cycle_multipass_begin;
26916 targetm.sched.first_cycle_multipass_issue
26917 = core2i7_first_cycle_multipass_issue;
26918 targetm.sched.first_cycle_multipass_backtrack
26919 = core2i7_first_cycle_multipass_backtrack;
26920 targetm.sched.first_cycle_multipass_end
26921 = core2i7_first_cycle_multipass_end;
26922 targetm.sched.first_cycle_multipass_fini
26923 = core2i7_first_cycle_multipass_fini;
26925 /* Set decoder parameters. */
26926 core2i7_secondary_decoder_max_insn_size = 8;
26927 core2i7_ifetch_block_size = 16;
26928 core2i7_ifetch_block_max_insns = 6;
26929 break;
26931 /* ... Fall through ... */
26932 default:
26933 targetm.sched.dfa_post_advance_cycle = NULL;
26934 targetm.sched.first_cycle_multipass_init = NULL;
26935 targetm.sched.first_cycle_multipass_begin = NULL;
26936 targetm.sched.first_cycle_multipass_issue = NULL;
26937 targetm.sched.first_cycle_multipass_backtrack = NULL;
26938 targetm.sched.first_cycle_multipass_end = NULL;
26939 targetm.sched.first_cycle_multipass_fini = NULL;
26940 break;
26945 /* Compute the alignment given to a constant that is being placed in memory.
26946 EXP is the constant and ALIGN is the alignment that the object would
26947 ordinarily have.
26948 The value of this function is used instead of that alignment to align
26949 the object. */
26952 ix86_constant_alignment (tree exp, int align)
26954 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26955 || TREE_CODE (exp) == INTEGER_CST)
26957 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26958 return 64;
26959 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26960 return 128;
26962 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26963 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26964 return BITS_PER_WORD;
26966 return align;
26969 /* Compute the alignment for a static variable.
26970 TYPE is the data type, and ALIGN is the alignment that
26971 the object would ordinarily have. The value of this function is used
26972 instead of that alignment to align the object. */
26975 ix86_data_alignment (tree type, int align, bool opt)
26977 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26978 for symbols from other compilation units or symbols that don't need
26979 to bind locally. In order to preserve some ABI compatibility with
26980 those compilers, ensure we don't decrease alignment from what we
26981 used to assume. */
26983 int max_align_compat
26984 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26986 /* A data structure equal to or greater than the size of a cache line
26987 (64 bytes in the Pentium 4 and other recent Intel processors, including
26988 processors based on the Intel Core microarchitecture) should be aligned
26989 so that its base address is a multiple of the cache line size. */
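/* prefetch_block is measured in bytes; multiply by 8 because alignments in
this function are expressed in bits. */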
26991 int max_align
26992 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26994 if (max_align < BITS_PER_WORD)
26995 max_align = BITS_PER_WORD;
26997 if (opt
26998 && AGGREGATE_TYPE_P (type)
26999 && TYPE_SIZE (type)
27000 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27002 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align_compat
27003 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
27004 && align < max_align_compat)
27005 align = max_align_compat;
27006 if ((TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
27007 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
27008 && align < max_align)
27009 align = max_align;
27012 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
27013 to a 16-byte boundary. */
27014 if (TARGET_64BIT)
27016 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27017 && TYPE_SIZE (type)
27018 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27019 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
27020 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
27021 return 128;
27024 if (!opt)
27025 return align;
27027 if (TREE_CODE (type) == ARRAY_TYPE)
27029 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27030 return 64;
27031 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27032 return 128;
27034 else if (TREE_CODE (type) == COMPLEX_TYPE)
27037 if (TYPE_MODE (type) == DCmode && align < 64)
27038 return 64;
27039 if ((TYPE_MODE (type) == XCmode
27040 || TYPE_MODE (type) == TCmode) && align < 128)
27041 return 128;
27043 else if ((TREE_CODE (type) == RECORD_TYPE
27044 || TREE_CODE (type) == UNION_TYPE
27045 || TREE_CODE (type) == QUAL_UNION_TYPE)
27046 && TYPE_FIELDS (type))
27048 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27049 return 64;
27050 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27051 return 128;
27053 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27054 || TREE_CODE (type) == INTEGER_TYPE)
27056 if (TYPE_MODE (type) == DFmode && align < 64)
27057 return 64;
27058 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27059 return 128;
27062 return align;
27065 /* Compute the alignment for a local variable or a stack slot. EXP is
27066 the data type or decl itself, MODE is the widest mode available and
27067 ALIGN is the alignment that the object would ordinarily have. The
27068 value of this macro is used instead of that alignment to align the
27069 object. */
27071 unsigned int
27072 ix86_local_alignment (tree exp, enum machine_mode mode,
27073 unsigned int align)
27075 tree type, decl;
27077 if (exp && DECL_P (exp))
27079 type = TREE_TYPE (exp);
27080 decl = exp;
27082 else
27084 type = exp;
27085 decl = NULL;
27088 /* Don't do dynamic stack realignment for long long objects with
27089 -mpreferred-stack-boundary=2. */
27090 if (!TARGET_64BIT
27091 && align == 64
27092 && ix86_preferred_stack_boundary < 64
27093 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27094 && (!type || !TYPE_USER_ALIGN (type))
27095 && (!decl || !DECL_USER_ALIGN (decl)))
27096 align = 32;
27098 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27099 register in MODE. We will return the largest alignment of XF
27100 and DF. */
27101 if (!type)
27103 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27104 align = GET_MODE_ALIGNMENT (DFmode);
27105 return align;
27108 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
27109 to a 16-byte boundary. The exact wording is:
27111 An array uses the same alignment as its elements, except that a local or
27112 global array variable of length at least 16 bytes or
27113 a C99 variable-length array variable always has alignment of at least 16 bytes.
27115 This was added to allow the use of aligned SSE instructions on arrays. The
27116 rule is meant for static storage (where the compiler cannot do the analysis
27117 by itself). We follow it for automatic variables only when convenient.
27118 We fully control everything in the function being compiled, and functions
27119 from other units cannot rely on the alignment.
27121 Exclude the va_list type. It is the common case of a local array where
27122 we cannot benefit from the alignment.
27124 TODO: Probably one should optimize for size only when the var does not escape. */
27125 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27126 && TARGET_SSE)
27128 if (AGGREGATE_TYPE_P (type)
27129 && (va_list_type_node == NULL_TREE
27130 || (TYPE_MAIN_VARIANT (type)
27131 != TYPE_MAIN_VARIANT (va_list_type_node)))
27132 && TYPE_SIZE (type)
27133 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27134 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
27135 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
27136 return 128;
27138 if (TREE_CODE (type) == ARRAY_TYPE)
27140 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27141 return 64;
27142 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27143 return 128;
27145 else if (TREE_CODE (type) == COMPLEX_TYPE)
27147 if (TYPE_MODE (type) == DCmode && align < 64)
27148 return 64;
27149 if ((TYPE_MODE (type) == XCmode
27150 || TYPE_MODE (type) == TCmode) && align < 128)
27151 return 128;
27153 else if ((TREE_CODE (type) == RECORD_TYPE
27154 || TREE_CODE (type) == UNION_TYPE
27155 || TREE_CODE (type) == QUAL_UNION_TYPE)
27156 && TYPE_FIELDS (type))
27158 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27159 return 64;
27160 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27161 return 128;
27163 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27164 || TREE_CODE (type) == INTEGER_TYPE)
27167 if (TYPE_MODE (type) == DFmode && align < 64)
27168 return 64;
27169 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27170 return 128;
27172 return align;
27175 /* Compute the minimum required alignment for dynamic stack realignment
27176 purposes for a local variable, parameter or a stack slot. EXP is
27177 the data type or decl itself, MODE is its mode and ALIGN is the
27178 alignment that the object would ordinarily have. */
27180 unsigned int
27181 ix86_minimum_alignment (tree exp, enum machine_mode mode,
27182 unsigned int align)
27184 tree type, decl;
27186 if (exp && DECL_P (exp))
27188 type = TREE_TYPE (exp);
27189 decl = exp;
27191 else
27193 type = exp;
27194 decl = NULL;
27197 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27198 return align;
27200 /* Don't do dynamic stack realignment for long long objects with
27201 -mpreferred-stack-boundary=2. */
27202 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27203 && (!type || !TYPE_USER_ALIGN (type))
27204 && (!decl || !DECL_USER_ALIGN (decl)))
27205 return 32;
27207 return align;
27210 /* Find a location for the static chain incoming to a nested function.
27211 This is a register, unless all free registers are used by arguments. */
27213 static rtx
27214 ix86_static_chain (const_tree fndecl, bool incoming_p)
27216 unsigned regno;
27218 if (!DECL_STATIC_CHAIN (fndecl))
27219 return NULL;
27221 if (TARGET_64BIT)
27223 /* We always use R10 in 64-bit mode. */
27224 regno = R10_REG;
27226 else
27228 tree fntype;
27229 unsigned int ccvt;
27231 /* By default in 32-bit mode we use ECX to pass the static chain. */
27232 regno = CX_REG;
27234 fntype = TREE_TYPE (fndecl);
27235 ccvt = ix86_get_callcvt (fntype);
27236 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27238 /* Fastcall functions use ecx/edx for arguments, which leaves
27239 us with EAX for the static chain.
27240 Thiscall functions use ecx for arguments, which also
27241 leaves us with EAX for the static chain. */
27242 regno = AX_REG;
27244 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27246 /* Thiscall functions use ecx for arguments, which leaves
27247 us with EAX and EDX for the static chain.
27248 For ABI compatibility we use EAX. */
27249 regno = AX_REG;
27251 else if (ix86_function_regparm (fntype, fndecl) == 3)
27253 /* For regparm 3, we have no free call-clobbered registers in
27254 which to store the static chain. In order to implement this,
27255 we have the trampoline push the static chain to the stack.
27256 However, we can't push a value below the return address when
27257 we call the nested function directly, so we have to use an
27258 alternate entry point. For this we use ESI, and have the
27259 alternate entry point push ESI, so that things appear the
27260 same once we're executing the nested function. */
27261 if (incoming_p)
27263 if (fndecl == current_function_decl)
27264 ix86_static_chain_on_stack = true;
27265 return gen_frame_mem (SImode,
27266 plus_constant (Pmode,
27267 arg_pointer_rtx, -8));
27269 regno = SI_REG;
27273 return gen_rtx_REG (Pmode, regno);
27276 /* Emit RTL insns to initialize the variable parts of a trampoline.
27277 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27278 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27279 to be passed to the target function. */
27281 static void
27282 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27284 rtx mem, fnaddr;
27285 int opcode;
27286 int offset = 0;
27288 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27290 if (TARGET_64BIT)
27292 int size;
27294 /* Load the function address into r11. Try to load the address using
27295 the shorter movl instead of movabs. We may want to support
27296 movq for kernel mode, but the kernel does not use trampolines at
27297 the moment. FNADDR is a 32-bit address and may not be in
27298 DImode when ptr_mode == SImode. Always use movl in this
27299 case. */
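/* The HImode constants below are opcode bytes in little-endian order:
0xbb41 is 41 bb, i.e. REX.B + movl $imm32, %r11d (6 bytes with the
immediate); 0xbb49 is 49 bb, i.e. REX.W+B movabs $imm64, %r11 (10 bytes). */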
27300 if (ptr_mode == SImode
27301 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27303 fnaddr = copy_addr_to_reg (fnaddr);
27305 mem = adjust_address (m_tramp, HImode, offset);
27306 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27308 mem = adjust_address (m_tramp, SImode, offset + 2);
27309 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27310 offset += 6;
27312 else
27314 mem = adjust_address (m_tramp, HImode, offset);
27315 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27317 mem = adjust_address (m_tramp, DImode, offset + 2);
27318 emit_move_insn (mem, fnaddr);
27319 offset += 10;
27322 /* Load the static chain into r10 using movabs. Use the shorter movl
27323 instead of movabs when ptr_mode == SImode. */
27324 if (ptr_mode == SImode)
27326 opcode = 0xba41;
27327 size = 6;
27329 else
27331 opcode = 0xba49;
27332 size = 10;
27335 mem = adjust_address (m_tramp, HImode, offset);
27336 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27338 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27339 emit_move_insn (mem, chain_value);
27340 offset += size;
27342 /* Jump to r11; the last (unused) byte is a nop, only there to
27343 pad the write out to a single 32-bit store. */
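/* 0x90e3ff49 is, in memory order, 49 ff e3 90: REX.W+B jmp *%r11 followed
by a one-byte nop. */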
27344 mem = adjust_address (m_tramp, SImode, offset);
27345 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27346 offset += 4;
27348 else
27350 rtx disp, chain;
27352 /* Depending on the static chain location, either load a register
27353 with a constant, or push the constant to the stack. All of the
27354 instructions are the same size. */
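/* Opcode 0xb8 is movl $imm32, %eax and 0xb9 is movl $imm32, %ecx; 0x68 is
pushl $imm32. Each is 5 bytes including the immediate, as is the 0xe9
jmp rel32 emitted below. */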
27355 chain = ix86_static_chain (fndecl, true);
27356 if (REG_P (chain))
27358 switch (REGNO (chain))
27360 case AX_REG:
27361 opcode = 0xb8; break;
27362 case CX_REG:
27363 opcode = 0xb9; break;
27364 default:
27365 gcc_unreachable ();
27368 else
27369 opcode = 0x68;
27371 mem = adjust_address (m_tramp, QImode, offset);
27372 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27374 mem = adjust_address (m_tramp, SImode, offset + 1);
27375 emit_move_insn (mem, chain_value);
27376 offset += 5;
27378 mem = adjust_address (m_tramp, QImode, offset);
27379 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27381 mem = adjust_address (m_tramp, SImode, offset + 1);
27383 /* Compute offset from the end of the jmp to the target function.
27384 In the case in which the trampoline stores the static chain on
27385 the stack, we need to skip the first insn which pushes the
27386 (call-saved) register static chain; this push is 1 byte. */
27387 offset += 5;
27388 disp = expand_binop (SImode, sub_optab, fnaddr,
27389 plus_constant (Pmode, XEXP (m_tramp, 0),
27390 offset - (MEM_P (chain) ? 1 : 0)),
27391 NULL_RTX, 1, OPTAB_DIRECT);
27392 emit_move_insn (mem, disp);
27395 gcc_assert (offset <= TRAMPOLINE_SIZE);
27397 #ifdef HAVE_ENABLE_EXECUTE_STACK
27398 #ifdef CHECK_EXECUTE_STACK_ENABLED
27399 if (CHECK_EXECUTE_STACK_ENABLED)
27400 #endif
27401 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27402 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27403 #endif
27406 /* The following file contains several enumerations and data structures
27407 built from the definitions in i386-builtin-types.def. */
27409 #include "i386-builtin-types.inc"
27411 /* Table for the ix86 builtin non-function types. */
27412 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27414 /* Retrieve an element from the above table, building some of
27415 the types lazily. */
27417 static tree
27418 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27420 unsigned int index;
27421 tree type, itype;
27423 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27425 type = ix86_builtin_type_tab[(int) tcode];
27426 if (type != NULL)
27427 return type;
27429 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27430 if (tcode <= IX86_BT_LAST_VECT)
27432 enum machine_mode mode;
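/* Vector types are enumerated immediately after the primitive types; INDEX
selects the element type and machine mode from the tables generated from
i386-builtin-types.def. */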
27434 index = tcode - IX86_BT_LAST_PRIM - 1;
27435 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27436 mode = ix86_builtin_type_vect_mode[index];
27438 type = build_vector_type_for_mode (itype, mode);
27440 else
27442 int quals;
27444 index = tcode - IX86_BT_LAST_VECT - 1;
27445 if (tcode <= IX86_BT_LAST_PTR)
27446 quals = TYPE_UNQUALIFIED;
27447 else
27448 quals = TYPE_QUAL_CONST;
27450 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27451 if (quals != TYPE_UNQUALIFIED)
27452 itype = build_qualified_type (itype, quals);
27454 type = build_pointer_type (itype);
27457 ix86_builtin_type_tab[(int) tcode] = type;
27458 return type;
27461 /* Table for the ix86 builtin function types. */
27462 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27464 /* Retrieve an element from the above table, building some of
27465 the types lazily. */
27467 static tree
27468 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27470 tree type;
27472 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27474 type = ix86_builtin_func_type_tab[(int) tcode];
27475 if (type != NULL)
27476 return type;
27478 if (tcode <= IX86_BT_LAST_FUNC)
27480 unsigned start = ix86_builtin_func_start[(int) tcode];
27481 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27482 tree rtype, atype, args = void_list_node;
27483 unsigned i;
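/* ix86_builtin_func_args holds the return type at START followed by the
argument types; cons the argument list back to front so it ends up in
declaration order. */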
27485 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27486 for (i = after - 1; i > start; --i)
27488 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27489 args = tree_cons (NULL, atype, args);
27492 type = build_function_type (rtype, args);
27494 else
27496 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27497 enum ix86_builtin_func_type icode;
27499 icode = ix86_builtin_func_alias_base[index];
27500 type = ix86_get_builtin_func_type (icode);
27503 ix86_builtin_func_type_tab[(int) tcode] = type;
27504 return type;
27508 /* Codes for all the SSE/MMX builtins. */
27509 enum ix86_builtins
27511 IX86_BUILTIN_ADDPS,
27512 IX86_BUILTIN_ADDSS,
27513 IX86_BUILTIN_DIVPS,
27514 IX86_BUILTIN_DIVSS,
27515 IX86_BUILTIN_MULPS,
27516 IX86_BUILTIN_MULSS,
27517 IX86_BUILTIN_SUBPS,
27518 IX86_BUILTIN_SUBSS,
27520 IX86_BUILTIN_CMPEQPS,
27521 IX86_BUILTIN_CMPLTPS,
27522 IX86_BUILTIN_CMPLEPS,
27523 IX86_BUILTIN_CMPGTPS,
27524 IX86_BUILTIN_CMPGEPS,
27525 IX86_BUILTIN_CMPNEQPS,
27526 IX86_BUILTIN_CMPNLTPS,
27527 IX86_BUILTIN_CMPNLEPS,
27528 IX86_BUILTIN_CMPNGTPS,
27529 IX86_BUILTIN_CMPNGEPS,
27530 IX86_BUILTIN_CMPORDPS,
27531 IX86_BUILTIN_CMPUNORDPS,
27532 IX86_BUILTIN_CMPEQSS,
27533 IX86_BUILTIN_CMPLTSS,
27534 IX86_BUILTIN_CMPLESS,
27535 IX86_BUILTIN_CMPNEQSS,
27536 IX86_BUILTIN_CMPNLTSS,
27537 IX86_BUILTIN_CMPNLESS,
27538 IX86_BUILTIN_CMPORDSS,
27539 IX86_BUILTIN_CMPUNORDSS,
27541 IX86_BUILTIN_COMIEQSS,
27542 IX86_BUILTIN_COMILTSS,
27543 IX86_BUILTIN_COMILESS,
27544 IX86_BUILTIN_COMIGTSS,
27545 IX86_BUILTIN_COMIGESS,
27546 IX86_BUILTIN_COMINEQSS,
27547 IX86_BUILTIN_UCOMIEQSS,
27548 IX86_BUILTIN_UCOMILTSS,
27549 IX86_BUILTIN_UCOMILESS,
27550 IX86_BUILTIN_UCOMIGTSS,
27551 IX86_BUILTIN_UCOMIGESS,
27552 IX86_BUILTIN_UCOMINEQSS,
27554 IX86_BUILTIN_CVTPI2PS,
27555 IX86_BUILTIN_CVTPS2PI,
27556 IX86_BUILTIN_CVTSI2SS,
27557 IX86_BUILTIN_CVTSI642SS,
27558 IX86_BUILTIN_CVTSS2SI,
27559 IX86_BUILTIN_CVTSS2SI64,
27560 IX86_BUILTIN_CVTTPS2PI,
27561 IX86_BUILTIN_CVTTSS2SI,
27562 IX86_BUILTIN_CVTTSS2SI64,
27564 IX86_BUILTIN_MAXPS,
27565 IX86_BUILTIN_MAXSS,
27566 IX86_BUILTIN_MINPS,
27567 IX86_BUILTIN_MINSS,
27569 IX86_BUILTIN_LOADUPS,
27570 IX86_BUILTIN_STOREUPS,
27571 IX86_BUILTIN_MOVSS,
27573 IX86_BUILTIN_MOVHLPS,
27574 IX86_BUILTIN_MOVLHPS,
27575 IX86_BUILTIN_LOADHPS,
27576 IX86_BUILTIN_LOADLPS,
27577 IX86_BUILTIN_STOREHPS,
27578 IX86_BUILTIN_STORELPS,
27580 IX86_BUILTIN_MASKMOVQ,
27581 IX86_BUILTIN_MOVMSKPS,
27582 IX86_BUILTIN_PMOVMSKB,
27584 IX86_BUILTIN_MOVNTPS,
27585 IX86_BUILTIN_MOVNTQ,
27587 IX86_BUILTIN_LOADDQU,
27588 IX86_BUILTIN_STOREDQU,
27590 IX86_BUILTIN_PACKSSWB,
27591 IX86_BUILTIN_PACKSSDW,
27592 IX86_BUILTIN_PACKUSWB,
27594 IX86_BUILTIN_PADDB,
27595 IX86_BUILTIN_PADDW,
27596 IX86_BUILTIN_PADDD,
27597 IX86_BUILTIN_PADDQ,
27598 IX86_BUILTIN_PADDSB,
27599 IX86_BUILTIN_PADDSW,
27600 IX86_BUILTIN_PADDUSB,
27601 IX86_BUILTIN_PADDUSW,
27602 IX86_BUILTIN_PSUBB,
27603 IX86_BUILTIN_PSUBW,
27604 IX86_BUILTIN_PSUBD,
27605 IX86_BUILTIN_PSUBQ,
27606 IX86_BUILTIN_PSUBSB,
27607 IX86_BUILTIN_PSUBSW,
27608 IX86_BUILTIN_PSUBUSB,
27609 IX86_BUILTIN_PSUBUSW,
27611 IX86_BUILTIN_PAND,
27612 IX86_BUILTIN_PANDN,
27613 IX86_BUILTIN_POR,
27614 IX86_BUILTIN_PXOR,
27616 IX86_BUILTIN_PAVGB,
27617 IX86_BUILTIN_PAVGW,
27619 IX86_BUILTIN_PCMPEQB,
27620 IX86_BUILTIN_PCMPEQW,
27621 IX86_BUILTIN_PCMPEQD,
27622 IX86_BUILTIN_PCMPGTB,
27623 IX86_BUILTIN_PCMPGTW,
27624 IX86_BUILTIN_PCMPGTD,
27626 IX86_BUILTIN_PMADDWD,
27628 IX86_BUILTIN_PMAXSW,
27629 IX86_BUILTIN_PMAXUB,
27630 IX86_BUILTIN_PMINSW,
27631 IX86_BUILTIN_PMINUB,
27633 IX86_BUILTIN_PMULHUW,
27634 IX86_BUILTIN_PMULHW,
27635 IX86_BUILTIN_PMULLW,
27637 IX86_BUILTIN_PSADBW,
27638 IX86_BUILTIN_PSHUFW,
27640 IX86_BUILTIN_PSLLW,
27641 IX86_BUILTIN_PSLLD,
27642 IX86_BUILTIN_PSLLQ,
27643 IX86_BUILTIN_PSRAW,
27644 IX86_BUILTIN_PSRAD,
27645 IX86_BUILTIN_PSRLW,
27646 IX86_BUILTIN_PSRLD,
27647 IX86_BUILTIN_PSRLQ,
27648 IX86_BUILTIN_PSLLWI,
27649 IX86_BUILTIN_PSLLDI,
27650 IX86_BUILTIN_PSLLQI,
27651 IX86_BUILTIN_PSRAWI,
27652 IX86_BUILTIN_PSRADI,
27653 IX86_BUILTIN_PSRLWI,
27654 IX86_BUILTIN_PSRLDI,
27655 IX86_BUILTIN_PSRLQI,
27657 IX86_BUILTIN_PUNPCKHBW,
27658 IX86_BUILTIN_PUNPCKHWD,
27659 IX86_BUILTIN_PUNPCKHDQ,
27660 IX86_BUILTIN_PUNPCKLBW,
27661 IX86_BUILTIN_PUNPCKLWD,
27662 IX86_BUILTIN_PUNPCKLDQ,
27664 IX86_BUILTIN_SHUFPS,
27666 IX86_BUILTIN_RCPPS,
27667 IX86_BUILTIN_RCPSS,
27668 IX86_BUILTIN_RSQRTPS,
27669 IX86_BUILTIN_RSQRTPS_NR,
27670 IX86_BUILTIN_RSQRTSS,
27671 IX86_BUILTIN_RSQRTF,
27672 IX86_BUILTIN_SQRTPS,
27673 IX86_BUILTIN_SQRTPS_NR,
27674 IX86_BUILTIN_SQRTSS,
27676 IX86_BUILTIN_UNPCKHPS,
27677 IX86_BUILTIN_UNPCKLPS,
27679 IX86_BUILTIN_ANDPS,
27680 IX86_BUILTIN_ANDNPS,
27681 IX86_BUILTIN_ORPS,
27682 IX86_BUILTIN_XORPS,
27684 IX86_BUILTIN_EMMS,
27685 IX86_BUILTIN_LDMXCSR,
27686 IX86_BUILTIN_STMXCSR,
27687 IX86_BUILTIN_SFENCE,
27689 IX86_BUILTIN_FXSAVE,
27690 IX86_BUILTIN_FXRSTOR,
27691 IX86_BUILTIN_FXSAVE64,
27692 IX86_BUILTIN_FXRSTOR64,
27694 IX86_BUILTIN_XSAVE,
27695 IX86_BUILTIN_XRSTOR,
27696 IX86_BUILTIN_XSAVE64,
27697 IX86_BUILTIN_XRSTOR64,
27699 IX86_BUILTIN_XSAVEOPT,
27700 IX86_BUILTIN_XSAVEOPT64,
27702 /* 3DNow! Original */
27703 IX86_BUILTIN_FEMMS,
27704 IX86_BUILTIN_PAVGUSB,
27705 IX86_BUILTIN_PF2ID,
27706 IX86_BUILTIN_PFACC,
27707 IX86_BUILTIN_PFADD,
27708 IX86_BUILTIN_PFCMPEQ,
27709 IX86_BUILTIN_PFCMPGE,
27710 IX86_BUILTIN_PFCMPGT,
27711 IX86_BUILTIN_PFMAX,
27712 IX86_BUILTIN_PFMIN,
27713 IX86_BUILTIN_PFMUL,
27714 IX86_BUILTIN_PFRCP,
27715 IX86_BUILTIN_PFRCPIT1,
27716 IX86_BUILTIN_PFRCPIT2,
27717 IX86_BUILTIN_PFRSQIT1,
27718 IX86_BUILTIN_PFRSQRT,
27719 IX86_BUILTIN_PFSUB,
27720 IX86_BUILTIN_PFSUBR,
27721 IX86_BUILTIN_PI2FD,
27722 IX86_BUILTIN_PMULHRW,
27724 /* 3DNow! Athlon Extensions */
27725 IX86_BUILTIN_PF2IW,
27726 IX86_BUILTIN_PFNACC,
27727 IX86_BUILTIN_PFPNACC,
27728 IX86_BUILTIN_PI2FW,
27729 IX86_BUILTIN_PSWAPDSI,
27730 IX86_BUILTIN_PSWAPDSF,
27732 /* SSE2 */
27733 IX86_BUILTIN_ADDPD,
27734 IX86_BUILTIN_ADDSD,
27735 IX86_BUILTIN_DIVPD,
27736 IX86_BUILTIN_DIVSD,
27737 IX86_BUILTIN_MULPD,
27738 IX86_BUILTIN_MULSD,
27739 IX86_BUILTIN_SUBPD,
27740 IX86_BUILTIN_SUBSD,
27742 IX86_BUILTIN_CMPEQPD,
27743 IX86_BUILTIN_CMPLTPD,
27744 IX86_BUILTIN_CMPLEPD,
27745 IX86_BUILTIN_CMPGTPD,
27746 IX86_BUILTIN_CMPGEPD,
27747 IX86_BUILTIN_CMPNEQPD,
27748 IX86_BUILTIN_CMPNLTPD,
27749 IX86_BUILTIN_CMPNLEPD,
27750 IX86_BUILTIN_CMPNGTPD,
27751 IX86_BUILTIN_CMPNGEPD,
27752 IX86_BUILTIN_CMPORDPD,
27753 IX86_BUILTIN_CMPUNORDPD,
27754 IX86_BUILTIN_CMPEQSD,
27755 IX86_BUILTIN_CMPLTSD,
27756 IX86_BUILTIN_CMPLESD,
27757 IX86_BUILTIN_CMPNEQSD,
27758 IX86_BUILTIN_CMPNLTSD,
27759 IX86_BUILTIN_CMPNLESD,
27760 IX86_BUILTIN_CMPORDSD,
27761 IX86_BUILTIN_CMPUNORDSD,
27763 IX86_BUILTIN_COMIEQSD,
27764 IX86_BUILTIN_COMILTSD,
27765 IX86_BUILTIN_COMILESD,
27766 IX86_BUILTIN_COMIGTSD,
27767 IX86_BUILTIN_COMIGESD,
27768 IX86_BUILTIN_COMINEQSD,
27769 IX86_BUILTIN_UCOMIEQSD,
27770 IX86_BUILTIN_UCOMILTSD,
27771 IX86_BUILTIN_UCOMILESD,
27772 IX86_BUILTIN_UCOMIGTSD,
27773 IX86_BUILTIN_UCOMIGESD,
27774 IX86_BUILTIN_UCOMINEQSD,
27776 IX86_BUILTIN_MAXPD,
27777 IX86_BUILTIN_MAXSD,
27778 IX86_BUILTIN_MINPD,
27779 IX86_BUILTIN_MINSD,
27781 IX86_BUILTIN_ANDPD,
27782 IX86_BUILTIN_ANDNPD,
27783 IX86_BUILTIN_ORPD,
27784 IX86_BUILTIN_XORPD,
27786 IX86_BUILTIN_SQRTPD,
27787 IX86_BUILTIN_SQRTSD,
27789 IX86_BUILTIN_UNPCKHPD,
27790 IX86_BUILTIN_UNPCKLPD,
27792 IX86_BUILTIN_SHUFPD,
27794 IX86_BUILTIN_LOADUPD,
27795 IX86_BUILTIN_STOREUPD,
27796 IX86_BUILTIN_MOVSD,
27798 IX86_BUILTIN_LOADHPD,
27799 IX86_BUILTIN_LOADLPD,
27801 IX86_BUILTIN_CVTDQ2PD,
27802 IX86_BUILTIN_CVTDQ2PS,
27804 IX86_BUILTIN_CVTPD2DQ,
27805 IX86_BUILTIN_CVTPD2PI,
27806 IX86_BUILTIN_CVTPD2PS,
27807 IX86_BUILTIN_CVTTPD2DQ,
27808 IX86_BUILTIN_CVTTPD2PI,
27810 IX86_BUILTIN_CVTPI2PD,
27811 IX86_BUILTIN_CVTSI2SD,
27812 IX86_BUILTIN_CVTSI642SD,
27814 IX86_BUILTIN_CVTSD2SI,
27815 IX86_BUILTIN_CVTSD2SI64,
27816 IX86_BUILTIN_CVTSD2SS,
27817 IX86_BUILTIN_CVTSS2SD,
27818 IX86_BUILTIN_CVTTSD2SI,
27819 IX86_BUILTIN_CVTTSD2SI64,
27821 IX86_BUILTIN_CVTPS2DQ,
27822 IX86_BUILTIN_CVTPS2PD,
27823 IX86_BUILTIN_CVTTPS2DQ,
27825 IX86_BUILTIN_MOVNTI,
27826 IX86_BUILTIN_MOVNTI64,
27827 IX86_BUILTIN_MOVNTPD,
27828 IX86_BUILTIN_MOVNTDQ,
27830 IX86_BUILTIN_MOVQ128,
27832 /* SSE2 MMX */
27833 IX86_BUILTIN_MASKMOVDQU,
27834 IX86_BUILTIN_MOVMSKPD,
27835 IX86_BUILTIN_PMOVMSKB128,
27837 IX86_BUILTIN_PACKSSWB128,
27838 IX86_BUILTIN_PACKSSDW128,
27839 IX86_BUILTIN_PACKUSWB128,
27841 IX86_BUILTIN_PADDB128,
27842 IX86_BUILTIN_PADDW128,
27843 IX86_BUILTIN_PADDD128,
27844 IX86_BUILTIN_PADDQ128,
27845 IX86_BUILTIN_PADDSB128,
27846 IX86_BUILTIN_PADDSW128,
27847 IX86_BUILTIN_PADDUSB128,
27848 IX86_BUILTIN_PADDUSW128,
27849 IX86_BUILTIN_PSUBB128,
27850 IX86_BUILTIN_PSUBW128,
27851 IX86_BUILTIN_PSUBD128,
27852 IX86_BUILTIN_PSUBQ128,
27853 IX86_BUILTIN_PSUBSB128,
27854 IX86_BUILTIN_PSUBSW128,
27855 IX86_BUILTIN_PSUBUSB128,
27856 IX86_BUILTIN_PSUBUSW128,
27858 IX86_BUILTIN_PAND128,
27859 IX86_BUILTIN_PANDN128,
27860 IX86_BUILTIN_POR128,
27861 IX86_BUILTIN_PXOR128,
27863 IX86_BUILTIN_PAVGB128,
27864 IX86_BUILTIN_PAVGW128,
27866 IX86_BUILTIN_PCMPEQB128,
27867 IX86_BUILTIN_PCMPEQW128,
27868 IX86_BUILTIN_PCMPEQD128,
27869 IX86_BUILTIN_PCMPGTB128,
27870 IX86_BUILTIN_PCMPGTW128,
27871 IX86_BUILTIN_PCMPGTD128,
27873 IX86_BUILTIN_PMADDWD128,
27875 IX86_BUILTIN_PMAXSW128,
27876 IX86_BUILTIN_PMAXUB128,
27877 IX86_BUILTIN_PMINSW128,
27878 IX86_BUILTIN_PMINUB128,
27880 IX86_BUILTIN_PMULUDQ,
27881 IX86_BUILTIN_PMULUDQ128,
27882 IX86_BUILTIN_PMULHUW128,
27883 IX86_BUILTIN_PMULHW128,
27884 IX86_BUILTIN_PMULLW128,
27886 IX86_BUILTIN_PSADBW128,
27887 IX86_BUILTIN_PSHUFHW,
27888 IX86_BUILTIN_PSHUFLW,
27889 IX86_BUILTIN_PSHUFD,
27891 IX86_BUILTIN_PSLLDQI128,
27892 IX86_BUILTIN_PSLLWI128,
27893 IX86_BUILTIN_PSLLDI128,
27894 IX86_BUILTIN_PSLLQI128,
27895 IX86_BUILTIN_PSRAWI128,
27896 IX86_BUILTIN_PSRADI128,
27897 IX86_BUILTIN_PSRLDQI128,
27898 IX86_BUILTIN_PSRLWI128,
27899 IX86_BUILTIN_PSRLDI128,
27900 IX86_BUILTIN_PSRLQI128,
27902 IX86_BUILTIN_PSLLDQ128,
27903 IX86_BUILTIN_PSLLW128,
27904 IX86_BUILTIN_PSLLD128,
27905 IX86_BUILTIN_PSLLQ128,
27906 IX86_BUILTIN_PSRAW128,
27907 IX86_BUILTIN_PSRAD128,
27908 IX86_BUILTIN_PSRLW128,
27909 IX86_BUILTIN_PSRLD128,
27910 IX86_BUILTIN_PSRLQ128,
27912 IX86_BUILTIN_PUNPCKHBW128,
27913 IX86_BUILTIN_PUNPCKHWD128,
27914 IX86_BUILTIN_PUNPCKHDQ128,
27915 IX86_BUILTIN_PUNPCKHQDQ128,
27916 IX86_BUILTIN_PUNPCKLBW128,
27917 IX86_BUILTIN_PUNPCKLWD128,
27918 IX86_BUILTIN_PUNPCKLDQ128,
27919 IX86_BUILTIN_PUNPCKLQDQ128,
27921 IX86_BUILTIN_CLFLUSH,
27922 IX86_BUILTIN_MFENCE,
27923 IX86_BUILTIN_LFENCE,
27924 IX86_BUILTIN_PAUSE,
27926 IX86_BUILTIN_FNSTENV,
27927 IX86_BUILTIN_FLDENV,
27928 IX86_BUILTIN_FNSTSW,
27929 IX86_BUILTIN_FNCLEX,
27931 IX86_BUILTIN_BSRSI,
27932 IX86_BUILTIN_BSRDI,
27933 IX86_BUILTIN_RDPMC,
27934 IX86_BUILTIN_RDTSC,
27935 IX86_BUILTIN_RDTSCP,
27936 IX86_BUILTIN_ROLQI,
27937 IX86_BUILTIN_ROLHI,
27938 IX86_BUILTIN_RORQI,
27939 IX86_BUILTIN_RORHI,
27941 /* SSE3. */
27942 IX86_BUILTIN_ADDSUBPS,
27943 IX86_BUILTIN_HADDPS,
27944 IX86_BUILTIN_HSUBPS,
27945 IX86_BUILTIN_MOVSHDUP,
27946 IX86_BUILTIN_MOVSLDUP,
27947 IX86_BUILTIN_ADDSUBPD,
27948 IX86_BUILTIN_HADDPD,
27949 IX86_BUILTIN_HSUBPD,
27950 IX86_BUILTIN_LDDQU,
27952 IX86_BUILTIN_MONITOR,
27953 IX86_BUILTIN_MWAIT,
27955 /* SSSE3. */
27956 IX86_BUILTIN_PHADDW,
27957 IX86_BUILTIN_PHADDD,
27958 IX86_BUILTIN_PHADDSW,
27959 IX86_BUILTIN_PHSUBW,
27960 IX86_BUILTIN_PHSUBD,
27961 IX86_BUILTIN_PHSUBSW,
27962 IX86_BUILTIN_PMADDUBSW,
27963 IX86_BUILTIN_PMULHRSW,
27964 IX86_BUILTIN_PSHUFB,
27965 IX86_BUILTIN_PSIGNB,
27966 IX86_BUILTIN_PSIGNW,
27967 IX86_BUILTIN_PSIGND,
27968 IX86_BUILTIN_PALIGNR,
27969 IX86_BUILTIN_PABSB,
27970 IX86_BUILTIN_PABSW,
27971 IX86_BUILTIN_PABSD,
27973 IX86_BUILTIN_PHADDW128,
27974 IX86_BUILTIN_PHADDD128,
27975 IX86_BUILTIN_PHADDSW128,
27976 IX86_BUILTIN_PHSUBW128,
27977 IX86_BUILTIN_PHSUBD128,
27978 IX86_BUILTIN_PHSUBSW128,
27979 IX86_BUILTIN_PMADDUBSW128,
27980 IX86_BUILTIN_PMULHRSW128,
27981 IX86_BUILTIN_PSHUFB128,
27982 IX86_BUILTIN_PSIGNB128,
27983 IX86_BUILTIN_PSIGNW128,
27984 IX86_BUILTIN_PSIGND128,
27985 IX86_BUILTIN_PALIGNR128,
27986 IX86_BUILTIN_PABSB128,
27987 IX86_BUILTIN_PABSW128,
27988 IX86_BUILTIN_PABSD128,
27990 /* AMDFAM10 - SSE4A New Instructions. */
27991 IX86_BUILTIN_MOVNTSD,
27992 IX86_BUILTIN_MOVNTSS,
27993 IX86_BUILTIN_EXTRQI,
27994 IX86_BUILTIN_EXTRQ,
27995 IX86_BUILTIN_INSERTQI,
27996 IX86_BUILTIN_INSERTQ,
27998 /* SSE4.1. */
27999 IX86_BUILTIN_BLENDPD,
28000 IX86_BUILTIN_BLENDPS,
28001 IX86_BUILTIN_BLENDVPD,
28002 IX86_BUILTIN_BLENDVPS,
28003 IX86_BUILTIN_PBLENDVB128,
28004 IX86_BUILTIN_PBLENDW128,
28006 IX86_BUILTIN_DPPD,
28007 IX86_BUILTIN_DPPS,
28009 IX86_BUILTIN_INSERTPS128,
28011 IX86_BUILTIN_MOVNTDQA,
28012 IX86_BUILTIN_MPSADBW128,
28013 IX86_BUILTIN_PACKUSDW128,
28014 IX86_BUILTIN_PCMPEQQ,
28015 IX86_BUILTIN_PHMINPOSUW128,
28017 IX86_BUILTIN_PMAXSB128,
28018 IX86_BUILTIN_PMAXSD128,
28019 IX86_BUILTIN_PMAXUD128,
28020 IX86_BUILTIN_PMAXUW128,
28022 IX86_BUILTIN_PMINSB128,
28023 IX86_BUILTIN_PMINSD128,
28024 IX86_BUILTIN_PMINUD128,
28025 IX86_BUILTIN_PMINUW128,
28027 IX86_BUILTIN_PMOVSXBW128,
28028 IX86_BUILTIN_PMOVSXBD128,
28029 IX86_BUILTIN_PMOVSXBQ128,
28030 IX86_BUILTIN_PMOVSXWD128,
28031 IX86_BUILTIN_PMOVSXWQ128,
28032 IX86_BUILTIN_PMOVSXDQ128,
28034 IX86_BUILTIN_PMOVZXBW128,
28035 IX86_BUILTIN_PMOVZXBD128,
28036 IX86_BUILTIN_PMOVZXBQ128,
28037 IX86_BUILTIN_PMOVZXWD128,
28038 IX86_BUILTIN_PMOVZXWQ128,
28039 IX86_BUILTIN_PMOVZXDQ128,
28041 IX86_BUILTIN_PMULDQ128,
28042 IX86_BUILTIN_PMULLD128,
28044 IX86_BUILTIN_ROUNDSD,
28045 IX86_BUILTIN_ROUNDSS,
28047 IX86_BUILTIN_ROUNDPD,
28048 IX86_BUILTIN_ROUNDPS,
28050 IX86_BUILTIN_FLOORPD,
28051 IX86_BUILTIN_CEILPD,
28052 IX86_BUILTIN_TRUNCPD,
28053 IX86_BUILTIN_RINTPD,
28054 IX86_BUILTIN_ROUNDPD_AZ,
28056 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28057 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28058 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28060 IX86_BUILTIN_FLOORPS,
28061 IX86_BUILTIN_CEILPS,
28062 IX86_BUILTIN_TRUNCPS,
28063 IX86_BUILTIN_RINTPS,
28064 IX86_BUILTIN_ROUNDPS_AZ,
28066 IX86_BUILTIN_FLOORPS_SFIX,
28067 IX86_BUILTIN_CEILPS_SFIX,
28068 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28070 IX86_BUILTIN_PTESTZ,
28071 IX86_BUILTIN_PTESTC,
28072 IX86_BUILTIN_PTESTNZC,
28074 IX86_BUILTIN_VEC_INIT_V2SI,
28075 IX86_BUILTIN_VEC_INIT_V4HI,
28076 IX86_BUILTIN_VEC_INIT_V8QI,
28077 IX86_BUILTIN_VEC_EXT_V2DF,
28078 IX86_BUILTIN_VEC_EXT_V2DI,
28079 IX86_BUILTIN_VEC_EXT_V4SF,
28080 IX86_BUILTIN_VEC_EXT_V4SI,
28081 IX86_BUILTIN_VEC_EXT_V8HI,
28082 IX86_BUILTIN_VEC_EXT_V2SI,
28083 IX86_BUILTIN_VEC_EXT_V4HI,
28084 IX86_BUILTIN_VEC_EXT_V16QI,
28085 IX86_BUILTIN_VEC_SET_V2DI,
28086 IX86_BUILTIN_VEC_SET_V4SF,
28087 IX86_BUILTIN_VEC_SET_V4SI,
28088 IX86_BUILTIN_VEC_SET_V8HI,
28089 IX86_BUILTIN_VEC_SET_V4HI,
28090 IX86_BUILTIN_VEC_SET_V16QI,
28092 IX86_BUILTIN_VEC_PACK_SFIX,
28093 IX86_BUILTIN_VEC_PACK_SFIX256,
28095 /* SSE4.2. */
28096 IX86_BUILTIN_CRC32QI,
28097 IX86_BUILTIN_CRC32HI,
28098 IX86_BUILTIN_CRC32SI,
28099 IX86_BUILTIN_CRC32DI,
28101 IX86_BUILTIN_PCMPESTRI128,
28102 IX86_BUILTIN_PCMPESTRM128,
28103 IX86_BUILTIN_PCMPESTRA128,
28104 IX86_BUILTIN_PCMPESTRC128,
28105 IX86_BUILTIN_PCMPESTRO128,
28106 IX86_BUILTIN_PCMPESTRS128,
28107 IX86_BUILTIN_PCMPESTRZ128,
28108 IX86_BUILTIN_PCMPISTRI128,
28109 IX86_BUILTIN_PCMPISTRM128,
28110 IX86_BUILTIN_PCMPISTRA128,
28111 IX86_BUILTIN_PCMPISTRC128,
28112 IX86_BUILTIN_PCMPISTRO128,
28113 IX86_BUILTIN_PCMPISTRS128,
28114 IX86_BUILTIN_PCMPISTRZ128,
28116 IX86_BUILTIN_PCMPGTQ,
28118 /* AES instructions */
28119 IX86_BUILTIN_AESENC128,
28120 IX86_BUILTIN_AESENCLAST128,
28121 IX86_BUILTIN_AESDEC128,
28122 IX86_BUILTIN_AESDECLAST128,
28123 IX86_BUILTIN_AESIMC128,
28124 IX86_BUILTIN_AESKEYGENASSIST128,
28126 /* PCLMUL instruction */
28127 IX86_BUILTIN_PCLMULQDQ128,
28129 /* AVX */
28130 IX86_BUILTIN_ADDPD256,
28131 IX86_BUILTIN_ADDPS256,
28132 IX86_BUILTIN_ADDSUBPD256,
28133 IX86_BUILTIN_ADDSUBPS256,
28134 IX86_BUILTIN_ANDPD256,
28135 IX86_BUILTIN_ANDPS256,
28136 IX86_BUILTIN_ANDNPD256,
28137 IX86_BUILTIN_ANDNPS256,
28138 IX86_BUILTIN_BLENDPD256,
28139 IX86_BUILTIN_BLENDPS256,
28140 IX86_BUILTIN_BLENDVPD256,
28141 IX86_BUILTIN_BLENDVPS256,
28142 IX86_BUILTIN_DIVPD256,
28143 IX86_BUILTIN_DIVPS256,
28144 IX86_BUILTIN_DPPS256,
28145 IX86_BUILTIN_HADDPD256,
28146 IX86_BUILTIN_HADDPS256,
28147 IX86_BUILTIN_HSUBPD256,
28148 IX86_BUILTIN_HSUBPS256,
28149 IX86_BUILTIN_MAXPD256,
28150 IX86_BUILTIN_MAXPS256,
28151 IX86_BUILTIN_MINPD256,
28152 IX86_BUILTIN_MINPS256,
28153 IX86_BUILTIN_MULPD256,
28154 IX86_BUILTIN_MULPS256,
28155 IX86_BUILTIN_ORPD256,
28156 IX86_BUILTIN_ORPS256,
28157 IX86_BUILTIN_SHUFPD256,
28158 IX86_BUILTIN_SHUFPS256,
28159 IX86_BUILTIN_SUBPD256,
28160 IX86_BUILTIN_SUBPS256,
28161 IX86_BUILTIN_XORPD256,
28162 IX86_BUILTIN_XORPS256,
28163 IX86_BUILTIN_CMPSD,
28164 IX86_BUILTIN_CMPSS,
28165 IX86_BUILTIN_CMPPD,
28166 IX86_BUILTIN_CMPPS,
28167 IX86_BUILTIN_CMPPD256,
28168 IX86_BUILTIN_CMPPS256,
28169 IX86_BUILTIN_CVTDQ2PD256,
28170 IX86_BUILTIN_CVTDQ2PS256,
28171 IX86_BUILTIN_CVTPD2PS256,
28172 IX86_BUILTIN_CVTPS2DQ256,
28173 IX86_BUILTIN_CVTPS2PD256,
28174 IX86_BUILTIN_CVTTPD2DQ256,
28175 IX86_BUILTIN_CVTPD2DQ256,
28176 IX86_BUILTIN_CVTTPS2DQ256,
28177 IX86_BUILTIN_EXTRACTF128PD256,
28178 IX86_BUILTIN_EXTRACTF128PS256,
28179 IX86_BUILTIN_EXTRACTF128SI256,
28180 IX86_BUILTIN_VZEROALL,
28181 IX86_BUILTIN_VZEROUPPER,
28182 IX86_BUILTIN_VPERMILVARPD,
28183 IX86_BUILTIN_VPERMILVARPS,
28184 IX86_BUILTIN_VPERMILVARPD256,
28185 IX86_BUILTIN_VPERMILVARPS256,
28186 IX86_BUILTIN_VPERMILPD,
28187 IX86_BUILTIN_VPERMILPS,
28188 IX86_BUILTIN_VPERMILPD256,
28189 IX86_BUILTIN_VPERMILPS256,
28190 IX86_BUILTIN_VPERMIL2PD,
28191 IX86_BUILTIN_VPERMIL2PS,
28192 IX86_BUILTIN_VPERMIL2PD256,
28193 IX86_BUILTIN_VPERMIL2PS256,
28194 IX86_BUILTIN_VPERM2F128PD256,
28195 IX86_BUILTIN_VPERM2F128PS256,
28196 IX86_BUILTIN_VPERM2F128SI256,
28197 IX86_BUILTIN_VBROADCASTSS,
28198 IX86_BUILTIN_VBROADCASTSD256,
28199 IX86_BUILTIN_VBROADCASTSS256,
28200 IX86_BUILTIN_VBROADCASTPD256,
28201 IX86_BUILTIN_VBROADCASTPS256,
28202 IX86_BUILTIN_VINSERTF128PD256,
28203 IX86_BUILTIN_VINSERTF128PS256,
28204 IX86_BUILTIN_VINSERTF128SI256,
28205 IX86_BUILTIN_LOADUPD256,
28206 IX86_BUILTIN_LOADUPS256,
28207 IX86_BUILTIN_STOREUPD256,
28208 IX86_BUILTIN_STOREUPS256,
28209 IX86_BUILTIN_LDDQU256,
28210 IX86_BUILTIN_MOVNTDQ256,
28211 IX86_BUILTIN_MOVNTPD256,
28212 IX86_BUILTIN_MOVNTPS256,
28213 IX86_BUILTIN_LOADDQU256,
28214 IX86_BUILTIN_STOREDQU256,
28215 IX86_BUILTIN_MASKLOADPD,
28216 IX86_BUILTIN_MASKLOADPS,
28217 IX86_BUILTIN_MASKSTOREPD,
28218 IX86_BUILTIN_MASKSTOREPS,
28219 IX86_BUILTIN_MASKLOADPD256,
28220 IX86_BUILTIN_MASKLOADPS256,
28221 IX86_BUILTIN_MASKSTOREPD256,
28222 IX86_BUILTIN_MASKSTOREPS256,
28223 IX86_BUILTIN_MOVSHDUP256,
28224 IX86_BUILTIN_MOVSLDUP256,
28225 IX86_BUILTIN_MOVDDUP256,
28227 IX86_BUILTIN_SQRTPD256,
28228 IX86_BUILTIN_SQRTPS256,
28229 IX86_BUILTIN_SQRTPS_NR256,
28230 IX86_BUILTIN_RSQRTPS256,
28231 IX86_BUILTIN_RSQRTPS_NR256,
28233 IX86_BUILTIN_RCPPS256,
28235 IX86_BUILTIN_ROUNDPD256,
28236 IX86_BUILTIN_ROUNDPS256,
28238 IX86_BUILTIN_FLOORPD256,
28239 IX86_BUILTIN_CEILPD256,
28240 IX86_BUILTIN_TRUNCPD256,
28241 IX86_BUILTIN_RINTPD256,
28242 IX86_BUILTIN_ROUNDPD_AZ256,
28244 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28245 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28246 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28248 IX86_BUILTIN_FLOORPS256,
28249 IX86_BUILTIN_CEILPS256,
28250 IX86_BUILTIN_TRUNCPS256,
28251 IX86_BUILTIN_RINTPS256,
28252 IX86_BUILTIN_ROUNDPS_AZ256,
28254 IX86_BUILTIN_FLOORPS_SFIX256,
28255 IX86_BUILTIN_CEILPS_SFIX256,
28256 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28258 IX86_BUILTIN_UNPCKHPD256,
28259 IX86_BUILTIN_UNPCKLPD256,
28260 IX86_BUILTIN_UNPCKHPS256,
28261 IX86_BUILTIN_UNPCKLPS256,
28263 IX86_BUILTIN_SI256_SI,
28264 IX86_BUILTIN_PS256_PS,
28265 IX86_BUILTIN_PD256_PD,
28266 IX86_BUILTIN_SI_SI256,
28267 IX86_BUILTIN_PS_PS256,
28268 IX86_BUILTIN_PD_PD256,
28270 IX86_BUILTIN_VTESTZPD,
28271 IX86_BUILTIN_VTESTCPD,
28272 IX86_BUILTIN_VTESTNZCPD,
28273 IX86_BUILTIN_VTESTZPS,
28274 IX86_BUILTIN_VTESTCPS,
28275 IX86_BUILTIN_VTESTNZCPS,
28276 IX86_BUILTIN_VTESTZPD256,
28277 IX86_BUILTIN_VTESTCPD256,
28278 IX86_BUILTIN_VTESTNZCPD256,
28279 IX86_BUILTIN_VTESTZPS256,
28280 IX86_BUILTIN_VTESTCPS256,
28281 IX86_BUILTIN_VTESTNZCPS256,
28282 IX86_BUILTIN_PTESTZ256,
28283 IX86_BUILTIN_PTESTC256,
28284 IX86_BUILTIN_PTESTNZC256,
28286 IX86_BUILTIN_MOVMSKPD256,
28287 IX86_BUILTIN_MOVMSKPS256,
28289 /* AVX2 */
28290 IX86_BUILTIN_MPSADBW256,
28291 IX86_BUILTIN_PABSB256,
28292 IX86_BUILTIN_PABSW256,
28293 IX86_BUILTIN_PABSD256,
28294 IX86_BUILTIN_PACKSSDW256,
28295 IX86_BUILTIN_PACKSSWB256,
28296 IX86_BUILTIN_PACKUSDW256,
28297 IX86_BUILTIN_PACKUSWB256,
28298 IX86_BUILTIN_PADDB256,
28299 IX86_BUILTIN_PADDW256,
28300 IX86_BUILTIN_PADDD256,
28301 IX86_BUILTIN_PADDQ256,
28302 IX86_BUILTIN_PADDSB256,
28303 IX86_BUILTIN_PADDSW256,
28304 IX86_BUILTIN_PADDUSB256,
28305 IX86_BUILTIN_PADDUSW256,
28306 IX86_BUILTIN_PALIGNR256,
28307 IX86_BUILTIN_AND256I,
28308 IX86_BUILTIN_ANDNOT256I,
28309 IX86_BUILTIN_PAVGB256,
28310 IX86_BUILTIN_PAVGW256,
28311 IX86_BUILTIN_PBLENDVB256,
28312 IX86_BUILTIN_PBLENDVW256,
28313 IX86_BUILTIN_PCMPEQB256,
28314 IX86_BUILTIN_PCMPEQW256,
28315 IX86_BUILTIN_PCMPEQD256,
28316 IX86_BUILTIN_PCMPEQQ256,
28317 IX86_BUILTIN_PCMPGTB256,
28318 IX86_BUILTIN_PCMPGTW256,
28319 IX86_BUILTIN_PCMPGTD256,
28320 IX86_BUILTIN_PCMPGTQ256,
28321 IX86_BUILTIN_PHADDW256,
28322 IX86_BUILTIN_PHADDD256,
28323 IX86_BUILTIN_PHADDSW256,
28324 IX86_BUILTIN_PHSUBW256,
28325 IX86_BUILTIN_PHSUBD256,
28326 IX86_BUILTIN_PHSUBSW256,
28327 IX86_BUILTIN_PMADDUBSW256,
28328 IX86_BUILTIN_PMADDWD256,
28329 IX86_BUILTIN_PMAXSB256,
28330 IX86_BUILTIN_PMAXSW256,
28331 IX86_BUILTIN_PMAXSD256,
28332 IX86_BUILTIN_PMAXUB256,
28333 IX86_BUILTIN_PMAXUW256,
28334 IX86_BUILTIN_PMAXUD256,
28335 IX86_BUILTIN_PMINSB256,
28336 IX86_BUILTIN_PMINSW256,
28337 IX86_BUILTIN_PMINSD256,
28338 IX86_BUILTIN_PMINUB256,
28339 IX86_BUILTIN_PMINUW256,
28340 IX86_BUILTIN_PMINUD256,
28341 IX86_BUILTIN_PMOVMSKB256,
28342 IX86_BUILTIN_PMOVSXBW256,
28343 IX86_BUILTIN_PMOVSXBD256,
28344 IX86_BUILTIN_PMOVSXBQ256,
28345 IX86_BUILTIN_PMOVSXWD256,
28346 IX86_BUILTIN_PMOVSXWQ256,
28347 IX86_BUILTIN_PMOVSXDQ256,
28348 IX86_BUILTIN_PMOVZXBW256,
28349 IX86_BUILTIN_PMOVZXBD256,
28350 IX86_BUILTIN_PMOVZXBQ256,
28351 IX86_BUILTIN_PMOVZXWD256,
28352 IX86_BUILTIN_PMOVZXWQ256,
28353 IX86_BUILTIN_PMOVZXDQ256,
28354 IX86_BUILTIN_PMULDQ256,
28355 IX86_BUILTIN_PMULHRSW256,
28356 IX86_BUILTIN_PMULHUW256,
28357 IX86_BUILTIN_PMULHW256,
28358 IX86_BUILTIN_PMULLW256,
28359 IX86_BUILTIN_PMULLD256,
28360 IX86_BUILTIN_PMULUDQ256,
28361 IX86_BUILTIN_POR256,
28362 IX86_BUILTIN_PSADBW256,
28363 IX86_BUILTIN_PSHUFB256,
28364 IX86_BUILTIN_PSHUFD256,
28365 IX86_BUILTIN_PSHUFHW256,
28366 IX86_BUILTIN_PSHUFLW256,
28367 IX86_BUILTIN_PSIGNB256,
28368 IX86_BUILTIN_PSIGNW256,
28369 IX86_BUILTIN_PSIGND256,
28370 IX86_BUILTIN_PSLLDQI256,
28371 IX86_BUILTIN_PSLLWI256,
28372 IX86_BUILTIN_PSLLW256,
28373 IX86_BUILTIN_PSLLDI256,
28374 IX86_BUILTIN_PSLLD256,
28375 IX86_BUILTIN_PSLLQI256,
28376 IX86_BUILTIN_PSLLQ256,
28377 IX86_BUILTIN_PSRAWI256,
28378 IX86_BUILTIN_PSRAW256,
28379 IX86_BUILTIN_PSRADI256,
28380 IX86_BUILTIN_PSRAD256,
28381 IX86_BUILTIN_PSRLDQI256,
28382 IX86_BUILTIN_PSRLWI256,
28383 IX86_BUILTIN_PSRLW256,
28384 IX86_BUILTIN_PSRLDI256,
28385 IX86_BUILTIN_PSRLD256,
28386 IX86_BUILTIN_PSRLQI256,
28387 IX86_BUILTIN_PSRLQ256,
28388 IX86_BUILTIN_PSUBB256,
28389 IX86_BUILTIN_PSUBW256,
28390 IX86_BUILTIN_PSUBD256,
28391 IX86_BUILTIN_PSUBQ256,
28392 IX86_BUILTIN_PSUBSB256,
28393 IX86_BUILTIN_PSUBSW256,
28394 IX86_BUILTIN_PSUBUSB256,
28395 IX86_BUILTIN_PSUBUSW256,
28396 IX86_BUILTIN_PUNPCKHBW256,
28397 IX86_BUILTIN_PUNPCKHWD256,
28398 IX86_BUILTIN_PUNPCKHDQ256,
28399 IX86_BUILTIN_PUNPCKHQDQ256,
28400 IX86_BUILTIN_PUNPCKLBW256,
28401 IX86_BUILTIN_PUNPCKLWD256,
28402 IX86_BUILTIN_PUNPCKLDQ256,
28403 IX86_BUILTIN_PUNPCKLQDQ256,
28404 IX86_BUILTIN_PXOR256,
28405 IX86_BUILTIN_MOVNTDQA256,
28406 IX86_BUILTIN_VBROADCASTSS_PS,
28407 IX86_BUILTIN_VBROADCASTSS_PS256,
28408 IX86_BUILTIN_VBROADCASTSD_PD256,
28409 IX86_BUILTIN_VBROADCASTSI256,
28410 IX86_BUILTIN_PBLENDD256,
28411 IX86_BUILTIN_PBLENDD128,
28412 IX86_BUILTIN_PBROADCASTB256,
28413 IX86_BUILTIN_PBROADCASTW256,
28414 IX86_BUILTIN_PBROADCASTD256,
28415 IX86_BUILTIN_PBROADCASTQ256,
28416 IX86_BUILTIN_PBROADCASTB128,
28417 IX86_BUILTIN_PBROADCASTW128,
28418 IX86_BUILTIN_PBROADCASTD128,
28419 IX86_BUILTIN_PBROADCASTQ128,
28420 IX86_BUILTIN_VPERMVARSI256,
28421 IX86_BUILTIN_VPERMDF256,
28422 IX86_BUILTIN_VPERMVARSF256,
28423 IX86_BUILTIN_VPERMDI256,
28424 IX86_BUILTIN_VPERMTI256,
28425 IX86_BUILTIN_VEXTRACT128I256,
28426 IX86_BUILTIN_VINSERT128I256,
28427 IX86_BUILTIN_MASKLOADD,
28428 IX86_BUILTIN_MASKLOADQ,
28429 IX86_BUILTIN_MASKLOADD256,
28430 IX86_BUILTIN_MASKLOADQ256,
28431 IX86_BUILTIN_MASKSTORED,
28432 IX86_BUILTIN_MASKSTOREQ,
28433 IX86_BUILTIN_MASKSTORED256,
28434 IX86_BUILTIN_MASKSTOREQ256,
28435 IX86_BUILTIN_PSLLVV4DI,
28436 IX86_BUILTIN_PSLLVV2DI,
28437 IX86_BUILTIN_PSLLVV8SI,
28438 IX86_BUILTIN_PSLLVV4SI,
28439 IX86_BUILTIN_PSRAVV8SI,
28440 IX86_BUILTIN_PSRAVV4SI,
28441 IX86_BUILTIN_PSRLVV4DI,
28442 IX86_BUILTIN_PSRLVV2DI,
28443 IX86_BUILTIN_PSRLVV8SI,
28444 IX86_BUILTIN_PSRLVV4SI,
28446 IX86_BUILTIN_GATHERSIV2DF,
28447 IX86_BUILTIN_GATHERSIV4DF,
28448 IX86_BUILTIN_GATHERDIV2DF,
28449 IX86_BUILTIN_GATHERDIV4DF,
28450 IX86_BUILTIN_GATHERSIV4SF,
28451 IX86_BUILTIN_GATHERSIV8SF,
28452 IX86_BUILTIN_GATHERDIV4SF,
28453 IX86_BUILTIN_GATHERDIV8SF,
28454 IX86_BUILTIN_GATHERSIV2DI,
28455 IX86_BUILTIN_GATHERSIV4DI,
28456 IX86_BUILTIN_GATHERDIV2DI,
28457 IX86_BUILTIN_GATHERDIV4DI,
28458 IX86_BUILTIN_GATHERSIV4SI,
28459 IX86_BUILTIN_GATHERSIV8SI,
28460 IX86_BUILTIN_GATHERDIV4SI,
28461 IX86_BUILTIN_GATHERDIV8SI,
28463 /* AVX512F */
28464 IX86_BUILTIN_ADDPD512,
28465 IX86_BUILTIN_ADDPS512,
28466 IX86_BUILTIN_ADDSD_ROUND,
28467 IX86_BUILTIN_ADDSS_ROUND,
28468 IX86_BUILTIN_ALIGND512,
28469 IX86_BUILTIN_ALIGNQ512,
28470 IX86_BUILTIN_BLENDMD512,
28471 IX86_BUILTIN_BLENDMPD512,
28472 IX86_BUILTIN_BLENDMPS512,
28473 IX86_BUILTIN_BLENDMQ512,
28474 IX86_BUILTIN_BROADCASTF32X4_512,
28475 IX86_BUILTIN_BROADCASTF64X4_512,
28476 IX86_BUILTIN_BROADCASTI32X4_512,
28477 IX86_BUILTIN_BROADCASTI64X4_512,
28478 IX86_BUILTIN_BROADCASTSD512,
28479 IX86_BUILTIN_BROADCASTSS512,
28480 IX86_BUILTIN_CMPD512,
28481 IX86_BUILTIN_CMPPD512,
28482 IX86_BUILTIN_CMPPS512,
28483 IX86_BUILTIN_CMPQ512,
28484 IX86_BUILTIN_CMPSD_MASK,
28485 IX86_BUILTIN_CMPSS_MASK,
28486 IX86_BUILTIN_COMIDF,
28487 IX86_BUILTIN_COMISF,
28488 IX86_BUILTIN_COMPRESSPD512,
28489 IX86_BUILTIN_COMPRESSPDSTORE512,
28490 IX86_BUILTIN_COMPRESSPS512,
28491 IX86_BUILTIN_COMPRESSPSSTORE512,
28492 IX86_BUILTIN_CVTDQ2PD512,
28493 IX86_BUILTIN_CVTDQ2PS512,
28494 IX86_BUILTIN_CVTPD2DQ512,
28495 IX86_BUILTIN_CVTPD2PS512,
28496 IX86_BUILTIN_CVTPD2UDQ512,
28497 IX86_BUILTIN_CVTPH2PS512,
28498 IX86_BUILTIN_CVTPS2DQ512,
28499 IX86_BUILTIN_CVTPS2PD512,
28500 IX86_BUILTIN_CVTPS2PH512,
28501 IX86_BUILTIN_CVTPS2UDQ512,
28502 IX86_BUILTIN_CVTSD2SS_ROUND,
28503 IX86_BUILTIN_CVTSI2SD64,
28504 IX86_BUILTIN_CVTSI2SS32,
28505 IX86_BUILTIN_CVTSI2SS64,
28506 IX86_BUILTIN_CVTSS2SD_ROUND,
28507 IX86_BUILTIN_CVTTPD2DQ512,
28508 IX86_BUILTIN_CVTTPD2UDQ512,
28509 IX86_BUILTIN_CVTTPS2DQ512,
28510 IX86_BUILTIN_CVTTPS2UDQ512,
28511 IX86_BUILTIN_CVTUDQ2PD512,
28512 IX86_BUILTIN_CVTUDQ2PS512,
28513 IX86_BUILTIN_CVTUSI2SD32,
28514 IX86_BUILTIN_CVTUSI2SD64,
28515 IX86_BUILTIN_CVTUSI2SS32,
28516 IX86_BUILTIN_CVTUSI2SS64,
28517 IX86_BUILTIN_DIVPD512,
28518 IX86_BUILTIN_DIVPS512,
28519 IX86_BUILTIN_DIVSD_ROUND,
28520 IX86_BUILTIN_DIVSS_ROUND,
28521 IX86_BUILTIN_EXPANDPD512,
28522 IX86_BUILTIN_EXPANDPD512Z,
28523 IX86_BUILTIN_EXPANDPDLOAD512,
28524 IX86_BUILTIN_EXPANDPDLOAD512Z,
28525 IX86_BUILTIN_EXPANDPS512,
28526 IX86_BUILTIN_EXPANDPS512Z,
28527 IX86_BUILTIN_EXPANDPSLOAD512,
28528 IX86_BUILTIN_EXPANDPSLOAD512Z,
28529 IX86_BUILTIN_EXTRACTF32X4,
28530 IX86_BUILTIN_EXTRACTF64X4,
28531 IX86_BUILTIN_EXTRACTI32X4,
28532 IX86_BUILTIN_EXTRACTI64X4,
28533 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28534 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28535 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28536 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28537 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28538 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28539 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28540 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28541 IX86_BUILTIN_GETEXPPD512,
28542 IX86_BUILTIN_GETEXPPS512,
28543 IX86_BUILTIN_GETEXPSD128,
28544 IX86_BUILTIN_GETEXPSS128,
28545 IX86_BUILTIN_GETMANTPD512,
28546 IX86_BUILTIN_GETMANTPS512,
28547 IX86_BUILTIN_GETMANTSD128,
28548 IX86_BUILTIN_GETMANTSS128,
28549 IX86_BUILTIN_INSERTF32X4,
28550 IX86_BUILTIN_INSERTF64X4,
28551 IX86_BUILTIN_INSERTI32X4,
28552 IX86_BUILTIN_INSERTI64X4,
28553 IX86_BUILTIN_LOADAPD512,
28554 IX86_BUILTIN_LOADAPS512,
28555 IX86_BUILTIN_LOADDQUDI512,
28556 IX86_BUILTIN_LOADDQUSI512,
28557 IX86_BUILTIN_LOADUPD512,
28558 IX86_BUILTIN_LOADUPS512,
28559 IX86_BUILTIN_MAXPD512,
28560 IX86_BUILTIN_MAXPS512,
28561 IX86_BUILTIN_MAXSD_ROUND,
28562 IX86_BUILTIN_MAXSS_ROUND,
28563 IX86_BUILTIN_MINPD512,
28564 IX86_BUILTIN_MINPS512,
28565 IX86_BUILTIN_MINSD_ROUND,
28566 IX86_BUILTIN_MINSS_ROUND,
28567 IX86_BUILTIN_MOVAPD512,
28568 IX86_BUILTIN_MOVAPS512,
28569 IX86_BUILTIN_MOVDDUP512,
28570 IX86_BUILTIN_MOVDQA32LOAD512,
28571 IX86_BUILTIN_MOVDQA32STORE512,
28572 IX86_BUILTIN_MOVDQA32_512,
28573 IX86_BUILTIN_MOVDQA64LOAD512,
28574 IX86_BUILTIN_MOVDQA64STORE512,
28575 IX86_BUILTIN_MOVDQA64_512,
28576 IX86_BUILTIN_MOVNTDQ512,
28577 IX86_BUILTIN_MOVNTDQA512,
28578 IX86_BUILTIN_MOVNTPD512,
28579 IX86_BUILTIN_MOVNTPS512,
28580 IX86_BUILTIN_MOVSHDUP512,
28581 IX86_BUILTIN_MOVSLDUP512,
28582 IX86_BUILTIN_MULPD512,
28583 IX86_BUILTIN_MULPS512,
28584 IX86_BUILTIN_MULSD_ROUND,
28585 IX86_BUILTIN_MULSS_ROUND,
28586 IX86_BUILTIN_PABSD512,
28587 IX86_BUILTIN_PABSQ512,
28588 IX86_BUILTIN_PADDD512,
28589 IX86_BUILTIN_PADDQ512,
28590 IX86_BUILTIN_PANDD512,
28591 IX86_BUILTIN_PANDND512,
28592 IX86_BUILTIN_PANDNQ512,
28593 IX86_BUILTIN_PANDQ512,
28594 IX86_BUILTIN_PBROADCASTD512,
28595 IX86_BUILTIN_PBROADCASTD512_GPR,
28596 IX86_BUILTIN_PBROADCASTMB512,
28597 IX86_BUILTIN_PBROADCASTMW512,
28598 IX86_BUILTIN_PBROADCASTQ512,
28599 IX86_BUILTIN_PBROADCASTQ512_GPR,
28600 IX86_BUILTIN_PBROADCASTQ512_MEM,
28601 IX86_BUILTIN_PCMPEQD512_MASK,
28602 IX86_BUILTIN_PCMPEQQ512_MASK,
28603 IX86_BUILTIN_PCMPGTD512_MASK,
28604 IX86_BUILTIN_PCMPGTQ512_MASK,
28605 IX86_BUILTIN_PCOMPRESSD512,
28606 IX86_BUILTIN_PCOMPRESSDSTORE512,
28607 IX86_BUILTIN_PCOMPRESSQ512,
28608 IX86_BUILTIN_PCOMPRESSQSTORE512,
28609 IX86_BUILTIN_PEXPANDD512,
28610 IX86_BUILTIN_PEXPANDD512Z,
28611 IX86_BUILTIN_PEXPANDDLOAD512,
28612 IX86_BUILTIN_PEXPANDDLOAD512Z,
28613 IX86_BUILTIN_PEXPANDQ512,
28614 IX86_BUILTIN_PEXPANDQ512Z,
28615 IX86_BUILTIN_PEXPANDQLOAD512,
28616 IX86_BUILTIN_PEXPANDQLOAD512Z,
28617 IX86_BUILTIN_PMAXSD512,
28618 IX86_BUILTIN_PMAXSQ512,
28619 IX86_BUILTIN_PMAXUD512,
28620 IX86_BUILTIN_PMAXUQ512,
28621 IX86_BUILTIN_PMINSD512,
28622 IX86_BUILTIN_PMINSQ512,
28623 IX86_BUILTIN_PMINUD512,
28624 IX86_BUILTIN_PMINUQ512,
28625 IX86_BUILTIN_PMOVDB512,
28626 IX86_BUILTIN_PMOVDB512_MEM,
28627 IX86_BUILTIN_PMOVDW512,
28628 IX86_BUILTIN_PMOVDW512_MEM,
28629 IX86_BUILTIN_PMOVQB512,
28630 IX86_BUILTIN_PMOVQB512_MEM,
28631 IX86_BUILTIN_PMOVQD512,
28632 IX86_BUILTIN_PMOVQD512_MEM,
28633 IX86_BUILTIN_PMOVQW512,
28634 IX86_BUILTIN_PMOVQW512_MEM,
28635 IX86_BUILTIN_PMOVSDB512,
28636 IX86_BUILTIN_PMOVSDB512_MEM,
28637 IX86_BUILTIN_PMOVSDW512,
28638 IX86_BUILTIN_PMOVSDW512_MEM,
28639 IX86_BUILTIN_PMOVSQB512,
28640 IX86_BUILTIN_PMOVSQB512_MEM,
28641 IX86_BUILTIN_PMOVSQD512,
28642 IX86_BUILTIN_PMOVSQD512_MEM,
28643 IX86_BUILTIN_PMOVSQW512,
28644 IX86_BUILTIN_PMOVSQW512_MEM,
28645 IX86_BUILTIN_PMOVSXBD512,
28646 IX86_BUILTIN_PMOVSXBQ512,
28647 IX86_BUILTIN_PMOVSXDQ512,
28648 IX86_BUILTIN_PMOVSXWD512,
28649 IX86_BUILTIN_PMOVSXWQ512,
28650 IX86_BUILTIN_PMOVUSDB512,
28651 IX86_BUILTIN_PMOVUSDB512_MEM,
28652 IX86_BUILTIN_PMOVUSDW512,
28653 IX86_BUILTIN_PMOVUSDW512_MEM,
28654 IX86_BUILTIN_PMOVUSQB512,
28655 IX86_BUILTIN_PMOVUSQB512_MEM,
28656 IX86_BUILTIN_PMOVUSQD512,
28657 IX86_BUILTIN_PMOVUSQD512_MEM,
28658 IX86_BUILTIN_PMOVUSQW512,
28659 IX86_BUILTIN_PMOVUSQW512_MEM,
28660 IX86_BUILTIN_PMOVZXBD512,
28661 IX86_BUILTIN_PMOVZXBQ512,
28662 IX86_BUILTIN_PMOVZXDQ512,
28663 IX86_BUILTIN_PMOVZXWD512,
28664 IX86_BUILTIN_PMOVZXWQ512,
28665 IX86_BUILTIN_PMULDQ512,
28666 IX86_BUILTIN_PMULLD512,
28667 IX86_BUILTIN_PMULUDQ512,
28668 IX86_BUILTIN_PORD512,
28669 IX86_BUILTIN_PORQ512,
28670 IX86_BUILTIN_PROLD512,
28671 IX86_BUILTIN_PROLQ512,
28672 IX86_BUILTIN_PROLVD512,
28673 IX86_BUILTIN_PROLVQ512,
28674 IX86_BUILTIN_PRORD512,
28675 IX86_BUILTIN_PRORQ512,
28676 IX86_BUILTIN_PRORVD512,
28677 IX86_BUILTIN_PRORVQ512,
28678 IX86_BUILTIN_PSHUFD512,
28679 IX86_BUILTIN_PSLLD512,
28680 IX86_BUILTIN_PSLLDI512,
28681 IX86_BUILTIN_PSLLQ512,
28682 IX86_BUILTIN_PSLLQI512,
28683 IX86_BUILTIN_PSLLVV16SI,
28684 IX86_BUILTIN_PSLLVV8DI,
28685 IX86_BUILTIN_PSRAD512,
28686 IX86_BUILTIN_PSRADI512,
28687 IX86_BUILTIN_PSRAQ512,
28688 IX86_BUILTIN_PSRAQI512,
28689 IX86_BUILTIN_PSRAVV16SI,
28690 IX86_BUILTIN_PSRAVV8DI,
28691 IX86_BUILTIN_PSRLD512,
28692 IX86_BUILTIN_PSRLDI512,
28693 IX86_BUILTIN_PSRLQ512,
28694 IX86_BUILTIN_PSRLQI512,
28695 IX86_BUILTIN_PSRLVV16SI,
28696 IX86_BUILTIN_PSRLVV8DI,
28697 IX86_BUILTIN_PSUBD512,
28698 IX86_BUILTIN_PSUBQ512,
28699 IX86_BUILTIN_PTESTMD512,
28700 IX86_BUILTIN_PTESTMQ512,
28701 IX86_BUILTIN_PTESTNMD512,
28702 IX86_BUILTIN_PTESTNMQ512,
28703 IX86_BUILTIN_PUNPCKHDQ512,
28704 IX86_BUILTIN_PUNPCKHQDQ512,
28705 IX86_BUILTIN_PUNPCKLDQ512,
28706 IX86_BUILTIN_PUNPCKLQDQ512,
28707 IX86_BUILTIN_PXORD512,
28708 IX86_BUILTIN_PXORQ512,
28709 IX86_BUILTIN_RCP14PD512,
28710 IX86_BUILTIN_RCP14PS512,
28711 IX86_BUILTIN_RCP14SD,
28712 IX86_BUILTIN_RCP14SS,
28713 IX86_BUILTIN_RNDSCALEPD,
28714 IX86_BUILTIN_RNDSCALEPS,
28715 IX86_BUILTIN_RNDSCALESD,
28716 IX86_BUILTIN_RNDSCALESS,
28717 IX86_BUILTIN_RSQRT14PD512,
28718 IX86_BUILTIN_RSQRT14PS512,
28719 IX86_BUILTIN_RSQRT14SD,
28720 IX86_BUILTIN_RSQRT14SS,
28721 IX86_BUILTIN_SCALEFPD512,
28722 IX86_BUILTIN_SCALEFPS512,
28723 IX86_BUILTIN_SCALEFSD,
28724 IX86_BUILTIN_SCALEFSS,
28725 IX86_BUILTIN_SHUFPD512,
28726 IX86_BUILTIN_SHUFPS512,
28727 IX86_BUILTIN_SHUF_F32x4,
28728 IX86_BUILTIN_SHUF_F64x2,
28729 IX86_BUILTIN_SHUF_I32x4,
28730 IX86_BUILTIN_SHUF_I64x2,
28731 IX86_BUILTIN_SQRTPD512,
28732 IX86_BUILTIN_SQRTPD512_MASK,
28733 IX86_BUILTIN_SQRTPS512_MASK,
28734 IX86_BUILTIN_SQRTPS_NR512,
28735 IX86_BUILTIN_SQRTSD_ROUND,
28736 IX86_BUILTIN_SQRTSS_ROUND,
28737 IX86_BUILTIN_STOREAPD512,
28738 IX86_BUILTIN_STOREAPS512,
28739 IX86_BUILTIN_STOREDQUDI512,
28740 IX86_BUILTIN_STOREDQUSI512,
28741 IX86_BUILTIN_STOREUPD512,
28742 IX86_BUILTIN_STOREUPS512,
28743 IX86_BUILTIN_SUBPD512,
28744 IX86_BUILTIN_SUBPS512,
28745 IX86_BUILTIN_SUBSD_ROUND,
28746 IX86_BUILTIN_SUBSS_ROUND,
28747 IX86_BUILTIN_UCMPD512,
28748 IX86_BUILTIN_UCMPQ512,
28749 IX86_BUILTIN_UNPCKHPD512,
28750 IX86_BUILTIN_UNPCKHPS512,
28751 IX86_BUILTIN_UNPCKLPD512,
28752 IX86_BUILTIN_UNPCKLPS512,
28753 IX86_BUILTIN_VCVTSD2SI32,
28754 IX86_BUILTIN_VCVTSD2SI64,
28755 IX86_BUILTIN_VCVTSD2USI32,
28756 IX86_BUILTIN_VCVTSD2USI64,
28757 IX86_BUILTIN_VCVTSS2SI32,
28758 IX86_BUILTIN_VCVTSS2SI64,
28759 IX86_BUILTIN_VCVTSS2USI32,
28760 IX86_BUILTIN_VCVTSS2USI64,
28761 IX86_BUILTIN_VCVTTSD2SI32,
28762 IX86_BUILTIN_VCVTTSD2SI64,
28763 IX86_BUILTIN_VCVTTSD2USI32,
28764 IX86_BUILTIN_VCVTTSD2USI64,
28765 IX86_BUILTIN_VCVTTSS2SI32,
28766 IX86_BUILTIN_VCVTTSS2SI64,
28767 IX86_BUILTIN_VCVTTSS2USI32,
28768 IX86_BUILTIN_VCVTTSS2USI64,
28769 IX86_BUILTIN_VFMADDPD512_MASK,
28770 IX86_BUILTIN_VFMADDPD512_MASK3,
28771 IX86_BUILTIN_VFMADDPD512_MASKZ,
28772 IX86_BUILTIN_VFMADDPS512_MASK,
28773 IX86_BUILTIN_VFMADDPS512_MASK3,
28774 IX86_BUILTIN_VFMADDPS512_MASKZ,
28775 IX86_BUILTIN_VFMADDSD3_ROUND,
28776 IX86_BUILTIN_VFMADDSS3_ROUND,
28777 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28778 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28779 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28780 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28781 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28782 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28783 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28784 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28785 IX86_BUILTIN_VFMSUBPD512_MASK3,
28786 IX86_BUILTIN_VFMSUBPS512_MASK3,
28787 IX86_BUILTIN_VFMSUBSD3_MASK3,
28788 IX86_BUILTIN_VFMSUBSS3_MASK3,
28789 IX86_BUILTIN_VFNMADDPD512_MASK,
28790 IX86_BUILTIN_VFNMADDPS512_MASK,
28791 IX86_BUILTIN_VFNMSUBPD512_MASK,
28792 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28793 IX86_BUILTIN_VFNMSUBPS512_MASK,
28794 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28795 IX86_BUILTIN_VPCLZCNTD512,
28796 IX86_BUILTIN_VPCLZCNTQ512,
28797 IX86_BUILTIN_VPCONFLICTD512,
28798 IX86_BUILTIN_VPCONFLICTQ512,
28799 IX86_BUILTIN_VPERMDF512,
28800 IX86_BUILTIN_VPERMDI512,
28801 IX86_BUILTIN_VPERMI2VARD512,
28802 IX86_BUILTIN_VPERMI2VARPD512,
28803 IX86_BUILTIN_VPERMI2VARPS512,
28804 IX86_BUILTIN_VPERMI2VARQ512,
28805 IX86_BUILTIN_VPERMILPD512,
28806 IX86_BUILTIN_VPERMILPS512,
28807 IX86_BUILTIN_VPERMILVARPD512,
28808 IX86_BUILTIN_VPERMILVARPS512,
28809 IX86_BUILTIN_VPERMT2VARD512,
28810 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28811 IX86_BUILTIN_VPERMT2VARPD512,
28812 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28813 IX86_BUILTIN_VPERMT2VARPS512,
28814 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28815 IX86_BUILTIN_VPERMT2VARQ512,
28816 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28817 IX86_BUILTIN_VPERMVARDF512,
28818 IX86_BUILTIN_VPERMVARDI512,
28819 IX86_BUILTIN_VPERMVARSF512,
28820 IX86_BUILTIN_VPERMVARSI512,
28821 IX86_BUILTIN_VTERNLOGD512_MASK,
28822 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28823 IX86_BUILTIN_VTERNLOGQ512_MASK,
28824 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28826 /* Mask arithmetic operations */
28827 IX86_BUILTIN_KAND16,
28828 IX86_BUILTIN_KANDN16,
28829 IX86_BUILTIN_KNOT16,
28830 IX86_BUILTIN_KOR16,
28831 IX86_BUILTIN_KORTESTC16,
28832 IX86_BUILTIN_KORTESTZ16,
28833 IX86_BUILTIN_KUNPCKBW,
28834 IX86_BUILTIN_KXNOR16,
28835 IX86_BUILTIN_KXOR16,
28836 IX86_BUILTIN_KMOV16,
28838 /* Alternate 4 and 8 element gather/scatter for the vectorizer
28839 where all operands are 32-byte or 64-byte wide respectively. */
28840 IX86_BUILTIN_GATHERALTSIV4DF,
28841 IX86_BUILTIN_GATHERALTDIV8SF,
28842 IX86_BUILTIN_GATHERALTSIV4DI,
28843 IX86_BUILTIN_GATHERALTDIV8SI,
28844 IX86_BUILTIN_GATHER3ALTDIV16SF,
28845 IX86_BUILTIN_GATHER3ALTDIV16SI,
28846 IX86_BUILTIN_GATHER3ALTSIV8DF,
28847 IX86_BUILTIN_GATHER3ALTSIV8DI,
28848 IX86_BUILTIN_GATHER3DIV16SF,
28849 IX86_BUILTIN_GATHER3DIV16SI,
28850 IX86_BUILTIN_GATHER3DIV8DF,
28851 IX86_BUILTIN_GATHER3DIV8DI,
28852 IX86_BUILTIN_GATHER3SIV16SF,
28853 IX86_BUILTIN_GATHER3SIV16SI,
28854 IX86_BUILTIN_GATHER3SIV8DF,
28855 IX86_BUILTIN_GATHER3SIV8DI,
28856 IX86_BUILTIN_SCATTERDIV16SF,
28857 IX86_BUILTIN_SCATTERDIV16SI,
28858 IX86_BUILTIN_SCATTERDIV8DF,
28859 IX86_BUILTIN_SCATTERDIV8DI,
28860 IX86_BUILTIN_SCATTERSIV16SF,
28861 IX86_BUILTIN_SCATTERSIV16SI,
28862 IX86_BUILTIN_SCATTERSIV8DF,
28863 IX86_BUILTIN_SCATTERSIV8DI,
28865 /* AVX512PF */
28866 IX86_BUILTIN_GATHERPFQPD,
28867 IX86_BUILTIN_GATHERPFDPS,
28868 IX86_BUILTIN_GATHERPFDPD,
28869 IX86_BUILTIN_GATHERPFQPS,
28870 IX86_BUILTIN_SCATTERPFDPD,
28871 IX86_BUILTIN_SCATTERPFDPS,
28872 IX86_BUILTIN_SCATTERPFQPD,
28873 IX86_BUILTIN_SCATTERPFQPS,
28875 /* AVX-512ER */
28876 IX86_BUILTIN_EXP2PD_MASK,
28877 IX86_BUILTIN_EXP2PS_MASK,
28878 IX86_BUILTIN_EXP2PS,
28879 IX86_BUILTIN_RCP28PD,
28880 IX86_BUILTIN_RCP28PS,
28881 IX86_BUILTIN_RCP28SD,
28882 IX86_BUILTIN_RCP28SS,
28883 IX86_BUILTIN_RSQRT28PD,
28884 IX86_BUILTIN_RSQRT28PS,
28885 IX86_BUILTIN_RSQRT28SD,
28886 IX86_BUILTIN_RSQRT28SS,
28888 /* SHA builtins. */
28889 IX86_BUILTIN_SHA1MSG1,
28890 IX86_BUILTIN_SHA1MSG2,
28891 IX86_BUILTIN_SHA1NEXTE,
28892 IX86_BUILTIN_SHA1RNDS4,
28893 IX86_BUILTIN_SHA256MSG1,
28894 IX86_BUILTIN_SHA256MSG2,
28895 IX86_BUILTIN_SHA256RNDS2,
28897 /* TFmode support builtins. */
28898 IX86_BUILTIN_INFQ,
28899 IX86_BUILTIN_HUGE_VALQ,
28900 IX86_BUILTIN_FABSQ,
28901 IX86_BUILTIN_COPYSIGNQ,
28903 /* Vectorizer support builtins. */
28904 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28905 IX86_BUILTIN_CPYSGNPS,
28906 IX86_BUILTIN_CPYSGNPD,
28907 IX86_BUILTIN_CPYSGNPS256,
28908 IX86_BUILTIN_CPYSGNPS512,
28909 IX86_BUILTIN_CPYSGNPD256,
28910 IX86_BUILTIN_CPYSGNPD512,
28911 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28912 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28915 /* FMA4 instructions. */
28916 IX86_BUILTIN_VFMADDSS,
28917 IX86_BUILTIN_VFMADDSD,
28918 IX86_BUILTIN_VFMADDPS,
28919 IX86_BUILTIN_VFMADDPD,
28920 IX86_BUILTIN_VFMADDPS256,
28921 IX86_BUILTIN_VFMADDPD256,
28922 IX86_BUILTIN_VFMADDSUBPS,
28923 IX86_BUILTIN_VFMADDSUBPD,
28924 IX86_BUILTIN_VFMADDSUBPS256,
28925 IX86_BUILTIN_VFMADDSUBPD256,
28927 /* FMA3 instructions. */
28928 IX86_BUILTIN_VFMADDSS3,
28929 IX86_BUILTIN_VFMADDSD3,
28931 /* XOP instructions. */
28932 IX86_BUILTIN_VPCMOV,
28933 IX86_BUILTIN_VPCMOV_V2DI,
28934 IX86_BUILTIN_VPCMOV_V4SI,
28935 IX86_BUILTIN_VPCMOV_V8HI,
28936 IX86_BUILTIN_VPCMOV_V16QI,
28937 IX86_BUILTIN_VPCMOV_V4SF,
28938 IX86_BUILTIN_VPCMOV_V2DF,
28939 IX86_BUILTIN_VPCMOV256,
28940 IX86_BUILTIN_VPCMOV_V4DI256,
28941 IX86_BUILTIN_VPCMOV_V8SI256,
28942 IX86_BUILTIN_VPCMOV_V16HI256,
28943 IX86_BUILTIN_VPCMOV_V32QI256,
28944 IX86_BUILTIN_VPCMOV_V8SF256,
28945 IX86_BUILTIN_VPCMOV_V4DF256,
28947 IX86_BUILTIN_VPPERM,
28949 IX86_BUILTIN_VPMACSSWW,
28950 IX86_BUILTIN_VPMACSWW,
28951 IX86_BUILTIN_VPMACSSWD,
28952 IX86_BUILTIN_VPMACSWD,
28953 IX86_BUILTIN_VPMACSSDD,
28954 IX86_BUILTIN_VPMACSDD,
28955 IX86_BUILTIN_VPMACSSDQL,
28956 IX86_BUILTIN_VPMACSSDQH,
28957 IX86_BUILTIN_VPMACSDQL,
28958 IX86_BUILTIN_VPMACSDQH,
28959 IX86_BUILTIN_VPMADCSSWD,
28960 IX86_BUILTIN_VPMADCSWD,
28962 IX86_BUILTIN_VPHADDBW,
28963 IX86_BUILTIN_VPHADDBD,
28964 IX86_BUILTIN_VPHADDBQ,
28965 IX86_BUILTIN_VPHADDWD,
28966 IX86_BUILTIN_VPHADDWQ,
28967 IX86_BUILTIN_VPHADDDQ,
28968 IX86_BUILTIN_VPHADDUBW,
28969 IX86_BUILTIN_VPHADDUBD,
28970 IX86_BUILTIN_VPHADDUBQ,
28971 IX86_BUILTIN_VPHADDUWD,
28972 IX86_BUILTIN_VPHADDUWQ,
28973 IX86_BUILTIN_VPHADDUDQ,
28974 IX86_BUILTIN_VPHSUBBW,
28975 IX86_BUILTIN_VPHSUBWD,
28976 IX86_BUILTIN_VPHSUBDQ,
28978 IX86_BUILTIN_VPROTB,
28979 IX86_BUILTIN_VPROTW,
28980 IX86_BUILTIN_VPROTD,
28981 IX86_BUILTIN_VPROTQ,
28982 IX86_BUILTIN_VPROTB_IMM,
28983 IX86_BUILTIN_VPROTW_IMM,
28984 IX86_BUILTIN_VPROTD_IMM,
28985 IX86_BUILTIN_VPROTQ_IMM,
28987 IX86_BUILTIN_VPSHLB,
28988 IX86_BUILTIN_VPSHLW,
28989 IX86_BUILTIN_VPSHLD,
28990 IX86_BUILTIN_VPSHLQ,
28991 IX86_BUILTIN_VPSHAB,
28992 IX86_BUILTIN_VPSHAW,
28993 IX86_BUILTIN_VPSHAD,
28994 IX86_BUILTIN_VPSHAQ,
28996 IX86_BUILTIN_VFRCZSS,
28997 IX86_BUILTIN_VFRCZSD,
28998 IX86_BUILTIN_VFRCZPS,
28999 IX86_BUILTIN_VFRCZPD,
29000 IX86_BUILTIN_VFRCZPS256,
29001 IX86_BUILTIN_VFRCZPD256,
29003 IX86_BUILTIN_VPCOMEQUB,
29004 IX86_BUILTIN_VPCOMNEUB,
29005 IX86_BUILTIN_VPCOMLTUB,
29006 IX86_BUILTIN_VPCOMLEUB,
29007 IX86_BUILTIN_VPCOMGTUB,
29008 IX86_BUILTIN_VPCOMGEUB,
29009 IX86_BUILTIN_VPCOMFALSEUB,
29010 IX86_BUILTIN_VPCOMTRUEUB,
29012 IX86_BUILTIN_VPCOMEQUW,
29013 IX86_BUILTIN_VPCOMNEUW,
29014 IX86_BUILTIN_VPCOMLTUW,
29015 IX86_BUILTIN_VPCOMLEUW,
29016 IX86_BUILTIN_VPCOMGTUW,
29017 IX86_BUILTIN_VPCOMGEUW,
29018 IX86_BUILTIN_VPCOMFALSEUW,
29019 IX86_BUILTIN_VPCOMTRUEUW,
29021 IX86_BUILTIN_VPCOMEQUD,
29022 IX86_BUILTIN_VPCOMNEUD,
29023 IX86_BUILTIN_VPCOMLTUD,
29024 IX86_BUILTIN_VPCOMLEUD,
29025 IX86_BUILTIN_VPCOMGTUD,
29026 IX86_BUILTIN_VPCOMGEUD,
29027 IX86_BUILTIN_VPCOMFALSEUD,
29028 IX86_BUILTIN_VPCOMTRUEUD,
29030 IX86_BUILTIN_VPCOMEQUQ,
29031 IX86_BUILTIN_VPCOMNEUQ,
29032 IX86_BUILTIN_VPCOMLTUQ,
29033 IX86_BUILTIN_VPCOMLEUQ,
29034 IX86_BUILTIN_VPCOMGTUQ,
29035 IX86_BUILTIN_VPCOMGEUQ,
29036 IX86_BUILTIN_VPCOMFALSEUQ,
29037 IX86_BUILTIN_VPCOMTRUEUQ,
29039 IX86_BUILTIN_VPCOMEQB,
29040 IX86_BUILTIN_VPCOMNEB,
29041 IX86_BUILTIN_VPCOMLTB,
29042 IX86_BUILTIN_VPCOMLEB,
29043 IX86_BUILTIN_VPCOMGTB,
29044 IX86_BUILTIN_VPCOMGEB,
29045 IX86_BUILTIN_VPCOMFALSEB,
29046 IX86_BUILTIN_VPCOMTRUEB,
29048 IX86_BUILTIN_VPCOMEQW,
29049 IX86_BUILTIN_VPCOMNEW,
29050 IX86_BUILTIN_VPCOMLTW,
29051 IX86_BUILTIN_VPCOMLEW,
29052 IX86_BUILTIN_VPCOMGTW,
29053 IX86_BUILTIN_VPCOMGEW,
29054 IX86_BUILTIN_VPCOMFALSEW,
29055 IX86_BUILTIN_VPCOMTRUEW,
29057 IX86_BUILTIN_VPCOMEQD,
29058 IX86_BUILTIN_VPCOMNED,
29059 IX86_BUILTIN_VPCOMLTD,
29060 IX86_BUILTIN_VPCOMLED,
29061 IX86_BUILTIN_VPCOMGTD,
29062 IX86_BUILTIN_VPCOMGED,
29063 IX86_BUILTIN_VPCOMFALSED,
29064 IX86_BUILTIN_VPCOMTRUED,
29066 IX86_BUILTIN_VPCOMEQQ,
29067 IX86_BUILTIN_VPCOMNEQ,
29068 IX86_BUILTIN_VPCOMLTQ,
29069 IX86_BUILTIN_VPCOMLEQ,
29070 IX86_BUILTIN_VPCOMGTQ,
29071 IX86_BUILTIN_VPCOMGEQ,
29072 IX86_BUILTIN_VPCOMFALSEQ,
29073 IX86_BUILTIN_VPCOMTRUEQ,
29075 /* LWP instructions. */
29076 IX86_BUILTIN_LLWPCB,
29077 IX86_BUILTIN_SLWPCB,
29078 IX86_BUILTIN_LWPVAL32,
29079 IX86_BUILTIN_LWPVAL64,
29080 IX86_BUILTIN_LWPINS32,
29081 IX86_BUILTIN_LWPINS64,
29083 IX86_BUILTIN_CLZS,
29085 /* RTM */
29086 IX86_BUILTIN_XBEGIN,
29087 IX86_BUILTIN_XEND,
29088 IX86_BUILTIN_XABORT,
29089 IX86_BUILTIN_XTEST,
29091 /* BMI instructions. */
29092 IX86_BUILTIN_BEXTR32,
29093 IX86_BUILTIN_BEXTR64,
29094 IX86_BUILTIN_CTZS,
29096 /* TBM instructions. */
29097 IX86_BUILTIN_BEXTRI32,
29098 IX86_BUILTIN_BEXTRI64,
29100 /* BMI2 instructions. */
29101 IX86_BUILTIN_BZHI32,
29102 IX86_BUILTIN_BZHI64,
29103 IX86_BUILTIN_PDEP32,
29104 IX86_BUILTIN_PDEP64,
29105 IX86_BUILTIN_PEXT32,
29106 IX86_BUILTIN_PEXT64,
29108 /* ADX instructions. */
29109 IX86_BUILTIN_ADDCARRYX32,
29110 IX86_BUILTIN_ADDCARRYX64,
29112 /* FSGSBASE instructions. */
29113 IX86_BUILTIN_RDFSBASE32,
29114 IX86_BUILTIN_RDFSBASE64,
29115 IX86_BUILTIN_RDGSBASE32,
29116 IX86_BUILTIN_RDGSBASE64,
29117 IX86_BUILTIN_WRFSBASE32,
29118 IX86_BUILTIN_WRFSBASE64,
29119 IX86_BUILTIN_WRGSBASE32,
29120 IX86_BUILTIN_WRGSBASE64,
29122 /* RDRND instructions. */
29123 IX86_BUILTIN_RDRAND16_STEP,
29124 IX86_BUILTIN_RDRAND32_STEP,
29125 IX86_BUILTIN_RDRAND64_STEP,
29127 /* RDSEED instructions. */
29128 IX86_BUILTIN_RDSEED16_STEP,
29129 IX86_BUILTIN_RDSEED32_STEP,
29130 IX86_BUILTIN_RDSEED64_STEP,
29132 /* F16C instructions. */
29133 IX86_BUILTIN_CVTPH2PS,
29134 IX86_BUILTIN_CVTPH2PS256,
29135 IX86_BUILTIN_CVTPS2PH,
29136 IX86_BUILTIN_CVTPS2PH256,
29138 /* CFString built-in for darwin */
29139 IX86_BUILTIN_CFSTRING,
29141 /* Builtins to get CPU type and supported features. */
29142 IX86_BUILTIN_CPU_INIT,
29143 IX86_BUILTIN_CPU_IS,
29144 IX86_BUILTIN_CPU_SUPPORTS,
29146 /* Read/write FLAGS register built-ins. */
29147 IX86_BUILTIN_READ_FLAGS,
29148 IX86_BUILTIN_WRITE_FLAGS,
29150   IX86_BUILTIN_MAX
29151 };
29153 /* Table for the ix86 builtin decls. */
29154 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
29156 /* Table of all of the builtin functions that are possible with different ISAs
29157 but are waiting to be built until a function is declared to use that
29158 ISA. */
29159 struct builtin_isa {
29160 const char *name; /* function name */
29161 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
29162 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
29163 bool const_p; /* true if the declaration is constant */
29164   bool set_and_not_built_p;
29165 };
29167 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
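/* Editor's note: an illustrative sketch, not part of the original file.  A
   deferred builtin sits in ix86_builtins_isa[] roughly as below until its ISA
   becomes available; the name/type/isa values are borrowed from the AVX2
   tables further down purely as an example, and the block is kept under
   #if 0 so it is never compiled.  */
#if 0
static const struct builtin_isa example_deferred_entry = {
  "__builtin_ia32_pabsb256",	/* name */
  V32QI_FTYPE_V32QI,		/* tcode */
  OPTION_MASK_ISA_AVX2,		/* isa */
  true,				/* const_p: decl will be TREE_READONLY */
  true				/* set_and_not_built_p: no decl built yet */
};
#endif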
29170 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
29171 of which isa_flags to use in the ix86_builtins_isa array. Stores the
29172 function decl in the ix86_builtins array. Returns the function decl or
29173 NULL_TREE, if the builtin was not added.
29175 If the front end has a special hook for builtin functions, delay adding
29176 builtin functions that aren't in the current ISA until the ISA is changed
29177 with function specific optimization. Doing so can save about 300K for the
29178 default compiler. When the builtin is expanded, check at that time whether
29179 it is valid.
29181 If the front end doesn't have a special hook, record all builtins, even if
29182 they aren't in the current ISA, in case the user uses
29183 function specific options for a different ISA, so that we don't get scope
29184 errors if a builtin is added in the middle of a function scope. */
29186 static inline tree
29187 def_builtin (HOST_WIDE_INT mask, const char *name,
29188 enum ix86_builtin_func_type tcode,
29189 enum ix86_builtins code)
29190 {
29191 tree decl = NULL_TREE;
29193   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
29194     {
29195 ix86_builtins_isa[(int) code].isa = mask;
29197 mask &= ~OPTION_MASK_ISA_64BIT;
29198 if (flag_dyn_ipa
29199 || mask == 0
29200 || (mask & ix86_isa_flags) != 0
29201 || (lang_hooks.builtin_function
29202 == lang_hooks.builtin_function_ext_scope))
29204       {
29205 tree type = ix86_get_builtin_func_type (tcode);
29206 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
29207 NULL, NULL_TREE);
29208 ix86_builtins[(int) code] = decl;
29209 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
29210       }
29211       else
29212       {
29213 ix86_builtins[(int) code] = NULL_TREE;
29214 ix86_builtins_isa[(int) code].tcode = tcode;
29215 ix86_builtins_isa[(int) code].name = name;
29216 ix86_builtins_isa[(int) code].const_p = false;
29217 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
29218       }
29219     }
29221   return decl;
29222 }
29224 /* Like def_builtin, but also marks the function decl "const". */
29226 static inline tree
29227 def_builtin_const (HOST_WIDE_INT mask, const char *name,
29228 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29229 {
29230 tree decl = def_builtin (mask, name, tcode, code);
29231 if (decl)
29232 TREE_READONLY (decl) = 1;
29233 else
29234 ix86_builtins_isa[(int) code].const_p = true;
29236   return decl;
29237 }
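/* Editor's note: a paraphrased sketch of how these wrappers are typically
   driven, not part of the original file.  The bdesc_* tables defined below
   are walked when the target builtins are initialised (see
   ix86_init_mmx_sse_builtins later in this file); the shape of that loop is
   roughly as follows.  Kept under #if 0 because it forward-references the
   tables below and is illustrative only.  */
#if 0
static void
example_register_from_table (void)
{
  const struct builtin_description *d;
  size_t i;
  enum ix86_builtin_func_type ftype;

  for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
    {
      if (d->name == 0)
	continue;
      ftype = (enum ix86_builtin_func_type) d->flag;
      /* Entries from bdesc_special_args go through def_builtin instead,
	 since they may have side effects and must not be marked const.  */
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
#endif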
29239 /* Add any new builtin functions for a given ISA that may not have been
29240 declared. This saves a bit of space compared to adding all of the
29241 declarations to the tree, even if we didn't use them. */
29243 static void
29244 ix86_add_new_builtins (HOST_WIDE_INT isa)
29245 {
29246 int i;
29248   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
29249     {
29250 if ((ix86_builtins_isa[i].isa & isa) != 0
29251 && ix86_builtins_isa[i].set_and_not_built_p)
29252       {
29253 tree decl, type;
29255 /* Don't define the builtin again. */
29256 ix86_builtins_isa[i].set_and_not_built_p = false;
29258 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
29259 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
29260 type, i, BUILT_IN_MD, NULL,
29261 NULL_TREE);
29263 ix86_builtins[i] = decl;
29264 if (ix86_builtins_isa[i].const_p)
29265     TREE_READONLY (decl) = 1;
29266       }
29267     }
29268 }
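/* Editor's note: an illustrative user-level example, not part of this file,
   of why the deferral above matters.  Starting with GCC 4.9 the x86 intrinsic
   headers can be used without the matching -m option; switching the ISA per
   function with the target attribute is what causes the deferred builtins
   recorded in ix86_builtins_isa to be materialised (the target-attribute
   handling elsewhere in this file ends up calling ix86_add_new_builtins):

	#include <immintrin.h>

	__attribute__ ((target ("avx2")))
	__m256i abs256 (__m256i x)
	{
	  return _mm256_abs_epi8 (x);	// uses __builtin_ia32_pabsb256
	}
*/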
29270 /* Bits for builtin_description.flag. */
29272 /* Set when we don't support the comparison natively, and should
29273 swap the comparison operands in order to support it. */
29274 #define BUILTIN_DESC_SWAP_OPERANDS 1
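/* Editor's note (paraphrase, not part of the original file): the SSE compare
   expanders later in this file test this bit along the lines of

	if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
	  { rtx tmp = op0; op0 = op1; op1 = tmp; }

   and then emit the natively supported comparison, so that, e.g., a
   "greater than" builtin can be expanded with the "less than" pattern by
   exchanging its two inputs.  */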
29276 struct builtin_description
29277 {
29278 const HOST_WIDE_INT mask;
29279 const enum insn_code icode;
29280 const char *const name;
29281 const enum ix86_builtins code;
29282 const enum rtx_code comparison;
29283   const int flag;
29284 };
29286 static const struct builtin_description bdesc_comi[] =
29287 {
29288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
29289 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
29290 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
29291 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
29292 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
29293 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
29294 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
29295 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
29296 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
29297 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
29298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
29299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
29300 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
29301 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
29302 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
29303 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
29304 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
29305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
29306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
29307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
29308 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
29309 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
29310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
29311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
29314 static const struct builtin_description bdesc_pcmpestr[] =
29316 /* SSE4.2 */
29317 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
29318 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
29319 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
29320 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
29321 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
29322 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
29323 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
29326 static const struct builtin_description bdesc_pcmpistr[] =
29328 /* SSE4.2 */
29329 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
29330 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
29331 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
29332 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
29333 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
29334 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
29335 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
29338 /* Special builtins with variable number of arguments. */
29339 static const struct builtin_description bdesc_special_args[] =
29341 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
29342 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
29343 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
29345 /* 80387 (for use internally for atomic compound assignment). */
29346 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
29347 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
29348 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
29349 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
29351 /* MMX */
29352 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29354 /* 3DNow! */
29355 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29357 /* FXSR, XSAVE and XSAVEOPT */
29358 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29359 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29360 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29361 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29362 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29364 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29365 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29366 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29367 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29368 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29370 /* SSE */
29371 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29372 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29373 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29375 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29376 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29377 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29378 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29380 /* SSE or 3DNow!A */
29381 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29382 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29384 /* SSE2 */
29385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29392 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29394 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29399 /* SSE3 */
29400 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29402 /* SSE4.1 */
29403 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29405 /* SSE4A */
29406 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29407 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29409 /* AVX */
29410 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29411 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29413 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29414 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29415 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29416 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29417 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29419 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29420 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29421 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29422 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29423 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29424 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29425 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29427 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29428 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29429 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29431 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29432 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29433 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29434 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29435 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29436 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29437 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29438 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29440 /* AVX2 */
29441 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29442 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29443 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29444 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29445 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29446 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29447 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29448 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29449 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29451 /* AVX512F */
29452 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29453 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29454 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29455 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29456 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29457 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29458 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29459 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29460 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29461 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29462 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29463 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29464 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29465 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29466 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29467 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29468 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29469 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29470 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29471 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29472 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29473 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29474 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29475 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29476 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29477 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29478 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29479 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29480 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29481 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29482 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29483 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29484 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29485 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29486 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29487 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29488 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29489 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29490 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29491 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29492 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29493 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29494 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29495 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29496 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29497 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29498 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29500 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29501 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29502 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29503 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29504 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29505 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29507 /* FSGSBASE */
29508 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29509 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29510 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29511 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29512 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29513 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29514 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29515 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29517 /* RTM */
29518 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29519 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29520 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29521 };
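/* Editor's note: an illustrative mapping from one entry above to user code,
   not part of this file.  The _mm_sfence intrinsic in xmmintrin.h is a thin
   wrapper around the __builtin_ia32_sfence builtin registered through this
   table:

	#include <xmmintrin.h>

	void
	publish (volatile int *flag)
	{
	  *flag = 1;
	  _mm_sfence ();	// emits sfence via __builtin_ia32_sfence
	}
*/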
29523 /* Builtins with variable number of arguments. */
29524 static const struct builtin_description bdesc_args[] =
29525 {
29526 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29527 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29528 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29529 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29530 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29531 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29532 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29534 /* MMX */
29535 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29536 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29537 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29538 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29539 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29540 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29542 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29543 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29544 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29545 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29546 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29547 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29548 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29549 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29551 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29552 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29554 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29555 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29556 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29557 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29559 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29560 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29561 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29562 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29563 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29564 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29566 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29567 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29568 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29569 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29570 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
29571 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
29573 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29574 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29575 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29577 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
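/* For the shift builtins below, the ..._SI_COUNT signatures appear to take
   the shift count as a scalar integer (the *i immediate forms), while the
   ..._V?_COUNT signatures take the count in an MMX register; both map onto
   the same ashl/lshr/ashr patterns.  */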
29579 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29580 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29581 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29582 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29583 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29584 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29586 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29587 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29588 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29589 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29590 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29591 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29593 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29594 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29595 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29596 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29598 /* 3DNow! */
29599 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29600 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29601 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29602 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29604 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29605 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29606 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29607 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29608 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29609 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29610 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29611 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29612 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29613 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29614 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29615 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29616 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29617 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29618 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29620 /* 3DNow!A */
29621 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29622 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29623 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29624 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29625 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29626 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29628 /* SSE */
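/* As an illustration of how these names are consumed: the <xmmintrin.h>
   intrinsics in GCC's headers wrap them directly, e.g. _mm_add_ps is
   implemented in terms of __builtin_ia32_addps and _mm_sqrt_ps in terms of
   __builtin_ia32_sqrtps.  */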
29629 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29630 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29631 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29632 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29633 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29634 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29635 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29636 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29637 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29638 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29639 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29640 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29642 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29644 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29645 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29646 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29647 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29648 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29649 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29650 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29651 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
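/* The comparison builtins below all reuse the same maskcmp pattern and
   encode the predicate in the comparison field; the "greater" variants
   (cmpgt/cmpge and their negations) are expressed as LT/LE/UNGE/UNGT with
   the operands swapped, which is what the ..._SWAP type suffix denotes.  */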
29653 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29654 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29655 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29656 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29657 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29658 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29659 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29660 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29661 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29662 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29663 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
29664 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29665 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29666 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29667 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29668 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29669 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29670 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29671 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29672 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29674 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29675 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29676 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29677 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29679 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29680 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29681 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29682 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29684 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29686 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29687 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29688 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29689 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29690 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29692 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29693 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29694 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29696 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29698 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29699 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29700 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29702 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29703 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29705 /* SSE MMX or 3Dnow!A */
29706 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29707 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29708 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29710 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29711 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29712 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29713 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29715 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29716 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29718 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29720 /* SSE2 */
29721 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29723 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29724 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29725 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29726 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29727 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29729 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29730 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29731 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29732 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29733 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29735 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29737 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29738 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29739 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29740 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29742 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29743 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29744 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29746 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29747 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29748 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29749 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29750 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29751 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29752 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29753 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29755 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29757 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29758 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29759 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
29760 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29761 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29762 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29763 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29764 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29765 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29766 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29767 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29768 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29769 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29770 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29771 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29772 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29773 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29774 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29776 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29777 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29778 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29779 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29781 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29782 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29783 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29784 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29786 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29788 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29789 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29790 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29792 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29794 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29795 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29796 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29797 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29798 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29799 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29800 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29801 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29803 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29804 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29805 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29806 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29807 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29808 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29809 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29810 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29812 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29813 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29815 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29816 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29817 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29818 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29820 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29821 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29823 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29824 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29825 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29826 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29827 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29828 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29830 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29831 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29832 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29833 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29835 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29836 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29837 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29838 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29839 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29840 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29841 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29842 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29844 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29845 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29846 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29848 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29849 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29851 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29852 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29854 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29856 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29857 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29858 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29859 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29861 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29862 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29863 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29864 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29865 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29866 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29867 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29869 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29870 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29871 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29872 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29873 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29874 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29875 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29877 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29878 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29879 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29880 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29882 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29883 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29884 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29886 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29888 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29890 /* SSE2 MMX */
29891 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29892 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29894 /* SSE3 */
29895 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
29896 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29898 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29899 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29900 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29901 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29902 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29903 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29905 /* SSSE3 */
29906 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29907 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29908 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29909 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29910 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29911 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29913 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29914 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29915 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29916 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29917 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29918 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29919 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29920 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29921 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29922 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29923 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29924 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29925 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29926 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29927 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29928 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29929 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29930 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29931 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29932 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29933 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29934 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29935 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29936 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29938 /* SSSE3. */
29939 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29940 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29942 /* SSE4.1 */
29943 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29944 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29945 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29946 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29947 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29948 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29949 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29950 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29951 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29952 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29954 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29955 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29956 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29957 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29958 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29959 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29960 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29961 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29962 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29963 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29964 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29965 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29966 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29968 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29969 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29970 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29971 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29972 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29973 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29974 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29975 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29976 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29977 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29978 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29979 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29981 /* SSE4.1 */
29982 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29983 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29984 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29985 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
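/* The floor/ceil/trunc/rint builtins below reuse the roundpd/roundps
   patterns; the rounding-mode selector (ROUND_FLOOR, ROUND_CEIL, ...) is
   carried in the comparison field, cast to enum rtx_code, and the ..._ROUND
   type suffix marks entries where that selector is supplied as the
   immediate operand during expansion.  */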
29987 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29988 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29989 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29990 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29992 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29993 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29995 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29996 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29998 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29999 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
30000 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
30001 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
30003 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
30004 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
30006 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30007 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
30009 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
30010 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
30011 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
30013 /* SSE4.2 */
30014 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30015 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
30016 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
30017 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30018 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30020 /* SSE4A */
30021 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
30022 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
30023 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
30024 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30026 /* AES */
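/* Note (added): the AES and PCLMUL rows below carry a null name, which
   presumably keeps the generic registration loop from declaring them;
   the corresponding builtins appear to be declared separately under
   their own AES/PCLMUL ISA masks, with these entries supplying only the
   expansion information.  */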
30027 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
30028 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
30030 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30031 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30032 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30033 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30035 /* PCLMUL */
30036 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
30038 /* AVX */
30039 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30040 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30041 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30042 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30043 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30044 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30045 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30046 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30047 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30048 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30049 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30050 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30051 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30052 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30053 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30054 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30055 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30056 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30057 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30058 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30059 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30060 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30061 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30062 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30063 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30064 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30066 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
30067 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
30068 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
30069 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
30071 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
30072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
30073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
30074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
30075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
30076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
30077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
30078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30080 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30082 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
30083 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
30084 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
30085 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
30086 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
30087 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
30088 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
30089 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
30090 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
30091 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
30092 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
30093 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
30094 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
30095 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
30096 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
30097 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
30098 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
30099 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
30100 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
30101 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
30102 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
30103 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
30104 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
30106 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
30107 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
30108 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
30110 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
30111 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
30112 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
30113 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
30114 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
30116 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
30118 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
30119 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
30121 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
30122 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
30123 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
30124 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
30126 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
30127 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
30129 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
30130 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
30132 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
30133 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
30134 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
30135 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
30137 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
30138 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
30140 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
30141 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
30143 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30144 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30145 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30146 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30148 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
30149 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
30150 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
30151 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
30152 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
30153 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
30155 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
30156 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
30157 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
30158 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
30159 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
30160 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
30161 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
30162 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
30163 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
30164 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
30165 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
30166 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
30167 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
30168 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
30169 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
30171 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
30172 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
30174 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30175 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30177 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
30179 /* AVX2 */
30180 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
30181 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
30182 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
30183 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
30184 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
30185 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
30186 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
30187 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
30188 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30189 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30190 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30191 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30192 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30193 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30194 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30195 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30196 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
30197 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30198 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30199 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30200 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30201 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
30202 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
30203 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30204 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30205 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30206 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30207 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30208 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30209 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30210 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30211 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30212 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30213 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30214 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30215 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30216 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30217 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
30218 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
30219 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30220 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30221 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30222 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30223 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30224 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30225 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30226 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30227 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30228 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30229 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30230 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30231 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
30232 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
30233 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
30234 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
30235 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
30236 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
30237 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
30238 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
30239 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
30240 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
30241 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
30242 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
30243 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
30244 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
30245 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30246 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30247 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30248 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30249 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30250 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
30251 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30252 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
30253 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30254 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
30255 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
30256 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
30257 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30258 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30259 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30260 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
30261 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
30262 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
30263 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
30264 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
30265 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
30266 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
30267 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
30268 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
30269 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
30270 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
30271 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
30272 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
30273 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
30274 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
30275 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
30276 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
30277 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
30278 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30279 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30280 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30281 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30282 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30283 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30284 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30285 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30286 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30287 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30288 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30289 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30290 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30291 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30292 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30293 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30294 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30295 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30296 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
30297 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
30298 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
30299 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30300 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
30301 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
30302 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
30303 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
30304 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
30305 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
30306 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
30307 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
30308 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
30309 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30310 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
30311 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
30312 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
30313 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
30314 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
30315 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
30316 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30317 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30318 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30319 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30320 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30321 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30322 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30323 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30324 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30325 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
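/* Note (added): in the AVX2 shift entries above, the *_SI_COUNT
   signatures take the count as a scalar while the *_V8HI/V4SI/V2DI_COUNT
   signatures take it in the low element of a vector; both appear to map
   onto the same ashl/ashr/lshr patterns, with the expander adapting the
   count operand.  The *_INT_CONVERT signatures (palignr256, pslldqi256,
   psrldqi256) seem to mark builtins whose V4DI arguments are
   reinterpreted as V2TImode for the underlying whole-register byte
   operations.  */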
30327 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
30329 /* BMI */
30330 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30331 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30332 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
30334 /* TBM */
30335 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30336 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30338 /* F16C */
30339 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
30340 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
30341 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
30342 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
30344 /* BMI2 */
30345 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30346 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30347 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30348 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30349 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30350 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30352 /* AVX512F */
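/* Note (added): in the "_mask" entries below, the trailing vector and
   QI/HI arguments of the FTYPE are the merge source and the write mask
   (QImode for 8-element, HImode for 16-element vectors); elements whose
   mask bit is clear keep the merge source.  The "_maskz" variants appear
   to zero those elements instead.  */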
30353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30354 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30355 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30356 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30357 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30358 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30359 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30360 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30361 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30362 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30363 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30364 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30365 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30366 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30367 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30368 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30369 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30370 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30371 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30372 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30373 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30374 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30375 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30376 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30377 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30378 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30379 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30380 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30381 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30382 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30383 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30384 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30385 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30386 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30387 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30388 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30389 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30390 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30391 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30392 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30393 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30394 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30395 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30396 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30397 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30398 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30399 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30400 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30401 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30402 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30403 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30404 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
30405 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30406 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30407 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30408 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30409 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30410 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30411 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30412 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30413 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30414 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30415 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30416 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30417 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30418 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30419 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30420 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30421 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30422 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30423 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30424 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30425 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30426 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30427 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30428 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30429 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30430 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30431 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30432 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30433 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30434 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30435 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30436 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30437 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30438 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30439 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30440 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30441 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30442 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30443 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30444 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30445 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30446 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30447 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30448 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30449 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30450 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30451 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30452 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30453 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30454 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30455 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30456 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30457 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30458 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30459 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30460 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30461 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30462 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30463 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30464 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30465 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30466 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30467 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30468 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30469 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30470 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30471 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30472 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30473 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30474 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30475 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30476 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30477 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30478 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30479 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30480 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30481 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30482 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30483 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30484 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30485 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30486 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30487 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30488 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30489 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30490 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30491 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30492 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30493 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30494 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30495 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30496 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30497 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30498 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30499 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30500 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30501 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30502 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30503 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30504 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30505 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30506 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30507 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30508 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30509 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30510 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30511 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30512 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30513 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30514 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30515 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30516 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30517 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30518 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30519 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30520 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30521 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30522 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30523 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30524 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30525 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30526 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30527 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30528 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30529 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30530 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30531 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30532 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30533 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30534 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30535 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30536 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30537 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30538 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30539 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30540 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30541 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30542 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30543 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30545 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30546 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30547 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30548 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30549 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30550 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30551 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30552 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30554 /* Mask arithmetic operations */
30555 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30556 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30557 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30558 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30559 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30560 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30561 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30562 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30563 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30564 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
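/* The HI_FTYPE_* entries above back the 16-bit mask (__mmask16)
   intrinsics from avx512fintrin.h.  A minimal sketch of how one such
   wrapper typically looks -- the attributes and casts in the real
   header may differ:

     extern __inline __mmask16
     _mm512_kand (__mmask16 __A, __mmask16 __B)
     {
       return (__mmask16) __builtin_ia32_kandhi (__A, __B);
     }  */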
30566 /* SHA */
30567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30572 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30576 /* Builtins with rounding support. */
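/* Each entry in this table takes one extra trailing operand holding the
   embedded rounding / suppress-all-exceptions control, which is why the
   FTYPE names below end in _INT.  The intrinsic wrappers pass a constant
   such as _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC or
   _MM_FROUND_CUR_DIRECTION, and the round-builtin expander later in this
   file checks that operand before emitting the *_round pattern.  A rough,
   hypothetical use of one entry (argument types abbreviated):

     r = __builtin_ia32_addpd512_mask (a, b, src, mask,
                                       _MM_FROUND_CUR_DIRECTION);  */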
30577 static const struct builtin_description bdesc_round_args[] =
30578 {
30579 /* AVX512F */
30580 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30581 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30582 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30583 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30584 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30585 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30586 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30587 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30588 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30589 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30590 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30591 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30592 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30593 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30594 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30595 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30596 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30597 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30598 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30599 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30600 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30601 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30602 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30603 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30604 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30605 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30606 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30607 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30608 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30609 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30610 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30611 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30612 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30613 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30614 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30615 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30616 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30617 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30618 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30619 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30620 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30621 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30622 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30623 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30624 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30625 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30626 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30627 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30628 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30629 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30630 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30631 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30632 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30633 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30634 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30635 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30636 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30637 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30638 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30639 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30640 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30641 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30642 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30643 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30644 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30645 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30646 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30647 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30648 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30649 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30650 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30651 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30652 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30653 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30654 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30655 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30656 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30657 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30658 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30659 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30660 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30661 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30662 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30663 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30664 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30665 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30666 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30667 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30668 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30669 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30670 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30671 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30672 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30673 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30674 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30675 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30676 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30677 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30678 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30679 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30680 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30681 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30682 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30683 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30684 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30685 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30686 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30687 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30688 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30689 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30690 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30691 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30692 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30693 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30694 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30695 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30696 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30697 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30698 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30700 /* AVX512ER */
30701 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30702 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30703 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30704 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30705 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30706 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30707 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30708 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30709 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30710 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30711 };
30713 /* FMA4 and XOP. */
30714 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30715 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30716 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30717 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30718 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30719 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30720 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30721 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30722 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30723 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30724 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30725 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30726 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30727 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30728 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30729 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30730 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30731 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30732 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30733 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30734 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30735 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30736 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30737 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30738 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30739 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30740 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30741 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30742 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30743 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30744 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30745 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30746 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30747 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30748 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30749 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30750 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30751 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30752 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30753 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30754 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30755 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30756 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30757 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30758 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30759 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30760 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30761 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30762 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30763 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30764 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30765 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
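/* The MULTI_ARG_* macros above are only shorthand for the
   ix86_builtin_func_type enumerators used in the bdesc_multi_arg table
   that follows; e.g. MULTI_ARG_3_SF stands for V4SF_FTYPE_V4SF_V4SF_V4SF,
   a three-operand V4SF function.  The flag field that stores them is
   cast back to the enum when the builtins are registered.  */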
30767 static const struct builtin_description bdesc_multi_arg[] =
30768 {
30769 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30770 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30771 UNKNOWN, (int)MULTI_ARG_3_SF },
30772 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30773 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30774 UNKNOWN, (int)MULTI_ARG_3_DF },
30776 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30777 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30778 UNKNOWN, (int)MULTI_ARG_3_SF },
30779 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30780 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30781 UNKNOWN, (int)MULTI_ARG_3_DF },
30783 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30784 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30785 UNKNOWN, (int)MULTI_ARG_3_SF },
30786 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30787 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30788 UNKNOWN, (int)MULTI_ARG_3_DF },
30789 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30790 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30791 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30792 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30793 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30794 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30796 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30797 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30798 UNKNOWN, (int)MULTI_ARG_3_SF },
30799 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30800 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30801 UNKNOWN, (int)MULTI_ARG_3_DF },
30802 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30803 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30804 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30805 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30806 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30807 UNKNOWN, (int)MULTI_ARG_3_DF2 },
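/* The OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4 entries above are shared
   by both ISAs: the mask is treated as "any of these bits", so e.g.
   __builtin_ia32_vfmaddps is registered when either -mfma or -mfma4 is
   enabled.  */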
30809 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30810 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30811 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30812 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30813 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
30814 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30815 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30817 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30818 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30819 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30820 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30821 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30822 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30823 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
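/* VPCMOV is a plain bitwise select, result = (src1 & sel) | (src2 & ~sel),
   so the per-type builtins above differ only in the vector mode they are
   declared with, not in the operation performed.  */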
30825 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30827 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30828 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30829 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30830 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30831 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30832 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30833 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30834 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30835 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30836 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30837 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30838 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
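/* The VPMACS*/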
30840 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30841 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30842 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30843 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30844 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30845 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30846 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30847 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30848 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30849 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30850 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30851 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30852 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30853 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30854 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30855 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30857 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30858 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30859 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30860 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30861 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30862 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30864 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30865 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30866 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30867 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30868 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30869 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30870 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30871 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30872 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30873 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30874 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30875 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30876 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30877 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30878 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30880 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30881 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30882 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30883 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30884 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30885 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30886 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30888 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30889 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30890 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30891 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30892 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30893 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30894 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30896 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30897 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30898 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30899 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30900 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30901 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30902 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30904 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30905 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30906 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30907 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30908 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30909 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30910 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30912 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30913 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30914 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30915 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30916 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30917 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30918 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30920 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30921 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30922 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30923 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30924 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30925 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30926 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30928 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30929 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30930 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30931 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30932 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30933 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30934 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30936 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30937 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30938 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30939 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30940 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30941 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30942 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30944 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30945 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30946 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30947 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30948 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30949 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30950 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30951 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30953 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30954 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30955 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30956 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30957 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30958 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30959 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30960 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30962 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30963 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30964 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30965 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
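/* Editorial note (added, not in the original source): each entry in this
   bdesc_multi_arg table pairs an ISA requirement with an insn pattern, the
   user-visible builtin name, its IX86_BUILTIN_* code, an optional comparison
   rtx code, and a MULTI_ARG_* signature.  As a minimal sketch, the
   "__builtin_ia32_vpcomltb" entry above expands to the xop_maskcmpv16qi3
   pattern with the LT comparison and, guarded by -mxop, could be used as:

     typedef char v16qi __attribute__ ((vector_size (16)));
     v16qi lt_mask = __builtin_ia32_vpcomltb (a, b);   // a, b: v16qi

   (the variable names here are for illustration only).  */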
30969 /* TM vector builtins. */
30971 /* Reuse the existing x86-specific `struct builtin_description' for
30972 convenience. Add casts to make the entries fit. */
30973 static const struct builtin_description bdesc_tm[] =
30975 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30976 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30977 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30978 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30979 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30980 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30981 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30983 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30984 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30985 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30986 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30987 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30988 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30989 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30991 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30992 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30993 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30994 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30995 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30996 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30997 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30999 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
31000 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
31001 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
31004 /* TM callbacks. */
31006 /* Return the builtin decl needed to load a vector of TYPE. */
31008 static tree
31009 ix86_builtin_tm_load (tree type)
31011 if (TREE_CODE (type) == VECTOR_TYPE)
31013 switch (tree_to_uhwi (TYPE_SIZE (type)))
31015 case 64:
31016 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
31017 case 128:
31018 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
31019 case 256:
31020 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
31023 return NULL_TREE;
31026 /* Return the builtin decl needed to store a vector of TYPE. */
31028 static tree
31029 ix86_builtin_tm_store (tree type)
31031 if (TREE_CODE (type) == VECTOR_TYPE)
31033 switch (tree_to_uhwi (TYPE_SIZE (type)))
31035 case 64:
31036 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
31037 case 128:
31038 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
31039 case 256:
31040 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
31043 return NULL_TREE;
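/* Illustrative note (added, not in the original source): for a vector type
   such as V4SF, TYPE_SIZE is 128, so ix86_builtin_tm_load and
   ix86_builtin_tm_store above return the decls for BUILT_IN_TM_LOAD_M128 and
   BUILT_IN_TM_STORE_M128 respectively; non-vector types, or sizes other than
   64/128/256, fall through and yield NULL_TREE.  */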
31046 /* Initialize the transactional memory vector load/store builtins. */
31048 static void
31049 ix86_init_tm_builtins (void)
31051 enum ix86_builtin_func_type ftype;
31052 const struct builtin_description *d;
31053 size_t i;
31054 tree decl;
31055 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
31056 tree attrs_log, attrs_type_log;
31058 if (!flag_tm)
31059 return;
31061 /* If there are no builtins defined, we must be compiling in a
31062 language without trans-mem support. */
31063 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
31064 return;
31066 /* Use whatever attributes a normal TM load has. */
31067 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
31068 attrs_load = DECL_ATTRIBUTES (decl);
31069 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
31070 /* Use whatever attributes a normal TM store has. */
31071 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
31072 attrs_store = DECL_ATTRIBUTES (decl);
31073 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
31074 /* Use whatever attributes a normal TM log has. */
31075 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
31076 attrs_log = DECL_ATTRIBUTES (decl);
31077 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
31079 for (i = 0, d = bdesc_tm;
31080 i < ARRAY_SIZE (bdesc_tm);
31081 i++, d++)
31083 if ((d->mask & ix86_isa_flags) != 0
31084 || (lang_hooks.builtin_function
31085 == lang_hooks.builtin_function_ext_scope))
31087 tree type, attrs, attrs_type;
31088 enum built_in_function code = (enum built_in_function) d->code;
31090 ftype = (enum ix86_builtin_func_type) d->flag;
31091 type = ix86_get_builtin_func_type (ftype);
31093 if (BUILTIN_TM_LOAD_P (code))
31095 attrs = attrs_load;
31096 attrs_type = attrs_type_load;
31098 else if (BUILTIN_TM_STORE_P (code))
31100 attrs = attrs_store;
31101 attrs_type = attrs_type_store;
31103 else
31105 attrs = attrs_log;
31106 attrs_type = attrs_type_log;
31108 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
31109 /* The builtin without the prefix for
31110 calling it directly. */
31111 d->name + strlen ("__builtin_"),
31112 attrs);
31113 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
31114 set the TYPE_ATTRIBUTES. */
31115 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
31117 set_builtin_decl (code, decl, false);
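/* Illustrative note (added): each bdesc_tm entry is registered under its
   d->name ("__builtin__ITM_WM128", say) and, via d->name +
   strlen ("__builtin_"), is also directly callable under the bare name
   "_ITM_WM128", matching the names the transactional-memory runtime uses.
   A hypothetical direct call would look roughly like:

     _ITM_WM128 (dst, src);   // dst: pointer to a 4 x float vector, src: the value

   though in practice these calls are emitted by the TM lowering passes
   rather than written by hand.  */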
31122 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
31123 in the current target ISA, so that the user can compile particular modules
31124 with target-specific options that differ from the command-line
31125 options. */
31126 static void
31127 ix86_init_mmx_sse_builtins (void)
31129 const struct builtin_description * d;
31130 enum ix86_builtin_func_type ftype;
31131 size_t i;
31133 /* Add all special builtins with a variable number of operands. */
31134 for (i = 0, d = bdesc_special_args;
31135 i < ARRAY_SIZE (bdesc_special_args);
31136 i++, d++)
31138 if (d->name == 0)
31139 continue;
31141 ftype = (enum ix86_builtin_func_type) d->flag;
31142 def_builtin (d->mask, d->name, ftype, d->code);
31145 /* Add all builtins with a variable number of operands. */
31146 for (i = 0, d = bdesc_args;
31147 i < ARRAY_SIZE (bdesc_args);
31148 i++, d++)
31150 if (d->name == 0)
31151 continue;
31153 ftype = (enum ix86_builtin_func_type) d->flag;
31154 def_builtin_const (d->mask, d->name, ftype, d->code);
31157 /* Add all builtins with rounding. */
31158 for (i = 0, d = bdesc_round_args;
31159 i < ARRAY_SIZE (bdesc_round_args);
31160 i++, d++)
31162 if (d->name == 0)
31163 continue;
31165 ftype = (enum ix86_builtin_func_type) d->flag;
31166 def_builtin_const (d->mask, d->name, ftype, d->code);
31169 /* pcmpestr[im] insns. */
31170 for (i = 0, d = bdesc_pcmpestr;
31171 i < ARRAY_SIZE (bdesc_pcmpestr);
31172 i++, d++)
31174 if (d->code == IX86_BUILTIN_PCMPESTRM128)
31175 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
31176 else
31177 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
31178 def_builtin_const (d->mask, d->name, ftype, d->code);
31181 /* pcmpistr[im] insns. */
31182 for (i = 0, d = bdesc_pcmpistr;
31183 i < ARRAY_SIZE (bdesc_pcmpistr);
31184 i++, d++)
31186 if (d->code == IX86_BUILTIN_PCMPISTRM128)
31187 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
31188 else
31189 ftype = INT_FTYPE_V16QI_V16QI_INT;
31190 def_builtin_const (d->mask, d->name, ftype, d->code);
31193 /* comi/ucomi insns. */
31194 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
31196 if (d->mask == OPTION_MASK_ISA_SSE2)
31197 ftype = INT_FTYPE_V2DF_V2DF;
31198 else
31199 ftype = INT_FTYPE_V4SF_V4SF;
31200 def_builtin_const (d->mask, d->name, ftype, d->code);
31203 /* SSE */
31204 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
31205 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
31206 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
31207 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
31209 /* SSE or 3DNow!A */
31210 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31211 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
31212 IX86_BUILTIN_MASKMOVQ);
31214 /* SSE2 */
31215 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
31216 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
31218 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
31219 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
31220 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
31221 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
31223 /* SSE3. */
31224 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
31225 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
31226 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
31227 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
31229 /* AES */
31230 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
31231 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
31232 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
31233 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
31234 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
31235 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
31236 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
31237 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
31238 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
31239 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
31240 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
31241 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
31243 /* PCLMUL */
31244 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
31245 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
31247 /* RDRND */
31248 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
31249 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
31250 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
31251 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
31252 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
31253 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
31254 IX86_BUILTIN_RDRAND64_STEP);
31256 /* AVX2 */
31257 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
31258 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
31259 IX86_BUILTIN_GATHERSIV2DF);
31261 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
31262 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
31263 IX86_BUILTIN_GATHERSIV4DF);
31265 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
31266 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
31267 IX86_BUILTIN_GATHERDIV2DF);
31269 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
31270 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
31271 IX86_BUILTIN_GATHERDIV4DF);
31273 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
31274 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
31275 IX86_BUILTIN_GATHERSIV4SF);
31277 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
31278 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
31279 IX86_BUILTIN_GATHERSIV8SF);
31281 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
31282 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
31283 IX86_BUILTIN_GATHERDIV4SF);
31285 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
31286 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
31287 IX86_BUILTIN_GATHERDIV8SF);
31289 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
31290 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
31291 IX86_BUILTIN_GATHERSIV2DI);
31293 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
31294 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
31295 IX86_BUILTIN_GATHERSIV4DI);
31297 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
31298 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
31299 IX86_BUILTIN_GATHERDIV2DI);
31301 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
31302 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
31303 IX86_BUILTIN_GATHERDIV4DI);
31305 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
31306 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
31307 IX86_BUILTIN_GATHERSIV4SI);
31309 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
31310 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
31311 IX86_BUILTIN_GATHERSIV8SI);
31313 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
31314 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
31315 IX86_BUILTIN_GATHERDIV4SI);
31317 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
31318 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
31319 IX86_BUILTIN_GATHERDIV8SI);
31321 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
31322 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
31323 IX86_BUILTIN_GATHERALTSIV4DF);
31325 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
31326 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
31327 IX86_BUILTIN_GATHERALTDIV8SF);
31329 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
31330 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
31331 IX86_BUILTIN_GATHERALTSIV4DI);
31333 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
31334 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
31335 IX86_BUILTIN_GATHERALTDIV8SI);
31337 /* AVX512F */
31338 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
31339 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
31340 IX86_BUILTIN_GATHER3SIV16SF);
31342 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
31343 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
31344 IX86_BUILTIN_GATHER3SIV8DF);
31346 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31347 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
31348 IX86_BUILTIN_GATHER3DIV16SF);
31350 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31351 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31352 IX86_BUILTIN_GATHER3DIV8DF);
31354 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31355 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31356 IX86_BUILTIN_GATHER3SIV16SI);
31358 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31359 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31360 IX86_BUILTIN_GATHER3SIV8DI);
31362 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31363 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31364 IX86_BUILTIN_GATHER3DIV16SI);
31366 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31367 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31368 IX86_BUILTIN_GATHER3DIV8DI);
31370 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
31371 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31372 IX86_BUILTIN_GATHER3ALTSIV8DF);
31374 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
31375 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31376 IX86_BUILTIN_GATHER3ALTDIV16SF);
31378 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
31379 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31380 IX86_BUILTIN_GATHER3ALTSIV8DI);
31382 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
31383 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31384 IX86_BUILTIN_GATHER3ALTDIV16SI);
31386 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31387 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31388 IX86_BUILTIN_SCATTERSIV16SF);
31390 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31391 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31392 IX86_BUILTIN_SCATTERSIV8DF);
31394 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31395 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31396 IX86_BUILTIN_SCATTERDIV16SF);
31398 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31399 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31400 IX86_BUILTIN_SCATTERDIV8DF);
31402 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31403 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31404 IX86_BUILTIN_SCATTERSIV16SI);
31406 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31407 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31408 IX86_BUILTIN_SCATTERSIV8DI);
31410 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31411 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31412 IX86_BUILTIN_SCATTERDIV16SI);
31414 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31415 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31416 IX86_BUILTIN_SCATTERDIV8DI);
31418 /* AVX512PF */
31419 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31420 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31421 IX86_BUILTIN_GATHERPFDPD);
31422 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31423 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31424 IX86_BUILTIN_GATHERPFDPS);
31425 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31426 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31427 IX86_BUILTIN_GATHERPFQPD);
31428 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31429 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31430 IX86_BUILTIN_GATHERPFQPS);
31431 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31432 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31433 IX86_BUILTIN_SCATTERPFDPD);
31434 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31435 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31436 IX86_BUILTIN_SCATTERPFDPS);
31437 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31438 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31439 IX86_BUILTIN_SCATTERPFQPD);
31440 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31441 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31442 IX86_BUILTIN_SCATTERPFQPS);
31444 /* SHA */
31445 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31446 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31447 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31448 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31449 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31450 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31451 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31452 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31453 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31454 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31455 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31456 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31457 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31458 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31460 /* RTM. */
31461 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31462 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31464 /* MMX access to the vec_init patterns. */
31465 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31466 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31468 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31469 V4HI_FTYPE_HI_HI_HI_HI,
31470 IX86_BUILTIN_VEC_INIT_V4HI);
31472 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31473 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31474 IX86_BUILTIN_VEC_INIT_V8QI);
31476 /* Access to the vec_extract patterns. */
31477 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31478 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31479 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31480 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31481 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31482 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31483 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31484 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31485 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31486 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31488 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31489 "__builtin_ia32_vec_ext_v4hi",
31490 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31492 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31493 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31495 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31496 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31498 /* Access to the vec_set patterns. */
31499 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31500 "__builtin_ia32_vec_set_v2di",
31501 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31503 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31504 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31506 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31507 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31509 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31510 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31512 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31513 "__builtin_ia32_vec_set_v4hi",
31514 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31516 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31517 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31519 /* RDSEED */
31520 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31521 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31522 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31523 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31524 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31525 "__builtin_ia32_rdseed_di_step",
31526 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31528 /* ADCX */
31529 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31530 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31531 def_builtin (OPTION_MASK_ISA_64BIT,
31532 "__builtin_ia32_addcarryx_u64",
31533 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31534 IX86_BUILTIN_ADDCARRYX64);
31536 /* Read/write FLAGS. */
31537 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31538 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31539 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31540 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31541 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31542 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31543 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31544 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31547 /* Add the FMA4 and XOP multi-arg builtins. */
31548 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31550 if (d->name == 0)
31551 continue;
31553 ftype = (enum ix86_builtin_func_type) d->flag;
31554 def_builtin_const (d->mask, d->name, ftype, d->code);
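/* Illustrative note (added): the builtins defined above are what the
   <x86intrin.h>-style intrinsics expand to.  For example, the RDRND
   definition with INT_FTYPE_PUNSIGNED corresponds to a use such as:

     unsigned int val;
     int ok = __builtin_ia32_rdrand32_step (&val);   // ok is 1 on success

   which is what _rdrand32_step () wraps when -mrdrnd is enabled.  */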
31558 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31559 to return a pointer to VERSION_DECL if the outcome of the expression
31560 formed by PREDICATE_CHAIN is true. This function will be called during
31561 version dispatch to decide which function version to execute. It returns
31562 the basic block at the end, to which more conditions can be added. */
31564 static basic_block
31565 add_condition_to_bb (tree function_decl, tree version_decl,
31566 tree predicate_chain, basic_block new_bb)
31568 gimple return_stmt;
31569 tree convert_expr, result_var;
31570 gimple convert_stmt;
31571 gimple call_cond_stmt;
31572 gimple if_else_stmt;
31574 basic_block bb1, bb2, bb3;
31575 edge e12, e23;
31577 tree cond_var, and_expr_var = NULL_TREE;
31578 gimple_seq gseq;
31580 tree predicate_decl, predicate_arg;
31582 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31584 gcc_assert (new_bb != NULL);
31585 gseq = bb_seq (new_bb);
31588 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31589 build_fold_addr_expr (version_decl));
31590 result_var = create_tmp_var (ptr_type_node, NULL);
31591 convert_stmt = gimple_build_assign (result_var, convert_expr);
31592 return_stmt = gimple_build_return (result_var);
31594 if (predicate_chain == NULL_TREE)
31596 gimple_seq_add_stmt (&gseq, convert_stmt);
31597 gimple_seq_add_stmt (&gseq, return_stmt);
31598 set_bb_seq (new_bb, gseq);
31599 gimple_set_bb (convert_stmt, new_bb);
31600 gimple_set_bb (return_stmt, new_bb);
31601 pop_cfun ();
31602 return new_bb;
31605 while (predicate_chain != NULL)
31607 cond_var = create_tmp_var (integer_type_node, NULL);
31608 predicate_decl = TREE_PURPOSE (predicate_chain);
31609 predicate_arg = TREE_VALUE (predicate_chain);
31610 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31611 gimple_call_set_lhs (call_cond_stmt, cond_var);
31613 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31614 gimple_set_bb (call_cond_stmt, new_bb);
31615 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31617 predicate_chain = TREE_CHAIN (predicate_chain);
31619 if (and_expr_var == NULL)
31620 and_expr_var = cond_var;
31621 else
31623 gimple assign_stmt;
31624 /* Use MIN_EXPR to check whether any of the condition values is zero:
31625 and_expr_var = MIN_EXPR <cond_var, and_expr_var>. */
31626 assign_stmt = gimple_build_assign (and_expr_var,
31627 build2 (MIN_EXPR, integer_type_node,
31628 cond_var, and_expr_var));
31630 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31631 gimple_set_bb (assign_stmt, new_bb);
31632 gimple_seq_add_stmt (&gseq, assign_stmt);
31636 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31637 integer_zero_node,
31638 NULL_TREE, NULL_TREE);
31639 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31640 gimple_set_bb (if_else_stmt, new_bb);
31641 gimple_seq_add_stmt (&gseq, if_else_stmt);
31643 gimple_seq_add_stmt (&gseq, convert_stmt);
31644 gimple_seq_add_stmt (&gseq, return_stmt);
31645 set_bb_seq (new_bb, gseq);
31647 bb1 = new_bb;
31648 e12 = split_block (bb1, if_else_stmt);
31649 bb2 = e12->dest;
31650 e12->flags &= ~EDGE_FALLTHRU;
31651 e12->flags |= EDGE_TRUE_VALUE;
31653 e23 = split_block (bb2, return_stmt);
31655 gimple_set_bb (convert_stmt, bb2);
31656 gimple_set_bb (return_stmt, bb2);
31658 bb3 = e23->dest;
31659 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31661 remove_edge (e23);
31662 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31664 pop_cfun ();
31666 return bb3;
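/* Illustrative note (added): for one predicate in the chain, the GIMPLE
   built above looks roughly like the following (names are for exposition
   only):

     cond_var = __builtin_cpu_supports ("avx2");
     and_expr_var = MIN_EXPR <cond_var, and_expr_var>;   // only when chained
     if (and_expr_var > 0)
       {
         result_var = (void *) &version_decl;
         return result_var;
       }
     // otherwise control falls through to the block for the next version

   so each call to add_condition_to_bb appends one such test to the
   dispatcher body and returns the fall-through block.  */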
31669 /* This parses the attribute arguments to target in DECL and determines
31670 the right builtin to use to match the platform specification.
31671 It returns the priority value for this version decl. If PREDICATE_LIST
31672 is not NULL, it stores the list of cpu features that need to be checked
31673 before dispatching this function. */
31675 static unsigned int
31676 get_builtin_code_for_version (tree decl, tree *predicate_list)
31678 tree attrs;
31679 struct cl_target_option cur_target;
31680 tree target_node;
31681 struct cl_target_option *new_target;
31682 const char *arg_str = NULL;
31683 const char *attrs_str = NULL;
31684 char *tok_str = NULL;
31685 char *token;
31687 /* Priority of i386 features; a greater value means a higher priority. This is
31688 used to decide the order in which function dispatch must happen. For
31689 instance, a version specialized for SSE4.2 should be checked for dispatch
31690 before a version for SSE3, as SSE4.2 implies SSE3. */
31691 enum feature_priority
31693 P_ZERO = 0,
31694 P_MMX,
31695 P_SSE,
31696 P_SSE2,
31697 P_SSE3,
31698 P_SSSE3,
31699 P_PROC_SSSE3,
31700 P_SSE4_A,
31701 P_PROC_SSE4_A,
31702 P_SSE4_1,
31703 P_SSE4_2,
31704 P_PROC_SSE4_2,
31705 P_POPCNT,
31706 P_AVX,
31707 P_PROC_AVX,
31708 P_FMA4,
31709 P_XOP,
31710 P_PROC_XOP,
31711 P_FMA,
31712 P_PROC_FMA,
31713 P_AVX2,
31714 P_PROC_AVX2
31717 enum feature_priority priority = P_ZERO;
31719 /* These are the target attribute strings for which a dispatcher is
31720 available, from fold_builtin_cpu. */
31722 static struct _feature_list
31724 const char *const name;
31725 const enum feature_priority priority;
31727 const feature_list[] =
31729 {"mmx", P_MMX},
31730 {"sse", P_SSE},
31731 {"sse2", P_SSE2},
31732 {"sse3", P_SSE3},
31733 {"sse4a", P_SSE4_A},
31734 {"ssse3", P_SSSE3},
31735 {"sse4.1", P_SSE4_1},
31736 {"sse4.2", P_SSE4_2},
31737 {"popcnt", P_POPCNT},
31738 {"avx", P_AVX},
31739 {"fma4", P_FMA4},
31740 {"xop", P_XOP},
31741 {"fma", P_FMA},
31742 {"avx2", P_AVX2}
31746 static unsigned int NUM_FEATURES
31747 = sizeof (feature_list) / sizeof (struct _feature_list);
31749 unsigned int i;
31751 tree predicate_chain = NULL_TREE;
31752 tree predicate_decl, predicate_arg;
31754 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31755 gcc_assert (attrs != NULL);
31757 attrs = TREE_VALUE (TREE_VALUE (attrs));
31759 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31760 attrs_str = TREE_STRING_POINTER (attrs);
31762 /* Return priority zero for default function. */
31763 if (strcmp (attrs_str, "default") == 0)
31764 return 0;
31766 /* Handle arch= if specified. For priority, set it to be 1 more than
31767 the best instruction set the processor can handle. For instance, if
31768 there is a version for atom and a version for ssse3 (the highest ISA
31769 priority for atom), the atom version must be checked for dispatch
31770 before the ssse3 version. */
31771 if (strstr (attrs_str, "arch=") != NULL)
31773 cl_target_option_save (&cur_target, &global_options);
31774 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31775 &global_options_set);
31777 gcc_assert (target_node);
31778 new_target = TREE_TARGET_OPTION (target_node);
31779 gcc_assert (new_target);
31781 if (new_target->arch_specified && new_target->arch > 0)
31783 switch (new_target->arch)
31785 case PROCESSOR_CORE2:
31786 arg_str = "core2";
31787 priority = P_PROC_SSSE3;
31788 break;
31789 case PROCESSOR_NEHALEM:
31790 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31791 arg_str = "westmere";
31792 else
31793 /* We translate "arch=corei7" and "arch=nehalem" to
31794 "corei7" so that they are mapped to the M_INTEL_COREI7
31795 cpu type and cover all M_INTEL_COREI7_XXX subtypes. */
31796 arg_str = "corei7";
31797 priority = P_PROC_SSE4_2;
31798 break;
31799 case PROCESSOR_SANDYBRIDGE:
31800 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31801 arg_str = "ivybridge";
31802 else
31803 arg_str = "sandybridge";
31804 priority = P_PROC_AVX;
31805 break;
31806 case PROCESSOR_HASWELL:
31807 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31808 arg_str = "broadwell";
31809 else
31810 arg_str = "haswell";
31811 priority = P_PROC_AVX2;
31812 break;
31813 case PROCESSOR_BONNELL:
31814 arg_str = "bonnell";
31815 priority = P_PROC_SSSE3;
31816 break;
31817 case PROCESSOR_SILVERMONT:
31818 arg_str = "silvermont";
31819 priority = P_PROC_SSE4_2;
31820 break;
31821 case PROCESSOR_AMDFAM10:
31822 arg_str = "amdfam10h";
31823 priority = P_PROC_SSE4_A;
31824 break;
31825 case PROCESSOR_BTVER1:
31826 arg_str = "btver1";
31827 priority = P_PROC_SSE4_A;
31828 break;
31829 case PROCESSOR_BTVER2:
31830 arg_str = "btver2";
31831 priority = P_PROC_AVX;
31832 break;
31833 case PROCESSOR_BDVER1:
31834 arg_str = "bdver1";
31835 priority = P_PROC_XOP;
31836 break;
31837 case PROCESSOR_BDVER2:
31838 arg_str = "bdver2";
31839 priority = P_PROC_FMA;
31840 break;
31841 case PROCESSOR_BDVER3:
31842 arg_str = "bdver3";
31843 priority = P_PROC_FMA;
31844 break;
31845 case PROCESSOR_BDVER4:
31846 arg_str = "bdver4";
31847 priority = P_PROC_AVX2;
31848 break;
31852 cl_target_option_restore (&global_options, &cur_target);
31854 if (predicate_list && arg_str == NULL)
31856 error_at (DECL_SOURCE_LOCATION (decl),
31857 "No dispatcher found for the versioning attributes");
31858 return 0;
31861 if (predicate_list)
31863 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31864 /* For a C string literal the length includes the trailing NULL. */
31865 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31866 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31867 predicate_chain);
31871 /* Process feature name. */
31872 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31873 strcpy (tok_str, attrs_str);
31874 token = strtok (tok_str, ",");
31875 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31877 while (token != NULL)
31879 /* Do not process "arch=" */
31880 if (strncmp (token, "arch=", 5) == 0)
31882 token = strtok (NULL, ",");
31883 continue;
31885 for (i = 0; i < NUM_FEATURES; ++i)
31887 if (strcmp (token, feature_list[i].name) == 0)
31889 if (predicate_list)
31891 predicate_arg = build_string_literal (
31892 strlen (feature_list[i].name) + 1,
31893 feature_list[i].name);
31894 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31895 predicate_chain);
31897 /* Find the maximum priority feature. */
31898 if (feature_list[i].priority > priority)
31899 priority = feature_list[i].priority;
31901 break;
31904 if (predicate_list && i == NUM_FEATURES)
31906 error_at (DECL_SOURCE_LOCATION (decl),
31907 "No dispatcher found for %s", token);
31908 return 0;
31910 token = strtok (NULL, ",");
31912 free (tok_str);
31914 if (predicate_list && predicate_chain == NULL_TREE)
31916 error_at (DECL_SOURCE_LOCATION (decl),
31917 "No dispatcher found for the versioning attributes : %s",
31918 attrs_str);
31919 return 0;
31921 else if (predicate_list)
31923 predicate_chain = nreverse (predicate_chain);
31924 *predicate_list = predicate_chain;
31927 return priority;
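/* Illustrative note (added): for a version declared with
   __attribute__ ((target ("arch=core2"))) this returns P_PROC_SSSE3 and,
   when PREDICATE_LIST is requested, a single __builtin_cpu_is ("core2")
   predicate; for target ("avx,popcnt") it returns P_AVX (the highest of the
   listed features) with __builtin_cpu_supports predicates for "avx" and
   "popcnt" chained together.  */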
31930 /* This compares the priority of target features in function DECL1
31931 and DECL2. It returns positive value if DECL1 is higher priority,
31932 negative value if DECL2 is higher priority and 0 if they are the
31933 same. */
31935 static int
31936 ix86_compare_version_priority (tree decl1, tree decl2)
31938 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31939 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31941 return (int)priority1 - (int)priority2;
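/* Illustrative note (added): e.g. comparing a target ("avx2") version
   against a target ("sse3") version yields a positive value, since
   P_AVX2 > P_SSE3 in the feature_priority enum above.  */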
31944 /* V1 and V2 point to function versions with different priorities
31945 based on the target ISA. This function compares their priorities. */
31947 static int
31948 feature_compare (const void *v1, const void *v2)
31950 typedef struct _function_version_info
31952 tree version_decl;
31953 tree predicate_chain;
31954 unsigned int dispatch_priority;
31955 } function_version_info;
31957 const function_version_info c1 = *(const function_version_info *)v1;
31958 const function_version_info c2 = *(const function_version_info *)v2;
31959 return (c2.dispatch_priority - c1.dispatch_priority);
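/* Illustrative note (added): returning c2 - c1 makes qsort order the
   version records by descending dispatch_priority, so, for example, an
   AVX2 version is tested before an SSE3 one in the generated dispatcher.  */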
31962 /* This function generates the dispatch function for
31963 multi-versioned functions. DISPATCH_DECL is the function which will
31964 contain the dispatch logic. FNDECLS are the function choices for
31965 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31966 in DISPATCH_DECL in which the dispatch code is generated. */
31968 static int
31969 dispatch_function_versions (tree dispatch_decl,
31970 void *fndecls_p,
31971 basic_block *empty_bb)
31973 tree default_decl;
31974 gimple ifunc_cpu_init_stmt;
31975 gimple_seq gseq;
31976 int ix;
31977 tree ele;
31978 vec<tree> *fndecls;
31979 unsigned int num_versions = 0;
31980 unsigned int actual_versions = 0;
31981 unsigned int i;
31983 struct _function_version_info
31985 tree version_decl;
31986 tree predicate_chain;
31987 unsigned int dispatch_priority;
31988 } *function_version_info;
31990 gcc_assert (dispatch_decl != NULL
31991 && fndecls_p != NULL
31992 && empty_bb != NULL);
31994 /* fndecls_p is actually a vector. */
31995 fndecls = static_cast<vec<tree> *> (fndecls_p);
31997 /* There must be at least one version other than the default. */
31998 num_versions = fndecls->length ();
31999 gcc_assert (num_versions >= 2);
32001 function_version_info = (struct _function_version_info *)
32002 XNEWVEC (struct _function_version_info, (num_versions - 1));
32004 /* The first version in the vector is the default decl. */
32005 default_decl = (*fndecls)[0];
32007 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
32009 gseq = bb_seq (*empty_bb);
32010 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
32011 constructors, so explicitly call __builtin_cpu_init here. */
32012 ifunc_cpu_init_stmt = gimple_build_call_vec (
32013 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
32014 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
32015 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
32016 set_bb_seq (*empty_bb, gseq);
32018 pop_cfun ();
32021 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
32023 tree version_decl = ele;
32024 tree predicate_chain = NULL_TREE;
32025 unsigned int priority;
32026 /* Get attribute string, parse it and find the right predicate decl.
32027 The predicate function could be a lengthy combination of many
32028 features, like arch-type and various isa-variants. */
32029 priority = get_builtin_code_for_version (version_decl,
32030 &predicate_chain);
32032 if (predicate_chain == NULL_TREE)
32033 continue;
32035 function_version_info [actual_versions].version_decl = version_decl;
32036 function_version_info [actual_versions].predicate_chain
32037 = predicate_chain;
32038 function_version_info [actual_versions].dispatch_priority = priority;
32039 actual_versions++;
32042 /* Sort the versions according to descending order of dispatch priority. The
32043 priority is based on the ISA. This is not a perfect solution. There
32044 could still be ambiguity. If more than one function version is suitable
32045 to execute, which one should be dispatched? In future, allow the user
32046 to specify a dispatch priority next to the version. */
32047 qsort (function_version_info, actual_versions,
32048 sizeof (struct _function_version_info), feature_compare);
32050 for (i = 0; i < actual_versions; ++i)
32051 *empty_bb = add_condition_to_bb (dispatch_decl,
32052 function_version_info[i].version_decl,
32053 function_version_info[i].predicate_chain,
32054 *empty_bb);
32056 /* Dispatch the default version at the end. */
32057 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
32058 NULL, *empty_bb);
32060 free (function_version_info);
32061 return 0;
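/* Illustrative note (added): for a hypothetical function "foo" with a
   default version plus target ("avx") and target ("avx2") versions, the
   dispatcher body built here behaves roughly like the following pseudo-C
   (the dotted names are assembler-level version names, not valid C):

     __builtin_cpu_init ();
     if (__builtin_cpu_supports ("avx2"))
       return &foo.avx2;
     if (__builtin_cpu_supports ("avx"))
       return &foo.avx;
     return &foo;            // default version, dispatched last

   with the conditions emitted in descending priority order.  */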
32064 /* Comparator function to be used in the qsort routine to sort the
32065 attribute specification strings for "target". */
32067 static int
32068 attr_strcmp (const void *v1, const void *v2)
32070 const char *c1 = *(char *const*)v1;
32071 const char *c2 = *(char *const*)v2;
32072 return strcmp (c1, c2);
32075 /* ARGLIST is the argument to the "target" attribute. This function tokenizes
32076 the comma-separated arguments, sorts them and returns a string which
32077 is a unique identifier for the comma-separated arguments. It also
32078 replaces the non-identifier characters "=,-" with "_". */
32080 static char *
32081 sorted_attr_string (tree arglist)
32083 tree arg;
32084 size_t str_len_sum = 0;
32085 char **args = NULL;
32086 char *attr_str, *ret_str;
32087 char *attr = NULL;
32088 unsigned int argnum = 1;
32089 unsigned int i;
32091 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
32093 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
32094 size_t len = strlen (str);
32095 str_len_sum += len + 1;
32096 if (arg != arglist)
32097 argnum++;
32098 for (i = 0; i < strlen (str); i++)
32099 if (str[i] == ',')
32100 argnum++;
32103 attr_str = XNEWVEC (char, str_len_sum);
32104 str_len_sum = 0;
32105 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
32107 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
32108 size_t len = strlen (str);
32109 memcpy (attr_str + str_len_sum, str, len);
32110 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
32111 str_len_sum += len + 1;
32114 /* Replace "=,-" with "_". */
32115 for (i = 0; i < strlen (attr_str); i++)
32116 if (attr_str[i] == '=' || attr_str[i]== '-')
32117 attr_str[i] = '_';
32119 if (argnum == 1)
32120 return attr_str;
32122 args = XNEWVEC (char *, argnum);
32124 i = 0;
32125 attr = strtok (attr_str, ",");
32126 while (attr != NULL)
32128 args[i] = attr;
32129 i++;
32130 attr = strtok (NULL, ",");
32133 qsort (args, argnum, sizeof (char *), attr_strcmp);
32135 ret_str = XNEWVEC (char, str_len_sum);
32136 str_len_sum = 0;
32137 for (i = 0; i < argnum; i++)
32139 size_t len = strlen (args[i]);
32140 memcpy (ret_str + str_len_sum, args[i], len);
32141 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
32142 str_len_sum += len + 1;
32145 XDELETEVEC (args);
32146 XDELETEVEC (attr_str);
32147 return ret_str;
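/* Illustrative example of the normalization above: the attribute arguments
   "avx,arch=core2" have '=' and '-' rewritten to '_', are split at commas,
   sorted, and joined with '_', yielding "arch_core2_avx".  */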
32150 /* This function changes the assembler name for functions that are
32151 versions. If DECL is a function version and has a "target"
32152 attribute, it appends the attribute string to its assembler name. */
32154 static tree
32155 ix86_mangle_function_version_assembler_name (tree decl, tree id)
32157 tree version_attr;
32158 const char *orig_name, *version_string;
32159 char *attr_str, *assembler_name;
32161 if (DECL_DECLARED_INLINE_P (decl)
32162 && lookup_attribute ("gnu_inline",
32163 DECL_ATTRIBUTES (decl)))
32164 error_at (DECL_SOURCE_LOCATION (decl),
32165 "Function versions cannot be marked as gnu_inline,"
32166 " bodies have to be generated");
32168 if (DECL_VIRTUAL_P (decl)
32169 || DECL_VINDEX (decl))
32170 sorry ("Virtual function multiversioning not supported");
32172 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32174 /* target attribute string cannot be NULL. */
32175 gcc_assert (version_attr != NULL_TREE);
32177 orig_name = IDENTIFIER_POINTER (id);
32178 version_string
32179 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
32181 if (strcmp (version_string, "default") == 0)
32182 return id;
32184 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
32185 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
32187 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
32189 /* Allow assembler name to be modified if already set. */
32190 if (DECL_ASSEMBLER_NAME_SET_P (decl))
32191 SET_DECL_RTL (decl, NULL);
32193 tree ret = get_identifier (assembler_name);
32194 XDELETEVEC (attr_str);
32195 XDELETEVEC (assembler_name);
32196 return ret;
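/* Illustrative result of the mangling above (using a plain C-style
   identifier; a C++ mangled name gets the same suffix): a version of "foo"
   declared with __attribute__ ((target ("arch=core2,avx"))) ends up with the
   assembler name "foo.arch_core2_avx", while the "default" version keeps its
   original assembler name.  */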
32199 /* This function returns true if FN1 and FN2 are versions of the same function,
32200 that is, the target strings of the function decls are different. This assumes
32201 that FN1 and FN2 have the same signature. */
32203 static bool
32204 ix86_function_versions (tree fn1, tree fn2)
32206 tree attr1, attr2;
32207 char *target1, *target2;
32208 bool result;
32210 if (TREE_CODE (fn1) != FUNCTION_DECL
32211 || TREE_CODE (fn2) != FUNCTION_DECL)
32212 return false;
32214 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
32215 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
32217 /* At least one function decl should have the target attribute specified. */
32218 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
32219 return false;
32221 /* Diagnose missing target attribute if one of the decls is already
32222 multi-versioned. */
32223 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
32225 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
32227 if (attr2 != NULL_TREE)
32229 tree tem = fn1;
32230 fn1 = fn2;
32231 fn2 = tem;
32232 attr1 = attr2;
32234 error_at (DECL_SOURCE_LOCATION (fn2),
32235 "missing %<target%> attribute for multi-versioned %D",
32236 fn2);
32237 inform (DECL_SOURCE_LOCATION (fn1),
32238 "previous declaration of %D", fn1);
32239 /* Prevent diagnosing of the same error multiple times. */
32240 DECL_ATTRIBUTES (fn2)
32241 = tree_cons (get_identifier ("target"),
32242 copy_node (TREE_VALUE (attr1)),
32243 DECL_ATTRIBUTES (fn2));
32245 return false;
32248 target1 = sorted_attr_string (TREE_VALUE (attr1));
32249 target2 = sorted_attr_string (TREE_VALUE (attr2));
32251 /* The sorted target strings must be different for fn1 and fn2
32252 to be versions. */
32253 if (strcmp (target1, target2) == 0)
32254 result = false;
32255 else
32256 result = true;
32258 XDELETEVEC (target1);
32259 XDELETEVEC (target2);
32261 return result;
32264 static tree
32265 ix86_mangle_decl_assembler_name (tree decl, tree id)
32267 /* For function version, add the target suffix to the assembler name. */
32268 if (TREE_CODE (decl) == FUNCTION_DECL
32269 && DECL_FUNCTION_VERSIONED (decl))
32270 id = ix86_mangle_function_version_assembler_name (decl, id);
32271 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
32272 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
32273 #endif
32275 return id;
32278 /* Return a new name by appending SUFFIX to the DECL name. If MAKE_UNIQUE
32279 is true, also append a name unique to this source file. */
32281 static char *
32282 make_name (tree decl, const char *suffix, bool make_unique)
32284 char *global_var_name;
32285 int name_len;
32286 const char *name;
32287 const char *unique_name = NULL;
32289 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
32291 /* Get a unique name that can be used globally without any chance
32292 of collision at link time. */
32293 if (make_unique)
32294 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
32296 name_len = strlen (name) + strlen (suffix) + 2;
32298 if (make_unique)
32299 name_len += strlen (unique_name) + 1;
32300 global_var_name = XNEWVEC (char, name_len);
32302 /* Use '.' to concatenate names as it is demangler friendly. */
32303 if (make_unique)
32304 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
32305 suffix);
32306 else
32307 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
32309 return global_var_name;
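/* Illustrative names produced by make_name: with SUFFIX "resolver" and
   MAKE_UNIQUE false the result is "foo.resolver"; with MAKE_UNIQUE true a
   per-translation-unit component from get_file_function_name is spliced in,
   e.g. "foo.<unique-file-id>.resolver" (placeholder shown, the real
   component is compiler-generated).  */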
32312 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32314 /* Make a dispatcher declaration for the multi-versioned function DECL.
32315 Calls to the function DECL will be replaced with calls to the dispatcher
32316 by the front-end. Return the decl created. */
32318 static tree
32319 make_dispatcher_decl (const tree decl)
32321 tree func_decl;
32322 char *func_name;
32323 tree fn_type, func_type;
32324 bool is_uniq = false;
32326 if (TREE_PUBLIC (decl) == 0)
32327 is_uniq = true;
32329 func_name = make_name (decl, "ifunc", is_uniq);
32331 fn_type = TREE_TYPE (decl);
32332 func_type = build_function_type (TREE_TYPE (fn_type),
32333 TYPE_ARG_TYPES (fn_type));
32335 func_decl = build_fn_decl (func_name, func_type);
32336 XDELETEVEC (func_name);
32337 TREE_USED (func_decl) = 1;
32338 DECL_CONTEXT (func_decl) = NULL_TREE;
32339 DECL_INITIAL (func_decl) = error_mark_node;
32340 DECL_ARTIFICIAL (func_decl) = 1;
32341 /* Mark this func as external, the resolver will flip it again if
32342 it gets generated. */
32343 DECL_EXTERNAL (func_decl) = 1;
32344 /* IFUNCs have to be externally visible. */
32345 TREE_PUBLIC (func_decl) = 1;
32347 return func_decl;
32350 #endif
32352 /* Returns true if DECL is multi-versioned and is the default version,
32353 that is, it is not tagged with a target-specific optimization. */
32355 static bool
32356 is_function_default_version (const tree decl)
32358 if (TREE_CODE (decl) != FUNCTION_DECL
32359 || !DECL_FUNCTION_VERSIONED (decl))
32360 return false;
32361 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32362 gcc_assert (attr);
32363 attr = TREE_VALUE (TREE_VALUE (attr));
32364 return (TREE_CODE (attr) == STRING_CST
32365 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32368 /* Make a dispatcher declaration for the multi-versioned function DECL.
32369 Calls to the function DECL will be replaced with calls to the dispatcher
32370 by the front-end. Returns the decl of the dispatcher function. */
32372 static tree
32373 ix86_get_function_versions_dispatcher (void *decl)
32375 tree fn = (tree) decl;
32376 struct cgraph_node *node = NULL;
32377 struct cgraph_node *default_node = NULL;
32378 struct cgraph_function_version_info *node_v = NULL;
32379 struct cgraph_function_version_info *first_v = NULL;
32381 tree dispatch_decl = NULL;
32383 struct cgraph_function_version_info *default_version_info = NULL;
32385 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32387 node = cgraph_get_node (fn);
32388 gcc_assert (node != NULL);
32390 node_v = get_cgraph_node_version (node);
32391 gcc_assert (node_v != NULL);
32393 if (node_v->dispatcher_resolver != NULL)
32394 return node_v->dispatcher_resolver;
32396 /* Find the default version and make it the first node. */
32397 first_v = node_v;
32398 /* Go to the beginning of the chain. */
32399 while (first_v->prev != NULL)
32400 first_v = first_v->prev;
32401 default_version_info = first_v;
32402 while (default_version_info != NULL)
32404 if (is_function_default_version
32405 (default_version_info->this_node->decl))
32406 break;
32407 default_version_info = default_version_info->next;
32410 /* If there is no default node, just return NULL. */
32411 if (default_version_info == NULL)
32412 return NULL;
32414 /* Make default info the first node. */
32415 if (first_v != default_version_info)
32417 default_version_info->prev->next = default_version_info->next;
32418 if (default_version_info->next)
32419 default_version_info->next->prev = default_version_info->prev;
32420 first_v->prev = default_version_info;
32421 default_version_info->next = first_v;
32422 default_version_info->prev = NULL;
32425 default_node = default_version_info->this_node;
32427 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32428 if (targetm.has_ifunc_p ())
32430 struct cgraph_function_version_info *it_v = NULL;
32431 struct cgraph_node *dispatcher_node = NULL;
32432 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32434 /* Right now, the dispatching is done via ifunc. */
32435 dispatch_decl = make_dispatcher_decl (default_node->decl);
32437 dispatcher_node = cgraph_get_create_node (dispatch_decl);
32438 gcc_assert (dispatcher_node != NULL);
32439 dispatcher_node->dispatcher_function = 1;
32440 dispatcher_version_info
32441 = insert_new_cgraph_node_version (dispatcher_node);
32442 dispatcher_version_info->next = default_version_info;
32443 dispatcher_node->definition = 1;
32445 /* Set the dispatcher for all the versions. */
32446 it_v = default_version_info;
32447 while (it_v != NULL)
32449 it_v->dispatcher_resolver = dispatch_decl;
32450 it_v = it_v->next;
32453 else
32454 #endif
32456 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32457 "multiversioning needs ifunc which is not supported "
32458 "on this target");
32461 return dispatch_decl;
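/* Illustrative use of this machinery (assumed user-level C++ example, only
   valid on targets with ifunc support): each definition below is one
   function version, and calls to foo () are routed through the dispatcher
   decl returned here.

     __attribute__ ((target ("default"))) int foo () { return 0; }
     __attribute__ ((target ("sse4.2")))  int foo () { return 1; }
     __attribute__ ((target ("avx2")))    int foo () { return 2; }
*/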
32464 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32465 it to CHAIN. */
32467 static tree
32468 make_attribute (const char *name, const char *arg_name, tree chain)
32470 tree attr_name;
32471 tree attr_arg_name;
32472 tree attr_args;
32473 tree attr;
32475 attr_name = get_identifier (name);
32476 attr_arg_name = build_string (strlen (arg_name), arg_name);
32477 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32478 attr = tree_cons (attr_name, attr_args, chain);
32479 return attr;
32482 /* Make the resolver function decl to dispatch the versions of
32483 a multi-versioned function, DEFAULT_DECL. Create an
32484 empty basic block in the resolver and store the pointer in
32485 EMPTY_BB. Return the decl of the resolver function. */
32487 static tree
32488 make_resolver_func (const tree default_decl,
32489 const tree dispatch_decl,
32490 basic_block *empty_bb)
32492 char *resolver_name;
32493 tree decl, type, decl_name, t;
32494 bool is_uniq = false;
32496 /* IFUNCs have to be globally visible. So, if the default_decl is
32497 not, then the name of the IFUNC should be made unique. */
32498 if (TREE_PUBLIC (default_decl) == 0)
32499 is_uniq = true;
32501 /* Append the filename to the resolver function if the versions are
32502 not externally visible. This is because the resolver function has
32503 to be externally visible for the loader to find it. So, appending
32504 the filename will prevent conflicts with a resolver function from
32505 another module which is based on the same version name. */
32506 resolver_name = make_name (default_decl, "resolver", is_uniq);
32508 /* The resolver function should return a (void *). */
32509 type = build_function_type_list (ptr_type_node, NULL_TREE);
32511 decl = build_fn_decl (resolver_name, type);
32512 decl_name = get_identifier (resolver_name);
32513 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32515 DECL_NAME (decl) = decl_name;
32516 TREE_USED (decl) = 1;
32517 DECL_ARTIFICIAL (decl) = 1;
32518 DECL_IGNORED_P (decl) = 0;
32519 /* IFUNC resolvers have to be externally visible. */
32520 TREE_PUBLIC (decl) = 1;
32521 DECL_UNINLINABLE (decl) = 1;
32523 /* The resolver is not external; its body is generated. */
32524 DECL_EXTERNAL (decl) = 0;
32525 DECL_EXTERNAL (dispatch_decl) = 0;
32527 DECL_CONTEXT (decl) = NULL_TREE;
32528 DECL_INITIAL (decl) = make_node (BLOCK);
32529 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32531 if (DECL_COMDAT_GROUP (default_decl)
32532 || TREE_PUBLIC (default_decl))
32534 /* In this case, each translation unit with a call to this
32535 versioned function will put out a resolver. Ensure it
32536 is comdat to keep just one copy. */
32537 DECL_COMDAT (decl) = 1;
32538 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32540 /* Build result decl and add to function_decl. */
32541 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32542 DECL_ARTIFICIAL (t) = 1;
32543 DECL_IGNORED_P (t) = 1;
32544 DECL_RESULT (decl) = t;
32546 gimplify_function_tree (decl);
32547 push_cfun (DECL_STRUCT_FUNCTION (decl));
32548 *empty_bb = init_lowered_empty_function (decl, false);
32550 cgraph_add_new_function (decl, true);
32551 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
32553 pop_cfun ();
32555 gcc_assert (dispatch_decl != NULL);
32556 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32557 DECL_ATTRIBUTES (dispatch_decl)
32558 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32560 /* Create the alias for dispatch to resolver here. */
32561 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32562 cgraph_same_body_alias (NULL, dispatch_decl, decl);
32563 XDELETEVEC (resolver_name);
32564 return decl;
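/* Illustrative effect of make_resolver_func (assumed shapes, not emitted
   verbatim): for a public versioned function "foo" the dispatcher becomes a
   GNU indirect function whose resolver is the decl built here, roughly

     extern int foo (void) __attribute__ ((ifunc ("foo.resolver")));
     void *foo.resolver (void) { ... pick one version ... }

   "foo.resolver" is not a valid identifier in hand-written code; the
   compiler creates it directly. If the default version is not TREE_PUBLIC,
   make_name appends a file-unique component instead.  */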
32567 /* Generate the dispatching code body to dispatch multi-versioned function
32568 DECL. The target hook is called to process the "target" attributes and
32569 provide the code to dispatch the right function at run-time. NODE points
32570 to the dispatcher decl whose body will be created. */
32572 static tree
32573 ix86_generate_version_dispatcher_body (void *node_p)
32575 tree resolver_decl;
32576 basic_block empty_bb;
32577 tree default_ver_decl;
32578 struct cgraph_node *versn;
32579 struct cgraph_node *node;
32581 struct cgraph_function_version_info *node_version_info = NULL;
32582 struct cgraph_function_version_info *versn_info = NULL;
32584 node = (cgraph_node *)node_p;
32586 node_version_info = get_cgraph_node_version (node);
32587 gcc_assert (node->dispatcher_function
32588 && node_version_info != NULL);
32590 if (node_version_info->dispatcher_resolver)
32591 return node_version_info->dispatcher_resolver;
32593 /* The first version in the chain corresponds to the default version. */
32594 default_ver_decl = node_version_info->next->this_node->decl;
32596 /* node is going to be an alias, so remove the finalized bit. */
32597 node->definition = false;
32599 resolver_decl = make_resolver_func (default_ver_decl,
32600 node->decl, &empty_bb);
32602 node_version_info->dispatcher_resolver = resolver_decl;
32604 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32606 auto_vec<tree, 2> fn_ver_vec;
32608 for (versn_info = node_version_info->next; versn_info;
32609 versn_info = versn_info->next)
32611 versn = versn_info->this_node;
32612 /* Check for virtual functions here again, as by this time it should
32613 have been determined if this function needs a vtable index or
32614 not. This happens for methods in derived classes that override
32615 virtual methods in base classes but are not explicitly marked as
32616 virtual. */
32617 if (DECL_VINDEX (versn->decl))
32618 sorry ("Virtual function multiversioning not supported");
32620 fn_ver_vec.safe_push (versn->decl);
32623 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32624 rebuild_cgraph_edges ();
32625 pop_cfun ();
32626 return resolver_decl;
32628 /* This builds the processor_model struct type defined in
32629 libgcc/config/i386/cpuinfo.c */
32631 static tree
32632 build_processor_model_struct (void)
32634 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32635 "__cpu_features"};
32636 tree field = NULL_TREE, field_chain = NULL_TREE;
32637 int i;
32638 tree type = make_node (RECORD_TYPE);
32640 /* The first 3 fields are unsigned int. */
32641 for (i = 0; i < 3; ++i)
32643 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32644 get_identifier (field_name[i]), unsigned_type_node);
32645 if (field_chain != NULL_TREE)
32646 DECL_CHAIN (field) = field_chain;
32647 field_chain = field;
32650 /* The last field is an array of unsigned integers of size one. */
32651 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32652 get_identifier (field_name[3]),
32653 build_array_type (unsigned_type_node,
32654 build_index_type (size_one_node)));
32655 if (field_chain != NULL_TREE)
32656 DECL_CHAIN (field) = field_chain;
32657 field_chain = field;
32659 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32660 return type;
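/* For reference, this mirrors the layout used by libgcc (paraphrased from
   libgcc/config/i386/cpuinfo.c):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     } __cpu_model;
*/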
32663 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32665 static tree
32666 make_var_decl (tree type, const char *name)
32668 tree new_decl;
32670 new_decl = build_decl (UNKNOWN_LOCATION,
32671 VAR_DECL,
32672 get_identifier(name),
32673 type);
32675 DECL_EXTERNAL (new_decl) = 1;
32676 TREE_STATIC (new_decl) = 1;
32677 TREE_PUBLIC (new_decl) = 1;
32678 DECL_INITIAL (new_decl) = 0;
32679 DECL_ARTIFICIAL (new_decl) = 0;
32680 DECL_PRESERVE_P (new_decl) = 1;
32682 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32683 assemble_variable (new_decl, 0, 0, 0);
32685 return new_decl;
32688 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32689 into a check against the __cpu_model variable defined in libgcc/config/i386/cpuinfo.c. */
32691 static tree
32692 fold_builtin_cpu (tree fndecl, tree *args)
32694 unsigned int i;
32695 enum ix86_builtins fn_code = (enum ix86_builtins)
32696 DECL_FUNCTION_CODE (fndecl);
32697 tree param_string_cst = NULL;
32699 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32700 enum processor_features
32702 F_CMOV = 0,
32703 F_MMX,
32704 F_POPCNT,
32705 F_SSE,
32706 F_SSE2,
32707 F_SSE3,
32708 F_SSSE3,
32709 F_SSE4_1,
32710 F_SSE4_2,
32711 F_AVX,
32712 F_AVX2,
32713 F_SSE4_A,
32714 F_FMA4,
32715 F_XOP,
32716 F_FMA,
32717 F_MAX
32720 /* These are the values for vendor types and CPU types and subtypes
32721 in cpuinfo.c. The corresponding start value must be subtracted
32722 from CPU types and subtypes before comparing. */
32723 enum processor_model
32725 M_INTEL = 1,
32726 M_AMD,
32727 M_CPU_TYPE_START,
32728 M_INTEL_BONNELL,
32729 M_INTEL_CORE2,
32730 M_INTEL_COREI7,
32731 M_AMDFAM10H,
32732 M_AMDFAM15H,
32733 M_INTEL_SILVERMONT,
32734 M_AMD_BTVER1,
32735 M_AMD_BTVER2,
32736 M_CPU_SUBTYPE_START,
32737 M_INTEL_COREI7_NEHALEM,
32738 M_INTEL_COREI7_WESTMERE,
32739 M_INTEL_COREI7_SANDYBRIDGE,
32740 M_AMDFAM10H_BARCELONA,
32741 M_AMDFAM10H_SHANGHAI,
32742 M_AMDFAM10H_ISTANBUL,
32743 M_AMDFAM15H_BDVER1,
32744 M_AMDFAM15H_BDVER2,
32745 M_AMDFAM15H_BDVER3,
32746 M_AMDFAM15H_BDVER4,
32747 M_INTEL_COREI7_IVYBRIDGE,
32748 M_INTEL_COREI7_HASWELL
32751 static struct _arch_names_table
32753 const char *const name;
32754 const enum processor_model model;
32756 const arch_names_table[] =
32758 {"amd", M_AMD},
32759 {"intel", M_INTEL},
32760 {"atom", M_INTEL_BONNELL},
32761 {"slm", M_INTEL_SILVERMONT},
32762 {"core2", M_INTEL_CORE2},
32763 {"corei7", M_INTEL_COREI7},
32764 {"nehalem", M_INTEL_COREI7_NEHALEM},
32765 {"westmere", M_INTEL_COREI7_WESTMERE},
32766 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32767 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32768 {"haswell", M_INTEL_COREI7_HASWELL},
32769 {"bonnell", M_INTEL_BONNELL},
32770 {"silvermont", M_INTEL_SILVERMONT},
32771 {"amdfam10h", M_AMDFAM10H},
32772 {"barcelona", M_AMDFAM10H_BARCELONA},
32773 {"shanghai", M_AMDFAM10H_SHANGHAI},
32774 {"istanbul", M_AMDFAM10H_ISTANBUL},
32775 {"btver1", M_AMD_BTVER1},
32776 {"amdfam15h", M_AMDFAM15H},
32777 {"bdver1", M_AMDFAM15H_BDVER1},
32778 {"bdver2", M_AMDFAM15H_BDVER2},
32779 {"bdver3", M_AMDFAM15H_BDVER3},
32780 {"bdver4", M_AMDFAM15H_BDVER4},
32781 {"btver2", M_AMD_BTVER2},
32784 static struct _isa_names_table
32786 const char *const name;
32787 const enum processor_features feature;
32789 const isa_names_table[] =
32791 {"cmov", F_CMOV},
32792 {"mmx", F_MMX},
32793 {"popcnt", F_POPCNT},
32794 {"sse", F_SSE},
32795 {"sse2", F_SSE2},
32796 {"sse3", F_SSE3},
32797 {"ssse3", F_SSSE3},
32798 {"sse4a", F_SSE4_A},
32799 {"sse4.1", F_SSE4_1},
32800 {"sse4.2", F_SSE4_2},
32801 {"avx", F_AVX},
32802 {"fma4", F_FMA4},
32803 {"xop", F_XOP},
32804 {"fma", F_FMA},
32805 {"avx2", F_AVX2}
32808 tree __processor_model_type = build_processor_model_struct ();
32809 tree __cpu_model_var = make_var_decl (__processor_model_type,
32810 "__cpu_model");
32813 varpool_add_new_variable (__cpu_model_var);
32815 gcc_assert ((args != NULL) && (*args != NULL));
32817 param_string_cst = *args;
32818 while (param_string_cst
32819 && TREE_CODE (param_string_cst) != STRING_CST)
32821 /* *args must be an expr that can contain other EXPRs leading to a
32822 STRING_CST. */
32823 if (!EXPR_P (param_string_cst))
32825 error ("Parameter to builtin must be a string constant or literal");
32826 return integer_zero_node;
32828 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32831 gcc_assert (param_string_cst);
32833 if (fn_code == IX86_BUILTIN_CPU_IS)
32835 tree ref;
32836 tree field;
32837 tree final;
32839 unsigned int field_val = 0;
32840 unsigned int NUM_ARCH_NAMES
32841 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32843 for (i = 0; i < NUM_ARCH_NAMES; i++)
32844 if (strcmp (arch_names_table[i].name,
32845 TREE_STRING_POINTER (param_string_cst)) == 0)
32846 break;
32848 if (i == NUM_ARCH_NAMES)
32850 error ("Parameter to builtin not valid: %s",
32851 TREE_STRING_POINTER (param_string_cst));
32852 return integer_zero_node;
32855 field = TYPE_FIELDS (__processor_model_type);
32856 field_val = arch_names_table[i].model;
32858 /* CPU types are stored in the next field. */
32859 if (field_val > M_CPU_TYPE_START
32860 && field_val < M_CPU_SUBTYPE_START)
32862 field = DECL_CHAIN (field);
32863 field_val -= M_CPU_TYPE_START;
32866 /* CPU subtypes are stored in the next field. */
32867 if (field_val > M_CPU_SUBTYPE_START)
32869 field = DECL_CHAIN (DECL_CHAIN (field));
32870 field_val -= M_CPU_SUBTYPE_START;
32873 /* Get the appropriate field in __cpu_model. */
32874 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32875 field, NULL_TREE);
32877 /* Check the value. */
32878 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32879 build_int_cstu (unsigned_type_node, field_val));
32880 return build1 (CONVERT_EXPR, integer_type_node, final);
32882 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32884 tree ref;
32885 tree array_elt;
32886 tree field;
32887 tree final;
32889 unsigned int field_val = 0;
32890 unsigned int NUM_ISA_NAMES
32891 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32893 for (i = 0; i < NUM_ISA_NAMES; i++)
32894 if (strcmp (isa_names_table[i].name,
32895 TREE_STRING_POINTER (param_string_cst)) == 0)
32896 break;
32898 if (i == NUM_ISA_NAMES)
32900 error ("Parameter to builtin not valid: %s",
32901 TREE_STRING_POINTER (param_string_cst));
32902 return integer_zero_node;
32905 field = TYPE_FIELDS (__processor_model_type);
32906 /* Get the last field, which is __cpu_features. */
32907 while (DECL_CHAIN (field))
32908 field = DECL_CHAIN (field);
32910 /* Get the appropriate field: __cpu_model.__cpu_features */
32911 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32912 field, NULL_TREE);
32914 /* Access the 0th element of __cpu_features array. */
32915 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32916 integer_zero_node, NULL_TREE, NULL_TREE);
32918 field_val = (1 << isa_names_table[i].feature);
32919 /* Return __cpu_model.__cpu_features[0] & field_val */
32920 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32921 build_int_cstu (unsigned_type_node, field_val));
32922 return build1 (CONVERT_EXPR, integer_type_node, final);
32924 gcc_unreachable ();
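/* Illustrative foldings produced above (the names refer to the enums in this
   function):

     __builtin_cpu_is ("amd")
       -> (int) (__cpu_model.__cpu_vendor == M_AMD)
     __builtin_cpu_is ("nehalem")
       -> (int) (__cpu_model.__cpu_subtype
                 == M_INTEL_COREI7_NEHALEM - M_CPU_SUBTYPE_START)
     __builtin_cpu_supports ("avx2")
       -> (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))
*/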
32927 static tree
32928 ix86_fold_builtin (tree fndecl, int n_args,
32929 tree *args, bool ignore ATTRIBUTE_UNUSED)
32931 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32933 enum ix86_builtins fn_code = (enum ix86_builtins)
32934 DECL_FUNCTION_CODE (fndecl);
32935 if (fn_code == IX86_BUILTIN_CPU_IS
32936 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32938 gcc_assert (n_args == 1);
32939 return fold_builtin_cpu (fndecl, args);
32943 #ifdef SUBTARGET_FOLD_BUILTIN
32944 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32945 #endif
32947 return NULL_TREE;
32950 /* Make builtins to detect cpu type and features supported. NAME is
32951 the builtin name, CODE is the builtin code, and FTYPE is the function
32952 type of the builtin. */
32954 static void
32955 make_cpu_type_builtin (const char* name, int code,
32956 enum ix86_builtin_func_type ftype, bool is_const)
32958 tree decl;
32959 tree type;
32961 type = ix86_get_builtin_func_type (ftype);
32962 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32963 NULL, NULL_TREE);
32964 gcc_assert (decl != NULL_TREE);
32965 ix86_builtins[(int) code] = decl;
32966 TREE_READONLY (decl) = is_const;
32969 /* Make builtins to get CPU type and features supported. The created
32970 builtins are:
32972 __builtin_cpu_init (), to detect CPU type and features,
32973 __builtin_cpu_is ("<CPUNAME>"), to check if the CPU is of type <CPUNAME>,
32974 __builtin_cpu_supports ("<FEATURE>"), to check if the CPU supports <FEATURE>.
32977 static void
32978 ix86_init_platform_type_builtins (void)
32980 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32981 INT_FTYPE_VOID, false);
32982 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32983 INT_FTYPE_PCCHAR, true);
32984 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32985 INT_FTYPE_PCCHAR, true);
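/* Illustrative user-level usage of the builtins registered above (assumed
   example, not part of this file; run_avx2_path/run_generic_path are
   hypothetical helpers):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("corei7") && __builtin_cpu_supports ("avx2"))
       run_avx2_path ();
     else
       run_generic_path ();
*/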
32988 /* Internal method for ix86_init_builtins. */
32990 static void
32991 ix86_init_builtins_va_builtins_abi (void)
32993 tree ms_va_ref, sysv_va_ref;
32994 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32995 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32996 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32997 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32999 if (!TARGET_64BIT)
33000 return;
33001 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
33002 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
33003 ms_va_ref = build_reference_type (ms_va_list_type_node);
33004 sysv_va_ref =
33005 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
33007 fnvoid_va_end_ms =
33008 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
33009 fnvoid_va_start_ms =
33010 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
33011 fnvoid_va_end_sysv =
33012 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
33013 fnvoid_va_start_sysv =
33014 build_varargs_function_type_list (void_type_node, sysv_va_ref,
33015 NULL_TREE);
33016 fnvoid_va_copy_ms =
33017 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
33018 NULL_TREE);
33019 fnvoid_va_copy_sysv =
33020 build_function_type_list (void_type_node, sysv_va_ref,
33021 sysv_va_ref, NULL_TREE);
33023 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
33024 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
33025 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
33026 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
33027 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
33028 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
33029 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
33030 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
33031 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
33032 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
33033 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
33034 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
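/* Illustrative user-level usage of the ms_abi varargs builtins registered
   above (assumed example; only meaningful when TARGET_64BIT):

     __attribute__ ((ms_abi)) int
     sum_ints (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/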
33037 static void
33038 ix86_init_builtin_types (void)
33040 tree float128_type_node, float80_type_node;
33042 /* The __float80 type. */
33043 float80_type_node = long_double_type_node;
33044 if (TYPE_MODE (float80_type_node) != XFmode)
33046 /* The __float80 type. */
33047 float80_type_node = make_node (REAL_TYPE);
33049 TYPE_PRECISION (float80_type_node) = 80;
33050 layout_type (float80_type_node);
33052 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
33054 /* The __float128 type. */
33055 float128_type_node = make_node (REAL_TYPE);
33056 TYPE_PRECISION (float128_type_node) = 128;
33057 layout_type (float128_type_node);
33058 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
33060 /* This macro is built by i386-builtin-types.awk. */
33061 DEFINE_BUILTIN_PRIMITIVE_TYPES;
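/* Illustrative usage of the types registered above (assumed example):
   __float80 is the x87 80-bit extended type (XFmode) and __float128 the
   128-bit quad type (TFmode); constants use the w/q literal suffixes, e.g.

     __float80  ext  = 1.5w;
     __float128 quad = 1.5q;
*/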
33064 static void
33065 ix86_init_builtins (void)
33067 tree t;
33069 ix86_init_builtin_types ();
33071 /* Builtins to get CPU type and features. */
33072 ix86_init_platform_type_builtins ();
33074 /* TFmode support builtins. */
33075 def_builtin_const (0, "__builtin_infq",
33076 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
33077 def_builtin_const (0, "__builtin_huge_valq",
33078 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
33080 /* We will expand them to normal calls if SSE isn't available since
33081 they are used by libgcc. */
33082 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
33083 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
33084 BUILT_IN_MD, "__fabstf2", NULL_TREE);
33085 TREE_READONLY (t) = 1;
33086 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
33088 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
33089 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
33090 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
33091 TREE_READONLY (t) = 1;
33092 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
33094 ix86_init_tm_builtins ();
33095 ix86_init_mmx_sse_builtins ();
33097 if (TARGET_LP64)
33098 ix86_init_builtins_va_builtins_abi ();
33100 #ifdef SUBTARGET_INIT_BUILTINS
33101 SUBTARGET_INIT_BUILTINS;
33102 #endif
33105 /* Return the ix86 builtin for CODE. */
33107 static tree
33108 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
33110 if (code >= IX86_BUILTIN_MAX)
33111 return error_mark_node;
33113 return ix86_builtins[code];
33116 /* Errors in the source file can cause expand_expr to return const0_rtx
33117 where we expect a vector. To avoid crashing, use one of the vector
33118 clear instructions. */
33119 static rtx
33120 safe_vector_operand (rtx x, enum machine_mode mode)
33122 if (x == const0_rtx)
33123 x = CONST0_RTX (mode);
33124 return x;
33127 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
33129 static rtx
33130 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
33132 rtx pat;
33133 tree arg0 = CALL_EXPR_ARG (exp, 0);
33134 tree arg1 = CALL_EXPR_ARG (exp, 1);
33135 rtx op0 = expand_normal (arg0);
33136 rtx op1 = expand_normal (arg1);
33137 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33138 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33139 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
33141 if (VECTOR_MODE_P (mode0))
33142 op0 = safe_vector_operand (op0, mode0);
33143 if (VECTOR_MODE_P (mode1))
33144 op1 = safe_vector_operand (op1, mode1);
33146 if (optimize || !target
33147 || GET_MODE (target) != tmode
33148 || !insn_data[icode].operand[0].predicate (target, tmode))
33149 target = gen_reg_rtx (tmode);
33151 if (GET_MODE (op1) == SImode && mode1 == TImode)
33153 rtx x = gen_reg_rtx (V4SImode);
33154 emit_insn (gen_sse2_loadd (x, op1));
33155 op1 = gen_lowpart (TImode, x);
33158 if (!insn_data[icode].operand[1].predicate (op0, mode0))
33159 op0 = copy_to_mode_reg (mode0, op0);
33160 if (!insn_data[icode].operand[2].predicate (op1, mode1))
33161 op1 = copy_to_mode_reg (mode1, op1);
33163 pat = GEN_FCN (icode) (target, op0, op1);
33164 if (! pat)
33165 return 0;
33167 emit_insn (pat);
33169 return target;
33172 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
33174 static rtx
33175 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
33176 enum ix86_builtin_func_type m_type,
33177 enum rtx_code sub_code)
33179 rtx pat;
33180 int i;
33181 int nargs;
33182 bool comparison_p = false;
33183 bool tf_p = false;
33184 bool last_arg_constant = false;
33185 int num_memory = 0;
33186 struct {
33187 rtx op;
33188 enum machine_mode mode;
33189 } args[4];
33191 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33193 switch (m_type)
33195 case MULTI_ARG_4_DF2_DI_I:
33196 case MULTI_ARG_4_DF2_DI_I1:
33197 case MULTI_ARG_4_SF2_SI_I:
33198 case MULTI_ARG_4_SF2_SI_I1:
33199 nargs = 4;
33200 last_arg_constant = true;
33201 break;
33203 case MULTI_ARG_3_SF:
33204 case MULTI_ARG_3_DF:
33205 case MULTI_ARG_3_SF2:
33206 case MULTI_ARG_3_DF2:
33207 case MULTI_ARG_3_DI:
33208 case MULTI_ARG_3_SI:
33209 case MULTI_ARG_3_SI_DI:
33210 case MULTI_ARG_3_HI:
33211 case MULTI_ARG_3_HI_SI:
33212 case MULTI_ARG_3_QI:
33213 case MULTI_ARG_3_DI2:
33214 case MULTI_ARG_3_SI2:
33215 case MULTI_ARG_3_HI2:
33216 case MULTI_ARG_3_QI2:
33217 nargs = 3;
33218 break;
33220 case MULTI_ARG_2_SF:
33221 case MULTI_ARG_2_DF:
33222 case MULTI_ARG_2_DI:
33223 case MULTI_ARG_2_SI:
33224 case MULTI_ARG_2_HI:
33225 case MULTI_ARG_2_QI:
33226 nargs = 2;
33227 break;
33229 case MULTI_ARG_2_DI_IMM:
33230 case MULTI_ARG_2_SI_IMM:
33231 case MULTI_ARG_2_HI_IMM:
33232 case MULTI_ARG_2_QI_IMM:
33233 nargs = 2;
33234 last_arg_constant = true;
33235 break;
33237 case MULTI_ARG_1_SF:
33238 case MULTI_ARG_1_DF:
33239 case MULTI_ARG_1_SF2:
33240 case MULTI_ARG_1_DF2:
33241 case MULTI_ARG_1_DI:
33242 case MULTI_ARG_1_SI:
33243 case MULTI_ARG_1_HI:
33244 case MULTI_ARG_1_QI:
33245 case MULTI_ARG_1_SI_DI:
33246 case MULTI_ARG_1_HI_DI:
33247 case MULTI_ARG_1_HI_SI:
33248 case MULTI_ARG_1_QI_DI:
33249 case MULTI_ARG_1_QI_SI:
33250 case MULTI_ARG_1_QI_HI:
33251 nargs = 1;
33252 break;
33254 case MULTI_ARG_2_DI_CMP:
33255 case MULTI_ARG_2_SI_CMP:
33256 case MULTI_ARG_2_HI_CMP:
33257 case MULTI_ARG_2_QI_CMP:
33258 nargs = 2;
33259 comparison_p = true;
33260 break;
33262 case MULTI_ARG_2_SF_TF:
33263 case MULTI_ARG_2_DF_TF:
33264 case MULTI_ARG_2_DI_TF:
33265 case MULTI_ARG_2_SI_TF:
33266 case MULTI_ARG_2_HI_TF:
33267 case MULTI_ARG_2_QI_TF:
33268 nargs = 2;
33269 tf_p = true;
33270 break;
33272 default:
33273 gcc_unreachable ();
33276 if (optimize || !target
33277 || GET_MODE (target) != tmode
33278 || !insn_data[icode].operand[0].predicate (target, tmode))
33279 target = gen_reg_rtx (tmode);
33281 gcc_assert (nargs <= 4);
33283 for (i = 0; i < nargs; i++)
33285 tree arg = CALL_EXPR_ARG (exp, i);
33286 rtx op = expand_normal (arg);
33287 int adjust = (comparison_p) ? 1 : 0;
33288 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
33290 if (last_arg_constant && i == nargs - 1)
33292 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
33294 enum insn_code new_icode = icode;
33295 switch (icode)
33297 case CODE_FOR_xop_vpermil2v2df3:
33298 case CODE_FOR_xop_vpermil2v4sf3:
33299 case CODE_FOR_xop_vpermil2v4df3:
33300 case CODE_FOR_xop_vpermil2v8sf3:
33301 error ("the last argument must be a 2-bit immediate");
33302 return gen_reg_rtx (tmode);
33303 case CODE_FOR_xop_rotlv2di3:
33304 new_icode = CODE_FOR_rotlv2di3;
33305 goto xop_rotl;
33306 case CODE_FOR_xop_rotlv4si3:
33307 new_icode = CODE_FOR_rotlv4si3;
33308 goto xop_rotl;
33309 case CODE_FOR_xop_rotlv8hi3:
33310 new_icode = CODE_FOR_rotlv8hi3;
33311 goto xop_rotl;
33312 case CODE_FOR_xop_rotlv16qi3:
33313 new_icode = CODE_FOR_rotlv16qi3;
33314 xop_rotl:
33315 if (CONST_INT_P (op))
33317 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
33318 op = GEN_INT (INTVAL (op) & mask);
33319 gcc_checking_assert
33320 (insn_data[icode].operand[i + 1].predicate (op, mode));
33322 else
33324 gcc_checking_assert
33325 (nargs == 2
33326 && insn_data[new_icode].operand[0].mode == tmode
33327 && insn_data[new_icode].operand[1].mode == tmode
33328 && insn_data[new_icode].operand[2].mode == mode
33329 && insn_data[new_icode].operand[0].predicate
33330 == insn_data[icode].operand[0].predicate
33331 && insn_data[new_icode].operand[1].predicate
33332 == insn_data[icode].operand[1].predicate);
33333 icode = new_icode;
33334 goto non_constant;
33336 break;
33337 default:
33338 gcc_unreachable ();
33342 else
33344 non_constant:
33345 if (VECTOR_MODE_P (mode))
33346 op = safe_vector_operand (op, mode);
33348 /* If we aren't optimizing, only allow one memory operand to be
33349 generated. */
33350 if (memory_operand (op, mode))
33351 num_memory++;
33353 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33355 if (optimize
33356 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33357 || num_memory > 1)
33358 op = force_reg (mode, op);
33361 args[i].op = op;
33362 args[i].mode = mode;
33365 switch (nargs)
33367 case 1:
33368 pat = GEN_FCN (icode) (target, args[0].op);
33369 break;
33371 case 2:
33372 if (tf_p)
33373 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33374 GEN_INT ((int)sub_code));
33375 else if (! comparison_p)
33376 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33377 else
33379 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33380 args[0].op,
33381 args[1].op);
33383 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33385 break;
33387 case 3:
33388 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33389 break;
33391 case 4:
33392 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33393 break;
33395 default:
33396 gcc_unreachable ();
33399 if (! pat)
33400 return 0;
33402 emit_insn (pat);
33403 return target;
33406 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33407 insns with vec_merge. */
33409 static rtx
33410 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33411 rtx target)
33413 rtx pat;
33414 tree arg0 = CALL_EXPR_ARG (exp, 0);
33415 rtx op1, op0 = expand_normal (arg0);
33416 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33417 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33419 if (optimize || !target
33420 || GET_MODE (target) != tmode
33421 || !insn_data[icode].operand[0].predicate (target, tmode))
33422 target = gen_reg_rtx (tmode);
33424 if (VECTOR_MODE_P (mode0))
33425 op0 = safe_vector_operand (op0, mode0);
33427 if ((optimize && !register_operand (op0, mode0))
33428 || !insn_data[icode].operand[1].predicate (op0, mode0))
33429 op0 = copy_to_mode_reg (mode0, op0);
33431 op1 = op0;
33432 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33433 op1 = copy_to_mode_reg (mode0, op1);
33435 pat = GEN_FCN (icode) (target, op0, op1);
33436 if (! pat)
33437 return 0;
33438 emit_insn (pat);
33439 return target;
33442 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33444 static rtx
33445 ix86_expand_sse_compare (const struct builtin_description *d,
33446 tree exp, rtx target, bool swap)
33448 rtx pat;
33449 tree arg0 = CALL_EXPR_ARG (exp, 0);
33450 tree arg1 = CALL_EXPR_ARG (exp, 1);
33451 rtx op0 = expand_normal (arg0);
33452 rtx op1 = expand_normal (arg1);
33453 rtx op2;
33454 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33455 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33456 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33457 enum rtx_code comparison = d->comparison;
33459 if (VECTOR_MODE_P (mode0))
33460 op0 = safe_vector_operand (op0, mode0);
33461 if (VECTOR_MODE_P (mode1))
33462 op1 = safe_vector_operand (op1, mode1);
33464 /* Swap operands if we have a comparison that isn't available in
33465 hardware. */
33466 if (swap)
33468 rtx tmp = gen_reg_rtx (mode1);
33469 emit_move_insn (tmp, op1);
33470 op1 = op0;
33471 op0 = tmp;
33474 if (optimize || !target
33475 || GET_MODE (target) != tmode
33476 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33477 target = gen_reg_rtx (tmode);
33479 if ((optimize && !register_operand (op0, mode0))
33480 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33481 op0 = copy_to_mode_reg (mode0, op0);
33482 if ((optimize && !register_operand (op1, mode1))
33483 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33484 op1 = copy_to_mode_reg (mode1, op1);
33486 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33487 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33488 if (! pat)
33489 return 0;
33490 emit_insn (pat);
33491 return target;
33494 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33496 static rtx
33497 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33498 rtx target)
33500 rtx pat;
33501 tree arg0 = CALL_EXPR_ARG (exp, 0);
33502 tree arg1 = CALL_EXPR_ARG (exp, 1);
33503 rtx op0 = expand_normal (arg0);
33504 rtx op1 = expand_normal (arg1);
33505 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33506 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33507 enum rtx_code comparison = d->comparison;
33509 if (VECTOR_MODE_P (mode0))
33510 op0 = safe_vector_operand (op0, mode0);
33511 if (VECTOR_MODE_P (mode1))
33512 op1 = safe_vector_operand (op1, mode1);
33514 /* Swap operands if we have a comparison that isn't available in
33515 hardware. */
33516 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33518 rtx tmp = op1;
33519 op1 = op0;
33520 op0 = tmp;
33523 target = gen_reg_rtx (SImode);
33524 emit_move_insn (target, const0_rtx);
33525 target = gen_rtx_SUBREG (QImode, target, 0);
33527 if ((optimize && !register_operand (op0, mode0))
33528 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33529 op0 = copy_to_mode_reg (mode0, op0);
33530 if ((optimize && !register_operand (op1, mode1))
33531 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33532 op1 = copy_to_mode_reg (mode1, op1);
33534 pat = GEN_FCN (d->icode) (op0, op1);
33535 if (! pat)
33536 return 0;
33537 emit_insn (pat);
33538 emit_insn (gen_rtx_SET (VOIDmode,
33539 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33540 gen_rtx_fmt_ee (comparison, QImode,
33541 SET_DEST (pat),
33542 const0_rtx)));
33544 return SUBREG_REG (target);
33547 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33549 static rtx
33550 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33551 rtx target)
33553 rtx pat;
33554 tree arg0 = CALL_EXPR_ARG (exp, 0);
33555 rtx op1, op0 = expand_normal (arg0);
33556 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33557 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33559 if (optimize || target == 0
33560 || GET_MODE (target) != tmode
33561 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33562 target = gen_reg_rtx (tmode);
33564 if (VECTOR_MODE_P (mode0))
33565 op0 = safe_vector_operand (op0, mode0);
33567 if ((optimize && !register_operand (op0, mode0))
33568 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33569 op0 = copy_to_mode_reg (mode0, op0);
33571 op1 = GEN_INT (d->comparison);
33573 pat = GEN_FCN (d->icode) (target, op0, op1);
33574 if (! pat)
33575 return 0;
33576 emit_insn (pat);
33577 return target;
33580 static rtx
33581 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33582 tree exp, rtx target)
33584 rtx pat;
33585 tree arg0 = CALL_EXPR_ARG (exp, 0);
33586 tree arg1 = CALL_EXPR_ARG (exp, 1);
33587 rtx op0 = expand_normal (arg0);
33588 rtx op1 = expand_normal (arg1);
33589 rtx op2;
33590 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33591 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33592 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33594 if (optimize || target == 0
33595 || GET_MODE (target) != tmode
33596 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33597 target = gen_reg_rtx (tmode);
33599 op0 = safe_vector_operand (op0, mode0);
33600 op1 = safe_vector_operand (op1, mode1);
33602 if ((optimize && !register_operand (op0, mode0))
33603 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33604 op0 = copy_to_mode_reg (mode0, op0);
33605 if ((optimize && !register_operand (op1, mode1))
33606 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33607 op1 = copy_to_mode_reg (mode1, op1);
33609 op2 = GEN_INT (d->comparison);
33611 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33612 if (! pat)
33613 return 0;
33614 emit_insn (pat);
33615 return target;
33618 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33620 static rtx
33621 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33622 rtx target)
33624 rtx pat;
33625 tree arg0 = CALL_EXPR_ARG (exp, 0);
33626 tree arg1 = CALL_EXPR_ARG (exp, 1);
33627 rtx op0 = expand_normal (arg0);
33628 rtx op1 = expand_normal (arg1);
33629 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33630 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33631 enum rtx_code comparison = d->comparison;
33633 if (VECTOR_MODE_P (mode0))
33634 op0 = safe_vector_operand (op0, mode0);
33635 if (VECTOR_MODE_P (mode1))
33636 op1 = safe_vector_operand (op1, mode1);
33638 target = gen_reg_rtx (SImode);
33639 emit_move_insn (target, const0_rtx);
33640 target = gen_rtx_SUBREG (QImode, target, 0);
33642 if ((optimize && !register_operand (op0, mode0))
33643 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33644 op0 = copy_to_mode_reg (mode0, op0);
33645 if ((optimize && !register_operand (op1, mode1))
33646 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33647 op1 = copy_to_mode_reg (mode1, op1);
33649 pat = GEN_FCN (d->icode) (op0, op1);
33650 if (! pat)
33651 return 0;
33652 emit_insn (pat);
33653 emit_insn (gen_rtx_SET (VOIDmode,
33654 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33655 gen_rtx_fmt_ee (comparison, QImode,
33656 SET_DEST (pat),
33657 const0_rtx)));
33659 return SUBREG_REG (target);
33662 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33664 static rtx
33665 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33666 tree exp, rtx target)
33668 rtx pat;
33669 tree arg0 = CALL_EXPR_ARG (exp, 0);
33670 tree arg1 = CALL_EXPR_ARG (exp, 1);
33671 tree arg2 = CALL_EXPR_ARG (exp, 2);
33672 tree arg3 = CALL_EXPR_ARG (exp, 3);
33673 tree arg4 = CALL_EXPR_ARG (exp, 4);
33674 rtx scratch0, scratch1;
33675 rtx op0 = expand_normal (arg0);
33676 rtx op1 = expand_normal (arg1);
33677 rtx op2 = expand_normal (arg2);
33678 rtx op3 = expand_normal (arg3);
33679 rtx op4 = expand_normal (arg4);
33680 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33682 tmode0 = insn_data[d->icode].operand[0].mode;
33683 tmode1 = insn_data[d->icode].operand[1].mode;
33684 modev2 = insn_data[d->icode].operand[2].mode;
33685 modei3 = insn_data[d->icode].operand[3].mode;
33686 modev4 = insn_data[d->icode].operand[4].mode;
33687 modei5 = insn_data[d->icode].operand[5].mode;
33688 modeimm = insn_data[d->icode].operand[6].mode;
33690 if (VECTOR_MODE_P (modev2))
33691 op0 = safe_vector_operand (op0, modev2);
33692 if (VECTOR_MODE_P (modev4))
33693 op2 = safe_vector_operand (op2, modev4);
33695 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33696 op0 = copy_to_mode_reg (modev2, op0);
33697 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33698 op1 = copy_to_mode_reg (modei3, op1);
33699 if ((optimize && !register_operand (op2, modev4))
33700 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33701 op2 = copy_to_mode_reg (modev4, op2);
33702 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33703 op3 = copy_to_mode_reg (modei5, op3);
33705 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33707 error ("the fifth argument must be an 8-bit immediate");
33708 return const0_rtx;
33711 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33713 if (optimize || !target
33714 || GET_MODE (target) != tmode0
33715 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33716 target = gen_reg_rtx (tmode0);
33718 scratch1 = gen_reg_rtx (tmode1);
33720 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33722 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33724 if (optimize || !target
33725 || GET_MODE (target) != tmode1
33726 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33727 target = gen_reg_rtx (tmode1);
33729 scratch0 = gen_reg_rtx (tmode0);
33731 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33733 else
33735 gcc_assert (d->flag);
33737 scratch0 = gen_reg_rtx (tmode0);
33738 scratch1 = gen_reg_rtx (tmode1);
33740 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33743 if (! pat)
33744 return 0;
33746 emit_insn (pat);
33748 if (d->flag)
33750 target = gen_reg_rtx (SImode);
33751 emit_move_insn (target, const0_rtx);
33752 target = gen_rtx_SUBREG (QImode, target, 0);
33754 emit_insn
33755 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33756 gen_rtx_fmt_ee (EQ, QImode,
33757 gen_rtx_REG ((enum machine_mode) d->flag,
33758 FLAGS_REG),
33759 const0_rtx)));
33760 return SUBREG_REG (target);
33762 else
33763 return target;
33767 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33769 static rtx
33770 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33771 tree exp, rtx target)
33773 rtx pat;
33774 tree arg0 = CALL_EXPR_ARG (exp, 0);
33775 tree arg1 = CALL_EXPR_ARG (exp, 1);
33776 tree arg2 = CALL_EXPR_ARG (exp, 2);
33777 rtx scratch0, scratch1;
33778 rtx op0 = expand_normal (arg0);
33779 rtx op1 = expand_normal (arg1);
33780 rtx op2 = expand_normal (arg2);
33781 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33783 tmode0 = insn_data[d->icode].operand[0].mode;
33784 tmode1 = insn_data[d->icode].operand[1].mode;
33785 modev2 = insn_data[d->icode].operand[2].mode;
33786 modev3 = insn_data[d->icode].operand[3].mode;
33787 modeimm = insn_data[d->icode].operand[4].mode;
33789 if (VECTOR_MODE_P (modev2))
33790 op0 = safe_vector_operand (op0, modev2);
33791 if (VECTOR_MODE_P (modev3))
33792 op1 = safe_vector_operand (op1, modev3);
33794 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33795 op0 = copy_to_mode_reg (modev2, op0);
33796 if ((optimize && !register_operand (op1, modev3))
33797 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33798 op1 = copy_to_mode_reg (modev3, op1);
33800 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33802 error ("the third argument must be an 8-bit immediate");
33803 return const0_rtx;
33806 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33808 if (optimize || !target
33809 || GET_MODE (target) != tmode0
33810 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33811 target = gen_reg_rtx (tmode0);
33813 scratch1 = gen_reg_rtx (tmode1);
33815 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33817 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33819 if (optimize || !target
33820 || GET_MODE (target) != tmode1
33821 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33822 target = gen_reg_rtx (tmode1);
33824 scratch0 = gen_reg_rtx (tmode0);
33826 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33828 else
33830 gcc_assert (d->flag);
33832 scratch0 = gen_reg_rtx (tmode0);
33833 scratch1 = gen_reg_rtx (tmode1);
33835 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33838 if (! pat)
33839 return 0;
33841 emit_insn (pat);
33843 if (d->flag)
33845 target = gen_reg_rtx (SImode);
33846 emit_move_insn (target, const0_rtx);
33847 target = gen_rtx_SUBREG (QImode, target, 0);
33849 emit_insn
33850 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33851 gen_rtx_fmt_ee (EQ, QImode,
33852 gen_rtx_REG ((enum machine_mode) d->flag,
33853 FLAGS_REG),
33854 const0_rtx)));
33855 return SUBREG_REG (target);
33857 else
33858 return target;
33861 /* Subroutine of ix86_expand_builtin to take care of insns with
33862 a variable number of operands. */
33864 static rtx
33865 ix86_expand_args_builtin (const struct builtin_description *d,
33866 tree exp, rtx target)
33868 rtx pat, real_target;
33869 unsigned int i, nargs;
33870 unsigned int nargs_constant = 0;
33871 unsigned int mask_pos = 0;
33872 int num_memory = 0;
33873 struct
33875 rtx op;
33876 enum machine_mode mode;
33877 } args[6];
33878 bool last_arg_count = false;
33879 enum insn_code icode = d->icode;
33880 const struct insn_data_d *insn_p = &insn_data[icode];
33881 enum machine_mode tmode = insn_p->operand[0].mode;
33882 enum machine_mode rmode = VOIDmode;
33883 bool swap = false;
33884 enum rtx_code comparison = d->comparison;
33886 switch ((enum ix86_builtin_func_type) d->flag)
33888 case V2DF_FTYPE_V2DF_ROUND:
33889 case V4DF_FTYPE_V4DF_ROUND:
33890 case V4SF_FTYPE_V4SF_ROUND:
33891 case V8SF_FTYPE_V8SF_ROUND:
33892 case V4SI_FTYPE_V4SF_ROUND:
33893 case V8SI_FTYPE_V8SF_ROUND:
33894 return ix86_expand_sse_round (d, exp, target);
33895 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33896 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33897 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33898 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33899 case INT_FTYPE_V8SF_V8SF_PTEST:
33900 case INT_FTYPE_V4DI_V4DI_PTEST:
33901 case INT_FTYPE_V4DF_V4DF_PTEST:
33902 case INT_FTYPE_V4SF_V4SF_PTEST:
33903 case INT_FTYPE_V2DI_V2DI_PTEST:
33904 case INT_FTYPE_V2DF_V2DF_PTEST:
33905 return ix86_expand_sse_ptest (d, exp, target);
33906 case FLOAT128_FTYPE_FLOAT128:
33907 case FLOAT_FTYPE_FLOAT:
33908 case INT_FTYPE_INT:
33909 case UINT64_FTYPE_INT:
33910 case UINT16_FTYPE_UINT16:
33911 case INT64_FTYPE_INT64:
33912 case INT64_FTYPE_V4SF:
33913 case INT64_FTYPE_V2DF:
33914 case INT_FTYPE_V16QI:
33915 case INT_FTYPE_V8QI:
33916 case INT_FTYPE_V8SF:
33917 case INT_FTYPE_V4DF:
33918 case INT_FTYPE_V4SF:
33919 case INT_FTYPE_V2DF:
33920 case INT_FTYPE_V32QI:
33921 case V16QI_FTYPE_V16QI:
33922 case V8SI_FTYPE_V8SF:
33923 case V8SI_FTYPE_V4SI:
33924 case V8HI_FTYPE_V8HI:
33925 case V8HI_FTYPE_V16QI:
33926 case V8QI_FTYPE_V8QI:
33927 case V8SF_FTYPE_V8SF:
33928 case V8SF_FTYPE_V8SI:
33929 case V8SF_FTYPE_V4SF:
33930 case V8SF_FTYPE_V8HI:
33931 case V4SI_FTYPE_V4SI:
33932 case V4SI_FTYPE_V16QI:
33933 case V4SI_FTYPE_V4SF:
33934 case V4SI_FTYPE_V8SI:
33935 case V4SI_FTYPE_V8HI:
33936 case V4SI_FTYPE_V4DF:
33937 case V4SI_FTYPE_V2DF:
33938 case V4HI_FTYPE_V4HI:
33939 case V4DF_FTYPE_V4DF:
33940 case V4DF_FTYPE_V4SI:
33941 case V4DF_FTYPE_V4SF:
33942 case V4DF_FTYPE_V2DF:
33943 case V4SF_FTYPE_V4SF:
33944 case V4SF_FTYPE_V4SI:
33945 case V4SF_FTYPE_V8SF:
33946 case V4SF_FTYPE_V4DF:
33947 case V4SF_FTYPE_V8HI:
33948 case V4SF_FTYPE_V2DF:
33949 case V2DI_FTYPE_V2DI:
33950 case V2DI_FTYPE_V16QI:
33951 case V2DI_FTYPE_V8HI:
33952 case V2DI_FTYPE_V4SI:
33953 case V2DF_FTYPE_V2DF:
33954 case V2DF_FTYPE_V4SI:
33955 case V2DF_FTYPE_V4DF:
33956 case V2DF_FTYPE_V4SF:
33957 case V2DF_FTYPE_V2SI:
33958 case V2SI_FTYPE_V2SI:
33959 case V2SI_FTYPE_V4SF:
33960 case V2SI_FTYPE_V2SF:
33961 case V2SI_FTYPE_V2DF:
33962 case V2SF_FTYPE_V2SF:
33963 case V2SF_FTYPE_V2SI:
33964 case V32QI_FTYPE_V32QI:
33965 case V32QI_FTYPE_V16QI:
33966 case V16HI_FTYPE_V16HI:
33967 case V16HI_FTYPE_V8HI:
33968 case V8SI_FTYPE_V8SI:
33969 case V16HI_FTYPE_V16QI:
33970 case V8SI_FTYPE_V16QI:
33971 case V4DI_FTYPE_V16QI:
33972 case V8SI_FTYPE_V8HI:
33973 case V4DI_FTYPE_V8HI:
33974 case V4DI_FTYPE_V4SI:
33975 case V4DI_FTYPE_V2DI:
33976 case HI_FTYPE_HI:
33977 case UINT_FTYPE_V2DF:
33978 case UINT_FTYPE_V4SF:
33979 case UINT64_FTYPE_V2DF:
33980 case UINT64_FTYPE_V4SF:
33981 case V16QI_FTYPE_V8DI:
33982 case V16HI_FTYPE_V16SI:
33983 case V16SI_FTYPE_HI:
33984 case V16SI_FTYPE_V16SI:
33985 case V16SI_FTYPE_INT:
33986 case V16SF_FTYPE_FLOAT:
33987 case V16SF_FTYPE_V4SF:
33988 case V16SF_FTYPE_V16SF:
33989 case V8HI_FTYPE_V8DI:
33990 case V8UHI_FTYPE_V8UHI:
33991 case V8SI_FTYPE_V8DI:
33992 case V8USI_FTYPE_V8USI:
33993 case V8SF_FTYPE_V8DF:
33994 case V8DI_FTYPE_QI:
33995 case V8DI_FTYPE_INT64:
33996 case V8DI_FTYPE_V4DI:
33997 case V8DI_FTYPE_V8DI:
33998 case V8DF_FTYPE_DOUBLE:
33999 case V8DF_FTYPE_V4DF:
34000 case V8DF_FTYPE_V8DF:
34001 case V8DF_FTYPE_V8SI:
34002 nargs = 1;
34003 break;
34004 case V4SF_FTYPE_V4SF_VEC_MERGE:
34005 case V2DF_FTYPE_V2DF_VEC_MERGE:
34006 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
34007 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
34008 case V16QI_FTYPE_V16QI_V16QI:
34009 case V16QI_FTYPE_V8HI_V8HI:
34010 case V16SI_FTYPE_V16SI_V16SI:
34011 case V16SF_FTYPE_V16SF_V16SF:
34012 case V16SF_FTYPE_V16SF_V16SI:
34013 case V8QI_FTYPE_V8QI_V8QI:
34014 case V8QI_FTYPE_V4HI_V4HI:
34015 case V8HI_FTYPE_V8HI_V8HI:
34016 case V8HI_FTYPE_V16QI_V16QI:
34017 case V8HI_FTYPE_V4SI_V4SI:
34018 case V8SF_FTYPE_V8SF_V8SF:
34019 case V8SF_FTYPE_V8SF_V8SI:
34020 case V8DI_FTYPE_V8DI_V8DI:
34021 case V8DF_FTYPE_V8DF_V8DF:
34022 case V8DF_FTYPE_V8DF_V8DI:
34023 case V4SI_FTYPE_V4SI_V4SI:
34024 case V4SI_FTYPE_V8HI_V8HI:
34025 case V4SI_FTYPE_V4SF_V4SF:
34026 case V4SI_FTYPE_V2DF_V2DF:
34027 case V4HI_FTYPE_V4HI_V4HI:
34028 case V4HI_FTYPE_V8QI_V8QI:
34029 case V4HI_FTYPE_V2SI_V2SI:
34030 case V4DF_FTYPE_V4DF_V4DF:
34031 case V4DF_FTYPE_V4DF_V4DI:
34032 case V4SF_FTYPE_V4SF_V4SF:
34033 case V4SF_FTYPE_V4SF_V4SI:
34034 case V4SF_FTYPE_V4SF_V2SI:
34035 case V4SF_FTYPE_V4SF_V2DF:
34036 case V4SF_FTYPE_V4SF_UINT:
34037 case V4SF_FTYPE_V4SF_UINT64:
34038 case V4SF_FTYPE_V4SF_DI:
34039 case V4SF_FTYPE_V4SF_SI:
34040 case V2DI_FTYPE_V2DI_V2DI:
34041 case V2DI_FTYPE_V16QI_V16QI:
34042 case V2DI_FTYPE_V4SI_V4SI:
34043 case V2UDI_FTYPE_V4USI_V4USI:
34044 case V2DI_FTYPE_V2DI_V16QI:
34045 case V2DI_FTYPE_V2DF_V2DF:
34046 case V2SI_FTYPE_V2SI_V2SI:
34047 case V2SI_FTYPE_V4HI_V4HI:
34048 case V2SI_FTYPE_V2SF_V2SF:
34049 case V2DF_FTYPE_V2DF_V2DF:
34050 case V2DF_FTYPE_V2DF_V4SF:
34051 case V2DF_FTYPE_V2DF_V2DI:
34052 case V2DF_FTYPE_V2DF_DI:
34053 case V2DF_FTYPE_V2DF_SI:
34054 case V2DF_FTYPE_V2DF_UINT:
34055 case V2DF_FTYPE_V2DF_UINT64:
34056 case V2SF_FTYPE_V2SF_V2SF:
34057 case V1DI_FTYPE_V1DI_V1DI:
34058 case V1DI_FTYPE_V8QI_V8QI:
34059 case V1DI_FTYPE_V2SI_V2SI:
34060 case V32QI_FTYPE_V16HI_V16HI:
34061 case V16HI_FTYPE_V8SI_V8SI:
34062 case V32QI_FTYPE_V32QI_V32QI:
34063 case V16HI_FTYPE_V32QI_V32QI:
34064 case V16HI_FTYPE_V16HI_V16HI:
34065 case V8SI_FTYPE_V4DF_V4DF:
34066 case V8SI_FTYPE_V8SI_V8SI:
34067 case V8SI_FTYPE_V16HI_V16HI:
34068 case V4DI_FTYPE_V4DI_V4DI:
34069 case V4DI_FTYPE_V8SI_V8SI:
34070 case V4UDI_FTYPE_V8USI_V8USI:
34071 case QI_FTYPE_V8DI_V8DI:
34072 case HI_FTYPE_V16SI_V16SI:
34073 if (comparison == UNKNOWN)
34074 return ix86_expand_binop_builtin (icode, exp, target);
34075 nargs = 2;
34076 break;
34077 case V4SF_FTYPE_V4SF_V4SF_SWAP:
34078 case V2DF_FTYPE_V2DF_V2DF_SWAP:
34079 gcc_assert (comparison != UNKNOWN);
34080 nargs = 2;
34081 swap = true;
34082 break;
34083 case V16HI_FTYPE_V16HI_V8HI_COUNT:
34084 case V16HI_FTYPE_V16HI_SI_COUNT:
34085 case V8SI_FTYPE_V8SI_V4SI_COUNT:
34086 case V8SI_FTYPE_V8SI_SI_COUNT:
34087 case V4DI_FTYPE_V4DI_V2DI_COUNT:
34088 case V4DI_FTYPE_V4DI_INT_COUNT:
34089 case V8HI_FTYPE_V8HI_V8HI_COUNT:
34090 case V8HI_FTYPE_V8HI_SI_COUNT:
34091 case V4SI_FTYPE_V4SI_V4SI_COUNT:
34092 case V4SI_FTYPE_V4SI_SI_COUNT:
34093 case V4HI_FTYPE_V4HI_V4HI_COUNT:
34094 case V4HI_FTYPE_V4HI_SI_COUNT:
34095 case V2DI_FTYPE_V2DI_V2DI_COUNT:
34096 case V2DI_FTYPE_V2DI_SI_COUNT:
34097 case V2SI_FTYPE_V2SI_V2SI_COUNT:
34098 case V2SI_FTYPE_V2SI_SI_COUNT:
34099 case V1DI_FTYPE_V1DI_V1DI_COUNT:
34100 case V1DI_FTYPE_V1DI_SI_COUNT:
34101 nargs = 2;
34102 last_arg_count = true;
34103 break;
34104 case UINT64_FTYPE_UINT64_UINT64:
34105 case UINT_FTYPE_UINT_UINT:
34106 case UINT_FTYPE_UINT_USHORT:
34107 case UINT_FTYPE_UINT_UCHAR:
34108 case UINT16_FTYPE_UINT16_INT:
34109 case UINT8_FTYPE_UINT8_INT:
34110 case HI_FTYPE_HI_HI:
34111 case V16SI_FTYPE_V8DF_V8DF:
34112 nargs = 2;
34113 break;
34114 case V2DI_FTYPE_V2DI_INT_CONVERT:
34115 nargs = 2;
34116 rmode = V1TImode;
34117 nargs_constant = 1;
34118 break;
34119 case V4DI_FTYPE_V4DI_INT_CONVERT:
34120 nargs = 2;
34121 rmode = V2TImode;
34122 nargs_constant = 1;
34123 break;
34124 case V8HI_FTYPE_V8HI_INT:
34125 case V8HI_FTYPE_V8SF_INT:
34126 case V16HI_FTYPE_V16SF_INT:
34127 case V8HI_FTYPE_V4SF_INT:
34128 case V8SF_FTYPE_V8SF_INT:
34129 case V4SF_FTYPE_V16SF_INT:
34130 case V16SF_FTYPE_V16SF_INT:
34131 case V4SI_FTYPE_V4SI_INT:
34132 case V4SI_FTYPE_V8SI_INT:
34133 case V4HI_FTYPE_V4HI_INT:
34134 case V4DF_FTYPE_V4DF_INT:
34135 case V4DF_FTYPE_V8DF_INT:
34136 case V4SF_FTYPE_V4SF_INT:
34137 case V4SF_FTYPE_V8SF_INT:
34138 case V2DI_FTYPE_V2DI_INT:
34139 case V2DF_FTYPE_V2DF_INT:
34140 case V2DF_FTYPE_V4DF_INT:
34141 case V16HI_FTYPE_V16HI_INT:
34142 case V8SI_FTYPE_V8SI_INT:
34143 case V16SI_FTYPE_V16SI_INT:
34144 case V4SI_FTYPE_V16SI_INT:
34145 case V4DI_FTYPE_V4DI_INT:
34146 case V2DI_FTYPE_V4DI_INT:
34147 case V4DI_FTYPE_V8DI_INT:
34148 case HI_FTYPE_HI_INT:
34149 nargs = 2;
34150 nargs_constant = 1;
34151 break;
34152 case V16QI_FTYPE_V16QI_V16QI_V16QI:
34153 case V8SF_FTYPE_V8SF_V8SF_V8SF:
34154 case V4DF_FTYPE_V4DF_V4DF_V4DF:
34155 case V4SF_FTYPE_V4SF_V4SF_V4SF:
34156 case V2DF_FTYPE_V2DF_V2DF_V2DF:
34157 case V32QI_FTYPE_V32QI_V32QI_V32QI:
34158 case HI_FTYPE_V16SI_V16SI_HI:
34159 case QI_FTYPE_V8DI_V8DI_QI:
34160 case V16HI_FTYPE_V16SI_V16HI_HI:
34161 case V16QI_FTYPE_V16SI_V16QI_HI:
34162 case V16QI_FTYPE_V8DI_V16QI_QI:
34163 case V16SF_FTYPE_V16SF_V16SF_HI:
34164 case V16SF_FTYPE_V16SF_V16SF_V16SF:
34165 case V16SF_FTYPE_V16SF_V16SI_V16SF:
34166 case V16SF_FTYPE_V16SI_V16SF_HI:
34167 case V16SF_FTYPE_V16SI_V16SF_V16SF:
34168 case V16SF_FTYPE_V4SF_V16SF_HI:
34169 case V16SI_FTYPE_SI_V16SI_HI:
34170 case V16SI_FTYPE_V16HI_V16SI_HI:
34171 case V16SI_FTYPE_V16QI_V16SI_HI:
34172 case V16SI_FTYPE_V16SF_V16SI_HI:
34173 case V16SI_FTYPE_V16SI_V16SI_HI:
34174 case V16SI_FTYPE_V16SI_V16SI_V16SI:
34175 case V16SI_FTYPE_V4SI_V16SI_HI:
34176 case V2DI_FTYPE_V2DI_V2DI_V2DI:
34177 case V4DI_FTYPE_V4DI_V4DI_V4DI:
34178 case V8DF_FTYPE_V2DF_V8DF_QI:
34179 case V8DF_FTYPE_V4DF_V8DF_QI:
34180 case V8DF_FTYPE_V8DF_V8DF_QI:
34181 case V8DF_FTYPE_V8DF_V8DF_V8DF:
34182 case V8DF_FTYPE_V8DF_V8DI_V8DF:
34183 case V8DF_FTYPE_V8DI_V8DF_V8DF:
34184 case V8DF_FTYPE_V8SF_V8DF_QI:
34185 case V8DF_FTYPE_V8SI_V8DF_QI:
34186 case V8DI_FTYPE_DI_V8DI_QI:
34187 case V8DI_FTYPE_V16QI_V8DI_QI:
34188 case V8DI_FTYPE_V2DI_V8DI_QI:
34189 case V8DI_FTYPE_V4DI_V8DI_QI:
34190 case V8DI_FTYPE_V8DI_V8DI_QI:
34191 case V8DI_FTYPE_V8DI_V8DI_V8DI:
34192 case V8DI_FTYPE_V8HI_V8DI_QI:
34193 case V8DI_FTYPE_V8SI_V8DI_QI:
34194 case V8HI_FTYPE_V8DI_V8HI_QI:
34195 case V8SF_FTYPE_V8DF_V8SF_QI:
34196 case V8SI_FTYPE_V8DF_V8SI_QI:
34197 case V8SI_FTYPE_V8DI_V8SI_QI:
34198 case V4SI_FTYPE_V4SI_V4SI_V4SI:
34199 nargs = 3;
34200 break;
34201 case V32QI_FTYPE_V32QI_V32QI_INT:
34202 case V16HI_FTYPE_V16HI_V16HI_INT:
34203 case V16QI_FTYPE_V16QI_V16QI_INT:
34204 case V4DI_FTYPE_V4DI_V4DI_INT:
34205 case V8HI_FTYPE_V8HI_V8HI_INT:
34206 case V8SI_FTYPE_V8SI_V8SI_INT:
34207 case V8SI_FTYPE_V8SI_V4SI_INT:
34208 case V8SF_FTYPE_V8SF_V8SF_INT:
34209 case V8SF_FTYPE_V8SF_V4SF_INT:
34210 case V4SI_FTYPE_V4SI_V4SI_INT:
34211 case V4DF_FTYPE_V4DF_V4DF_INT:
34212 case V16SF_FTYPE_V16SF_V16SF_INT:
34213 case V16SF_FTYPE_V16SF_V4SF_INT:
34214 case V16SI_FTYPE_V16SI_V4SI_INT:
34215 case V4DF_FTYPE_V4DF_V2DF_INT:
34216 case V4SF_FTYPE_V4SF_V4SF_INT:
34217 case V2DI_FTYPE_V2DI_V2DI_INT:
34218 case V4DI_FTYPE_V4DI_V2DI_INT:
34219 case V2DF_FTYPE_V2DF_V2DF_INT:
34220 case QI_FTYPE_V8DI_V8DI_INT:
34221 case QI_FTYPE_V8DF_V8DF_INT:
34222 case QI_FTYPE_V2DF_V2DF_INT:
34223 case QI_FTYPE_V4SF_V4SF_INT:
34224 case HI_FTYPE_V16SI_V16SI_INT:
34225 case HI_FTYPE_V16SF_V16SF_INT:
34226 nargs = 3;
34227 nargs_constant = 1;
34228 break;
34229 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
34230 nargs = 3;
34231 rmode = V4DImode;
34232 nargs_constant = 1;
34233 break;
34234 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
34235 nargs = 3;
34236 rmode = V2DImode;
34237 nargs_constant = 1;
34238 break;
34239 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
34240 nargs = 3;
34241 rmode = DImode;
34242 nargs_constant = 1;
34243 break;
34244 case V2DI_FTYPE_V2DI_UINT_UINT:
34245 nargs = 3;
34246 nargs_constant = 2;
34247 break;
34248 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
34249 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
34250 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
34251 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
34252 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
34253 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
34254 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
34255 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
34256 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
34257 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
34258 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
34259 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
34260 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
34261 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
34262 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
34263 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
34264 nargs = 4;
34265 break;
34266 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
34267 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
34268 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
34269 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
34270 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
34271 nargs = 4;
34272 nargs_constant = 1;
34273 break;
34274 case QI_FTYPE_V2DF_V2DF_INT_QI:
34275 case QI_FTYPE_V4SF_V4SF_INT_QI:
34276 nargs = 4;
34277 mask_pos = 1;
34278 nargs_constant = 1;
34279 break;
34280 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
34281 nargs = 4;
34282 nargs_constant = 2;
34283 break;
34284 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
34285 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
34286 nargs = 4;
34287 break;
34288 case QI_FTYPE_V8DI_V8DI_INT_QI:
34289 case HI_FTYPE_V16SI_V16SI_INT_HI:
34290 case QI_FTYPE_V8DF_V8DF_INT_QI:
34291 case HI_FTYPE_V16SF_V16SF_INT_HI:
34292 mask_pos = 1;
34293 nargs = 4;
34294 nargs_constant = 1;
34295 break;
34296 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
34297 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
34298 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
34299 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
34300 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
34301 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
34302 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
34303 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
34304 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
34305 nargs = 4;
34306 mask_pos = 2;
34307 nargs_constant = 1;
34308 break;
34309 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
34310 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
34311 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
34312 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
34313 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
34314 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
34315 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
34316 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
34317 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
34318 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
34319 nargs = 5;
34320 mask_pos = 2;
34321 nargs_constant = 1;
34322 break;
34323 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
34324 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
34325 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
34326 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
34327 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
34328 nargs = 5;
34329 mask_pos = 1;
34330 nargs_constant = 1;
34331 break;
34333 default:
34334 gcc_unreachable ();
34337 gcc_assert (nargs <= ARRAY_SIZE (args));
34339 if (comparison != UNKNOWN)
34341 gcc_assert (nargs == 2);
34342 return ix86_expand_sse_compare (d, exp, target, swap);
34345 if (rmode == VOIDmode || rmode == tmode)
34347 if (optimize
34348 || target == 0
34349 || GET_MODE (target) != tmode
34350 || !insn_p->operand[0].predicate (target, tmode))
34351 target = gen_reg_rtx (tmode);
34352 real_target = target;
34354 else
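/* The insn computes TMODE but the builtin returns RMODE; generate the
   result into a fresh TMODE pseudo and hand back an RMODE view of it. */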
34356 real_target = gen_reg_rtx (tmode);
34357 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34360 for (i = 0; i < nargs; i++)
34362 tree arg = CALL_EXPR_ARG (exp, i);
34363 rtx op = expand_normal (arg);
34364 enum machine_mode mode = insn_p->operand[i + 1].mode;
34365 bool match = insn_p->operand[i + 1].predicate (op, mode);
34367 if (last_arg_count && (i + 1) == nargs)
34369 /* SIMD shift insns take either an 8-bit immediate or a
34370 register as the count. But the builtin functions take int as
34371 the count. If the count does not match, put it in a register. */
34372 if (!match)
34374 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34375 if (!insn_p->operand[i + 1].predicate (op, mode))
34376 op = copy_to_reg (op);
34379 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34380 (!mask_pos && (nargs - i) <= nargs_constant))
34382 if (!match)
34383 switch (icode)
34385 case CODE_FOR_avx2_inserti128:
34386 case CODE_FOR_avx2_extracti128:
34387 error ("the last argument must be a 1-bit immediate");
34388 return const0_rtx;
34390 case CODE_FOR_avx512f_cmpv8di3_mask:
34391 case CODE_FOR_avx512f_cmpv16si3_mask:
34392 case CODE_FOR_avx512f_ucmpv8di3_mask:
34393 case CODE_FOR_avx512f_ucmpv16si3_mask:
34394 error ("the last argument must be a 3-bit immediate");
34395 return const0_rtx;
34397 case CODE_FOR_sse4_1_roundsd:
34398 case CODE_FOR_sse4_1_roundss:
34400 case CODE_FOR_sse4_1_roundpd:
34401 case CODE_FOR_sse4_1_roundps:
34402 case CODE_FOR_avx_roundpd256:
34403 case CODE_FOR_avx_roundps256:
34405 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34406 case CODE_FOR_sse4_1_roundps_sfix:
34407 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34408 case CODE_FOR_avx_roundps_sfix256:
34410 case CODE_FOR_sse4_1_blendps:
34411 case CODE_FOR_avx_blendpd256:
34412 case CODE_FOR_avx_vpermilv4df:
34413 case CODE_FOR_avx512f_getmantv8df_mask:
34414 case CODE_FOR_avx512f_getmantv16sf_mask:
34415 error ("the last argument must be a 4-bit immediate");
34416 return const0_rtx;
34418 case CODE_FOR_sha1rnds4:
34419 case CODE_FOR_sse4_1_blendpd:
34420 case CODE_FOR_avx_vpermilv2df:
34421 case CODE_FOR_xop_vpermil2v2df3:
34422 case CODE_FOR_xop_vpermil2v4sf3:
34423 case CODE_FOR_xop_vpermil2v4df3:
34424 case CODE_FOR_xop_vpermil2v8sf3:
34425 case CODE_FOR_avx512f_vinsertf32x4_mask:
34426 case CODE_FOR_avx512f_vinserti32x4_mask:
34427 case CODE_FOR_avx512f_vextractf32x4_mask:
34428 case CODE_FOR_avx512f_vextracti32x4_mask:
34429 error ("the last argument must be a 2-bit immediate");
34430 return const0_rtx;
34432 case CODE_FOR_avx_vextractf128v4df:
34433 case CODE_FOR_avx_vextractf128v8sf:
34434 case CODE_FOR_avx_vextractf128v8si:
34435 case CODE_FOR_avx_vinsertf128v4df:
34436 case CODE_FOR_avx_vinsertf128v8sf:
34437 case CODE_FOR_avx_vinsertf128v8si:
34438 case CODE_FOR_avx512f_vinsertf64x4_mask:
34439 case CODE_FOR_avx512f_vinserti64x4_mask:
34440 case CODE_FOR_avx512f_vextractf64x4_mask:
34441 case CODE_FOR_avx512f_vextracti64x4_mask:
34442 error ("the last argument must be a 1-bit immediate");
34443 return const0_rtx;
34445 case CODE_FOR_avx_vmcmpv2df3:
34446 case CODE_FOR_avx_vmcmpv4sf3:
34447 case CODE_FOR_avx_cmpv2df3:
34448 case CODE_FOR_avx_cmpv4sf3:
34449 case CODE_FOR_avx_cmpv4df3:
34450 case CODE_FOR_avx_cmpv8sf3:
34451 case CODE_FOR_avx512f_cmpv8df3_mask:
34452 case CODE_FOR_avx512f_cmpv16sf3_mask:
34453 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34454 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34455 error ("the last argument must be a 5-bit immediate");
34456 return const0_rtx;
34458 default:
34459 switch (nargs_constant)
34461 case 2:
34462 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34463 (!mask_pos && (nargs - i) == nargs_constant))
34465 error ("the next to last argument must be an 8-bit immediate");
34466 break;
34468 case 1:
34469 error ("the last argument must be an 8-bit immediate");
34470 break;
34471 default:
34472 gcc_unreachable ();
34474 return const0_rtx;
34477 else
34479 if (VECTOR_MODE_P (mode))
34480 op = safe_vector_operand (op, mode);
34482 /* If we aren't optimizing, only allow one memory operand to
34483 be generated. */
34484 if (memory_operand (op, mode))
34485 num_memory++;
34487 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34489 if (optimize || !match || num_memory > 1)
34490 op = copy_to_mode_reg (mode, op);
34492 else
34494 op = copy_to_reg (op);
34495 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34499 args[i].op = op;
34500 args[i].mode = mode;
34503 switch (nargs)
34505 case 1:
34506 pat = GEN_FCN (icode) (real_target, args[0].op);
34507 break;
34508 case 2:
34509 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34510 break;
34511 case 3:
34512 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34513 args[2].op);
34514 break;
34515 case 4:
34516 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34517 args[2].op, args[3].op);
34518 break;
34519 case 5:
34520 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34521 args[2].op, args[3].op, args[4].op);
break;
34522 case 6:
34523 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34524 args[2].op, args[3].op, args[4].op,
34525 args[5].op);
34526 break;
34527 default:
34528 gcc_unreachable ();
34531 if (! pat)
34532 return 0;
34534 emit_insn (pat);
34535 return target;
34538 /* Transform a pattern of the following layout:
34539 (parallel [
34540 set (A B)
34541 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
34543 into:
34544 (set (A B))
or a pattern of the layout:
34547 (parallel [ A B
...
34549 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
...
])
34552 into:
34553 (parallel [ A B ... ]) */
34555 static rtx
34556 ix86_erase_embedded_rounding (rtx pat)
34558 if (GET_CODE (pat) == INSN)
34559 pat = PATTERN (pat);
34561 gcc_assert (GET_CODE (pat) == PARALLEL);
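/* A two-element PARALLEL is a single SET paired with the rounding
   unspec; returning just the SET drops the embedded rounding. */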
34563 if (XVECLEN (pat, 0) == 2)
34565 rtx p0 = XVECEXP (pat, 0, 0);
34566 rtx p1 = XVECEXP (pat, 0, 1);
34568 gcc_assert (GET_CODE (p0) == SET
34569 && GET_CODE (p1) == UNSPEC
34570 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34572 return p0;
34574 else
34576 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34577 int i = 0;
34578 int j = 0;
34580 for (; i < XVECLEN (pat, 0); ++i)
34582 rtx elem = XVECEXP (pat, 0, i);
34583 if (GET_CODE (elem) != UNSPEC
34584 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34585 res [j++] = elem;
34588 /* No more than 1 occurrence was removed. */
34589 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34591 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34595 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34596 with rounding. */
34597 static rtx
34598 ix86_expand_sse_comi_round (const struct builtin_description *d,
34599 tree exp, rtx target)
34601 rtx pat, set_dst;
34602 tree arg0 = CALL_EXPR_ARG (exp, 0);
34603 tree arg1 = CALL_EXPR_ARG (exp, 1);
34604 tree arg2 = CALL_EXPR_ARG (exp, 2);
34605 tree arg3 = CALL_EXPR_ARG (exp, 3);
34606 rtx op0 = expand_normal (arg0);
34607 rtx op1 = expand_normal (arg1);
34608 rtx op2 = expand_normal (arg2);
34609 rtx op3 = expand_normal (arg3);
34610 enum insn_code icode = d->icode;
34611 const struct insn_data_d *insn_p = &insn_data[icode];
34612 enum machine_mode mode0 = insn_p->operand[0].mode;
34613 enum machine_mode mode1 = insn_p->operand[1].mode;
34614 enum rtx_code comparison = UNEQ;
34615 bool need_ucomi = false;
34617 /* See avxintrin.h for values. */
34618 enum rtx_code comi_comparisons[32] =
34620 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34621 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34622 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34624 bool need_ucomi_values[32] =
34626 true, false, false, true, true, false, false, true,
34627 true, false, false, true, true, false, false, true,
34628 false, true, true, false, false, true, true, false,
34629 false, true, true, false, false, true, true, false
34632 if (!CONST_INT_P (op2))
34634 error ("the third argument must be a comparison constant");
34635 return const0_rtx;
34637 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34639 error ("incorrect comparison mode");
34640 return const0_rtx;
34643 if (!insn_p->operand[2].predicate (op3, SImode))
34645 error ("incorrect rounding operand");
34646 return const0_rtx;
34649 comparison = comi_comparisons[INTVAL (op2)];
34650 need_ucomi = need_ucomi_values[INTVAL (op2)];
34652 if (VECTOR_MODE_P (mode0))
34653 op0 = safe_vector_operand (op0, mode0);
34654 if (VECTOR_MODE_P (mode1))
34655 op1 = safe_vector_operand (op1, mode1);
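/* The comparison result is materialized with a setcc into the low byte
   of a zero-initialized SImode pseudo; the full SImode register is what
   gets returned. */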
34657 target = gen_reg_rtx (SImode);
34658 emit_move_insn (target, const0_rtx);
34659 target = gen_rtx_SUBREG (QImode, target, 0);
34661 if ((optimize && !register_operand (op0, mode0))
34662 || !insn_p->operand[0].predicate (op0, mode0))
34663 op0 = copy_to_mode_reg (mode0, op0);
34664 if ((optimize && !register_operand (op1, mode1))
34665 || !insn_p->operand[1].predicate (op1, mode1))
34666 op1 = copy_to_mode_reg (mode1, op1);
34668 if (need_ucomi)
34669 icode = icode == CODE_FOR_sse_comi_round
34670 ? CODE_FOR_sse_ucomi_round
34671 : CODE_FOR_sse2_ucomi_round;
34673 pat = GEN_FCN (icode) (op0, op1, op3);
34674 if (! pat)
34675 return 0;
34677 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34678 if (INTVAL (op3) == NO_ROUND)
34680 pat = ix86_erase_embedded_rounding (pat);
34681 if (! pat)
34682 return 0;
34684 set_dst = SET_DEST (pat);
34686 else
34688 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34689 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34692 emit_insn (pat);
34693 emit_insn (gen_rtx_SET (VOIDmode,
34694 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34695 gen_rtx_fmt_ee (comparison, QImode,
34696 set_dst,
34697 const0_rtx)));
34699 return SUBREG_REG (target);
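/* Subroutine of ix86_expand_builtin to take care of insns with embedded
   rounding: the last argument of these builtins is the rounding mode
   (or SAE) immediate. */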
34702 static rtx
34703 ix86_expand_round_builtin (const struct builtin_description *d,
34704 tree exp, rtx target)
34706 rtx pat;
34707 unsigned int i, nargs;
34708 struct
34710 rtx op;
34711 enum machine_mode mode;
34712 } args[6];
34713 enum insn_code icode = d->icode;
34714 const struct insn_data_d *insn_p = &insn_data[icode];
34715 enum machine_mode tmode = insn_p->operand[0].mode;
34716 unsigned int nargs_constant = 0;
34717 unsigned int redundant_embed_rnd = 0;
34719 switch ((enum ix86_builtin_func_type) d->flag)
34721 case UINT64_FTYPE_V2DF_INT:
34722 case UINT64_FTYPE_V4SF_INT:
34723 case UINT_FTYPE_V2DF_INT:
34724 case UINT_FTYPE_V4SF_INT:
34725 case INT64_FTYPE_V2DF_INT:
34726 case INT64_FTYPE_V4SF_INT:
34727 case INT_FTYPE_V2DF_INT:
34728 case INT_FTYPE_V4SF_INT:
34729 nargs = 2;
34730 break;
34731 case V4SF_FTYPE_V4SF_UINT_INT:
34732 case V4SF_FTYPE_V4SF_UINT64_INT:
34733 case V2DF_FTYPE_V2DF_UINT64_INT:
34734 case V4SF_FTYPE_V4SF_INT_INT:
34735 case V4SF_FTYPE_V4SF_INT64_INT:
34736 case V2DF_FTYPE_V2DF_INT64_INT:
34737 case V4SF_FTYPE_V4SF_V4SF_INT:
34738 case V2DF_FTYPE_V2DF_V2DF_INT:
34739 case V4SF_FTYPE_V4SF_V2DF_INT:
34740 case V2DF_FTYPE_V2DF_V4SF_INT:
34741 nargs = 3;
34742 break;
34743 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34744 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34745 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34746 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34747 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34748 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34749 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34750 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34751 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34752 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34753 nargs = 4;
34754 break;
34755 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34756 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34757 nargs_constant = 2;
34758 nargs = 4;
34759 break;
34760 case INT_FTYPE_V4SF_V4SF_INT_INT:
34761 case INT_FTYPE_V2DF_V2DF_INT_INT:
34762 return ix86_expand_sse_comi_round (d, exp, target);
34763 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34764 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34765 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34766 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34767 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34768 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34769 nargs = 5;
34770 break;
34771 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34772 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34773 nargs_constant = 4;
34774 nargs = 5;
34775 break;
34776 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34777 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34778 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34779 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34780 nargs_constant = 3;
34781 nargs = 5;
34782 break;
34783 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34784 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34785 nargs = 6;
34786 nargs_constant = 4;
34787 break;
34788 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34789 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34790 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34791 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34792 nargs = 6;
34793 nargs_constant = 3;
34794 break;
34795 default:
34796 gcc_unreachable ();
34798 gcc_assert (nargs <= ARRAY_SIZE (args));
34800 if (optimize
34801 || target == 0
34802 || GET_MODE (target) != tmode
34803 || !insn_p->operand[0].predicate (target, tmode))
34804 target = gen_reg_rtx (tmode);
34806 for (i = 0; i < nargs; i++)
34808 tree arg = CALL_EXPR_ARG (exp, i);
34809 rtx op = expand_normal (arg);
34810 enum machine_mode mode = insn_p->operand[i + 1].mode;
34811 bool match = insn_p->operand[i + 1].predicate (op, mode);
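/* Operand NARGS - NARGS_CONSTANT is the immediate whose range is
   checked here; the final operand is always the rounding mode. */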
34813 if (i == nargs - nargs_constant)
34815 if (!match)
34817 switch (icode)
34819 case CODE_FOR_avx512f_getmantv8df_mask_round:
34820 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34821 case CODE_FOR_avx512f_getmantv2df_round:
34822 case CODE_FOR_avx512f_getmantv4sf_round:
34823 error ("the immediate argument must be a 4-bit immediate");
34824 return const0_rtx;
34825 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34826 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34827 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34828 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34829 error ("the immediate argument must be a 5-bit immediate");
34830 return const0_rtx;
34831 default:
34832 error ("the immediate argument must be an 8-bit immediate");
34833 return const0_rtx;
34837 else if (i == nargs-1)
34839 if (!insn_p->operand[nargs].predicate (op, SImode))
34841 error ("incorrect rounding operand");
34842 return const0_rtx;
34845 /* If there is no rounding, use the normal version of the pattern. */
34846 if (INTVAL (op) == NO_ROUND)
34847 redundant_embed_rnd = 1;
34849 else
34851 if (VECTOR_MODE_P (mode))
34852 op = safe_vector_operand (op, mode);
34854 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34856 if (optimize || !match)
34857 op = copy_to_mode_reg (mode, op);
34859 else
34861 op = copy_to_reg (op);
34862 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34866 args[i].op = op;
34867 args[i].mode = mode;
34870 switch (nargs)
34872 case 1:
34873 pat = GEN_FCN (icode) (target, args[0].op);
34874 break;
34875 case 2:
34876 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34877 break;
34878 case 3:
34879 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34880 args[2].op);
34881 break;
34882 case 4:
34883 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34884 args[2].op, args[3].op);
34885 break;
34886 case 5:
34887 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34888 args[2].op, args[3].op, args[4].op);
break;
34889 case 6:
34890 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34891 args[2].op, args[3].op, args[4].op,
34892 args[5].op);
34893 break;
34894 default:
34895 gcc_unreachable ();
34898 if (!pat)
34899 return 0;
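/* If no explicit rounding was requested, strip the
   UNSPEC_EMBEDDED_ROUNDING marker so the normal form of the pattern is
   emitted. */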
34901 if (redundant_embed_rnd)
34902 pat = ix86_erase_embedded_rounding (pat);
34904 emit_insn (pat);
34905 return target;
34908 /* Subroutine of ix86_expand_builtin to take care of special insns
34909 with variable number of operands. */
34911 static rtx
34912 ix86_expand_special_args_builtin (const struct builtin_description *d,
34913 tree exp, rtx target)
34915 tree arg;
34916 rtx pat, op;
34917 unsigned int i, nargs, arg_adjust, memory;
34918 bool aligned_mem = false;
34919 struct
34921 rtx op;
34922 enum machine_mode mode;
34923 } args[3];
34924 enum insn_code icode = d->icode;
34925 bool last_arg_constant = false;
34926 const struct insn_data_d *insn_p = &insn_data[icode];
34927 enum machine_mode tmode = insn_p->operand[0].mode;
34928 enum { load, store } klass;
34930 switch ((enum ix86_builtin_func_type) d->flag)
34932 case VOID_FTYPE_VOID:
34933 emit_insn (GEN_FCN (icode) (target));
34934 return 0;
34935 case VOID_FTYPE_UINT64:
34936 case VOID_FTYPE_UNSIGNED:
34937 nargs = 0;
34938 klass = store;
34939 memory = 0;
34940 break;
34942 case INT_FTYPE_VOID:
34943 case UINT64_FTYPE_VOID:
34944 case UNSIGNED_FTYPE_VOID:
34945 nargs = 0;
34946 klass = load;
34947 memory = 0;
34948 break;
34949 case UINT64_FTYPE_PUNSIGNED:
34950 case V2DI_FTYPE_PV2DI:
34951 case V4DI_FTYPE_PV4DI:
34952 case V32QI_FTYPE_PCCHAR:
34953 case V16QI_FTYPE_PCCHAR:
34954 case V8SF_FTYPE_PCV4SF:
34955 case V8SF_FTYPE_PCFLOAT:
34956 case V4SF_FTYPE_PCFLOAT:
34957 case V4DF_FTYPE_PCV2DF:
34958 case V4DF_FTYPE_PCDOUBLE:
34959 case V2DF_FTYPE_PCDOUBLE:
34960 case VOID_FTYPE_PVOID:
34961 case V16SI_FTYPE_PV4SI:
34962 case V16SF_FTYPE_PV4SF:
34963 case V8DI_FTYPE_PV4DI:
34964 case V8DI_FTYPE_PV8DI:
34965 case V8DF_FTYPE_PV4DF:
34966 nargs = 1;
34967 klass = load;
34968 memory = 0;
34969 switch (icode)
34971 case CODE_FOR_sse4_1_movntdqa:
34972 case CODE_FOR_avx2_movntdqa:
34973 case CODE_FOR_avx512f_movntdqa:
34974 aligned_mem = true;
34975 break;
34976 default:
34977 break;
34979 break;
34980 case VOID_FTYPE_PV2SF_V4SF:
34981 case VOID_FTYPE_PV8DI_V8DI:
34982 case VOID_FTYPE_PV4DI_V4DI:
34983 case VOID_FTYPE_PV2DI_V2DI:
34984 case VOID_FTYPE_PCHAR_V32QI:
34985 case VOID_FTYPE_PCHAR_V16QI:
34986 case VOID_FTYPE_PFLOAT_V16SF:
34987 case VOID_FTYPE_PFLOAT_V8SF:
34988 case VOID_FTYPE_PFLOAT_V4SF:
34989 case VOID_FTYPE_PDOUBLE_V8DF:
34990 case VOID_FTYPE_PDOUBLE_V4DF:
34991 case VOID_FTYPE_PDOUBLE_V2DF:
34992 case VOID_FTYPE_PLONGLONG_LONGLONG:
34993 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34994 case VOID_FTYPE_PINT_INT:
34995 nargs = 1;
34996 klass = store;
34997 /* Reserve memory operand for target. */
34998 memory = ARRAY_SIZE (args);
34999 switch (icode)
35001 /* These builtins and instructions require the memory
35002 to be properly aligned. */
35003 case CODE_FOR_avx_movntv4di:
35004 case CODE_FOR_sse2_movntv2di:
35005 case CODE_FOR_avx_movntv8sf:
35006 case CODE_FOR_sse_movntv4sf:
35007 case CODE_FOR_sse4a_vmmovntv4sf:
35008 case CODE_FOR_avx_movntv4df:
35009 case CODE_FOR_sse2_movntv2df:
35010 case CODE_FOR_sse4a_vmmovntv2df:
35011 case CODE_FOR_sse2_movntidi:
35012 case CODE_FOR_sse_movntq:
35013 case CODE_FOR_sse2_movntisi:
35014 case CODE_FOR_avx512f_movntv16sf:
35015 case CODE_FOR_avx512f_movntv8df:
35016 case CODE_FOR_avx512f_movntv8di:
35017 aligned_mem = true;
35018 break;
35019 default:
35020 break;
35022 break;
35023 case V4SF_FTYPE_V4SF_PCV2SF:
35024 case V2DF_FTYPE_V2DF_PCDOUBLE:
35025 nargs = 2;
35026 klass = load;
35027 memory = 1;
35028 break;
35029 case V8SF_FTYPE_PCV8SF_V8SI:
35030 case V4DF_FTYPE_PCV4DF_V4DI:
35031 case V4SF_FTYPE_PCV4SF_V4SI:
35032 case V2DF_FTYPE_PCV2DF_V2DI:
35033 case V8SI_FTYPE_PCV8SI_V8SI:
35034 case V4DI_FTYPE_PCV4DI_V4DI:
35035 case V4SI_FTYPE_PCV4SI_V4SI:
35036 case V2DI_FTYPE_PCV2DI_V2DI:
35037 nargs = 2;
35038 klass = load;
35039 memory = 0;
35040 break;
35041 case VOID_FTYPE_PV8DF_V8DF_QI:
35042 case VOID_FTYPE_PV16SF_V16SF_HI:
35043 case VOID_FTYPE_PV8DI_V8DI_QI:
35044 case VOID_FTYPE_PV16SI_V16SI_HI:
35045 switch (icode)
35047 /* These builtins and instructions require the memory
35048 to be properly aligned. */
35049 case CODE_FOR_avx512f_storev16sf_mask:
35050 case CODE_FOR_avx512f_storev16si_mask:
35051 case CODE_FOR_avx512f_storev8df_mask:
35052 case CODE_FOR_avx512f_storev8di_mask:
35053 aligned_mem = true;
35054 break;
35055 default:
35056 break;
35058 /* FALLTHRU */
35059 case VOID_FTYPE_PV8SF_V8SI_V8SF:
35060 case VOID_FTYPE_PV4DF_V4DI_V4DF:
35061 case VOID_FTYPE_PV4SF_V4SI_V4SF:
35062 case VOID_FTYPE_PV2DF_V2DI_V2DF:
35063 case VOID_FTYPE_PV8SI_V8SI_V8SI:
35064 case VOID_FTYPE_PV4DI_V4DI_V4DI:
35065 case VOID_FTYPE_PV4SI_V4SI_V4SI:
35066 case VOID_FTYPE_PV2DI_V2DI_V2DI:
35067 case VOID_FTYPE_PDOUBLE_V2DF_QI:
35068 case VOID_FTYPE_PFLOAT_V4SF_QI:
35069 case VOID_FTYPE_PV8SI_V8DI_QI:
35070 case VOID_FTYPE_PV8HI_V8DI_QI:
35071 case VOID_FTYPE_PV16HI_V16SI_HI:
35072 case VOID_FTYPE_PV16QI_V8DI_QI:
35073 case VOID_FTYPE_PV16QI_V16SI_HI:
35074 nargs = 2;
35075 klass = store;
35076 /* Reserve memory operand for target. */
35077 memory = ARRAY_SIZE (args);
35078 break;
35079 case V16SF_FTYPE_PCV16SF_V16SF_HI:
35080 case V16SI_FTYPE_PCV16SI_V16SI_HI:
35081 case V8DF_FTYPE_PCV8DF_V8DF_QI:
35082 case V8DI_FTYPE_PCV8DI_V8DI_QI:
35083 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
35084 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
35085 nargs = 3;
35086 klass = load;
35087 memory = 0;
35088 switch (icode)
35090 /* These builtins and instructions require the memory
35091 to be properly aligned. */
35092 case CODE_FOR_avx512f_loadv16sf_mask:
35093 case CODE_FOR_avx512f_loadv16si_mask:
35094 case CODE_FOR_avx512f_loadv8df_mask:
35095 case CODE_FOR_avx512f_loadv8di_mask:
35096 aligned_mem = true;
35097 break;
35098 default:
35099 break;
35101 break;
35102 case VOID_FTYPE_UINT_UINT_UINT:
35103 case VOID_FTYPE_UINT64_UINT_UINT:
35104 case UCHAR_FTYPE_UINT_UINT_UINT:
35105 case UCHAR_FTYPE_UINT64_UINT_UINT:
35106 nargs = 3;
35107 klass = load;
35108 memory = ARRAY_SIZE (args);
35109 last_arg_constant = true;
35110 break;
35111 default:
35112 gcc_unreachable ();
35115 gcc_assert (nargs <= ARRAY_SIZE (args));
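/* For stores the first call argument is the destination; expand it as
   operand 0 of the insn and shift the remaining arguments by one. */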
35117 if (klass == store)
35119 arg = CALL_EXPR_ARG (exp, 0);
35120 op = expand_normal (arg);
35121 gcc_assert (target == 0);
35122 if (memory)
35124 op = ix86_zero_extend_to_Pmode (op);
35125 target = gen_rtx_MEM (tmode, op);
35126 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
35127 on it. Try to improve it using get_pointer_alignment,
35128 and if the special builtin is one that requires strict
35129 mode alignment, also from its GET_MODE_ALIGNMENT.
35130 Failure to do so could lead to ix86_legitimate_combined_insn
35131 rejecting all changes to such insns. */
35132 unsigned int align = get_pointer_alignment (arg);
35133 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
35134 align = GET_MODE_ALIGNMENT (tmode);
35135 if (MEM_ALIGN (target) < align)
35136 set_mem_align (target, align);
35138 else
35139 target = force_reg (tmode, op);
35140 arg_adjust = 1;
35142 else
35144 arg_adjust = 0;
35145 if (optimize
35146 || target == 0
35147 || !register_operand (target, tmode)
35148 || GET_MODE (target) != tmode)
35149 target = gen_reg_rtx (tmode);
35152 for (i = 0; i < nargs; i++)
35154 enum machine_mode mode = insn_p->operand[i + 1].mode;
35155 bool match;
35157 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
35158 op = expand_normal (arg);
35159 match = insn_p->operand[i + 1].predicate (op, mode);
35161 if (last_arg_constant && (i + 1) == nargs)
35163 if (!match)
35165 if (icode == CODE_FOR_lwp_lwpvalsi3
35166 || icode == CODE_FOR_lwp_lwpinssi3
35167 || icode == CODE_FOR_lwp_lwpvaldi3
35168 || icode == CODE_FOR_lwp_lwpinsdi3)
35169 error ("the last argument must be a 32-bit immediate");
35170 else
35171 error ("the last argument must be an 8-bit immediate");
35172 return const0_rtx;
35175 else
35177 if (i == memory)
35179 /* This must be the memory operand. */
35180 op = ix86_zero_extend_to_Pmode (op);
35181 op = gen_rtx_MEM (mode, op);
35182 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
35183 on it. Try to improve it using get_pointer_alignment,
35184 and if the special builtin is one that requires strict
35185 mode alignment, also from its GET_MODE_ALIGNMENT.
35186 Failure to do so could lead to ix86_legitimate_combined_insn
35187 rejecting all changes to such insns. */
35188 unsigned int align = get_pointer_alignment (arg);
35189 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
35190 align = GET_MODE_ALIGNMENT (mode);
35191 if (MEM_ALIGN (op) < align)
35192 set_mem_align (op, align);
35194 else
35196 /* This must be a register. */
35197 if (VECTOR_MODE_P (mode))
35198 op = safe_vector_operand (op, mode);
35200 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
35201 op = copy_to_mode_reg (mode, op);
35202 else
35204 op = copy_to_reg (op);
35205 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
35210 args[i].op = op;
35211 args[i].mode = mode;
35214 switch (nargs)
35216 case 0:
35217 pat = GEN_FCN (icode) (target);
35218 break;
35219 case 1:
35220 pat = GEN_FCN (icode) (target, args[0].op);
35221 break;
35222 case 2:
35223 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35224 break;
35225 case 3:
35226 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
35227 break;
35228 default:
35229 gcc_unreachable ();
35232 if (! pat)
35233 return 0;
35234 emit_insn (pat);
35235 return klass == store ? 0 : target;
35238 /* Return the integer constant in ARG. Constrain it to be in the range
35239 of the subparts of VEC_TYPE; issue an error if not. */
35241 static int
35242 get_element_number (tree vec_type, tree arg)
35244 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
35246 if (!tree_fits_uhwi_p (arg)
35247 || (elt = tree_to_uhwi (arg), elt > max))
35249 error ("selector must be an integer constant in the range 0..%wi", max);
35250 return 0;
35253 return elt;
35256 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35257 ix86_expand_vector_init. We DO have language-level syntax for this, in
35258 the form of (type){ init-list }. Except that since we can't place emms
35259 instructions from inside the compiler, we can't allow the use of MMX
35260 registers unless the user explicitly asks for it. So we do *not* define
35261 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
35262 we have builtins invoked by mmintrin.h that give us license to emit
35263 these sorts of instructions. */
35265 static rtx
35266 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
35268 enum machine_mode tmode = TYPE_MODE (type);
35269 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
35270 int i, n_elt = GET_MODE_NUNITS (tmode);
35271 rtvec v = rtvec_alloc (n_elt);
35273 gcc_assert (VECTOR_MODE_P (tmode));
35274 gcc_assert (call_expr_nargs (exp) == n_elt);
35276 for (i = 0; i < n_elt; ++i)
35278 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
35279 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
35282 if (!target || !register_operand (target, tmode))
35283 target = gen_reg_rtx (tmode);
35285 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
35286 return target;
35289 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35290 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
35291 had a language-level syntax for referencing vector elements. */
35293 static rtx
35294 ix86_expand_vec_ext_builtin (tree exp, rtx target)
35296 enum machine_mode tmode, mode0;
35297 tree arg0, arg1;
35298 int elt;
35299 rtx op0;
35301 arg0 = CALL_EXPR_ARG (exp, 0);
35302 arg1 = CALL_EXPR_ARG (exp, 1);
35304 op0 = expand_normal (arg0);
35305 elt = get_element_number (TREE_TYPE (arg0), arg1);
35307 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35308 mode0 = TYPE_MODE (TREE_TYPE (arg0));
35309 gcc_assert (VECTOR_MODE_P (mode0));
35311 op0 = force_reg (mode0, op0);
35313 if (optimize || !target || !register_operand (target, tmode))
35314 target = gen_reg_rtx (tmode);
35316 ix86_expand_vector_extract (true, target, op0, elt);
35318 return target;
35321 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35322 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
35323 a language-level syntax for referencing vector elements. */
35325 static rtx
35326 ix86_expand_vec_set_builtin (tree exp)
35328 enum machine_mode tmode, mode1;
35329 tree arg0, arg1, arg2;
35330 int elt;
35331 rtx op0, op1, target;
35333 arg0 = CALL_EXPR_ARG (exp, 0);
35334 arg1 = CALL_EXPR_ARG (exp, 1);
35335 arg2 = CALL_EXPR_ARG (exp, 2);
35337 tmode = TYPE_MODE (TREE_TYPE (arg0));
35338 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35339 gcc_assert (VECTOR_MODE_P (tmode));
35341 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
35342 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
35343 elt = get_element_number (TREE_TYPE (arg0), arg2);
35345 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35346 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35348 op0 = force_reg (tmode, op0);
35349 op1 = force_reg (mode1, op1);
35351 /* OP0 is the source of these builtin functions and shouldn't be
35352 modified. Create a copy, use it and return it as target. */
35353 target = gen_reg_rtx (tmode);
35354 emit_move_insn (target, op0);
35355 ix86_expand_vector_set (true, target, op1, elt);
35357 return target;
35360 /* Expand an expression EXP that calls a built-in function,
35361 with result going to TARGET if that's convenient
35362 (and in mode MODE if that's convenient).
35363 SUBTARGET may be used as the target for computing one of EXP's operands.
35364 IGNORE is nonzero if the value is to be ignored. */
35366 static rtx
35367 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35368 enum machine_mode mode, int ignore)
35370 const struct builtin_description *d;
35371 size_t i;
35372 enum insn_code icode;
35373 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35374 tree arg0, arg1, arg2, arg3, arg4;
35375 rtx op0, op1, op2, op3, op4, pat, insn;
35376 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35377 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35379 /* For CPU builtins that can be folded, fold first and expand the fold. */
35380 switch (fcode)
35382 case IX86_BUILTIN_CPU_INIT:
35384 /* Make it call __cpu_indicator_init in libgcc. */
35385 tree call_expr, fndecl, type;
35386 type = build_function_type_list (integer_type_node, NULL_TREE);
35387 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35388 call_expr = build_call_expr (fndecl, 0);
35389 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35391 case IX86_BUILTIN_CPU_IS:
35392 case IX86_BUILTIN_CPU_SUPPORTS:
35394 tree arg0 = CALL_EXPR_ARG (exp, 0);
35395 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35396 gcc_assert (fold_expr != NULL_TREE);
35397 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35401 /* Determine whether the builtin function is available under the current ISA.
35402 Originally the builtin was not created if it wasn't applicable to the
35403 current ISA based on the command line switches. With function specific
35404 options, we need to check in the context of the function making the call
35405 whether it is supported. */
35406 if (ix86_builtins_isa[fcode].isa
35407 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35409 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35410 NULL, (enum fpmath_unit) 0, false);
35412 if (!opts)
35413 error ("%qE needs unknown isa option", fndecl);
35414 else
35416 gcc_assert (opts != NULL);
35417 error ("%qE needs isa option %s", fndecl, opts);
35418 free (opts);
35420 return const0_rtx;
35423 switch (fcode)
35425 case IX86_BUILTIN_MASKMOVQ:
35426 case IX86_BUILTIN_MASKMOVDQU:
35427 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35428 ? CODE_FOR_mmx_maskmovq
35429 : CODE_FOR_sse2_maskmovdqu);
35430 /* Note the arg order is different from the operand order. */
35431 arg1 = CALL_EXPR_ARG (exp, 0);
35432 arg2 = CALL_EXPR_ARG (exp, 1);
35433 arg0 = CALL_EXPR_ARG (exp, 2);
35434 op0 = expand_normal (arg0);
35435 op1 = expand_normal (arg1);
35436 op2 = expand_normal (arg2);
35437 mode0 = insn_data[icode].operand[0].mode;
35438 mode1 = insn_data[icode].operand[1].mode;
35439 mode2 = insn_data[icode].operand[2].mode;
35441 op0 = ix86_zero_extend_to_Pmode (op0);
35442 op0 = gen_rtx_MEM (mode1, op0);
35444 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35445 op0 = copy_to_mode_reg (mode0, op0);
35446 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35447 op1 = copy_to_mode_reg (mode1, op1);
35448 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35449 op2 = copy_to_mode_reg (mode2, op2);
35450 pat = GEN_FCN (icode) (op0, op1, op2);
35451 if (! pat)
35452 return 0;
35453 emit_insn (pat);
35454 return 0;
35456 case IX86_BUILTIN_LDMXCSR:
35457 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35458 target = assign_386_stack_local (SImode, SLOT_TEMP);
35459 emit_move_insn (target, op0);
35460 emit_insn (gen_sse_ldmxcsr (target));
35461 return 0;
35463 case IX86_BUILTIN_STMXCSR:
35464 target = assign_386_stack_local (SImode, SLOT_TEMP);
35465 emit_insn (gen_sse_stmxcsr (target));
35466 return copy_to_mode_reg (SImode, target);
35468 case IX86_BUILTIN_CLFLUSH:
35469 arg0 = CALL_EXPR_ARG (exp, 0);
35470 op0 = expand_normal (arg0);
35471 icode = CODE_FOR_sse2_clflush;
35472 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35473 op0 = ix86_zero_extend_to_Pmode (op0);
35475 emit_insn (gen_sse2_clflush (op0));
35476 return 0;
35478 case IX86_BUILTIN_MONITOR:
35479 arg0 = CALL_EXPR_ARG (exp, 0);
35480 arg1 = CALL_EXPR_ARG (exp, 1);
35481 arg2 = CALL_EXPR_ARG (exp, 2);
35482 op0 = expand_normal (arg0);
35483 op1 = expand_normal (arg1);
35484 op2 = expand_normal (arg2);
35485 if (!REG_P (op0))
35486 op0 = ix86_zero_extend_to_Pmode (op0);
35487 if (!REG_P (op1))
35488 op1 = copy_to_mode_reg (SImode, op1);
35489 if (!REG_P (op2))
35490 op2 = copy_to_mode_reg (SImode, op2);
35491 emit_insn (ix86_gen_monitor (op0, op1, op2));
35492 return 0;
35494 case IX86_BUILTIN_MWAIT:
35495 arg0 = CALL_EXPR_ARG (exp, 0);
35496 arg1 = CALL_EXPR_ARG (exp, 1);
35497 op0 = expand_normal (arg0);
35498 op1 = expand_normal (arg1);
35499 if (!REG_P (op0))
35500 op0 = copy_to_mode_reg (SImode, op0);
35501 if (!REG_P (op1))
35502 op1 = copy_to_mode_reg (SImode, op1);
35503 emit_insn (gen_sse3_mwait (op0, op1));
35504 return 0;
35506 case IX86_BUILTIN_VEC_INIT_V2SI:
35507 case IX86_BUILTIN_VEC_INIT_V4HI:
35508 case IX86_BUILTIN_VEC_INIT_V8QI:
35509 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35511 case IX86_BUILTIN_VEC_EXT_V2DF:
35512 case IX86_BUILTIN_VEC_EXT_V2DI:
35513 case IX86_BUILTIN_VEC_EXT_V4SF:
35514 case IX86_BUILTIN_VEC_EXT_V4SI:
35515 case IX86_BUILTIN_VEC_EXT_V8HI:
35516 case IX86_BUILTIN_VEC_EXT_V2SI:
35517 case IX86_BUILTIN_VEC_EXT_V4HI:
35518 case IX86_BUILTIN_VEC_EXT_V16QI:
35519 return ix86_expand_vec_ext_builtin (exp, target);
35521 case IX86_BUILTIN_VEC_SET_V2DI:
35522 case IX86_BUILTIN_VEC_SET_V4SF:
35523 case IX86_BUILTIN_VEC_SET_V4SI:
35524 case IX86_BUILTIN_VEC_SET_V8HI:
35525 case IX86_BUILTIN_VEC_SET_V4HI:
35526 case IX86_BUILTIN_VEC_SET_V16QI:
35527 return ix86_expand_vec_set_builtin (exp);
35529 case IX86_BUILTIN_INFQ:
35530 case IX86_BUILTIN_HUGE_VALQ:
35532 REAL_VALUE_TYPE inf;
35533 rtx tmp;
35535 real_inf (&inf);
35536 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35538 tmp = validize_mem (force_const_mem (mode, tmp));
35540 if (target == 0)
35541 target = gen_reg_rtx (mode);
35543 emit_move_insn (target, tmp);
35544 return target;
35547 case IX86_BUILTIN_RDPMC:
35548 case IX86_BUILTIN_RDTSC:
35549 case IX86_BUILTIN_RDTSCP:
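/* Read a performance counter or the time-stamp counter; RDTSCP also
   stores the auxiliary value through its pointer argument. On 64-bit
   targets the two result halves are combined below. */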
35551 op0 = gen_reg_rtx (DImode);
35552 op1 = gen_reg_rtx (DImode);
35554 if (fcode == IX86_BUILTIN_RDPMC)
35556 arg0 = CALL_EXPR_ARG (exp, 0);
35557 op2 = expand_normal (arg0);
35558 if (!register_operand (op2, SImode))
35559 op2 = copy_to_mode_reg (SImode, op2);
35561 insn = (TARGET_64BIT
35562 ? gen_rdpmc_rex64 (op0, op1, op2)
35563 : gen_rdpmc (op0, op2));
35564 emit_insn (insn);
35566 else if (fcode == IX86_BUILTIN_RDTSC)
35568 insn = (TARGET_64BIT
35569 ? gen_rdtsc_rex64 (op0, op1)
35570 : gen_rdtsc (op0));
35571 emit_insn (insn);
35573 else
35575 op2 = gen_reg_rtx (SImode);
35577 insn = (TARGET_64BIT
35578 ? gen_rdtscp_rex64 (op0, op1, op2)
35579 : gen_rdtscp (op0, op2));
35580 emit_insn (insn);
35582 arg0 = CALL_EXPR_ARG (exp, 0);
35583 op4 = expand_normal (arg0);
35584 if (!address_operand (op4, VOIDmode))
35586 op4 = convert_memory_address (Pmode, op4);
35587 op4 = copy_addr_to_reg (op4);
35589 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35592 if (target == 0)
35594 /* mode is VOIDmode if __builtin_rd* has been called
35595 without an lhs. */
35596 if (mode == VOIDmode)
35597 return target;
35598 target = gen_reg_rtx (mode);
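/* On 64-bit targets the high half of the result is in op1; shift it
   into place and merge it with the low half in op0. */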
35601 if (TARGET_64BIT)
35603 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35604 op1, 1, OPTAB_DIRECT);
35605 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35606 op0, 1, OPTAB_DIRECT);
35609 emit_move_insn (target, op0);
35610 return target;
35612 case IX86_BUILTIN_FXSAVE:
35613 case IX86_BUILTIN_FXRSTOR:
35614 case IX86_BUILTIN_FXSAVE64:
35615 case IX86_BUILTIN_FXRSTOR64:
35616 case IX86_BUILTIN_FNSTENV:
35617 case IX86_BUILTIN_FLDENV:
35618 case IX86_BUILTIN_FNSTSW:
35619 mode0 = BLKmode;
35620 switch (fcode)
35622 case IX86_BUILTIN_FXSAVE:
35623 icode = CODE_FOR_fxsave;
35624 break;
35625 case IX86_BUILTIN_FXRSTOR:
35626 icode = CODE_FOR_fxrstor;
35627 break;
35628 case IX86_BUILTIN_FXSAVE64:
35629 icode = CODE_FOR_fxsave64;
35630 break;
35631 case IX86_BUILTIN_FXRSTOR64:
35632 icode = CODE_FOR_fxrstor64;
35633 break;
35634 case IX86_BUILTIN_FNSTENV:
35635 icode = CODE_FOR_fnstenv;
35636 break;
35637 case IX86_BUILTIN_FLDENV:
35638 icode = CODE_FOR_fldenv;
35639 break;
35640 case IX86_BUILTIN_FNSTSW:
35641 icode = CODE_FOR_fnstsw;
35642 mode0 = HImode;
35643 break;
35644 default:
35645 gcc_unreachable ();
35648 arg0 = CALL_EXPR_ARG (exp, 0);
35649 op0 = expand_normal (arg0);
35651 if (!address_operand (op0, VOIDmode))
35653 op0 = convert_memory_address (Pmode, op0);
35654 op0 = copy_addr_to_reg (op0);
35656 op0 = gen_rtx_MEM (mode0, op0);
35658 pat = GEN_FCN (icode) (op0);
35659 if (pat)
35660 emit_insn (pat);
35661 return 0;
35663 case IX86_BUILTIN_XSAVE:
35664 case IX86_BUILTIN_XRSTOR:
35665 case IX86_BUILTIN_XSAVE64:
35666 case IX86_BUILTIN_XRSTOR64:
35667 case IX86_BUILTIN_XSAVEOPT:
35668 case IX86_BUILTIN_XSAVEOPT64:
35669 arg0 = CALL_EXPR_ARG (exp, 0);
35670 arg1 = CALL_EXPR_ARG (exp, 1);
35671 op0 = expand_normal (arg0);
35672 op1 = expand_normal (arg1);
35674 if (!address_operand (op0, VOIDmode))
35676 op0 = convert_memory_address (Pmode, op0);
35677 op0 = copy_addr_to_reg (op0);
35679 op0 = gen_rtx_MEM (BLKmode, op0);
35681 op1 = force_reg (DImode, op1);
35683 if (TARGET_64BIT)
35685 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35686 NULL, 1, OPTAB_DIRECT);
35687 switch (fcode)
35689 case IX86_BUILTIN_XSAVE:
35690 icode = CODE_FOR_xsave_rex64;
35691 break;
35692 case IX86_BUILTIN_XRSTOR:
35693 icode = CODE_FOR_xrstor_rex64;
35694 break;
35695 case IX86_BUILTIN_XSAVE64:
35696 icode = CODE_FOR_xsave64;
35697 break;
35698 case IX86_BUILTIN_XRSTOR64:
35699 icode = CODE_FOR_xrstor64;
35700 break;
35701 case IX86_BUILTIN_XSAVEOPT:
35702 icode = CODE_FOR_xsaveopt_rex64;
35703 break;
35704 case IX86_BUILTIN_XSAVEOPT64:
35705 icode = CODE_FOR_xsaveopt64;
35706 break;
35707 default:
35708 gcc_unreachable ();
35711 op2 = gen_lowpart (SImode, op2);
35712 op1 = gen_lowpart (SImode, op1);
35713 pat = GEN_FCN (icode) (op0, op1, op2);
35715 else
35717 switch (fcode)
35719 case IX86_BUILTIN_XSAVE:
35720 icode = CODE_FOR_xsave;
35721 break;
35722 case IX86_BUILTIN_XRSTOR:
35723 icode = CODE_FOR_xrstor;
35724 break;
35725 case IX86_BUILTIN_XSAVEOPT:
35726 icode = CODE_FOR_xsaveopt;
35727 break;
35728 default:
35729 gcc_unreachable ();
35731 pat = GEN_FCN (icode) (op0, op1);
35734 if (pat)
35735 emit_insn (pat);
35736 return 0;
35738 case IX86_BUILTIN_LLWPCB:
35739 arg0 = CALL_EXPR_ARG (exp, 0);
35740 op0 = expand_normal (arg0);
35741 icode = CODE_FOR_lwp_llwpcb;
35742 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35743 op0 = ix86_zero_extend_to_Pmode (op0);
35744 emit_insn (gen_lwp_llwpcb (op0));
35745 return 0;
35747 case IX86_BUILTIN_SLWPCB:
35748 icode = CODE_FOR_lwp_slwpcb;
35749 if (!target
35750 || !insn_data[icode].operand[0].predicate (target, Pmode))
35751 target = gen_reg_rtx (Pmode);
35752 emit_insn (gen_lwp_slwpcb (target));
35753 return target;
35755 case IX86_BUILTIN_BEXTRI32:
35756 case IX86_BUILTIN_BEXTRI64:
35757 arg0 = CALL_EXPR_ARG (exp, 0);
35758 arg1 = CALL_EXPR_ARG (exp, 1);
35759 op0 = expand_normal (arg0);
35760 op1 = expand_normal (arg1);
35761 icode = (fcode == IX86_BUILTIN_BEXTRI32
35762 ? CODE_FOR_tbm_bextri_si
35763 : CODE_FOR_tbm_bextri_di);
35764 if (!CONST_INT_P (op1))
35766 error ("last argument must be an immediate");
35767 return const0_rtx;
35769 else
35771 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35772 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35773 op1 = GEN_INT (length);
35774 op2 = GEN_INT (lsb_index);
35775 pat = GEN_FCN (icode) (target, op0, op1, op2);
35776 if (pat)
35777 emit_insn (pat);
35778 return target;
35781 case IX86_BUILTIN_RDRAND16_STEP:
35782 icode = CODE_FOR_rdrandhi_1;
35783 mode0 = HImode;
35784 goto rdrand_step;
35786 case IX86_BUILTIN_RDRAND32_STEP:
35787 icode = CODE_FOR_rdrandsi_1;
35788 mode0 = SImode;
35789 goto rdrand_step;
35791 case IX86_BUILTIN_RDRAND64_STEP:
35792 icode = CODE_FOR_rdranddi_1;
35793 mode0 = DImode;
35795 rdrand_step:
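/* Emit the rdrand pattern, store the random value through the pointer
   argument and derive the return value (nonzero on success) from the
   carry flag. */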
35796 op0 = gen_reg_rtx (mode0);
35797 emit_insn (GEN_FCN (icode) (op0));
35799 arg0 = CALL_EXPR_ARG (exp, 0);
35800 op1 = expand_normal (arg0);
35801 if (!address_operand (op1, VOIDmode))
35803 op1 = convert_memory_address (Pmode, op1);
35804 op1 = copy_addr_to_reg (op1);
35806 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35808 op1 = gen_reg_rtx (SImode);
35809 emit_move_insn (op1, CONST1_RTX (SImode));
35811 /* Emit SImode conditional move. */
35812 if (mode0 == HImode)
35814 op2 = gen_reg_rtx (SImode);
35815 emit_insn (gen_zero_extendhisi2 (op2, op0));
35817 else if (mode0 == SImode)
35818 op2 = op0;
35819 else
35820 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35822 if (target == 0
35823 || !register_operand (target, SImode))
35824 target = gen_reg_rtx (SImode);
35826 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35827 const0_rtx);
35828 emit_insn (gen_rtx_SET (VOIDmode, target,
35829 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35830 return target;
35832 case IX86_BUILTIN_RDSEED16_STEP:
35833 icode = CODE_FOR_rdseedhi_1;
35834 mode0 = HImode;
35835 goto rdseed_step;
35837 case IX86_BUILTIN_RDSEED32_STEP:
35838 icode = CODE_FOR_rdseedsi_1;
35839 mode0 = SImode;
35840 goto rdseed_step;
35842 case IX86_BUILTIN_RDSEED64_STEP:
35843 icode = CODE_FOR_rdseeddi_1;
35844 mode0 = DImode;
35846 rdseed_step:
35847 op0 = gen_reg_rtx (mode0);
35848 emit_insn (GEN_FCN (icode) (op0));
35850 arg0 = CALL_EXPR_ARG (exp, 0);
35851 op1 = expand_normal (arg0);
35852 if (!address_operand (op1, VOIDmode))
35854 op1 = convert_memory_address (Pmode, op1);
35855 op1 = copy_addr_to_reg (op1);
35857 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35859 op2 = gen_reg_rtx (QImode);
35861 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35862 const0_rtx);
35863 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35865 if (target == 0
35866 || !register_operand (target, SImode))
35867 target = gen_reg_rtx (SImode);
35869 emit_insn (gen_zero_extendqisi2 (target, op2));
35870 return target;
35872 case IX86_BUILTIN_ADDCARRYX32:
35873 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35874 mode0 = SImode;
35875 goto addcarryx;
35877 case IX86_BUILTIN_ADDCARRYX64:
35878 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35879 mode0 = DImode;
35881 addcarryx:
35882 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35883 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35884 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35885 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35887 op0 = gen_reg_rtx (QImode);
35889 /* Generate CF from input operand. */
35890 op1 = expand_normal (arg0);
35891 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35892 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35894 /* Generate the ADCX instruction to compute X+Y+CF. */
35895 op2 = expand_normal (arg1);
35896 op3 = expand_normal (arg2);
35898 if (!REG_P (op2))
35899 op2 = copy_to_mode_reg (mode0, op2);
35900 if (!REG_P (op3))
35901 op3 = copy_to_mode_reg (mode0, op3);
35903 op0 = gen_reg_rtx (mode0);
35905 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35906 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35907 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35909 /* Store the result. */
35910 op4 = expand_normal (arg3);
35911 if (!address_operand (op4, VOIDmode))
35913 op4 = convert_memory_address (Pmode, op4);
35914 op4 = copy_addr_to_reg (op4);
35916 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35918 /* Return current CF value. */
35919 if (target == 0)
35920 target = gen_reg_rtx (QImode);
35922 PUT_MODE (pat, QImode);
35923 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35924 return target;
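/* Read or write the flags register by pushing and popping it via the
   stack. */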
35926 case IX86_BUILTIN_READ_FLAGS:
35927 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35929 if (optimize
35930 || target == NULL_RTX
35931 || !nonimmediate_operand (target, word_mode)
35932 || GET_MODE (target) != word_mode)
35933 target = gen_reg_rtx (word_mode);
35935 emit_insn (gen_pop (target));
35936 return target;
35938 case IX86_BUILTIN_WRITE_FLAGS:
35940 arg0 = CALL_EXPR_ARG (exp, 0);
35941 op0 = expand_normal (arg0);
35942 if (!general_no_elim_operand (op0, word_mode))
35943 op0 = copy_to_mode_reg (word_mode, op0);
35945 emit_insn (gen_push (op0));
35946 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35947 return 0;
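/* For illustration: READ_FLAGS pushes the flags register and pops the
   value into TARGET, and WRITE_FLAGS pushes its argument and pops it back
   into the flags register.  These are the expanders behind the
   __builtin_ia32_readeflags_u64 and __builtin_ia32_writeeflags_u64 style
   builtins (names given as an assumption, for context only).  */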
35949 case IX86_BUILTIN_KORTESTC16:
35950 icode = CODE_FOR_kortestchi;
35951 mode0 = HImode;
35952 mode1 = CCCmode;
35953 goto kortest;
35955 case IX86_BUILTIN_KORTESTZ16:
35956 icode = CODE_FOR_kortestzhi;
35957 mode0 = HImode;
35958 mode1 = CCZmode;
35960 kortest:
35961 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35962 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35963 op0 = expand_normal (arg0);
35964 op1 = expand_normal (arg1);
35966 op0 = copy_to_reg (op0);
35967 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35968 op1 = copy_to_reg (op1);
35969 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35971 target = gen_reg_rtx (QImode);
35972 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35974 /* Emit kortest. */
35975 emit_insn (GEN_FCN (icode) (op0, op1));
35976 /* And use setcc to return result from flags. */
35977 ix86_expand_setcc (target, EQ,
35978 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35979 return target;
35981 case IX86_BUILTIN_GATHERSIV2DF:
35982 icode = CODE_FOR_avx2_gathersiv2df;
35983 goto gather_gen;
35984 case IX86_BUILTIN_GATHERSIV4DF:
35985 icode = CODE_FOR_avx2_gathersiv4df;
35986 goto gather_gen;
35987 case IX86_BUILTIN_GATHERDIV2DF:
35988 icode = CODE_FOR_avx2_gatherdiv2df;
35989 goto gather_gen;
35990 case IX86_BUILTIN_GATHERDIV4DF:
35991 icode = CODE_FOR_avx2_gatherdiv4df;
35992 goto gather_gen;
35993 case IX86_BUILTIN_GATHERSIV4SF:
35994 icode = CODE_FOR_avx2_gathersiv4sf;
35995 goto gather_gen;
35996 case IX86_BUILTIN_GATHERSIV8SF:
35997 icode = CODE_FOR_avx2_gathersiv8sf;
35998 goto gather_gen;
35999 case IX86_BUILTIN_GATHERDIV4SF:
36000 icode = CODE_FOR_avx2_gatherdiv4sf;
36001 goto gather_gen;
36002 case IX86_BUILTIN_GATHERDIV8SF:
36003 icode = CODE_FOR_avx2_gatherdiv8sf;
36004 goto gather_gen;
36005 case IX86_BUILTIN_GATHERSIV2DI:
36006 icode = CODE_FOR_avx2_gathersiv2di;
36007 goto gather_gen;
36008 case IX86_BUILTIN_GATHERSIV4DI:
36009 icode = CODE_FOR_avx2_gathersiv4di;
36010 goto gather_gen;
36011 case IX86_BUILTIN_GATHERDIV2DI:
36012 icode = CODE_FOR_avx2_gatherdiv2di;
36013 goto gather_gen;
36014 case IX86_BUILTIN_GATHERDIV4DI:
36015 icode = CODE_FOR_avx2_gatherdiv4di;
36016 goto gather_gen;
36017 case IX86_BUILTIN_GATHERSIV4SI:
36018 icode = CODE_FOR_avx2_gathersiv4si;
36019 goto gather_gen;
36020 case IX86_BUILTIN_GATHERSIV8SI:
36021 icode = CODE_FOR_avx2_gathersiv8si;
36022 goto gather_gen;
36023 case IX86_BUILTIN_GATHERDIV4SI:
36024 icode = CODE_FOR_avx2_gatherdiv4si;
36025 goto gather_gen;
36026 case IX86_BUILTIN_GATHERDIV8SI:
36027 icode = CODE_FOR_avx2_gatherdiv8si;
36028 goto gather_gen;
36029 case IX86_BUILTIN_GATHERALTSIV4DF:
36030 icode = CODE_FOR_avx2_gathersiv4df;
36031 goto gather_gen;
36032 case IX86_BUILTIN_GATHERALTDIV8SF:
36033 icode = CODE_FOR_avx2_gatherdiv8sf;
36034 goto gather_gen;
36035 case IX86_BUILTIN_GATHERALTSIV4DI:
36036 icode = CODE_FOR_avx2_gathersiv4di;
36037 goto gather_gen;
36038 case IX86_BUILTIN_GATHERALTDIV8SI:
36039 icode = CODE_FOR_avx2_gatherdiv8si;
36040 goto gather_gen;
36041 case IX86_BUILTIN_GATHER3SIV16SF:
36042 icode = CODE_FOR_avx512f_gathersiv16sf;
36043 goto gather_gen;
36044 case IX86_BUILTIN_GATHER3SIV8DF:
36045 icode = CODE_FOR_avx512f_gathersiv8df;
36046 goto gather_gen;
36047 case IX86_BUILTIN_GATHER3DIV16SF:
36048 icode = CODE_FOR_avx512f_gatherdiv16sf;
36049 goto gather_gen;
36050 case IX86_BUILTIN_GATHER3DIV8DF:
36051 icode = CODE_FOR_avx512f_gatherdiv8df;
36052 goto gather_gen;
36053 case IX86_BUILTIN_GATHER3SIV16SI:
36054 icode = CODE_FOR_avx512f_gathersiv16si;
36055 goto gather_gen;
36056 case IX86_BUILTIN_GATHER3SIV8DI:
36057 icode = CODE_FOR_avx512f_gathersiv8di;
36058 goto gather_gen;
36059 case IX86_BUILTIN_GATHER3DIV16SI:
36060 icode = CODE_FOR_avx512f_gatherdiv16si;
36061 goto gather_gen;
36062 case IX86_BUILTIN_GATHER3DIV8DI:
36063 icode = CODE_FOR_avx512f_gatherdiv8di;
36064 goto gather_gen;
36065 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36066 icode = CODE_FOR_avx512f_gathersiv8df;
36067 goto gather_gen;
36068 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36069 icode = CODE_FOR_avx512f_gatherdiv16sf;
36070 goto gather_gen;
36071 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36072 icode = CODE_FOR_avx512f_gathersiv8di;
36073 goto gather_gen;
36074 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36075 icode = CODE_FOR_avx512f_gatherdiv16si;
36076 goto gather_gen;
36077 case IX86_BUILTIN_SCATTERSIV16SF:
36078 icode = CODE_FOR_avx512f_scattersiv16sf;
36079 goto scatter_gen;
36080 case IX86_BUILTIN_SCATTERSIV8DF:
36081 icode = CODE_FOR_avx512f_scattersiv8df;
36082 goto scatter_gen;
36083 case IX86_BUILTIN_SCATTERDIV16SF:
36084 icode = CODE_FOR_avx512f_scatterdiv16sf;
36085 goto scatter_gen;
36086 case IX86_BUILTIN_SCATTERDIV8DF:
36087 icode = CODE_FOR_avx512f_scatterdiv8df;
36088 goto scatter_gen;
36089 case IX86_BUILTIN_SCATTERSIV16SI:
36090 icode = CODE_FOR_avx512f_scattersiv16si;
36091 goto scatter_gen;
36092 case IX86_BUILTIN_SCATTERSIV8DI:
36093 icode = CODE_FOR_avx512f_scattersiv8di;
36094 goto scatter_gen;
36095 case IX86_BUILTIN_SCATTERDIV16SI:
36096 icode = CODE_FOR_avx512f_scatterdiv16si;
36097 goto scatter_gen;
36098 case IX86_BUILTIN_SCATTERDIV8DI:
36099 icode = CODE_FOR_avx512f_scatterdiv8di;
36100 goto scatter_gen;
36102 case IX86_BUILTIN_GATHERPFDPD:
36103 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
36104 goto vec_prefetch_gen;
36105 case IX86_BUILTIN_GATHERPFDPS:
36106 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
36107 goto vec_prefetch_gen;
36108 case IX86_BUILTIN_GATHERPFQPD:
36109 icode = CODE_FOR_avx512pf_gatherpfv8didf;
36110 goto vec_prefetch_gen;
36111 case IX86_BUILTIN_GATHERPFQPS:
36112 icode = CODE_FOR_avx512pf_gatherpfv8disf;
36113 goto vec_prefetch_gen;
36114 case IX86_BUILTIN_SCATTERPFDPD:
36115 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
36116 goto vec_prefetch_gen;
36117 case IX86_BUILTIN_SCATTERPFDPS:
36118 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
36119 goto vec_prefetch_gen;
36120 case IX86_BUILTIN_SCATTERPFQPD:
36121 icode = CODE_FOR_avx512pf_scatterpfv8didf;
36122 goto vec_prefetch_gen;
36123 case IX86_BUILTIN_SCATTERPFQPS:
36124 icode = CODE_FOR_avx512pf_scatterpfv8disf;
36125 goto vec_prefetch_gen;
36127 gather_gen:
36128 rtx half;
36129 rtx (*gen) (rtx, rtx);
36131 arg0 = CALL_EXPR_ARG (exp, 0);
36132 arg1 = CALL_EXPR_ARG (exp, 1);
36133 arg2 = CALL_EXPR_ARG (exp, 2);
36134 arg3 = CALL_EXPR_ARG (exp, 3);
36135 arg4 = CALL_EXPR_ARG (exp, 4);
36136 op0 = expand_normal (arg0);
36137 op1 = expand_normal (arg1);
36138 op2 = expand_normal (arg2);
36139 op3 = expand_normal (arg3);
36140 op4 = expand_normal (arg4);
36141 /* Note the arg order is different from the operand order. */
36142 mode0 = insn_data[icode].operand[1].mode;
36143 mode2 = insn_data[icode].operand[3].mode;
36144 mode3 = insn_data[icode].operand[4].mode;
36145 mode4 = insn_data[icode].operand[5].mode;
36147 if (target == NULL_RTX
36148 || GET_MODE (target) != insn_data[icode].operand[0].mode
36149 || !insn_data[icode].operand[0].predicate (target,
36150 GET_MODE (target)))
36151 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
36152 else
36153 subtarget = target;
36155 switch (fcode)
36157 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36158 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36159 half = gen_reg_rtx (V8SImode);
36160 if (!nonimmediate_operand (op2, V16SImode))
36161 op2 = copy_to_mode_reg (V16SImode, op2);
36162 emit_insn (gen_vec_extract_lo_v16si (half, op2));
36163 op2 = half;
36164 break;
36165 case IX86_BUILTIN_GATHERALTSIV4DF:
36166 case IX86_BUILTIN_GATHERALTSIV4DI:
36167 half = gen_reg_rtx (V4SImode);
36168 if (!nonimmediate_operand (op2, V8SImode))
36169 op2 = copy_to_mode_reg (V8SImode, op2);
36170 emit_insn (gen_vec_extract_lo_v8si (half, op2));
36171 op2 = half;
36172 break;
36173 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36174 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36175 half = gen_reg_rtx (mode0);
36176 if (mode0 == V8SFmode)
36177 gen = gen_vec_extract_lo_v16sf;
36178 else
36179 gen = gen_vec_extract_lo_v16si;
36180 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36181 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36182 emit_insn (gen (half, op0));
36183 op0 = half;
36184 if (GET_MODE (op3) != VOIDmode)
36186 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36187 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36188 emit_insn (gen (half, op3));
36189 op3 = half;
36191 break;
36192 case IX86_BUILTIN_GATHERALTDIV8SF:
36193 case IX86_BUILTIN_GATHERALTDIV8SI:
36194 half = gen_reg_rtx (mode0);
36195 if (mode0 == V4SFmode)
36196 gen = gen_vec_extract_lo_v8sf;
36197 else
36198 gen = gen_vec_extract_lo_v8si;
36199 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36200 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36201 emit_insn (gen (half, op0));
36202 op0 = half;
36203 if (GET_MODE (op3) != VOIDmode)
36205 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36206 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36207 emit_insn (gen (half, op3));
36208 op3 = half;
36210 break;
36211 default:
36212 break;
36215 /* Force the memory operand to use only a base register here; we
36216 don't want to do this for the memory operands of other builtin
36217 functions. */
36218 op1 = ix86_zero_extend_to_Pmode (op1);
36220 if (!insn_data[icode].operand[1].predicate (op0, mode0))
36221 op0 = copy_to_mode_reg (mode0, op0);
36222 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
36223 op1 = copy_to_mode_reg (Pmode, op1);
36224 if (!insn_data[icode].operand[3].predicate (op2, mode2))
36225 op2 = copy_to_mode_reg (mode2, op2);
36226 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
36228 if (!insn_data[icode].operand[4].predicate (op3, mode3))
36229 op3 = copy_to_mode_reg (mode3, op3);
36231 else
36233 op3 = copy_to_reg (op3);
36234 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
36236 if (!insn_data[icode].operand[5].predicate (op4, mode4))
36238 error ("the last argument must be scale 1, 2, 4, 8");
36239 return const0_rtx;
36242 /* Optimize. If mask is known to have all high bits set,
36243 replace op0 with pc_rtx to signal that the instruction
36244 overwrites the whole destination and doesn't use its
36245 previous contents. */
36246 if (optimize)
36248 if (TREE_CODE (arg3) == INTEGER_CST)
36250 if (integer_all_onesp (arg3))
36251 op0 = pc_rtx;
36253 else if (TREE_CODE (arg3) == VECTOR_CST)
36255 unsigned int negative = 0;
36256 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
36258 tree cst = VECTOR_CST_ELT (arg3, i);
36259 if (TREE_CODE (cst) == INTEGER_CST
36260 && tree_int_cst_sign_bit (cst))
36261 negative++;
36262 else if (TREE_CODE (cst) == REAL_CST
36263 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
36264 negative++;
36266 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
36267 op0 = pc_rtx;
36269 else if (TREE_CODE (arg3) == SSA_NAME
36270 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
36272 /* Recognize also when mask is like:
36273 __v2df src = _mm_setzero_pd ();
36274 __v2df mask = _mm_cmpeq_pd (src, src);
36276 __v8sf src = _mm256_setzero_ps ();
36277 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
36278 as that is a cheaper way to load all ones into
36279 a register than having to load a constant from
36280 memory. */
36281 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
36282 if (is_gimple_call (def_stmt))
36284 tree fndecl = gimple_call_fndecl (def_stmt);
36285 if (fndecl
36286 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
36287 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
36289 case IX86_BUILTIN_CMPPD:
36290 case IX86_BUILTIN_CMPPS:
36291 case IX86_BUILTIN_CMPPD256:
36292 case IX86_BUILTIN_CMPPS256:
36293 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
36294 break;
36295 /* FALLTHRU */
36296 case IX86_BUILTIN_CMPEQPD:
36297 case IX86_BUILTIN_CMPEQPS:
36298 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36299 && initializer_zerop (gimple_call_arg (def_stmt,
36300 1)))
36301 op0 = pc_rtx;
36302 break;
36303 default:
36304 break;
36310 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36311 if (! pat)
36312 return const0_rtx;
36313 emit_insn (pat);
36315 switch (fcode)
36317 case IX86_BUILTIN_GATHER3DIV16SF:
36318 if (target == NULL_RTX)
36319 target = gen_reg_rtx (V8SFmode);
36320 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36321 break;
36322 case IX86_BUILTIN_GATHER3DIV16SI:
36323 if (target == NULL_RTX)
36324 target = gen_reg_rtx (V8SImode);
36325 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36326 break;
36327 case IX86_BUILTIN_GATHERDIV8SF:
36328 if (target == NULL_RTX)
36329 target = gen_reg_rtx (V4SFmode);
36330 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36331 break;
36332 case IX86_BUILTIN_GATHERDIV8SI:
36333 if (target == NULL_RTX)
36334 target = gen_reg_rtx (V4SImode);
36335 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36336 break;
36337 default:
36338 target = subtarget;
36339 break;
36341 return target;
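/* For illustration (derived from the code in this block): the GATHERALT
   and GATHER3ALT variants handle the case where the index vector has
   twice as many elements as the data vector.  For the SIV forms (e.g.
   4 doubles indexed by 8 32-bit ints) only the low half of the index is
   needed, and for the DIV forms only the low half of the source and
   mask, which is why vec_extract_lo is emitted before the gather.
   Similarly, the GATHERDIV8SF and GATHER3DIV16SF results occupy only the
   low half of the wide destination, so vec_extract_lo is emitted on the
   result after the gather.  */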
36343 scatter_gen:
36344 arg0 = CALL_EXPR_ARG (exp, 0);
36345 arg1 = CALL_EXPR_ARG (exp, 1);
36346 arg2 = CALL_EXPR_ARG (exp, 2);
36347 arg3 = CALL_EXPR_ARG (exp, 3);
36348 arg4 = CALL_EXPR_ARG (exp, 4);
36349 op0 = expand_normal (arg0);
36350 op1 = expand_normal (arg1);
36351 op2 = expand_normal (arg2);
36352 op3 = expand_normal (arg3);
36353 op4 = expand_normal (arg4);
36354 mode1 = insn_data[icode].operand[1].mode;
36355 mode2 = insn_data[icode].operand[2].mode;
36356 mode3 = insn_data[icode].operand[3].mode;
36357 mode4 = insn_data[icode].operand[4].mode;
36359 /* Force the memory operand to use only a base register here; we
36360 don't want to do this for the memory operands of other builtin
36361 functions. */
36362 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36364 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36365 op0 = copy_to_mode_reg (Pmode, op0);
36367 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36369 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36370 op1 = copy_to_mode_reg (mode1, op1);
36372 else
36374 op1 = copy_to_reg (op1);
36375 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36378 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36379 op2 = copy_to_mode_reg (mode2, op2);
36381 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36382 op3 = copy_to_mode_reg (mode3, op3);
36384 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36386 error ("the last argument must be scale 1, 2, 4, 8");
36387 return const0_rtx;
36390 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36391 if (! pat)
36392 return const0_rtx;
36394 emit_insn (pat);
36395 return 0;
36397 vec_prefetch_gen:
36398 arg0 = CALL_EXPR_ARG (exp, 0);
36399 arg1 = CALL_EXPR_ARG (exp, 1);
36400 arg2 = CALL_EXPR_ARG (exp, 2);
36401 arg3 = CALL_EXPR_ARG (exp, 3);
36402 arg4 = CALL_EXPR_ARG (exp, 4);
36403 op0 = expand_normal (arg0);
36404 op1 = expand_normal (arg1);
36405 op2 = expand_normal (arg2);
36406 op3 = expand_normal (arg3);
36407 op4 = expand_normal (arg4);
36408 mode0 = insn_data[icode].operand[0].mode;
36409 mode1 = insn_data[icode].operand[1].mode;
36410 mode3 = insn_data[icode].operand[3].mode;
36411 mode4 = insn_data[icode].operand[4].mode;
36413 if (GET_MODE (op0) == mode0
36414 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36416 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36417 op0 = copy_to_mode_reg (mode0, op0);
36419 else if (op0 != constm1_rtx)
36421 op0 = copy_to_reg (op0);
36422 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36425 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36426 op1 = copy_to_mode_reg (mode1, op1);
36428 /* Force the memory operand to use only a base register here; we
36429 don't want to do this for the memory operands of other builtin
36430 functions. */
36431 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36433 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36434 op2 = copy_to_mode_reg (Pmode, op2);
36436 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36438 error ("the forth argument must be scale 1, 2, 4, 8");
36439 return const0_rtx;
36442 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36444 error ("incorrect hint operand");
36445 return const0_rtx;
36448 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36449 if (! pat)
36450 return const0_rtx;
36452 emit_insn (pat);
36454 return 0;
36456 case IX86_BUILTIN_XABORT:
36457 icode = CODE_FOR_xabort;
36458 arg0 = CALL_EXPR_ARG (exp, 0);
36459 op0 = expand_normal (arg0);
36460 mode0 = insn_data[icode].operand[0].mode;
36461 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36463 error ("the xabort's argument must be an 8-bit immediate");
36464 return const0_rtx;
36466 emit_insn (gen_xabort (op0));
36467 return 0;
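/* For illustration: XABORT encodes its operand directly in the
   instruction, so only an 8-bit compile-time constant is accepted; e.g.
   a call like _xabort (0xff) from rtmintrin.h compiles, while a variable
   abort code trips the predicate check above.  (The intrinsic name is
   mentioned as an assumption, for context.)  */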
36469 default:
36470 break;
36473 for (i = 0, d = bdesc_special_args;
36474 i < ARRAY_SIZE (bdesc_special_args);
36475 i++, d++)
36476 if (d->code == fcode)
36477 return ix86_expand_special_args_builtin (d, exp, target);
36479 for (i = 0, d = bdesc_args;
36480 i < ARRAY_SIZE (bdesc_args);
36481 i++, d++)
36482 if (d->code == fcode)
36483 switch (fcode)
36485 case IX86_BUILTIN_FABSQ:
36486 case IX86_BUILTIN_COPYSIGNQ:
36487 if (!TARGET_SSE)
36488 /* Emit a normal call if SSE isn't available. */
36489 return expand_call (exp, target, ignore);
36490 default:
36491 return ix86_expand_args_builtin (d, exp, target);
36494 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36495 if (d->code == fcode)
36496 return ix86_expand_sse_comi (d, exp, target);
36498 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36499 if (d->code == fcode)
36500 return ix86_expand_round_builtin (d, exp, target);
36502 for (i = 0, d = bdesc_pcmpestr;
36503 i < ARRAY_SIZE (bdesc_pcmpestr);
36504 i++, d++)
36505 if (d->code == fcode)
36506 return ix86_expand_sse_pcmpestr (d, exp, target);
36508 for (i = 0, d = bdesc_pcmpistr;
36509 i < ARRAY_SIZE (bdesc_pcmpistr);
36510 i++, d++)
36511 if (d->code == fcode)
36512 return ix86_expand_sse_pcmpistr (d, exp, target);
36514 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36515 if (d->code == fcode)
36516 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36517 (enum ix86_builtin_func_type)
36518 d->flag, d->comparison);
36520 gcc_unreachable ();
36523 /* Return the target-specific builtin with code CODE if
36524 current_function_decl is allowed to use this builtin, which is checked
36525 using its ISA flags. Return NULL_TREE otherwise. */
36527 static tree ix86_get_builtin (enum ix86_builtins code)
36529 struct cl_target_option *opts;
36530 tree target_tree = NULL_TREE;
36532 /* Determine the isa flags of current_function_decl. */
36534 if (current_function_decl)
36535 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36537 if (target_tree == NULL)
36538 target_tree = target_option_default_node;
36540 opts = TREE_TARGET_OPTION (target_tree);
36542 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36543 return ix86_builtin_decl (code, true);
36544 else
36545 return NULL_TREE;
36548 /* Returns a function decl for a vectorized version of the builtin function
36549 with builtin function code FN and the result vector type TYPE, or NULL_TREE
36550 if it is not available. */
36552 static tree
36553 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36554 tree type_in)
36556 enum machine_mode in_mode, out_mode;
36557 int in_n, out_n;
36558 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36560 if (TREE_CODE (type_out) != VECTOR_TYPE
36561 || TREE_CODE (type_in) != VECTOR_TYPE
36562 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36563 return NULL_TREE;
36565 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36566 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36567 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36568 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36570 switch (fn)
36572 case BUILT_IN_SQRT:
36573 if (out_mode == DFmode && in_mode == DFmode)
36575 if (out_n == 2 && in_n == 2)
36576 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36577 else if (out_n == 4 && in_n == 4)
36578 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36579 else if (out_n == 8 && in_n == 8)
36580 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36582 break;
36584 case BUILT_IN_EXP2F:
36585 if (out_mode == SFmode && in_mode == SFmode)
36587 if (out_n == 16 && in_n == 16)
36588 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36590 break;
36592 case BUILT_IN_SQRTF:
36593 if (out_mode == SFmode && in_mode == SFmode)
36595 if (out_n == 4 && in_n == 4)
36596 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36597 else if (out_n == 8 && in_n == 8)
36598 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36599 else if (out_n == 16 && in_n == 16)
36600 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36602 break;
36604 case BUILT_IN_IFLOOR:
36605 case BUILT_IN_LFLOOR:
36606 case BUILT_IN_LLFLOOR:
36607 /* The round insn does not trap on denormals. */
36608 if (flag_trapping_math || !TARGET_ROUND)
36609 break;
36611 if (out_mode == SImode && in_mode == DFmode)
36613 if (out_n == 4 && in_n == 2)
36614 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36615 else if (out_n == 8 && in_n == 4)
36616 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36617 else if (out_n == 16 && in_n == 8)
36618 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36620 break;
36622 case BUILT_IN_IFLOORF:
36623 case BUILT_IN_LFLOORF:
36624 case BUILT_IN_LLFLOORF:
36625 /* The round insn does not trap on denormals. */
36626 if (flag_trapping_math || !TARGET_ROUND)
36627 break;
36629 if (out_mode == SImode && in_mode == SFmode)
36631 if (out_n == 4 && in_n == 4)
36632 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36633 else if (out_n == 8 && in_n == 8)
36634 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36636 break;
36638 case BUILT_IN_ICEIL:
36639 case BUILT_IN_LCEIL:
36640 case BUILT_IN_LLCEIL:
36641 /* The round insn does not trap on denormals. */
36642 if (flag_trapping_math || !TARGET_ROUND)
36643 break;
36645 if (out_mode == SImode && in_mode == DFmode)
36647 if (out_n == 4 && in_n == 2)
36648 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36649 else if (out_n == 8 && in_n == 4)
36650 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36651 else if (out_n == 16 && in_n == 8)
36652 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36654 break;
36656 case BUILT_IN_ICEILF:
36657 case BUILT_IN_LCEILF:
36658 case BUILT_IN_LLCEILF:
36659 /* The round insn does not trap on denormals. */
36660 if (flag_trapping_math || !TARGET_ROUND)
36661 break;
36663 if (out_mode == SImode && in_mode == SFmode)
36665 if (out_n == 4 && in_n == 4)
36666 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36667 else if (out_n == 8 && in_n == 8)
36668 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36670 break;
36672 case BUILT_IN_IRINT:
36673 case BUILT_IN_LRINT:
36674 case BUILT_IN_LLRINT:
36675 if (out_mode == SImode && in_mode == DFmode)
36677 if (out_n == 4 && in_n == 2)
36678 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36679 else if (out_n == 8 && in_n == 4)
36680 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36682 break;
36684 case BUILT_IN_IRINTF:
36685 case BUILT_IN_LRINTF:
36686 case BUILT_IN_LLRINTF:
36687 if (out_mode == SImode && in_mode == SFmode)
36689 if (out_n == 4 && in_n == 4)
36690 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36691 else if (out_n == 8 && in_n == 8)
36692 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36694 break;
36696 case BUILT_IN_IROUND:
36697 case BUILT_IN_LROUND:
36698 case BUILT_IN_LLROUND:
36699 /* The round insn does not trap on denormals. */
36700 if (flag_trapping_math || !TARGET_ROUND)
36701 break;
36703 if (out_mode == SImode && in_mode == DFmode)
36705 if (out_n == 4 && in_n == 2)
36706 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36707 else if (out_n == 8 && in_n == 4)
36708 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36709 else if (out_n == 16 && in_n == 8)
36710 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36712 break;
36714 case BUILT_IN_IROUNDF:
36715 case BUILT_IN_LROUNDF:
36716 case BUILT_IN_LLROUNDF:
36717 /* The round insn does not trap on denormals. */
36718 if (flag_trapping_math || !TARGET_ROUND)
36719 break;
36721 if (out_mode == SImode && in_mode == SFmode)
36723 if (out_n == 4 && in_n == 4)
36724 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36725 else if (out_n == 8 && in_n == 8)
36726 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36728 break;
36730 case BUILT_IN_COPYSIGN:
36731 if (out_mode == DFmode && in_mode == DFmode)
36733 if (out_n == 2 && in_n == 2)
36734 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36735 else if (out_n == 4 && in_n == 4)
36736 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36737 else if (out_n == 8 && in_n == 8)
36738 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36740 break;
36742 case BUILT_IN_COPYSIGNF:
36743 if (out_mode == SFmode && in_mode == SFmode)
36745 if (out_n == 4 && in_n == 4)
36746 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36747 else if (out_n == 8 && in_n == 8)
36748 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36749 else if (out_n == 16 && in_n == 16)
36750 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36752 break;
36754 case BUILT_IN_FLOOR:
36755 /* The round insn does not trap on denormals. */
36756 if (flag_trapping_math || !TARGET_ROUND)
36757 break;
36759 if (out_mode == DFmode && in_mode == DFmode)
36761 if (out_n == 2 && in_n == 2)
36762 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36763 else if (out_n == 4 && in_n == 4)
36764 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36766 break;
36768 case BUILT_IN_FLOORF:
36769 /* The round insn does not trap on denormals. */
36770 if (flag_trapping_math || !TARGET_ROUND)
36771 break;
36773 if (out_mode == SFmode && in_mode == SFmode)
36775 if (out_n == 4 && in_n == 4)
36776 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36777 else if (out_n == 8 && in_n == 8)
36778 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36780 break;
36782 case BUILT_IN_CEIL:
36783 /* The round insn does not trap on denormals. */
36784 if (flag_trapping_math || !TARGET_ROUND)
36785 break;
36787 if (out_mode == DFmode && in_mode == DFmode)
36789 if (out_n == 2 && in_n == 2)
36790 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36791 else if (out_n == 4 && in_n == 4)
36792 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36794 break;
36796 case BUILT_IN_CEILF:
36797 /* The round insn does not trap on denormals. */
36798 if (flag_trapping_math || !TARGET_ROUND)
36799 break;
36801 if (out_mode == SFmode && in_mode == SFmode)
36803 if (out_n == 4 && in_n == 4)
36804 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36805 else if (out_n == 8 && in_n == 8)
36806 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36808 break;
36810 case BUILT_IN_TRUNC:
36811 /* The round insn does not trap on denormals. */
36812 if (flag_trapping_math || !TARGET_ROUND)
36813 break;
36815 if (out_mode == DFmode && in_mode == DFmode)
36817 if (out_n == 2 && in_n == 2)
36818 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36819 else if (out_n == 4 && in_n == 4)
36820 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36822 break;
36824 case BUILT_IN_TRUNCF:
36825 /* The round insn does not trap on denormals. */
36826 if (flag_trapping_math || !TARGET_ROUND)
36827 break;
36829 if (out_mode == SFmode && in_mode == SFmode)
36831 if (out_n == 4 && in_n == 4)
36832 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36833 else if (out_n == 8 && in_n == 8)
36834 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36836 break;
36838 case BUILT_IN_RINT:
36839 /* The round insn does not trap on denormals. */
36840 if (flag_trapping_math || !TARGET_ROUND)
36841 break;
36843 if (out_mode == DFmode && in_mode == DFmode)
36845 if (out_n == 2 && in_n == 2)
36846 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36847 else if (out_n == 4 && in_n == 4)
36848 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36850 break;
36852 case BUILT_IN_RINTF:
36853 /* The round insn does not trap on denormals. */
36854 if (flag_trapping_math || !TARGET_ROUND)
36855 break;
36857 if (out_mode == SFmode && in_mode == SFmode)
36859 if (out_n == 4 && in_n == 4)
36860 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36861 else if (out_n == 8 && in_n == 8)
36862 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36864 break;
36866 case BUILT_IN_ROUND:
36867 /* The round insn does not trap on denormals. */
36868 if (flag_trapping_math || !TARGET_ROUND)
36869 break;
36871 if (out_mode == DFmode && in_mode == DFmode)
36873 if (out_n == 2 && in_n == 2)
36874 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36875 else if (out_n == 4 && in_n == 4)
36876 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36878 break;
36880 case BUILT_IN_ROUNDF:
36881 /* The round insn does not trap on denormals. */
36882 if (flag_trapping_math || !TARGET_ROUND)
36883 break;
36885 if (out_mode == SFmode && in_mode == SFmode)
36887 if (out_n == 4 && in_n == 4)
36888 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36889 else if (out_n == 8 && in_n == 8)
36890 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36892 break;
36894 case BUILT_IN_FMA:
36895 if (out_mode == DFmode && in_mode == DFmode)
36897 if (out_n == 2 && in_n == 2)
36898 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36899 if (out_n == 4 && in_n == 4)
36900 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36902 break;
36904 case BUILT_IN_FMAF:
36905 if (out_mode == SFmode && in_mode == SFmode)
36907 if (out_n == 4 && in_n == 4)
36908 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36909 if (out_n == 8 && in_n == 8)
36910 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36912 break;
36914 default:
36915 break;
36918 /* Dispatch to a handler for a vectorization library. */
36919 if (ix86_veclib_handler)
36920 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36921 type_in);
36923 return NULL_TREE;
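/* For illustration (a sketch, not part of the original source): this
   hook lets the vectorizer replace a scalar libm call with one of the
   packed builtins above.  With -mavx, a loop such as

     for (i = 0; i < n; i++)
       a[i] = __builtin_sqrt (b[i]);

   over doubles can use IX86_BUILTIN_SQRTPD256 (4 doubles per call),
   provided the input and output vector types agree as checked above.  */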
36926 /* Handler for an SVML-style interface to
36927 a library with vectorized intrinsics. */
36929 static tree
36930 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36932 char name[20];
36933 tree fntype, new_fndecl, args;
36934 unsigned arity;
36935 const char *bname;
36936 enum machine_mode el_mode, in_mode;
36937 int n, in_n;
36939 /* The SVML is suitable for unsafe math only. */
36940 if (!flag_unsafe_math_optimizations)
36941 return NULL_TREE;
36943 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36944 n = TYPE_VECTOR_SUBPARTS (type_out);
36945 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36946 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36947 if (el_mode != in_mode
36948 || n != in_n)
36949 return NULL_TREE;
36951 switch (fn)
36953 case BUILT_IN_EXP:
36954 case BUILT_IN_LOG:
36955 case BUILT_IN_LOG10:
36956 case BUILT_IN_POW:
36957 case BUILT_IN_TANH:
36958 case BUILT_IN_TAN:
36959 case BUILT_IN_ATAN:
36960 case BUILT_IN_ATAN2:
36961 case BUILT_IN_ATANH:
36962 case BUILT_IN_CBRT:
36963 case BUILT_IN_SINH:
36964 case BUILT_IN_SIN:
36965 case BUILT_IN_ASINH:
36966 case BUILT_IN_ASIN:
36967 case BUILT_IN_COSH:
36968 case BUILT_IN_COS:
36969 case BUILT_IN_ACOSH:
36970 case BUILT_IN_ACOS:
36971 if (el_mode != DFmode || n != 2)
36972 return NULL_TREE;
36973 break;
36975 case BUILT_IN_EXPF:
36976 case BUILT_IN_LOGF:
36977 case BUILT_IN_LOG10F:
36978 case BUILT_IN_POWF:
36979 case BUILT_IN_TANHF:
36980 case BUILT_IN_TANF:
36981 case BUILT_IN_ATANF:
36982 case BUILT_IN_ATAN2F:
36983 case BUILT_IN_ATANHF:
36984 case BUILT_IN_CBRTF:
36985 case BUILT_IN_SINHF:
36986 case BUILT_IN_SINF:
36987 case BUILT_IN_ASINHF:
36988 case BUILT_IN_ASINF:
36989 case BUILT_IN_COSHF:
36990 case BUILT_IN_COSF:
36991 case BUILT_IN_ACOSHF:
36992 case BUILT_IN_ACOSF:
36993 if (el_mode != SFmode || n != 4)
36994 return NULL_TREE;
36995 break;
36997 default:
36998 return NULL_TREE;
37001 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
37003 if (fn == BUILT_IN_LOGF)
37004 strcpy (name, "vmlsLn4");
37005 else if (fn == BUILT_IN_LOG)
37006 strcpy (name, "vmldLn2");
37007 else if (n == 4)
37009 sprintf (name, "vmls%s", bname+10);
37010 name[strlen (name)-1] = '4';
37012 else
37013 sprintf (name, "vmld%s2", bname+10);
37015 /* Convert to uppercase. */
37016 name[4] &= ~0x20;
37018 arity = 0;
37019 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
37020 args;
37021 args = TREE_CHAIN (args))
37022 arity++;
37024 if (arity == 1)
37025 fntype = build_function_type_list (type_out, type_in, NULL);
37026 else
37027 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
37029 /* Build a function declaration for the vectorized function. */
37030 new_fndecl = build_decl (BUILTINS_LOCATION,
37031 FUNCTION_DECL, get_identifier (name), fntype);
37032 TREE_PUBLIC (new_fndecl) = 1;
37033 DECL_EXTERNAL (new_fndecl) = 1;
37034 DECL_IS_NOVOPS (new_fndecl) = 1;
37035 TREE_READONLY (new_fndecl) = 1;
37037 return new_fndecl;
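/* Worked example of the SVML name mangling above (derived from this
   code, for illustration): for BUILT_IN_SINF on 4 floats, bname is
   "__builtin_sinf", so the buffer becomes "vmlssinf", then "vmlssin4"
   after the last character is replaced, and "vmlsSin4" after the fifth
   character is uppercased.  BUILT_IN_SIN on 2 doubles yields "vmldSin2",
   and the log functions are special-cased to "vmlsLn4" and "vmldLn2".  */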
37040 /* Handler for an ACML-style interface to
37041 a library with vectorized intrinsics. */
37043 static tree
37044 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
37046 char name[20] = "__vr.._";
37047 tree fntype, new_fndecl, args;
37048 unsigned arity;
37049 const char *bname;
37050 enum machine_mode el_mode, in_mode;
37051 int n, in_n;
37053 /* The ACML is 64-bit only and suitable for unsafe math only, as
37054 it does not correctly support parts of IEEE with the required
37055 precision, such as denormals. */
37056 if (!TARGET_64BIT
37057 || !flag_unsafe_math_optimizations)
37058 return NULL_TREE;
37060 el_mode = TYPE_MODE (TREE_TYPE (type_out));
37061 n = TYPE_VECTOR_SUBPARTS (type_out);
37062 in_mode = TYPE_MODE (TREE_TYPE (type_in));
37063 in_n = TYPE_VECTOR_SUBPARTS (type_in);
37064 if (el_mode != in_mode
37065 || n != in_n)
37066 return NULL_TREE;
37068 switch (fn)
37070 case BUILT_IN_SIN:
37071 case BUILT_IN_COS:
37072 case BUILT_IN_EXP:
37073 case BUILT_IN_LOG:
37074 case BUILT_IN_LOG2:
37075 case BUILT_IN_LOG10:
37076 name[4] = 'd';
37077 name[5] = '2';
37078 if (el_mode != DFmode
37079 || n != 2)
37080 return NULL_TREE;
37081 break;
37083 case BUILT_IN_SINF:
37084 case BUILT_IN_COSF:
37085 case BUILT_IN_EXPF:
37086 case BUILT_IN_POWF:
37087 case BUILT_IN_LOGF:
37088 case BUILT_IN_LOG2F:
37089 case BUILT_IN_LOG10F:
37090 name[4] = 's';
37091 name[5] = '4';
37092 if (el_mode != SFmode
37093 || n != 4)
37094 return NULL_TREE;
37095 break;
37097 default:
37098 return NULL_TREE;
37101 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
37102 sprintf (name + 7, "%s", bname+10);
37104 arity = 0;
37105 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
37106 args;
37107 args = TREE_CHAIN (args))
37108 arity++;
37110 if (arity == 1)
37111 fntype = build_function_type_list (type_out, type_in, NULL);
37112 else
37113 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
37115 /* Build a function declaration for the vectorized function. */
37116 new_fndecl = build_decl (BUILTINS_LOCATION,
37117 FUNCTION_DECL, get_identifier (name), fntype);
37118 TREE_PUBLIC (new_fndecl) = 1;
37119 DECL_EXTERNAL (new_fndecl) = 1;
37120 DECL_IS_NOVOPS (new_fndecl) = 1;
37121 TREE_READONLY (new_fndecl) = 1;
37123 return new_fndecl;
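/* Worked example of the ACML name mangling above (derived from this
   code, for illustration): the template "__vr.._" gets 'd' and '2' (or
   's' and '4') patched in and the builtin's name past "__builtin_"
   appended, so BUILT_IN_SIN on 2 doubles becomes "__vrd2_sin" and
   BUILT_IN_SINF on 4 floats becomes "__vrs4_sinf".  */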
37126 /* Returns a decl of a function that implements gather load with
37127 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
37128 Return NULL_TREE if it is not available. */
37130 static tree
37131 ix86_vectorize_builtin_gather (const_tree mem_vectype,
37132 const_tree index_type, int scale)
37134 bool si;
37135 enum ix86_builtins code;
37137 if (! TARGET_AVX2)
37138 return NULL_TREE;
37140 if ((TREE_CODE (index_type) != INTEGER_TYPE
37141 && !POINTER_TYPE_P (index_type))
37142 || (TYPE_MODE (index_type) != SImode
37143 && TYPE_MODE (index_type) != DImode))
37144 return NULL_TREE;
37146 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
37147 return NULL_TREE;
37149 /* The v*gather* insns sign-extend the index to pointer mode. */
37150 if (TYPE_PRECISION (index_type) < POINTER_SIZE
37151 && TYPE_UNSIGNED (index_type))
37152 return NULL_TREE;
37154 if (scale <= 0
37155 || scale > 8
37156 || (scale & (scale - 1)) != 0)
37157 return NULL_TREE;
37159 si = TYPE_MODE (index_type) == SImode;
37160 switch (TYPE_MODE (mem_vectype))
37162 case V2DFmode:
37163 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
37164 break;
37165 case V4DFmode:
37166 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
37167 break;
37168 case V2DImode:
37169 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
37170 break;
37171 case V4DImode:
37172 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
37173 break;
37174 case V4SFmode:
37175 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
37176 break;
37177 case V8SFmode:
37178 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
37179 break;
37180 case V4SImode:
37181 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
37182 break;
37183 case V8SImode:
37184 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
37185 break;
37186 case V8DFmode:
37187 if (TARGET_AVX512F)
37188 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
37189 else
37190 return NULL_TREE;
37191 break;
37192 case V8DImode:
37193 if (TARGET_AVX512F)
37194 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
37195 else
37196 return NULL_TREE;
37197 break;
37198 case V16SFmode:
37199 if (TARGET_AVX512F)
37200 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
37201 else
37202 return NULL_TREE;
37203 break;
37204 case V16SImode:
37205 if (TARGET_AVX512F)
37206 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
37207 else
37208 return NULL_TREE;
37209 break;
37210 default:
37211 return NULL_TREE;
37214 return ix86_get_builtin (code);
37217 /* Return a decl of a target-specific builtin that implements the
37218 reciprocal of the function FN, or NULL_TREE if not available. */
37220 static tree
37221 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
37222 bool sqrt ATTRIBUTE_UNUSED)
37224 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
37225 && flag_finite_math_only && !flag_trapping_math
37226 && flag_unsafe_math_optimizations))
37227 return NULL_TREE;
37229 if (md_fn)
37230 /* Machine dependent builtins. */
37231 switch (fn)
37233 /* Vectorized version of sqrt to rsqrt conversion. */
37234 case IX86_BUILTIN_SQRTPS_NR:
37235 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
37237 case IX86_BUILTIN_SQRTPS_NR256:
37238 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
37240 default:
37241 return NULL_TREE;
37243 else
37244 /* Normal builtins. */
37245 switch (fn)
37247 /* Sqrt to rsqrt conversion. */
37248 case BUILT_IN_SQRTF:
37249 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
37251 default:
37252 return NULL_TREE;
37256 /* Helper for avx_vpermilps256_operand et al. This is also used by
37257 the expansion functions to turn the parallel back into a mask.
37258 The return value is 0 for no match and the imm8+1 for a match. */
37261 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
37263 unsigned i, nelt = GET_MODE_NUNITS (mode);
37264 unsigned mask = 0;
37265 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
37267 if (XVECLEN (par, 0) != (int) nelt)
37268 return 0;
37270 /* Validate that all of the elements are constants, and not totally
37271 out of range. Copy the data into an integral array to make the
37272 subsequent checks easier. */
37273 for (i = 0; i < nelt; ++i)
37275 rtx er = XVECEXP (par, 0, i);
37276 unsigned HOST_WIDE_INT ei;
37278 if (!CONST_INT_P (er))
37279 return 0;
37280 ei = INTVAL (er);
37281 if (ei >= nelt)
37282 return 0;
37283 ipar[i] = ei;
37286 switch (mode)
37288 case V8DFmode:
37289 /* In the 512-bit DFmode case, we can only move elements within
37290 a 128-bit lane. First fill the second part of the mask,
37291 then fallthru. */
37292 for (i = 4; i < 6; ++i)
37294 if (ipar[i] < 4 || ipar[i] >= 6)
37295 return 0;
37296 mask |= (ipar[i] - 4) << i;
37298 for (i = 6; i < 8; ++i)
37300 if (ipar[i] < 6)
37301 return 0;
37302 mask |= (ipar[i] - 6) << i;
37304 /* FALLTHRU */
37306 case V4DFmode:
37307 /* In the 256-bit DFmode case, we can only move elements within
37308 a 128-bit lane. */
37309 for (i = 0; i < 2; ++i)
37311 if (ipar[i] >= 2)
37312 return 0;
37313 mask |= ipar[i] << i;
37315 for (i = 2; i < 4; ++i)
37317 if (ipar[i] < 2)
37318 return 0;
37319 mask |= (ipar[i] - 2) << i;
37321 break;
37323 case V16SFmode:
37324 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37325 must mirror the permutation in the lower 256 bits. */
37326 for (i = 0; i < 8; ++i)
37327 if (ipar[i] + 8 != ipar[i + 8])
37328 return 0;
37329 /* FALLTHRU */
37331 case V8SFmode:
37332 /* In the 256-bit SFmode case, we have full freedom of
37333 movement within the low 128-bit lane, but the high 128-bit
37334 lane must mirror exactly the same pattern. */
37335 for (i = 0; i < 4; ++i)
37336 if (ipar[i] + 4 != ipar[i + 4])
37337 return 0;
37338 nelt = 4;
37339 /* FALLTHRU */
37341 case V2DFmode:
37342 case V4SFmode:
37343 /* In the 128-bit case, we have full freedom in the placement of
37344 the elements from the source operand. */
37345 for (i = 0; i < nelt; ++i)
37346 mask |= ipar[i] << (i * (nelt / 2));
37347 break;
37349 default:
37350 gcc_unreachable ();
37353 /* Make sure success has a non-zero value by adding one. */
37354 return mask + 1;
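/* Worked example (derived from the code above, for illustration): for a
   V4SFmode parallel selecting elements (1 0 3 2), the loop builds
   mask = 1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xb1, and the function returns
   0xb2, i.e. the vpermilps immediate plus one.  */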
37357 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37358 the expansion functions to turn the parallel back into a mask.
37359 The return value is 0 for no match and the imm8+1 for a match. */
37362 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37364 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37365 unsigned mask = 0;
37366 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37368 if (XVECLEN (par, 0) != (int) nelt)
37369 return 0;
37371 /* Validate that all of the elements are constants, and not totally
37372 out of range. Copy the data into an integral array to make the
37373 subsequent checks easier. */
37374 for (i = 0; i < nelt; ++i)
37376 rtx er = XVECEXP (par, 0, i);
37377 unsigned HOST_WIDE_INT ei;
37379 if (!CONST_INT_P (er))
37380 return 0;
37381 ei = INTVAL (er);
37382 if (ei >= 2 * nelt)
37383 return 0;
37384 ipar[i] = ei;
37387 /* Validate that each half of the permute selects consecutive elements. */
37388 for (i = 0; i < nelt2 - 1; ++i)
37389 if (ipar[i] + 1 != ipar[i + 1])
37390 return 0;
37391 for (i = nelt2; i < nelt - 1; ++i)
37392 if (ipar[i] + 1 != ipar[i + 1])
37393 return 0;
37395 /* Reconstruct the mask. */
37396 for (i = 0; i < 2; ++i)
37398 unsigned e = ipar[i * nelt2];
37399 if (e % nelt2)
37400 return 0;
37401 e /= nelt2;
37402 mask |= e << (i * 4);
37405 /* Make sure success has a non-zero value by adding one. */
37406 return mask + 1;
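/* Worked example (derived from the code above, for illustration): for a
   V4DFmode parallel selecting elements (2 3 4 5) of the concatenated
   operands, the low result lane comes from lane 1 of the first operand
   and the high result lane from lane 0 of the second operand (lane 2 of
   the concatenation), giving mask = 1 | (2 << 4) = 0x21; the function
   returns 0x22.  */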
37409 /* Return a register priority for hard reg REGNO. */
37410 static int
37411 ix86_register_priority (int hard_regno)
37413 /* ebp and r13 as the base always want a displacement, and r12 as the
37414 base always wants an index. So discourage their use in an
37415 address. */
37416 if (hard_regno == R12_REG || hard_regno == R13_REG)
37417 return 0;
37418 if (hard_regno == BP_REG)
37419 return 1;
37420 /* New x86-64 int registers result in bigger code size. Discourage
37421 them. */
37422 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37423 return 2;
37424 /* New x86-64 SSE registers result in bigger code size. Discourage
37425 them. */
37426 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37427 return 2;
37428 /* Usage of AX register results in smaller code. Prefer it. */
37429 if (hard_regno == 0)
37430 return 4;
37431 return 3;
37434 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37436 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37437 QImode must go into class Q_REGS.
37438 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37439 movdf to do mem-to-mem moves through integer regs. */
37441 static reg_class_t
37442 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37444 enum machine_mode mode = GET_MODE (x);
37446 /* We're only allowed to return a subclass of CLASS. Many of the
37447 following checks fail for NO_REGS, so eliminate that early. */
37448 if (regclass == NO_REGS)
37449 return NO_REGS;
37451 /* All classes can load zeros. */
37452 if (x == CONST0_RTX (mode))
37453 return regclass;
37455 /* Force constants into memory if we are loading a (nonzero) constant into
37456 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37457 instructions to load from a constant. */
37458 if (CONSTANT_P (x)
37459 && (MAYBE_MMX_CLASS_P (regclass)
37460 || MAYBE_SSE_CLASS_P (regclass)
37461 || MAYBE_MASK_CLASS_P (regclass)))
37462 return NO_REGS;
37464 /* Prefer SSE regs only, if we can use them for math. */
37465 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37466 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37468 /* Floating-point constants need more complex checks. */
37469 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37471 /* General regs can load everything. */
37472 if (reg_class_subset_p (regclass, GENERAL_REGS))
37473 return regclass;
37475 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37476 zero above. We only want to wind up preferring 80387 registers if
37477 we plan on doing computation with them. */
37478 if (TARGET_80387
37479 && standard_80387_constant_p (x) > 0)
37481 /* Limit class to non-sse. */
37482 if (regclass == FLOAT_SSE_REGS)
37483 return FLOAT_REGS;
37484 if (regclass == FP_TOP_SSE_REGS)
37485 return FP_TOP_REG;
37486 if (regclass == FP_SECOND_SSE_REGS)
37487 return FP_SECOND_REG;
37488 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37489 return regclass;
37492 return NO_REGS;
37495 /* Generally when we see PLUS here, it's the function invariant
37496 (plus soft-fp const_int). Which can only be computed into general
37497 regs. */
37498 if (GET_CODE (x) == PLUS)
37499 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37501 /* QImode constants are easy to load, but non-constant QImode data
37502 must go into Q_REGS. */
37503 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37505 if (reg_class_subset_p (regclass, Q_REGS))
37506 return regclass;
37507 if (reg_class_subset_p (Q_REGS, regclass))
37508 return Q_REGS;
37509 return NO_REGS;
37512 return regclass;
37515 /* Discourage putting floating-point values in SSE registers unless
37516 SSE math is being used, and likewise for the 387 registers. */
37517 static reg_class_t
37518 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37520 enum machine_mode mode = GET_MODE (x);
37522 /* Restrict the output reload class to the register bank that we are doing
37523 math on. If we would like not to return a subset of CLASS, reject this
37524 alternative: if reload cannot do this, it will still use its choice. */
37525 mode = GET_MODE (x);
37526 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37527 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37529 if (X87_FLOAT_MODE_P (mode))
37531 if (regclass == FP_TOP_SSE_REGS)
37532 return FP_TOP_REG;
37533 else if (regclass == FP_SECOND_SSE_REGS)
37534 return FP_SECOND_REG;
37535 else
37536 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37539 return regclass;
37542 static reg_class_t
37543 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37544 enum machine_mode mode, secondary_reload_info *sri)
37546 /* Double-word spills from general registers to non-offsettable memory
37547 references (zero-extended addresses) require special handling. */
37548 if (TARGET_64BIT
37549 && MEM_P (x)
37550 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37551 && INTEGER_CLASS_P (rclass)
37552 && !offsettable_memref_p (x))
37554 sri->icode = (in_p
37555 ? CODE_FOR_reload_noff_load
37556 : CODE_FOR_reload_noff_store);
37557 /* Add the cost of moving address to a temporary. */
37558 sri->extra_cost = 1;
37560 return NO_REGS;
37563 /* QImode spills from non-QI registers require
37564 intermediate register on 32bit targets. */
37565 if (mode == QImode
37566 && (MAYBE_MASK_CLASS_P (rclass)
37567 || (!TARGET_64BIT && !in_p
37568 && INTEGER_CLASS_P (rclass)
37569 && MAYBE_NON_Q_CLASS_P (rclass))))
37571 int regno;
37573 if (REG_P (x))
37574 regno = REGNO (x);
37575 else
37576 regno = -1;
37578 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37579 regno = true_regnum (x);
37581 /* Return Q_REGS if the operand is in memory. */
37582 if (regno == -1)
37583 return Q_REGS;
37586 /* This condition handles a corner case where an expression involving
37587 pointers gets vectorized. We're trying to use the address of a
37588 stack slot as a vector initializer.
37590 (set (reg:V2DI 74 [ vect_cst_.2 ])
37591 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37593 Eventually frame gets turned into sp+offset like this:
37595 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37596 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37597 (const_int 392 [0x188]))))
37599 That later gets turned into:
37601 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37602 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37603 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37605 We'll have the following reload recorded:
37607 Reload 0: reload_in (DI) =
37608 (plus:DI (reg/f:DI 7 sp)
37609 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37610 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37611 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37612 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37613 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37614 reload_reg_rtx: (reg:V2DI 22 xmm1)
37616 Which isn't going to work since SSE instructions can't handle scalar
37617 additions. Returning GENERAL_REGS forces the addition into an integer
37618 register, and reload can handle subsequent reloads without problems. */
37620 if (in_p && GET_CODE (x) == PLUS
37621 && SSE_CLASS_P (rclass)
37622 && SCALAR_INT_MODE_P (mode))
37623 return GENERAL_REGS;
37625 return NO_REGS;
37628 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37630 static bool
37631 ix86_class_likely_spilled_p (reg_class_t rclass)
37633 switch (rclass)
37635 case AREG:
37636 case DREG:
37637 case CREG:
37638 case BREG:
37639 case AD_REGS:
37640 case SIREG:
37641 case DIREG:
37642 case SSE_FIRST_REG:
37643 case FP_TOP_REG:
37644 case FP_SECOND_REG:
37645 return true;
37647 default:
37648 break;
37651 return false;
37654 /* If we are copying between general and FP registers, we need a memory
37655 location. The same is true for SSE and MMX registers.
37657 To optimize register_move_cost performance, allow inline variant.
37659 The macro can't work reliably when one of the CLASSES is a class containing
37660 registers from multiple units (SSE, MMX, integer). We avoid this by never
37661 combining those units in a single alternative in the machine description.
37662 Ensure that this constraint holds to avoid unexpected surprises.
37664 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37665 enforce these sanity checks. */
37667 static inline bool
37668 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37669 enum machine_mode mode, int strict)
37671 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37672 return false;
37673 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37674 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37675 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37676 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37677 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37678 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37680 gcc_assert (!strict || lra_in_progress);
37681 return true;
37684 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37685 return true;
37687 /* ??? This is a lie. We do have moves between mmx/general, and for
37688 mmx/sse2. But by saying we need secondary memory we discourage the
37689 register allocator from using the mmx registers unless needed. */
37690 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37691 return true;
37693 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37695 /* SSE1 doesn't have any direct moves from other classes. */
37696 if (!TARGET_SSE2)
37697 return true;
37699 /* If the target says that inter-unit moves are more expensive
37700 than moving through memory, then don't generate them. */
37701 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37702 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37703 return true;
37705 /* Between SSE and general, we have moves no larger than word size. */
37706 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37707 return true;
37710 return false;
37713 bool
37714 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37715 enum machine_mode mode, int strict)
37717 return inline_secondary_memory_needed (class1, class2, mode, strict);
37720 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37722 On the 80386, this is the size of MODE in words,
37723 except in the FP regs, where a single reg is always enough. */
37725 static unsigned char
37726 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37728 if (MAYBE_INTEGER_CLASS_P (rclass))
37730 if (mode == XFmode)
37731 return (TARGET_64BIT ? 2 : 3);
37732 else if (mode == XCmode)
37733 return (TARGET_64BIT ? 4 : 6);
37734 else
37735 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37737 else
37739 if (COMPLEX_MODE_P (mode))
37740 return 2;
37741 else
37742 return 1;
37746 /* Return true if the registers in CLASS cannot represent the change from
37747 modes FROM to TO. */
37749 bool
37750 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37751 enum reg_class regclass)
37753 if (from == to)
37754 return false;
37756 /* x87 registers can't do subreg at all, as all values are reformatted
37757 to extended precision. */
37758 if (MAYBE_FLOAT_CLASS_P (regclass))
37759 return true;
37761 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37763 /* Vector registers do not support QI or HImode loads. If we don't
37764 disallow a change to these modes, reload will assume it's ok to
37765 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37766 the vec_dupv4hi pattern. */
37767 if (GET_MODE_SIZE (from) < 4)
37768 return true;
37770 /* Vector registers do not support subreg with nonzero offsets, which
37771 are otherwise valid for integer registers. Since we can't see
37772 whether we have a nonzero offset from here, prohibit all
37773 nonparadoxical subregs changing size. */
37774 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37775 return true;
37778 return false;
37781 /* Return the cost of moving data of mode M between a
37782 register and memory. A value of 2 is the default; this cost is
37783 relative to those in `REGISTER_MOVE_COST'.
37785 This function is used extensively by register_move_cost, which is used to
37786 build tables at startup. Make it inline in this case.
37787 When IN is 2, return the maximum of the in and out move costs.
37789 If moving between registers and memory is more expensive than
37790 between two registers, you should define this macro to express the
37791 relative cost.
37793 Also model the increased cost of moving QImode registers in non-Q_REGS
37794 classes. */
37796 static inline int
37797 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37798 int in)
37800 int cost;
37801 if (FLOAT_CLASS_P (regclass))
37803 int index;
37804 switch (mode)
37806 case SFmode:
37807 index = 0;
37808 break;
37809 case DFmode:
37810 index = 1;
37811 break;
37812 case XFmode:
37813 index = 2;
37814 break;
37815 default:
37816 return 100;
37818 if (in == 2)
37819 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37820 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37822 if (SSE_CLASS_P (regclass))
37824 int index;
37825 switch (GET_MODE_SIZE (mode))
37827 case 4:
37828 index = 0;
37829 break;
37830 case 8:
37831 index = 1;
37832 break;
37833 case 16:
37834 index = 2;
37835 break;
37836 default:
37837 return 100;
37839 if (in == 2)
37840 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37841 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37843 if (MMX_CLASS_P (regclass))
37845 int index;
37846 switch (GET_MODE_SIZE (mode))
37848 case 4:
37849 index = 0;
37850 break;
37851 case 8:
37852 index = 1;
37853 break;
37854 default:
37855 return 100;
37857 if (in)
37858 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37859 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37861 switch (GET_MODE_SIZE (mode))
37863 case 1:
37864 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37866 if (!in)
37867 return ix86_cost->int_store[0];
37868 if (TARGET_PARTIAL_REG_DEPENDENCY
37869 && optimize_function_for_speed_p (cfun))
37870 cost = ix86_cost->movzbl_load;
37871 else
37872 cost = ix86_cost->int_load[0];
37873 if (in == 2)
37874 return MAX (cost, ix86_cost->int_store[0]);
37875 return cost;
37877 else
37879 if (in == 2)
37880 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37881 if (in)
37882 return ix86_cost->movzbl_load;
37883 else
37884 return ix86_cost->int_store[0] + 4;
37886 break;
37887 case 2:
37888 if (in == 2)
37889 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37890 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37891 default:
37892 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37893 if (mode == TFmode)
37894 mode = XFmode;
37895 if (in == 2)
37896 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37897 else if (in)
37898 cost = ix86_cost->int_load[2];
37899 else
37900 cost = ix86_cost->int_store[2];
37901 return (cost * (((int) GET_MODE_SIZE (mode)
37902 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37906 static int
37907 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37908 bool in)
37910 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
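/* Editor's sketch (not part of this file): how the IN argument to
   inline_memory_move_cost above selects between the load cost, the store
   cost, or a pessimistic maximum of the two.  The helper and its cost
   arguments are hypothetical; real values come from the active
   processor_costs tables.  */

static int
example_mem_move_cost (int in, int load_cost, int store_cost)
{
  if (in == 2)
    /* Both directions requested: assume the worse of the two.  */
    return load_cost > store_cost ? load_cost : store_cost;
  /* IN == 1 is a load from memory, IN == 0 a store to memory.  */
  return in ? load_cost : store_cost;
}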
37914 /* Return the cost of moving data from a register in class CLASS1 to
37915 one in class CLASS2.
37917 It is not required that the cost always equal 2 when FROM is the same as TO;
37918 on some machines it is expensive to move between registers if they are not
37919 general registers. */
37921 static int
37922 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37923 reg_class_t class2_i)
37925 enum reg_class class1 = (enum reg_class) class1_i;
37926 enum reg_class class2 = (enum reg_class) class2_i;
37928 /* In case we require secondary memory, compute cost of the store followed
37929 by load. In order to avoid bad register allocation choices, we need
37930 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37932 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37934 int cost = 1;
37936 cost += inline_memory_move_cost (mode, class1, 2);
37937 cost += inline_memory_move_cost (mode, class2, 2);
37939 /* In the case of copying from a general purpose register we may emit
37940 multiple stores followed by a single load, causing a memory size
37941 mismatch stall.  Count this as an arbitrarily high cost of 20. */
37942 if (targetm.class_max_nregs (class1, mode)
37943 > targetm.class_max_nregs (class2, mode))
37944 cost += 20;
37946 /* In the case of FP/MMX moves, the registers actually overlap, and we
37947 have to switch modes in order to treat them differently. */
37948 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37949 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37950 cost += 20;
37952 return cost;
37955 /* Moves between SSE/MMX and integer unit are expensive. */
37956 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37957 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37959 /* ??? By keeping returned value relatively high, we limit the number
37960 of moves between integer and MMX/SSE registers for all targets.
37961 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
37962 where integer modes in MMX/SSE registers are not tieable
37963 because of missing QImode and HImode moves to, from, or between
37964 MMX/SSE registers. */
37965 return MAX (8, ix86_cost->mmxsse_to_integer);
37967 if (MAYBE_FLOAT_CLASS_P (class1))
37968 return ix86_cost->fp_move;
37969 if (MAYBE_SSE_CLASS_P (class1))
37970 return ix86_cost->sse_move;
37971 if (MAYBE_MMX_CLASS_P (class1))
37972 return ix86_cost->mmx_move;
37973 return 2;
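/* Editor's sketch (assumptions only, not the file's code): the shape of
   the secondary-memory branch above.  A cross-class copy that must bounce
   through memory is priced as store + load plus two optional penalties;
   the constant 20 mirrors the code above, everything else is hypothetical.  */

static int
example_secondary_move_cost (int store_cost, int load_cost,
                             int more_stores_than_loads, int fp_mmx_overlap)
{
  int cost = 1 + store_cost + load_cost;

  /* Several small stores read back by one wide load stall the pipeline.  */
  if (more_stores_than_loads)
    cost += 20;

  /* x87 and MMX registers alias, so a mode switch is needed as well.  */
  if (fp_mmx_overlap)
    cost += 20;

  return cost;
}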
37976 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37977 MODE. */
37979 bool
37980 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37982 /* The flags register, and only the flags register, can hold CCmode values. */
37983 if (CC_REGNO_P (regno))
37984 return GET_MODE_CLASS (mode) == MODE_CC;
37985 if (GET_MODE_CLASS (mode) == MODE_CC
37986 || GET_MODE_CLASS (mode) == MODE_RANDOM
37987 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37988 return false;
37989 if (STACK_REGNO_P (regno))
37990 return VALID_FP_MODE_P (mode);
37991 if (MASK_REGNO_P (regno))
37992 return VALID_MASK_REG_MODE (mode);
37993 if (SSE_REGNO_P (regno))
37995 /* We implement the move patterns for all vector modes into and
37996 out of SSE registers, even when no operation instructions
37997 are available. */
37999 /* For AVX-512 we allow, regardless of regno:
38000 - XImode
38001 - any 512-bit wide vector mode
38002 - any scalar mode. */
38003 if (TARGET_AVX512F
38004 && (mode == XImode
38005 || VALID_AVX512F_REG_MODE (mode)
38006 || VALID_AVX512F_SCALAR_MODE (mode)))
38007 return true;
38009 /* xmm16-xmm31 are only available for AVX-512. */
38010 if (EXT_REX_SSE_REGNO_P (regno))
38011 return false;
38013 /* OImode and AVX modes are available only when AVX is enabled. */
38014 return ((TARGET_AVX
38015 && VALID_AVX256_REG_OR_OI_MODE (mode))
38016 || VALID_SSE_REG_MODE (mode)
38017 || VALID_SSE2_REG_MODE (mode)
38018 || VALID_MMX_REG_MODE (mode)
38019 || VALID_MMX_REG_MODE_3DNOW (mode));
38021 if (MMX_REGNO_P (regno))
38023 /* We implement the move patterns for 3DNOW modes even in MMX mode,
38024 so if the register is available at all, then we can move data of
38025 the given mode into or out of it. */
38026 return (VALID_MMX_REG_MODE (mode)
38027 || VALID_MMX_REG_MODE_3DNOW (mode));
38030 if (mode == QImode)
38032 /* Take care with QImode values - they can live in non-QI regs,
38033 but then they do cause partial register stalls. */
38034 if (ANY_QI_REGNO_P (regno))
38035 return true;
38036 if (!TARGET_PARTIAL_REG_STALL)
38037 return true;
38038 /* LRA checks if the hard register is OK for the given mode.
38039 QImode values can live in non-QI regs, so we allow all
38040 registers here. */
38041 if (lra_in_progress)
38042 return true;
38043 return !can_create_pseudo_p ();
38045 /* We handle both integers and floats in the general purpose registers. */
38046 else if (VALID_INT_MODE_P (mode))
38047 return true;
38048 else if (VALID_FP_MODE_P (mode))
38049 return true;
38050 else if (VALID_DFP_MODE_P (mode))
38051 return true;
38052 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
38053 on to use that value in smaller contexts, this can easily force a
38054 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
38055 supporting DImode, allow it. */
38056 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
38057 return true;
38059 return false;
38062 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
38063 tieable integer mode. */
38065 static bool
38066 ix86_tieable_integer_mode_p (enum machine_mode mode)
38068 switch (mode)
38070 case HImode:
38071 case SImode:
38072 return true;
38074 case QImode:
38075 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
38077 case DImode:
38078 return TARGET_64BIT;
38080 default:
38081 return false;
38085 /* Return true if MODE1 is accessible in a register that can hold MODE2
38086 without copying. That is, all register classes that can hold MODE2
38087 can also hold MODE1. */
38089 bool
38090 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
38092 if (mode1 == mode2)
38093 return true;
38095 if (ix86_tieable_integer_mode_p (mode1)
38096 && ix86_tieable_integer_mode_p (mode2))
38097 return true;
38099 /* MODE2 being XFmode implies fp stack or general regs, which means we
38100 can tie any smaller floating point modes to it. Note that we do not
38101 tie this with TFmode. */
38102 if (mode2 == XFmode)
38103 return mode1 == SFmode || mode1 == DFmode;
38105 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
38106 that we can tie it with SFmode. */
38107 if (mode2 == DFmode)
38108 return mode1 == SFmode;
38110 /* If MODE2 is only appropriate for an SSE register, then tie with
38111 any other mode acceptable to SSE registers. */
38112 if (GET_MODE_SIZE (mode2) == 32
38113 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
38114 return (GET_MODE_SIZE (mode1) == 32
38115 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
38116 if (GET_MODE_SIZE (mode2) == 16
38117 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
38118 return (GET_MODE_SIZE (mode1) == 16
38119 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
38121 /* If MODE2 is appropriate for an MMX register, then tie
38122 with any other mode acceptable to MMX registers. */
38123 if (GET_MODE_SIZE (mode2) == 8
38124 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
38125 return (GET_MODE_SIZE (mode1) == 8
38126 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
38128 return false;
38131 /* Return the cost of moving between two registers of mode MODE. */
38133 static int
38134 ix86_set_reg_reg_cost (enum machine_mode mode)
38136 unsigned int units = UNITS_PER_WORD;
38138 switch (GET_MODE_CLASS (mode))
38140 default:
38141 break;
38143 case MODE_CC:
38144 units = GET_MODE_SIZE (CCmode);
38145 break;
38147 case MODE_FLOAT:
38148 if ((TARGET_SSE && mode == TFmode)
38149 || (TARGET_80387 && mode == XFmode)
38150 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
38151 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
38152 units = GET_MODE_SIZE (mode);
38153 break;
38155 case MODE_COMPLEX_FLOAT:
38156 if ((TARGET_SSE && mode == TCmode)
38157 || (TARGET_80387 && mode == XCmode)
38158 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
38159 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
38160 units = GET_MODE_SIZE (mode);
38161 break;
38163 case MODE_VECTOR_INT:
38164 case MODE_VECTOR_FLOAT:
38165 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
38166 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
38167 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
38168 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
38169 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
38170 units = GET_MODE_SIZE (mode);
38173 /* Return the cost of moving between two registers of mode MODE,
38174 assuming that the move will be in pieces of at most UNITS bytes. */
38175 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
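/* Editor's sketch (illustrative, not part of GCC): ix86_set_reg_reg_cost
   above charges one instruction per UNITS-sized piece of the value.
   With COSTS_N_INSNS (n) expanding to n * 4, a 32-byte vector moved in
   8-byte word pieces on a target without AVX costs 4 * 4 == 16, while the
   same vector moved by one AVX instruction costs 4.  The helper name is
   hypothetical.  */

static int
example_set_reg_reg_cost (int mode_size, int units)
{
  int pieces = (mode_size + units - 1) / units;
  return pieces * 4;   /* COSTS_N_INSNS (pieces) */
}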
38178 /* Compute a (partial) cost for rtx X. Return true if the complete
38179 cost has been computed, and false if subexpressions should be
38180 scanned. In either case, *TOTAL contains the cost result. */
38182 static bool
38183 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
38184 bool speed)
38186 rtx mask;
38187 enum rtx_code code = (enum rtx_code) code_i;
38188 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
38189 enum machine_mode mode = GET_MODE (x);
38190 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
38192 switch (code)
38194 case SET:
38195 if (register_operand (SET_DEST (x), VOIDmode)
38196 && reg_or_0_operand (SET_SRC (x), VOIDmode))
38198 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
38199 return true;
38201 return false;
38203 case CONST_INT:
38204 case CONST:
38205 case LABEL_REF:
38206 case SYMBOL_REF:
38207 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
38208 *total = 3;
38209 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
38210 *total = 2;
38211 else if (flag_pic && SYMBOLIC_CONST (x)
38212 && !(TARGET_64BIT
38213 && (GET_CODE (x) == LABEL_REF
38214 || (GET_CODE (x) == SYMBOL_REF
38215 && SYMBOL_REF_LOCAL_P (x)))))
38216 *total = 1;
38217 else
38218 *total = 0;
38219 return true;
38221 case CONST_DOUBLE:
38222 if (mode == VOIDmode)
38224 *total = 0;
38225 return true;
38227 switch (standard_80387_constant_p (x))
38229 case 1: /* 0.0 */
38230 *total = 1;
38231 return true;
38232 default: /* Other constants */
38233 *total = 2;
38234 return true;
38235 case 0:
38236 case -1:
38237 break;
38239 if (SSE_FLOAT_MODE_P (mode))
38241 case CONST_VECTOR:
38242 switch (standard_sse_constant_p (x))
38244 case 0:
38245 break;
38246 case 1: /* 0: xor eliminates false dependency */
38247 *total = 0;
38248 return true;
38249 default: /* -1: cmp contains false dependency */
38250 *total = 1;
38251 return true;
38254 /* Fall back to (MEM (SYMBOL_REF)), since that's where
38255 it'll probably end up. Add a penalty for size. */
38256 *total = (COSTS_N_INSNS (1)
38257 + (flag_pic != 0 && !TARGET_64BIT)
38258 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
38259 return true;
38261 case ZERO_EXTEND:
38262 /* The zero extension is often completely free on x86_64, so make
38263 it as cheap as possible. */
38264 if (TARGET_64BIT && mode == DImode
38265 && GET_MODE (XEXP (x, 0)) == SImode)
38266 *total = 1;
38267 else if (TARGET_ZERO_EXTEND_WITH_AND)
38268 *total = cost->add;
38269 else
38270 *total = cost->movzx;
38271 return false;
38273 case SIGN_EXTEND:
38274 *total = cost->movsx;
38275 return false;
38277 case ASHIFT:
38278 if (SCALAR_INT_MODE_P (mode)
38279 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
38280 && CONST_INT_P (XEXP (x, 1)))
38282 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38283 if (value == 1)
38285 *total = cost->add;
38286 return false;
38288 if ((value == 2 || value == 3)
38289 && cost->lea <= cost->shift_const)
38291 *total = cost->lea;
38292 return false;
38295 /* FALLTHRU */
38297 case ROTATE:
38298 case ASHIFTRT:
38299 case LSHIFTRT:
38300 case ROTATERT:
38301 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38303 /* ??? Should be SSE vector operation cost. */
38304 /* At least for published AMD latencies, this really is the same
38305 as the latency for a simple fpu operation like fabs. */
38306 /* V*QImode is emulated with 1-11 insns. */
38307 if (mode == V16QImode || mode == V32QImode)
38309 int count = 11;
38310 if (TARGET_XOP && mode == V16QImode)
38312 /* For XOP we use vpshab, which requires a broadcast of the
38313 value to the variable shift insn. For constants this
38314 means a V16QImode constant in memory; even when we can perform the
38315 shift with one insn, set the cost to prefer paddb. */
38316 if (CONSTANT_P (XEXP (x, 1)))
38318 *total = (cost->fabs
38319 + rtx_cost (XEXP (x, 0), code, 0, speed)
38320 + (speed ? 2 : COSTS_N_BYTES (16)));
38321 return true;
38323 count = 3;
38325 else if (TARGET_SSSE3)
38326 count = 7;
38327 *total = cost->fabs * count;
38329 else
38330 *total = cost->fabs;
38332 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38334 if (CONST_INT_P (XEXP (x, 1)))
38336 if (INTVAL (XEXP (x, 1)) > 32)
38337 *total = cost->shift_const + COSTS_N_INSNS (2);
38338 else
38339 *total = cost->shift_const * 2;
38341 else
38343 if (GET_CODE (XEXP (x, 1)) == AND)
38344 *total = cost->shift_var * 2;
38345 else
38346 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38349 else
38351 if (CONST_INT_P (XEXP (x, 1)))
38352 *total = cost->shift_const;
38353 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38354 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38356 /* Return the cost after shift-and truncation. */
38357 *total = cost->shift_var;
38358 return true;
38360 else
38361 *total = cost->shift_var;
38363 return false;
38365 case FMA:
38367 rtx sub;
38369 gcc_assert (FLOAT_MODE_P (mode));
38370 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38372 /* ??? SSE scalar/vector cost should be used here. */
38373 /* ??? Bald assumption that fma has the same cost as fmul. */
38374 *total = cost->fmul;
38375 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38377 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38378 sub = XEXP (x, 0);
38379 if (GET_CODE (sub) == NEG)
38380 sub = XEXP (sub, 0);
38381 *total += rtx_cost (sub, FMA, 0, speed);
38383 sub = XEXP (x, 2);
38384 if (GET_CODE (sub) == NEG)
38385 sub = XEXP (sub, 0);
38386 *total += rtx_cost (sub, FMA, 2, speed);
38387 return true;
38390 case MULT:
38391 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38393 /* ??? SSE scalar cost should be used here. */
38394 *total = cost->fmul;
38395 return false;
38397 else if (X87_FLOAT_MODE_P (mode))
38399 *total = cost->fmul;
38400 return false;
38402 else if (FLOAT_MODE_P (mode))
38404 /* ??? SSE vector cost should be used here. */
38405 *total = cost->fmul;
38406 return false;
38408 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38410 /* V*QImode is emulated with 7-13 insns. */
38411 if (mode == V16QImode || mode == V32QImode)
38413 int extra = 11;
38414 if (TARGET_XOP && mode == V16QImode)
38415 extra = 5;
38416 else if (TARGET_SSSE3)
38417 extra = 6;
38418 *total = cost->fmul * 2 + cost->fabs * extra;
38420 /* V*DImode is emulated with 5-8 insns. */
38421 else if (mode == V2DImode || mode == V4DImode)
38423 if (TARGET_XOP && mode == V2DImode)
38424 *total = cost->fmul * 2 + cost->fabs * 3;
38425 else
38426 *total = cost->fmul * 3 + cost->fabs * 5;
38428 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38429 insns, including two PMULUDQ. */
38430 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38431 *total = cost->fmul * 2 + cost->fabs * 5;
38432 else
38433 *total = cost->fmul;
38434 return false;
38436 else
38438 rtx op0 = XEXP (x, 0);
38439 rtx op1 = XEXP (x, 1);
38440 int nbits;
38441 if (CONST_INT_P (XEXP (x, 1)))
38443 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38444 for (nbits = 0; value != 0; value &= value - 1)
38445 nbits++;
38447 else
38448 /* This is arbitrary. */
38449 nbits = 7;
38451 /* Compute costs correctly for widening multiplication. */
38452 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38453 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38454 == GET_MODE_SIZE (mode))
38456 int is_mulwiden = 0;
38457 enum machine_mode inner_mode = GET_MODE (op0);
38459 if (GET_CODE (op0) == GET_CODE (op1))
38460 is_mulwiden = 1, op1 = XEXP (op1, 0);
38461 else if (CONST_INT_P (op1))
38463 if (GET_CODE (op0) == SIGN_EXTEND)
38464 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38465 == INTVAL (op1);
38466 else
38467 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38470 if (is_mulwiden)
38471 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38474 *total = (cost->mult_init[MODE_INDEX (mode)]
38475 + nbits * cost->mult_bit
38476 + rtx_cost (op0, outer_code, opno, speed)
38477 + rtx_cost (op1, outer_code, opno, speed));
38479 return true;
38482 case DIV:
38483 case UDIV:
38484 case MOD:
38485 case UMOD:
38486 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38487 /* ??? SSE cost should be used here. */
38488 *total = cost->fdiv;
38489 else if (X87_FLOAT_MODE_P (mode))
38490 *total = cost->fdiv;
38491 else if (FLOAT_MODE_P (mode))
38492 /* ??? SSE vector cost should be used here. */
38493 *total = cost->fdiv;
38494 else
38495 *total = cost->divide[MODE_INDEX (mode)];
38496 return false;
38498 case PLUS:
38499 if (GET_MODE_CLASS (mode) == MODE_INT
38500 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38502 if (GET_CODE (XEXP (x, 0)) == PLUS
38503 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38504 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38505 && CONSTANT_P (XEXP (x, 1)))
38507 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38508 if (val == 2 || val == 4 || val == 8)
38510 *total = cost->lea;
38511 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38512 outer_code, opno, speed);
38513 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38514 outer_code, opno, speed);
38515 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38516 return true;
38519 else if (GET_CODE (XEXP (x, 0)) == MULT
38520 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38522 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38523 if (val == 2 || val == 4 || val == 8)
38525 *total = cost->lea;
38526 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38527 outer_code, opno, speed);
38528 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38529 return true;
38532 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38534 *total = cost->lea;
38535 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38536 outer_code, opno, speed);
38537 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38538 outer_code, opno, speed);
38539 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38540 return true;
38543 /* FALLTHRU */
38545 case MINUS:
38546 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38548 /* ??? SSE cost should be used here. */
38549 *total = cost->fadd;
38550 return false;
38552 else if (X87_FLOAT_MODE_P (mode))
38554 *total = cost->fadd;
38555 return false;
38557 else if (FLOAT_MODE_P (mode))
38559 /* ??? SSE vector cost should be used here. */
38560 *total = cost->fadd;
38561 return false;
38563 /* FALLTHRU */
38565 case AND:
38566 case IOR:
38567 case XOR:
38568 if (GET_MODE_CLASS (mode) == MODE_INT
38569 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38571 *total = (cost->add * 2
38572 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38573 << (GET_MODE (XEXP (x, 0)) != DImode))
38574 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38575 << (GET_MODE (XEXP (x, 1)) != DImode)));
38576 return true;
38578 /* FALLTHRU */
38580 case NEG:
38581 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38583 /* ??? SSE cost should be used here. */
38584 *total = cost->fchs;
38585 return false;
38587 else if (X87_FLOAT_MODE_P (mode))
38589 *total = cost->fchs;
38590 return false;
38592 else if (FLOAT_MODE_P (mode))
38594 /* ??? SSE vector cost should be used here. */
38595 *total = cost->fchs;
38596 return false;
38598 /* FALLTHRU */
38600 case NOT:
38601 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38603 /* ??? Should be SSE vector operation cost. */
38604 /* At least for published AMD latencies, this really is the same
38605 as the latency for a simple fpu operation like fabs. */
38606 *total = cost->fabs;
38608 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38609 *total = cost->add * 2;
38610 else
38611 *total = cost->add;
38612 return false;
38614 case COMPARE:
38615 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38616 && XEXP (XEXP (x, 0), 1) == const1_rtx
38617 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38618 && XEXP (x, 1) == const0_rtx)
38620 /* This kind of construct is implemented using test[bwl].
38621 Treat it as if we had an AND. */
38622 *total = (cost->add
38623 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38624 + rtx_cost (const1_rtx, outer_code, opno, speed));
38625 return true;
38627 return false;
38629 case FLOAT_EXTEND:
38630 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38631 *total = 0;
38632 return false;
38634 case ABS:
38635 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38636 /* ??? SSE cost should be used here. */
38637 *total = cost->fabs;
38638 else if (X87_FLOAT_MODE_P (mode))
38639 *total = cost->fabs;
38640 else if (FLOAT_MODE_P (mode))
38641 /* ??? SSE vector cost should be used here. */
38642 *total = cost->fabs;
38643 return false;
38645 case SQRT:
38646 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38647 /* ??? SSE cost should be used here. */
38648 *total = cost->fsqrt;
38649 else if (X87_FLOAT_MODE_P (mode))
38650 *total = cost->fsqrt;
38651 else if (FLOAT_MODE_P (mode))
38652 /* ??? SSE vector cost should be used here. */
38653 *total = cost->fsqrt;
38654 return false;
38656 case UNSPEC:
38657 if (XINT (x, 1) == UNSPEC_TP)
38658 *total = 0;
38659 return false;
38661 case VEC_SELECT:
38662 case VEC_CONCAT:
38663 case VEC_DUPLICATE:
38664 /* ??? Assume all of these vector manipulation patterns are
38665 recognizable, in which case they all pretty much have the
38666 same cost. */
38667 *total = cost->fabs;
38668 return true;
38669 case VEC_MERGE:
38670 mask = XEXP (x, 2);
38671 /* This is a masked instruction; assume the same cost
38672 as the non-masked variant. */
38673 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38674 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38675 else
38676 *total = cost->fabs;
38677 return true;
38679 default:
38680 return false;
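/* Editor's illustration (not part of this file): the MULT costing above
   charges one mult_bit unit per set bit of a constant multiplier, so
   multiplying by 10 (binary 1010) is cheaper than multiplying by 15
   (binary 1111).  The helper name and parameters are hypothetical
   stand-ins for the mult_init[] and mult_bit table entries; the real
   function also adds the operand costs on top.  */

static int
example_mult_by_const_cost (unsigned long long multiplier,
                            int mult_init, int mult_bit)
{
  int nbits = 0;

  /* Same popcount loop as in ix86_rtx_costs: clear the lowest set bit
     until the value is zero.  */
  for (; multiplier != 0; multiplier &= multiplier - 1)
    nbits++;

  return mult_init + nbits * mult_bit;
}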
38684 #if TARGET_MACHO
38686 static int current_machopic_label_num;
38688 /* Given a symbol name and its associated stub, write out the
38689 definition of the stub. */
38691 void
38692 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38694 unsigned int length;
38695 char *binder_name, *symbol_name, lazy_ptr_name[32];
38696 int label = ++current_machopic_label_num;
38698 /* For 64-bit we shouldn't get here. */
38699 gcc_assert (!TARGET_64BIT);
38701 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38702 symb = targetm.strip_name_encoding (symb);
38704 length = strlen (stub);
38705 binder_name = XALLOCAVEC (char, length + 32);
38706 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38708 length = strlen (symb);
38709 symbol_name = XALLOCAVEC (char, length + 32);
38710 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38712 sprintf (lazy_ptr_name, "L%d$lz", label);
38714 if (MACHOPIC_ATT_STUB)
38715 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38716 else if (MACHOPIC_PURE)
38717 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38718 else
38719 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38721 fprintf (file, "%s:\n", stub);
38722 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38724 if (MACHOPIC_ATT_STUB)
38726 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38728 else if (MACHOPIC_PURE)
38730 /* PIC stub. */
38731 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38732 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38733 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38734 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38735 label, lazy_ptr_name, label);
38736 fprintf (file, "\tjmp\t*%%ecx\n");
38738 else
38739 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38741 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38742 it needs no stub-binding-helper. */
38743 if (MACHOPIC_ATT_STUB)
38744 return;
38746 fprintf (file, "%s:\n", binder_name);
38748 if (MACHOPIC_PURE)
38750 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38751 fprintf (file, "\tpushl\t%%ecx\n");
38753 else
38754 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38756 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38758 /* N.B. Keep the correspondence of these
38759 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38760 old-pic/new-pic/non-pic stubs; altering this will break
38761 compatibility with existing dylibs. */
38762 if (MACHOPIC_PURE)
38764 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38765 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38767 else
38768 /* 16-byte -mdynamic-no-pic stub. */
38769 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38771 fprintf (file, "%s:\n", lazy_ptr_name);
38772 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38773 fprintf (file, ASM_LONG "%s\n", binder_name);
38775 #endif /* TARGET_MACHO */
38777 /* Order the registers for register allocator. */
38779 void
38780 x86_order_regs_for_local_alloc (void)
38782 int pos = 0;
38783 int i;
38785 /* First allocate the local general purpose registers. */
38786 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38787 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38788 reg_alloc_order [pos++] = i;
38790 /* Global general purpose registers. */
38791 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38792 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38793 reg_alloc_order [pos++] = i;
38795 /* x87 registers come first in case we are doing FP math
38796 using them. */
38797 if (!TARGET_SSE_MATH)
38798 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38799 reg_alloc_order [pos++] = i;
38801 /* SSE registers. */
38802 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38803 reg_alloc_order [pos++] = i;
38804 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38805 reg_alloc_order [pos++] = i;
38807 /* Extended REX SSE registers. */
38808 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38809 reg_alloc_order [pos++] = i;
38811 /* Mask registers. */
38812 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38813 reg_alloc_order [pos++] = i;
38815 /* x87 registers. */
38816 if (TARGET_SSE_MATH)
38817 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38818 reg_alloc_order [pos++] = i;
38820 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38821 reg_alloc_order [pos++] = i;
38823 /* Initialize the rest of the array, as we do not allocate some registers
38824 at all. */
38825 while (pos < FIRST_PSEUDO_REGISTER)
38826 reg_alloc_order [pos++] = 0;
38829 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38830 in struct attribute_spec handler. */
38831 static tree
38832 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38833 tree args,
38834 int flags ATTRIBUTE_UNUSED,
38835 bool *no_add_attrs)
38837 if (TREE_CODE (*node) != FUNCTION_TYPE
38838 && TREE_CODE (*node) != METHOD_TYPE
38839 && TREE_CODE (*node) != FIELD_DECL
38840 && TREE_CODE (*node) != TYPE_DECL)
38842 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38843 name);
38844 *no_add_attrs = true;
38845 return NULL_TREE;
38847 if (TARGET_64BIT)
38849 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38850 name);
38851 *no_add_attrs = true;
38852 return NULL_TREE;
38854 if (is_attribute_p ("callee_pop_aggregate_return", name))
38856 tree cst;
38858 cst = TREE_VALUE (args);
38859 if (TREE_CODE (cst) != INTEGER_CST)
38861 warning (OPT_Wattributes,
38862 "%qE attribute requires an integer constant argument",
38863 name);
38864 *no_add_attrs = true;
38866 else if (compare_tree_int (cst, 0) != 0
38867 && compare_tree_int (cst, 1) != 0)
38869 warning (OPT_Wattributes,
38870 "argument to %qE attribute is neither zero, nor one",
38871 name);
38872 *no_add_attrs = true;
38875 return NULL_TREE;
38878 return NULL_TREE;
38881 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
38882 struct attribute_spec.handler. */
38883 static tree
38884 ix86_handle_abi_attribute (tree *node, tree name,
38885 tree args ATTRIBUTE_UNUSED,
38886 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38888 if (TREE_CODE (*node) != FUNCTION_TYPE
38889 && TREE_CODE (*node) != METHOD_TYPE
38890 && TREE_CODE (*node) != FIELD_DECL
38891 && TREE_CODE (*node) != TYPE_DECL)
38893 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38894 name);
38895 *no_add_attrs = true;
38896 return NULL_TREE;
38899 /* Can combine regparm with all attributes but fastcall. */
38900 if (is_attribute_p ("ms_abi", name))
38902 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38904 error ("ms_abi and sysv_abi attributes are not compatible");
38907 return NULL_TREE;
38909 else if (is_attribute_p ("sysv_abi", name))
38911 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38913 error ("ms_abi and sysv_abi attributes are not compatible");
38916 return NULL_TREE;
38919 return NULL_TREE;
38922 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38923 struct attribute_spec.handler. */
38924 static tree
38925 ix86_handle_struct_attribute (tree *node, tree name,
38926 tree args ATTRIBUTE_UNUSED,
38927 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38929 tree *type = NULL;
38930 if (DECL_P (*node))
38932 if (TREE_CODE (*node) == TYPE_DECL)
38933 type = &TREE_TYPE (*node);
38935 else
38936 type = node;
38938 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38940 warning (OPT_Wattributes, "%qE attribute ignored",
38941 name);
38942 *no_add_attrs = true;
38945 else if ((is_attribute_p ("ms_struct", name)
38946 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38947 || ((is_attribute_p ("gcc_struct", name)
38948 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38950 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38951 name);
38952 *no_add_attrs = true;
38955 return NULL_TREE;
38958 static tree
38959 ix86_handle_fndecl_attribute (tree *node, tree name,
38960 tree args ATTRIBUTE_UNUSED,
38961 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38963 if (TREE_CODE (*node) != FUNCTION_DECL)
38965 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38966 name);
38967 *no_add_attrs = true;
38969 return NULL_TREE;
38972 static bool
38973 ix86_ms_bitfield_layout_p (const_tree record_type)
38975 return ((TARGET_MS_BITFIELD_LAYOUT
38976 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38977 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38980 /* Returns an expression indicating where the this parameter is
38981 located on entry to the FUNCTION. */
38983 static rtx
38984 x86_this_parameter (tree function)
38986 tree type = TREE_TYPE (function);
38987 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38988 int nregs;
38990 if (TARGET_64BIT)
38992 const int *parm_regs;
38994 if (ix86_function_type_abi (type) == MS_ABI)
38995 parm_regs = x86_64_ms_abi_int_parameter_registers;
38996 else
38997 parm_regs = x86_64_int_parameter_registers;
38998 return gen_rtx_REG (Pmode, parm_regs[aggr]);
39001 nregs = ix86_function_regparm (type, function);
39003 if (nregs > 0 && !stdarg_p (type))
39005 int regno;
39006 unsigned int ccvt = ix86_get_callcvt (type);
39008 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
39009 regno = aggr ? DX_REG : CX_REG;
39010 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
39012 regno = CX_REG;
39013 if (aggr)
39014 return gen_rtx_MEM (SImode,
39015 plus_constant (Pmode, stack_pointer_rtx, 4));
39017 else
39019 regno = AX_REG;
39020 if (aggr)
39022 regno = DX_REG;
39023 if (nregs == 1)
39024 return gen_rtx_MEM (SImode,
39025 plus_constant (Pmode,
39026 stack_pointer_rtx, 4));
39029 return gen_rtx_REG (SImode, regno);
39032 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
39033 aggr ? 8 : 4));
39036 /* Determine whether x86_output_mi_thunk can succeed. */
39038 static bool
39039 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
39040 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
39041 HOST_WIDE_INT vcall_offset, const_tree function)
39043 /* 64-bit can handle anything. */
39044 if (TARGET_64BIT)
39045 return true;
39047 /* For 32-bit, everything's fine if we have one free register. */
39048 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
39049 return true;
39051 /* Need a free register for vcall_offset. */
39052 if (vcall_offset)
39053 return false;
39055 /* Need a free register for GOT references. */
39056 if (flag_pic && !targetm.binds_local_p (function))
39057 return false;
39059 /* Otherwise ok. */
39060 return true;
39063 /* Output the assembler code for a thunk function. THUNK_DECL is the
39064 declaration for the thunk function itself, FUNCTION is the decl for
39065 the target function. DELTA is an immediate constant offset to be
39066 added to THIS. If VCALL_OFFSET is nonzero, the word at
39067 *(*this + vcall_offset) should be added to THIS. */
39069 static void
39070 x86_output_mi_thunk (FILE *file,
39071 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
39072 HOST_WIDE_INT vcall_offset, tree function)
39074 rtx this_param = x86_this_parameter (function);
39075 rtx this_reg, tmp, fnaddr;
39076 unsigned int tmp_regno;
39078 if (TARGET_64BIT)
39079 tmp_regno = R10_REG;
39080 else
39082 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
39083 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
39084 tmp_regno = AX_REG;
39085 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
39086 tmp_regno = DX_REG;
39087 else
39088 tmp_regno = CX_REG;
39091 emit_note (NOTE_INSN_PROLOGUE_END);
39093 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
39094 pull it in now and let DELTA benefit. */
39095 if (REG_P (this_param))
39096 this_reg = this_param;
39097 else if (vcall_offset)
39099 /* Put the this parameter into %eax. */
39100 this_reg = gen_rtx_REG (Pmode, AX_REG);
39101 emit_move_insn (this_reg, this_param);
39103 else
39104 this_reg = NULL_RTX;
39106 /* Adjust the this parameter by a fixed constant. */
39107 if (delta)
39109 rtx delta_rtx = GEN_INT (delta);
39110 rtx delta_dst = this_reg ? this_reg : this_param;
39112 if (TARGET_64BIT)
39114 if (!x86_64_general_operand (delta_rtx, Pmode))
39116 tmp = gen_rtx_REG (Pmode, tmp_regno);
39117 emit_move_insn (tmp, delta_rtx);
39118 delta_rtx = tmp;
39122 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
39125 /* Adjust the this parameter by a value stored in the vtable. */
39126 if (vcall_offset)
39128 rtx vcall_addr, vcall_mem, this_mem;
39130 tmp = gen_rtx_REG (Pmode, tmp_regno);
39132 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
39133 if (Pmode != ptr_mode)
39134 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
39135 emit_move_insn (tmp, this_mem);
39137 /* Adjust the this parameter. */
39138 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
39139 if (TARGET_64BIT
39140 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
39142 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
39143 emit_move_insn (tmp2, GEN_INT (vcall_offset));
39144 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
39147 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
39148 if (Pmode != ptr_mode)
39149 emit_insn (gen_addsi_1_zext (this_reg,
39150 gen_rtx_REG (ptr_mode,
39151 REGNO (this_reg)),
39152 vcall_mem));
39153 else
39154 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
39157 /* If necessary, drop THIS back to its stack slot. */
39158 if (this_reg && this_reg != this_param)
39159 emit_move_insn (this_param, this_reg);
39161 fnaddr = XEXP (DECL_RTL (function), 0);
39162 if (TARGET_64BIT)
39164 if (!flag_pic || targetm.binds_local_p (function)
39165 || TARGET_PECOFF)
39167 else
39169 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
39170 tmp = gen_rtx_CONST (Pmode, tmp);
39171 fnaddr = gen_const_mem (Pmode, tmp);
39174 else
39176 if (!flag_pic || targetm.binds_local_p (function))
39178 #if TARGET_MACHO
39179 else if (TARGET_MACHO)
39181 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
39182 fnaddr = XEXP (fnaddr, 0);
39184 #endif /* TARGET_MACHO */
39185 else
39187 tmp = gen_rtx_REG (Pmode, CX_REG);
39188 output_set_got (tmp, NULL_RTX);
39190 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
39191 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
39192 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
39193 fnaddr = gen_const_mem (Pmode, fnaddr);
39197 /* Our sibling call patterns do not allow memories, because we have no
39198 predicate that can distinguish between frame and non-frame memory.
39199 For our purposes here, we can get away with (ab)using a jump pattern,
39200 because we're going to do no optimization. */
39201 if (MEM_P (fnaddr))
39202 emit_jump_insn (gen_indirect_jump (fnaddr));
39203 else
39205 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
39206 fnaddr = legitimize_pic_address (fnaddr,
39207 gen_rtx_REG (Pmode, tmp_regno));
39209 if (!sibcall_insn_operand (fnaddr, word_mode))
39211 tmp = gen_rtx_REG (word_mode, tmp_regno);
39212 if (GET_MODE (fnaddr) != word_mode)
39213 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
39214 emit_move_insn (tmp, fnaddr);
39215 fnaddr = tmp;
39218 tmp = gen_rtx_MEM (QImode, fnaddr);
39219 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
39220 tmp = emit_call_insn (tmp);
39221 SIBLING_CALL_P (tmp) = 1;
39223 emit_barrier ();
39225 /* Emit just enough of rest_of_compilation to get the insns emitted.
39226 Note that use_thunk calls assemble_start_function et al. */
39227 tmp = get_insns ();
39228 shorten_branches (tmp);
39229 final_start_function (tmp, file, 1);
39230 final (tmp, file, 1);
39231 final_end_function ();
39234 static void
39235 x86_file_start (void)
39237 default_file_start ();
39238 if (TARGET_16BIT)
39239 fputs ("\t.code16gcc\n", asm_out_file);
39240 #if TARGET_MACHO
39241 darwin_file_start ();
39242 #endif
39243 if (X86_FILE_START_VERSION_DIRECTIVE)
39244 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
39245 if (X86_FILE_START_FLTUSED)
39246 fputs ("\t.global\t__fltused\n", asm_out_file);
39247 if (ix86_asm_dialect == ASM_INTEL)
39248 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
39252 x86_field_alignment (tree field, int computed)
39254 enum machine_mode mode;
39255 tree type = TREE_TYPE (field);
39257 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
39258 return computed;
39259 mode = TYPE_MODE (strip_array_types (type));
39260 if (mode == DFmode || mode == DCmode
39261 || GET_MODE_CLASS (mode) == MODE_INT
39262 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
39263 return MIN (32, computed);
39264 return computed;
39267 /* Output assembler code to FILE to increment profiler label # LABELNO
39268 for profiling a function entry. */
39269 void
39270 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
39272 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
39273 : MCOUNT_NAME);
39275 if (TARGET_64BIT)
39277 #ifndef NO_PROFILE_COUNTERS
39278 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
39279 #endif
39281 if (!TARGET_PECOFF && flag_pic)
39282 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
39283 else
39284 fprintf (file, "\tcall\t%s\n", mcount_name);
39286 else if (flag_pic)
39288 #ifndef NO_PROFILE_COUNTERS
39289 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
39290 LPREFIX, labelno);
39291 #endif
39292 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
39294 else
39296 #ifndef NO_PROFILE_COUNTERS
39297 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
39298 LPREFIX, labelno);
39299 #endif
39300 fprintf (file, "\tcall\t%s\n", mcount_name);
39304 /* We don't have exact information about the insn sizes, but we may assume
39305 quite safely that we are informed about all 1 byte insns and memory
39306 address sizes. This is enough to eliminate unnecessary padding in
39307 99% of cases. */
39309 static int
39310 min_insn_size (rtx insn)
39312 int l = 0, len;
39314 if (!INSN_P (insn) || !active_insn_p (insn))
39315 return 0;
39317 /* Discard alignments we've emitted and jump instructions. */
39318 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39319 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39320 return 0;
39322 /* Important case - calls are always 5 bytes.
39323 It is common to have many calls in a row.
39324 if (CALL_P (insn)
39325 && symbolic_reference_mentioned_p (PATTERN (insn))
39326 && !SIBLING_CALL_P (insn))
39327 return 5;
39328 len = get_attr_length (insn);
39329 if (len <= 1)
39330 return 1;
39332 /* For normal instructions we rely on get_attr_length being exact,
39333 with a few exceptions. */
39334 if (!JUMP_P (insn))
39336 enum attr_type type = get_attr_type (insn);
39338 switch (type)
39340 case TYPE_MULTI:
39341 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39342 || asm_noperands (PATTERN (insn)) >= 0)
39343 return 0;
39344 break;
39345 case TYPE_OTHER:
39346 case TYPE_FCMP:
39347 break;
39348 default:
39349 /* Otherwise trust get_attr_length. */
39350 return len;
39353 l = get_attr_length_address (insn);
39354 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39355 l = 4;
39357 if (l)
39358 return 1+l;
39359 else
39360 return 2;
39363 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39365 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
39366 16-byte window. */
39368 static void
39369 ix86_avoid_jump_mispredicts (void)
39371 rtx insn, start = get_insns ();
39372 int nbytes = 0, njumps = 0;
39373 int isjump = 0;
39375 /* Look for all minimal intervals of instructions containing 4 jumps.
39376 The intervals are bounded by START and INSN. NBYTES is the total
39377 size of instructions in the interval including INSN and not including
39378 START.  When NBYTES is smaller than 16 bytes, it is possible
39379 that the end of START and INSN end up in the same 16-byte page.
39381 The smallest offset in the page INSN can start is the case where START
39382 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
39383 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
39385 Don't consider an asm goto as a jump; while it can contain a jump, it
39386 doesn't have to, since control transfer to its label(s) can be performed
39387 through other means, and we also estimate the minimum length of all asm stmts as 0. */
39388 for (insn = start; insn; insn = NEXT_INSN (insn))
39390 int min_size;
39392 if (LABEL_P (insn))
39394 int align = label_to_alignment (insn);
39395 int max_skip = label_to_max_skip (insn);
39397 if (max_skip > 15)
39398 max_skip = 15;
39399 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39400 already in the current 16 byte page, because otherwise
39401 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39402 bytes to reach 16 byte boundary. */
39403 if (align <= 0
39404 || (align <= 3 && max_skip != (1 << align) - 1))
39405 max_skip = 0;
39406 if (dump_file)
39407 fprintf (dump_file, "Label %i with max_skip %i\n",
39408 INSN_UID (insn), max_skip);
39409 if (max_skip)
39411 while (nbytes + max_skip >= 16)
39413 start = NEXT_INSN (start);
39414 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39415 || CALL_P (start))
39416 njumps--, isjump = 1;
39417 else
39418 isjump = 0;
39419 nbytes -= min_insn_size (start);
39422 continue;
39425 min_size = min_insn_size (insn);
39426 nbytes += min_size;
39427 if (dump_file)
39428 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39429 INSN_UID (insn), min_size);
39430 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39431 || CALL_P (insn))
39432 njumps++;
39433 else
39434 continue;
39436 while (njumps > 3)
39438 start = NEXT_INSN (start);
39439 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39440 || CALL_P (start))
39441 njumps--, isjump = 1;
39442 else
39443 isjump = 0;
39444 nbytes -= min_insn_size (start);
39446 gcc_assert (njumps >= 0);
39447 if (dump_file)
39448 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39449 INSN_UID (start), INSN_UID (insn), nbytes);
39451 if (njumps == 3 && isjump && nbytes < 16)
39453 int padsize = 15 - nbytes + min_insn_size (insn);
39455 if (dump_file)
39456 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39457 INSN_UID (insn), padsize);
39458 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39462 #endif
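/* Editor's sketch of the padding arithmetic used above (assumption-labelled,
   not the real pass): roughly, when a fourth branch could land in the same
   16-byte fetch window as three earlier ones, pad in front of it so it
   starts in the next window.  NBYTES is the size of the interval up to and
   including the final branch, INSN_SIZE that branch's own minimum size,
   matching the padsize computation in ix86_avoid_jump_mispredicts.  */

static int
example_k8_pad_bytes (int nbytes, int insn_size)
{
  if (nbytes >= 16)
    return 0;   /* the branches already span two fetch windows */
  return 15 - nbytes + insn_size;
}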
39464 /* AMD Athlon works faster
39465 when RET is not the destination of a conditional jump or directly preceded
39466 by another jump instruction.  We avoid the penalty by inserting a NOP just
39467 before the RET instruction in such cases. */
39468 static void
39469 ix86_pad_returns (void)
39471 edge e;
39472 edge_iterator ei;
39474 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39476 basic_block bb = e->src;
39477 rtx ret = BB_END (bb);
39478 rtx prev;
39479 bool replace = false;
39481 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39482 || optimize_bb_for_size_p (bb))
39483 continue;
39484 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39485 if (active_insn_p (prev) || LABEL_P (prev))
39486 break;
39487 if (prev && LABEL_P (prev))
39489 edge e;
39490 edge_iterator ei;
39492 FOR_EACH_EDGE (e, ei, bb->preds)
39493 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39494 && !(e->flags & EDGE_FALLTHRU))
39496 replace = true;
39497 break;
39500 if (!replace)
39502 prev = prev_active_insn (ret);
39503 if (prev
39504 && ((JUMP_P (prev) && any_condjump_p (prev))
39505 || CALL_P (prev)))
39506 replace = true;
39507 /* Empty functions get a branch mispredict even when
39508 the jump destination is not visible to us. */
39509 if (!prev && !optimize_function_for_size_p (cfun))
39510 replace = true;
39512 if (replace)
39514 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39515 delete_insn (ret);
39520 /* Count the minimum number of instructions in BB. Return 4 if the
39521 number of instructions >= 4. */
39523 static int
39524 ix86_count_insn_bb (basic_block bb)
39526 rtx insn;
39527 int insn_count = 0;
39529 /* Count number of instructions in this block. Return 4 if the number
39530 of instructions >= 4. */
39531 FOR_BB_INSNS (bb, insn)
39533 /* This only happens in exit blocks. */
39534 if (JUMP_P (insn)
39535 && ANY_RETURN_P (PATTERN (insn)))
39536 break;
39538 if (NONDEBUG_INSN_P (insn)
39539 && GET_CODE (PATTERN (insn)) != USE
39540 && GET_CODE (PATTERN (insn)) != CLOBBER)
39542 insn_count++;
39543 if (insn_count >= 4)
39544 return insn_count;
39548 return insn_count;
39552 /* Count the minimum number of instructions in code path in BB.
39553 Return 4 if the number of instructions >= 4. */
39555 static int
39556 ix86_count_insn (basic_block bb)
39558 edge e;
39559 edge_iterator ei;
39560 int min_prev_count;
39562 /* Only bother counting instructions along paths with no
39563 more than 2 basic blocks between entry and exit. Given
39564 that BB has an edge to exit, determine if a predecessor
39565 of BB has an edge from entry. If so, compute the number
39566 of instructions in the predecessor block. If there
39567 happen to be multiple such blocks, compute the minimum. */
39568 min_prev_count = 4;
39569 FOR_EACH_EDGE (e, ei, bb->preds)
39571 edge prev_e;
39572 edge_iterator prev_ei;
39574 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39576 min_prev_count = 0;
39577 break;
39579 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39581 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39583 int count = ix86_count_insn_bb (e->src);
39584 if (count < min_prev_count)
39585 min_prev_count = count;
39586 break;
39591 if (min_prev_count < 4)
39592 min_prev_count += ix86_count_insn_bb (bb);
39594 return min_prev_count;
39597 /* Pad short function to 4 instructions. */
39599 static void
39600 ix86_pad_short_function (void)
39602 edge e;
39603 edge_iterator ei;
39605 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39607 rtx ret = BB_END (e->src);
39608 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39610 int insn_count = ix86_count_insn (e->src);
39612 /* Pad short function. */
39613 if (insn_count < 4)
39615 rtx insn = ret;
39617 /* Find epilogue. */
39618 while (insn
39619 && (!NOTE_P (insn)
39620 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39621 insn = PREV_INSN (insn);
39623 if (!insn)
39624 insn = ret;
39626 /* Two NOPs count as one instruction. */
39627 insn_count = 2 * (4 - insn_count);
39628 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
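/* Editor's illustration (hypothetical helper, not part of this file): the
   short-function padding above emits two NOPs for every instruction the
   function is short of four, so a function with one real instruction gets
   2 * (4 - 1) == 6 NOPs placed before its epilogue.  */

static int
example_short_function_nop_count (int insn_count)
{
  if (insn_count >= 4)
    return 0;
  return 2 * (4 - insn_count);
}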
39634 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39635 the epilogue, the Windows system unwinder will apply epilogue logic and
39636 produce incorrect offsets. This can be avoided by adding a nop between
39637 the last insn that can throw and the first insn of the epilogue. */
39639 static void
39640 ix86_seh_fixup_eh_fallthru (void)
39642 edge e;
39643 edge_iterator ei;
39645 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39647 rtx insn, next;
39649 /* Find the beginning of the epilogue. */
39650 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39651 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39652 break;
39653 if (insn == NULL)
39654 continue;
39656 /* We only care about preceding insns that can throw. */
39657 insn = prev_active_insn (insn);
39658 if (insn == NULL || !can_throw_internal (insn))
39659 continue;
39661 /* Do not separate calls from their debug information. */
39662 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39663 if (NOTE_P (next)
39664 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39665 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39666 insn = next;
39667 else
39668 break;
39670 emit_insn_after (gen_nops (const1_rtx), insn);
39674 /* Implement machine specific optimizations. We implement padding of returns
39675 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
39676 static void
39677 ix86_reorg (void)
39679 /* We are freeing block_for_insn in the toplev to keep compatibility
39680 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39681 compute_bb_for_insn ();
39683 if (TARGET_SEH && current_function_has_exception_handlers ())
39684 ix86_seh_fixup_eh_fallthru ();
39686 if (optimize && optimize_function_for_speed_p (cfun))
39688 if (TARGET_PAD_SHORT_FUNCTION)
39689 ix86_pad_short_function ();
39690 else if (TARGET_PAD_RETURNS)
39691 ix86_pad_returns ();
39692 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39693 if (TARGET_FOUR_JUMP_LIMIT)
39694 ix86_avoid_jump_mispredicts ();
39695 #endif
39699 /* Return nonzero when a QImode register that must be represented via a REX
39700 prefix is used. */
39701 bool
39702 x86_extended_QIreg_mentioned_p (rtx insn)
39704 int i;
39705 extract_insn_cached (insn);
39706 for (i = 0; i < recog_data.n_operands; i++)
39707 if (GENERAL_REG_P (recog_data.operand[i])
39708 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39709 return true;
39710 return false;
39713 /* Return nonzero when P points to a register encoded via a REX prefix.
39714 Called via for_each_rtx. */
39715 static int
39716 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
39718 unsigned int regno;
39719 if (!REG_P (*p))
39720 return 0;
39721 regno = REGNO (*p);
39722 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39725 /* Return true when INSN mentions a register that must be encoded using a
39726 REX prefix. */
39727 bool
39728 x86_extended_reg_mentioned_p (rtx insn)
39730 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39731 extended_reg_mentioned_1, NULL);
39734 /* If profitable, negate (without causing overflow) the integer constant
39735 of mode MODE at location LOC.  Return true in this case. */
39736 bool
39737 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39739 HOST_WIDE_INT val;
39741 if (!CONST_INT_P (*loc))
39742 return false;
39744 switch (mode)
39746 case DImode:
39747 /* DImode x86_64 constants must fit in 32 bits. */
39748 gcc_assert (x86_64_immediate_operand (*loc, mode));
39750 mode = SImode;
39751 break;
39753 case SImode:
39754 case HImode:
39755 case QImode:
39756 break;
39758 default:
39759 gcc_unreachable ();
39762 /* Avoid overflows. */
39763 if (mode_signbit_p (mode, *loc))
39764 return false;
39766 val = INTVAL (*loc);
39768 /* Make things pretty by using `subl $4,%eax' rather than `addl $-4,%eax'.
39769 Exception: -128 encodes smaller than 128, so swap the sign and the operation. */
39770 if ((val < 0 && val != -128)
39771 || val == 128)
39773 *loc = GEN_INT (-val);
39774 return true;
39777 return false;
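/* Editor's sketch (not part of this file) of the sign-flip heuristic in
   x86_maybe_negate_const_int above: negative immediates become
   subtractions ("subl $4" instead of "addl $-4"), except that -128 is
   kept as-is and +128 is flipped, because -128 fits in a sign-extended
   8-bit immediate while +128 does not.  */

static int
example_should_negate_imm (long val)
{
  return (val < 0 && val != -128) || val == 128;
}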
39780 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39781 optabs would emit if we didn't have TFmode patterns. */
39783 void
39784 x86_emit_floatuns (rtx operands[2])
39786 rtx neglab, donelab, i0, i1, f0, in, out;
39787 enum machine_mode mode, inmode;
39789 inmode = GET_MODE (operands[1]);
39790 gcc_assert (inmode == SImode || inmode == DImode);
39792 out = operands[0];
39793 in = force_reg (inmode, operands[1]);
39794 mode = GET_MODE (out);
39795 neglab = gen_label_rtx ();
39796 donelab = gen_label_rtx ();
39797 f0 = gen_reg_rtx (mode);
39799 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39801 expand_float (out, in, 0);
39803 emit_jump_insn (gen_jump (donelab));
39804 emit_barrier ();
39806 emit_label (neglab);
39808 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39809 1, OPTAB_DIRECT);
39810 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39811 1, OPTAB_DIRECT);
39812 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39814 expand_float (f0, i0, 0);
39816 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39818 emit_label (donelab);
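/* Editor's plain-C rendering of the sequence x86_emit_floatuns emits above
   (a sketch; the real function emits RTL, not C).  Values that look
   negative when reinterpreted as signed are halved with the low bit folded
   back in, converted, and then doubled, which preserves correct rounding.  */

static double
example_floatuns (unsigned long long x)
{
  unsigned long long half;
  double f;

  if ((long long) x >= 0)
    return (double) x;          /* already representable via signed convert */

  half = (x >> 1) | (x & 1);    /* halve, keeping the sticky low bit */
  f = (double) half;
  return f + f;                 /* scale back up */
}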
39821 /* AVX512F supports 64-byte integer vector operations,
39822 so the longest vector we are faced with is V64QImode. */
39823 #define MAX_VECT_LEN 64
39825 struct expand_vec_perm_d
39827 rtx target, op0, op1;
39828 unsigned char perm[MAX_VECT_LEN];
39829 enum machine_mode vmode;
39830 unsigned char nelt;
39831 bool one_operand_p;
39832 bool testing_p;
39835 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39836 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39837 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39839 /* Get a vector mode of the same size as the original but with elements
39840 twice as wide. This is only guaranteed to apply to integral vectors. */
39842 static inline enum machine_mode
39843 get_mode_wider_vector (enum machine_mode o)
39845 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39846 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39847 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39848 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39849 return n;
39852 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39853 fill target with val via vec_duplicate. */
39855 static bool
39856 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39858 bool ok;
39859 rtx insn, dup;
39861 /* First attempt to recognize VAL as-is. */
39862 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39863 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39864 if (recog_memoized (insn) < 0)
39866 rtx seq;
39867 /* If that fails, force VAL into a register. */
39869 start_sequence ();
39870 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39871 seq = get_insns ();
39872 end_sequence ();
39873 if (seq)
39874 emit_insn_before (seq, insn);
39876 ok = recog_memoized (insn) >= 0;
39877 gcc_assert (ok);
39879 return true;
39882 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39883 with all elements equal to VAR. Return true if successful. */
39885 static bool
39886 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39887 rtx target, rtx val)
39889 bool ok;
39891 switch (mode)
39893 case V2SImode:
39894 case V2SFmode:
39895 if (!mmx_ok)
39896 return false;
39897 /* FALLTHRU */
39899 case V4DFmode:
39900 case V4DImode:
39901 case V8SFmode:
39902 case V8SImode:
39903 case V2DFmode:
39904 case V2DImode:
39905 case V4SFmode:
39906 case V4SImode:
39907 case V16SImode:
39908 case V8DImode:
39909 case V16SFmode:
39910 case V8DFmode:
39911 return ix86_vector_duplicate_value (mode, target, val);
39913 case V4HImode:
39914 if (!mmx_ok)
39915 return false;
39916 if (TARGET_SSE || TARGET_3DNOW_A)
39918 rtx x;
39920 val = gen_lowpart (SImode, val);
39921 x = gen_rtx_TRUNCATE (HImode, val);
39922 x = gen_rtx_VEC_DUPLICATE (mode, x);
39923 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39924 return true;
39926 goto widen;
39928 case V8QImode:
39929 if (!mmx_ok)
39930 return false;
39931 goto widen;
39933 case V8HImode:
39934 if (TARGET_SSE2)
39936 struct expand_vec_perm_d dperm;
39937 rtx tmp1, tmp2;
39939 permute:
39940 memset (&dperm, 0, sizeof (dperm));
39941 dperm.target = target;
39942 dperm.vmode = mode;
39943 dperm.nelt = GET_MODE_NUNITS (mode);
39944 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39945 dperm.one_operand_p = true;
39947 /* Extend to SImode using a paradoxical SUBREG. */
39948 tmp1 = gen_reg_rtx (SImode);
39949 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39951 /* Insert the SImode value as low element of a V4SImode vector. */
39952 tmp2 = gen_reg_rtx (V4SImode);
39953 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39954 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39956 ok = (expand_vec_perm_1 (&dperm)
39957 || expand_vec_perm_broadcast_1 (&dperm));
39958 gcc_assert (ok);
39959 return ok;
39961 goto widen;
39963 case V16QImode:
39964 if (TARGET_SSE2)
39965 goto permute;
39966 goto widen;
39968 widen:
39969 /* Replicate the value once into the next wider mode and recurse. */
39971 enum machine_mode smode, wsmode, wvmode;
39972 rtx x;
39974 smode = GET_MODE_INNER (mode);
39975 wvmode = get_mode_wider_vector (mode);
39976 wsmode = GET_MODE_INNER (wvmode);
39978 val = convert_modes (wsmode, smode, val, true);
39979 x = expand_simple_binop (wsmode, ASHIFT, val,
39980 GEN_INT (GET_MODE_BITSIZE (smode)),
39981 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39982 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39984 x = gen_reg_rtx (wvmode);
39985 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39986 gcc_assert (ok);
39987 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39988 return ok;
39991 case V16HImode:
39992 case V32QImode:
39994 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39995 rtx x = gen_reg_rtx (hvmode);
39997 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39998 gcc_assert (ok);
40000 x = gen_rtx_VEC_CONCAT (mode, x, x);
40001 emit_insn (gen_rtx_SET (VOIDmode, target, x));
40003 return true;
40005 default:
40006 return false;
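/* Illustrative sketch (exposition only): the "widen" step above on
   scalars.  A narrow element is replicated into an element twice as wide
   by shift-and-or, so the broadcast can be retried in the vector mode
   with half as many, twice as wide elements.  */

static unsigned short
widen_duplicate_sketch (unsigned char b)
{
  return (unsigned short) (((unsigned int) b << 8) | b);
}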
40010 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
40011 whose ONE_VAR element is VAR, and other elements are zero. Return true
40012 if successful. */
40014 static bool
40015 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
40016 rtx target, rtx var, int one_var)
40018 enum machine_mode vsimode;
40019 rtx new_target;
40020 rtx x, tmp;
40021 bool use_vector_set = false;
40023 switch (mode)
40025 case V2DImode:
40026 /* For SSE4.1, we normally use vector set. But if the second
40027 element is zero and inter-unit moves are OK, we use movq
40028 instead. */
40029 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
40030 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
40031 && one_var == 0));
40032 break;
40033 case V16QImode:
40034 case V4SImode:
40035 case V4SFmode:
40036 use_vector_set = TARGET_SSE4_1;
40037 break;
40038 case V8HImode:
40039 use_vector_set = TARGET_SSE2;
40040 break;
40041 case V4HImode:
40042 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
40043 break;
40044 case V32QImode:
40045 case V16HImode:
40046 case V8SImode:
40047 case V8SFmode:
40048 case V4DFmode:
40049 use_vector_set = TARGET_AVX;
40050 break;
40051 case V4DImode:
40052 /* Use ix86_expand_vector_set in 64bit mode only. */
40053 use_vector_set = TARGET_AVX && TARGET_64BIT;
40054 break;
40055 default:
40056 break;
40059 if (use_vector_set)
40061 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
40062 var = force_reg (GET_MODE_INNER (mode), var);
40063 ix86_expand_vector_set (mmx_ok, target, var, one_var);
40064 return true;
40067 switch (mode)
40069 case V2SFmode:
40070 case V2SImode:
40071 if (!mmx_ok)
40072 return false;
40073 /* FALLTHRU */
40075 case V2DFmode:
40076 case V2DImode:
40077 if (one_var != 0)
40078 return false;
40079 var = force_reg (GET_MODE_INNER (mode), var);
40080 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
40081 emit_insn (gen_rtx_SET (VOIDmode, target, x));
40082 return true;
40084 case V4SFmode:
40085 case V4SImode:
40086 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
40087 new_target = gen_reg_rtx (mode);
40088 else
40089 new_target = target;
40090 var = force_reg (GET_MODE_INNER (mode), var);
40091 x = gen_rtx_VEC_DUPLICATE (mode, var);
40092 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
40093 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
40094 if (one_var != 0)
40096 /* We need to shuffle the value to the correct position, so
40097 create a new pseudo to store the intermediate result. */
40099 /* With SSE2, we can use the integer shuffle insns. */
40100 if (mode != V4SFmode && TARGET_SSE2)
40102 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
40103 const1_rtx,
40104 GEN_INT (one_var == 1 ? 0 : 1),
40105 GEN_INT (one_var == 2 ? 0 : 1),
40106 GEN_INT (one_var == 3 ? 0 : 1)));
40107 if (target != new_target)
40108 emit_move_insn (target, new_target);
40109 return true;
40112 /* Otherwise convert the intermediate result to V4SFmode and
40113 use the SSE1 shuffle instructions. */
40114 if (mode != V4SFmode)
40116 tmp = gen_reg_rtx (V4SFmode);
40117 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
40119 else
40120 tmp = new_target;
40122 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
40123 const1_rtx,
40124 GEN_INT (one_var == 1 ? 0 : 1),
40125 GEN_INT (one_var == 2 ? 0+4 : 1+4),
40126 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
40128 if (mode != V4SFmode)
40129 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
40130 else if (tmp != target)
40131 emit_move_insn (target, tmp);
40133 else if (target != new_target)
40134 emit_move_insn (target, new_target);
40135 return true;
40137 case V8HImode:
40138 case V16QImode:
40139 vsimode = V4SImode;
40140 goto widen;
40141 case V4HImode:
40142 case V8QImode:
40143 if (!mmx_ok)
40144 return false;
40145 vsimode = V2SImode;
40146 goto widen;
40147 widen:
40148 if (one_var != 0)
40149 return false;
40151 /* Zero extend the variable element to SImode and recurse. */
40152 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
40154 x = gen_reg_rtx (vsimode);
40155 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
40156 var, one_var))
40157 gcc_unreachable ();
40159 emit_move_insn (target, gen_lowpart (mode, x));
40160 return true;
40162 default:
40163 return false;
40167 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
40168 consisting of the values in VALS. It is known that all elements
40169 except ONE_VAR are constants. Return true if successful. */
40171 static bool
40172 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
40173 rtx target, rtx vals, int one_var)
40175 rtx var = XVECEXP (vals, 0, one_var);
40176 enum machine_mode wmode;
40177 rtx const_vec, x;
40179 const_vec = copy_rtx (vals);
40180 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
40181 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
40183 switch (mode)
40185 case V2DFmode:
40186 case V2DImode:
40187 case V2SFmode:
40188 case V2SImode:
40189 /* For the two element vectors, it's just as easy to use
40190 the general case. */
40191 return false;
40193 case V4DImode:
40194 /* Use ix86_expand_vector_set in 64bit mode only. */
40195 if (!TARGET_64BIT)
40196 return false;
40197 case V4DFmode:
40198 case V8SFmode:
40199 case V8SImode:
40200 case V16HImode:
40201 case V32QImode:
40202 case V4SFmode:
40203 case V4SImode:
40204 case V8HImode:
40205 case V4HImode:
40206 break;
40208 case V16QImode:
40209 if (TARGET_SSE4_1)
40210 break;
40211 wmode = V8HImode;
40212 goto widen;
40213 case V8QImode:
40214 wmode = V4HImode;
40215 goto widen;
40216 widen:
40217 /* There's no way to set one QImode entry easily. Combine
40218 the variable value with its adjacent constant value, and
40219 promote to an HImode set. */
40220 x = XVECEXP (vals, 0, one_var ^ 1);
40221 if (one_var & 1)
40223 var = convert_modes (HImode, QImode, var, true);
40224 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
40225 NULL_RTX, 1, OPTAB_LIB_WIDEN);
40226 x = GEN_INT (INTVAL (x) & 0xff);
40228 else
40230 var = convert_modes (HImode, QImode, var, true);
40231 x = gen_int_mode (INTVAL (x) << 8, HImode);
40233 if (x != const0_rtx)
40234 var = expand_simple_binop (HImode, IOR, var, x, var,
40235 1, OPTAB_LIB_WIDEN);
40237 x = gen_reg_rtx (wmode);
40238 emit_move_insn (x, gen_lowpart (wmode, const_vec));
40239 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
40241 emit_move_insn (target, gen_lowpart (mode, x));
40242 return true;
40244 default:
40245 return false;
40248 emit_move_insn (target, const_vec);
40249 ix86_expand_vector_set (mmx_ok, target, var, one_var);
40250 return true;
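/* Illustrative sketch (exposition only): the QImode-to-HImode promotion
   above.  There is no cheap single-byte vector set, so the variable byte
   VAR is merged with its constant neighbour C into one HImode element and
   the set is done at index one_var >> 1.  ODD is (one_var & 1), i.e.
   whether VAR is the high half of the pair in little-endian order.  */

static unsigned short
combine_qi_pair_sketch (unsigned char var, unsigned char c, int odd)
{
  if (odd)
    return (unsigned short) (((unsigned int) var << 8) | c);
  return (unsigned short) (((unsigned int) c << 8) | var);
}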
40253 /* A subroutine of ix86_expand_vector_init_general. Use vector
40254 concatenate to handle the most general case: all values variable,
40255 and none identical. */
40257 static void
40258 ix86_expand_vector_init_concat (enum machine_mode mode,
40259 rtx target, rtx *ops, int n)
40261 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
40262 rtx first[16], second[8], third[4];
40263 rtvec v;
40264 int i, j;
40266 switch (n)
40268 case 2:
40269 switch (mode)
40271 case V16SImode:
40272 cmode = V8SImode;
40273 break;
40274 case V16SFmode:
40275 cmode = V8SFmode;
40276 break;
40277 case V8DImode:
40278 cmode = V4DImode;
40279 break;
40280 case V8DFmode:
40281 cmode = V4DFmode;
40282 break;
40283 case V8SImode:
40284 cmode = V4SImode;
40285 break;
40286 case V8SFmode:
40287 cmode = V4SFmode;
40288 break;
40289 case V4DImode:
40290 cmode = V2DImode;
40291 break;
40292 case V4DFmode:
40293 cmode = V2DFmode;
40294 break;
40295 case V4SImode:
40296 cmode = V2SImode;
40297 break;
40298 case V4SFmode:
40299 cmode = V2SFmode;
40300 break;
40301 case V2DImode:
40302 cmode = DImode;
40303 break;
40304 case V2SImode:
40305 cmode = SImode;
40306 break;
40307 case V2DFmode:
40308 cmode = DFmode;
40309 break;
40310 case V2SFmode:
40311 cmode = SFmode;
40312 break;
40313 default:
40314 gcc_unreachable ();
40317 if (!register_operand (ops[1], cmode))
40318 ops[1] = force_reg (cmode, ops[1]);
40319 if (!register_operand (ops[0], cmode))
40320 ops[0] = force_reg (cmode, ops[0]);
40321 emit_insn (gen_rtx_SET (VOIDmode, target,
40322 gen_rtx_VEC_CONCAT (mode, ops[0],
40323 ops[1])));
40324 break;
40326 case 4:
40327 switch (mode)
40329 case V4DImode:
40330 cmode = V2DImode;
40331 break;
40332 case V4DFmode:
40333 cmode = V2DFmode;
40334 break;
40335 case V4SImode:
40336 cmode = V2SImode;
40337 break;
40338 case V4SFmode:
40339 cmode = V2SFmode;
40340 break;
40341 default:
40342 gcc_unreachable ();
40344 goto half;
40346 case 8:
40347 switch (mode)
40349 case V8DImode:
40350 cmode = V2DImode;
40351 hmode = V4DImode;
40352 break;
40353 case V8DFmode:
40354 cmode = V2DFmode;
40355 hmode = V4DFmode;
40356 break;
40357 case V8SImode:
40358 cmode = V2SImode;
40359 hmode = V4SImode;
40360 break;
40361 case V8SFmode:
40362 cmode = V2SFmode;
40363 hmode = V4SFmode;
40364 break;
40365 default:
40366 gcc_unreachable ();
40368 goto half;
40370 case 16:
40371 switch (mode)
40373 case V16SImode:
40374 cmode = V2SImode;
40375 hmode = V4SImode;
40376 gmode = V8SImode;
40377 break;
40378 case V16SFmode:
40379 cmode = V2SFmode;
40380 hmode = V4SFmode;
40381 gmode = V8SFmode;
40382 break;
40383 default:
40384 gcc_unreachable ();
40386 goto half;
40388 half:
40389 /* FIXME: We process inputs backward to help RA. PR 36222. */
40390 i = n - 1;
40391 j = (n >> 1) - 1;
40392 for (; i > 0; i -= 2, j--)
40394 first[j] = gen_reg_rtx (cmode);
40395 v = gen_rtvec (2, ops[i - 1], ops[i]);
40396 ix86_expand_vector_init (false, first[j],
40397 gen_rtx_PARALLEL (cmode, v));
40400 n >>= 1;
40401 if (n > 4)
40403 gcc_assert (hmode != VOIDmode);
40404 gcc_assert (gmode != VOIDmode);
40405 for (i = j = 0; i < n; i += 2, j++)
40407 second[j] = gen_reg_rtx (hmode);
40408 ix86_expand_vector_init_concat (hmode, second [j],
40409 &first [i], 2);
40411 n >>= 1;
40412 for (i = j = 0; i < n; i += 2, j++)
40414 third[j] = gen_reg_rtx (gmode);
40415 ix86_expand_vector_init_concat (gmode, third[j],
40416 &second[i], 2);
40418 n >>= 1;
40419 ix86_expand_vector_init_concat (mode, target, third, n);
40421 else if (n > 2)
40423 gcc_assert (hmode != VOIDmode);
40424 for (i = j = 0; i < n; i += 2, j++)
40426 second[j] = gen_reg_rtx (hmode);
40427 ix86_expand_vector_init_concat (hmode, second [j],
40428 &first [i], 2);
40430 n >>= 1;
40431 ix86_expand_vector_init_concat (mode, target, second, n);
40433 else
40434 ix86_expand_vector_init_concat (mode, target, first, n);
40435 break;
40437 default:
40438 gcc_unreachable ();
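/* Illustrative sketch (exposition only): one pairing level of the
   recursion above, modelled on scalars.  Neighbouring operands are
   concatenated into double-width slots, walking backwards exactly like
   the first[]/second[]/third[] loops, until a single full-width vector
   remains.  */

static void
concat_pairs_sketch (const unsigned int *ops, int n, unsigned long long *out)
{
  int i, j;

  for (i = n - 1, j = (n >> 1) - 1; i > 0; i -= 2, j--)
    out[j] = ((unsigned long long) ops[i] << 32) | ops[i - 1];
}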
40442 /* A subroutine of ix86_expand_vector_init_general. Use vector
40443 interleave to handle the most general case: all values variable,
40444 and none identical. */
40446 static void
40447 ix86_expand_vector_init_interleave (enum machine_mode mode,
40448 rtx target, rtx *ops, int n)
40450 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40451 int i, j;
40452 rtx op0, op1;
40453 rtx (*gen_load_even) (rtx, rtx, rtx);
40454 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40455 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40457 switch (mode)
40459 case V8HImode:
40460 gen_load_even = gen_vec_setv8hi;
40461 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40462 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40463 inner_mode = HImode;
40464 first_imode = V4SImode;
40465 second_imode = V2DImode;
40466 third_imode = VOIDmode;
40467 break;
40468 case V16QImode:
40469 gen_load_even = gen_vec_setv16qi;
40470 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40471 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40472 inner_mode = QImode;
40473 first_imode = V8HImode;
40474 second_imode = V4SImode;
40475 third_imode = V2DImode;
40476 break;
40477 default:
40478 gcc_unreachable ();
40481 for (i = 0; i < n; i++)
40483 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40484 op0 = gen_reg_rtx (SImode);
40485 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40487 /* Insert the SImode value as low element of V4SImode vector. */
40488 op1 = gen_reg_rtx (V4SImode);
40489 op0 = gen_rtx_VEC_MERGE (V4SImode,
40490 gen_rtx_VEC_DUPLICATE (V4SImode,
40491 op0),
40492 CONST0_RTX (V4SImode),
40493 const1_rtx);
40494 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40496 /* Cast the V4SImode vector back to a vector in original mode. */
40497 op0 = gen_reg_rtx (mode);
40498 emit_move_insn (op0, gen_lowpart (mode, op1));
40500 /* Load even elements into the second position. */
40501 emit_insn (gen_load_even (op0,
40502 force_reg (inner_mode,
40503 ops [i + i + 1]),
40504 const1_rtx));
40506 /* Cast vector to FIRST_IMODE vector. */
40507 ops[i] = gen_reg_rtx (first_imode);
40508 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40511 /* Interleave low FIRST_IMODE vectors. */
40512 for (i = j = 0; i < n; i += 2, j++)
40514 op0 = gen_reg_rtx (first_imode);
40515 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40517 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40518 ops[j] = gen_reg_rtx (second_imode);
40519 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40522 /* Interleave low SECOND_IMODE vectors. */
40523 switch (second_imode)
40525 case V4SImode:
40526 for (i = j = 0; i < n / 2; i += 2, j++)
40528 op0 = gen_reg_rtx (second_imode);
40529 emit_insn (gen_interleave_second_low (op0, ops[i],
40530 ops[i + 1]));
40532 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40533 vector. */
40534 ops[j] = gen_reg_rtx (third_imode);
40535 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40537 second_imode = V2DImode;
40538 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40539 /* FALLTHRU */
40541 case V2DImode:
40542 op0 = gen_reg_rtx (second_imode);
40543 emit_insn (gen_interleave_second_low (op0, ops[0],
40544 ops[1]));
40546 /* Cast the SECOND_IMODE vector back to a vector in the original
40547 mode. */
40548 emit_insn (gen_rtx_SET (VOIDmode, target,
40549 gen_lowpart (mode, op0)));
40550 break;
40552 default:
40553 gcc_unreachable ();
40557 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40558 all values variable, and none identical. */
40560 static void
40561 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40562 rtx target, rtx vals)
40564 rtx ops[64], op0, op1;
40565 enum machine_mode half_mode = VOIDmode;
40566 int n, i;
40568 switch (mode)
40570 case V2SFmode:
40571 case V2SImode:
40572 if (!mmx_ok && !TARGET_SSE)
40573 break;
40574 /* FALLTHRU */
40576 case V16SImode:
40577 case V16SFmode:
40578 case V8DFmode:
40579 case V8DImode:
40580 case V8SFmode:
40581 case V8SImode:
40582 case V4DFmode:
40583 case V4DImode:
40584 case V4SFmode:
40585 case V4SImode:
40586 case V2DFmode:
40587 case V2DImode:
40588 n = GET_MODE_NUNITS (mode);
40589 for (i = 0; i < n; i++)
40590 ops[i] = XVECEXP (vals, 0, i);
40591 ix86_expand_vector_init_concat (mode, target, ops, n);
40592 return;
40594 case V32QImode:
40595 half_mode = V16QImode;
40596 goto half;
40598 case V16HImode:
40599 half_mode = V8HImode;
40600 goto half;
40602 half:
40603 n = GET_MODE_NUNITS (mode);
40604 for (i = 0; i < n; i++)
40605 ops[i] = XVECEXP (vals, 0, i);
40606 op0 = gen_reg_rtx (half_mode);
40607 op1 = gen_reg_rtx (half_mode);
40608 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40609 n >> 2);
40610 ix86_expand_vector_init_interleave (half_mode, op1,
40611 &ops [n >> 1], n >> 2);
40612 emit_insn (gen_rtx_SET (VOIDmode, target,
40613 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40614 return;
40616 case V16QImode:
40617 if (!TARGET_SSE4_1)
40618 break;
40619 /* FALLTHRU */
40621 case V8HImode:
40622 if (!TARGET_SSE2)
40623 break;
40625 /* Don't use ix86_expand_vector_init_interleave if we can't
40626 move from GPR to SSE register directly. */
40627 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40628 break;
40630 n = GET_MODE_NUNITS (mode);
40631 for (i = 0; i < n; i++)
40632 ops[i] = XVECEXP (vals, 0, i);
40633 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40634 return;
40636 case V4HImode:
40637 case V8QImode:
40638 break;
40640 default:
40641 gcc_unreachable ();
40645 int i, j, n_elts, n_words, n_elt_per_word;
40646 enum machine_mode inner_mode;
40647 rtx words[4], shift;
40649 inner_mode = GET_MODE_INNER (mode);
40650 n_elts = GET_MODE_NUNITS (mode);
40651 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40652 n_elt_per_word = n_elts / n_words;
40653 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40655 for (i = 0; i < n_words; ++i)
40657 rtx word = NULL_RTX;
40659 for (j = 0; j < n_elt_per_word; ++j)
40661 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40662 elt = convert_modes (word_mode, inner_mode, elt, true);
40664 if (j == 0)
40665 word = elt;
40666 else
40668 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40669 word, 1, OPTAB_LIB_WIDEN);
40670 word = expand_simple_binop (word_mode, IOR, word, elt,
40671 word, 1, OPTAB_LIB_WIDEN);
40675 words[i] = word;
40678 if (n_words == 1)
40679 emit_move_insn (target, gen_lowpart (mode, words[0]));
40680 else if (n_words == 2)
40682 rtx tmp = gen_reg_rtx (mode);
40683 emit_clobber (tmp);
40684 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40685 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40686 emit_move_insn (target, tmp);
40688 else if (n_words == 4)
40690 rtx tmp = gen_reg_rtx (V4SImode);
40691 gcc_assert (word_mode == SImode);
40692 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40693 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40694 emit_move_insn (target, gen_lowpart (mode, tmp));
40696 else
40697 gcc_unreachable ();
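/* Illustrative sketch (exposition only): the shift-and-or packing above
   for two HImode elements per 32-bit word.  Elements are folded in from
   the highest index down, leaving element 0 in the least significant
   bits (little-endian element order within the word).  */

static unsigned int
pack_word_sketch (unsigned short e0, unsigned short e1)
{
  unsigned int word = e1;

  word = (word << 16) | e0;
  return word;
}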
40701 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40702 instructions unless MMX_OK is true. */
40704 void
40705 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40707 enum machine_mode mode = GET_MODE (target);
40708 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40709 int n_elts = GET_MODE_NUNITS (mode);
40710 int n_var = 0, one_var = -1;
40711 bool all_same = true, all_const_zero = true;
40712 int i;
40713 rtx x;
40715 for (i = 0; i < n_elts; ++i)
40717 x = XVECEXP (vals, 0, i);
40718 if (!(CONST_INT_P (x)
40719 || GET_CODE (x) == CONST_DOUBLE
40720 || GET_CODE (x) == CONST_FIXED))
40721 n_var++, one_var = i;
40722 else if (x != CONST0_RTX (inner_mode))
40723 all_const_zero = false;
40724 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40725 all_same = false;
40728 /* Constants are best loaded from the constant pool. */
40729 if (n_var == 0)
40731 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40732 return;
40735 /* If all values are identical, broadcast the value. */
40736 if (all_same
40737 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40738 XVECEXP (vals, 0, 0)))
40739 return;
40741 /* Values where only one field is non-constant are best loaded from
40742 the pool and overwritten via move later. */
40743 if (n_var == 1)
40745 if (all_const_zero
40746 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40747 XVECEXP (vals, 0, one_var),
40748 one_var))
40749 return;
40751 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40752 return;
40755 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40758 void
40759 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40761 enum machine_mode mode = GET_MODE (target);
40762 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40763 enum machine_mode half_mode;
40764 bool use_vec_merge = false;
40765 rtx tmp;
40766 static rtx (*gen_extract[6][2]) (rtx, rtx)
40768 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40769 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40770 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40771 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40772 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40773 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40775 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40777 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40778 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40779 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40780 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40781 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40782 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40784 int i, j, n;
40786 switch (mode)
40788 case V2SFmode:
40789 case V2SImode:
40790 if (mmx_ok)
40792 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40793 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40794 if (elt == 0)
40795 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40796 else
40797 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40798 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40799 return;
40801 break;
40803 case V2DImode:
40804 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40805 if (use_vec_merge)
40806 break;
40808 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40809 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40810 if (elt == 0)
40811 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40812 else
40813 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40814 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40815 return;
40817 case V2DFmode:
40819 rtx op0, op1;
40821 /* For the two element vectors, we implement a VEC_CONCAT with
40822 the extraction of the other element. */
40824 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40825 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40827 if (elt == 0)
40828 op0 = val, op1 = tmp;
40829 else
40830 op0 = tmp, op1 = val;
40832 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40833 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40835 return;
40837 case V4SFmode:
40838 use_vec_merge = TARGET_SSE4_1;
40839 if (use_vec_merge)
40840 break;
40842 switch (elt)
40844 case 0:
40845 use_vec_merge = true;
40846 break;
40848 case 1:
40849 /* tmp = target = A B C D */
40850 tmp = copy_to_reg (target);
40851 /* target = A A B B */
40852 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40853 /* target = X A B B */
40854 ix86_expand_vector_set (false, target, val, 0);
40855 /* target = A X C D */
40856 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40857 const1_rtx, const0_rtx,
40858 GEN_INT (2+4), GEN_INT (3+4)));
40859 return;
40861 case 2:
40862 /* tmp = target = A B C D */
40863 tmp = copy_to_reg (target);
40864 /* tmp = X B C D */
40865 ix86_expand_vector_set (false, tmp, val, 0);
40866 /* target = A B X D */
40867 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40868 const0_rtx, const1_rtx,
40869 GEN_INT (0+4), GEN_INT (3+4)));
40870 return;
40872 case 3:
40873 /* tmp = target = A B C D */
40874 tmp = copy_to_reg (target);
40875 /* tmp = X B C D */
40876 ix86_expand_vector_set (false, tmp, val, 0);
40877 /* target = A B C X */
40878 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40879 const0_rtx, const1_rtx,
40880 GEN_INT (2+4), GEN_INT (0+4)));
40881 return;
40883 default:
40884 gcc_unreachable ();
40886 break;
40888 case V4SImode:
40889 use_vec_merge = TARGET_SSE4_1;
40890 if (use_vec_merge)
40891 break;
40893 /* Element 0 handled by vec_merge below. */
40894 if (elt == 0)
40896 use_vec_merge = true;
40897 break;
40900 if (TARGET_SSE2)
40902 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40903 store into element 0, then shuffle them back. */
40905 rtx order[4];
40907 order[0] = GEN_INT (elt);
40908 order[1] = const1_rtx;
40909 order[2] = const2_rtx;
40910 order[3] = GEN_INT (3);
40911 order[elt] = const0_rtx;
40913 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40914 order[1], order[2], order[3]));
40916 ix86_expand_vector_set (false, target, val, 0);
40918 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40919 order[1], order[2], order[3]));
40921 else
40923 /* For SSE1, we have to reuse the V4SF code. */
40924 rtx t = gen_reg_rtx (V4SFmode);
40925 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40926 emit_move_insn (target, gen_lowpart (mode, t));
40928 return;
40930 case V8HImode:
40931 use_vec_merge = TARGET_SSE2;
40932 break;
40933 case V4HImode:
40934 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40935 break;
40937 case V16QImode:
40938 use_vec_merge = TARGET_SSE4_1;
40939 break;
40941 case V8QImode:
40942 break;
40944 case V32QImode:
40945 half_mode = V16QImode;
40946 j = 0;
40947 n = 16;
40948 goto half;
40950 case V16HImode:
40951 half_mode = V8HImode;
40952 j = 1;
40953 n = 8;
40954 goto half;
40956 case V8SImode:
40957 half_mode = V4SImode;
40958 j = 2;
40959 n = 4;
40960 goto half;
40962 case V4DImode:
40963 half_mode = V2DImode;
40964 j = 3;
40965 n = 2;
40966 goto half;
40968 case V8SFmode:
40969 half_mode = V4SFmode;
40970 j = 4;
40971 n = 4;
40972 goto half;
40974 case V4DFmode:
40975 half_mode = V2DFmode;
40976 j = 5;
40977 n = 2;
40978 goto half;
40980 half:
40981 /* Compute offset. */
40982 i = elt / n;
40983 elt %= n;
40985 gcc_assert (i <= 1);
40987 /* Extract the half. */
40988 tmp = gen_reg_rtx (half_mode);
40989 emit_insn (gen_extract[j][i] (tmp, target));
40991 /* Put val in tmp at elt. */
40992 ix86_expand_vector_set (false, tmp, val, elt);
40994 /* Put it back. */
40995 emit_insn (gen_insert[j][i] (target, target, tmp));
40996 return;
40998 default:
40999 break;
41002 if (use_vec_merge)
41004 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
41005 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
41006 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41008 else
41010 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
41012 emit_move_insn (mem, target);
41014 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
41015 emit_move_insn (tmp, val);
41017 emit_move_insn (target, mem);
41021 void
41022 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
41024 enum machine_mode mode = GET_MODE (vec);
41025 enum machine_mode inner_mode = GET_MODE_INNER (mode);
41026 bool use_vec_extr = false;
41027 rtx tmp;
41029 switch (mode)
41031 case V2SImode:
41032 case V2SFmode:
41033 if (!mmx_ok)
41034 break;
41035 /* FALLTHRU */
41037 case V2DFmode:
41038 case V2DImode:
41039 use_vec_extr = true;
41040 break;
41042 case V4SFmode:
41043 use_vec_extr = TARGET_SSE4_1;
41044 if (use_vec_extr)
41045 break;
41047 switch (elt)
41049 case 0:
41050 tmp = vec;
41051 break;
41053 case 1:
41054 case 3:
41055 tmp = gen_reg_rtx (mode);
41056 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
41057 GEN_INT (elt), GEN_INT (elt),
41058 GEN_INT (elt+4), GEN_INT (elt+4)));
41059 break;
41061 case 2:
41062 tmp = gen_reg_rtx (mode);
41063 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
41064 break;
41066 default:
41067 gcc_unreachable ();
41069 vec = tmp;
41070 use_vec_extr = true;
41071 elt = 0;
41072 break;
41074 case V4SImode:
41075 use_vec_extr = TARGET_SSE4_1;
41076 if (use_vec_extr)
41077 break;
41079 if (TARGET_SSE2)
41081 switch (elt)
41083 case 0:
41084 tmp = vec;
41085 break;
41087 case 1:
41088 case 3:
41089 tmp = gen_reg_rtx (mode);
41090 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
41091 GEN_INT (elt), GEN_INT (elt),
41092 GEN_INT (elt), GEN_INT (elt)));
41093 break;
41095 case 2:
41096 tmp = gen_reg_rtx (mode);
41097 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
41098 break;
41100 default:
41101 gcc_unreachable ();
41103 vec = tmp;
41104 use_vec_extr = true;
41105 elt = 0;
41107 else
41109 /* For SSE1, we have to reuse the V4SF code. */
41110 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
41111 gen_lowpart (V4SFmode, vec), elt);
41112 return;
41114 break;
41116 case V8HImode:
41117 use_vec_extr = TARGET_SSE2;
41118 break;
41119 case V4HImode:
41120 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
41121 break;
41123 case V16QImode:
41124 use_vec_extr = TARGET_SSE4_1;
41125 break;
41127 case V8SFmode:
41128 if (TARGET_AVX)
41130 tmp = gen_reg_rtx (V4SFmode);
41131 if (elt < 4)
41132 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
41133 else
41134 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
41135 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41136 return;
41138 break;
41140 case V4DFmode:
41141 if (TARGET_AVX)
41143 tmp = gen_reg_rtx (V2DFmode);
41144 if (elt < 2)
41145 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
41146 else
41147 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
41148 ix86_expand_vector_extract (false, target, tmp, elt & 1);
41149 return;
41151 break;
41153 case V32QImode:
41154 if (TARGET_AVX)
41156 tmp = gen_reg_rtx (V16QImode);
41157 if (elt < 16)
41158 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
41159 else
41160 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
41161 ix86_expand_vector_extract (false, target, tmp, elt & 15);
41162 return;
41164 break;
41166 case V16HImode:
41167 if (TARGET_AVX)
41169 tmp = gen_reg_rtx (V8HImode);
41170 if (elt < 8)
41171 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
41172 else
41173 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
41174 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41175 return;
41177 break;
41179 case V8SImode:
41180 if (TARGET_AVX)
41182 tmp = gen_reg_rtx (V4SImode);
41183 if (elt < 4)
41184 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
41185 else
41186 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
41187 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41188 return;
41190 break;
41192 case V4DImode:
41193 if (TARGET_AVX)
41195 tmp = gen_reg_rtx (V2DImode);
41196 if (elt < 2)
41197 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
41198 else
41199 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
41200 ix86_expand_vector_extract (false, target, tmp, elt & 1);
41201 return;
41203 break;
41205 case V16SFmode:
41206 tmp = gen_reg_rtx (V8SFmode);
41207 if (elt < 8)
41208 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
41209 else
41210 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
41211 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41212 return;
41214 case V8DFmode:
41215 tmp = gen_reg_rtx (V4DFmode);
41216 if (elt < 4)
41217 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
41218 else
41219 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
41220 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41221 return;
41223 case V16SImode:
41224 tmp = gen_reg_rtx (V8SImode);
41225 if (elt < 8)
41226 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
41227 else
41228 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
41229 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41230 return;
41232 case V8DImode:
41233 tmp = gen_reg_rtx (V4DImode);
41234 if (elt < 4)
41235 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
41236 else
41237 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
41238 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41239 return;
41241 case V8QImode:
41242 /* ??? Could extract the appropriate HImode element and shift. */
41243 default:
41244 break;
41247 if (use_vec_extr)
41249 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
41250 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
41252 /* Let the rtl optimizers know about the zero extension performed. */
41253 if (inner_mode == QImode || inner_mode == HImode)
41255 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
41256 target = gen_lowpart (SImode, target);
41259 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41261 else
41263 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
41265 emit_move_insn (mem, vec);
41267 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
41268 emit_move_insn (target, tmp);
41272 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
41273 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
41274 The upper bits of DEST are undefined, though they shouldn't cause
41275 exceptions (some bits from src or all zeros are ok). */
41277 static void
41278 emit_reduc_half (rtx dest, rtx src, int i)
41280 rtx tem, d = dest;
41281 switch (GET_MODE (src))
41283 case V4SFmode:
41284 if (i == 128)
41285 tem = gen_sse_movhlps (dest, src, src);
41286 else
41287 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
41288 GEN_INT (1 + 4), GEN_INT (1 + 4));
41289 break;
41290 case V2DFmode:
41291 tem = gen_vec_interleave_highv2df (dest, src, src);
41292 break;
41293 case V16QImode:
41294 case V8HImode:
41295 case V4SImode:
41296 case V2DImode:
41297 d = gen_reg_rtx (V1TImode);
41298 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
41299 GEN_INT (i / 2));
41300 break;
41301 case V8SFmode:
41302 if (i == 256)
41303 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
41304 else
41305 tem = gen_avx_shufps256 (dest, src, src,
41306 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41307 break;
41308 case V4DFmode:
41309 if (i == 256)
41310 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41311 else
41312 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41313 break;
41314 case V32QImode:
41315 case V16HImode:
41316 case V8SImode:
41317 case V4DImode:
41318 if (i == 256)
41320 if (GET_MODE (dest) != V4DImode)
41321 d = gen_reg_rtx (V4DImode);
41322 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41323 gen_lowpart (V4DImode, src),
41324 const1_rtx);
41326 else
41328 d = gen_reg_rtx (V2TImode);
41329 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41330 GEN_INT (i / 2));
41332 break;
41333 case V16SImode:
41334 case V16SFmode:
41335 case V8DImode:
41336 case V8DFmode:
41337 if (i > 128)
41338 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41339 gen_lowpart (V16SImode, src),
41340 gen_lowpart (V16SImode, src),
41341 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41342 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41343 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41344 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41345 GEN_INT (0xC), GEN_INT (0xD),
41346 GEN_INT (0xE), GEN_INT (0xF),
41347 GEN_INT (0x10), GEN_INT (0x11),
41348 GEN_INT (0x12), GEN_INT (0x13),
41349 GEN_INT (0x14), GEN_INT (0x15),
41350 GEN_INT (0x16), GEN_INT (0x17));
41351 else
41352 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41353 gen_lowpart (V16SImode, src),
41354 GEN_INT (i == 128 ? 0x2 : 0x1),
41355 GEN_INT (0x3),
41356 GEN_INT (0x3),
41357 GEN_INT (0x3),
41358 GEN_INT (i == 128 ? 0x6 : 0x5),
41359 GEN_INT (0x7),
41360 GEN_INT (0x7),
41361 GEN_INT (0x7),
41362 GEN_INT (i == 128 ? 0xA : 0x9),
41363 GEN_INT (0xB),
41364 GEN_INT (0xB),
41365 GEN_INT (0xB),
41366 GEN_INT (i == 128 ? 0xE : 0xD),
41367 GEN_INT (0xF),
41368 GEN_INT (0xF),
41369 GEN_INT (0xF));
41370 break;
41371 default:
41372 gcc_unreachable ();
41374 emit_insn (tem);
41375 if (d != dest)
41376 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41379 /* Expand a vector reduction. FN is the binary pattern to reduce;
41380 DEST is the destination; IN is the input vector. */
41382 void
41383 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41385 rtx half, dst, vec = in;
41386 enum machine_mode mode = GET_MODE (in);
41387 int i;
41389 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
41390 if (TARGET_SSE4_1
41391 && mode == V8HImode
41392 && fn == gen_uminv8hi3)
41394 emit_insn (gen_sse4_1_phminposuw (dest, in));
41395 return;
41398 for (i = GET_MODE_BITSIZE (mode);
41399 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41400 i >>= 1)
41402 half = gen_reg_rtx (mode);
41403 emit_reduc_half (half, vec, i);
41404 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41405 dst = dest;
41406 else
41407 dst = gen_reg_rtx (mode);
41408 emit_insn (fn (dst, half, vec));
41409 vec = dst;
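/* Illustrative sketch (exposition only): the same reduction scheme on a
   plain array, using addition for FN.  Each step folds the upper half of
   the active lanes onto the lower half, so after log2(n) steps element 0
   holds the reduction of all n elements; only element 0 of DEST is
   meaningful, just as above.  */

static int
reduc_sketch (int *v, int n)
{
  int half, i;

  for (half = n / 2; half >= 1; half /= 2)
    for (i = 0; i < half; i++)
      v[i] = v[i] + v[i + half];   /* FN applied lane-wise */
  return v[0];
}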
41413 /* Target hook for scalar_mode_supported_p. */
41414 static bool
41415 ix86_scalar_mode_supported_p (enum machine_mode mode)
41417 if (DECIMAL_FLOAT_MODE_P (mode))
41418 return default_decimal_float_supported_p ();
41419 else if (mode == TFmode)
41420 return true;
41421 else
41422 return default_scalar_mode_supported_p (mode);
41425 /* Implements target hook vector_mode_supported_p. */
41426 static bool
41427 ix86_vector_mode_supported_p (enum machine_mode mode)
41429 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41430 return true;
41431 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41432 return true;
41433 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41434 return true;
41435 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41436 return true;
41437 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41438 return true;
41439 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41440 return true;
41441 return false;
41444 /* Target hook for c_mode_for_suffix. */
41445 static enum machine_mode
41446 ix86_c_mode_for_suffix (char suffix)
41448 if (suffix == 'q')
41449 return TFmode;
41450 if (suffix == 'w')
41451 return XFmode;
41453 return VOIDmode;
41456 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41458 We do this in the new i386 backend to maintain source compatibility
41459 with the old cc0-based compiler. */
41461 static tree
41462 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
41463 tree inputs ATTRIBUTE_UNUSED,
41464 tree clobbers)
41466 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41467 clobbers);
41468 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41469 clobbers);
41470 return clobbers;
41473 /* Implements target vector targetm.asm.encode_section_info. */
41475 static void ATTRIBUTE_UNUSED
41476 ix86_encode_section_info (tree decl, rtx rtl, int first)
41478 default_encode_section_info (decl, rtl, first);
41480 if (((TREE_CODE (decl) == VAR_DECL && is_global_var (decl))
41481 || TREE_CODE(decl) == STRING_CST)
41482 && ix86_in_large_data_p (decl))
41483 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41486 /* Worker function for REVERSE_CONDITION. */
41488 enum rtx_code
41489 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41491 return (mode != CCFPmode && mode != CCFPUmode
41492 ? reverse_condition (code)
41493 : reverse_condition_maybe_unordered (code));
41496 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41497 to OPERANDS[0]. */
41499 const char *
41500 output_387_reg_move (rtx insn, rtx *operands)
41502 if (REG_P (operands[0]))
41504 if (REG_P (operands[1])
41505 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41507 if (REGNO (operands[0]) == FIRST_STACK_REG)
41508 return output_387_ffreep (operands, 0);
41509 return "fstp\t%y0";
41511 if (STACK_TOP_P (operands[0]))
41512 return "fld%Z1\t%y1";
41513 return "fst\t%y0";
41515 else if (MEM_P (operands[0]))
41517 gcc_assert (REG_P (operands[1]));
41518 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41519 return "fstp%Z0\t%y0";
41520 else
41522 /* There is no non-popping store to memory for XFmode.
41523 So if we need one, follow the store with a load. */
41524 if (GET_MODE (operands[0]) == XFmode)
41525 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41526 else
41527 return "fst%Z0\t%y0";
41530 else
41531 gcc_unreachable ();
41534 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41535 FP status register is set. */
41537 void
41538 ix86_emit_fp_unordered_jump (rtx label)
41540 rtx reg = gen_reg_rtx (HImode);
41541 rtx temp;
41543 emit_insn (gen_x86_fnstsw_1 (reg));
41545 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41547 emit_insn (gen_x86_sahf_1 (reg));
41549 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41550 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41552 else
41554 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41556 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41557 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41560 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41561 gen_rtx_LABEL_REF (VOIDmode, label),
41562 pc_rtx);
41563 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41565 emit_jump_insn (temp);
41566 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41569 /* Output code to perform a log1p XFmode calculation. */
41571 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41573 rtx label1 = gen_label_rtx ();
41574 rtx label2 = gen_label_rtx ();
41576 rtx tmp = gen_reg_rtx (XFmode);
41577 rtx tmp2 = gen_reg_rtx (XFmode);
41578 rtx test;
41580 emit_insn (gen_absxf2 (tmp, op1));
41581 test = gen_rtx_GE (VOIDmode, tmp,
41582 CONST_DOUBLE_FROM_REAL_VALUE (
41583 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41584 XFmode));
41585 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41587 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41588 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41589 emit_jump (label2);
41591 emit_label (label1);
41592 emit_move_insn (tmp, CONST1_RTX (XFmode));
41593 emit_insn (gen_addxf3 (tmp, op1, tmp));
41594 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41595 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41597 emit_label (label2);
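/* Illustrative sketch (exposition only, uses <math.h>): the choice made
   above.  fyl2xp1 is only specified for |x| < 1 - sqrt(2)/2, where it
   avoids the cancellation in forming 1 + x, so small inputs take that
   path; larger inputs form 1 + x explicitly and use fyl2x.  Loading ln(2)
   with fldln2 turns the base-2 logarithm into a natural one.  */

static long double
log1p_sketch (long double x)
{
  if (fabsl (x) < 0.29289321881345247561810596348408353L)
    return log1pl (x);          /* fyl2xp1 (x, ln2) */
  return logl (1.0L + x);       /* fyl2x (1 + x, ln2) */
}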
41600 /* Emit code for round calculation. */
41601 void ix86_emit_i387_round (rtx op0, rtx op1)
41603 enum machine_mode inmode = GET_MODE (op1);
41604 enum machine_mode outmode = GET_MODE (op0);
41605 rtx e1, e2, res, tmp, tmp1, half;
41606 rtx scratch = gen_reg_rtx (HImode);
41607 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41608 rtx jump_label = gen_label_rtx ();
41609 rtx insn;
41610 rtx (*gen_abs) (rtx, rtx);
41611 rtx (*gen_neg) (rtx, rtx);
41613 switch (inmode)
41615 case SFmode:
41616 gen_abs = gen_abssf2;
41617 break;
41618 case DFmode:
41619 gen_abs = gen_absdf2;
41620 break;
41621 case XFmode:
41622 gen_abs = gen_absxf2;
41623 break;
41624 default:
41625 gcc_unreachable ();
41628 switch (outmode)
41630 case SFmode:
41631 gen_neg = gen_negsf2;
41632 break;
41633 case DFmode:
41634 gen_neg = gen_negdf2;
41635 break;
41636 case XFmode:
41637 gen_neg = gen_negxf2;
41638 break;
41639 case HImode:
41640 gen_neg = gen_neghi2;
41641 break;
41642 case SImode:
41643 gen_neg = gen_negsi2;
41644 break;
41645 case DImode:
41646 gen_neg = gen_negdi2;
41647 break;
41648 default:
41649 gcc_unreachable ();
41652 e1 = gen_reg_rtx (inmode);
41653 e2 = gen_reg_rtx (inmode);
41654 res = gen_reg_rtx (outmode);
41656 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41658 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
41660 /* scratch = fxam(op1) */
41661 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41662 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41663 UNSPEC_FXAM)));
41664 /* e1 = fabs(op1) */
41665 emit_insn (gen_abs (e1, op1));
41667 /* e2 = e1 + 0.5 */
41668 half = force_reg (inmode, half);
41669 emit_insn (gen_rtx_SET (VOIDmode, e2,
41670 gen_rtx_PLUS (inmode, e1, half)));
41672 /* res = floor(e2) */
41673 if (inmode != XFmode)
41675 tmp1 = gen_reg_rtx (XFmode);
41677 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41678 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41680 else
41681 tmp1 = e2;
41683 switch (outmode)
41685 case SFmode:
41686 case DFmode:
41688 rtx tmp0 = gen_reg_rtx (XFmode);
41690 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41692 emit_insn (gen_rtx_SET (VOIDmode, res,
41693 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41694 UNSPEC_TRUNC_NOOP)));
41696 break;
41697 case XFmode:
41698 emit_insn (gen_frndintxf2_floor (res, tmp1));
41699 break;
41700 case HImode:
41701 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41702 break;
41703 case SImode:
41704 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41705 break;
41706 case DImode:
41707 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41708 break;
41709 default:
41710 gcc_unreachable ();
41713 /* flags = signbit(a) */
41714 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41716 /* if (flags) then res = -res */
41717 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41718 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41719 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41720 pc_rtx);
41721 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41722 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41723 JUMP_LABEL (insn) = jump_label;
41725 emit_insn (gen_neg (res, res));
41727 emit_label (jump_label);
41728 LABEL_NUSES (jump_label) = 1;
41730 emit_move_insn (op0, res);
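/* Illustrative sketch (exposition only, uses <math.h>): the identity the
   sequence above implements, round-half-away-from-zero built from fabs,
   an add of 0.5, floor, and a conditional negate driven by the sign bit
   that fxam reports.  */

static double
round_sketch (double a)
{
  double r = floor (fabs (a) + 0.5);

  return signbit (a) ? -r : r;
}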
41733 /* Output code to perform a Newton-Raphson approximation of a single precision
41734 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41736 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41738 rtx x0, x1, e0, e1;
41740 x0 = gen_reg_rtx (mode);
41741 e0 = gen_reg_rtx (mode);
41742 e1 = gen_reg_rtx (mode);
41743 x1 = gen_reg_rtx (mode);
41745 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
41747 b = force_reg (mode, b);
41749 /* x0 = rcp(b) estimate */
41750 if (mode == V16SFmode || mode == V8DFmode)
41751 emit_insn (gen_rtx_SET (VOIDmode, x0,
41752 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41753 UNSPEC_RCP14)));
41754 else
41755 emit_insn (gen_rtx_SET (VOIDmode, x0,
41756 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41757 UNSPEC_RCP)));
41759 /* e0 = x0 * b */
41760 emit_insn (gen_rtx_SET (VOIDmode, e0,
41761 gen_rtx_MULT (mode, x0, b)));
41763 /* e0 = x0 * e0 */
41764 emit_insn (gen_rtx_SET (VOIDmode, e0,
41765 gen_rtx_MULT (mode, x0, e0)));
41767 /* e1 = x0 + x0 */
41768 emit_insn (gen_rtx_SET (VOIDmode, e1,
41769 gen_rtx_PLUS (mode, x0, x0)));
41771 /* x1 = e1 - e0 */
41772 emit_insn (gen_rtx_SET (VOIDmode, x1,
41773 gen_rtx_MINUS (mode, e1, e0)));
41775 /* res = a * x1 */
41776 emit_insn (gen_rtx_SET (VOIDmode, res,
41777 gen_rtx_MULT (mode, a, x1)));
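/* Illustrative sketch (exposition only): one Newton-Raphson refinement of
   the hardware reciprocal estimate, following the x0/e0/e1/x1 temporaries
   above.  With x0 close to 1/b, x1 = 2*x0 - b*x0*x0 roughly doubles the
   number of correct bits, and a/b is then a * x1.  rcp_estimate stands in
   for the RCPPS/RCP14 instruction and is hypothetical.  */

static float
swdiv_sketch (float a, float b, float (*rcp_estimate) (float))
{
  float x0, e0, e1, x1;

  x0 = rcp_estimate (b);        /* rough 1/b estimate */
  e0 = x0 * b;
  e0 = x0 * e0;                 /* b * x0 * x0 */
  e1 = x0 + x0;                 /* 2 * x0 */
  x1 = e1 - e0;                 /* refined 1/b */
  return a * x1;
}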
41780 /* Output code to perform a Newton-Raphson approximation of a
41781 single precision floating point [reciprocal] square root. */
41783 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41784 bool recip)
41786 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41787 REAL_VALUE_TYPE r;
41788 int unspec;
41790 x0 = gen_reg_rtx (mode);
41791 e0 = gen_reg_rtx (mode);
41792 e1 = gen_reg_rtx (mode);
41793 e2 = gen_reg_rtx (mode);
41794 e3 = gen_reg_rtx (mode);
41796 real_from_integer (&r, VOIDmode, -3, -1, 0);
41797 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41799 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41800 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41801 unspec = UNSPEC_RSQRT;
41803 if (VECTOR_MODE_P (mode))
41805 mthree = ix86_build_const_vector (mode, true, mthree);
41806 mhalf = ix86_build_const_vector (mode, true, mhalf);
41807 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41808 if (GET_MODE_SIZE (mode) == 64)
41809 unspec = UNSPEC_RSQRT14;
41812 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41813 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
41815 a = force_reg (mode, a);
41817 /* x0 = rsqrt(a) estimate */
41818 emit_insn (gen_rtx_SET (VOIDmode, x0,
41819 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41820 unspec)));
41822 /* If a == 0.0, filter out the infinite rsqrt estimate so that 0 * inf does not produce NaN for sqrt(0.0). */
41823 if (!recip)
41825 rtx zero, mask;
41827 zero = gen_reg_rtx (mode);
41828 mask = gen_reg_rtx (mode);
41830 zero = force_reg (mode, CONST0_RTX(mode));
41832 /* Handle masked compare. */
41833 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41835 mask = gen_reg_rtx (HImode);
41836 /* Imm value 0x4 corresponds to not-equal comparison. */
41837 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41838 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41840 else
41842 emit_insn (gen_rtx_SET (VOIDmode, mask,
41843 gen_rtx_NE (mode, zero, a)));
41845 emit_insn (gen_rtx_SET (VOIDmode, x0,
41846 gen_rtx_AND (mode, x0, mask)));
41850 /* e0 = x0 * a */
41851 emit_insn (gen_rtx_SET (VOIDmode, e0,
41852 gen_rtx_MULT (mode, x0, a)));
41853 /* e1 = e0 * x0 */
41854 emit_insn (gen_rtx_SET (VOIDmode, e1,
41855 gen_rtx_MULT (mode, e0, x0)));
41857 /* e2 = e1 - 3. */
41858 mthree = force_reg (mode, mthree);
41859 emit_insn (gen_rtx_SET (VOIDmode, e2,
41860 gen_rtx_PLUS (mode, e1, mthree)));
41862 mhalf = force_reg (mode, mhalf);
41863 if (recip)
41864 /* e3 = -.5 * x0 */
41865 emit_insn (gen_rtx_SET (VOIDmode, e3,
41866 gen_rtx_MULT (mode, x0, mhalf)));
41867 else
41868 /* e3 = -.5 * e0 */
41869 emit_insn (gen_rtx_SET (VOIDmode, e3,
41870 gen_rtx_MULT (mode, e0, mhalf)));
41871 /* ret = e2 * e3 */
41872 emit_insn (gen_rtx_SET (VOIDmode, res,
41873 gen_rtx_MULT (mode, e2, e3)));
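/* Illustrative sketch (exposition only): one Newton-Raphson step on the
   hardware reciprocal-square-root estimate, following the e0..e3
   temporaries above.  rsqrt_estimate stands in for RSQRTPS/RSQRT14 and is
   hypothetical.  The same refined factor serves both cases because
   sqrt(a) = a * rsqrt(a).  */

static float
swsqrt_sketch (float a, int recip, float (*rsqrt_estimate) (float))
{
  float x0, e0, e1, e2, e3;

  x0 = rsqrt_estimate (a);      /* rough 1/sqrt(a) estimate */
  e0 = x0 * a;
  e1 = e0 * x0;                 /* a * x0 * x0 */
  e2 = e1 - 3.0f;
  e3 = (recip ? x0 : e0) * -0.5f;
  return e2 * e3;               /* refined rsqrt(a) or sqrt(a) */
}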
41876 #ifdef TARGET_SOLARIS
41877 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41879 static void
41880 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41881 tree decl)
41883 /* With Binutils 2.15, the "@unwind" marker must be specified on
41884 every occurrence of the ".eh_frame" section, not just the first
41885 one. */
41886 if (TARGET_64BIT
41887 && strcmp (name, ".eh_frame") == 0)
41889 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41890 flags & SECTION_WRITE ? "aw" : "a");
41891 return;
41894 #ifndef USE_GAS
41895 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41897 solaris_elf_asm_comdat_section (name, flags, decl);
41898 return;
41900 #endif
41902 default_elf_asm_named_section (name, flags, decl);
41904 #endif /* TARGET_SOLARIS */
41906 /* Return the mangling of TYPE if it is an extended fundamental type. */
41908 static const char *
41909 ix86_mangle_type (const_tree type)
41911 type = TYPE_MAIN_VARIANT (type);
41913 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41914 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41915 return NULL;
41917 switch (TYPE_MODE (type))
41919 case TFmode:
41920 /* __float128 is "g". */
41921 return "g";
41922 case XFmode:
41923 /* "long double" or __float80 is "e". */
41924 return "e";
41925 default:
41926 return NULL;
41930 /* For 32-bit code we can save PIC register setup by using
41931 __stack_chk_fail_local hidden function instead of calling
41932 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
41933 register, so it is better to call __stack_chk_fail directly. */
41935 static tree ATTRIBUTE_UNUSED
41936 ix86_stack_protect_fail (void)
41938 return TARGET_64BIT
41939 ? default_external_stack_protect_fail ()
41940 : default_hidden_stack_protect_fail ();
41943 /* Select a format to encode pointers in exception handling data. CODE
41944 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41945 true if the symbol may be affected by dynamic relocations.
41947 ??? All x86 object file formats are capable of representing this.
41948 After all, the relocation needed is the same as for the call insn.
41949 Whether or not a particular assembler allows us to enter such, I
41950 guess we'll have to see. */
41951 int
41952 asm_preferred_eh_data_format (int code, int global)
41954 if (flag_pic)
41956 int type = DW_EH_PE_sdata8;
41957 if (!TARGET_64BIT
41958 || ix86_cmodel == CM_SMALL_PIC
41959 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41960 type = DW_EH_PE_sdata4;
41961 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41963 if (ix86_cmodel == CM_SMALL
41964 || (ix86_cmodel == CM_MEDIUM && code))
41965 return DW_EH_PE_udata4;
41966 return DW_EH_PE_absptr;
41969 /* Expand copysign from SIGN to the positive value ABS_VALUE
41970 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41971 the sign-bit. */
41972 static void
41973 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41975 enum machine_mode mode = GET_MODE (sign);
41976 rtx sgn = gen_reg_rtx (mode);
41977 if (mask == NULL_RTX)
41979 enum machine_mode vmode;
41981 if (mode == SFmode)
41982 vmode = V4SFmode;
41983 else if (mode == DFmode)
41984 vmode = V2DFmode;
41985 else
41986 vmode = mode;
41988 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41989 if (!VECTOR_MODE_P (mode))
41991 /* We need to generate a scalar mode mask in this case. */
41992 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41993 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41994 mask = gen_reg_rtx (mode);
41995 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41998 else
41999 mask = gen_rtx_NOT (mode, mask);
42000 emit_insn (gen_rtx_SET (VOIDmode, sgn,
42001 gen_rtx_AND (mode, mask, sign)));
42002 emit_insn (gen_rtx_SET (VOIDmode, result,
42003 gen_rtx_IOR (mode, abs_value, sgn)));
42006 /* Expand fabs (OP0) and return a new rtx that holds the result. The
42007 mask for masking out the sign-bit is stored in *SMASK, if that is
42008 non-null. */
42009 static rtx
42010 ix86_expand_sse_fabs (rtx op0, rtx *smask)
42012 enum machine_mode vmode, mode = GET_MODE (op0);
42013 rtx xa, mask;
42015 xa = gen_reg_rtx (mode);
42016 if (mode == SFmode)
42017 vmode = V4SFmode;
42018 else if (mode == DFmode)
42019 vmode = V2DFmode;
42020 else
42021 vmode = mode;
42022 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
42023 if (!VECTOR_MODE_P (mode))
42025 /* We need to generate a scalar mode mask in this case. */
42026 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
42027 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
42028 mask = gen_reg_rtx (mode);
42029 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
42031 emit_insn (gen_rtx_SET (VOIDmode, xa,
42032 gen_rtx_AND (mode, op0, mask)));
42034 if (smask)
42035 *smask = mask;
42037 return xa;
42040 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
42041 swapping the operands if SWAP_OPERANDS is true. The expanded
42042 code is a forward jump to a newly created label in case the
42043 comparison is true. The generated label rtx is returned. */
42044 static rtx
42045 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
42046 bool swap_operands)
42048 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
42049 rtx label, tmp;
42051 if (swap_operands)
42053 tmp = op0;
42054 op0 = op1;
42055 op1 = tmp;
42058 label = gen_label_rtx ();
42059 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
42060 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42061 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
42062 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
42063 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
42064 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
42065 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
42066 JUMP_LABEL (tmp) = label;
42068 return label;
42071 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
42072 using comparison code CODE. Operands are swapped for the comparison if
42073 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
42074 static rtx
42075 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
42076 bool swap_operands)
42078 rtx (*insn)(rtx, rtx, rtx, rtx);
42079 enum machine_mode mode = GET_MODE (op0);
42080 rtx mask = gen_reg_rtx (mode);
42082 if (swap_operands)
42084 rtx tmp = op0;
42085 op0 = op1;
42086 op1 = tmp;
42089 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
42091 emit_insn (insn (mask, op0, op1,
42092 gen_rtx_fmt_ee (code, mode, op0, op1)));
42093 return mask;
42096 /* Generate and return a rtx of mode MODE for 2**n where n is the number
42097 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
42098 static rtx
42099 ix86_gen_TWO52 (enum machine_mode mode)
42101 REAL_VALUE_TYPE TWO52r;
42102 rtx TWO52;
42104 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
42105 TWO52 = const_double_from_real_value (TWO52r, mode);
42106 TWO52 = force_reg (mode, TWO52);
42108 return TWO52;
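/* Added explanatory note: 2**52 (DFmode) and 2**23 (SFmode) are the
   magnitudes at which the format can no longer represent a fractional part,
   so for |x| < 2**52 the sequence x + 2**52 - 2**52 rounds x to an integer
   using the current rounding mode.  Concretely, 2**52 == 4503599627370496.0
   and 2**23 == 8388608.0; the expanders below rely on this to round without
   a float<->integer conversion.  */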
42111 /* Expand SSE sequence for computing lround from OP1 storing
42112 into OP0. */
42113 void
42114 ix86_expand_lround (rtx op0, rtx op1)
42116 /* C code for the stuff we're doing below:
42117 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
42118 return (long)tmp;
42120 enum machine_mode mode = GET_MODE (op1);
42121 const struct real_format *fmt;
42122 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42123 rtx adj;
42125 /* load nextafter (0.5, 0.0) */
42126 fmt = REAL_MODE_FORMAT (mode);
42127 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42128 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
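/* Added note: for DFmode fmt->p is 53, so half_minus_pred_half is 2**-54 and
   pred_half is 0.5 - 2**-54, i.e. nextafter (0.5, 0.0).  Using this value
   instead of exactly 0.5 keeps inputs just below a half-way point (e.g.
   0.49999999999999994) from being pushed up to the next integer by the
   addition below.  */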
42130 /* adj = copysign (0.5, op1) */
42131 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
42132 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
42134 /* adj = op1 + adj */
42135 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
42137 /* op0 = (imode)adj */
42138 expand_fix (op0, adj, 0);
42141 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
42142 into OPERAND0. */
42143 void
42144 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
42146 /* C code for the stuff we're doing below (for do_floor):
42147 xi = (long)op1;
42148 xi -= (double)xi > op1 ? 1 : 0;
42149 return xi;
42151 enum machine_mode fmode = GET_MODE (op1);
42152 enum machine_mode imode = GET_MODE (op0);
42153 rtx ireg, freg, label, tmp;
42155 /* reg = (long)op1 */
42156 ireg = gen_reg_rtx (imode);
42157 expand_fix (ireg, op1, 0);
42159 /* freg = (double)reg */
42160 freg = gen_reg_rtx (fmode);
42161 expand_float (freg, ireg, 0);
42163 /* ireg = (freg > op1) ? ireg - 1 : ireg */
42164 label = ix86_expand_sse_compare_and_jump (UNLE,
42165 freg, op1, !do_floor);
42166 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
42167 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
42168 emit_move_insn (ireg, tmp);
42170 emit_label (label);
42171 LABEL_NUSES (label) = 1;
42173 emit_move_insn (op0, ireg);
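/* Worked example (added): lfloor (-2.5), i.e. do_floor == true:
   ireg = (long) -2.5 = -2 (truncation), freg = -2.0, and since
   -2.0 > -2.5 the UNLE branch is not taken, so ireg -= 1 gives -3,
   which is floor (-2.5).  For lceil the comparison operands are
   swapped and 1 is added instead.  */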
42176 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
42177 result in OPERAND0. */
42178 void
42179 ix86_expand_rint (rtx operand0, rtx operand1)
42181 /* C code for the stuff we're doing below:
42182 xa = fabs (operand1);
42183 if (!isless (xa, 2**52))
42184 return operand1;
42185 xa = xa + 2**52 - 2**52;
42186 return copysign (xa, operand1);
42188 enum machine_mode mode = GET_MODE (operand0);
42189 rtx res, xa, label, TWO52, mask;
42191 res = gen_reg_rtx (mode);
42192 emit_move_insn (res, operand1);
42194 /* xa = abs (operand1) */
42195 xa = ix86_expand_sse_fabs (res, &mask);
42197 /* if (!isless (xa, TWO52)) goto label; */
42198 TWO52 = ix86_gen_TWO52 (mode);
42199 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42201 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42202 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42204 ix86_sse_copysign_to_positive (res, xa, res, mask);
42206 emit_label (label);
42207 LABEL_NUSES (label) = 1;
42209 emit_move_insn (operand0, res);
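/* Worked example (added): rint (-2.5) in the default round-to-nearest-even
   mode: xa = 2.5, which is less than 2**52, so we fall through;
   xa + 2**52 - 2**52 = 2.0 (2**52 + 2.5 rounds to the even neighbour), and
   copying the sign back yields -2.0, matching rint's semantics of rounding
   in the current rounding mode.  */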
42212 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42213 into OPERAND0. */
42214 void
42215 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
42217 /* C code for the stuff we expand below.
42218 double xa = fabs (x), x2;
42219 if (!isless (xa, TWO52))
42220 return x;
42221 xa = xa + TWO52 - TWO52;
42222 x2 = copysign (xa, x);
42223 Compensate. Floor:
42224 if (x2 > x)
42225 x2 -= 1;
42226 Compensate. Ceil:
42227 if (x2 < x)
42228 x2 -= -1;
42229 return x2;
42231 enum machine_mode mode = GET_MODE (operand0);
42232 rtx xa, TWO52, tmp, label, one, res, mask;
42234 TWO52 = ix86_gen_TWO52 (mode);
42236 /* Temporary for holding the result, initialized to the input
42237 operand to ease control flow. */
42238 res = gen_reg_rtx (mode);
42239 emit_move_insn (res, operand1);
42241 /* xa = abs (operand1) */
42242 xa = ix86_expand_sse_fabs (res, &mask);
42244 /* if (!isless (xa, TWO52)) goto label; */
42245 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42247 /* xa = xa + TWO52 - TWO52; */
42248 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42249 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42251 /* xa = copysign (xa, operand1) */
42252 ix86_sse_copysign_to_positive (xa, xa, res, mask);
42254 /* generate 1.0 or -1.0 */
42255 one = force_reg (mode,
42256 const_double_from_real_value (do_floor
42257 ? dconst1 : dconstm1, mode));
42259 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42260 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42261 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42262 gen_rtx_AND (mode, one, tmp)));
42263 /* We always need to subtract here to preserve signed zero. */
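/* Added rationale: for ceil the constant above is -1.0, so the adjustment
   xa - (-1.0) adds one; in the no-adjustment case the masked value is +0.0
   and xa - (+0.0) preserves a negative zero in xa, whereas adding a +1.0
   compensation term would turn -0.0 into +0.0.  */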
42264 tmp = expand_simple_binop (mode, MINUS,
42265 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42266 emit_move_insn (res, tmp);
42268 emit_label (label);
42269 LABEL_NUSES (label) = 1;
42271 emit_move_insn (operand0, res);
42274 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42275 into OPERAND0. */
42276 void
42277 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
42279 /* C code for the stuff we expand below.
42280 double xa = fabs (x), x2;
42281 if (!isless (xa, TWO52))
42282 return x;
42283 x2 = (double)(long)x;
42284 Compensate. Floor:
42285 if (x2 > x)
42286 x2 -= 1;
42287 Compensate. Ceil:
42288 if (x2 < x)
42289 x2 += 1;
42290 if (HONOR_SIGNED_ZEROS (mode))
42291 return copysign (x2, x);
42292 return x2;
42294 enum machine_mode mode = GET_MODE (operand0);
42295 rtx xa, xi, TWO52, tmp, label, one, res, mask;
42297 TWO52 = ix86_gen_TWO52 (mode);
42299 /* Temporary for holding the result, initialized to the input
42300 operand to ease control flow. */
42301 res = gen_reg_rtx (mode);
42302 emit_move_insn (res, operand1);
42304 /* xa = abs (operand1) */
42305 xa = ix86_expand_sse_fabs (res, &mask);
42307 /* if (!isless (xa, TWO52)) goto label; */
42308 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42310 /* xa = (double)(long)x */
42311 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42312 expand_fix (xi, res, 0);
42313 expand_float (xa, xi, 0);
42315 /* generate 1.0 */
42316 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42318 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42319 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42320 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42321 gen_rtx_AND (mode, one, tmp)));
42322 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42323 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42324 emit_move_insn (res, tmp);
42326 if (HONOR_SIGNED_ZEROS (mode))
42327 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42329 emit_label (label);
42330 LABEL_NUSES (label) = 1;
42332 emit_move_insn (operand0, res);
42335 /* Expand SSE sequence for computing round from OPERAND1 storing
42336 into OPERAND0. Sequence that works without relying on DImode truncation
42337 via cvttsd2siq, which is only available on 64-bit targets. */
42338 void
42339 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42341 /* C code for the stuff we expand below.
42342 double xa = fabs (x), xa2, x2;
42343 if (!isless (xa, TWO52))
42344 return x;
42345 Using the absolute value and copying back sign makes
42346 -0.0 -> -0.0 correct.
42347 xa2 = xa + TWO52 - TWO52;
42348 Compensate.
42349 dxa = xa2 - xa;
42350 if (dxa <= -0.5)
42351 xa2 += 1;
42352 else if (dxa > 0.5)
42353 xa2 -= 1;
42354 x2 = copysign (xa2, x);
42355 return x2;
42357 enum machine_mode mode = GET_MODE (operand0);
42358 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
42360 TWO52 = ix86_gen_TWO52 (mode);
42362 /* Temporary for holding the result, initialized to the input
42363 operand to ease control flow. */
42364 res = gen_reg_rtx (mode);
42365 emit_move_insn (res, operand1);
42367 /* xa = abs (operand1) */
42368 xa = ix86_expand_sse_fabs (res, &mask);
42370 /* if (!isless (xa, TWO52)) goto label; */
42371 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42373 /* xa2 = xa + TWO52 - TWO52; */
42374 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42375 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42377 /* dxa = xa2 - xa; */
42378 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42380 /* generate 0.5, 1.0 and -0.5 */
42381 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42382 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42383 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42384 0, OPTAB_DIRECT);
42386 /* Compensate. */
42387 tmp = gen_reg_rtx (mode);
42388 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42389 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42390 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42391 gen_rtx_AND (mode, one, tmp)));
42392 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42393 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42394 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42395 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42396 gen_rtx_AND (mode, one, tmp)));
42397 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42399 /* res = copysign (xa2, operand1) */
42400 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42402 emit_label (label);
42403 LABEL_NUSES (label) = 1;
42405 emit_move_insn (operand0, res);
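/* Worked example (added): round (2.5): xa2 = 2.5 + 2**52 - 2**52 = 2.0
   (round-to-even), dxa = -0.5, and since dxa <= -0.5 we add 1 back, giving
   3.0 = round (2.5) (halves round away from zero).  For 2.4: xa2 = 2.0,
   dxa = -0.4, no compensation, result 2.0.  The final copysign makes e.g.
   -2.5 come out as -3.0 and keeps -0.0 negative.  */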
42408 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42409 into OPERAND0. */
42410 void
42411 ix86_expand_trunc (rtx operand0, rtx operand1)
42413 /* C code for SSE variant we expand below.
42414 double xa = fabs (x), x2;
42415 if (!isless (xa, TWO52))
42416 return x;
42417 x2 = (double)(long)x;
42418 if (HONOR_SIGNED_ZEROS (mode))
42419 return copysign (x2, x);
42420 return x2;
42422 enum machine_mode mode = GET_MODE (operand0);
42423 rtx xa, xi, TWO52, label, res, mask;
42425 TWO52 = ix86_gen_TWO52 (mode);
42427 /* Temporary for holding the result, initialized to the input
42428 operand to ease control flow. */
42429 res = gen_reg_rtx (mode);
42430 emit_move_insn (res, operand1);
42432 /* xa = abs (operand1) */
42433 xa = ix86_expand_sse_fabs (res, &mask);
42435 /* if (!isless (xa, TWO52)) goto label; */
42436 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42438 /* x = (double)(long)x */
42439 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42440 expand_fix (xi, res, 0);
42441 expand_float (res, xi, 0);
42443 if (HONOR_SIGNED_ZEROS (mode))
42444 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42446 emit_label (label);
42447 LABEL_NUSES (label) = 1;
42449 emit_move_insn (operand0, res);
42452 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42453 into OPERAND0 without relying on the 64-bit only cvttsd2siq DImode truncation. */
42454 void
42455 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42457 enum machine_mode mode = GET_MODE (operand0);
42458 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42460 /* C code for SSE variant we expand below.
42461 double xa = fabs (x), x2;
42462 if (!isless (xa, TWO52))
42463 return x;
42464 xa2 = xa + TWO52 - TWO52;
42465 Compensate:
42466 if (xa2 > xa)
42467 xa2 -= 1.0;
42468 x2 = copysign (xa2, x);
42469 return x2;
42472 TWO52 = ix86_gen_TWO52 (mode);
42474 /* Temporary for holding the result, initialized to the input
42475 operand to ease control flow. */
42476 res = gen_reg_rtx (mode);
42477 emit_move_insn (res, operand1);
42479 /* xa = abs (operand1) */
42480 xa = ix86_expand_sse_fabs (res, &smask);
42482 /* if (!isless (xa, TWO52)) goto label; */
42483 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42485 /* res = xa + TWO52 - TWO52; */
42486 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42487 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42488 emit_move_insn (res, tmp);
42490 /* generate 1.0 */
42491 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42493 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42494 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42495 emit_insn (gen_rtx_SET (VOIDmode, mask,
42496 gen_rtx_AND (mode, mask, one)));
42497 tmp = expand_simple_binop (mode, MINUS,
42498 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42499 emit_move_insn (res, tmp);
42501 /* res = copysign (res, operand1) */
42502 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42504 emit_label (label);
42505 LABEL_NUSES (label) = 1;
42507 emit_move_insn (operand0, res);
42510 /* Expand SSE sequence for computing round from OPERAND1 storing
42511 into OPERAND0. */
42512 void
42513 ix86_expand_round (rtx operand0, rtx operand1)
42515 /* C code for the stuff we're doing below:
42516 double xa = fabs (x);
42517 if (!isless (xa, TWO52))
42518 return x;
42519 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42520 return copysign (xa, x);
42522 enum machine_mode mode = GET_MODE (operand0);
42523 rtx res, TWO52, xa, label, xi, half, mask;
42524 const struct real_format *fmt;
42525 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42527 /* Temporary for holding the result, initialized to the input
42528 operand to ease control flow. */
42529 res = gen_reg_rtx (mode);
42530 emit_move_insn (res, operand1);
42532 TWO52 = ix86_gen_TWO52 (mode);
42533 xa = ix86_expand_sse_fabs (res, &mask);
42534 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42536 /* load nextafter (0.5, 0.0) */
42537 fmt = REAL_MODE_FORMAT (mode);
42538 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42539 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42541 /* xa = xa + 0.5 */
42542 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42543 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42545 /* xa = (double)(int64_t)xa */
42546 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42547 expand_fix (xi, xa, 0);
42548 expand_float (xa, xi, 0);
42550 /* res = copysign (xa, operand1) */
42551 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42553 emit_label (label);
42554 LABEL_NUSES (label) = 1;
42556 emit_move_insn (operand0, res);
42559 /* Expand SSE sequence for computing round
42560 from OP1 storing into OP0 using the SSE4.1 round insn. */
42561 void
42562 ix86_expand_round_sse4 (rtx op0, rtx op1)
42564 enum machine_mode mode = GET_MODE (op0);
42565 rtx e1, e2, res, half;
42566 const struct real_format *fmt;
42567 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42568 rtx (*gen_copysign) (rtx, rtx, rtx);
42569 rtx (*gen_round) (rtx, rtx, rtx);
42571 switch (mode)
42573 case SFmode:
42574 gen_copysign = gen_copysignsf3;
42575 gen_round = gen_sse4_1_roundsf2;
42576 break;
42577 case DFmode:
42578 gen_copysign = gen_copysigndf3;
42579 gen_round = gen_sse4_1_rounddf2;
42580 break;
42581 default:
42582 gcc_unreachable ();
42585 /* round (a) = trunc (a + copysign (0.5, a)) */
42587 /* load nextafter (0.5, 0.0) */
42588 fmt = REAL_MODE_FORMAT (mode);
42589 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42590 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42591 half = const_double_from_real_value (pred_half, mode);
42593 /* e1 = copysign (0.5, op1) */
42594 e1 = gen_reg_rtx (mode);
42595 emit_insn (gen_copysign (e1, half, op1));
42597 /* e2 = op1 + e1 */
42598 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42600 /* res = trunc (e2) */
42601 res = gen_reg_rtx (mode);
42602 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42604 emit_move_insn (op0, res);
42608 /* Table of valid machine attributes. */
42609 static const struct attribute_spec ix86_attribute_table[] =
42611 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42612 affects_type_identity } */
42613 /* Stdcall attribute says callee is responsible for popping arguments
42614 if they are not variable. */
42615 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42616 true },
42617 /* Fastcall attribute says callee is responsible for popping arguments
42618 if they are not variable. */
42619 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42620 true },
42621 /* Thiscall attribute says callee is responsible for popping arguments
42622 if they are not variable. */
42623 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42624 true },
42625 /* Cdecl attribute says the callee is a normal C declaration */
42626 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42627 true },
42628 /* Regparm attribute specifies how many integer arguments are to be
42629 passed in registers. */
42630 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42631 true },
42632 /* Sseregparm attribute says we are using x86_64 calling conventions
42633 for FP arguments. */
42634 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42635 true },
42636 /* The transactional memory builtins are implicitly regparm or fastcall
42637 depending on the ABI. Override the generic do-nothing attribute that
42638 these builtins were declared with. */
42639 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42640 true },
42641 /* force_align_arg_pointer says this function realigns the stack at entry. */
42642 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42643 false, true, true, ix86_handle_cconv_attribute, false },
42644 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42645 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42646 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42647 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42648 false },
42649 #endif
42650 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42651 false },
42652 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42653 false },
42654 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42655 SUBTARGET_ATTRIBUTE_TABLE,
42656 #endif
42657 /* ms_abi and sysv_abi calling convention function attributes. */
42658 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42659 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42660 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42661 false },
42662 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42663 ix86_handle_callee_pop_aggregate_return, true },
42664 /* End element. */
42665 { NULL, 0, 0, false, false, false, NULL, false }
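/* Usage note (added commentary): these attributes are applied in user code
   via __attribute__, e.g.

     int fast_call (int a, int b, int c) __attribute__ ((regparm (3)));

   which on ia32 passes the first three integer arguments in EAX, EDX and
   ECX instead of on the stack.  */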
42668 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42669 static int
42670 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42671 tree vectype,
42672 int misalign ATTRIBUTE_UNUSED)
42674 unsigned elements;
42676 switch (type_of_cost)
42678 case scalar_stmt:
42679 return ix86_cost->scalar_stmt_cost;
42681 case scalar_load:
42682 return ix86_cost->scalar_load_cost;
42684 case scalar_store:
42685 return ix86_cost->scalar_store_cost;
42687 case vector_stmt:
42688 return ix86_cost->vec_stmt_cost;
42690 case vector_load:
42691 return ix86_cost->vec_align_load_cost;
42693 case vector_store:
42694 return ix86_cost->vec_store_cost;
42696 case vec_to_scalar:
42697 return ix86_cost->vec_to_scalar_cost;
42699 case scalar_to_vec:
42700 return ix86_cost->scalar_to_vec_cost;
42702 case unaligned_load:
42703 case unaligned_store:
42704 return ix86_cost->vec_unalign_load_cost;
42706 case cond_branch_taken:
42707 return ix86_cost->cond_taken_branch_cost;
42709 case cond_branch_not_taken:
42710 return ix86_cost->cond_not_taken_branch_cost;
42712 case vec_perm:
42713 case vec_promote_demote:
42714 return ix86_cost->vec_stmt_cost;
42716 case vec_construct:
42717 elements = TYPE_VECTOR_SUBPARTS (vectype);
42718 return elements / 2 + 1;
42720 default:
42721 gcc_unreachable ();
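/* Added example: for vec_construct of a V4SF vector built from 4 scalar
   elements the cost reported above is 4 / 2 + 1 = 3, approximating the
   element insertions needed; the other cases simply forward the cost-table
   entries for the active tuning target.  */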
42725 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42726 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42727 insn every time. */
42729 static GTY(()) rtx vselect_insn;
42731 /* Initialize vselect_insn. */
42733 static void
42734 init_vselect_insn (void)
42736 unsigned i;
42737 rtx x;
42739 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42740 for (i = 0; i < MAX_VECT_LEN; ++i)
42741 XVECEXP (x, 0, i) = const0_rtx;
42742 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42743 const0_rtx), x);
42744 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42745 start_sequence ();
42746 vselect_insn = emit_insn (x);
42747 end_sequence ();
42750 /* Construct (set target (vec_select op0 (parallel perm))) and
42751 return true if that's a valid instruction in the active ISA. */
42753 static bool
42754 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42755 unsigned nelt, bool testing_p)
42757 unsigned int i;
42758 rtx x, save_vconcat;
42759 int icode;
42761 if (vselect_insn == NULL_RTX)
42762 init_vselect_insn ();
42764 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42765 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42766 for (i = 0; i < nelt; ++i)
42767 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42768 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42769 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42770 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42771 SET_DEST (PATTERN (vselect_insn)) = target;
42772 icode = recog_memoized (vselect_insn);
42774 if (icode >= 0 && !testing_p)
42775 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42777 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42778 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42779 INSN_CODE (vselect_insn) = -1;
42781 return icode >= 0;
42784 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42786 static bool
42787 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42788 const unsigned char *perm, unsigned nelt,
42789 bool testing_p)
42791 enum machine_mode v2mode;
42792 rtx x;
42793 bool ok;
42795 if (vselect_insn == NULL_RTX)
42796 init_vselect_insn ();
42798 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42799 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42800 PUT_MODE (x, v2mode);
42801 XEXP (x, 0) = op0;
42802 XEXP (x, 1) = op1;
42803 ok = expand_vselect (target, x, perm, nelt, testing_p);
42804 XEXP (x, 0) = const0_rtx;
42805 XEXP (x, 1) = const0_rtx;
42806 return ok;
42809 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42810 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42812 static bool
42813 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42815 enum machine_mode vmode = d->vmode;
42816 unsigned i, mask, nelt = d->nelt;
42817 rtx target, op0, op1, x;
42818 rtx rperm[32], vperm;
42820 if (d->one_operand_p)
42821 return false;
42822 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42824 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42826 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42828 else
42829 return false;
42831 /* This is a blend, not a permute. Elements must stay in their
42832 respective lanes. */
42833 for (i = 0; i < nelt; ++i)
42835 unsigned e = d->perm[i];
42836 if (!(e == i || e == i + nelt))
42837 return false;
42840 if (d->testing_p)
42841 return true;
42843 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42844 decision should be extracted elsewhere, so that we only try that
42845 sequence once all budget==3 options have been tried. */
42846 target = d->target;
42847 op0 = d->op0;
42848 op1 = d->op1;
42849 mask = 0;
42851 switch (vmode)
42853 case V4DFmode:
42854 case V8SFmode:
42855 case V2DFmode:
42856 case V4SFmode:
42857 case V8HImode:
42858 case V8SImode:
42859 for (i = 0; i < nelt; ++i)
42860 mask |= (d->perm[i] >= nelt) << i;
42861 break;
42863 case V2DImode:
42864 for (i = 0; i < 2; ++i)
42865 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42866 vmode = V8HImode;
42867 goto do_subreg;
42869 case V4SImode:
42870 for (i = 0; i < 4; ++i)
42871 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42872 vmode = V8HImode;
42873 goto do_subreg;
42875 case V16QImode:
42876 /* See if bytes move in pairs so we can use pblendw with
42877 an immediate argument, rather than pblendvb with a vector
42878 argument. */
42879 for (i = 0; i < 16; i += 2)
42880 if (d->perm[i] + 1 != d->perm[i + 1])
42882 use_pblendvb:
42883 for (i = 0; i < nelt; ++i)
42884 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42886 finish_pblendvb:
42887 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42888 vperm = force_reg (vmode, vperm);
42890 if (GET_MODE_SIZE (vmode) == 16)
42891 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42892 else
42893 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42894 if (target != d->target)
42895 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42896 return true;
42899 for (i = 0; i < 8; ++i)
42900 mask |= (d->perm[i * 2] >= 16) << i;
42901 vmode = V8HImode;
42902 /* FALLTHRU */
42904 do_subreg:
42905 target = gen_reg_rtx (vmode);
42906 op0 = gen_lowpart (vmode, op0);
42907 op1 = gen_lowpart (vmode, op1);
42908 break;
42910 case V32QImode:
42911 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42912 for (i = 0; i < 32; i += 2)
42913 if (d->perm[i] + 1 != d->perm[i + 1])
42914 goto use_pblendvb;
42915 /* See if bytes move in quadruplets. If yes, vpblendd
42916 with immediate can be used. */
42917 for (i = 0; i < 32; i += 4)
42918 if (d->perm[i] + 2 != d->perm[i + 2])
42919 break;
42920 if (i < 32)
42922 /* See if bytes move the same in both lanes. If yes,
42923 vpblendw with immediate can be used. */
42924 for (i = 0; i < 16; i += 2)
42925 if (d->perm[i] + 16 != d->perm[i + 16])
42926 goto use_pblendvb;
42928 /* Use vpblendw. */
42929 for (i = 0; i < 16; ++i)
42930 mask |= (d->perm[i * 2] >= 32) << i;
42931 vmode = V16HImode;
42932 goto do_subreg;
42935 /* Use vpblendd. */
42936 for (i = 0; i < 8; ++i)
42937 mask |= (d->perm[i * 4] >= 32) << i;
42938 vmode = V8SImode;
42939 goto do_subreg;
42941 case V16HImode:
42942 /* See if words move in pairs. If yes, vpblendd can be used. */
42943 for (i = 0; i < 16; i += 2)
42944 if (d->perm[i] + 1 != d->perm[i + 1])
42945 break;
42946 if (i < 16)
42948 /* See if words move the same in both lanes. If not,
42949 vpblendvb must be used. */
42950 for (i = 0; i < 8; i++)
42951 if (d->perm[i] + 8 != d->perm[i + 8])
42953 /* Use vpblendvb. */
42954 for (i = 0; i < 32; ++i)
42955 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42957 vmode = V32QImode;
42958 nelt = 32;
42959 target = gen_reg_rtx (vmode);
42960 op0 = gen_lowpart (vmode, op0);
42961 op1 = gen_lowpart (vmode, op1);
42962 goto finish_pblendvb;
42965 /* Use vpblendw. */
42966 for (i = 0; i < 16; ++i)
42967 mask |= (d->perm[i] >= 16) << i;
42968 break;
42971 /* Use vpblendd. */
42972 for (i = 0; i < 8; ++i)
42973 mask |= (d->perm[i * 2] >= 16) << i;
42974 vmode = V8SImode;
42975 goto do_subreg;
42977 case V4DImode:
42978 /* Use vpblendd. */
42979 for (i = 0; i < 4; ++i)
42980 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42981 vmode = V8SImode;
42982 goto do_subreg;
42984 default:
42985 gcc_unreachable ();
42988 /* This matches five different patterns with the different modes. */
42989 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42990 x = gen_rtx_SET (VOIDmode, target, x);
42991 emit_insn (x);
42992 if (target != d->target)
42993 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42995 return true;
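/* Worked example (added): a V4SF permutation {0, 5, 2, 7} takes elements 1
   and 3 from the second operand, so the V4SFmode case above builds
   mask = (1 << 1) | (1 << 3) = 0xa and the final VEC_MERGE is matched as a
   blendps with that immediate.  Byte-granular selections that do not pair
   up fall back to pblendvb with a constant selector vector instead.  */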
42998 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42999 in terms of the variable form of vpermilps.
43001 Note that we will have already failed the immediate input vpermilps,
43002 which requires that the high and low part shuffle be identical; the
43003 variable form doesn't require that. */
43005 static bool
43006 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
43008 rtx rperm[8], vperm;
43009 unsigned i;
43011 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
43012 return false;
43014 /* We can only permute within the 128-bit lane. */
43015 for (i = 0; i < 8; ++i)
43017 unsigned e = d->perm[i];
43018 if (i < 4 ? e >= 4 : e < 4)
43019 return false;
43022 if (d->testing_p)
43023 return true;
43025 for (i = 0; i < 8; ++i)
43027 unsigned e = d->perm[i];
43029 /* Within each 128-bit lane, the elements of op0 are numbered
43030 from 0 and the elements of op1 are numbered from 4. */
43031 if (e >= 8 + 4)
43032 e -= 8;
43033 else if (e >= 4)
43034 e -= 4;
43036 rperm[i] = GEN_INT (e);
43039 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
43040 vperm = force_reg (V8SImode, vperm);
43041 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
43043 return true;
43046 /* Return true if permutation D can be performed as VMODE permutation
43047 instead. */
43049 static bool
43050 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
43052 unsigned int i, j, chunk;
43054 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
43055 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
43056 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
43057 return false;
43059 if (GET_MODE_NUNITS (vmode) >= d->nelt)
43060 return true;
43062 chunk = d->nelt / GET_MODE_NUNITS (vmode);
43063 for (i = 0; i < d->nelt; i += chunk)
43064 if (d->perm[i] & (chunk - 1))
43065 return false;
43066 else
43067 for (j = 1; j < chunk; ++j)
43068 if (d->perm[i] + j != d->perm[i + j])
43069 return false;
43071 return true;
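/* Added example: a V16QI permutation such as
   {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11} moves bytes only in aligned,
   consecutive groups of four, so it is accepted as the V4SI permutation
   {1, 0, 3, 2}; any permutation that splits such a group is rejected.  */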
43074 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43075 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
43077 static bool
43078 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
43080 unsigned i, nelt, eltsz, mask;
43081 unsigned char perm[32];
43082 enum machine_mode vmode = V16QImode;
43083 rtx rperm[32], vperm, target, op0, op1;
43085 nelt = d->nelt;
43087 if (!d->one_operand_p)
43089 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
43091 if (TARGET_AVX2
43092 && valid_perm_using_mode_p (V2TImode, d))
43094 if (d->testing_p)
43095 return true;
43097 /* Use vperm2i128 insn. The pattern uses
43098 V4DImode instead of V2TImode. */
43099 target = d->target;
43100 if (d->vmode != V4DImode)
43101 target = gen_reg_rtx (V4DImode);
43102 op0 = gen_lowpart (V4DImode, d->op0);
43103 op1 = gen_lowpart (V4DImode, d->op1);
43104 rperm[0]
43105 = GEN_INT ((d->perm[0] / (nelt / 2))
43106 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
43107 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
43108 if (target != d->target)
43109 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43110 return true;
43112 return false;
43115 else
43117 if (GET_MODE_SIZE (d->vmode) == 16)
43119 if (!TARGET_SSSE3)
43120 return false;
43122 else if (GET_MODE_SIZE (d->vmode) == 32)
43124 if (!TARGET_AVX2)
43125 return false;
43127 /* V4DImode should already have been handled through
43128 expand_vselect by the vpermq instruction. */
43129 gcc_assert (d->vmode != V4DImode);
43131 vmode = V32QImode;
43132 if (d->vmode == V8SImode
43133 || d->vmode == V16HImode
43134 || d->vmode == V32QImode)
43136 /* First see if vpermq can be used for
43137 V8SImode/V16HImode/V32QImode. */
43138 if (valid_perm_using_mode_p (V4DImode, d))
43140 for (i = 0; i < 4; i++)
43141 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
43142 if (d->testing_p)
43143 return true;
43144 target = gen_reg_rtx (V4DImode);
43145 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
43146 perm, 4, false))
43148 emit_move_insn (d->target,
43149 gen_lowpart (d->vmode, target));
43150 return true;
43152 return false;
43155 /* Next see if vpermd can be used. */
43156 if (valid_perm_using_mode_p (V8SImode, d))
43157 vmode = V8SImode;
43159 /* Or if vpermps can be used. */
43160 else if (d->vmode == V8SFmode)
43161 vmode = V8SImode;
43163 if (vmode == V32QImode)
43165 /* vpshufb only works within each 128-bit lane; it cannot
43166 shuffle bytes across the lane boundary. */
43167 for (i = 0; i < nelt; ++i)
43168 if ((d->perm[i] ^ i) & (nelt / 2))
43169 return false;
43172 else
43173 return false;
43176 if (d->testing_p)
43177 return true;
43179 if (vmode == V8SImode)
43180 for (i = 0; i < 8; ++i)
43181 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
43182 else
43184 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43185 if (!d->one_operand_p)
43186 mask = 2 * nelt - 1;
43187 else if (vmode == V16QImode)
43188 mask = nelt - 1;
43189 else
43190 mask = nelt / 2 - 1;
43192 for (i = 0; i < nelt; ++i)
43194 unsigned j, e = d->perm[i] & mask;
43195 for (j = 0; j < eltsz; ++j)
43196 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
43200 vperm = gen_rtx_CONST_VECTOR (vmode,
43201 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
43202 vperm = force_reg (vmode, vperm);
43204 target = d->target;
43205 if (d->vmode != vmode)
43206 target = gen_reg_rtx (vmode);
43207 op0 = gen_lowpart (vmode, d->op0);
43208 if (d->one_operand_p)
43210 if (vmode == V16QImode)
43211 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
43212 else if (vmode == V32QImode)
43213 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
43214 else if (vmode == V8SFmode)
43215 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
43216 else
43217 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
43219 else
43221 op1 = gen_lowpart (vmode, d->op1);
43222 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
43224 if (target != d->target)
43225 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43227 return true;
43230 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
43231 in a single instruction. */
43233 static bool
43234 expand_vec_perm_1 (struct expand_vec_perm_d *d)
43236 unsigned i, nelt = d->nelt;
43237 unsigned char perm2[MAX_VECT_LEN];
43239 /* Check plain VEC_SELECT first, because AVX has instructions that could
43240 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
43241 input where SEL+CONCAT may not. */
43242 if (d->one_operand_p)
43244 int mask = nelt - 1;
43245 bool identity_perm = true;
43246 bool broadcast_perm = true;
43248 for (i = 0; i < nelt; i++)
43250 perm2[i] = d->perm[i] & mask;
43251 if (perm2[i] != i)
43252 identity_perm = false;
43253 if (perm2[i])
43254 broadcast_perm = false;
43257 if (identity_perm)
43259 if (!d->testing_p)
43260 emit_move_insn (d->target, d->op0);
43261 return true;
43263 else if (broadcast_perm && TARGET_AVX2)
43265 /* Use vpbroadcast{b,w,d}. */
43266 rtx (*gen) (rtx, rtx) = NULL;
43267 switch (d->vmode)
43269 case V32QImode:
43270 gen = gen_avx2_pbroadcastv32qi_1;
43271 break;
43272 case V16HImode:
43273 gen = gen_avx2_pbroadcastv16hi_1;
43274 break;
43275 case V8SImode:
43276 gen = gen_avx2_pbroadcastv8si_1;
43277 break;
43278 case V16QImode:
43279 gen = gen_avx2_pbroadcastv16qi;
43280 break;
43281 case V8HImode:
43282 gen = gen_avx2_pbroadcastv8hi;
43283 break;
43284 case V8SFmode:
43285 gen = gen_avx2_vec_dupv8sf_1;
43286 break;
43287 /* For other modes prefer other shuffles this function creates. */
43288 default: break;
43290 if (gen != NULL)
43292 if (!d->testing_p)
43293 emit_insn (gen (d->target, d->op0));
43294 return true;
43298 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
43299 return true;
43301 /* There are plenty of patterns in sse.md that are written for
43302 SEL+CONCAT and are not replicated for a single op. Perhaps
43303 that should be changed, to avoid the nastiness here. */
43305 /* Recognize interleave style patterns, which means incrementing
43306 every other permutation operand. */
43307 for (i = 0; i < nelt; i += 2)
43309 perm2[i] = d->perm[i] & mask;
43310 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43312 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43313 d->testing_p))
43314 return true;
43316 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43317 if (nelt >= 4)
43319 for (i = 0; i < nelt; i += 4)
43321 perm2[i + 0] = d->perm[i + 0] & mask;
43322 perm2[i + 1] = d->perm[i + 1] & mask;
43323 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43324 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43327 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43328 d->testing_p))
43329 return true;
43333 /* Finally, try the fully general two operand permute. */
43334 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43335 d->testing_p))
43336 return true;
43338 /* Recognize interleave style patterns with reversed operands. */
43339 if (!d->one_operand_p)
43341 for (i = 0; i < nelt; ++i)
43343 unsigned e = d->perm[i];
43344 if (e >= nelt)
43345 e -= nelt;
43346 else
43347 e += nelt;
43348 perm2[i] = e;
43351 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43352 d->testing_p))
43353 return true;
43356 /* Try the SSE4.1 blend variable merge instructions. */
43357 if (expand_vec_perm_blend (d))
43358 return true;
43360 /* Try one of the AVX vpermil variable permutations. */
43361 if (expand_vec_perm_vpermil (d))
43362 return true;
43364 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43365 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43366 if (expand_vec_perm_pshufb (d))
43367 return true;
43369 /* Try the AVX512F vpermi2 instructions. */
43370 rtx vec[64];
43371 enum machine_mode mode = d->vmode;
43372 if (mode == V8DFmode)
43373 mode = V8DImode;
43374 else if (mode == V16SFmode)
43375 mode = V16SImode;
43376 for (i = 0; i < nelt; ++i)
43377 vec[i] = GEN_INT (d->perm[i]);
43378 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43379 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43380 return true;
43382 return false;
43385 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43386 in terms of a pair of pshuflw + pshufhw instructions. */
43388 static bool
43389 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43391 unsigned char perm2[MAX_VECT_LEN];
43392 unsigned i;
43393 bool ok;
43395 if (d->vmode != V8HImode || !d->one_operand_p)
43396 return false;
43398 /* The two permutations only operate in 64-bit lanes. */
43399 for (i = 0; i < 4; ++i)
43400 if (d->perm[i] >= 4)
43401 return false;
43402 for (i = 4; i < 8; ++i)
43403 if (d->perm[i] < 4)
43404 return false;
43406 if (d->testing_p)
43407 return true;
43409 /* Emit the pshuflw. */
43410 memcpy (perm2, d->perm, 4);
43411 for (i = 4; i < 8; ++i)
43412 perm2[i] = i;
43413 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43414 gcc_assert (ok);
43416 /* Emit the pshufhw. */
43417 memcpy (perm2 + 4, d->perm + 4, 4);
43418 for (i = 0; i < 4; ++i)
43419 perm2[i] = i;
43420 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43421 gcc_assert (ok);
43423 return true;
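/* Worked example (added): the V8HI permutation {2, 1, 0, 3, 5, 7, 4, 6}
   keeps the low four and high four words within their own 64-bit halves, so
   it is split into pshuflw with {2, 1, 0, 3} (high words left alone)
   followed by pshufhw with {5, 7, 4, 6} (low words left alone).  */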
43426 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43427 the permutation using the SSSE3 palignr instruction. This succeeds
43428 when all of the elements in PERM fit within one vector and we merely
43429 need to shift them down so that a single vector permutation has a
43430 chance to succeed. */
43432 static bool
43433 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43435 unsigned i, nelt = d->nelt;
43436 unsigned min, max;
43437 bool in_order, ok;
43438 rtx shift, target;
43439 struct expand_vec_perm_d dcopy;
43441 /* Even with AVX, palignr only operates on 128-bit vectors. */
43442 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43443 return false;
43445 min = nelt, max = 0;
43446 for (i = 0; i < nelt; ++i)
43448 unsigned e = d->perm[i];
43449 if (e < min)
43450 min = e;
43451 if (e > max)
43452 max = e;
43454 if (min == 0 || max - min >= nelt)
43455 return false;
43457 /* Given that we have SSSE3, we know we'll be able to implement the
43458 single operand permutation after the palignr with pshufb. */
43459 if (d->testing_p)
43460 return true;
43462 dcopy = *d;
43463 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43464 target = gen_reg_rtx (TImode);
43465 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43466 gen_lowpart (TImode, d->op0), shift));
43468 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43469 dcopy.one_operand_p = true;
43471 in_order = true;
43472 for (i = 0; i < nelt; ++i)
43474 unsigned e = dcopy.perm[i] - min;
43475 if (e != i)
43476 in_order = false;
43477 dcopy.perm[i] = e;
43480 /* Test for the degenerate case where the alignment by itself
43481 produces the desired permutation. */
43482 if (in_order)
43484 emit_move_insn (d->target, dcopy.op0);
43485 return true;
43488 ok = expand_vec_perm_1 (&dcopy);
43489 gcc_assert (ok);
43491 return ok;
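/* Worked example (added): the V4SI permutation {3, 4, 5, 6} has min == 3,
   so the palignr above shifts the op1:op0 concatenation down by 3 elements
   (12 bytes); the remaining permutation {0, 1, 2, 3} is the identity, so
   the in_order shortcut copies the shifted vector straight to the target.  */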
43494 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43496 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43497 a two vector permutation into a single vector permutation by using
43498 an interleave operation to merge the vectors. */
43500 static bool
43501 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43503 struct expand_vec_perm_d dremap, dfinal;
43504 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43505 unsigned HOST_WIDE_INT contents;
43506 unsigned char remap[2 * MAX_VECT_LEN];
43507 rtx seq;
43508 bool ok, same_halves = false;
43510 if (GET_MODE_SIZE (d->vmode) == 16)
43512 if (d->one_operand_p)
43513 return false;
43515 else if (GET_MODE_SIZE (d->vmode) == 32)
43517 if (!TARGET_AVX)
43518 return false;
43519 /* For 32-byte modes allow even d->one_operand_p.
43520 The lack of cross-lane shuffling in some instructions
43521 might prevent a single insn shuffle. */
43522 dfinal = *d;
43523 dfinal.testing_p = true;
43524 /* If expand_vec_perm_interleave3 can expand this into
43525 a 3 insn sequence, give up and let it be expanded that
43526 way. While that is one insn longer, it doesn't need a
43527 memory operand, and in the common case where both the
43528 interleave-low and interleave-high permutations with the
43529 same operands are adjacent, CSE leaves only 4 insns
43530 for the pair. */
43531 if (expand_vec_perm_interleave3 (&dfinal))
43532 return false;
43534 else
43535 return false;
43537 /* Examine from whence the elements come. */
43538 contents = 0;
43539 for (i = 0; i < nelt; ++i)
43540 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43542 memset (remap, 0xff, sizeof (remap));
43543 dremap = *d;
43545 if (GET_MODE_SIZE (d->vmode) == 16)
43547 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43549 /* Split the two input vectors into 4 halves. */
43550 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43551 h2 = h1 << nelt2;
43552 h3 = h2 << nelt2;
43553 h4 = h3 << nelt2;
43555 /* If the elements come only from the low halves, use interleave low;
43556 similarly for interleave high. If the elements are from mis-matched
43557 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
43558 if ((contents & (h1 | h3)) == contents)
43560 /* punpckl* */
43561 for (i = 0; i < nelt2; ++i)
43563 remap[i] = i * 2;
43564 remap[i + nelt] = i * 2 + 1;
43565 dremap.perm[i * 2] = i;
43566 dremap.perm[i * 2 + 1] = i + nelt;
43568 if (!TARGET_SSE2 && d->vmode == V4SImode)
43569 dremap.vmode = V4SFmode;
43571 else if ((contents & (h2 | h4)) == contents)
43573 /* punpckh* */
43574 for (i = 0; i < nelt2; ++i)
43576 remap[i + nelt2] = i * 2;
43577 remap[i + nelt + nelt2] = i * 2 + 1;
43578 dremap.perm[i * 2] = i + nelt2;
43579 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43581 if (!TARGET_SSE2 && d->vmode == V4SImode)
43582 dremap.vmode = V4SFmode;
43584 else if ((contents & (h1 | h4)) == contents)
43586 /* shufps */
43587 for (i = 0; i < nelt2; ++i)
43589 remap[i] = i;
43590 remap[i + nelt + nelt2] = i + nelt2;
43591 dremap.perm[i] = i;
43592 dremap.perm[i + nelt2] = i + nelt + nelt2;
43594 if (nelt != 4)
43596 /* shufpd */
43597 dremap.vmode = V2DImode;
43598 dremap.nelt = 2;
43599 dremap.perm[0] = 0;
43600 dremap.perm[1] = 3;
43603 else if ((contents & (h2 | h3)) == contents)
43605 /* shufps */
43606 for (i = 0; i < nelt2; ++i)
43608 remap[i + nelt2] = i;
43609 remap[i + nelt] = i + nelt2;
43610 dremap.perm[i] = i + nelt2;
43611 dremap.perm[i + nelt2] = i + nelt;
43613 if (nelt != 4)
43615 /* shufpd */
43616 dremap.vmode = V2DImode;
43617 dremap.nelt = 2;
43618 dremap.perm[0] = 1;
43619 dremap.perm[1] = 2;
43622 else
43623 return false;
43625 else
43627 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43628 unsigned HOST_WIDE_INT q[8];
43629 unsigned int nonzero_halves[4];
43631 /* Split the two input vectors into 8 quarters. */
43632 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43633 for (i = 1; i < 8; ++i)
43634 q[i] = q[0] << (nelt4 * i);
43635 for (i = 0; i < 4; ++i)
43636 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43638 nonzero_halves[nzcnt] = i;
43639 ++nzcnt;
43642 if (nzcnt == 1)
43644 gcc_assert (d->one_operand_p);
43645 nonzero_halves[1] = nonzero_halves[0];
43646 same_halves = true;
43648 else if (d->one_operand_p)
43650 gcc_assert (nonzero_halves[0] == 0);
43651 gcc_assert (nonzero_halves[1] == 1);
43654 if (nzcnt <= 2)
43656 if (d->perm[0] / nelt2 == nonzero_halves[1])
43658 /* Attempt to increase the likelihood that dfinal
43659 shuffle will be intra-lane. */
43660 char tmph = nonzero_halves[0];
43661 nonzero_halves[0] = nonzero_halves[1];
43662 nonzero_halves[1] = tmph;
43665 /* vperm2f128 or vperm2i128. */
43666 for (i = 0; i < nelt2; ++i)
43668 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43669 remap[i + nonzero_halves[0] * nelt2] = i;
43670 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43671 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43674 if (d->vmode != V8SFmode
43675 && d->vmode != V4DFmode
43676 && d->vmode != V8SImode)
43678 dremap.vmode = V8SImode;
43679 dremap.nelt = 8;
43680 for (i = 0; i < 4; ++i)
43682 dremap.perm[i] = i + nonzero_halves[0] * 4;
43683 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43687 else if (d->one_operand_p)
43688 return false;
43689 else if (TARGET_AVX2
43690 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43692 /* vpunpckl* */
43693 for (i = 0; i < nelt4; ++i)
43695 remap[i] = i * 2;
43696 remap[i + nelt] = i * 2 + 1;
43697 remap[i + nelt2] = i * 2 + nelt2;
43698 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43699 dremap.perm[i * 2] = i;
43700 dremap.perm[i * 2 + 1] = i + nelt;
43701 dremap.perm[i * 2 + nelt2] = i + nelt2;
43702 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43705 else if (TARGET_AVX2
43706 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43708 /* vpunpckh* */
43709 for (i = 0; i < nelt4; ++i)
43711 remap[i + nelt4] = i * 2;
43712 remap[i + nelt + nelt4] = i * 2 + 1;
43713 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43714 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43715 dremap.perm[i * 2] = i + nelt4;
43716 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43717 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43718 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43721 else
43722 return false;
43725 /* Use the remapping array set up above to move the elements from their
43726 swizzled locations into their final destinations. */
43727 dfinal = *d;
43728 for (i = 0; i < nelt; ++i)
43730 unsigned e = remap[d->perm[i]];
43731 gcc_assert (e < nelt);
43732 /* If same_halves is true, both halves of the remapped vector are the
43733 same. Avoid cross-lane accesses if possible. */
43734 if (same_halves && i >= nelt2)
43736 gcc_assert (e < nelt2);
43737 dfinal.perm[i] = e + nelt2;
43739 else
43740 dfinal.perm[i] = e;
43742 if (!d->testing_p)
43744 dremap.target = gen_reg_rtx (dremap.vmode);
43745 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43747 dfinal.op1 = dfinal.op0;
43748 dfinal.one_operand_p = true;
43750 /* Test if the final remap can be done with a single insn. For V4SFmode or
43751 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43752 start_sequence ();
43753 ok = expand_vec_perm_1 (&dfinal);
43754 seq = get_insns ();
43755 end_sequence ();
43757 if (!ok)
43758 return false;
43760 if (d->testing_p)
43761 return true;
43763 if (dremap.vmode != dfinal.vmode)
43765 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43766 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43769 ok = expand_vec_perm_1 (&dremap);
43770 gcc_assert (ok);
43772 emit_insn (seq);
43773 return true;
43776 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43777 a single vector cross-lane permutation into vpermq followed
43778 by any of the single insn permutations. */
43780 static bool
43781 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43783 struct expand_vec_perm_d dremap, dfinal;
43784 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43785 unsigned contents[2];
43786 bool ok;
43788 if (!(TARGET_AVX2
43789 && (d->vmode == V32QImode || d->vmode == V16HImode)
43790 && d->one_operand_p))
43791 return false;
43793 contents[0] = 0;
43794 contents[1] = 0;
43795 for (i = 0; i < nelt2; ++i)
43797 contents[0] |= 1u << (d->perm[i] / nelt4);
43798 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43801 for (i = 0; i < 2; ++i)
43803 unsigned int cnt = 0;
43804 for (j = 0; j < 4; ++j)
43805 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43806 return false;
43809 if (d->testing_p)
43810 return true;
43812 dremap = *d;
43813 dremap.vmode = V4DImode;
43814 dremap.nelt = 4;
43815 dremap.target = gen_reg_rtx (V4DImode);
43816 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43817 dremap.op1 = dremap.op0;
43818 dremap.one_operand_p = true;
43819 for (i = 0; i < 2; ++i)
43821 unsigned int cnt = 0;
43822 for (j = 0; j < 4; ++j)
43823 if ((contents[i] & (1u << j)) != 0)
43824 dremap.perm[2 * i + cnt++] = j;
43825 for (; cnt < 2; ++cnt)
43826 dremap.perm[2 * i + cnt] = 0;
43829 dfinal = *d;
43830 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43831 dfinal.op1 = dfinal.op0;
43832 dfinal.one_operand_p = true;
43833 for (i = 0, j = 0; i < nelt; ++i)
43835 if (i == nelt2)
43836 j = 2;
43837 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43838 if ((d->perm[i] / nelt4) == dremap.perm[j])
43840 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43841 dfinal.perm[i] |= nelt4;
43842 else
43843 gcc_unreachable ();
43846 ok = expand_vec_perm_1 (&dremap);
43847 gcc_assert (ok);
43849 ok = expand_vec_perm_1 (&dfinal);
43850 gcc_assert (ok);
43852 return true;
43855 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43856 a vector permutation using two instructions, vperm2f128 (or
43857 vperm2i128) followed by any single in-lane permutation. */
43859 static bool
43860 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43862 struct expand_vec_perm_d dfirst, dsecond;
43863 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43864 bool ok;
43866 if (!TARGET_AVX
43867 || GET_MODE_SIZE (d->vmode) != 32
43868 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43869 return false;
43871 dsecond = *d;
43872 dsecond.one_operand_p = false;
43873 dsecond.testing_p = true;
43875 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43876 immediate. For perm < 16 the second permutation uses
43877 d->op0 as first operand, for perm >= 16 it uses d->op1
43878 as first operand. The second operand is the result of
43879 vperm2[fi]128. */
43880 for (perm = 0; perm < 32; perm++)
43882 /* Ignore permutations which do not move anything cross-lane. */
43883 if (perm < 16)
43885 /* The second shuffle for e.g. V4DFmode has
43886 0123 and ABCD operands.
43887 Ignore AB23, as 23 is already in the second lane
43888 of the first operand. */
43889 if ((perm & 0xc) == (1 << 2)) continue;
43890 /* And 01CD, as 01 is in the first lane of the first
43891 operand. */
43892 if ((perm & 3) == 0) continue;
43893 /* And 4567, as then the vperm2[fi]128 doesn't change
43894 anything on the original 4567 second operand. */
43895 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43897 else
43899 /* The second shuffle for e.g. V4DFmode has
43900 4567 and ABCD operands.
43901 Ignore AB67, as 67 is already in the second lane
43902 of the first operand. */
43903 if ((perm & 0xc) == (3 << 2)) continue;
43904 /* And 45CD, as 45 is in the first lane of the first
43905 operand. */
43906 if ((perm & 3) == 2) continue;
43907 /* And 0123, as then the vperm2[fi]128 doesn't change
43908 anything on the original 0123 first operand. */
43909 if ((perm & 0xf) == (1 << 2)) continue;
43912 for (i = 0; i < nelt; i++)
43914 j = d->perm[i] / nelt2;
43915 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43916 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43917 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43918 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43919 else
43920 break;
43923 if (i == nelt)
43925 start_sequence ();
43926 ok = expand_vec_perm_1 (&dsecond);
43927 end_sequence ();
43929 else
43930 ok = false;
43932 if (ok)
43934 if (d->testing_p)
43935 return true;
43937 /* Found a usable second shuffle. dfirst will be
43938 vperm2f128 on d->op0 and d->op1. */
43939 dsecond.testing_p = false;
43940 dfirst = *d;
43941 dfirst.target = gen_reg_rtx (d->vmode);
43942 for (i = 0; i < nelt; i++)
43943 dfirst.perm[i] = (i & (nelt2 - 1))
43944 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43946 ok = expand_vec_perm_1 (&dfirst);
43947 gcc_assert (ok);
43949 /* And dsecond is some single insn shuffle, taking
43950 d->op0 and result of vperm2f128 (if perm < 16) or
43951 d->op1 and result of vperm2f128 (otherwise). */
43952 dsecond.op1 = dfirst.target;
43953 if (perm >= 16)
43954 dsecond.op0 = dfirst.op1;
43956 ok = expand_vec_perm_1 (&dsecond);
43957 gcc_assert (ok);
43959 return true;
43962 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43963 if (d->one_operand_p)
43964 return false;
43967 return false;
43970 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43971 a two vector permutation using 2 intra-lane interleave insns
43972 and cross-lane shuffle for 32-byte vectors. */
43974 static bool
43975 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43977 unsigned i, nelt;
43978 rtx (*gen) (rtx, rtx, rtx);
43980 if (d->one_operand_p)
43981 return false;
43982 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43984 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43986 else
43987 return false;
43989 nelt = d->nelt;
43990 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43991 return false;
43992 for (i = 0; i < nelt; i += 2)
43993 if (d->perm[i] != d->perm[0] + i / 2
43994 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43995 return false;
43997 if (d->testing_p)
43998 return true;
44000 switch (d->vmode)
44002 case V32QImode:
44003 if (d->perm[0])
44004 gen = gen_vec_interleave_highv32qi;
44005 else
44006 gen = gen_vec_interleave_lowv32qi;
44007 break;
44008 case V16HImode:
44009 if (d->perm[0])
44010 gen = gen_vec_interleave_highv16hi;
44011 else
44012 gen = gen_vec_interleave_lowv16hi;
44013 break;
44014 case V8SImode:
44015 if (d->perm[0])
44016 gen = gen_vec_interleave_highv8si;
44017 else
44018 gen = gen_vec_interleave_lowv8si;
44019 break;
44020 case V4DImode:
44021 if (d->perm[0])
44022 gen = gen_vec_interleave_highv4di;
44023 else
44024 gen = gen_vec_interleave_lowv4di;
44025 break;
44026 case V8SFmode:
44027 if (d->perm[0])
44028 gen = gen_vec_interleave_highv8sf;
44029 else
44030 gen = gen_vec_interleave_lowv8sf;
44031 break;
44032 case V4DFmode:
44033 if (d->perm[0])
44034 gen = gen_vec_interleave_highv4df;
44035 else
44036 gen = gen_vec_interleave_lowv4df;
44037 break;
44038 default:
44039 gcc_unreachable ();
44042 emit_insn (gen (d->target, d->op0, d->op1));
44043 return true;
44046 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
44047 a single vector permutation using a single intra-lane vector
44048 permutation, vperm2f128 swapping the lanes and vblend* insn blending
44049 the non-swapped and swapped vectors together. */
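/* For instance, given the one operand V4DFmode permutation { 2 1 0 3 },
the scheme computes dfirst as the identity, dsecond as the lane swap
{ 2 3 0 1 }, and a vblendpd with mask 0x5 taking elements 0 and 2 from
the swapped copy reproduces the requested order. */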
44051 static bool
44052 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
44054 struct expand_vec_perm_d dfirst, dsecond;
44055 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
44056 rtx seq;
44057 bool ok;
44058 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
44060 if (!TARGET_AVX
44061 || TARGET_AVX2
44062 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
44063 || !d->one_operand_p)
44064 return false;
44066 dfirst = *d;
44067 for (i = 0; i < nelt; i++)
44068 dfirst.perm[i] = 0xff;
44069 for (i = 0, msk = 0; i < nelt; i++)
44071 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
44072 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
44073 return false;
44074 dfirst.perm[j] = d->perm[i];
44075 if (j != i)
44076 msk |= (1 << i);
44078 for (i = 0; i < nelt; i++)
44079 if (dfirst.perm[i] == 0xff)
44080 dfirst.perm[i] = i;
44082 if (!d->testing_p)
44083 dfirst.target = gen_reg_rtx (dfirst.vmode);
44085 start_sequence ();
44086 ok = expand_vec_perm_1 (&dfirst);
44087 seq = get_insns ();
44088 end_sequence ();
44090 if (!ok)
44091 return false;
44093 if (d->testing_p)
44094 return true;
44096 emit_insn (seq);
44098 dsecond = *d;
44099 dsecond.op0 = dfirst.target;
44100 dsecond.op1 = dfirst.target;
44101 dsecond.one_operand_p = true;
44102 dsecond.target = gen_reg_rtx (dsecond.vmode);
44103 for (i = 0; i < nelt; i++)
44104 dsecond.perm[i] = i ^ nelt2;
44106 ok = expand_vec_perm_1 (&dsecond);
44107 gcc_assert (ok);
44109 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
44110 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
44111 return true;
44114 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
44115 permutation using two vperm2f128, followed by a vshufpd insn blending
44116 the two vectors together. */
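/* E.g. for d->perm { 1 6 3 4 }: dfirst selects { 0 1 2 3 }, dsecond
selects { 6 7 4 5 }, and the final vshufpd picks { 1 4 3 6 } from the
two intermediate results, which is exactly the requested { 1 6 3 4 }. */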
44118 static bool
44119 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
44121 struct expand_vec_perm_d dfirst, dsecond, dthird;
44122 bool ok;
44124 if (!TARGET_AVX || (d->vmode != V4DFmode))
44125 return false;
44127 if (d->testing_p)
44128 return true;
44130 dfirst = *d;
44131 dsecond = *d;
44132 dthird = *d;
44134 dfirst.perm[0] = (d->perm[0] & ~1);
44135 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
44136 dfirst.perm[2] = (d->perm[2] & ~1);
44137 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
44138 dsecond.perm[0] = (d->perm[1] & ~1);
44139 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
44140 dsecond.perm[2] = (d->perm[3] & ~1);
44141 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
44142 dthird.perm[0] = (d->perm[0] % 2);
44143 dthird.perm[1] = (d->perm[1] % 2) + 4;
44144 dthird.perm[2] = (d->perm[2] % 2) + 2;
44145 dthird.perm[3] = (d->perm[3] % 2) + 6;
44147 dfirst.target = gen_reg_rtx (dfirst.vmode);
44148 dsecond.target = gen_reg_rtx (dsecond.vmode);
44149 dthird.op0 = dfirst.target;
44150 dthird.op1 = dsecond.target;
44151 dthird.one_operand_p = false;
44153 canonicalize_perm (&dfirst);
44154 canonicalize_perm (&dsecond);
44156 ok = expand_vec_perm_1 (&dfirst)
44157 && expand_vec_perm_1 (&dsecond)
44158 && expand_vec_perm_1 (&dthird);
44160 gcc_assert (ok);
44162 return true;
44165 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
44166 permutation with two pshufb insns and an ior. We should have already
44167 failed all two instruction sequences. */
44169 static bool
44170 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
44172 rtx rperm[2][16], vperm, l, h, op, m128;
44173 unsigned int i, nelt, eltsz;
44175 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
44176 return false;
44177 gcc_assert (!d->one_operand_p);
44179 if (d->testing_p)
44180 return true;
44182 nelt = d->nelt;
44183 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44185 /* Generate two permutation masks. If the required element is within
44186 the given vector it is shuffled into the proper lane. If the required
44187 element is in the other vector, force a zero into the lane by setting
44188 bit 7 in the permutation mask. */
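/* E.g. for V16QImode with d->perm[0] == 19 (byte 3 of op1), the op1
mask gets 3 at byte 0 and the op0 mask gets -128 there, so the pshufb
of op0 writes a zero into byte 0, the pshufb of op1 writes op1's
byte 3, and the final por keeps the latter. */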
44189 m128 = GEN_INT (-128);
44190 for (i = 0; i < nelt; ++i)
44192 unsigned j, e = d->perm[i];
44193 unsigned which = (e >= nelt);
44194 if (e >= nelt)
44195 e -= nelt;
44197 for (j = 0; j < eltsz; ++j)
44199 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
44200 rperm[1-which][i*eltsz + j] = m128;
44204 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
44205 vperm = force_reg (V16QImode, vperm);
44207 l = gen_reg_rtx (V16QImode);
44208 op = gen_lowpart (V16QImode, d->op0);
44209 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
44211 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
44212 vperm = force_reg (V16QImode, vperm);
44214 h = gen_reg_rtx (V16QImode);
44215 op = gen_lowpart (V16QImode, d->op1);
44216 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
44218 op = d->target;
44219 if (d->vmode != V16QImode)
44220 op = gen_reg_rtx (V16QImode);
44221 emit_insn (gen_iorv16qi3 (op, l, h));
44222 if (op != d->target)
44223 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44225 return true;
44228 /* Implement arbitrary permutation of a single V32QImode or V16QImode operand
44229 with two vpshufb insns, vpermq and vpor. We should have already failed
44230 all two or three instruction sequences. */
44232 static bool
44233 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
44235 rtx rperm[2][32], vperm, l, h, hp, op, m128;
44236 unsigned int i, nelt, eltsz;
44238 if (!TARGET_AVX2
44239 || !d->one_operand_p
44240 || (d->vmode != V32QImode && d->vmode != V16HImode))
44241 return false;
44243 if (d->testing_p)
44244 return true;
44246 nelt = d->nelt;
44247 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44249 /* Generate two permutation masks. If the required element is within
44250 the same lane, it is shuffled in. If the required element is in the
44251 other lane, force a zero by setting bit 7 in the permutation mask.
44252 The other mask gets a non-negative entry whenever an element is
44253 requested from the other lane; that entry is placed at the same
44254 offset in the opposite lane, so that swapping the two V2TImode
44255 halves of the vpshufb result puts it where it belongs. */
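/* E.g. for V32QImode with d->perm[5] == 20 (a cross-lane request),
rperm[0] gets -128 at byte 5 while rperm[1] gets 20 & 15 == 4 at
byte 5 ^ 16 == 21; the vpshufb with rperm[1] then puts op0's byte 20
into byte 21 of h, the vpermq lane swap moves it to byte 5 of hp, and
the final vpor picks it up there. */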
44256 m128 = GEN_INT (-128);
44257 for (i = 0; i < nelt; ++i)
44259 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44260 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44262 for (j = 0; j < eltsz; ++j)
44264 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
44265 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
44269 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44270 vperm = force_reg (V32QImode, vperm);
44272 h = gen_reg_rtx (V32QImode);
44273 op = gen_lowpart (V32QImode, d->op0);
44274 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44276 /* Swap the 128-bit lanes of h into hp. */
44277 hp = gen_reg_rtx (V4DImode);
44278 op = gen_lowpart (V4DImode, h);
44279 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
44280 const1_rtx));
44282 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44283 vperm = force_reg (V32QImode, vperm);
44285 l = gen_reg_rtx (V32QImode);
44286 op = gen_lowpart (V32QImode, d->op0);
44287 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44289 op = d->target;
44290 if (d->vmode != V32QImode)
44291 op = gen_reg_rtx (V32QImode);
44292 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
44293 if (op != d->target)
44294 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44296 return true;
44299 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44300 and extract-odd permutations of two V32QImode or V16QImode operands
44301 with two vpshufb insns, vpor and vpermq. We should have already
44302 failed all two or three instruction sequences. */
44304 static bool
44305 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44307 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44308 unsigned int i, nelt, eltsz;
44310 if (!TARGET_AVX2
44311 || d->one_operand_p
44312 || (d->vmode != V32QImode && d->vmode != V16HImode))
44313 return false;
44315 for (i = 0; i < d->nelt; ++i)
44316 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44317 return false;
44319 if (d->testing_p)
44320 return true;
44322 nelt = d->nelt;
44323 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44325 /* Generate two permutation masks. In the first permutation mask
44326 the first quarter will contain indexes for the first half
44327 of the op0, the second quarter will contain bit 7 set, third quarter
44328 will contain indexes for the second half of the op0 and the
44329 last quarter bit 7 set. In the second permutation mask
44330 the first quarter will contain bit 7 set, the second quarter
44331 indexes for the first half of the op1, the third quarter bit 7 set
44332 and last quarter indexes for the second half of the op1.
44333 I.e. the first mask e.g. for V32QImode extract even will be:
44334 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44335 (all values masked with 0xf except for -128) and second mask
44336 for extract even will be
44337 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44338 m128 = GEN_INT (-128);
44339 for (i = 0; i < nelt; ++i)
44341 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44342 unsigned which = d->perm[i] >= nelt;
44343 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44345 for (j = 0; j < eltsz; ++j)
44347 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44348 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44352 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44353 vperm = force_reg (V32QImode, vperm);
44355 l = gen_reg_rtx (V32QImode);
44356 op = gen_lowpart (V32QImode, d->op0);
44357 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44359 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44360 vperm = force_reg (V32QImode, vperm);
44362 h = gen_reg_rtx (V32QImode);
44363 op = gen_lowpart (V32QImode, d->op1);
44364 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44366 ior = gen_reg_rtx (V32QImode);
44367 emit_insn (gen_iorv32qi3 (ior, l, h));
44369 /* Permute the V4DImode quarters using the { 0, 2, 1, 3 } permutation. */
44370 op = gen_reg_rtx (V4DImode);
44371 ior = gen_lowpart (V4DImode, ior);
44372 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44373 const1_rtx, GEN_INT (3)));
44374 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44376 return true;
44379 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44380 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
44381 with two "and" and "pack" or two "shift" and "pack" insns. We should
44382 have already failed all two instruction sequences. */
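/* E.g. for V16QImode extract-even both operands are viewed as V8HImode,
each word is masked with 0x00ff and the two results are packed with
packuswb; for extract-odd each word is instead shifted right by 8
before the pack. For the 256-bit modes the pack result additionally
needs the { 0 2 1 3 } vpermq reordering done below. */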
44384 static bool
44385 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
44387 rtx op, dop0, dop1, t, rperm[16];
44388 unsigned i, odd, c, s, nelt = d->nelt;
44389 bool end_perm = false;
44390 machine_mode half_mode;
44391 rtx (*gen_and) (rtx, rtx, rtx);
44392 rtx (*gen_pack) (rtx, rtx, rtx);
44393 rtx (*gen_shift) (rtx, rtx, rtx);
44395 if (d->one_operand_p)
44396 return false;
44398 switch (d->vmode)
44400 case V8HImode:
44401 /* Required for "pack". */
44402 if (!TARGET_SSE4_1)
44403 return false;
44404 c = 0xffff;
44405 s = 16;
44406 half_mode = V4SImode;
44407 gen_and = gen_andv4si3;
44408 gen_pack = gen_sse4_1_packusdw;
44409 gen_shift = gen_lshrv4si3;
44410 break;
44411 case V16QImode:
44412 /* No check as all instructions are SSE2. */
44413 c = 0xff;
44414 s = 8;
44415 half_mode = V8HImode;
44416 gen_and = gen_andv8hi3;
44417 gen_pack = gen_sse2_packuswb;
44418 gen_shift = gen_lshrv8hi3;
44419 break;
44420 case V16HImode:
44421 if (!TARGET_AVX2)
44422 return false;
44423 c = 0xffff;
44424 s = 16;
44425 half_mode = V8SImode;
44426 gen_and = gen_andv8si3;
44427 gen_pack = gen_avx2_packusdw;
44428 gen_shift = gen_lshrv8si3;
44429 end_perm = true;
44430 break;
44431 case V32QImode:
44432 if (!TARGET_AVX2)
44433 return false;
44434 c = 0xff;
44435 s = 8;
44436 half_mode = V16HImode;
44437 gen_and = gen_andv16hi3;
44438 gen_pack = gen_avx2_packuswb;
44439 gen_shift = gen_lshrv16hi3;
44440 end_perm = true;
44441 break;
44442 default:
44443 /* Only for V8HI, V16QI, V16HI and V32QI modes is this more profitable
44444 than general shuffles. */
44445 return false;
44448 /* Check that permutation is even or odd. */
44449 odd = d->perm[0];
44450 if (odd > 1)
44451 return false;
44453 for (i = 1; i < nelt; ++i)
44454 if (d->perm[i] != 2 * i + odd)
44455 return false;
44457 if (d->testing_p)
44458 return true;
44460 dop0 = gen_reg_rtx (half_mode);
44461 dop1 = gen_reg_rtx (half_mode);
44462 if (odd == 0)
44464 for (i = 0; i < nelt / 2; i++)
44465 rperm[i] = GEN_INT (c);
44466 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
44467 t = force_reg (half_mode, t);
44468 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
44469 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
44471 else
44473 emit_insn (gen_shift (dop0,
44474 gen_lowpart (half_mode, d->op0),
44475 GEN_INT (s)));
44476 emit_insn (gen_shift (dop1,
44477 gen_lowpart (half_mode, d->op1),
44478 GEN_INT (s)));
44480 /* In the AVX2 256-bit case we need to permute the pack result. */
44481 if (TARGET_AVX2 && end_perm)
44483 op = gen_reg_rtx (d->vmode);
44484 t = gen_reg_rtx (V4DImode);
44485 emit_insn (gen_pack (op, dop0, dop1));
44486 emit_insn (gen_avx2_permv4di_1 (t,
44487 gen_lowpart (V4DImode, op),
44488 const0_rtx,
44489 const2_rtx,
44490 const1_rtx,
44491 GEN_INT (3)));
44492 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
44494 else
44495 emit_insn (gen_pack (d->target, dop0, dop1));
44497 return true;
44500 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
44501 and extract-odd permutations. */
44503 static bool
44504 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44506 rtx t1, t2, t3, t4, t5;
44508 switch (d->vmode)
44510 case V4DFmode:
44511 if (d->testing_p)
44512 break;
44513 t1 = gen_reg_rtx (V4DFmode);
44514 t2 = gen_reg_rtx (V4DFmode);
44516 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44517 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44518 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44520 /* Now an unpck[lh]pd will produce the result required. */
44521 if (odd)
44522 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44523 else
44524 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44525 emit_insn (t3);
44526 break;
44528 case V8SFmode:
44530 int mask = odd ? 0xdd : 0x88;
44532 if (d->testing_p)
44533 break;
44534 t1 = gen_reg_rtx (V8SFmode);
44535 t2 = gen_reg_rtx (V8SFmode);
44536 t3 = gen_reg_rtx (V8SFmode);
44538 /* Shuffle within the 128-bit lanes to produce:
44539 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44540 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44541 GEN_INT (mask)));
44543 /* Shuffle the lanes around to produce:
44544 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44545 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44546 GEN_INT (0x3)));
44548 /* Shuffle within the 128-bit lanes to produce:
44549 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44550 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44552 /* Shuffle within the 128-bit lanes to produce:
44553 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44554 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44556 /* Shuffle the lanes around to produce:
44557 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44558 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44559 GEN_INT (0x20)));
44561 break;
44563 case V2DFmode:
44564 case V4SFmode:
44565 case V2DImode:
44566 case V4SImode:
44567 /* These are always directly implementable by expand_vec_perm_1. */
44568 gcc_unreachable ();
44570 case V8HImode:
44571 if (TARGET_SSE4_1)
44572 return expand_vec_perm_even_odd_pack (d);
44573 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44574 return expand_vec_perm_pshufb2 (d);
44575 else
44577 if (d->testing_p)
44578 break;
44579 /* We need 2*log2(N)-1 operations to achieve odd/even
44580 with interleave. */
44581 t1 = gen_reg_rtx (V8HImode);
44582 t2 = gen_reg_rtx (V8HImode);
44583 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44584 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44585 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44586 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44587 if (odd)
44588 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44589 else
44590 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44591 emit_insn (t3);
44593 break;
44595 case V16QImode:
44596 return expand_vec_perm_even_odd_pack (d);
44598 case V16HImode:
44599 case V32QImode:
44600 return expand_vec_perm_even_odd_pack (d);
44602 case V4DImode:
44603 if (!TARGET_AVX2)
44605 struct expand_vec_perm_d d_copy = *d;
44606 d_copy.vmode = V4DFmode;
44607 if (d->testing_p)
44608 d_copy.target = gen_lowpart (V4DFmode, d->target);
44609 else
44610 d_copy.target = gen_reg_rtx (V4DFmode);
44611 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44612 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44613 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44615 if (!d->testing_p)
44616 emit_move_insn (d->target,
44617 gen_lowpart (V4DImode, d_copy.target));
44618 return true;
44620 return false;
44623 if (d->testing_p)
44624 break;
44626 t1 = gen_reg_rtx (V4DImode);
44627 t2 = gen_reg_rtx (V4DImode);
44629 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44630 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44631 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44633 /* Now a vpunpck[lh]qdq will produce the result required. */
44634 if (odd)
44635 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44636 else
44637 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44638 emit_insn (t3);
44639 break;
44641 case V8SImode:
44642 if (!TARGET_AVX2)
44644 struct expand_vec_perm_d d_copy = *d;
44645 d_copy.vmode = V8SFmode;
44646 if (d->testing_p)
44647 d_copy.target = gen_lowpart (V8SFmode, d->target);
44648 else
44649 d_copy.target = gen_reg_rtx (V8SFmode);
44650 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44651 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44652 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44654 if (!d->testing_p)
44655 emit_move_insn (d->target,
44656 gen_lowpart (V8SImode, d_copy.target));
44657 return true;
44659 return false;
44662 if (d->testing_p)
44663 break;
44665 t1 = gen_reg_rtx (V8SImode);
44666 t2 = gen_reg_rtx (V8SImode);
44667 t3 = gen_reg_rtx (V4DImode);
44668 t4 = gen_reg_rtx (V4DImode);
44669 t5 = gen_reg_rtx (V4DImode);
44671 /* Shuffle the lanes around into
44672 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44673 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44674 gen_lowpart (V4DImode, d->op1),
44675 GEN_INT (0x20)));
44676 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44677 gen_lowpart (V4DImode, d->op1),
44678 GEN_INT (0x31)));
44680 /* Swap the 2nd and 3rd position in each lane into
44681 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44682 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44683 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44684 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44685 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44687 /* Now a vpunpck[lh]qdq will produce
44688 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44689 if (odd)
44690 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44691 gen_lowpart (V4DImode, t2));
44692 else
44693 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44694 gen_lowpart (V4DImode, t2));
44695 emit_insn (t3);
44696 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44697 break;
44699 default:
44700 gcc_unreachable ();
44703 return true;
44706 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44707 extract-even and extract-odd permutations. */
44709 static bool
44710 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44712 unsigned i, odd, nelt = d->nelt;
44714 odd = d->perm[0];
44715 if (odd != 0 && odd != 1)
44716 return false;
44718 for (i = 1; i < nelt; ++i)
44719 if (d->perm[i] != 2 * i + odd)
44720 return false;
44722 return expand_vec_perm_even_odd_1 (d, odd);
44725 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
44726 permutations. We assume that expand_vec_perm_1 has already failed. */
44728 static bool
44729 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44731 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44732 enum machine_mode vmode = d->vmode;
44733 unsigned char perm2[4];
44734 rtx op0 = d->op0, dest;
44735 bool ok;
44737 switch (vmode)
44739 case V4DFmode:
44740 case V8SFmode:
44741 /* These are special-cased in sse.md so that we can optionally
44742 use the vbroadcast instruction. They expand to two insns
44743 if the input happens to be in a register. */
44744 gcc_unreachable ();
44746 case V2DFmode:
44747 case V2DImode:
44748 case V4SFmode:
44749 case V4SImode:
44750 /* These are always implementable using standard shuffle patterns. */
44751 gcc_unreachable ();
44753 case V8HImode:
44754 case V16QImode:
44755 /* These can be implemented via interleave. We save one insn by
44756 stopping once we have promoted to V4SImode and then use pshufd. */
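/* E.g. broadcasting element 5 of a V8HImode vector: punpckhwd of the
operand with itself gives { 4 4 5 5 6 6 7 7 }, and a pshufd that
replicates dword 1 (the { 5 5 } pair) finishes the job. V16QImode
needs two interleave steps before the final pshufd. */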
44757 if (d->testing_p)
44758 return true;
44761 rtx dest;
44762 rtx (*gen) (rtx, rtx, rtx)
44763 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44764 : gen_vec_interleave_lowv8hi;
44766 if (elt >= nelt2)
44768 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44769 : gen_vec_interleave_highv8hi;
44770 elt -= nelt2;
44772 nelt2 /= 2;
44774 dest = gen_reg_rtx (vmode);
44775 emit_insn (gen (dest, op0, op0));
44776 vmode = get_mode_wider_vector (vmode);
44777 op0 = gen_lowpart (vmode, dest);
44779 while (vmode != V4SImode);
44781 memset (perm2, elt, 4);
44782 dest = gen_reg_rtx (V4SImode);
44783 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44784 gcc_assert (ok);
44785 if (!d->testing_p)
44786 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44787 return true;
44789 case V32QImode:
44790 case V16HImode:
44791 case V8SImode:
44792 case V4DImode:
44793 /* For AVX2, broadcasts of the first element should already have been
44794 handled by expand_vec_perm_1 via vpbroadcast* or vpermq. */
44795 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44796 return false;
44798 default:
44799 gcc_unreachable ();
44803 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44804 broadcast permutations. */
44806 static bool
44807 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44809 unsigned i, elt, nelt = d->nelt;
44811 if (!d->one_operand_p)
44812 return false;
44814 elt = d->perm[0];
44815 for (i = 1; i < nelt; ++i)
44816 if (d->perm[i] != elt)
44817 return false;
44819 return expand_vec_perm_broadcast_1 (d);
44822 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
44823 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44824 all the shorter instruction sequences. */
44826 static bool
44827 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44829 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44830 unsigned int i, nelt, eltsz;
44831 bool used[4];
44833 if (!TARGET_AVX2
44834 || d->one_operand_p
44835 || (d->vmode != V32QImode && d->vmode != V16HImode))
44836 return false;
44838 if (d->testing_p)
44839 return true;
44841 nelt = d->nelt;
44842 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44844 /* Generate 4 permutation masks. If the required element is within
44845 the same lane, it is shuffled in. If the required element is in the
44846 other lane, force a zero by setting bit 7 in the permutation mask.
44847 The cross-lane masks get non-negative entries whenever an element is
44848 requested from the other lane; those entries are placed at the same
44849 offset in the opposite lane, so that swapping the two V2TImode
44850 halves of the vpshufb results puts them where they belong. */
44851 m128 = GEN_INT (-128);
44852 for (i = 0; i < 32; ++i)
44854 rperm[0][i] = m128;
44855 rperm[1][i] = m128;
44856 rperm[2][i] = m128;
44857 rperm[3][i] = m128;
44859 used[0] = false;
44860 used[1] = false;
44861 used[2] = false;
44862 used[3] = false;
44863 for (i = 0; i < nelt; ++i)
44865 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44866 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44867 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44869 for (j = 0; j < eltsz; ++j)
44870 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44871 used[which] = true;
44874 for (i = 0; i < 2; ++i)
44876 if (!used[2 * i + 1])
44878 h[i] = NULL_RTX;
44879 continue;
44881 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44882 gen_rtvec_v (32, rperm[2 * i + 1]));
44883 vperm = force_reg (V32QImode, vperm);
44884 h[i] = gen_reg_rtx (V32QImode);
44885 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44886 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44889 /* Swap the 128-bit lanes of h[X]. */
44890 for (i = 0; i < 2; ++i)
44892 if (h[i] == NULL_RTX)
44893 continue;
44894 op = gen_reg_rtx (V4DImode);
44895 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44896 const2_rtx, GEN_INT (3), const0_rtx,
44897 const1_rtx));
44898 h[i] = gen_lowpart (V32QImode, op);
44901 for (i = 0; i < 2; ++i)
44903 if (!used[2 * i])
44905 l[i] = NULL_RTX;
44906 continue;
44908 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44909 vperm = force_reg (V32QImode, vperm);
44910 l[i] = gen_reg_rtx (V32QImode);
44911 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44912 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44915 for (i = 0; i < 2; ++i)
44917 if (h[i] && l[i])
44919 op = gen_reg_rtx (V32QImode);
44920 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44921 l[i] = op;
44923 else if (h[i])
44924 l[i] = h[i];
44927 gcc_assert (l[0] && l[1]);
44928 op = d->target;
44929 if (d->vmode != V32QImode)
44930 op = gen_reg_rtx (V32QImode);
44931 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44932 if (op != d->target)
44933 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44934 return true;
44937 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44938 With all of the interface bits taken care of, perform the expansion
44939 in D and return true on success. */
44941 static bool
44942 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44944 /* Try a single instruction expansion. */
44945 if (expand_vec_perm_1 (d))
44946 return true;
44948 /* Try sequences of two instructions. */
44950 if (expand_vec_perm_pshuflw_pshufhw (d))
44951 return true;
44953 if (expand_vec_perm_palignr (d))
44954 return true;
44956 if (expand_vec_perm_interleave2 (d))
44957 return true;
44959 if (expand_vec_perm_broadcast (d))
44960 return true;
44962 if (expand_vec_perm_vpermq_perm_1 (d))
44963 return true;
44965 if (expand_vec_perm_vperm2f128 (d))
44966 return true;
44968 /* Try sequences of three instructions. */
44970 if (expand_vec_perm_even_odd_pack (d))
44971 return true;
44973 if (expand_vec_perm_2vperm2f128_vshuf (d))
44974 return true;
44976 if (expand_vec_perm_pshufb2 (d))
44977 return true;
44979 if (expand_vec_perm_interleave3 (d))
44980 return true;
44982 if (expand_vec_perm_vperm2f128_vblend (d))
44983 return true;
44985 /* Try sequences of four instructions. */
44987 if (expand_vec_perm_vpshufb2_vpermq (d))
44988 return true;
44990 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44991 return true;
44993 /* ??? Look for narrow permutations whose element orderings would
44994 allow the promotion to a wider mode. */
44996 /* ??? Look for sequences of interleave or a wider permute that place
44997 the data into the correct lanes for a half-vector shuffle like
44998 pshuf[lh]w or vpermilps. */
45000 /* ??? Look for sequences of interleave that produce the desired results.
45001 The combinatorics of punpck[lh] get pretty ugly... */
45003 if (expand_vec_perm_even_odd (d))
45004 return true;
45006 /* Even longer sequences. */
45007 if (expand_vec_perm_vpshufb4_vpermq2 (d))
45008 return true;
45010 return false;
45013 /* If a permutation only uses one operand, make it clear. Returns true
45014 if the permutation references both operands. */
45016 static bool
45017 canonicalize_perm (struct expand_vec_perm_d *d)
45019 int i, which, nelt = d->nelt;
45021 for (i = which = 0; i < nelt; ++i)
45022 which |= (d->perm[i] < nelt ? 1 : 2);
45024 d->one_operand_p = true;
45025 switch (which)
45027 default:
45028 gcc_unreachable();
45030 case 3:
45031 if (!rtx_equal_p (d->op0, d->op1))
45033 d->one_operand_p = false;
45034 break;
45036 /* The elements of PERM do not suggest that only the first operand
45037 is used, but both operands are identical. Allow easier matching
45038 of the permutation by folding the permutation into the single
45039 input vector. */
45040 /* FALLTHRU */
45042 case 2:
45043 for (i = 0; i < nelt; ++i)
45044 d->perm[i] &= nelt - 1;
45045 d->op0 = d->op1;
45046 break;
45048 case 1:
45049 d->op1 = d->op0;
45050 break;
45053 return (which == 3);
45056 bool
45057 ix86_expand_vec_perm_const (rtx operands[4])
45059 struct expand_vec_perm_d d;
45060 unsigned char perm[MAX_VECT_LEN];
45061 int i, nelt;
45062 bool two_args;
45063 rtx sel;
45065 d.target = operands[0];
45066 d.op0 = operands[1];
45067 d.op1 = operands[2];
45068 sel = operands[3];
45070 d.vmode = GET_MODE (d.target);
45071 gcc_assert (VECTOR_MODE_P (d.vmode));
45072 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45073 d.testing_p = false;
45075 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
45076 gcc_assert (XVECLEN (sel, 0) == nelt);
45077 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
45079 for (i = 0; i < nelt; ++i)
45081 rtx e = XVECEXP (sel, 0, i);
45082 int ei = INTVAL (e) & (2 * nelt - 1);
45083 d.perm[i] = ei;
45084 perm[i] = ei;
45087 two_args = canonicalize_perm (&d);
45089 if (ix86_expand_vec_perm_const_1 (&d))
45090 return true;
45092 /* If the selector says both arguments are needed, but the operands are the
45093 same, the above tried to expand with one_operand_p and flattened selector.
45094 If that didn't work, retry without one_operand_p; we succeeded with that
45095 during testing. */
45096 if (two_args && d.one_operand_p)
45098 d.one_operand_p = false;
45099 memcpy (d.perm, perm, sizeof (perm));
45100 return ix86_expand_vec_perm_const_1 (&d);
45103 return false;
45106 /* Implement targetm.vectorize.vec_perm_const_ok. */
45108 static bool
45109 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
45110 const unsigned char *sel)
45112 struct expand_vec_perm_d d;
45113 unsigned int i, nelt, which;
45114 bool ret;
45116 d.vmode = vmode;
45117 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45118 d.testing_p = true;
45120 /* Given sufficient ISA support we can just return true here
45121 for selected vector modes. */
45122 if (d.vmode == V16SImode || d.vmode == V16SFmode
45123 || d.vmode == V8DFmode || d.vmode == V8DImode)
45124 /* All implementable with a single vpermi2 insn. */
45125 return true;
45126 if (GET_MODE_SIZE (d.vmode) == 16)
45128 /* All implementable with a single vpperm insn. */
45129 if (TARGET_XOP)
45130 return true;
45131 /* All implementable with 2 pshufb + 1 ior. */
45132 if (TARGET_SSSE3)
45133 return true;
45134 /* All implementable with shufpd or unpck[lh]pd. */
45135 if (d.nelt == 2)
45136 return true;
45139 /* Extract the values from the vector CST into the permutation
45140 array in D. */
45141 memcpy (d.perm, sel, nelt);
45142 for (i = which = 0; i < nelt; ++i)
45144 unsigned char e = d.perm[i];
45145 gcc_assert (e < 2 * nelt);
45146 which |= (e < nelt ? 1 : 2);
45149 /* For all elements from second vector, fold the elements to first. */
45150 if (which == 2)
45151 for (i = 0; i < nelt; ++i)
45152 d.perm[i] -= nelt;
45154 /* Check whether the mask can be applied to the vector type. */
45155 d.one_operand_p = (which != 3);
45157 /* Implementable with shufps or pshufd. */
45158 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
45159 return true;
45161 /* Otherwise we have to go through the motions and see if we can
45162 figure out how to generate the requested permutation. */
45163 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
45164 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
45165 if (!d.one_operand_p)
45166 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
45168 start_sequence ();
45169 ret = ix86_expand_vec_perm_const_1 (&d);
45170 end_sequence ();
45172 return ret;
45175 void
45176 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
45178 struct expand_vec_perm_d d;
45179 unsigned i, nelt;
45181 d.target = targ;
45182 d.op0 = op0;
45183 d.op1 = op1;
45184 d.vmode = GET_MODE (targ);
45185 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45186 d.one_operand_p = false;
45187 d.testing_p = false;
45189 for (i = 0; i < nelt; ++i)
45190 d.perm[i] = i * 2 + odd;
45192 /* We'll either be able to implement the permutation directly... */
45193 if (expand_vec_perm_1 (&d))
45194 return;
45196 /* ... or we use the special-case patterns. */
45197 expand_vec_perm_even_odd_1 (&d, odd);
45200 static void
45201 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
45203 struct expand_vec_perm_d d;
45204 unsigned i, nelt, base;
45205 bool ok;
45207 d.target = targ;
45208 d.op0 = op0;
45209 d.op1 = op1;
45210 d.vmode = GET_MODE (targ);
45211 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45212 d.one_operand_p = false;
45213 d.testing_p = false;
45215 base = high_p ? nelt / 2 : 0;
45216 for (i = 0; i < nelt / 2; ++i)
45218 d.perm[i * 2] = i + base;
45219 d.perm[i * 2 + 1] = i + base + nelt;
45222 /* Note that for AVX this isn't one instruction. */
45223 ok = ix86_expand_vec_perm_const_1 (&d);
45224 gcc_assert (ok);
45228 /* Expand a vector operation CODE for a V*QImode in terms of the
45229 same operation on V*HImode. */
45231 void
45232 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
45234 enum machine_mode qimode = GET_MODE (dest);
45235 enum machine_mode himode;
45236 rtx (*gen_il) (rtx, rtx, rtx);
45237 rtx (*gen_ih) (rtx, rtx, rtx);
45238 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
45239 struct expand_vec_perm_d d;
45240 bool ok, full_interleave;
45241 bool uns_p = false;
45242 int i;
45244 switch (qimode)
45246 case V16QImode:
45247 himode = V8HImode;
45248 gen_il = gen_vec_interleave_lowv16qi;
45249 gen_ih = gen_vec_interleave_highv16qi;
45250 break;
45251 case V32QImode:
45252 himode = V16HImode;
45253 gen_il = gen_avx2_interleave_lowv32qi;
45254 gen_ih = gen_avx2_interleave_highv32qi;
45255 break;
45256 default:
45257 gcc_unreachable ();
45260 op2_l = op2_h = op2;
45261 switch (code)
45263 case MULT:
45264 /* Unpack data such that we've got a source byte in each low byte of
45265 each word. We don't care what goes into the high byte of each word.
45266 Rather than trying to get zero in there, most convenient is to let
45267 it be a copy of the low byte. */
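/* With each word holding (a | a << 8) in one operand and (b | b << 8)
in the other, the word product is a*b*0x0101*0x0101, whose low eight
bits are still (a * b) & 0xff, so the even-byte extraction at the end
recovers the byte products whatever lands in the high bytes. */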
45268 op2_l = gen_reg_rtx (qimode);
45269 op2_h = gen_reg_rtx (qimode);
45270 emit_insn (gen_il (op2_l, op2, op2));
45271 emit_insn (gen_ih (op2_h, op2, op2));
45272 /* FALLTHRU */
45274 op1_l = gen_reg_rtx (qimode);
45275 op1_h = gen_reg_rtx (qimode);
45276 emit_insn (gen_il (op1_l, op1, op1));
45277 emit_insn (gen_ih (op1_h, op1, op1));
45278 full_interleave = qimode == V16QImode;
45279 break;
45281 case ASHIFT:
45282 case LSHIFTRT:
45283 uns_p = true;
45284 /* FALLTHRU */
45285 case ASHIFTRT:
45286 op1_l = gen_reg_rtx (himode);
45287 op1_h = gen_reg_rtx (himode);
45288 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
45289 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
45290 full_interleave = true;
45291 break;
45292 default:
45293 gcc_unreachable ();
45296 /* Perform the operation. */
45297 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
45298 1, OPTAB_DIRECT);
45299 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
45300 1, OPTAB_DIRECT);
45301 gcc_assert (res_l && res_h);
45303 /* Merge the data back into the right place. */
45304 d.target = dest;
45305 d.op0 = gen_lowpart (qimode, res_l);
45306 d.op1 = gen_lowpart (qimode, res_h);
45307 d.vmode = qimode;
45308 d.nelt = GET_MODE_NUNITS (qimode);
45309 d.one_operand_p = false;
45310 d.testing_p = false;
45312 if (full_interleave)
45314 /* For SSE2, we used a full interleave, so the desired
45315 results are in the even elements. */
45316 for (i = 0; i < 32; ++i)
45317 d.perm[i] = i * 2;
45319 else
45321 /* For AVX, the interleave used above was not cross-lane. So we extract
45322 the even elements, but with the second and third quarters swapped.
45323 Happily, that is even one insn shorter than plain even extraction. */
45324 for (i = 0; i < 32; ++i)
45325 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
45328 ok = ix86_expand_vec_perm_const_1 (&d);
45329 gcc_assert (ok);
45331 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45332 gen_rtx_fmt_ee (code, qimode, op1, op2));
45335 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
45336 if op is CONST_VECTOR with all odd elements equal to their
45337 preceding element. */
45339 static bool
45340 const_vector_equal_evenodd_p (rtx op)
45342 enum machine_mode mode = GET_MODE (op);
45343 int i, nunits = GET_MODE_NUNITS (mode);
45344 if (GET_CODE (op) != CONST_VECTOR
45345 || nunits != CONST_VECTOR_NUNITS (op))
45346 return false;
45347 for (i = 0; i < nunits; i += 2)
45348 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
45349 return false;
45350 return true;
45353 void
45354 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
45355 bool uns_p, bool odd_p)
45357 enum machine_mode mode = GET_MODE (op1);
45358 enum machine_mode wmode = GET_MODE (dest);
45359 rtx x;
45360 rtx orig_op1 = op1, orig_op2 = op2;
45362 if (!nonimmediate_operand (op1, mode))
45363 op1 = force_reg (mode, op1);
45364 if (!nonimmediate_operand (op2, mode))
45365 op2 = force_reg (mode, op2);
45367 /* We only play even/odd games with vectors of SImode. */
45368 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
45370 /* If we're looking for the odd results, shift those members down to
45371 the even slots. For some cpus this is faster than a PSHUFD. */
45372 if (odd_p)
45374 /* For XOP use vpmacsdqh, but only for smult, as it is only
45375 signed. */
45376 if (TARGET_XOP && mode == V4SImode && !uns_p)
45378 x = force_reg (wmode, CONST0_RTX (wmode));
45379 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
45380 return;
45383 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
45384 if (!const_vector_equal_evenodd_p (orig_op1))
45385 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
45386 x, NULL, 1, OPTAB_DIRECT);
45387 if (!const_vector_equal_evenodd_p (orig_op2))
45388 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
45389 x, NULL, 1, OPTAB_DIRECT);
45390 op1 = gen_lowpart (mode, op1);
45391 op2 = gen_lowpart (mode, op2);
45394 if (mode == V16SImode)
45396 if (uns_p)
45397 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
45398 else
45399 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
45401 else if (mode == V8SImode)
45403 if (uns_p)
45404 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45405 else
45406 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45408 else if (uns_p)
45409 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45410 else if (TARGET_SSE4_1)
45411 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45412 else
45414 rtx s1, s2, t0, t1, t2;
45416 /* The easiest way to implement this without PMULDQ is to go through
45417 the motions as if we are performing a full 64-bit multiply. With
45418 the exception that we need to do less shuffling of the elements. */
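/* The identity used here: the signed 64-bit product of 32-bit a and b
equals their unsigned 64-bit product plus
((a < 0 ? -b : 0) + (b < 0 ? -a : 0)) << 32, modulo 2^64. s1/s2 below
are the a < 0 and b < 0 masks, and the two extra widening multiplies
form that correction term. */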
45420 /* Compute the sign-extension, aka highparts, of the two operands. */
45421 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45422 op1, pc_rtx, pc_rtx);
45423 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45424 op2, pc_rtx, pc_rtx);
45426 /* Multiply LO(A) * HI(B), and vice-versa. */
45427 t1 = gen_reg_rtx (wmode);
45428 t2 = gen_reg_rtx (wmode);
45429 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45430 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45432 /* Multiply LO(A) * LO(B). */
45433 t0 = gen_reg_rtx (wmode);
45434 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45436 /* Combine and shift the highparts into place. */
45437 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45438 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45439 1, OPTAB_DIRECT);
45441 /* Combine high and low parts. */
45442 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45443 return;
45445 emit_insn (x);
45448 void
45449 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45450 bool uns_p, bool high_p)
45452 enum machine_mode wmode = GET_MODE (dest);
45453 enum machine_mode mode = GET_MODE (op1);
45454 rtx t1, t2, t3, t4, mask;
45456 switch (mode)
45458 case V4SImode:
45459 t1 = gen_reg_rtx (mode);
45460 t2 = gen_reg_rtx (mode);
45461 if (TARGET_XOP && !uns_p)
45463 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45464 shuffle the elements once so that all elements are in the right
45465 place for immediate use: { A C B D }. */
45466 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45467 const1_rtx, GEN_INT (3)));
45468 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45469 const1_rtx, GEN_INT (3)));
45471 else
45473 /* Put the elements into place for the multiply. */
45474 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45475 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45476 high_p = false;
45478 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45479 break;
45481 case V8SImode:
45482 /* Shuffle the elements between the lanes. After this we
45483 have { A B E F | C D G H } for each operand. */
45484 t1 = gen_reg_rtx (V4DImode);
45485 t2 = gen_reg_rtx (V4DImode);
45486 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45487 const0_rtx, const2_rtx,
45488 const1_rtx, GEN_INT (3)));
45489 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45490 const0_rtx, const2_rtx,
45491 const1_rtx, GEN_INT (3)));
45493 /* Shuffle the elements within the lanes. After this we
45494 have { A A B B | C C D D } or { E E F F | G G H H }. */
45495 t3 = gen_reg_rtx (V8SImode);
45496 t4 = gen_reg_rtx (V8SImode);
45497 mask = GEN_INT (high_p
45498 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45499 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45500 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45501 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45503 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45504 break;
45506 case V8HImode:
45507 case V16HImode:
45508 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45509 uns_p, OPTAB_DIRECT);
45510 t2 = expand_binop (mode,
45511 uns_p ? umul_highpart_optab : smul_highpart_optab,
45512 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45513 gcc_assert (t1 && t2);
45515 t3 = gen_reg_rtx (mode);
45516 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45517 emit_move_insn (dest, gen_lowpart (wmode, t3));
45518 break;
45520 case V16QImode:
45521 case V32QImode:
45522 t1 = gen_reg_rtx (wmode);
45523 t2 = gen_reg_rtx (wmode);
45524 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45525 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45527 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45528 break;
45530 default:
45531 gcc_unreachable ();
45535 void
45536 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45538 rtx res_1, res_2, res_3, res_4;
45540 res_1 = gen_reg_rtx (V4SImode);
45541 res_2 = gen_reg_rtx (V4SImode);
45542 res_3 = gen_reg_rtx (V2DImode);
45543 res_4 = gen_reg_rtx (V2DImode);
45544 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45545 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45547 /* Move the results in element 2 down to element 1; we don't care
45548 what goes in elements 2 and 3. Then we can merge the parts
45549 back together with an interleave.
45551 Note that two other sequences were tried:
45552 (1) Use interleaves at the start instead of psrldq, which allows
45553 us to use a single shufps to merge things back at the end.
45554 (2) Use shufps here to combine the two vectors, then pshufd to
45555 put the elements in the correct order.
45556 In both cases the cost of the reformatting stall was too high
45557 and the overall sequence slower. */
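/* E.g. with products P0..P3, res_3 holds { P0, P2 } and res_4 holds
{ P1, P3 } as 64-bit values; the two pshufd's move the low halves into
elements 0 and 1, giving { lo(P0) lo(P2) x x } and { lo(P1) lo(P3) x x },
and punpckldq interleaves them into { lo(P0) lo(P1) lo(P2) lo(P3) }. */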
45559 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45560 const0_rtx, const2_rtx,
45561 const0_rtx, const0_rtx));
45562 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45563 const0_rtx, const2_rtx,
45564 const0_rtx, const0_rtx));
45565 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45567 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45570 void
45571 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45573 enum machine_mode mode = GET_MODE (op0);
45574 rtx t1, t2, t3, t4, t5, t6;
45576 if (TARGET_XOP && mode == V2DImode)
45578 /* op1: A,B,C,D, op2: E,F,G,H */
45579 op1 = gen_lowpart (V4SImode, op1);
45580 op2 = gen_lowpart (V4SImode, op2);
45582 t1 = gen_reg_rtx (V4SImode);
45583 t2 = gen_reg_rtx (V4SImode);
45584 t3 = gen_reg_rtx (V2DImode);
45585 t4 = gen_reg_rtx (V2DImode);
45587 /* t1: B,A,D,C */
45588 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45589 GEN_INT (1),
45590 GEN_INT (0),
45591 GEN_INT (3),
45592 GEN_INT (2)));
45594 /* t2: (B*E),(A*F),(D*G),(C*H) */
45595 emit_insn (gen_mulv4si3 (t2, t1, op2));
45597 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45598 emit_insn (gen_xop_phadddq (t3, t2));
45600 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45601 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45603 /* Multiply lower parts and add all. */
45604 t5 = gen_reg_rtx (V2DImode);
45605 emit_insn (gen_vec_widen_umult_even_v4si (t5, gen_lowpart (V4SImode, op1), gen_lowpart (V4SImode, op2)));
45606 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
45608 else
45610 enum machine_mode nmode;
45611 rtx (*umul) (rtx, rtx, rtx);
45613 if (mode == V2DImode)
45615 umul = gen_vec_widen_umult_even_v4si;
45616 nmode = V4SImode;
45618 else if (mode == V4DImode)
45620 umul = gen_vec_widen_umult_even_v8si;
45621 nmode = V8SImode;
45623 else if (mode == V8DImode)
45625 umul = gen_vec_widen_umult_even_v16si;
45626 nmode = V16SImode;
45628 else
45629 gcc_unreachable ();
45632 /* Multiply low parts. */
45633 t1 = gen_reg_rtx (mode);
45634 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45636 /* Shift input vectors right 32 bits so we can multiply high parts. */
45637 t6 = GEN_INT (32);
45638 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45639 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45641 /* Multiply high parts by low parts. */
45642 t4 = gen_reg_rtx (mode);
45643 t5 = gen_reg_rtx (mode);
45644 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45645 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45647 /* Combine and shift the highparts back. */
45648 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45649 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45651 /* Combine high and low parts. */
45652 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45655 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45656 gen_rtx_MULT (mode, op1, op2));
45659 /* Calculate integer abs() using only SSE2 instructions. */
45661 void
45662 ix86_expand_sse2_abs (rtx target, rtx input)
45664 enum machine_mode mode = GET_MODE (target);
45665 rtx tmp0, tmp1, x;
45667 switch (mode)
45669 /* For 32-bit signed integer X, the best way to calculate the absolute
45670 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
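/* E.g. for X = -5: X >> 31 == -1, (-1 ^ -5) == 4, and 4 - (-1) == 5. */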
45671 case V4SImode:
45672 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45673 GEN_INT (GET_MODE_BITSIZE
45674 (GET_MODE_INNER (mode)) - 1),
45675 NULL, 0, OPTAB_DIRECT);
45676 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45677 NULL, 0, OPTAB_DIRECT);
45678 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45679 target, 0, OPTAB_DIRECT);
45680 break;
45682 /* For 16-bit signed integer X, the best way to calculate the absolute
45683 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45684 case V8HImode:
45685 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45687 x = expand_simple_binop (mode, SMAX, tmp0, input,
45688 target, 0, OPTAB_DIRECT);
45689 break;
45691 /* For 8-bit signed integer X, the best way to calculate the absolute
45692 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45693 as SSE2 provides the PMINUB insn. */
45694 case V16QImode:
45695 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45697 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45698 target, 0, OPTAB_DIRECT);
45699 break;
45701 default:
45702 gcc_unreachable ();
45705 if (x != target)
45706 emit_move_insn (target, x);
45709 /* Expand an insert into a vector register through pinsr insn.
45710 Return true if successful. */
45712 bool
45713 ix86_expand_pinsr (rtx *operands)
45715 rtx dst = operands[0];
45716 rtx src = operands[3];
45718 unsigned int size = INTVAL (operands[1]);
45719 unsigned int pos = INTVAL (operands[2]);
45721 if (GET_CODE (dst) == SUBREG)
45723 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45724 dst = SUBREG_REG (dst);
45727 if (GET_CODE (src) == SUBREG)
45728 src = SUBREG_REG (src);
45730 switch (GET_MODE (dst))
45732 case V16QImode:
45733 case V8HImode:
45734 case V4SImode:
45735 case V2DImode:
45737 enum machine_mode srcmode, dstmode;
45738 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45740 srcmode = mode_for_size (size, MODE_INT, 0);
45742 switch (srcmode)
45744 case QImode:
45745 if (!TARGET_SSE4_1)
45746 return false;
45747 dstmode = V16QImode;
45748 pinsr = gen_sse4_1_pinsrb;
45749 break;
45751 case HImode:
45752 if (!TARGET_SSE2)
45753 return false;
45754 dstmode = V8HImode;
45755 pinsr = gen_sse2_pinsrw;
45756 break;
45758 case SImode:
45759 if (!TARGET_SSE4_1)
45760 return false;
45761 dstmode = V4SImode;
45762 pinsr = gen_sse4_1_pinsrd;
45763 break;
45765 case DImode:
45766 gcc_assert (TARGET_64BIT);
45767 if (!TARGET_SSE4_1)
45768 return false;
45769 dstmode = V2DImode;
45770 pinsr = gen_sse4_1_pinsrq;
45771 break;
45773 default:
45774 return false;
45777 rtx d = dst;
45778 if (GET_MODE (dst) != dstmode)
45779 d = gen_reg_rtx (dstmode);
45780 src = gen_lowpart (srcmode, src);
45782 pos /= size;
45784 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45785 GEN_INT (1 << pos)));
45786 if (d != dst)
45787 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45788 return true;
45791 default:
45792 return false;
45796 /* This function returns the calling-ABI-specific va_list type node,
45797 i.e. the va_list type appropriate for FNDECL. */
45799 static tree
45800 ix86_fn_abi_va_list (tree fndecl)
45802 if (!TARGET_64BIT)
45803 return va_list_type_node;
45804 gcc_assert (fndecl != NULL_TREE);
45806 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45807 return ms_va_list_type_node;
45808 else
45809 return sysv_va_list_type_node;
45812 /* Returns the canonical va_list type specified by TYPE. If there
45813 is no valid TYPE provided, it returns NULL_TREE. */
45815 static tree
45816 ix86_canonical_va_list_type (tree type)
45818 tree wtype, htype;
45820 /* Resolve references and pointers to va_list type. */
45821 if (TREE_CODE (type) == MEM_REF)
45822 type = TREE_TYPE (type);
45823 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45824 type = TREE_TYPE (type);
45825 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45826 type = TREE_TYPE (type);
45828 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45830 wtype = va_list_type_node;
45831 gcc_assert (wtype != NULL_TREE);
45832 htype = type;
45833 if (TREE_CODE (wtype) == ARRAY_TYPE)
45835 /* If va_list is an array type, the argument may have decayed
45836 to a pointer type, e.g. by being passed to another function.
45837 In that case, unwrap both types so that we can compare the
45838 underlying records. */
45839 if (TREE_CODE (htype) == ARRAY_TYPE
45840 || POINTER_TYPE_P (htype))
45842 wtype = TREE_TYPE (wtype);
45843 htype = TREE_TYPE (htype);
45846 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45847 return va_list_type_node;
45848 wtype = sysv_va_list_type_node;
45849 gcc_assert (wtype != NULL_TREE);
45850 htype = type;
45851 if (TREE_CODE (wtype) == ARRAY_TYPE)
45853 /* If va_list is an array type, the argument may have decayed
45854 to a pointer type, e.g. by being passed to another function.
45855 In that case, unwrap both types so that we can compare the
45856 underlying records. */
45857 if (TREE_CODE (htype) == ARRAY_TYPE
45858 || POINTER_TYPE_P (htype))
45860 wtype = TREE_TYPE (wtype);
45861 htype = TREE_TYPE (htype);
45864 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45865 return sysv_va_list_type_node;
45866 wtype = ms_va_list_type_node;
45867 gcc_assert (wtype != NULL_TREE);
45868 htype = type;
45869 if (TREE_CODE (wtype) == ARRAY_TYPE)
45871 /* If va_list is an array type, the argument may have decayed
45872 to a pointer type, e.g. by being passed to another function.
45873 In that case, unwrap both types so that we can compare the
45874 underlying records. */
45875 if (TREE_CODE (htype) == ARRAY_TYPE
45876 || POINTER_TYPE_P (htype))
45878 wtype = TREE_TYPE (wtype);
45879 htype = TREE_TYPE (htype);
45882 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45883 return ms_va_list_type_node;
45884 return NULL_TREE;
45886 return std_canonical_va_list_type (type);
45889 /* Iterate through the target-specific builtin types for va_list.
45890 IDX denotes the iterator, *PTREE is set to the result type of
45891 the va_list builtin, and *PNAME to its internal type.
45892 Returns zero if there is no element for this index, otherwise
45893 IDX should be increased upon the next call.
45894 Note, do not iterate a base builtin's name like __builtin_va_list.
45895 Used from c_common_nodes_and_builtins. */
45897 static int
45898 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45900 if (TARGET_64BIT)
45902 switch (idx)
45904 default:
45905 break;
45907 case 0:
45908 *ptree = ms_va_list_type_node;
45909 *pname = "__builtin_ms_va_list";
45910 return 1;
45912 case 1:
45913 *ptree = sysv_va_list_type_node;
45914 *pname = "__builtin_sysv_va_list";
45915 return 1;
45919 return 0;
45922 #undef TARGET_SCHED_DISPATCH
45923 #define TARGET_SCHED_DISPATCH has_dispatch
45924 #undef TARGET_SCHED_DISPATCH_DO
45925 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45926 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45927 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45928 #undef TARGET_SCHED_REORDER
45929 #define TARGET_SCHED_REORDER ix86_sched_reorder
45930 #undef TARGET_SCHED_ADJUST_PRIORITY
45931 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45932 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45933 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45934 ix86_dependencies_evaluation_hook
45936 /* The size of the dispatch window is the total number of bytes of
45937 object code allowed in a window. */
45938 #define DISPATCH_WINDOW_SIZE 16
45940 /* Number of dispatch windows considered for scheduling. */
45941 #define MAX_DISPATCH_WINDOWS 3
45943 /* Maximum number of instructions in a window. */
45944 #define MAX_INSN 4
45946 /* Maximum number of immediate operands in a window. */
45947 #define MAX_IMM 4
45949 /* Maximum number of immediate bits allowed in a window. */
45950 #define MAX_IMM_SIZE 128
45952 /* Maximum number of 32 bit immediates allowed in a window. */
45953 #define MAX_IMM_32 4
45955 /* Maximum number of 64 bit immediates allowed in a window. */
45956 #define MAX_IMM_64 2
45958 /* Maximum total of loads or prefetches allowed in a window. */
45959 #define MAX_LOAD 2
45961 /* Maximum total of stores allowed in a window. */
45962 #define MAX_STORE 1
45964 #undef BIG
45965 #define BIG 100
45968 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45969 enum dispatch_group {
45970 disp_no_group = 0,
45971 disp_load,
45972 disp_store,
45973 disp_load_store,
45974 disp_prefetch,
45975 disp_imm,
45976 disp_imm_32,
45977 disp_imm_64,
45978 disp_branch,
45979 disp_cmp,
45980 disp_jcc,
45981 disp_last
45984 /* Number of allowable groups in a dispatch window. It is an array
45985 indexed by dispatch_group enum. 100 is used as a big number,
45986 because the number of these kinds of operations does not have any
45987 effect on the dispatch window, but we need them for other reasons in
45988 the table. */
45989 static unsigned int num_allowable_groups[disp_last] = {
45990 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45993 char group_name[disp_last + 1][16] = {
45994 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45995 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45996 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45999 /* Instruction path. */
46000 enum insn_path {
46001 no_path = 0,
46002 path_single, /* Single micro op. */
46003 path_double, /* Double micro op. */
46004 path_multi, /* Instructions with more than 2 micro ops. */
46005 last_path
46008 /* sched_insn_info defines a window to the instructions scheduled in
46009 the basic block. It contains a pointer to the insn_info table and
46010 the instruction scheduled.
46012 Windows are allocated for each basic block and are linked
46013 together. */
46014 typedef struct sched_insn_info_s {
46015 rtx insn;
46016 enum dispatch_group group;
46017 enum insn_path path;
46018 int byte_len;
46019 int imm_bytes;
46020 } sched_insn_info;
46022 /* Linked list of dispatch windows. This is a two way list of
46023 dispatch windows of a basic block. It contains information about
46024 the number of uops in the window and the total number of
46025 instructions and of bytes in the object code for this dispatch
46026 window. */
46027 typedef struct dispatch_windows_s {
46028 int num_insn; /* Number of insns in the window. */
46029 int num_uops; /* Number of uops in the window. */
46030 int window_size; /* Number of bytes in the window. */
46031 int window_num; /* Window number, either 0 or 1. */
46032 int num_imm; /* Number of immediates in an insn. */
46033 int num_imm_32; /* Number of 32 bit immediates in an insn. */
46034 int num_imm_64; /* Number of 64 bit immediates in an insn. */
46035 int imm_size; /* Total immediates in the window. */
46036 int num_loads; /* Total memory loads in the window. */
46037 int num_stores; /* Total memory stores in the window. */
46038 int violation; /* Violation exists in window. */
46039 sched_insn_info *window; /* Pointer to the window. */
46040 struct dispatch_windows_s *next;
46041 struct dispatch_windows_s *prev;
46042 } dispatch_windows;
46044 /* Immediate values used in an insn. */
46045 typedef struct imm_info_s
46047 int imm;
46048 int imm32;
46049 int imm64;
46050 } imm_info;
46052 static dispatch_windows *dispatch_window_list;
46053 static dispatch_windows *dispatch_window_list1;
46055 /* Get dispatch group of insn. */
46057 static enum dispatch_group
46058 get_mem_group (rtx insn)
46060 enum attr_memory memory;
46062 if (INSN_CODE (insn) < 0)
46063 return disp_no_group;
46064 memory = get_attr_memory (insn);
46065 if (memory == MEMORY_STORE)
46066 return disp_store;
46068 if (memory == MEMORY_LOAD)
46069 return disp_load;
46071 if (memory == MEMORY_BOTH)
46072 return disp_load_store;
46074 return disp_no_group;
46077 /* Return true if insn is a compare instruction. */
46079 static bool
46080 is_cmp (rtx insn)
46082 enum attr_type type;
46084 type = get_attr_type (insn);
46085 return (type == TYPE_TEST
46086 || type == TYPE_ICMP
46087 || type == TYPE_FCMP
46088 || GET_CODE (PATTERN (insn)) == COMPARE);
46091 /* Return true if a dispatch violation was encountered. */
46093 static bool
46094 dispatch_violation (void)
46096 if (dispatch_window_list->next)
46097 return dispatch_window_list->next->violation;
46098 return dispatch_window_list->violation;
46101 /* Return true if insn is a branch instruction. */
46103 static bool
46104 is_branch (rtx insn)
46106 return (CALL_P (insn) || JUMP_P (insn));
46109 /* Return true if insn is a prefetch instruction. */
46111 static bool
46112 is_prefetch (rtx insn)
46114 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
46117 /* This function initializes a dispatch window and the list container holding a
46118 pointer to the window. */
46120 static void
46121 init_window (int window_num)
46123 int i;
46124 dispatch_windows *new_list;
46126 if (window_num == 0)
46127 new_list = dispatch_window_list;
46128 else
46129 new_list = dispatch_window_list1;
46131 new_list->num_insn = 0;
46132 new_list->num_uops = 0;
46133 new_list->window_size = 0;
46134 new_list->next = NULL;
46135 new_list->prev = NULL;
46136 new_list->window_num = window_num;
46137 new_list->num_imm = 0;
46138 new_list->num_imm_32 = 0;
46139 new_list->num_imm_64 = 0;
46140 new_list->imm_size = 0;
46141 new_list->num_loads = 0;
46142 new_list->num_stores = 0;
46143 new_list->violation = false;
46145 for (i = 0; i < MAX_INSN; i++)
46147 new_list->window[i].insn = NULL;
46148 new_list->window[i].group = disp_no_group;
46149 new_list->window[i].path = no_path;
46150 new_list->window[i].byte_len = 0;
46151 new_list->window[i].imm_bytes = 0;
46153 return;
46156 /* This function allocates and initializes a dispatch window and the
46157 list container holding a pointer to the window. */
46159 static dispatch_windows *
46160 allocate_window (void)
46162 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
46163 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
46165 return new_list;
46168 /* This routine initializes the dispatch scheduling information. It
46169 initiates building dispatch scheduler tables and constructs the
46170 first dispatch window. */
46172 static void
46173 init_dispatch_sched (void)
46175 /* Allocate a dispatch list and a window. */
46176 dispatch_window_list = allocate_window ();
46177 dispatch_window_list1 = allocate_window ();
46178 init_window (0);
46179 init_window (1);
46182 /* This function returns true if a branch is detected. End of a basic block
46183 does not have to be a branch, but here we assume only branches end a
46184 window. */
46186 static bool
46187 is_end_basic_block (enum dispatch_group group)
46189 return group == disp_branch;
46192 /* This function is called when the end of window processing is reached. */
46194 static void
46195 process_end_window (void)
46197 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
46198 if (dispatch_window_list->next)
46200 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
46201 gcc_assert (dispatch_window_list->window_size
46202 + dispatch_window_list1->window_size <= 48);
46203 init_window (1);
46205 init_window (0);
46208 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
46209 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
46210 for 48 bytes of instructions. Note that these windows are not dispatch
46211 windows whose sizes are DISPATCH_WINDOW_SIZE. */
46213 static dispatch_windows *
46214 allocate_next_window (int window_num)
46216 if (window_num == 0)
46218 if (dispatch_window_list->next)
46219 init_window (1);
46220 init_window (0);
46221 return dispatch_window_list;
46224 dispatch_window_list->next = dispatch_window_list1;
46225 dispatch_window_list1->prev = dispatch_window_list;
46227 return dispatch_window_list1;
46230 /* Increment the number of immediate operands of an instruction. */
46232 static int
46233 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
46235 if (*in_rtx == 0)
46236 return 0;
46238 switch (GET_CODE (*in_rtx))
46240 case CONST:
46241 case SYMBOL_REF:
46242 case CONST_INT:
46243 (imm_values->imm)++;
46244 if (x86_64_immediate_operand (*in_rtx, SImode))
46245 (imm_values->imm32)++;
46246 else
46247 (imm_values->imm64)++;
46248 break;
46250 case CONST_DOUBLE:
46251 (imm_values->imm)++;
46252 (imm_values->imm64)++;
46253 break;
46255 case CODE_LABEL:
46256 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
46258 (imm_values->imm)++;
46259 (imm_values->imm32)++;
46261 break;
46263 default:
46264 break;
46267 return 0;
46270 /* Compute number of immediate operands of an instruction. */
46272 static void
46273 find_constant (rtx in_rtx, imm_info *imm_values)
46275 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
46276 (rtx_function) find_constant_1, (void *) imm_values);
46279 /* Return total size of immediate operands of an instruction along with number
46280 of corresponding immediate operands. It initializes its parameters to zero
46281 before calling FIND_CONSTANT.
46282 INSN is the input instruction. IMM is the total of immediates.
46283 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
46284 bit immediates. */
46286 static int
46287 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
46289 imm_info imm_values = {0, 0, 0};
46291 find_constant (insn, &imm_values);
46292 *imm = imm_values.imm;
46293 *imm32 = imm_values.imm32;
46294 *imm64 = imm_values.imm64;
46295 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
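/* Worked example of the size computation above: for an insn carrying one
   immediate that fits in SImode and one that does not, find_constant
   records imm = 2, imm32 = 1 and imm64 = 1, so the caller sees
   *imm == 2, *imm32 == 1, *imm64 == 1 and a returned size of
   1 * 4 + 1 * 8 = 12 bytes.  */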
46298 /* This function indicates if an operand of an instruction is an
46299 immediate. */
46301 static bool
46302 has_immediate (rtx insn)
46304 int num_imm_operand;
46305 int num_imm32_operand;
46306 int num_imm64_operand;
46308 if (insn)
46309 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46310 &num_imm64_operand);
46311 return false;
46314 /* Return single or double path for instructions. */
46316 static enum insn_path
46317 get_insn_path (rtx insn)
46319 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
46321 if ((int)path == 0)
46322 return path_single;
46324 if ((int)path == 1)
46325 return path_double;
46327 return path_multi;
46330 /* Return insn dispatch group. */
46332 static enum dispatch_group
46333 get_insn_group (rtx insn)
46335 enum dispatch_group group = get_mem_group (insn);
46336 if (group)
46337 return group;
46339 if (is_branch (insn))
46340 return disp_branch;
46342 if (is_cmp (insn))
46343 return disp_cmp;
46345 if (has_immediate (insn))
46346 return disp_imm;
46348 if (is_prefetch (insn))
46349 return disp_prefetch;
46351 return disp_no_group;
46354 /* Count number of GROUP restricted instructions in a dispatch
46355 window WINDOW_LIST. */
46357 static int
46358 count_num_restricted (rtx insn, dispatch_windows *window_list)
46360 enum dispatch_group group = get_insn_group (insn);
46361 int imm_size;
46362 int num_imm_operand;
46363 int num_imm32_operand;
46364 int num_imm64_operand;
46366 if (group == disp_no_group)
46367 return 0;
46369 if (group == disp_imm)
46371 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46372 &num_imm64_operand);
46373 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
46374 || num_imm_operand + window_list->num_imm > MAX_IMM
46375 || (num_imm32_operand > 0
46376 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
46377 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
46378 || (num_imm64_operand > 0
46379 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
46380 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
46381 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
46382 && num_imm64_operand > 0
46383 && ((window_list->num_imm_64 > 0
46384 && window_list->num_insn >= 2)
46385 || window_list->num_insn >= 3)))
46386 return BIG;
46388 return 1;
46391 if ((group == disp_load_store
46392 && (window_list->num_loads >= MAX_LOAD
46393 || window_list->num_stores >= MAX_STORE))
46394 || ((group == disp_load
46395 || group == disp_prefetch)
46396 && window_list->num_loads >= MAX_LOAD)
46397 || (group == disp_store
46398 && window_list->num_stores >= MAX_STORE))
46399 return BIG;
46401 return 1;
46404 /* This function returns true if insn satisfies dispatch rules on the
46405 last window scheduled. */
46407 static bool
46408 fits_dispatch_window (rtx insn)
46410 dispatch_windows *window_list = dispatch_window_list;
46411 dispatch_windows *window_list_next = dispatch_window_list->next;
46412 unsigned int num_restrict;
46413 enum dispatch_group group = get_insn_group (insn);
46414 enum insn_path path = get_insn_path (insn);
46415 int sum;
46417 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
46418 instructions should be given the lowest priority in the
46419 scheduling process in Haifa scheduler to make sure they will be
46420 scheduled in the same dispatch window as the reference to them. */
46421 if (group == disp_jcc || group == disp_cmp)
46422 return false;
46424 /* Check nonrestricted. */
46425 if (group == disp_no_group || group == disp_branch)
46426 return true;
46428 /* Get last dispatch window. */
46429 if (window_list_next)
46430 window_list = window_list_next;
46432 if (window_list->window_num == 1)
46434 sum = window_list->prev->window_size + window_list->window_size;
46436 if (sum == 32
46437 || (min_insn_size (insn) + sum) >= 48)
46438 /* Window 1 is full. Go for next window. */
46439 return true;
46442 num_restrict = count_num_restricted (insn, window_list);
46444 if (num_restrict > num_allowable_groups[group])
46445 return false;
46447 /* See if it fits in the first window. */
46448 if (window_list->window_num == 0)
46450 /* The first window should have only single and double path
46451 uops. */
46452 if (path == path_double
46453 && (window_list->num_uops + 2) > MAX_INSN)
46454 return false;
46455 else if (path != path_single)
46456 return false;
46458 return true;
46461 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46462 dispatch window WINDOW_LIST. */
46464 static void
46465 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
46467 int byte_len = min_insn_size (insn);
46468 int num_insn = window_list->num_insn;
46469 int imm_size;
46470 sched_insn_info *window = window_list->window;
46471 enum dispatch_group group = get_insn_group (insn);
46472 enum insn_path path = get_insn_path (insn);
46473 int num_imm_operand;
46474 int num_imm32_operand;
46475 int num_imm64_operand;
46477 if (!window_list->violation && group != disp_cmp
46478 && !fits_dispatch_window (insn))
46479 window_list->violation = true;
46481 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46482 &num_imm64_operand);
46484 /* Initialize window with new instruction. */
46485 window[num_insn].insn = insn;
46486 window[num_insn].byte_len = byte_len;
46487 window[num_insn].group = group;
46488 window[num_insn].path = path;
46489 window[num_insn].imm_bytes = imm_size;
46491 window_list->window_size += byte_len;
46492 window_list->num_insn = num_insn + 1;
46493 window_list->num_uops = window_list->num_uops + num_uops;
46494 window_list->imm_size += imm_size;
46495 window_list->num_imm += num_imm_operand;
46496 window_list->num_imm_32 += num_imm32_operand;
46497 window_list->num_imm_64 += num_imm64_operand;
46499 if (group == disp_store)
46500 window_list->num_stores += 1;
46501 else if (group == disp_load
46502 || group == disp_prefetch)
46503 window_list->num_loads += 1;
46504 else if (group == disp_load_store)
46506 window_list->num_stores += 1;
46507 window_list->num_loads += 1;
46511 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46512 If the total bytes of instructions or the number of instructions in
46513 the window exceeds the allowable limit, it allocates a new window. */
46515 static void
46516 add_to_dispatch_window (rtx insn)
46518 int byte_len;
46519 dispatch_windows *window_list;
46520 dispatch_windows *next_list;
46521 dispatch_windows *window0_list;
46522 enum insn_path path;
46523 enum dispatch_group insn_group;
46524 bool insn_fits;
46525 int num_insn;
46526 int num_uops;
46527 int window_num;
46528 int insn_num_uops;
46529 int sum;
46531 if (INSN_CODE (insn) < 0)
46532 return;
46534 byte_len = min_insn_size (insn);
46535 window_list = dispatch_window_list;
46536 next_list = window_list->next;
46537 path = get_insn_path (insn);
46538 insn_group = get_insn_group (insn);
46540 /* Get the last dispatch window. */
46541 if (next_list)
46542 window_list = dispatch_window_list->next;
46544 if (path == path_single)
46545 insn_num_uops = 1;
46546 else if (path == path_double)
46547 insn_num_uops = 2;
46548 else
46549 insn_num_uops = (int) path;
46551 /* If the current window is full, get a new window.
46552 Window number zero is full if MAX_INSN uops are scheduled in it.
46553 Window number one is full if window zero's bytes plus window
46554 one's bytes equal 32, or if adding the bytes of the new instruction
46555 to the total makes it greater than 48, or if it already has MAX_INSN
46556 instructions in it. */
46557 num_insn = window_list->num_insn;
46558 num_uops = window_list->num_uops;
46559 window_num = window_list->window_num;
46560 insn_fits = fits_dispatch_window (insn);
46562 if (num_insn >= MAX_INSN
46563 || num_uops + insn_num_uops > MAX_INSN
46564 || !(insn_fits))
46566 window_num = ~window_num & 1;
46567 window_list = allocate_next_window (window_num);
46570 if (window_num == 0)
46572 add_insn_window (insn, window_list, insn_num_uops);
46573 if (window_list->num_insn >= MAX_INSN
46574 && insn_group == disp_branch)
46576 process_end_window ();
46577 return;
46580 else if (window_num == 1)
46582 window0_list = window_list->prev;
46583 sum = window0_list->window_size + window_list->window_size;
46584 if (sum == 32
46585 || (byte_len + sum) >= 48)
46587 process_end_window ();
46588 window_list = dispatch_window_list;
46591 add_insn_window (insn, window_list, insn_num_uops);
46593 else
46594 gcc_unreachable ();
46596 if (is_end_basic_block (insn_group))
46598 /* End of the basic block is reached; do end-of-basic-block processing. */
46599 process_end_window ();
46600 return;
46604 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46606 DEBUG_FUNCTION static void
46607 debug_dispatch_window_file (FILE *file, int window_num)
46609 dispatch_windows *list;
46610 int i;
46612 if (window_num == 0)
46613 list = dispatch_window_list;
46614 else
46615 list = dispatch_window_list1;
46617 fprintf (file, "Window #%d:\n", list->window_num);
46618 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46619 list->num_insn, list->num_uops, list->window_size);
46620 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46621 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46623 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46624 list->num_stores);
46625 fprintf (file, " insn info:\n");
46627 for (i = 0; i < MAX_INSN; i++)
46629 if (!list->window[i].insn)
46630 break;
46631 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46632 i, group_name[list->window[i].group],
46633 i, (void *)list->window[i].insn,
46634 i, list->window[i].path,
46635 i, list->window[i].byte_len,
46636 i, list->window[i].imm_bytes);
46640 /* Print to stdout a dispatch window. */
46642 DEBUG_FUNCTION void
46643 debug_dispatch_window (int window_num)
46645 debug_dispatch_window_file (stdout, window_num);
46648 /* Print INSN dispatch information to FILE. */
46650 DEBUG_FUNCTION static void
46651 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46653 int byte_len;
46654 enum insn_path path;
46655 enum dispatch_group group;
46656 int imm_size;
46657 int num_imm_operand;
46658 int num_imm32_operand;
46659 int num_imm64_operand;
46661 if (INSN_CODE (insn) < 0)
46662 return;
46664 byte_len = min_insn_size (insn);
46665 path = get_insn_path (insn);
46666 group = get_insn_group (insn);
46667 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46668 &num_imm64_operand);
46670 fprintf (file, " insn info:\n");
46671 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46672 group_name[group], path, byte_len);
46673 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46674 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46677 /* Print to STDERR the status of the ready list with respect to
46678 dispatch windows. */
46680 DEBUG_FUNCTION void
46681 debug_ready_dispatch (void)
46683 int i;
46684 int no_ready = number_in_ready ();
46686 fprintf (stdout, "Number of ready: %d\n", no_ready);
46688 for (i = 0; i < no_ready; i++)
46689 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46692 /* This routine is the driver of the dispatch scheduler. */
46694 static void
46695 do_dispatch (rtx insn, int mode)
46697 if (mode == DISPATCH_INIT)
46698 init_dispatch_sched ();
46699 else if (mode == ADD_TO_DISPATCH_WINDOW)
46700 add_to_dispatch_window (insn);
46703 /* Return TRUE if Dispatch Scheduling is supported. */
46705 static bool
46706 has_dispatch (rtx insn, int action)
46708 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46709 && flag_dispatch_scheduler)
46710 switch (action)
46712 default:
46713 return false;
46715 case IS_DISPATCH_ON:
46716 return true;
46717 break;
46719 case IS_CMP:
46720 return is_cmp (insn);
46722 case DISPATCH_VIOLATION:
46723 return dispatch_violation ();
46725 case FITS_DISPATCH_WINDOW:
46726 return fits_dispatch_window (insn);
46729 return false;
46732 /* Implementation of reassociation_width target hook used by
46733 reassoc phase to identify parallelism level in reassociated
46734 tree. The statement's tree_code is passed in OPC. The argument's type
46735 is passed in MODE.
46737 Currently parallel reassociation is enabled for Atom
46738 processors only and we set reassociation width to be 2
46739 because Atom may issue up to 2 instructions per cycle.
46741 Return value should be fixed if parallel reassociation is
46742 enabled for other processors. */
46744 static int
46745 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
46746 enum machine_mode mode)
46748 int res = 1;
46750 /* Vector part. */
46751 if (VECTOR_MODE_P (mode))
46753 if (TARGET_VECTOR_PARALLEL_EXECUTION)
46754 return 2;
46755 else
46756 return 1;
46759 /* Scalar part. */
46760 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46761 res = 2;
46762 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46763 res = 2;
46765 return res;
46768 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46769 place emms and femms instructions. */
46771 static enum machine_mode
46772 ix86_preferred_simd_mode (enum machine_mode mode)
46774 if (!TARGET_SSE)
46775 return word_mode;
46777 switch (mode)
46779 case QImode:
46780 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46781 case HImode:
46782 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46783 case SImode:
46784 return TARGET_AVX512F ? V16SImode :
46785 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46786 case DImode:
46787 return TARGET_AVX512F ? V8DImode :
46788 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46790 case SFmode:
46791 if (TARGET_AVX512F)
46792 return V16SFmode;
46793 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46794 return V8SFmode;
46795 else
46796 return V4SFmode;
46798 case DFmode:
46799 if (!TARGET_VECTORIZE_DOUBLE)
46800 return word_mode;
46801 else if (TARGET_AVX512F)
46802 return V8DFmode;
46803 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46804 return V4DFmode;
46805 else if (TARGET_SSE2)
46806 return V2DFmode;
46807 /* FALLTHRU */
46809 default:
46810 return word_mode;
46814 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46815 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46816 256bit and 128bit vectors. */
46818 static unsigned int
46819 ix86_autovectorize_vector_sizes (void)
46821 return TARGET_AVX512F ? 64 | 32 | 16 :
46822 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
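/* The returned value is a bitmask of vector sizes in bytes.  For example,
   with AVX enabled and TARGET_PREFER_AVX128 not set, the hook returns
   32 | 16 == 48, so the vectorizer may try both 32-byte (256-bit) and
   16-byte (128-bit) vectors; with AVX512F, 64-byte vectors are added to
   the set.  */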
46827 /* Return class of registers which could be used for pseudo of MODE
46828 and of class RCLASS for spilling instead of memory. Return NO_REGS
46829 if it is not possible or non-profitable. */
46830 static reg_class_t
46831 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46833 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46834 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46835 && INTEGER_CLASS_P (rclass))
46836 return ALL_SSE_REGS;
46837 return NO_REGS;
46840 /* Implement targetm.vectorize.init_cost. */
46842 static void *
46843 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
46845 unsigned *cost = XNEWVEC (unsigned, 3);
46846 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46847 return cost;
46850 /* Implement targetm.vectorize.add_stmt_cost. */
46852 static unsigned
46853 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46854 struct _stmt_vec_info *stmt_info, int misalign,
46855 enum vect_cost_model_location where)
46857 unsigned *cost = (unsigned *) data;
46858 unsigned retval = 0;
46859 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46860 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46862 /* Statements in an inner loop relative to the loop being
46863 vectorized are weighted more heavily. The value here is
46864 arbitrary and could potentially be improved with analysis. */
46865 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46866 count *= 50; /* FIXME. */
46868 retval = (unsigned) (count * stmt_cost);
46870 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
46871 for Silvermont as it has an out-of-order integer pipeline and can execute
46872 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
46873 if (TARGET_SILVERMONT || TARGET_INTEL)
46874 if (stmt_info && stmt_info->stmt)
46876 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
46877 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
46878 retval = (retval * 17) / 10;
46881 cost[where] += retval;
46883 return retval;
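/* Example of the scaling above: a vector statement with stmt_cost 10 and
   count 1, not in an inner loop, contributes 10 to cost[vect_body] on most
   targets, but on Silvermont with an integer-typed lhs it is scaled to
   (10 * 17) / 10 = 17.  */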
46886 /* Implement targetm.vectorize.finish_cost. */
46888 static void
46889 ix86_finish_cost (void *data, unsigned *prologue_cost,
46890 unsigned *body_cost, unsigned *epilogue_cost)
46892 unsigned *cost = (unsigned *) data;
46893 *prologue_cost = cost[vect_prologue];
46894 *body_cost = cost[vect_body];
46895 *epilogue_cost = cost[vect_epilogue];
46898 /* Implement targetm.vectorize.destroy_cost_data. */
46900 static void
46901 ix86_destroy_cost_data (void *data)
46903 free (data);
46906 /* Validate target specific memory model bits in VAL. */
46908 static unsigned HOST_WIDE_INT
46909 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46911 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46912 bool strong;
46914 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46915 |MEMMODEL_MASK)
46916 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46918 warning (OPT_Winvalid_memory_model,
46919 "Unknown architecture specific memory model");
46920 return MEMMODEL_SEQ_CST;
46922 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46923 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46925 warning (OPT_Winvalid_memory_model,
46926 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46927 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46929 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46931 warning (OPT_Winvalid_memory_model,
46932 "HLE_RELEASE not used with RELEASE or stronger memory model");
46933 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46935 return val;
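/* Illustrative use of the HLE bits validated above (a sketch based on the
   __ATOMIC_HLE_ACQUIRE / __ATOMIC_HLE_RELEASE macros GCC exposes for x86):

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;   // elided critical section begins here
     ...
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Combining IX86_HLE_ACQUIRE with a model weaker than MEMMODEL_ACQUIRE
   (e.g. a relaxed model) triggers the warning above and the model is
   forced to MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE.  */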
46938 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46939 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46940 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46941 or number of vecsize_mangle variants that should be emitted. */
46943 static int
46944 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46945 struct cgraph_simd_clone *clonei,
46946 tree base_type, int num)
46948 int ret = 1;
46950 if (clonei->simdlen
46951 && (clonei->simdlen < 2
46952 || clonei->simdlen > 16
46953 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46955 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46956 "unsupported simdlen %d", clonei->simdlen);
46957 return 0;
46960 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46961 if (TREE_CODE (ret_type) != VOID_TYPE)
46962 switch (TYPE_MODE (ret_type))
46964 case QImode:
46965 case HImode:
46966 case SImode:
46967 case DImode:
46968 case SFmode:
46969 case DFmode:
46970 /* case SCmode: */
46971 /* case DCmode: */
46972 break;
46973 default:
46974 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46975 "unsupported return type %qT for simd\n", ret_type);
46976 return 0;
46979 tree t;
46980 int i;
46982 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46983 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46984 switch (TYPE_MODE (TREE_TYPE (t)))
46986 case QImode:
46987 case HImode:
46988 case SImode:
46989 case DImode:
46990 case SFmode:
46991 case DFmode:
46992 /* case SCmode: */
46993 /* case DCmode: */
46994 break;
46995 default:
46996 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46997 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46998 return 0;
47001 if (clonei->cilk_elemental)
47003 /* Parse the processor clause here. If not present, default to 'b'. */
47004 clonei->vecsize_mangle = 'b';
47006 else if (!TREE_PUBLIC (node->decl))
47008 /* If the function isn't exported, we can pick up just one ISA
47009 for the clones. */
47010 if (TARGET_AVX2)
47011 clonei->vecsize_mangle = 'd';
47012 else if (TARGET_AVX)
47013 clonei->vecsize_mangle = 'c';
47014 else
47015 clonei->vecsize_mangle = 'b';
47016 ret = 1;
47018 else
47020 clonei->vecsize_mangle = "bcd"[num];
47021 ret = 3;
47023 switch (clonei->vecsize_mangle)
47025 case 'b':
47026 clonei->vecsize_int = 128;
47027 clonei->vecsize_float = 128;
47028 break;
47029 case 'c':
47030 clonei->vecsize_int = 128;
47031 clonei->vecsize_float = 256;
47032 break;
47033 case 'd':
47034 clonei->vecsize_int = 256;
47035 clonei->vecsize_float = 256;
47036 break;
47038 if (clonei->simdlen == 0)
47040 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
47041 clonei->simdlen = clonei->vecsize_int;
47042 else
47043 clonei->simdlen = clonei->vecsize_float;
47044 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
47045 if (clonei->simdlen > 16)
47046 clonei->simdlen = 16;
47048 return ret;
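/* Worked example of the simdlen computation above: for a non-exported
   clone compiled with AVX but not AVX2 (vecsize_mangle 'c',
   vecsize_float = 256) and a 32-bit float base type, simdlen defaults to
   256 / 32 = 8; for a 64-bit double base type it becomes 256 / 64 = 4.  */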
47051 /* Add target attribute to SIMD clone NODE if needed. */
47053 static void
47054 ix86_simd_clone_adjust (struct cgraph_node *node)
47056 const char *str = NULL;
47057 gcc_assert (node->decl == cfun->decl);
47058 switch (node->simdclone->vecsize_mangle)
47060 case 'b':
47061 if (!TARGET_SSE2)
47062 str = "sse2";
47063 break;
47064 case 'c':
47065 if (!TARGET_AVX)
47066 str = "avx";
47067 break;
47068 case 'd':
47069 if (!TARGET_AVX2)
47070 str = "avx2";
47071 break;
47072 default:
47073 gcc_unreachable ();
47075 if (str == NULL)
47076 return;
47077 push_cfun (NULL);
47078 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
47079 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
47080 gcc_assert (ok);
47081 pop_cfun ();
47082 ix86_previous_fndecl = NULL_TREE;
47083 ix86_set_current_function (node->decl);
47086 /* If SIMD clone NODE can't be used in a vectorized loop
47087 in current function, return -1, otherwise return a badness of using it
47088 (0 if it is most desirable from vecsize_mangle point of view, 1
47089 slightly less desirable, etc.). */
47091 static int
47092 ix86_simd_clone_usable (struct cgraph_node *node)
47094 switch (node->simdclone->vecsize_mangle)
47096 case 'b':
47097 if (!TARGET_SSE2)
47098 return -1;
47099 if (!TARGET_AVX)
47100 return 0;
47101 return TARGET_AVX2 ? 2 : 1;
47102 case 'c':
47103 if (!TARGET_AVX)
47104 return -1;
47105 return TARGET_AVX2 ? 1 : 0;
47106 break;
47107 case 'd':
47108 if (!TARGET_AVX2)
47109 return -1;
47110 return 0;
47111 default:
47112 gcc_unreachable ();
47116 /* This function counts the number of memory references.
47117 This value determines the unrolling factor for
47118 bdver3 and bdver4 architectures. */
47120 static int
47121 ix86_loop_memcount (rtx *x, unsigned *mem_count)
47123 if (*x != NULL_RTX && MEM_P (*x))
47125 enum machine_mode mode;
47126 unsigned int n_words;
47128 mode = GET_MODE (*x);
47129 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
47131 if (n_words > 4)
47132 (*mem_count)+=2;
47133 else
47134 (*mem_count)+=1;
47136 return 0;
47139 /* This function adjusts the unroll factor based on
47140 the hardware capabilities. For example, bdver3 has
47141 a loop buffer which makes unrolling of smaller
47142 loops less important. This function decides the
47143 unroll factor using the number of memory references
47144 (the value 32 is used) as a heuristic. */
47146 static unsigned
47147 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
47149 basic_block *bbs;
47150 rtx insn;
47151 unsigned i;
47152 unsigned mem_count = 0;
47154 if (!TARGET_ADJUST_UNROLL)
47155 return nunroll;
47157 /* Count the number of memory references within the loop body. */
47158 bbs = get_loop_body (loop);
47159 for (i = 0; i < loop->num_nodes; i++)
47161 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
47162 if (NONDEBUG_INSN_P (insn))
47163 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
47165 free (bbs);
47167 if (mem_count && mem_count <= 32)
47168 return 32 / mem_count;
47170 return nunroll;
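/* Example of the heuristic above: a loop body containing eight word-sized
   (or smaller) memory references gives mem_count == 8 and an unroll factor
   of 32 / 8 = 4.  References wider than four words count twice, and a loop
   with no references or more than 32 of them keeps the factor computed by
   the generic unroller.  */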
47174 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
47176 static bool
47177 ix86_float_exceptions_rounding_supported_p (void)
47179 /* For x87 floating point with standard excess precision handling,
47180 there is no adddf3 pattern (since x87 floating point only has
47181 XFmode operations) so the default hook implementation gets this
47182 wrong. */
47183 return TARGET_80387 || TARGET_SSE_MATH;
47186 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
47188 static void
47189 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
47191 if (!TARGET_80387 && !TARGET_SSE_MATH)
47192 return;
47193 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
47194 if (TARGET_80387)
47196 tree fenv_index_type = build_index_type (size_int (6));
47197 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
47198 tree fenv_var = create_tmp_var (fenv_type, NULL);
47199 mark_addressable (fenv_var);
47200 tree fenv_ptr = build_pointer_type (fenv_type);
47201 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
47202 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
47203 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
47204 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
47205 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
47206 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
47207 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
47208 tree hold_fnclex = build_call_expr (fnclex, 0);
47209 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
47210 hold_fnclex);
47211 *clear = build_call_expr (fnclex, 0);
47212 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
47213 mark_addressable (sw_var);
47214 tree su_ptr = build_pointer_type (short_unsigned_type_node);
47215 tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
47216 tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
47217 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
47218 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
47219 exceptions_var, exceptions_x87);
47220 *update = build2 (COMPOUND_EXPR, integer_type_node,
47221 fnstsw_call, update_mod);
47222 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
47223 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
47225 if (TARGET_SSE_MATH)
47227 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
47228 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
47229 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
47230 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
47231 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
47232 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
47233 mxcsr_orig_var, stmxcsr_hold_call);
47234 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
47235 mxcsr_orig_var,
47236 build_int_cst (unsigned_type_node, 0x1f80));
47237 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
47238 build_int_cst (unsigned_type_node, 0xffffffc0));
47239 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
47240 mxcsr_mod_var, hold_mod_val);
47241 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47242 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
47243 hold_assign_orig, hold_assign_mod);
47244 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
47245 ldmxcsr_hold_call);
47246 if (*hold)
47247 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
47248 else
47249 *hold = hold_all;
47250 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47251 if (*clear)
47252 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
47253 ldmxcsr_clear_call);
47254 else
47255 *clear = ldmxcsr_clear_call;
47256 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
47257 tree exceptions_sse = fold_convert (integer_type_node,
47258 stxmcsr_update_call);
47259 if (*update)
47261 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
47262 exceptions_var, exceptions_sse);
47263 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
47264 exceptions_var, exceptions_mod);
47265 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
47266 exceptions_assign);
47268 else
47269 *update = build2 (MODIFY_EXPR, integer_type_node,
47270 exceptions_var, exceptions_sse);
47271 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
47272 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47273 ldmxcsr_update_call);
47275 tree atomic_feraiseexcept
47276 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
47277 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
47278 1, exceptions_var);
47279 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47280 atomic_feraiseexcept_call);
47283 /* Try to determine BASE/OFFSET/SIZE parts of the given MEM.
47284 Return true if successful, false if all the values couldn't
47285 be determined.
47287 This function only looks for REG/SYMBOL or REG/SYMBOL+CONST
47288 address forms. */
47290 static bool
47291 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
47292 HOST_WIDE_INT *size)
47294 rtx addr_rtx;
47295 if (MEM_SIZE_KNOWN_P (mem))
47296 *size = MEM_SIZE (mem);
47297 else
47298 return false;
47300 if (GET_CODE (XEXP (mem, 0)) == CONST)
47301 addr_rtx = XEXP (XEXP (mem, 0), 0);
47302 else
47303 addr_rtx = (XEXP (mem, 0));
47305 if (GET_CODE (addr_rtx) == REG
47306 || GET_CODE (addr_rtx) == SYMBOL_REF)
47308 *base = addr_rtx;
47309 *offset = 0;
47311 else if (GET_CODE (addr_rtx) == PLUS
47312 && CONST_INT_P (XEXP (addr_rtx, 1)))
47314 *base = XEXP (addr_rtx, 0);
47315 *offset = INTVAL (XEXP (addr_rtx, 1));
47317 else
47318 return false;
47320 return true;
47323 /* If MEM1 is adjacent to MEM2 and MEM1 has lower address,
47324 return true. */
47326 extern bool
47327 adjacent_mem_locations (rtx mem1, rtx mem2)
47329 rtx base1, base2;
47330 HOST_WIDE_INT off1, size1, off2, size2;
47332 if (get_memref_parts (mem1, &base1, &off1, &size1)
47333 && get_memref_parts (mem2, &base2, &off2, &size2))
47335 if (GET_CODE (base1) == SYMBOL_REF
47336 && GET_CODE (base2) == SYMBOL_REF
47337 && SYMBOL_REF_DECL (base1) == SYMBOL_REF_DECL (base2))
47338 return (off1 + size1 == off2);
47339 else if (REG_P (base1)
47340 && REG_P (base2)
47341 && REGNO (base1) == REGNO (base2))
47342 return (off1 + size1 == off2);
47344 return false;
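/* Example: a 4-byte MEM at (plus (reg) (const_int 0)) followed by a 4-byte
   MEM at (plus (reg) (const_int 4)) with the same base register satisfies
   off1 + size1 == off2, so the two locations are considered adjacent.  */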
47347 /* Initialize the GCC target structure. */
47348 #undef TARGET_RETURN_IN_MEMORY
47349 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
47351 #undef TARGET_LEGITIMIZE_ADDRESS
47352 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
47354 #undef TARGET_ATTRIBUTE_TABLE
47355 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
47356 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
47357 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
47358 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47359 # undef TARGET_MERGE_DECL_ATTRIBUTES
47360 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
47361 #endif
47363 #undef TARGET_COMP_TYPE_ATTRIBUTES
47364 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
47366 #undef TARGET_INIT_BUILTINS
47367 #define TARGET_INIT_BUILTINS ix86_init_builtins
47368 #undef TARGET_BUILTIN_DECL
47369 #define TARGET_BUILTIN_DECL ix86_builtin_decl
47370 #undef TARGET_EXPAND_BUILTIN
47371 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
47373 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
47374 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
47375 ix86_builtin_vectorized_function
47377 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
47378 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
47380 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
47381 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
47383 #undef TARGET_VECTORIZE_BUILTIN_GATHER
47384 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
47386 #undef TARGET_BUILTIN_RECIPROCAL
47387 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
47389 #undef TARGET_ASM_FUNCTION_PROLOGUE
47390 #define TARGET_ASM_FUNCTION_PROLOGUE ix86_output_function_prologue
47392 #undef TARGET_ASM_FUNCTION_EPILOGUE
47393 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
47395 #undef TARGET_ASM_NAMED_SECTION
47396 #define TARGET_ASM_NAMED_SECTION ix86_elf_asm_named_section
47398 #undef TARGET_ENCODE_SECTION_INFO
47399 #ifndef SUBTARGET_ENCODE_SECTION_INFO
47400 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
47401 #else
47402 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
47403 #endif
47405 #undef TARGET_ASM_OPEN_PAREN
47406 #define TARGET_ASM_OPEN_PAREN ""
47407 #undef TARGET_ASM_CLOSE_PAREN
47408 #define TARGET_ASM_CLOSE_PAREN ""
47410 #undef TARGET_ASM_BYTE_OP
47411 #define TARGET_ASM_BYTE_OP ASM_BYTE
47413 #undef TARGET_ASM_ALIGNED_HI_OP
47414 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
47415 #undef TARGET_ASM_ALIGNED_SI_OP
47416 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
47417 #ifdef ASM_QUAD
47418 #undef TARGET_ASM_ALIGNED_DI_OP
47419 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
47420 #endif
47422 #undef TARGET_PROFILE_BEFORE_PROLOGUE
47423 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
47425 #undef TARGET_SET_FP_INSN
47426 #define TARGET_SET_FP_INSN ix86_set_fp_insn
47428 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
47429 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
47431 #undef TARGET_ASM_UNALIGNED_HI_OP
47432 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
47433 #undef TARGET_ASM_UNALIGNED_SI_OP
47434 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
47435 #undef TARGET_ASM_UNALIGNED_DI_OP
47436 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
47438 #undef TARGET_PRINT_OPERAND
47439 #define TARGET_PRINT_OPERAND ix86_print_operand
47440 #undef TARGET_PRINT_OPERAND_ADDRESS
47441 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
47442 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
47443 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
47444 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
47445 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
47447 #undef TARGET_SCHED_INIT_GLOBAL
47448 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
47449 #undef TARGET_SCHED_ADJUST_COST
47450 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
47451 #undef TARGET_SCHED_ISSUE_RATE
47452 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
47453 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
47454 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
47455 ia32_multipass_dfa_lookahead
47456 #undef TARGET_SCHED_MACRO_FUSION_P
47457 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
47458 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
47459 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
47461 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
47462 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
47464 #undef TARGET_MEMMODEL_CHECK
47465 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
47467 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
47468 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
47470 #ifdef HAVE_AS_TLS
47471 #undef TARGET_HAVE_TLS
47472 #define TARGET_HAVE_TLS true
47473 #endif
47474 #undef TARGET_CANNOT_FORCE_CONST_MEM
47475 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
47476 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
47477 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
47479 #undef TARGET_DELEGITIMIZE_ADDRESS
47480 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
47482 #undef TARGET_MS_BITFIELD_LAYOUT_P
47483 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
47485 #if TARGET_MACHO
47486 #undef TARGET_BINDS_LOCAL_P
47487 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
47488 #endif
47489 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47490 #undef TARGET_BINDS_LOCAL_P
47491 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
47492 #endif
47494 #undef TARGET_ASM_OUTPUT_MI_THUNK
47495 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
47496 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47497 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47499 #undef TARGET_ASM_FILE_START
47500 #define TARGET_ASM_FILE_START x86_file_start
47502 #undef TARGET_OPTION_OVERRIDE
47503 #define TARGET_OPTION_OVERRIDE ix86_option_override
47505 #undef TARGET_REGISTER_MOVE_COST
47506 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47507 #undef TARGET_MEMORY_MOVE_COST
47508 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47509 #undef TARGET_RTX_COSTS
47510 #define TARGET_RTX_COSTS ix86_rtx_costs
47511 #undef TARGET_ADDRESS_COST
47512 #define TARGET_ADDRESS_COST ix86_address_cost
47514 #undef TARGET_FIXED_CONDITION_CODE_REGS
47515 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47516 #undef TARGET_CC_MODES_COMPATIBLE
47517 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47519 #undef TARGET_MACHINE_DEPENDENT_REORG
47520 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47522 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47523 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47525 #undef TARGET_BUILD_BUILTIN_VA_LIST
47526 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47528 #undef TARGET_FOLD_BUILTIN
47529 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47531 #undef TARGET_COMPARE_VERSION_PRIORITY
47532 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47534 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47535 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47536 ix86_generate_version_dispatcher_body
47538 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47539 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47540 ix86_get_function_versions_dispatcher
47542 #undef TARGET_ENUM_VA_LIST_P
47543 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47545 #undef TARGET_FN_ABI_VA_LIST
47546 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47548 #undef TARGET_CANONICAL_VA_LIST_TYPE
47549 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47551 #undef TARGET_EXPAND_BUILTIN_VA_START
47552 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47554 #undef TARGET_MD_ASM_CLOBBERS
47555 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47557 #undef TARGET_PROMOTE_PROTOTYPES
47558 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47559 #undef TARGET_SETUP_INCOMING_VARARGS
47560 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47561 #undef TARGET_MUST_PASS_IN_STACK
47562 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47563 #undef TARGET_FUNCTION_ARG_ADVANCE
47564 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47565 #undef TARGET_FUNCTION_ARG
47566 #define TARGET_FUNCTION_ARG ix86_function_arg
47567 #undef TARGET_FUNCTION_ARG_BOUNDARY
47568 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47569 #undef TARGET_PASS_BY_REFERENCE
47570 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47571 #undef TARGET_INTERNAL_ARG_POINTER
47572 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47573 #undef TARGET_UPDATE_STACK_BOUNDARY
47574 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47575 #undef TARGET_GET_DRAP_RTX
47576 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47577 #undef TARGET_STRICT_ARGUMENT_NAMING
47578 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47579 #undef TARGET_STATIC_CHAIN
47580 #define TARGET_STATIC_CHAIN ix86_static_chain
47581 #undef TARGET_TRAMPOLINE_INIT
47582 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47583 #undef TARGET_RETURN_POPS_ARGS
47584 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47586 #undef TARGET_LEGITIMATE_COMBINED_INSN
47587 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47589 #undef TARGET_ASAN_SHADOW_OFFSET
47590 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47592 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47593 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47595 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47596 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47598 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47599 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47601 #undef TARGET_C_MODE_FOR_SUFFIX
47602 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47604 #ifdef HAVE_AS_TLS
47605 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47606 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47607 #endif
47609 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47610 #undef TARGET_INSERT_ATTRIBUTES
47611 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47612 #endif
47614 #undef TARGET_MANGLE_TYPE
47615 #define TARGET_MANGLE_TYPE ix86_mangle_type
47617 #if !TARGET_MACHO
47618 #undef TARGET_STACK_PROTECT_FAIL
47619 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47620 #endif
47622 #undef TARGET_FUNCTION_VALUE
47623 #define TARGET_FUNCTION_VALUE ix86_function_value
47625 #undef TARGET_FUNCTION_VALUE_REGNO_P
47626 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47628 #undef TARGET_PROMOTE_FUNCTION_MODE
47629 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47631 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47632 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47634 #undef TARGET_INSTANTIATE_DECLS
47635 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47637 #undef TARGET_SECONDARY_RELOAD
47638 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47640 #undef TARGET_CLASS_MAX_NREGS
47641 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47643 #undef TARGET_PREFERRED_RELOAD_CLASS
47644 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47645 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47646 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47647 #undef TARGET_CLASS_LIKELY_SPILLED_P
47648 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47650 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47651 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47652 ix86_builtin_vectorization_cost
47653 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47654 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47655 ix86_vectorize_vec_perm_const_ok
47656 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47657 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47658 ix86_preferred_simd_mode
47659 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47660 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47661 ix86_autovectorize_vector_sizes
47662 #undef TARGET_VECTORIZE_INIT_COST
47663 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47664 #undef TARGET_VECTORIZE_ADD_STMT_COST
47665 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47666 #undef TARGET_VECTORIZE_FINISH_COST
47667 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47668 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47669 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47671 #undef TARGET_SET_CURRENT_FUNCTION
47672 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47674 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47675 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47677 #undef TARGET_OPTION_SAVE
47678 #define TARGET_OPTION_SAVE ix86_function_specific_save
47680 #undef TARGET_OPTION_RESTORE
47681 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47683 #undef TARGET_OPTION_PRINT
47684 #define TARGET_OPTION_PRINT ix86_function_specific_print
47686 #undef TARGET_OPTION_FUNCTION_VERSIONS
47687 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47689 #undef TARGET_CAN_INLINE_P
47690 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47692 #undef TARGET_EXPAND_TO_RTL_HOOK
47693 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47695 #undef TARGET_LEGITIMATE_ADDRESS_P
47696 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47698 #undef TARGET_LRA_P
47699 #define TARGET_LRA_P hook_bool_void_true
47701 #undef TARGET_REGISTER_PRIORITY
47702 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47704 #undef TARGET_REGISTER_USAGE_LEVELING_P
47705 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47707 #undef TARGET_LEGITIMATE_CONSTANT_P
47708 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47710 #undef TARGET_FRAME_POINTER_REQUIRED
47711 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47713 #undef TARGET_CAN_OMIT_LEAF_FRAME_POINTER
47714 #define TARGET_CAN_OMIT_LEAF_FRAME_POINTER ix86_can_omit_leaf_frame_pointer
47716 #undef TARGET_CAN_ELIMINATE
47717 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47719 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47720 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47722 #undef TARGET_ASM_CODE_END
47723 #define TARGET_ASM_CODE_END ix86_code_end
47725 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47726 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47728 #if TARGET_MACHO
47729 #undef TARGET_INIT_LIBFUNCS
47730 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47731 #endif
47733 #undef TARGET_LOOP_UNROLL_ADJUST
47734 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47736 #undef TARGET_SPILL_CLASS
47737 #define TARGET_SPILL_CLASS ix86_spill_class
47739 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47740 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47741 ix86_simd_clone_compute_vecsize_and_simdlen
47743 #undef TARGET_SIMD_CLONE_ADJUST
47744 #define TARGET_SIMD_CLONE_ADJUST \
47745 ix86_simd_clone_adjust
47747 #undef TARGET_SIMD_CLONE_USABLE
47748 #define TARGET_SIMD_CLONE_USABLE \
47749 ix86_simd_clone_usable
47751 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47752 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47753 ix86_float_exceptions_rounding_supported_p
47755 #undef TARGET_STRICT_ALIGN
47756 #define TARGET_STRICT_ALIGN true
47757 struct gcc_target targetm = TARGET_INITIALIZER;
47759 #include "gt-i386.h"