/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "diagnostic.h"
#include "tree-pass.h"
#include "tree-flow.h"
#include "pass_manager.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
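
/* Illustrative sketch (not part of the original file): the mult_init[]
   and divide[] tables in struct processor_costs (declared in i386.h)
   have five entries indexed QI/HI/SI/DI/other, so MODE_INDEX picks the
   entry for a given mode.  A lookup then reads, for example:  */

static inline int
example_mult_init_cost (const struct processor_costs *cost,
                        enum machine_mode mode)
{
  /* Any mode wider than DImode falls into the "other" slot (index 4).  */
  return cost->mult_init[MODE_INDEX (mode)];
}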
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
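
/* Worked example of the shared scale: with COSTS_N_INSNS (N) == (N) * 4
   and an add taking 2 bytes, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1),
   so a one-insn add and a two-byte add cost the same in either table.  */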
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
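
/* Illustrative helper (a sketch, assuming the stringop_algs layout from
   i386.h: an unknown-size algorithm followed by {max, alg, noalign}
   triples; GCC's real selection logic is more involved).  The first
   entry whose MAX covers the request decides, with -1 meaning "no
   upper bound":  */

static enum stringop_alg
example_stringop_alg (const struct stringop_algs *algs,
                      unsigned HOST_WIDE_INT len)
{
  int i;
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1
        || len <= (unsigned HOST_WIDE_INT) algs->size[i].max)
      return algs->size[i].alg;
  return libcall;
}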
struct processor_costs ix86_size_cost = { /* costs for tuning for size */
  COSTS_N_BYTES (2),   /* cost of an add instruction */
  COSTS_N_BYTES (3),   /* cost of a lea instruction */
  COSTS_N_BYTES (2),   /* variable shift costs */
  COSTS_N_BYTES (3),   /* constant shift costs */
  {COSTS_N_BYTES (3),  /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),  /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)}, /* other */
  COSTS_N_BYTES (3),   /* cost of movsx */
  COSTS_N_BYTES (3),   /* cost of movzx */
  0,                   /* "large" insn */
  2,                   /* cost for loading QImode using movzbl */
  {2, 2, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 2, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 2},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {2, 2, 2},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  3,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {3, 3},              /* cost of storing MMX registers
                          in SImode and DImode */
  3,                   /* cost of moving SSE register */
  {3, 3, 3},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {3, 3, 3},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  0,                   /* size of l1 cache */
  0,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  COSTS_N_BYTES (2),   /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),   /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),   /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),   /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),   /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),   /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  1,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  1,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
struct processor_costs i386_cost = { /* 386 specific costs */
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (3),    /* variable shift costs */
  COSTS_N_INSNS (2),    /* constant shift costs */
  {COSTS_N_INSNS (6),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),   /* HI */
   COSTS_N_INSNS (6),   /* SI */
   COSTS_N_INSNS (6),   /* DI */
   COSTS_N_INSNS (6)},  /* other */
  COSTS_N_INSNS (1),    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (23),  /* SI */
   COSTS_N_INSNS (23),  /* DI */
   COSTS_N_INSNS (23)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  15,                   /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {8, 8, 8},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {8, 8, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of l1 cache */
  0,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  COSTS_N_INSNS (23),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs i486_cost = { /* 486 specific costs */
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (3),    /* variable shift costs */
  COSTS_N_INSNS (2),    /* constant shift costs */
  {COSTS_N_INSNS (12),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),  /* HI */
   COSTS_N_INSNS (12),  /* SI */
   COSTS_N_INSNS (12),  /* DI */
   COSTS_N_INSNS (12)}, /* other */
  1,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),  /* HI */
   COSTS_N_INSNS (40),  /* SI */
   COSTS_N_INSNS (40),  /* DI */
   COSTS_N_INSNS (40)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  15,                   /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {8, 8, 8},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {8, 8, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  4,                    /* size of l1 cache.  486 has 8kB cache
                           shared for code and data, so 4kB is
                           not really precise.  */
  4,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),   /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (4),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (11),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),  /* HI */
   COSTS_N_INSNS (11),  /* SI */
   COSTS_N_INSNS (11),  /* DI */
   COSTS_N_INSNS (11)}, /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),  /* HI */
   COSTS_N_INSNS (25),  /* SI */
   COSTS_N_INSNS (25),  /* DI */
   COSTS_N_INSNS (25)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  8,                    /* "large" insn */
  6,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  8,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  8,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  COSTS_N_INSNS (3),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (4),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (4),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (4)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),  /* HI */
   COSTS_N_INSNS (17),  /* SI */
   COSTS_N_INSNS (17),  /* DI */
   COSTS_N_INSNS (17)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  2,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 2, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {2, 2, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  256,                  /* size of l2 cache */
  32,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  COSTS_N_INSNS (3),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),   /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb has apparently more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
                        {8192, rep_prefix_4_byte, false},
                        {-1, rep_prefix_1_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop, false},
                        {8192, rep_prefix_4_byte, false},
                        {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (2),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (7),   /* SI */
   COSTS_N_INSNS (7),   /* DI */
   COSTS_N_INSNS (7)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (39),  /* SI */
   COSTS_N_INSNS (39),  /* DI */
   COSTS_N_INSNS (39)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  1,                    /* cost for loading QImode using movzbl */
  {1, 1, 1},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {1, 1, 1},            /* cost of storing integer registers */
  1,                    /* cost of reg,reg fld/fst */
  {1, 1, 1},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 6, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  1,                    /* cost of moving MMX register */
  {1, 1},               /* cost of loading MMX registers
                           in SImode and DImode */
  {1, 1},               /* cost of storing MMX registers
                           in SImode and DImode */
  1,                    /* cost of moving SSE register */
  {1, 1, 1},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {1, 1, 1},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  1,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  128,                  /* size of l2 cache.  */
  32,                   /* size of prefetch block */
  1,                    /* number of parallel prefetches */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (3),   /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),  /* HI */
   COSTS_N_INSNS (18),  /* SI */
   COSTS_N_INSNS (18),  /* DI */
   COSTS_N_INSNS (18)}, /* other */
  COSTS_N_INSNS (2),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  8,                    /* "large" insn */
  3,                    /* cost for loading QImode using movzbl */
  {4, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 3, 2},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {6, 6, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 4},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {2, 2, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  6,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  32,                   /* size of l2 cache.  Some models
                           have integrated l2 cache, but
                           optimizing for k6 is not important
                           enough to worry about that.  */
  32,                   /* size of prefetch block */
  1,                    /* number of parallel prefetches */
  COSTS_N_INSNS (2),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (5),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),   /* HI */
   COSTS_N_INSNS (5),   /* SI */
   COSTS_N_INSNS (5),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 4},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 6},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 3, 6},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                  /* number of parallel prefetches */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks,
     libcall can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false},
              {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar load_cost.  */
  2,                    /* scalar_store_cost.  */
  5,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  3,                    /* vec_unalign_load_cost.  */
  3,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  2,                    /* cond_not_taken_branch_cost.  */
};

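/* Reading the k8_cost memcpy entry above (illustrative): for 32-bit,
   blocks of at most 6 bytes use a simple loop, at most 14 bytes an
   unrolled loop, and anything larger rep movsl; for 64-bit, at most 16
   bytes use a loop, at most 8192 bytes rep movsq, and larger blocks go
   to the library call.  */
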
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  /* MOVD reg64, xmmreg  Double  FSTORE  4
     MOVD reg32, xmmreg  Double  FSTORE  4
     MOVD reg64, xmmreg  Double  FADD    3
     MOVD reg32, xmmreg  Double  FADD    3  */
  64,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                  /* number of parallel prefetches */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */
  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar load_cost.  */
  2,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  2,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (4),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (4),   /* SI */
   COSTS_N_INSNS (6),   /* DI */
   COSTS_N_INSNS (6)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {5, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {5, 5, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 4},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 4},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 4},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  2,                    /* MMX or SSE register to integer */
  /* MOVD reg64, xmmreg  Double  FSTORE  4
     MOVD reg32, xmmreg  Double  FSTORE  4
     MOVD reg64, xmmreg  Double  FADD    3
     MOVD reg32, xmmreg  Double  FADD    3  */
  16,                   /* size of l1 cache.  */
  2048,                 /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                  /* number of parallel prefetches */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),   /* cost of FSQRT instruction.  */
  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  6,                    /* scalar_stmt_cost.  */
  4,                    /* scalar load_cost.  */
  4,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  4,                    /* vec_align_load_cost.  */
  4,                    /* vec_unalign_load_cost.  */
  4,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (4),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (4),   /* SI */
   COSTS_N_INSNS (6),   /* DI */
   COSTS_N_INSNS (6)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {5, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {5, 5, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 4},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 4},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 4},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  2,                    /* MMX or SSE register to integer */
  /* MOVD reg64, xmmreg  Double  FSTORE  4
     MOVD reg32, xmmreg  Double  FSTORE  4
     MOVD reg64, xmmreg  Double  FADD    3
     MOVD reg32, xmmreg  Double  FADD    3  */
  16,                   /* size of l1 cache.  */
  2048,                 /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                  /* number of parallel prefetches */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),   /* cost of FSQRT instruction.  */
  /* BDVER2 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  6,                    /* scalar_stmt_cost.  */
  4,                    /* scalar load_cost.  */
  4,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  4,                    /* vec_align_load_cost.  */
  4,                    /* vec_unalign_load_cost.  */
  4,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs bdver3_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (4),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (4),   /* SI */
   COSTS_N_INSNS (6),   /* DI */
   COSTS_N_INSNS (6)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {5, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {5, 5, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 4},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 4},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 4},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  2,                    /* MMX or SSE register to integer */
  16,                   /* size of l1 cache.  */
  2048,                 /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                  /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),   /* cost of FSQRT instruction.  */
  /* BDVER3 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  6,                    /* scalar_stmt_cost.  */
  4,                    /* scalar load_cost.  */
  4,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  4,                    /* vec_align_load_cost.  */
  4,                    /* vec_unalign_load_cost.  */
  4,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  /* MOVD reg64, xmmreg  Double  FSTORE  4
     MOVD reg32, xmmreg  Double  FSTORE  4
     MOVD reg64, xmmreg  Double  FADD    3
     MOVD reg32, xmmreg  Double  FADD    3  */
  32,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  100,                  /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */
  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar load_cost.  */
  2,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  2,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs btver2_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  /* MOVD reg64, xmmreg  Double  FSTORE  4
     MOVD reg32, xmmreg  Double  FSTORE  4
     MOVD reg64, xmmreg  Double  FADD    3
     MOVD reg32, xmmreg  Double  FADD    3  */
  32,                   /* size of l1 cache.  */
  2048,                 /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  100,                  /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar load_cost.  */
  2,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  2,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (3),    /* cost of a lea instruction */
  COSTS_N_INSNS (4),    /* variable shift costs */
  COSTS_N_INSNS (4),    /* constant shift costs */
  {COSTS_N_INSNS (15),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),  /* HI */
   COSTS_N_INSNS (15),  /* SI */
   COSTS_N_INSNS (15),  /* DI */
   COSTS_N_INSNS (15)}, /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),  /* HI */
   COSTS_N_INSNS (56),  /* SI */
   COSTS_N_INSNS (56),  /* DI */
   COSTS_N_INSNS (56)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  16,                   /* "large" insn */
  2,                    /* cost for loading QImode using movzbl */
  {4, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 3, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  12,                   /* cost of moving SSE register */
  {12, 12, 12},         /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  10,                   /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  2,                    /* Branch cost */
  COSTS_N_INSNS (5),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte, false}, {48, loop, false},
              {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (10),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),  /* HI */
   COSTS_N_INSNS (10),  /* SI */
   COSTS_N_INSNS (10),  /* DI */
   COSTS_N_INSNS (10)}, /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),  /* HI */
   COSTS_N_INSNS (66),  /* SI */
   COSTS_N_INSNS (66),  /* DI */
   COSTS_N_INSNS (66)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  16,                   /* "large" insn */
  17,                   /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  3,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 4},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  6,                    /* cost of moving MMX register */
  {12, 12},             /* cost of loading MMX registers
                           in SImode and DImode */
  {12, 12},             /* cost of storing MMX registers
                           in SImode and DImode */
  6,                    /* cost of moving SSE register */
  {12, 12, 12},         /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {12, 12, 12},         /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  8,                    /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  1024,                 /* size of l2 cache.  */
  128,                  /* size of prefetch block */
  8,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
   {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
              {100000, unrolled_loop, false}, {-1, libcall, false}}}},
  {{libcall, {{6, loop_1_byte, false}, {48, loop, false},
              {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{24, loop, false}, {64, unrolled_loop, false},
              {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  17, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {8, 8, 8}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {8, 8, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  32, /* size of l1 cache.  */
  256, /* size of l2 cache.  */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  3, /* Branch cost */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8), /* cost of FABS instruction.  */
  COSTS_N_INSNS (8), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40), /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
   {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
	      {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {15, unrolled_loop, false},
	      {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{24, loop, false}, {32, unrolled_loop, false},
	      {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs slm_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  17, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {8, 8, 8}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {8, 8, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  32, /* size of l1 cache.  */
  256, /* size of l2 cache.  */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  3, /* Branch cost */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8), /* cost of FABS instruction.  */
  COSTS_N_INSNS (8), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40), /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
   {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
	      {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {15, unrolled_loop, false},
	      {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{24, loop, false}, {32, unrolled_loop, false},
	      {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */

static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  17, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {8, 8, 8}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {8, 8, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  32, /* size of l1 cache.  */
  512, /* size of l2 cache.  */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  /* Benchmarks show large regressions on K8 sixtrack benchmark when this
     value is increased to perhaps more appropriate value of 5.  */
  3, /* Branch cost */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8), /* cost of FABS instruction.  */
  COSTS_N_INSNS (8), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40), /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
	      {-1, libcall, false}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
	      {-1, libcall, false}}}},
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};
/* core_cost should produce code tuned for Core family of CPUs.  */

static const
struct processor_costs core_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  17, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {8, 8, 8}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {8, 8, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  64, /* size of l1 cache.  */
  512, /* size of l2 cache.  */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  /* FIXME perhaps more appropriate value is 5.  */
  3, /* Branch cost */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8), /* cost of FABS instruction.  */
  COSTS_N_INSNS (8), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40), /* cost of FSQRT instruction.  */
  {{libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
   {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
	      {-1, libcall, false}}}},
  {{libcall, {{6, loop_1_byte, true},
	      {8192, rep_prefix_4_byte, true},
	      {-1, libcall, false}}},
   {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
	      {-1, libcall, false}}}},
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */

static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  17, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
		in QImode, HImode and SImode.
		Relative to reg-reg move (2).  */
  {4, 4, 4}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
		   in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
		in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
	     in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
	     in SImode and DImode */
  2, /* cost of moving SSE register */
  {8, 8, 8}, /* cost of loading SSE registers
		in SImode, DImode and TImode */
  {8, 8, 8}, /* cost of storing SSE registers
		in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  32, /* size of l1 cache.  */
  256, /* size of l2 cache.  */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  3, /* Branch cost */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8), /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20), /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8), /* cost of FABS instruction.  */
  COSTS_N_INSNS (8), /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40), /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
	      {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
	      {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost.  */
  1, /* scalar load_cost.  */
  1, /* scalar_store_cost.  */
  1, /* vec_stmt_cost.  */
  1, /* vec_to_scalar_cost.  */
  1, /* scalar_to_vec_cost.  */
  1, /* vec_align_load_cost.  */
  2, /* vec_unalign_load_cost.  */
  1, /* vec_store_cost.  */
  3, /* cond_taken_branch_cost.  */
  1, /* cond_not_taken_branch_cost.  */
};
/* Set by -mtune.  */
const struct processor_costs *ix86_tune_cost = &pentium_cost;

/* Set by -mtune or -Os.  */
const struct processor_costs *ix86_cost = &pentium_cost;
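/* Illustrative note: the cost tables above are consulted through these two
   pointers in the rest of this file; for example an rtx-cost computation
   reads a field such as

     *total = ix86_cost->add;

   so changing -mtune simply repoints ix86_tune_cost/ix86_cost at another
   table.  */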
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_COREI7 (1<<PROCESSOR_COREI7)
#define m_HASWELL (1<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_COREI7 | m_HASWELL)
#define m_ATOM (1<<PROCESSOR_ATOM)
#define m_SLM (1<<PROCESSOR_SLM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER3 (1<<PROCESSOR_BDVER3)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
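/* Illustrative sketch (an assumption mirroring how the masks below are
   consumed during option processing): each initializer is tested against
   the bit of the CPU selected by -mtune, roughly

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; i++)
       ix86_tune_features[i]
	 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
*/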
const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
#define DEF_TUNE(tune, name) name,
#include "x86-tune.def"
#undef DEF_TUNE
};
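/* For illustration: given the DEF_TUNE definition above, a hypothetical
   x86-tune.def entry of the form

     DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave")

   expands to just the string literal "use_leave", so the include builds
   the name table in the exact order of the X86_TUNE_* enumerators.  */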
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling for Generic64 seems like good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro base chips.  */
  m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATOM | m_SLM | m_CORE_ALL | m_K6
  | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation result.  But after P4 was made, no performance benefit
     was observed with branch hints.  It also increases the code size.
     As a result, icc never generates branch hints.  */

  /* X86_TUNE_DOUBLE_WITH_ADD */

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE
  | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE
  | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on Generic32 compilation setting as well.  However
     in current implementation the partial register stalls are not eliminated
     very well - they can be introduced via subregs synthesized by combine
     and can happen in caller/callee saving sequences.  Because this option
     pays back little on PPro based chips and is in conflict with partial reg
     dependencies used by Athlon/P4 based chips, it is better to leave it off
     for generic32 for now.  */

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE_ALL | m_GENERIC,

  /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
     on 16-bit immediate moves into memory on Core2 and Corei7.  */
  m_CORE_ALL | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE
    | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_ATOM | m_SLM | m_K6),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */

  /* X86_TUNE_SPLIT_LONG_MOVES */

  /* X86_TUNE_READ_MODIFY_WRITE */

  /* X86_TUNE_READ_MODIFY */

  /* X86_TUNE_PROMOTE_QIMODE */
  m_386 | m_486 | m_PENT | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE
  | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_386 | m_486 | m_PENT),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_P4_NOCONA,

  /* X86_TUNE_QIMODE_MATH */

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */

  /* X86_TUNE_PROMOTE_QI_REGS */

  /* X86_TUNE_PROMOTE_HI_REGS */

  /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
     over esp addition.  */
  m_386 | m_486 | m_PENT | m_PPRO,

  /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
     over esp addition.  */

  /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
     over esp subtraction.  */
  m_386 | m_486 | m_PENT | m_K6_GEODE,

  /* X86_TUNE_DOUBLE_PUSH.  Enable if double push insn is preferred
     over esp subtraction.  */
  m_PENT | m_K6_GEODE,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE
    | m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here in between PPro/Pentium4 based chips that treat 128bit
     SSE registers as single units versus K8 based chips that divide SSE
     registers to two 64bit halves.  This knob promotes all store destinations
     to be 128bit to allow register renaming on 128bit SSE units, but usually
     results in one extra microop on 64bit SSE units.  Experimental results
     show that disabling this option on P4 brings over 20% SPECfp regression,
     while enabling it on K8 brings roughly 2.4% regression that can be partly
     masked by careful scheduling of moves.  */
  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMDFAM10
  | m_BDVER | m_GENERIC,

  /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
  m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER | m_SLM,

  /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
  m_COREI7 | m_BDVER | m_SLM,

  /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just lower part of scalar values in proper format leaving the
     upper part undefined.  */

  /* X86_TUNE_SSE_TYPELESS_STORES */

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_P4_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_PPRO | m_ATHLON_K8,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_PPRO | m_ATHLON_K8,

  /* X86_TUNE_SHIFT1 */

  /* X86_TUNE_USE_FFREEP */

  /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC */
  ~(m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_INTER_UNIT_MOVES_FROM_VEC */

  /* X86_TUNE_INTER_UNIT_CONVERSIONS */
  ~(m_AMDFAM10 | m_BDVER),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE
  | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_USE_INCDEC */
  ~(m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function.  */

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE
  | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_AVOID_VECTOR_DECODE */
  m_CORE_ALL | m_K8 | m_GENERIC64,

  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
     and SImode multiply, but 386 and 486 do HImode multiply faster.  */

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
     vector path on AMD machines.  */
  m_CORE_ALL | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64,

  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
     machines.  */
  m_CORE_ALL | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64,

  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a MOV.  */

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */

  /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
     from FP to FP.  */
  m_CORE_ALL | m_AMDFAM10 | m_GENERIC,

  /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
     from integer to FP.  */

  /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
     with a subsequent conditional jump instruction into a single
     compare-and-branch uop.  */

  /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit.  This flag
     will impact LEA instruction selection.  */

  /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
     instructions.  */

  /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
     at -O3.  For the moment, the prefetching seems badly tuned for Intel
     chips.  */
  m_K6_GEODE | m_AMD_MULTIPLE,

  /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
     the auto-vectorizer.  */

  /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
     during reassociation of integer computation.  */

  /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
     during reassociation of fp computation.  */
  m_ATOM | m_SLM | m_HASWELL | m_BDVER1 | m_BDVER2,

  /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
     regs instead of memory.  */

  /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
     a conditional move.  */

  /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for
     fp converts to destination register.  */
};
/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];
/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOV: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
};
static const unsigned int x86_accumulate_outgoing_args
  = m_PPRO | m_P4_NOCONA | m_ATOM | m_SLM | m_CORE_ALL | m_AMD_MULTIPLE
    | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM
    | m_AMD_MULTIPLE | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_load
  = m_COREI7 | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_store
  = m_COREI7 | m_BDVER | m_GENERIC;
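/* Illustrative note (an assumption about how the masks above are used):
   during option override they seed target_flags defaults for the selected
   tuning, roughly

     if (x86_avx256_split_unaligned_load & ix86_tune_mask)
       target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
*/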
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
};
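/* Example reading of the map above: REGNO_REG_CLASS (0) yields AREG for
   %eax, while the stack pointer (regno 7 here) is only NON_Q_REGS because
   it has no addressable QImode low part.  */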
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
  -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
  29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
};
/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
  -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8 for %eip    (no gcc equivalent)
	9 for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
  -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
};
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
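/* Illustrative note: in the SysV x86-64 convention implemented here the
   first two return entries (AX_REG, DX_REG) carry integer results, so a
   128-bit integer comes back in the RAX:RDX pair; callers of this table
   index it in that order.  */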
/* Additional registers that are clobbered by SYSV calls.  */

int const x86_64_ms_sysv_extra_clobbered_registers[12] =
{
  SI_REG, DI_REG,
  XMM6_REG, XMM7_REG,
  XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
  XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
};
/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   saved static chain			if ix86_static_chain_on_stack

   saved frame pointer			if frame_pointer_needed
					<- HARD_FRAME_POINTER

					<- sse_regs_save_offset
   [va_arg registers]  |
   [padding2]	       | = to_allocate
  */
struct ix86_frame
{
  int outgoing_arguments_size;

  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
  HOST_WIDE_INT hfp_save_offset;
  HOST_WIDE_INT reg_save_offset;
  HOST_WIDE_INT sse_reg_save_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char x86_prefetch_sse;

/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
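/* Illustrative sketch (an assumption based on how these generator hooks
   are used): they are bound once the word size is known, roughly

     if (TARGET_64BIT)
       ix86_gen_add3 = gen_adddi3;
     else
       ix86_gen_add3 = gen_addsi3;

   so prologue/epilogue code can emit mode-correct RTL without checking
   TARGET_64BIT at every call site.  */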
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Fence to use after loop using movnt.  */
tree x86_mfence;

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
{
  X86_64_INTEGER_CLASS,
  X86_64_INTEGERSI_CLASS,
  X86_64_COMPLEX_X87_CLASS,
};
#define MAX_CLASSES 4
/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table[5];
static bool ext_80387_constants_init = 0;
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (enum machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
						 rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);
enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};
static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
				 const char *, enum fpmath_unit, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
					  struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
						 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);
#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);

static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
  const struct processor_costs *cost; /* Processor costs */
  const int align_loop; /* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};
static const struct ptt processor_target_table[PROCESSOR_max] =
{
  {&i386_cost, 4, 3, 4, 3, 4},
  {&i486_cost, 16, 15, 16, 15, 16},
  {&pentium_cost, 16, 7, 16, 7, 16},
  {&pentiumpro_cost, 16, 15, 16, 10, 16},
  {&geode_cost, 0, 0, 0, 0, 0},
  {&k6_cost, 32, 7, 32, 7, 32},
  {&athlon_cost, 16, 7, 16, 7, 16},
  {&pentium4_cost, 0, 0, 0, 0, 0},
  {&k8_cost, 16, 7, 16, 7, 16},
  {&nocona_cost, 0, 0, 0, 0, 0},
  {&core_cost, 16, 10, 16, 10, 16},
  {&core_cost, 16, 10, 16, 10, 16},
  {&core_cost, 16, 10, 16, 10, 16},
  {&generic32_cost, 16, 7, 16, 7, 16},
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&amdfam10_cost, 32, 24, 32, 7, 32},
  {&bdver1_cost, 16, 10, 16, 7, 11},
  {&bdver2_cost, 16, 10, 16, 7, 11},
  {&bdver3_cost, 16, 10, 16, 7, 11},
  {&btver1_cost, 16, 10, 16, 7, 11},
  {&btver2_cost, 16, 10, 16, 7, 11},
  {&atom_cost, 16, 15, 16, 7, 16},
  {&slm_cost, 16, 15, 16, 7, 16}
};
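/* Example use of the table above (illustrative): option override code
   selects the row of the -mtune processor and uses its fields as defaults
   for alignment flags the user left unset, e.g. roughly

     align_loops = processor_target_table[ix86_tune].align_loop;
*/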
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
static bool
gate_insert_vzeroupper (void)
{
  return TARGET_AVX && TARGET_VZEROUPPER;
}
static unsigned int
rest_of_handle_insert_vzeroupper (void)
{
  int i;

  /* vzeroupper instructions are inserted immediately after reload to
     account for possible spills from 256bit registers.  The pass
     reuses mode switching infrastructure by re-running mode insertion
     pass, so disable entities that have already been processed.  */
  for (i = 0; i < MAX_386_ENTITIES; i++)
    ix86_optimize_mode_switching[i] = 0;

  ix86_optimize_mode_switching[AVX_U128] = 1;

  /* Call optimize_mode_switching.  */
  g->get_passes ()->execute_pass_mode_switching ();
  return 0;
}
const pass_data pass_data_insert_vzeroupper =
{
  RTL_PASS, /* type */
  "vzeroupper", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_df_finish | TODO_verify_rtl_sharing | 0 ), /* todo_flags_finish */
};
class pass_insert_vzeroupper : public rtl_opt_pass
{
public:
  pass_insert_vzeroupper (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_insert_vzeroupper, ctxt)
  {}

  /* opt_pass methods: */
  bool gate () { return gate_insert_vzeroupper (); }
  unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }

}; // class pass_insert_vzeroupper

rtl_opt_pass *
make_pass_insert_vzeroupper (gcc::context *ctxt)
{
  return new pass_insert_vzeroupper (ctxt);
}
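/* Illustrative note (an assumption about registration): the factory above
   is the pass's external entry point, and it would typically be inserted
   right after reload along the lines of

     struct register_pass_info insert_vzeroupper_info
       = { make_pass_insert_vzeroupper (g), "reload",
	   1, PASS_POS_INSERT_AFTER };
*/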
/* Return true if a red-zone is in use.  */

static inline bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
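/* Example consequence of the predicate above: when it returns true, leaf
   functions may freely use up to 128 bytes below the stack pointer without
   adjusting it, letting small frames skip explicit stack allocation.  */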
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */

static char *
ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
		    const char *tune, enum fpmath_unit fpmath,
		    bool add_nl_p)
{
  struct ix86_target_opts
  {
    const char *option; /* option string */
    HOST_WIDE_INT mask; /* isa mask options */
  };
  /* This table is ordered so that options like -msse4.2 that imply
     preceding options will match those first.  */
  static struct ix86_target_opts isa_opts[] =
  {
    { "-mfma4", OPTION_MASK_ISA_FMA4 },
    { "-mfma", OPTION_MASK_ISA_FMA },
    { "-mxop", OPTION_MASK_ISA_XOP },
    { "-mlwp", OPTION_MASK_ISA_LWP },
    { "-msse4a", OPTION_MASK_ISA_SSE4A },
    { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
    { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
    { "-mssse3", OPTION_MASK_ISA_SSSE3 },
    { "-msse3", OPTION_MASK_ISA_SSE3 },
    { "-msse2", OPTION_MASK_ISA_SSE2 },
    { "-msse", OPTION_MASK_ISA_SSE },
    { "-m3dnow", OPTION_MASK_ISA_3DNOW },
    { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
    { "-mmmx", OPTION_MASK_ISA_MMX },
    { "-mabm", OPTION_MASK_ISA_ABM },
    { "-mbmi", OPTION_MASK_ISA_BMI },
    { "-mbmi2", OPTION_MASK_ISA_BMI2 },
    { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
    { "-mhle", OPTION_MASK_ISA_HLE },
    { "-mfxsr", OPTION_MASK_ISA_FXSR },
    { "-mrdseed", OPTION_MASK_ISA_RDSEED },
    { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
    { "-madx", OPTION_MASK_ISA_ADX },
    { "-mtbm", OPTION_MASK_ISA_TBM },
    { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
    { "-mmovbe", OPTION_MASK_ISA_MOVBE },
    { "-mcrc32", OPTION_MASK_ISA_CRC32 },
    { "-maes", OPTION_MASK_ISA_AES },
    { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
    { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
    { "-mrdrnd", OPTION_MASK_ISA_RDRND },
    { "-mf16c", OPTION_MASK_ISA_F16C },
    { "-mrtm", OPTION_MASK_ISA_RTM },
    { "-mxsave", OPTION_MASK_ISA_XSAVE },
    { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
  };
  static struct ix86_target_opts flag_opts[] =
  {
    { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
    { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
    { "-m80387", MASK_80387 },
    { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
    { "-malign-double", MASK_ALIGN_DOUBLE },
    { "-mcld", MASK_CLD },
    { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
    { "-mieee-fp", MASK_IEEE_FP },
    { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
    { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
    { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
    { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
    { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
    { "-mno-push-args", MASK_NO_PUSH_ARGS },
    { "-mno-red-zone", MASK_NO_RED_ZONE },
    { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
    { "-mrecip", MASK_RECIP },
    { "-mrtd", MASK_RTD },
    { "-msseregparm", MASK_SSEREGPARM },
    { "-mstack-arg-probe", MASK_STACK_PROBE },
    { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
    { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
    { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
    { "-mvzeroupper", MASK_VZEROUPPER },
    { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD },
    { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE },
    { "-mprefer-avx128", MASK_PREFER_AVX128 },
  };
  const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
  const char *abi;
  char isa_other[40];
  char target_other[40];
  size_t num = 0;
  size_t i, j, len, sep_len, line_len;
  char *ret, *ptr;

  memset (opts, '\0', sizeof (opts));
  /* Add -march= option.  */
  if (arch)
    {
      opts[num][0] = "-march=";
      opts[num++][1] = arch;
    }

  /* Add -mtune= option.  */
  if (tune)
    {
      opts[num][0] = "-mtune=";
      opts[num++][1] = tune;
    }

  /* Add -m32/-m64/-mx32.  */
  if ((isa & OPTION_MASK_ISA_64BIT) != 0)
    {
      if ((isa & OPTION_MASK_ABI_64) != 0)
	abi = "-m64";
      else
	abi = "-mx32";
      isa &= ~(OPTION_MASK_ISA_64BIT
	       | OPTION_MASK_ABI_64
	       | OPTION_MASK_ABI_X32);
    }
  else
    abi = "-m32";
  opts[num++][0] = abi;
  /* Pick out the options in isa options.  */
  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
    {
      if ((isa & isa_opts[i].mask) != 0)
	{
	  opts[num++][0] = isa_opts[i].option;
	  isa &= ~ isa_opts[i].mask;
	}
    }

  if (isa && add_nl_p)
    {
      opts[num++][0] = isa_other;
      sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
	       isa);
    }
  /* Add flag options.  */
  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
    {
      if ((flags & flag_opts[i].mask) != 0)
	{
	  opts[num++][0] = flag_opts[i].option;
	  flags &= ~ flag_opts[i].mask;
	}
    }

  if (flags && add_nl_p)
    {
      opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: %#x)", flags);
    }
  /* Add -mfpmath= option.  */
  if (fpmath)
    {
      opts[num][0] = "-mfpmath=";
      switch ((int) fpmath)
	{
	case FPMATH_387:
	  opts[num++][1] = "387";
	  break;

	case FPMATH_SSE:
	  opts[num++][1] = "sse";
	  break;

	case FPMATH_387 | FPMATH_SSE:
	  opts[num++][1] = "sse+387";
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (num < ARRAY_SIZE (opts));
  /* Size the string.  */
  len = 0;
  sep_len = (add_nl_p) ? 3 : 1;
  for (i = 0; i < num; i++)
    {
      len += sep_len;
      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  len += strlen (opts[i][j]);
    }

  /* Build the string.  */
  ret = ptr = (char *) xmalloc (len);
  line_len = 0;

  for (i = 0; i < num; i++)
    {
      size_t len2[2];

      for (j = 0; j < 2; j++)
	len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;

      if (i != 0)
	{
	  *ptr++ = ' ';
	  line_len++;
	}

      if (add_nl_p && line_len + len2[0] + len2[1] > 70)
	{
	  *ptr++ = '\n';
	  line_len = 0;
	}

      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  {
	    memcpy (ptr, opts[i][j], len2[j]);
	    ptr += len2[j];
	    line_len += len2[j];
	  }
    }

  gcc_assert (ret + len >= ptr);

  return ret;
}
/* Return true if profiling code should be emitted before the prologue,
   false otherwise.  Note: on x86 this is the case when -mfentry
   ("hotfix" support) is in effect.  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Function that is callable from the debugger to print the current
   options.  */
static void
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);
}
/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attributes.  */

static void
ix86_option_override_internal (bool main_args_p)
{
  int i;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  const bool ix86_tune_specified = (ix86_tune_string != NULL);
  const char *prefix;
  const char *suffix;
  const char *sw;
#define PTA_3DNOW		(HOST_WIDE_INT_1 << 0)
#define PTA_3DNOW_A		(HOST_WIDE_INT_1 << 1)
#define PTA_64BIT		(HOST_WIDE_INT_1 << 2)
#define PTA_ABM			(HOST_WIDE_INT_1 << 3)
#define PTA_AES			(HOST_WIDE_INT_1 << 4)
#define PTA_AVX			(HOST_WIDE_INT_1 << 5)
#define PTA_BMI			(HOST_WIDE_INT_1 << 6)
#define PTA_CX16		(HOST_WIDE_INT_1 << 7)
#define PTA_F16C		(HOST_WIDE_INT_1 << 8)
#define PTA_FMA			(HOST_WIDE_INT_1 << 9)
#define PTA_FMA4		(HOST_WIDE_INT_1 << 10)
#define PTA_FSGSBASE		(HOST_WIDE_INT_1 << 11)
#define PTA_LWP			(HOST_WIDE_INT_1 << 12)
#define PTA_LZCNT		(HOST_WIDE_INT_1 << 13)
#define PTA_MMX			(HOST_WIDE_INT_1 << 14)
#define PTA_MOVBE		(HOST_WIDE_INT_1 << 15)
#define PTA_NO_SAHF		(HOST_WIDE_INT_1 << 16)
#define PTA_PCLMUL		(HOST_WIDE_INT_1 << 17)
#define PTA_POPCNT		(HOST_WIDE_INT_1 << 18)
#define PTA_PREFETCH_SSE	(HOST_WIDE_INT_1 << 19)
#define PTA_RDRND		(HOST_WIDE_INT_1 << 20)
#define PTA_SSE			(HOST_WIDE_INT_1 << 21)
#define PTA_SSE2		(HOST_WIDE_INT_1 << 22)
#define PTA_SSE3		(HOST_WIDE_INT_1 << 23)
#define PTA_SSE4_1		(HOST_WIDE_INT_1 << 24)
#define PTA_SSE4_2		(HOST_WIDE_INT_1 << 25)
#define PTA_SSE4A		(HOST_WIDE_INT_1 << 26)
#define PTA_SSSE3		(HOST_WIDE_INT_1 << 27)
#define PTA_TBM			(HOST_WIDE_INT_1 << 28)
#define PTA_XOP			(HOST_WIDE_INT_1 << 29)
#define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
#define PTA_BMI2		(HOST_WIDE_INT_1 << 31)
#define PTA_RTM			(HOST_WIDE_INT_1 << 32)
#define PTA_HLE			(HOST_WIDE_INT_1 << 33)
#define PTA_PRFCHW		(HOST_WIDE_INT_1 << 34)
#define PTA_RDSEED		(HOST_WIDE_INT_1 << 35)
#define PTA_ADX			(HOST_WIDE_INT_1 << 36)
#define PTA_FXSR		(HOST_WIDE_INT_1 << 37)
#define PTA_XSAVE		(HOST_WIDE_INT_1 << 38)
#define PTA_XSAVEOPT		(HOST_WIDE_INT_1 << 39)

/* if this reaches 64, need to widen struct pta flags below */
  static struct pta
    {
      const char *const name; /* processor name or nickname.  */
      const enum processor_type processor;
      const enum attr_cpu schedule;
      const unsigned HOST_WIDE_INT flags;
    }
  const processor_alias_table[] =
    {
2996 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2997 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2998 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2999 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3000 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
3001 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
3002 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3003 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3004 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3005 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3006 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3007 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3008 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_FXSR
},
3009 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3010 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3011 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3012 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3013 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3014 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3015 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
3016 PTA_MMX
|PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3017 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
3018 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3019 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
3020 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_FXSR
},
3021 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
3022 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3023 | PTA_CX16
| PTA_NO_SAHF
| PTA_FXSR
},
3024 {"core2", PROCESSOR_CORE2
, CPU_CORE2
,
3025 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3026 | PTA_SSSE3
| PTA_CX16
| PTA_FXSR
},
3027 {"corei7", PROCESSOR_COREI7
, CPU_COREI7
,
3028 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
3029 | PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_POPCNT
| PTA_FXSR
},
3030 {"corei7-avx", PROCESSOR_COREI7
, CPU_COREI7
,
3031 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3032 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3033 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
3034 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3035 {"core-avx-i", PROCESSOR_COREI7
, CPU_COREI7
,
3036 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3037 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3038 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3039 | PTA_RDRND
| PTA_F16C
| PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3040 {"core-avx2", PROCESSOR_HASWELL
, CPU_COREI7
,
3041 PTA_64BIT
       | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
       | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2 | PTA_CX16
       | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE | PTA_RDRND
       | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT | PTA_FMA
       | PTA_MOVBE | PTA_RTM | PTA_HLE | PTA_FXSR | PTA_XSAVE
       | PTA_XSAVEOPT},
      {"atom", PROCESSOR_ATOM, CPU_ATOM,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
       | PTA_CX16 | PTA_MOVBE | PTA_FXSR},
      {"slm", PROCESSOR_SLM, CPU_SLM,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
       | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_MOVBE | PTA_FXSR},
      {"geode", PROCESSOR_GEODE, CPU_GEODE,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
      {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
      {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
      {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
      {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
      {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
      {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
      {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
      {"x86-64", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
      {"k8", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
      {"k8-sse3", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
      {"opteron", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
      {"opteron-sse3", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
      {"athlon64", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
      {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
      {"athlon-fx", PROCESSOR_K8, CPU_K8,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
       | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
      {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
       | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
      {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
       PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
       | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
      {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
       | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
       | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
      {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
       | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
       | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
       | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
      {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
       | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
       | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
       | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
       | PTA_XSAVEOPT | PTA_FSGSBASE},
      {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
       | PTA_FXSR | PTA_XSAVE},
      {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
       PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
       | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
       | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
       | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
       | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},

      {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
       PTA_HLE /* flags are only used for -march switch.  */ },
      {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
       PTA_64BIT
       | PTA_HLE /* flags are only used for -march switch.  */ },
    };
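  /* Note: each alias above pairs a scheduling model with a PTA_* ISA
     mask; selecting it via -march= seeds ix86_isa_flags from that mask
     in the loop over processor_alias_table further down, so e.g.
     -march=bdver2 implies XOP, FMA, BMI, etc. by default.  */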
  /* -mrecip options.  */
  static struct
    {
      const char *string;           /* option name */
      unsigned int mask;            /* mask bits to set */
    }
  const recip_options[] =
    {
      { "all",       RECIP_MASK_ALL },
      { "none",      RECIP_MASK_NONE },
      { "div",       RECIP_MASK_DIV },
      { "sqrt",      RECIP_MASK_SQRT },
      { "vec-div",   RECIP_MASK_VEC_DIV },
      { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
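  /* recip_options only names the pieces; the -mrecip= string itself is
     parsed near the end of this function, where each comma-separated
     name ORs its mask into recip_mask (or clears it when prefixed
     with '!').  */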
  /* Set up prefix/suffix so the error messages refer to either the command
     line argument, or the attribute(target).  */
  if (main_args_p)
    {
      prefix = "-m";
      suffix = "";
      sw = "switch";
    }
  else
    {
      prefix = "option(\"";
      suffix = "\")";
      sw = "attribute";
    }

  /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
     TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false.  */
  if (TARGET_64BIT_DEFAULT && !TARGET_64BIT)
    ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
#ifdef TARGET_BI_ARCH
  else
    {
#if TARGET_BI_ARCH == 1
      /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
	 is on and OPTION_MASK_ABI_X32 is off.  We turn off
	 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
	 -mx32.  */
      if (TARGET_X32)
	ix86_isa_flags &= ~OPTION_MASK_ABI_64;
#else
      /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
	 on and OPTION_MASK_ABI_64 is off.  We turn off
	 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
	 -m64.  */
      if (TARGET_LP64)
	ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
#endif
    }
#endif

  if (TARGET_X32)
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_64 for TARGET_X32.  */
      ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      ix86_isa_flags &= ~OPTION_MASK_ABI_64;
    }
  else if (TARGET_LP64)
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_X32 for TARGET_LP64.  */
      ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
  /* -fPIC is the default for x86_64.  */
  if (TARGET_MACHO && TARGET_64BIT)
    flag_pic = 2;

  /* Need to check -mtune=generic first.  */
  if (ix86_tune_string)
    {
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "i686")
	  /* As special support for cross compilers we read -mtune=native
	     as -mtune=generic.  With native compilers we won't see the
	     -mtune=native, as it was changed by the driver.  */
	  || !strcmp (ix86_tune_string, "native"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
      /* If this call is for setting the option attribute, allow the
	 generic32/generic64 that was previously set.  */
      else if (!main_args_p
	       && (!strcmp (ix86_tune_string, "generic32")
		   || !strcmp (ix86_tune_string, "generic64")))
	;
      else if (!strncmp (ix86_tune_string, "generic", 7))
	error ("bad value (%s) for %stune=%s %s",
	       ix86_tune_string, prefix, suffix, sw);
      else if (!strcmp (ix86_tune_string, "x86-64"))
	warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
		 "%stune=k8%s or %stune=generic%s instead as appropriate",
		 prefix, suffix, prefix, suffix, prefix, suffix);
    }
  else
    {
      if (ix86_arch_string)
	ix86_tune_string = ix86_arch_string;
      if (!ix86_tune_string)
	{
	  ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
	  ix86_tune_defaulted = 1;
	}

      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
	 need to use a sensible tune option.  */
      if (!strcmp (ix86_tune_string, "generic")
	  || !strcmp (ix86_tune_string, "x86-64")
	  || !strcmp (ix86_tune_string, "i686"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
    }
  if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
    {
      /* rep; movq isn't available in 32-bit code.  */
      error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
      ix86_stringop_alg = no_stringop;
    }

  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;

  if (global_options_set.x_ix86_pmode)
    {
      if ((TARGET_LP64 && ix86_pmode == PMODE_SI)
	  || (!TARGET_64BIT && ix86_pmode == PMODE_DI))
	error ("address mode %qs not supported in the %s bit mode",
	       TARGET_64BIT ? "short" : "long",
	       TARGET_64BIT ? "64" : "32");
    }
  else
    ix86_pmode = TARGET_LP64 ? PMODE_DI : PMODE_SI;

  if (!global_options_set.x_ix86_abi)
    ix86_abi = DEFAULT_ABI;
  if (global_options_set.x_ix86_cmodel)
    {
      switch (ix86_cmodel)
	{
	case CM_SMALL:
	case CM_SMALL_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_SMALL_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "small", "32");
	  break;

	case CM_MEDIUM:
	case CM_MEDIUM_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_MEDIUM_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "medium", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "medium");
	  break;

	case CM_LARGE:
	case CM_LARGE_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_LARGE_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "large", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "large");
	  break;

	case CM_32:
	  if (flag_pic)
	    error ("code model %s does not support PIC mode", "32");
	  if (TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "32", "64");
	  break;

	case CM_KERNEL:
	  if (flag_pic)
	    {
	      error ("code model %s does not support PIC mode", "kernel");
	      ix86_cmodel = CM_32;
	    }
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "kernel", "32");
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
	 use of rip-relative addressing.  This eliminates fixups that
	 would otherwise be needed if this object is to be placed in a
	 DLL, and is essentially just as efficient as direct addressing.  */
      if (TARGET_64BIT && (TARGET_RDOS || TARGET_PECOFF))
	ix86_cmodel = CM_MEDIUM_PIC, flag_pic = 1;
      else if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
	ix86_cmodel = CM_32;
    }
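  /* In short: without an explicit -mcmodel=, 64-bit code defaults to
     CM_SMALL (CM_SMALL_PIC under -fpic) and 32-bit code to CM_32, while
     the medium and large models are only accepted for 64-bit, non-x32
     targets, as checked above.  */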
  if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
    {
      error ("-masm=intel not supported in this configuration");
      ix86_asm_dialect = ASM_ATT;
    }
  if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;

	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");

	if (processor_alias_table[i].flags & PTA_MMX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
	  ix86_isa_flags |= OPTION_MASK_ISA_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_AVX2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_BMI
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI;
	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
	if (processor_alias_table[i].flags & PTA_TBM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
	  ix86_isa_flags |= OPTION_MASK_ISA_TBM;
	if (processor_alias_table[i].flags & PTA_BMI2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & PTA_RTM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
	  ix86_isa_flags |= OPTION_MASK_ISA_RTM;
	if (processor_alias_table[i].flags & PTA_HLE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
	  ix86_isa_flags |= OPTION_MASK_ISA_HLE;
	if (processor_alias_table[i].flags & PTA_PRFCHW
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
	  ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
	if (processor_alias_table[i].flags & PTA_RDSEED
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
	  ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
	if (processor_alias_table[i].flags & PTA_ADX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
	  ix86_isa_flags |= OPTION_MASK_ISA_ADX;
	if (processor_alias_table[i].flags & PTA_FXSR
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
	  ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
	if (processor_alias_table[i].flags & PTA_XSAVE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
	  ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
	if (processor_alias_table[i].flags & PTA_XSAVEOPT
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
	  ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;

	break;
      }

  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for %stune=%s %s",
	   prefix, suffix, sw);
  else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
    error ("bad value (%s) for %sarch=%s %s",
	   ix86_arch_string, prefix, suffix, sw);
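  /* The repeated pattern above makes explicit options win over -march=
     defaults: a PTA_* bit is copied into ix86_isa_flags only when the
     user did not set the corresponding ISA flag, so for example
     "-march=amdfam10 -mno-sse4a" still leaves SSE4A disabled.  */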
  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT)
	  {
	    if (!(processor_alias_table[i].flags & PTA_64BIT))
	      {
		if (ix86_tune_defaulted)
		  {
		    ix86_tune_string = "x86-64";
		    for (i = 0; i < pta_size; i++)
		      if (! strcmp (ix86_tune_string,
				    processor_alias_table[i].name))
			break;
		    ix86_schedule = processor_alias_table[i].schedule;
		    ix86_tune = processor_alias_table[i].processor;
		  }
		else
		  error ("CPU you selected does not support x86-64 "
			 "instruction set");
	      }
	  }
	else
	  {
	    /* Adjust tuning when compiling for 32-bit ABI.  */
	    switch (ix86_tune)
	      {
	      case PROCESSOR_GENERIC64:
		ix86_tune = PROCESSOR_GENERIC32;
		ix86_schedule = CPU_PENTIUMPRO;
		break;

	      default:
		break;
	      }
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOV
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;
	break;
      }

  if (ix86_tune_specified && i == pta_size)
    error ("bad value (%s) for %stune=%s %s",
	   ix86_tune_string, prefix, suffix, sw);

  ix86_tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
  if (ix86_tune_ctrl_string)
    {
      /* Parse the tune ctrl string in the following form:
	 [^]tune_name1,[^]tune_name2,...  */
      char *next_feature_string = NULL;
      char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
      char *orig = curr_feature_string;
      int i;
      do
	{
	  bool clear = false;

	  next_feature_string = strchr (curr_feature_string, ',');
	  if (next_feature_string)
	    *next_feature_string++ = '\0';
	  if (*curr_feature_string == '^')
	    {
	      curr_feature_string++;
	      clear = true;
	    }
	  for (i = 0; i < X86_TUNE_LAST; i++)
	    {
	      if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
		{
		  ix86_tune_features[i] = !clear;
		  break;
		}
	    }
	  if (i == X86_TUNE_LAST)
	    warning (0, "Unknown parameter to option -mtune-ctrl: %s",
		     clear ? curr_feature_string - 1 : curr_feature_string);
	  curr_feature_string = next_feature_string;
	}
      while (curr_feature_string);
      free (orig);
    }
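  /* For example, -mtune-ctrl=feature_a,^feature_b (using names from
     ix86_tune_feature_names[]) force-enables the first tuning knob and
     force-disables the second, overriding the -mtune= defaults computed
     above.  */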
#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (flag_asynchronous_unwind_tables == 2)
	flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
  ix86_tune_cost = processor_target_table[ix86_tune].cost;
  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = ix86_tune_cost;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  /* Validate -mregparm= value.  */
  if (global_options_set.x_ix86_regparm)
    {
      if (TARGET_64BIT)
	warning (0, "-mregparm is ignored in 64-bit mode");
      if (ix86_regparm > REGPARM_MAX)
	{
	  error ("-mregparm=%d is not between 0 and %d",
		 ix86_regparm, REGPARM_MAX);
	  ix86_regparm = 0;
	}
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip
	= processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip
	= processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;

  /* Provide default for -mbranch-cost= value.  */
  if (!global_options_set.x_ix86_branch_cost)
    ix86_branch_cost = ix86_cost->branch_cost;
  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);

      if (TARGET_RTD)
	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
    }
  else
    {
      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;

      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;

      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use it when the programmer takes care to keep the stack from being
	 destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_finite_math_only)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;
  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;

  /* Enable SSE prefetch.  */
  if (TARGET_SSE || (TARGET_PRFCHW && !TARGET_3DNOW))
    x86_prefetch_sse = true;

  /* Enable prefetch{,w} instructions for -m3dnow.  */
  if (TARGET_3DNOW)
    ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW & ~ix86_isa_flags_explicit;

  /* Enable popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2 || TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;

  /* Enable lzcnt instruction for -mabm.  */
  if (TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
  /* Validate -mpreferred-stack-boundary= value or default it to
     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
  if (global_options_set.x_ix86_preferred_stack_boundary_arg)
    {
      int min = (TARGET_64BIT ? (TARGET_SSE ? 4 : 3) : 2);
      int max = (TARGET_SEH ? 4 : 12);

      if (ix86_preferred_stack_boundary_arg < min
	  || ix86_preferred_stack_boundary_arg > max)
	{
	  if (min == max)
	    error ("-mpreferred-stack-boundary is not supported "
		   "for this target");
	  else
	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
		   ix86_preferred_stack_boundary_arg, min, max);
	}
      else
	ix86_preferred_stack_boundary
	  = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
    }
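  /* The option value is log2 of the alignment in bytes:
     -mpreferred-stack-boundary=4 requests (1 << 4) * BITS_PER_UNIT
     = 128 bits, i.e. the 16-byte alignment the 64-bit psABI expects.  */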
  /* Set the default value for -mstackrealign.  */
  if (ix86_force_align_arg_pointer == -1)
    ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;

  /* Validate -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (global_options_set.x_ix86_incoming_stack_boundary_arg)
    {
      if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
	  || ix86_incoming_stack_boundary_arg > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
      else
	{
	  ix86_user_incoming_stack_boundary
	    = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }
  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && ! TARGET_SSE)
    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);

  if (global_options_set.x_ix86_fpmath)
    {
      if (ix86_fpmath & FPMATH_SSE)
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	}
    }
  else
    ix86_fpmath = TARGET_FPMATH_DEFAULT;
  /* If the i387 is disabled, then do not return values in it. */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;

  /* Use external vectorized library in vectorizing intrinsics.  */
  if (global_options_set.x_ix86_veclibabi_type)
    switch (ix86_veclibabi_type)
      {
      case ix86_veclibabi_type_svml:
	ix86_veclib_handler = ix86_veclibabi_svml;
	break;

      case ix86_veclibabi_type_acml:
	ix86_veclib_handler = ix86_veclibabi_acml;
	break;

      default:
	gcc_unreachable ();
      }
  if ((!USE_IX86_FRAME_POINTER
       || (x86_accumulate_outgoing_args & ix86_tune_mask))
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or %saccumulate-outgoing-args%s for correctness",
		 prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
		 "for correctness", prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
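  /* On a typical ELF target ASM_GENERATE_INTERNAL_LABEL produces
     something like "*.LX0" here (the exact spelling is target-defined);
     chopping at the 'X' leaves just the local-label prefix, which later
     output code uses to recognize internal labels.  */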
  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_tune_cost->simultaneous_prefetches,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ix86_tune_cost->prefetch_block,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			 ix86_tune_cost->l1_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE,
			 ix86_tune_cost->l2_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Enable software prefetching at -O3 for CPUs where prefetching is
     helpful.  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && (optimize >= 3 || flag_profile_use)
      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
    flag_prefetch_loop_arrays = 1;

  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT && !flag_split_stack)
    targetm.expand_builtin_va_start = NULL;
  if (TARGET_64BIT)
    {
      ix86_gen_leave = gen_leave_rex64;
      if (Pmode == DImode)
	{
	  ix86_gen_monitor = gen_sse3_monitor64_di;
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_di;
	}
      else
	{
	  ix86_gen_monitor = gen_sse3_monitor64_si;
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_si;
	}
    }
  else
    {
      ix86_gen_leave = gen_leave;
      ix86_gen_monitor = gen_sse3_monitor;
    }

  if (Pmode == DImode)
    {
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
    }
  else
    {
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
    }
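  /* These ix86_gen_* hooks let the rest of the back end emit
     Pmode-sized operations without re-testing Pmode at each use site;
     e.g. emit_insn (ix86_gen_add3 (dst, a, b)) expands to the DImode or
     SImode add pattern chosen here.  */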
#ifdef USE_IX86_CLD
  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT)
    target_flags |= MASK_CLD & ~target_flags_explicit;
#endif

  if (!TARGET_64BIT && flag_pic)
    {
      if (flag_fentry > 0)
	sorry ("-mfentry isn%'t supported for 32-bit in combination "
	       "with -fpic");
      flag_fentry = 0;
    }
  else if (TARGET_SEH)
    {
      if (flag_fentry == 0)
	sorry ("-mno-fentry isn%'t compatible with SEH");
      flag_fentry = 1;
    }
  else if (flag_fentry < 0)
    {
#if defined(PROFILE_BEFORE_PROLOGUE)
      flag_fentry = 1;
#else
      flag_fentry = 0;
#endif
    }
  /* When not optimizing for size, enable vzeroupper optimization for
     TARGET_AVX with -fexpensive-optimizations and split 32-byte
     AVX unaligned load/store.  */
  if (!optimize_size)
    {
      if (flag_expensive_optimizations
	  && !(target_flags_explicit & MASK_VZEROUPPER))
	target_flags |= MASK_VZEROUPPER;
      if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
	  && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
	target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
      if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
	  && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
	target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
      /* Enable 128-bit AVX instruction generation
	 for the auto-vectorizer.  */
      if (TARGET_AVX128_OPTIMAL
	  && !(target_flags_explicit & MASK_PREFER_AVX128))
	target_flags |= MASK_PREFER_AVX128;
    }
  if (ix86_recip_name)
    {
      char *p = ASTRDUP (ix86_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = RECIP_MASK_ALL;
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for -mrecip=%s", q);
		  invert = false;
		  mask = RECIP_MASK_NONE;
		}
	    }

	  recip_mask_explicit |= mask;
	  if (invert)
	    recip_mask &= ~mask;
	  else
	    recip_mask |= mask;
	}
    }

  if (TARGET_RECIP)
    recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
  else if (target_flags_explicit & MASK_RECIP)
    recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
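  /* Example: -mrecip=all,!div first ORs in RECIP_MASK_ALL and then
     clears RECIP_MASK_DIV, enabling every reciprocal approximation
     except scalar division.  */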
  /* Default long double to 64-bit for Bionic.  */
  if (TARGET_HAS_BIONIC
      && !(target_flags_explicit & MASK_LONG_DOUBLE_64))
    target_flags |= MASK_LONG_DOUBLE_64;

  /* Save the initial options in case the user does function specific
     options.  */
  if (main_args_p)
    target_option_default_node = target_option_current_node
      = build_target_option_node ();

  /* Handle stack protector */
  if (!global_options_set.x_ix86_stack_protector_guard)
    ix86_stack_protector_guard = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
}
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
  static struct register_pass_info insert_vzeroupper_info
    = { pass_insert_vzeroupper, "reload",
	1, PASS_POS_INSERT_AFTER
      };

  ix86_option_override_internal (true);

  /* This needs to be done at start up.  It's convenient to do it here.  */
  register_pass (&insert_vzeroupper_info);
}
/* Update register usage after having seen the compiler flags.  */

static void
ix86_conditional_register_usage (void)
{
  int i, c_mask;
  unsigned int j;

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* For 32-bit targets, squash the REX registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
    }

  /*  See the definition of CALL_USED_REGISTERS in i386.h.  */
  c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
	    : TARGET_64BIT ? (1 << 2)
	    : (1 << 1));

  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* Set/reset conditionally defined registers from
	 CALL_USED_REGISTERS initializer.  */
      if (call_used_regs[i] > 1)
	call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
	 as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	  && call_used_regs[i])
	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
}
/* Save the current options */

static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
  ptr->ix86_target_flags_explicit = target_flags_explicit;
  ptr->x_recip_mask_explicit = recip_mask_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options */

static void
ix86_function_specific_restore (struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  target_flags_explicit = ptr->ix86_target_flags_explicit;
  recip_mask_explicit = ptr->x_recip_mask_explicit;

  /* Recreate the arch feature tests if the arch changed */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests */
  if (old_tune != ix86_tune)
    {
      ix86_tune_mask = 1u << ix86_tune;
      for (i = 0; i < X86_TUNE_LAST; ++i)
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }
}
/* Print the current options */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch,
	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->arch]
	    : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune,
	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->tune]
	    : "<unknown>"));

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;		/* option string */
    size_t len;			/* length of option */
    enum ix86_opt_type type;	/* type of option */
    int opt;			/* option if it is set */
    int mask;			/* mask if option has no effect */
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
    IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("avx2",	OPT_mavx2),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("fma",	OPT_mfma),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),
    IX86_ATTR_ISA ("rtm",	OPT_mrtm),
    IX86_ATTR_ISA ("hle",	OPT_mhle),
    IX86_ATTR_ISA ("prfchw",	OPT_mprfchw),
    IX86_ATTR_ISA ("rdseed",	OPT_mrdseed),
    IX86_ATTR_ISA ("adx",	OPT_madx),
    IX86_ATTR_ISA ("fxsr",	OPT_mfxsr),
    IX86_ATTR_ISA ("xsave",	OPT_mxsave),
    IX86_ATTR_ISA ("xsaveopt",	OPT_mxsaveopt),

    /* enum options */
    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };

  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      opt = N_OPTS;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}

      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}

      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (&global_options, &global_options_set,
			      &decoded, input_location);
	}

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    target_flags |= mask;
	  else
	    target_flags &= ~mask;
	}

      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}

      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (&global_options, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }

  return ret;
}
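/* Example: __attribute__((target("arch=core2,sse4.1,no-avx"))) arrives
   here as a single comma-separated string; "arch=" is stashed in
   p_strings[] for later processing, while "sse4.1" and "no-avx" toggle
   their ISA flags immediately via ix86_handle_option.  */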
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

static tree
ix86_valid_target_attribute_tree (tree args)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings,
					     &enum_opts_set))
    return error_mark_node;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->x_ix86_isa_flags
      || target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
      else if (!TARGET_64BIT && TARGET_SSE)
	{
	  ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	  global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
	}

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node ();

      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      global_options_set.x_ix86_fpmath = orig_fpmath_set;

      /* Free up memory allocated to hold the strings */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	free (option_strings[i]);
    }

  return t;
}
/* Hook to validate attribute((target("string"))).  */

static bool
ix86_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       tree args,
			       int ARG_UNUSED (flags))
{
  struct cl_target_option cur_target;
  bool ret = true;

  /* attribute((target("default"))) does nothing, beyond
     affecting multi-versioning.  */
  if (TREE_VALUE (args)
      && TREE_CODE (TREE_VALUE (args)) == STRING_CST
      && TREE_CHAIN (args) == NULL_TREE
      && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
    return true;

  tree old_optimize = build_optimization_node ();
  tree new_target, new_optimize;
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options);
  new_target = ix86_valid_target_attribute_tree (args);
  new_optimize = build_optimization_node ();

  if (new_target == error_mark_node)
    ret = false;

  else if (fndecl && new_target)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (old_optimize));

  return ret;
}
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
	 function can inline an SSE2 function but an SSE2 function can't
	 inline an SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;

      else if (caller_opts->tune != callee_opts->tune)
	ret = false;

      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
	ret = false;

      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;

      else
	ret = true;
    }

  return ret;
}
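/* Consequently a caller compiled with target("avx") may inline a callee
   compiled with target("sse2"), since the callee's ISA flags are a
   subset of the caller's, but not vice versa: the inlined body could
   otherwise execute instructions the caller's context never enabled.  */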
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Invalidate ix86_previous_fndecl cache.  */
void
ix86_reset_previous_fndecl (void)
{
  ix86_previous_fndecl = NULL_TREE;
}

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
		       : NULL_TREE);

      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;

      else if (new_tree)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  target_reinit ();
	}

      else if (old_tree)
	{
	  struct cl_target_option *def
	    = TREE_TARGET_OPTION (target_option_current_node);

	  cl_target_option_restore (&global_options, def);
	  target_reinit ();
	}
    }
}
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section *x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
  ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live.  (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
                           cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
        return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
         disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
          && ix86_function_type_abi (type) == SYSV_ABI)
        return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
         call-clobbered register for the address of the target function.
         Make sure that all such registers are not used for passing
         parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
          || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
        {
          if (ix86_function_regparm (type, NULL) >= 3)
            {
              /* ??? Need to count the actual number of registers to be used,
                 not the possible number of registers.  Fix later.  */
              return false;
            }
        }
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
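/* Worked example (illustrative, not compiled here): in 32-bit PIC code,

     extern int g (int);
     int f (int x) { return g (x); }

   fails the first test above, because a direct call to the global
   function g goes through the PLT and the PLT needs %ebx live; with
   -fno-pic, or when g binds locally, the tail call is allowed
   (assuming no stack-alignment or return-value mismatch).  */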
/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
   and "sseregparm" calling convention attributes;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
                             tree args,
                             int flags ATTRIBUTE_UNUSED,
                             bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall, and thiscall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and regparm attributes are not compatible");

      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
        error ("regparm and thiscall attributes are not compatible");

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
        {
          warning (OPT_Wattributes,
                   "%qE attribute requires an integer constant argument",
                   name);
          *no_add_attrs = true;
        }
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
        {
          warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
                   name, REGPARM_MAX);
          *no_add_attrs = true;
        }

      return NULL_TREE;
    }

  if (TARGET_64BIT)
    {
      /* Do not warn when emulating the MS ABI.  */
      if ((TREE_CODE (*node) != FUNCTION_TYPE
           && TREE_CODE (*node) != METHOD_TYPE)
          || ix86_function_type_abi (*node) != MS_ABI)
        warning (OPT_Wattributes, "%qE attribute ignored",
                 name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and cdecl attributes are not compatible");
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and stdcall attributes are not compatible");
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and regparm attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and thiscall attributes are not compatible");
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
        error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        error ("stdcall and fastcall attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
        error ("stdcall and thiscall attributes are not compatible");
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
        error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and cdecl attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
        error ("cdecl and thiscall attributes are not compatible");
    }
  else if (is_attribute_p ("thiscall", name))
    {
      if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
        warning (OPT_Wattributes, "%qE attribute is used for non-class method",
                 name);
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
        error ("stdcall and thiscall attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
        error ("fastcall and thiscall attributes are not compatible");
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
        error ("cdecl and thiscall attributes are not compatible");
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}
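/* Declarations the handler above is asked to validate look like this
   (illustrative user code, not compiled here):

     int __attribute__((regparm (3))) f (int a, int b, int c);
     int __attribute__((fastcall)) g (int a, int b);
     int __attribute__((fastcall, regparm (2))) h (int a);

   The last declaration is rejected with "fastcall and regparm
   attributes are not compatible" by the checks above.  */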
/* The transactional memory builtins are implicitly regparm or fastcall
   depending on the ABI.  Override the generic do-nothing attribute that
   these builtins were declared with, and replace it with one of the two
   attributes that we expect elsewhere.  */

static tree
ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
                                  tree args ATTRIBUTE_UNUSED,
                                  int flags ATTRIBUTE_UNUSED,
                                  bool *no_add_attrs)
{
  tree alt;

  /* In no case do we want to add the placeholder attribute.  */
  *no_add_attrs = true;

  /* The 64-bit ABI is unchanged for transactional memory.  */
  if (TARGET_64BIT)
    return NULL_TREE;

  /* ??? Is there a better way to validate 32-bit windows?  We have
     cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
  if (CHECK_STACK_LIMIT > 0)
    alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
  else
    {
      alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
      alt = tree_cons (get_identifier ("regparm"), alt, NULL);
    }
  decl_attributes (node, alt, flags);

  return NULL_TREE;
}
/* This function determines from TYPE the calling-convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
        ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
        ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
        ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
        ret |= IX86_CALLCVT_THISCALL;

      /* Regparm isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
        {
          if (lookup_attribute ("regparm", attrs))
            ret |= IX86_CALLCVT_REGPARM;
          if (lookup_attribute ("sseregparm", attrs))
            ret |= IX86_CALLCVT_SSEREGPARM;
        }

      if (IX86_BASE_CALLCVT (ret) != 0)
        return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
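/* A rough decision table for the function above (32-bit only;
   attribute names as found in TYPE_ATTRIBUTES -- a sketch, not an
   exhaustive enumeration):

     stdcall attribute          -> IX86_CALLCVT_STDCALL
     fastcall attribute         -> IX86_CALLCVT_FASTCALL
     regparm only               -> IX86_CALLCVT_CDECL | IX86_CALLCVT_REGPARM
     no attribute, -mrtd,
       not stdarg               -> IX86_CALLCVT_STDCALL
     otherwise                  -> IX86_CALLCVT_CDECL
       (or IX86_CALLCVT_THISCALL for an MS-ABI method type).  */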
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
            ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
        {
          regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
          return regparm;
        }
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
        {
          int local_regparm, globals = 0, regno;

          /* Make sure no regparm register is taken by a
             fixed register variable.  */
          for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
            if (fixed_regs[local_regparm])
              break;

          /* We don't want to use regparm(3) for nested functions as
             these use a static chain pointer in the third argument.  */
          if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
            local_regparm = 2;

          /* In 32-bit mode save a register for the split stack.  */
          if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
            local_regparm = 2;

          /* Each fixed register usage increases register pressure,
             so fewer registers should be used for argument passing.
             This functionality can be overridden by an explicit
             regparm value.  */
          for (regno = AX_REG; regno <= DI_REG; regno++)
            if (fixed_regs[regno])
              globals++;

          local_regparm
            = globals < local_regparm ? local_regparm - globals : 0;

          if (local_regparm > regparm)
            regparm = local_regparm;
        }
    }

  return regparm;
}
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
        {
          if (warn)
            {
              if (decl)
                error ("calling %qD with attribute sseregparm without "
                       "SSE/SSE2 enabled", decl);
              else
                error ("calling %qT with attribute sseregparm without "
                       "SSE/SSE2 enabled", type);
            }
          return 0;
        }

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
        return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
                               TYPE_ATTRIBUTES (fntype));
      if (attr)
        return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep aggregate
         return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
        return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
     of args is fixed, but if the number is variable then the caller
     must pop them all.  RTD can't be used for library calls now
     because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
               | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
        return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
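/* Example of the effect (illustrative): for

     int __attribute__((stdcall)) f (int a, int b);

   the function above returns 8, so the callee's epilogue pops both
   word-sized arguments itself (in AT&T syntax, "ret $8"); a stdarg
   function, or a plain cdecl function, returns 0 and the caller does
   the popping.  */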
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */

static bool
ix86_legitimate_combined_insn (rtx insn)
{
  /* Check operand constraints in case hard registers were propagated
     into insn pattern.  This check prevents combine pass from
     generating insn patterns with invalid hard register operands.
     These invalid insns can eventually confuse reload to error out
     with a spill failure.  See also PRs 46829 and 46843.  */
  if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
    {
      int i;

      extract_insn (insn);
      preprocess_constraints ();

      for (i = 0; i < recog_data.n_operands; i++)
        {
          rtx op = recog_data.operand[i];
          enum machine_mode mode = GET_MODE (op);
          struct operand_alternative *op_alt;
          int offset = 0;
          bool win;
          int j;

          /* A unary operator may be accepted by the predicate, but it
             is irrelevant for matching constraints.  */
          if (UNARY_P (op))
            op = XEXP (op, 0);

          if (GET_CODE (op) == SUBREG)
            {
              if (REG_P (SUBREG_REG (op))
                  && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
                offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
                                              GET_MODE (SUBREG_REG (op)),
                                              SUBREG_BYTE (op),
                                              GET_MODE (op));
              op = SUBREG_REG (op);
            }

          if (!(REG_P (op) && HARD_REGISTER_P (op)))
            continue;

          op_alt = recog_op_alt[i];

          /* Operand has no constraints, anything is OK.  */
          win = !recog_data.n_alternatives;

          for (j = 0; j < recog_data.n_alternatives; j++)
            {
              if (op_alt[j].anything_ok
                  || (op_alt[j].matches != -1
                      && operands_match_p
                           (recog_data.operand[i],
                            recog_data.operand[op_alt[j].matches]))
                  || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
                {
                  win = true;
                  break;
                }
            }

          if (!win)
            return false;
        }
    }

  return true;
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
ix86_asan_shadow_offset (void)
{
  return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
                                     : HOST_WIDE_INT_C (0x7fff8000))
                     : (HOST_WIDE_INT_1 << 29);
}
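/* The constant returned above feeds AddressSanitizer's usual shadow
   mapping, shadow = (addr >> 3) + offset; e.g. under the 64-bit LP64
   Linux value the byte at ADDR is shadowed at
   (ADDR >> 3) + 0x7fff8000.  (A sketch: the shift of 3 is ASan's
   fixed shadow granularity of 8 application bytes per shadow byte.)  */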
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
        return (regno < REGPARM_MAX
                || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
        return (regno < REGPARM_MAX
                || (TARGET_MMX && MMX_REGNO_P (regno)
                    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
                || (TARGET_SSE && SSE_REGNO_P (regno)
                    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
        return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
          && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
        return true;
    }

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */

  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
                   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
          && type && TREE_CODE (type) != VECTOR_TYPE);
}
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL; it depends on
   the ABI in use.  */

int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call abi used.  */

enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
        {
          if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
            abi = MS_ABI;
        }
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
        abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}

static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
        error_at (DECL_SOURCE_LOCATION (fn),
                  "ms_hook_prologue is not compatible with nested function");
      else
        return true;
    }
  return false;
}

/* Returns value SYSV_ABI, MS_ABI dependent on fndecl.  */

static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (! fndecl)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call abi used.  */

enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
                                tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
        fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
        {
          /* leaq [%rsp + 0], %rsp  */
          asm_fprintf (asm_out_file, ASM_BYTE
                       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
        }
      else
        {
          /* movl.s %edi, %edi
             push   %ebp
             movl.s %esp, %ebp  */
          asm_fprintf (asm_out_file, ASM_BYTE
                       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
        }
    }
}
extern void init_regs (void);

/* Implementation of the call-abi-switching target hook.  For FNDECL,
   select the matching set of call registers.  See also
   ix86_conditional_register_usage for more details.  */

void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* 64-bit MS and SYSV ABI have different set of call used registers.  Avoid
   expensive re-initialization of init_regs each time we switch function
   context since this is needed only during RTL expansion.  */

static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT
      && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
                      tree fntype,    /* tree ptr for function decl */
                      rtx libname,    /* SYMBOL_REF of library name or 0 */
                      tree fndecl,
                      int caller)
{
  struct cgraph_local_info *i;

  memset (cum, 0, sizeof (*cum));

  if (fndecl)
    {
      i = cgraph_local_info (fndecl);
      cum->call_abi = ix86_function_abi (fndecl);
    }
  else
    {
      i = NULL;
      cum->call_abi = ix86_function_type_abi (fntype);
    }

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */

  if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
    sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
           "or subtarget optimization implying it");
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    cum->nregs = (cum->call_abi == SYSV_ABI
                  ? X86_64_REGPARM_MAX
                  : X86_64_MS_REGPARM_MAX);
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
        cum->sse_nregs = (cum->call_abi == SYSV_ABI
                          ? X86_64_SSE_REGPARM_MAX
                          : X86_64_MS_SSE_REGPARM_MAX);
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
  if (i && i->local && i->can_change_signature)
    fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
                      ? (!prototype_p (fntype) || stdarg_p (fntype))
                      : !libname);

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
         in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
        {
          cum->nregs = 0;
          cum->sse_nregs = 0;
          cum->mmx_nregs = 0;
          return;
        }

      /* Use ecx and edx registers if function has fastcall attribute,
         else look for regparm information.  */
      if (fntype)
        {
          unsigned int ccvt = ix86_get_callcvt (fntype);
          if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
            {
              cum->nregs = 1;
              cum->fastcall = 1; /* Same first register as in fastcall.  */
            }
          else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
            {
              cum->nregs = 2;
              cum->fastcall = 1;
            }
          else
            cum->nregs = ix86_function_regparm (fntype, fndecl);
        }

      /* Set up the number of SSE registers used for passing SFmode
         and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }
}
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with the vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.  */

static enum machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32)
          /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
          && TYPE_VECTOR_SUBPARTS (type) > 1)
        {
          enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

          if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
            mode = MIN_MODE_VECTOR_FLOAT;
          else
            mode = MIN_MODE_VECTOR_INT;

          /* Get the mode which has this inner mode and number of units.  */
          for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
            if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
                && GET_MODE_INNER (mode) == innermode)
              {
                if (size == 32 && !TARGET_AVX)
                  {
                    static bool warnedavx;

                    if (cum && cum->warn_avx && !warnedavx)
                      {
                        warnedavx = true;
                        warning (0, "AVX vector argument without AVX "
                                 "enabled changes the ABI");
                      }
                    return TYPE_MODE (type);
                  }
                else if ((size == 8 || size == 16) && !TARGET_SSE)
                  {
                    static bool warnedsse;

                    if (cum && cum->warn_sse && !warnedsse)
                      {
                        warnedsse = true;
                        warning (0, "SSE vector argument without SSE "
                                 "enabled changes the ABI");
                      }
                    return mode;
                  }
                else
                  return mode;
              }

          gcc_unreachable ();
        }
    }

  return mode;
}
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
                     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
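/* Worked example of the rules above (illustrative): the single
   eightbyte of struct { int i; float f; } holds an int
   (X86_64_INTEGERSI_CLASS) and a float (X86_64_SSESF_CLASS); rule #4
   merges the pair to X86_64_INTEGERSI_CLASS, so the whole struct
   travels in one integer register.  */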
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, const_tree type,
                   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words
    = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 32 bytes on the stack.  */
      if (bytes > 32)
        return 0;

      for (i = 0; i < words; i++)
        classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
         signal memory class, so handle it as special case.  */
      if (!words)
        {
          classes[0] = X86_64_NO_CLASS;
          return 1;
        }

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
        {
        case RECORD_TYPE:
          /* And now merge the fields of structure.  */
          for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
            {
              if (TREE_CODE (field) == FIELD_DECL)
                {
                  int num;

                  if (TREE_TYPE (field) == error_mark_node)
                    continue;

                  /* Bitfields are always classified as integer.  Handle them
                     early, since later code would consider them to be
                     misaligned integers.  */
                  if (DECL_BIT_FIELD (field))
                    {
                      for (i = (int_bit_position (field)
                                + (bit_offset % 64)) / 8 / 8;
                           i < ((int_bit_position (field) + (bit_offset % 64))
                                + tree_low_cst (DECL_SIZE (field), 0)
                                + 63) / 8 / 8; i++)
                        classes[i]
                          = merge_classes (X86_64_INTEGER_CLASS,
                                           classes[i]);
                    }
                  else
                    {
                      int pos;

                      type = TREE_TYPE (field);

                      /* Flexible array member is ignored.  */
                      if (TYPE_MODE (type) == BLKmode
                          && TREE_CODE (type) == ARRAY_TYPE
                          && TYPE_SIZE (type) == NULL_TREE
                          && TYPE_DOMAIN (type) != NULL_TREE
                          && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
                              == NULL_TREE))
                        {
                          static bool warned;

                          if (!warned && warn_psabi)
                            {
                              warned = true;
                              inform (input_location,
                                      "the ABI of passing struct with"
                                      " a flexible array member has"
                                      " changed in GCC 4.4");
                            }
                          continue;
                        }
                      num = classify_argument (TYPE_MODE (type), type,
                                               subclasses,
                                               (int_bit_position (field)
                                                + bit_offset) % 256);
                      if (!num)
                        return 0;
                      pos = (int_bit_position (field)
                             + (bit_offset % 64)) / 8 / 8;
                      for (i = 0; i < num && (i + pos) < words; i++)
                        classes[i + pos]
                          = merge_classes (subclasses[i], classes[i + pos]);
                    }
                }
            }
          break;

        case ARRAY_TYPE:
          /* Arrays are handled as small records.  */
          {
            int num;
            num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
                                     TREE_TYPE (type), subclasses, bit_offset);
            if (!num)
              return 0;

            /* The partial classes are now full classes.  */
            if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
              subclasses[0] = X86_64_SSE_CLASS;
            if (subclasses[0] == X86_64_INTEGERSI_CLASS
                && !((bit_offset % 64) == 0 && bytes == 4))
              subclasses[0] = X86_64_INTEGER_CLASS;

            for (i = 0; i < words; i++)
              classes[i] = subclasses[i % num];

            break;
          }
        case UNION_TYPE:
        case QUAL_UNION_TYPE:
          /* Unions are similar to RECORD_TYPE but offset is always 0.  */
          for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
            {
              if (TREE_CODE (field) == FIELD_DECL)
                {
                  int num;

                  if (TREE_TYPE (field) == error_mark_node)
                    continue;

                  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
                                           TREE_TYPE (field), subclasses,
                                           bit_offset);
                  if (!num)
                    return 0;
                  for (i = 0; i < num; i++)
                    classes[i] = merge_classes (subclasses[i], classes[i]);
                }
            }
          break;

        default:
          gcc_unreachable ();
        }

      if (words > 2)
        {
          /* When size > 16 bytes, if the first one isn't
             X86_64_SSE_CLASS or any other ones aren't
             X86_64_SSEUP_CLASS, everything should be passed in
             memory.  */
          if (classes[0] != X86_64_SSE_CLASS)
            return 0;

          for (i = 1; i < words; i++)
            if (classes[i] != X86_64_SSEUP_CLASS)
              return 0;
        }

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
        {
          /* If one class is MEMORY, everything should be passed in
             memory.  */
          if (classes[i] == X86_64_MEMORY_CLASS)
            return 0;

          /* The X86_64_SSEUP_CLASS should be always preceded by
             X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
          if (classes[i] == X86_64_SSEUP_CLASS
              && classes[i - 1] != X86_64_SSE_CLASS
              && classes[i - 1] != X86_64_SSEUP_CLASS)
            {
              /* The first one should never be X86_64_SSEUP_CLASS.  */
              gcc_assert (i != 0);
              classes[i] = X86_64_SSE_CLASS;
            }

          /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
             everything should be passed in memory.  */
          if (classes[i] == X86_64_X87UP_CLASS
              && (classes[i - 1] != X86_64_X87_CLASS))
            {
              static bool warned;

              /* The first one should never be X86_64_X87UP_CLASS.  */
              gcc_assert (i != 0);
              if (!warned && warn_psabi)
                {
                  warned = true;
                  inform (input_location,
                          "the ABI of passing union with long double"
                          " has changed in GCC 4.4");
                }
              return 0;
            }
        }
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
        mode_alignment = 128;
      else if (mode == XCmode)
        mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
        mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
        return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case SDmode:
    case DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      {
        int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);

        if (size <= 32)
          {
            classes[0] = X86_64_INTEGERSI_CLASS;
            return 1;
          }
        else if (size <= 64)
          {
            classes[0] = X86_64_INTEGER_CLASS;
            return 1;
          }
        else if (size <= 64+32)
          {
            classes[0] = X86_64_INTEGER_CLASS;
            classes[1] = X86_64_INTEGERSI_CLASS;
            return 2;
          }
        else if (size <= 64+64)
          {
            classes[0] = classes[1] = X86_64_INTEGER_CLASS;
            return 2;
          }
        else
          gcc_unreachable ();
      }
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case COImode:
    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();
    case CTImode:
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
        classes[0] = X86_64_SSESF_CLASS;
      else
        classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
        return 1;
      else
        {
          static bool warned;

          if (!warned && warn_psabi)
            {
              warned = true;
              inform (input_location,
                      "the ABI of passing structure with complex float"
                      " member has changed in GCC 4.4");
            }
          classes[1] = X86_64_SSESF_CLASS;
          return 2;
        }
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V1TImode:
    case V1DImode:
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;
    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
        return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
        classes[0] = X86_64_INTEGERSI_CLASS;
      else
        classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
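/* Worked example (illustrative, SysV x86-64 psABI): for

     struct s { double d; int i; };

   the function above classifies two eightbytes, classes[0] =
   X86_64_SSEDF_CLASS (the double at bit offset 0) and classes[1] =
   X86_64_INTEGERSI_CLASS (the int at bit offset 64), and returns 2,
   so the struct is passed in one SSE and one integer register.  */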
/* Examine the argument and return the number of registers required in each
   class.  Return 0 iff parameter should be passed in memory.  */

static int
examine_argument (enum machine_mode mode, const_tree type, int in_return,
                  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
        (*int_nregs)++;
        break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
        (*sse_nregs)++;
        break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
        break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
        if (!in_return)
          return 0;
        break;
      case X86_64_COMPLEX_X87_CLASS:
        return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
        gcc_unreachable ();
      }
  return 1;
}
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
                     const_tree type, int in_return, int nintregs, int nsseregs,
                     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
                         &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
        {
          if (!issued_sse_ret_error)
            {
              error ("SSE register return with SSE disabled");
              issued_sse_ret_error = true;
            }
        }
      else if (!issued_sse_arg_error)
        {
          error ("SSE register argument with SSE disabled");
          issued_sse_arg_error = true;
        }
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
          || regclass[i] == X86_64_X87UP_CLASS
          || regclass[i] == X86_64_COMPLEX_X87_CLASS)
        {
          if (!issued_x87_ret_error)
            {
              error ("x87 register return with x87 disabled");
              issued_x87_ret_error = true;
            }
          return NULL;
        }

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
        return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
        if (mode != BLKmode)
          return gen_reg_or_parallel (mode, orig_mode,
                                      SSE_REGNO (sse_regno));
        break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
        return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
        /* Zero sized array, struct or class.  */
        return NULL;
      default:
        gcc_unreachable ();
      }
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
                                SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
                                SSE_REGNO (sse_regno));
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
        {
        case X86_64_NO_CLASS:
          break;
        case X86_64_INTEGER_CLASS:
        case X86_64_INTEGERSI_CLASS:
          /* Merge TImodes on aligned occasions here too.  */
          if (i * 8 + 8 > bytes)
            tmpmode
              = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
          else if (regclass[i] == X86_64_INTEGERSI_CLASS)
            tmpmode = SImode;
          else
            tmpmode = DImode;
          /* We've requested 24 bytes we
             don't have mode for.  Use DImode.  */
          if (tmpmode == BLKmode)
            tmpmode = DImode;
          exp[nexps++]
            = gen_rtx_EXPR_LIST (VOIDmode,
                                 gen_rtx_REG (tmpmode, *intreg),
                                 GEN_INT (i * 8));
          intreg++;
          break;
        case X86_64_SSESF_CLASS:
          exp[nexps++]
            = gen_rtx_EXPR_LIST (VOIDmode,
                                 gen_rtx_REG (SFmode,
                                              SSE_REGNO (sse_regno)),
                                 GEN_INT (i * 8));
          sse_regno++;
          break;
        case X86_64_SSEDF_CLASS:
          exp[nexps++]
            = gen_rtx_EXPR_LIST (VOIDmode,
                                 gen_rtx_REG (DFmode,
                                              SSE_REGNO (sse_regno)),
                                 GEN_INT (i * 8));
          sse_regno++;
          break;
        case X86_64_SSE_CLASS:
          pos = i;
          switch (n)
            {
            case 1:
              tmpmode = DImode;
              break;
            case 2:
              if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
                {
                  tmpmode = TImode;
                  i++;
                }
              else
                tmpmode = DImode;
              break;
            case 4:
              gcc_assert (i == 0
                          && regclass[1] == X86_64_SSEUP_CLASS
                          && regclass[2] == X86_64_SSEUP_CLASS
                          && regclass[3] == X86_64_SSEUP_CLASS);
              tmpmode = OImode;
              i += 3;
              break;
            default:
              gcc_unreachable ();
            }
          exp[nexps++]
            = gen_rtx_EXPR_LIST (VOIDmode,
                                 gen_rtx_REG (tmpmode,
                                              SSE_REGNO (sse_regno)),
                                 GEN_INT (pos * 8));
          sse_regno++;
          break;
        default:
          gcc_unreachable ();
        }
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp[i];
  return ret;
}
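/* Continuing the example (illustrative): for
   struct s { double d; int i; } the container built above is a
   PARALLEL of two EXPR_LISTs, roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:SI di)   (const_int 8))])

   for a first argument: the double travels in the first SSE register
   and the int in the first free integer argument register, each
   tagged with its byte offset within the struct.  */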
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                         const_tree type, HOST_WIDE_INT bytes,
                         HOST_WIDE_INT words)
{
  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
        break;
      /* FALLTHRU */

    case DImode:
    case SImode:
    case HImode:
    case QImode:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;

      if (cum->nregs <= 0)
        {
          cum->nregs = 0;
          cum->regno = 0;
        }
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case DFmode:
      if (cum->float_in_sse < 2)
        break;
    case SFmode:
      if (cum->float_in_sse < 1)
        break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
    case TImode:
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
        {
          cum->sse_words += words;
          cum->sse_nregs -= 1;
          cum->sse_regno += 1;
          if (cum->sse_nregs <= 0)
            {
              cum->sse_nregs = 0;
              cum->sse_regno = 0;
            }
        }
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
        {
          cum->mmx_words += words;
          cum->mmx_nregs -= 1;
          cum->mmx_regno += 1;
          if (cum->mmx_nregs <= 0)
            {
              cum->mmx_nregs = 0;
              cum->mmx_regno = 0;
            }
        }
      break;
    }
}
static void
function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
                         const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 256bit vector mode parameters are passed on stack.  */
  if (!named && VALID_AVX256_REG_MODE (mode))
    return;

  if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
    }
  else
    {
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = (cum->words + align - 1) & ~(align - 1);
      cum->words += words;
    }
}

static void
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
                            HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
    }
}
/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
                           const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    function_arg_advance_ms_64 (cum, bytes, words);
  else if (TARGET_64BIT)
    function_arg_advance_64 (cum, mode, type, words, named);
  else
    function_arg_advance_32 (cum, mode, type, bytes, words);
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
                 enum machine_mode orig_mode, const_tree type,
                 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  static bool warnedsse, warnedmmx;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
        break;
      /* FALLTHRU */
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
        {
          int regno = cum->regno;

          /* Fastcall allocates the first two DWORD (SImode) or
             smaller arguments to ECX and EDX if it isn't an
             aggregate type.  */
          if (cum->fastcall)
            {
              if (mode == BLKmode
                  || mode == DImode
                  || (type && AGGREGATE_TYPE_P (type)))
                break;

              /* ECX not EAX is the first allocated register.  */
              if (regno == AX_REG)
                regno = CX_REG;
            }
          return gen_rtx_REG (mode, regno);
        }
      break;

    case DFmode:
      if (cum->float_in_sse < 2)
        break;
    case SFmode:
      if (cum->float_in_sse < 1)
        break;
      /* FALLTHRU */
    case TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
        {
          if (!TARGET_SSE && !warnedsse && cum->warn_sse)
            {
              warnedsse = true;
              warning (0, "SSE vector argument without SSE enabled "
                       "changes the ABI");
            }
          if (cum->sse_nregs)
            return gen_reg_or_parallel (mode, orig_mode,
                                        cum->sse_regno + FIRST_SSE_REG);
        }
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
        {
          if (cum->sse_nregs)
            return gen_reg_or_parallel (mode, orig_mode,
                                        cum->sse_regno + FIRST_SSE_REG);
        }
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
        {
          if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
            {
              warnedmmx = true;
              warning (0, "MMX vector argument without MMX enabled "
                       "changes the ABI");
            }
          if (cum->mmx_nregs)
            return gen_reg_or_parallel (mode, orig_mode,
                                        cum->mmx_regno + FIRST_MMX_REG);
        }
      break;
    }

  return NULL_RTX;
}
static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
                 enum machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
                    ? (cum->sse_nregs < 0
                       ? X86_64_SSE_REGPARM_MAX
                       : cum->sse_regno)
                    : -1);

  switch (mode)
    {
    default:
      break;

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!named)
        return NULL;
      break;
    }

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
                              cum->sse_nregs,
                              &x86_64_int_parameter_registers[cum->regno],
                              cum->sse_regno);
}
static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
                    enum machine_mode orig_mode, bool named,
                    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
        regno = cum->regno + FIRST_SSE_REG;
      else
        {
          rtx t1, t2;

          /* Unnamed floating parameters are passed in both the
             SSE and integer registers.  */
          t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
          t2 = gen_rtx_REG (mode, regno);
          t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
          t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
          return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
        }
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
        mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
        mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   MODE is the argument's machine mode.  TYPE is the data type of the
   argument.  It is null for libcalls where that information may not be
   available.  CUM gives information about the preceding args and about
   the function being called.  NAMED is nonzero if this argument is a
   named parameter (otherwise it is an extra parameter matching an
   ellipsis).  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
                   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  enum machine_mode mode = omode;
  HOST_WIDE_INT bytes, words;
  rtx arg;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type, cum);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
  else if (TARGET_64BIT)
    arg = function_arg_64 (cum, mode, omode, type, named);
  else
    arg = function_arg_32 (cum, mode, omode, type, bytes, words);

  return arg;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* See Windows x64 Software Convention.  */
  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    {
      int msize = (int) GET_MODE_SIZE (mode);
      if (type)
        {
          /* Arrays are passed by reference.  */
          if (TREE_CODE (type) == ARRAY_TYPE)
            return true;

          if (AGGREGATE_TYPE_P (type))
            {
              /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
                 are passed by reference.  */
              msize = int_size_in_bytes (type);
            }
        }

      /* __m128 is passed by reference.  */
      switch (msize)
        {
        case 1: case 2: case 4: case 8:
          break;
        default:
          return true;
        }
    }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
}
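/* MS x64 examples (illustrative): a 12-byte struct fails the
   1/2/4/8-byte size test above and is therefore passed by reference
   (the caller passes a pointer to a temporary), while an 8-byte
   struct is passed by value in an integer register; a 16-byte
   __m128 is likewise passed by reference.  */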
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
        {
        case RECORD_TYPE:
        case UNION_TYPE:
        case QUAL_UNION_TYPE:
          {
            tree field;

            /* Walk all the structure fields.  */
            for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
              {
                if (TREE_CODE (field) == FIELD_DECL
                    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
                  return true;
              }
            break;
          }

        case ARRAY_TYPE:
          /* Just for use if some languages pass arrays by value.  */
          if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
            return true;
          break;

        default:
          gcc_unreachable ();
        }
    }
  return false;
}
/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
   XXX: This function is obsolete and is only used for checking psABI
   compatibility with previous versions of GCC.  */

static unsigned int
ix86_compat_function_arg_boundary (enum machine_mode mode,
                                   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
         make an exception for SSE modes since these require 128bit
         alignment.

         The handling here differs from field_alignment.  ICC aligns MMX
         arguments to 4 byte boundaries, while structure fields are aligned
         to 8 byte boundaries.  */
      if (!type)
        {
          if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
            align = PARM_BOUNDARY;
        }
      else
        {
          if (!ix86_compat_aligned_value_p (type))
            align = PARM_BOUNDARY;
        }
    }
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
ix86_contains_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
        {
        case RECORD_TYPE:
        case UNION_TYPE:
        case QUAL_UNION_TYPE:
          {
            tree field;

            /* Walk all the structure fields.  */
            for (field = TYPE_FIELDS (type);
                 field;
                 field = DECL_CHAIN (field))
              {
                if (TREE_CODE (field) == FIELD_DECL
                    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
                  return true;
              }
            break;
          }

        case ARRAY_TYPE:
          /* Just for use if some languages pass arrays by value.  */
          if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
            return true;
          break;

        default:
          gcc_unreachable ();
        }
    }
  else
    return TYPE_ALIGN (type) >= 128;

  return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

static unsigned int
ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
         the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
        {
          /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
          if (!type)
            {
              if (mode == XFmode || mode == XCmode)
                align = PARM_BOUNDARY;
            }
          else if (!ix86_contains_aligned_value_p (type))
            align = PARM_BOUNDARY;

          if (align < 128)
            align = PARM_BOUNDARY;
        }

      if (warn_psabi
          && !warned
          && align != ix86_compat_function_arg_boundary (mode, type,
                                                         saved_align))
        {
          warned = true;
          inform (input_location,
                  "The ABI for passing parameters with %d-byte"
                  " alignment has changed in GCC 4.6",
                  align / BITS_PER_UNIT);
        }
    }

  return align;
}
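/* Example (illustrative): in 32-bit code a plain int argument gets
   PARM_BOUNDARY (32-bit) alignment here, while a __m128 argument
   reports a 128-bit boundary, which in turn forces the 16-byte stack
   slot alignment that aligned SSE loads expect.  */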
/* Return true if N is a possible register number of function value.  */

static bool
ix86_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case AX_REG:
      return true;

    case FIRST_FLOAT_REG:
      /* TODO: The function should depend on current function ABI but
         builtins.c would need updating then.  Therefore we use the
         default ABI.  */
      if (TARGET_64BIT && ix86_abi == MS_ABI)
        return false;
      return TARGET_FLOAT_RETURNS_IN_80387;

    case FIRST_SSE_REG:
      return TARGET_SSE;

    case FIRST_MMX_REG:
      if (TARGET_MACHO || TARGET_64BIT)
        return false;
      return TARGET_MMX;
    }

  return false;
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
                   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
           || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if ((sse_level >= 1 && mode == SFmode)
          || (sse_level == 2 && mode == DFmode))
        regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
static rtx
function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
                   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
        {
        case SFmode:
        case SCmode:
        case DFmode:
        case DCmode:
        case TFmode:
        case SDmode:
        case DDmode:
        case TDmode:
          regno = FIRST_SSE_REG;
          break;
        case XFmode:
        case XCmode:
          regno = FIRST_FLOAT_REG;
          break;
        case TCmode:
          return NULL;
        default:
          regno = AX_REG;
        }

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
                             X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
                             x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
static rtx
function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
                      const_tree valtype)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
        {
        case 16:
          if (valtype != NULL_TREE
              && !VECTOR_INTEGER_TYPE_P (valtype)
              && !INTEGRAL_TYPE_P (valtype)
              && !VECTOR_FLOAT_TYPE_P (valtype))
            break;
          if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
              && !COMPLEX_MODE_P (mode))
            regno = FIRST_SSE_REG;
          break;
        case 8:
        case 4:
          if (mode == SFmode || mode == DFmode)
            regno = FIRST_SSE_REG;
          break;
        default:
          break;
        }
    }
  return gen_rtx_REG (orig_mode, regno);
}
static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
                       enum machine_mode orig_mode, enum machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
    return function_value_ms_64 (orig_mode, mode, valtype);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}

static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
                     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
/* Pointer function arguments and return values are promoted to
   word_mode.  */

static enum machine_mode
ix86_promote_function_mode (const_tree type, enum machine_mode mode,
			    int *punsignedp, const_tree fntype,
			    int for_return)
{
  if (type != NULL_TREE && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return word_mode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
					for_return);
}
/* Return true if a structure, union or array with MODE containing FIELD
   should be accessed using BLKmode.  */

static bool
ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
{
  /* Union with XFmode must be in BLKmode.  */
  return (mode == XFmode
	  && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
	      || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
}

rtx
ix86_libcall_value (enum machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
/* Return true iff type is returned in memory.  */

static bool ATTRIBUTE_UNUSED
return_in_memory_32 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size;

  if (mode == BLKmode)
    return true;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return false;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return false;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exist or the ABI prescribes otherwise.  */
      if (size == 8)
	return !TARGET_MMX || TARGET_VECT8_RETURNS;

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return !TARGET_SSE;

      /* AVX values are returned in YMM0, except when it doesn't exist.  */
      if (size == 32)
	return !TARGET_AVX;
    }

  if (mode == XFmode)
    return false;

  if (size > 12)
    return true;

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return false;
}
static bool ATTRIBUTE_UNUSED
return_in_memory_64 (const_tree type, enum machine_mode mode)
{
  int needed_intregs, needed_sseregs;
  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}
static bool ATTRIBUTE_UNUSED
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size = int_size_in_bytes (type);

  /* __m128 is returned in xmm0.  */
  if ((!type || VECTOR_INTEGER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
       || VECTOR_FLOAT_TYPE_P (type))
      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
      && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
    return false;

  /* Otherwise, the size must be exactly in [1248].  */
  return size != 1 && size != 2 && size != 4 && size != 8;
}
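/* Worked examples of the rules above (illustrative only): a 16-byte
   __m128 value passes the first test and is returned in %xmm0; a 3-byte
   struct fails the [1248] size test and is returned in memory; an 8-byte
   struct passes it and is returned in %rax.  */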
static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
#ifdef SUBTARGET_RETURN_IN_MEMORY
  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
  const enum machine_mode mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return return_in_memory_ms_64 (type, mode);
      else
	return return_in_memory_64 (type, mode);
    }
  else
    return return_in_memory_32 (type, mode);
#endif
}
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (!TARGET_64BIT && type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
	{
	  if (mode == TImode
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	    {
	      warnedsse = true;
	      warning (0, "SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	}

      if (!TARGET_MMX && !warnedmmx)
	{
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector return without MMX enabled "
		       "changes the ABI");
	    }
	}
    }

  return NULL;
}
/* Create the va_list data type.  */

/* Returns the calling convention specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */

static tree
ix86_build_builtin_va_list_abi (enum calling_abi abi)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT || abi == MS_ABI)
    return build_pointer_type (char_type_node);

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
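/* For reference, the record built above matches the va_list type the
   SysV AMD64 psABI mandates (a sketch, not part of the original source):

       typedef struct __va_list_tag {
	 unsigned int gp_offset;    // offset into reg_save_area for GPRs
	 unsigned int fp_offset;    // offset into reg_save_area for XMMs
	 void *overflow_arg_area;   // next argument passed on the stack
	 void *reg_save_area;       // area filled by the prologue
       } va_list[1];  */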
/* Setup the builtin va_list data type and for 64-bit the additional
   calling convention specific va_list data types.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree ret = ix86_build_builtin_va_list_abi (ix86_abi);

  /* Initialize abi specific va_list builtin types.  */
  if (TARGET_64BIT)
    {
      tree t;
      if (ix86_abi == MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (SYSV_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      if (ix86_abi != MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (MS_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
    }

  return ret;
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
		      gen_rtx_REG (word_mode,
				   x86_64_int_parameter_registers[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      enum machine_mode smode;
      rtx label, test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
	{
	  mem = plus_constant (Pmode, save_area,
			       i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
	}

      emit_label (label);
    }
}
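/* A sketch of the varargs register save area laid out by the code above
   (sizes follow from X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX
   == 8 for the SysV 64-bit ABI):

       save_area +   0: rdi rsi rdx rcx r8 r9      (6 * 8  =  48 bytes)
       save_area +  48: xmm0 ... xmm7              (8 * 16 = 128 bytes)

   The SSE half is skipped at run time when %al is zero.  */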
static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  alias_set_type set = get_varargs_alias_set ();
  int i;

  /* Reset to zero, as there might be a sysv vaarg used
     before.  */
  ix86_varargs_gpr_size = 0;
  ix86_varargs_fpr_size = 0;

  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
			 plus_constant (Pmode, virtual_incoming_args_rtx,
					i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}
static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
			       true);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
	  || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
/* Implement va_start.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
	{
	  rtx reg, seq;

	  reg = gen_reg_rtx (Pmode);
	  cfun->machine->split_stack_varargs_pointer = reg;

	  start_sequence ();
	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
	  seq = get_insns ();
	  end_sequence ();

	  push_topmost_sequence ();
	  emit_insn_after (seq, entry_of_function ());
	  pop_topmost_sequence ();
	}
    }

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
	std_expand_builtin_va_start (valist, nextarg);
      else
	{
	  rtx va_r, next;

	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
	  next = expand_binop (ptr_mode, add_optab,
			       cfun->machine->split_stack_varargs_pointer,
			       crtl->args.arg_offset_rtx,
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  convert_move (va_r, next, 0);
	}
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 The prologue of the function saves it right above the stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
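/* Example (illustrative): for 'void f (int a, ...)', the single named
   argument consumes one GPR and no SSE register, so va_start stores
   gp_offset = 8, fp_offset = 48 (= 8 * X86_64_REGPARM_MAX), points
   overflow_arg_area at the first stack argument, and reg_save_area at
   the area saved by the prologue, per the assignments above.  */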
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;
  unsigned int arg_boundary;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
  valist = build_va_arg_indirect_ref (valist);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type, NULL);
  switch (nat_mode)
    {
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
	{
	  container = NULL;
	  break;
	}

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
				       type, 0, X86_64_REGPARM_MAX,
				       X86_64_SSE_REGPARM_MAX, intreg,
				       0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
	 on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = true;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = true;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_build_pointer_plus (sav, gpr);
	  gimplify_assign (int_addr, t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_build_pointer_plus (sav, fpr);
	  gimplify_assign (sse_addr, t, pre_p);
	}
      if (need_temp)
	{
	  int i, prev_size = 0;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  gimplify_assign (addr, t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      tree piece_type;
	      tree addr_type;
	      tree daddr_type;
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;
	      int cur_size = GET_MODE_SIZE (mode);

	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
	      prev_size = INTVAL (XEXP (slot, 1));
	      if (prev_size + cur_size > size)
		{
		  cur_size = size - prev_size;
		  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
		  if (mode == BLKmode)
		    mode = QImode;
		}
	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      if (mode == GET_MODE (reg))
		addr_type = build_pointer_type (piece_type);
	      else
		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							 true);
	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							true);

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

	      dest_addr = fold_convert (daddr_type, addr);
	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
	      if (cur_size == GET_MODE_SIZE (mode))
		{
		  src = build_va_arg_indirect_ref (src_addr);
		  dest = build_va_arg_indirect_ref (dest_addr);

		  gimplify_assign (dest, src, pre_p);
		}
	      else
		{
		  tree copy
		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				       3, dest_addr, src_addr,
				       size_int (cur_size));
		  gimplify_and_add (copy, pre_p);
		}
	      prev_size += cur_size;
	    }
	}

      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  gimplify_assign (gpr, t, pre_p);
	}

      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  gimplify_assign (fpr, t, pre_p);
	}

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
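/* The gimple emitted above amounts to the following pseudo-C for an
   integer va_arg (a sketch only):

       if (ap->gp_offset >= 48)        // (6 - 1 + 1) * 8: GPRs exhausted
	 goto lab_false;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto lab_over;
     lab_false:
       addr = align (ap->overflow_arg_area);
       ap->overflow_arg_area = addr + 8;
     lab_over:
       result = *(int *) addr;  */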
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

bool
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi  */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  REAL_VALUE_TYPE r;

  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    case 8:
    case 9:
      return "#";
    default:
      gcc_unreachable ();
    }
}
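/* For example, standard_80387_constant_p on CONST1_RTX (XFmode) yields 2,
   for which the switch above emits "fld1"; the pi entry of the extra
   constants table yields 7 and is loaded with "fldpi".  */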
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
				       XFmode);
}
/* Return 1 if X is all 0s and 2 if X is all 1s
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode))
    switch (mode)
      {
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
	if (TARGET_SSE2)
	  return 2;
      case V32QImode:
      case V16HImode:
      case V8SImode:
      case V4DImode:
	if (TARGET_AVX2)
	  return 2;
      default:
	break;
      }

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   a constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      switch (get_attr_mode (insn))
	{
	case MODE_TI:
	  return "%vpxor\t%0, %d0";
	case MODE_V2DF:
	  return "%vxorpd\t%0, %d0";
	case MODE_V4SF:
	  return "%vxorps\t%0, %d0";

	case MODE_OI:
	  return "vpxor\t%x0, %x0, %x0";
	case MODE_V4DF:
	  return "vxorpd\t%x0, %x0, %x0";
	case MODE_V8SF:
	  return "vxorps\t%x0, %x0, %x0";

	default:
	  break;
	}

    case 2:
      if (TARGET_AVX)
	return "vpcmpeqd\t%0, %0, %0";
      else
	return "pcmpeqd\t%0, %0";

    default:
      break;
    }
  gcc_unreachable ();
}
/* Returns true if OP contains a symbol reference */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return true;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return true;
    }

  return false;
}
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return (frame.stack_pointer_offset == UNITS_PER_WORD
	  && (frame.nregs + frame.nsseregs) == 0);
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* Win64 SEH, very large frames need a frame-pointer as maximum stack
     allocation is 4GB.  */
  if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!crtl->is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return true;

  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

static int pic_labels_used;
/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = AX_REG; regno <= SP_REG; regno++)
    {
      char name[32];
      tree decl;

      if (!(pic_labels_used & (1 << regno)))
	continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			 get_identifier (name),
			 build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  switch_to_section (darwin_sections[text_coal_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	  DECL_WEAK (decl) = 1;
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);

	  targetm.asm_out.unique_section (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  targetm.asm_out.globalize_label (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      init_function_start (decl);
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
	 as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
	{
	  int i = 8;

	  while (i--)
	    fputs ("\tnop\n", asm_out_file);
	}

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (!flag_pic)
    {
      /* We don't need a pic base, we're not producing pic.  */
      gcc_assert (!TARGET_MACHO);

      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
      targetm.asm_out.internal_label (asm_out_file, "L",
				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
	 This is what will be referenced by the Mach-O PIC subsystem.  */
      if (machopic_should_output_picbase_label () || !label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

      /* When we are restoring the pic base at the site of a nonlocal label,
	 and we decided to emit the pic base above, we will still output a
	 local label used for calculating the correction offset (even though
	 the offset will be 0 in that case).  */
      if (label)
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }

  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
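/* On a typical ELF target the pic path above assembles to (a sketch):

       call  __x86.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk, emitted by ix86_code_end, loads the return address
   into %ebx.  */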
/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (word_mode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}

/* Generate a "pop" pattern for input ARG.  */

static rtx
gen_pop (rtx arg)
{
  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
		      arg,
		      gen_rtx_MEM (word_mode,
				   gen_rtx_POST_INC (Pmode,
						     stack_pointer_rtx)));
}
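/* For reference, gen_push (reg) builds RTL of the shape
   (set (mem (pre_dec (reg sp))) (reg)) and gen_pop the mirror
   (set (reg) (mem (post_inc (reg sp)))), which the push/pop patterns
   in the machine description are expected to match.  */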
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (crtl->is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
	drap = REGNO (crtl->drap_reg);
      else
	drap = -1;
      for (i = 2; i >= 0; --i)
	if (i != drap && !df_regs_ever_live_p (i))
	  return i;
    }

  return INVALID_REGNUM;
}
/* Return TRUE if we need to save REGNO.  */

static bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || crtl->profile
	  || crtl->calls_eh_return
	  || crtl->uses_const_pool
	  || cfun->has_nonlocal_label))
    return ix86_select_alt_pic_regnum () == INVALID_REGNUM;

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return true;
	}
    }

  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
    return true;

  return (df_regs_ever_live_p (regno)
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}

/* Return number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
	     && to == HARD_FRAME_POINTER_REGNUM)
	    || (from == FRAME_POINTER_REGNUM
		&& to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */

static rtx
ix86_builtin_setjmp_frame_value (void)
{
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  unsigned HOST_WIDE_INT stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned HOST_WIDE_INT preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT to_allocate;

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  /* The 64-bit MS ABI seems to require stack alignment to be always 16,
     except for function prologues and leaf functions.  */
  if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
      && (!crtl->is_leaf || cfun->calls_alloca != 0
	  || ix86_current_function_calls_tls_descriptor))
    {
      preferred_alignment = 16;
      stack_alignment_needed = 16;
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    cfun->machine->use_fast_prologue_epilogue = false;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR)
	   && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_get_node (current_function_decl);

      cfun->machine->use_fast_prologue_epilogue_nregs = count;

      /* The fast prologue uses move instead of push to save registers.  This
	 is significantly longer, but also executes faster as modern hardware
	 can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && node->frequency < NODE_FREQUENCY_HOT))
	cfun->machine->use_fast_prologue_epilogue = false;
      else
	cfun->machine->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
    }

  frame->save_regs_using_mov
    = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
       /* If static stack checking is enabled and done with probes,
	  the registers need to be saved before allocating the frame.  */
       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);

  /* Skip return address.  */
  offset = UNITS_PER_WORD;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* On SEH target, registers are pushed just before the frame pointer
     location.  */
  if (TARGET_SEH)
    frame->hard_frame_pointer_offset = offset;

  /* Align and set SSE register save area.  */
  if (frame->nsseregs)
    {
      /* The only ABI that has saved SSE registers (Win64) also has a
	 16-byte aligned default stack, and thus we don't need to be
	 within the re-aligned local stack frame to save them.  */
      gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
      offset = (offset + 16 - 1) & -16;
      offset += frame->nsseregs * 16;
    }
  frame->sse_reg_save_offset = offset;

  /* The re-aligned stack starts here.  Values before this point are not
     directly comparable with values below this point.  In order to make
     sure that no value happens to be the same before and after, force
     the alignment computation below to add a non-zero value.  */
  if (stack_realign_fp)
    offset = (offset + stack_alignment_needed) & -stack_alignment_needed;

  /* Va-arg area */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
  offset += frame->va_arg_size;

  /* Align start of frame for local function.  */
  if (stack_realign_fp
      || offset != frame->sse_reg_save_offset
      || size != 0
      || !crtl->is_leaf
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!crtl->is_leaf || cfun->calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!crtl->is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + preferred_alignment - 1) & -preferred_alignment;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && crtl->sp_is_unchanging
      && crtl->is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      HOST_WIDE_INT diff;

      /* If we can leave the frame pointer where it is, do so.  Also, returns
	 the establisher frame for __builtin_frame_address (0).  */
      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff <= SEH_MAX_FRAME_SIZE
	  && (diff > 240 || (diff & 15) != 0)
	  && !crtl->accesses_prior_frames)
	{
	  /* Ideally we'd determine what portion of the local stack frame
	     (within the constraint of the lowest 240) is most heavily used.
	     But without that complication, simply bias the frame pointer
	     by 128 bytes so as to maximize the amount of the local stack
	     frame that is addressable with 8-bit offsets.  */
	  frame->hard_frame_pointer_offset
	    = frame->stack_pointer_offset - 128;
	}
    }
}
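/* Summary of the layout computed above (a sketch; offsets grow from the
   CFA toward the stack pointer):

       return address
       [pushed static chain]
       [saved frame pointer]        <- hard_frame_pointer_offset
       saved general registers      <- reg_save_offset
       [16-byte aligned SSE saves]  <- sse_reg_save_offset
       [va_arg register save area]
       local variables              <- frame_pointer_offset
       outgoing arguments
       [red zone, 64-bit only]      <- stack_pointer_offset  */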
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static inline int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  int len = 4;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len++;

  return len;
}
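/* Examples of the lengths computed above: (%esp) with offset 0 costs 1
   for the SIB byte; (%ebp) with offset 0 also costs 1 because EBP needs
   an explicit disp8; (%ebx) with offset 0 costs 0; any base with a large
   offset costs 4 for the disp32.  */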
/* Return an RTX that points to CFA_OFFSET within the stack frame.
   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset)
{
  const struct machine_function *m = cfun->machine;
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
	 opportunities.  Generally FP is valid throughout the function,
	 while DRAP must be reloaded within the epilogue.  But choose either
	 over the SP due to increased encoding size.  */

      if (m->fs.fp_valid)
	{
	  base_reg = hard_frame_pointer_rtx;
	  base_offset = m->fs.fp_offset - cfa_offset;
	}
      else if (m->fs.drap_valid)
	{
	  base_reg = crtl->drap_reg;
	  base_offset = 0 - cfa_offset;
	}
      else if (m->fs.sp_valid)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	}
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
	 With a tie, choose FP > DRAP > SP.  */
      if (m->fs.sp_valid)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	  len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
	}
      if (m->fs.drap_valid)
	{
	  toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
	  if (tlen <= len)
	    {
	      base_reg = crtl->drap_reg;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
      if (m->fs.fp_valid)
	{
	  toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
	  if (tlen <= len)
	    {
	      base_reg = hard_frame_pointer_rtx;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
    }
  gcc_assert (base_reg != NULL);

  return plus_constant (Pmode, base_reg, base_offset);
}
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;

  addr = choose_baseaddr (cfa_offset);
  mem = gen_frame_mem (mode, addr);

  /* For SSE saves, we need to indicate the 128-bit alignment.  */
  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION,
			gen_rtx_SET (VOIDmode, mem, reg));
	}
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
			    m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
    }
}
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
	cfa_offset -= UNITS_PER_WORD;
      }
}

/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
	cfa_offset -= 16;
      }
}
static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within stack red-zone till return, as unwinders can find the same value
   in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}

/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;

  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx insn;
  bool add_frame_related_expr = false;

  if (Pmode == SImode)
    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
  else if (x86_64_immediate_operand (offset, DImode))
    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
  else
    {
      rtx tmp;

      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	tmp = gen_rtx_REG (DImode, R11_REG);
      else
	{
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  tmp = hard_frame_pointer_rtx;
	}
      insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
      if (style < 0)
	add_frame_related_expr = true;

      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
    }

  insn = emit_insn (insn);
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (VOIDmode, dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (VOIDmode, dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of this by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
    }
}
/* Find an available register to be used as dynamic realign argument
   pointer register.  Such a register will be written in prologue and
   used in begin of body, so it must not be
	1. parameter passing register.
	2. GOT pointer.
   We reuse static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has
   shorter encoding.

   Return: the regno of chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  if (TARGET_64BIT)
    {
      /* Use R13 for nested function or function need static chain.
	 Since function with tail call may use any caller-saved
	 registers in epilogue, DRAP must not use caller-saved
	 register in such case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
	return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for nested function or function need static chain.
	 Since function with tail call may use any caller-saved
	 registers in epilogue, DRAP must not use caller-saved
	 register in such case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
	return DI_REG;

      /* Reuse static chain register if it isn't used for parameter
	 passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
	    return CX_REG;
	}
      return DI_REG;
    }
}
/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Prefer the one specified at command line.  */
  if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
     if -mstackrealign is used, this isn't a sibcall check, and the
     estimated stack alignment is 128 bits.  */
  else if (!sibcall
           && !TARGET_64BIT
           && ix86_force_align_arg_pointer
           && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via the force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* The stack at the entrance of main is aligned by the runtime.  We use
     the smallest incoming stack boundary.  */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
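
/* Worked example (added commentary): boundaries here are measured in
   bits.  With the 32-bit defaults, a function carrying the
   force_align_arg_pointer attribute has its incoming boundary lowered
   to MIN_STACK_BOUNDARY; it is raised back up only if
   crtl->parm_stack_boundary demands it, and main () is additionally
   capped at MAIN_STACK_BOUNDARY.  */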
/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 varargs need 16-byte stack alignment for the register save
     area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for the DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and return vDRAP.  */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx seq, insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
        {
          add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      return drap_vreg;
    }
  else
    return NULL;
}
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
        = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool thiscall_p
        = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      unsigned int drap_regno
        = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
         for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
          && drap_regno != AX_REG)
        regno = AX_REG;
      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
         for the static chain register.  */
      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
        regno = AX_REG;
      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
        regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !thiscall_p
               && !static_chain_p
               && drap_regno != CX_REG)
        regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true))
        regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
               && ix86_save_reg (SI_REG, true))
        regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true))
        regno = DI_REG;
      else
        {
          regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
          sr->saved = true;
        }
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      struct machine_function *m = cfun->machine;
      rtx x, insn = emit_insn (gen_pop (sr->reg));

      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
      m->fs.sp_offset -= UNITS_PER_WORD;
    }
}
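
/* Illustrative usage (added commentary): the probe emitters below pair
   these helpers in the obvious way:

       struct scratch_reg sr;
       get_scratch_register_on_entry (&sr);
       ... emit insns that use sr.reg ...
       release_scratch_register_on_entry (&sr);

   If no free register was found, sr.reg was pushed on entry, and the
   release pops it back and undoes the sp_offset bookkeeping.  */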
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */

static void
ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
{
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size), last;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 11 insns in the
     generic case while the compile-time loop is made up of 3+2*(n-1) insns
     for n # of intervals.  */
  if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, adjust;
      bool first_probe = true;

      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
         values of N from 1 until it exceeds SIZE.  If only one probe is
         needed, this will not generate any code.  Then adjust and probe
         to PROBE_INTERVAL + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          if (first_probe)
            {
              adjust = 2 * PROBE_INTERVAL + dope;
              first_probe = false;
            }
          else
            adjust = PROBE_INTERVAL;

          emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                  plus_constant (Pmode, stack_pointer_rtx,
                                                 -adjust)));
          emit_stack_probe (stack_pointer_rtx);
        }

      if (first_probe)
        adjust = size + PROBE_INTERVAL + dope;
      else
        adjust = size + PROBE_INTERVAL - i;

      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             -adjust)));
      emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (Pmode, stack_pointer_rtx,
                                                    PROBE_INTERVAL + dope)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* SP = SP_0 + PROBE_INTERVAL.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             - (PROBE_INTERVAL + dope))));

      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
      emit_move_insn (sr.reg, GEN_INT (-rounded_size));
      emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
                              gen_rtx_PLUS (Pmode, sr.reg,
                                            stack_pointer_rtx)));


      /* Step 3: the loop

         while (SP != LAST_ADDR)
           {
             SP = SP + PROBE_INTERVAL
             probe at SP
           }

         adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
         values of N from 1 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));


      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
         assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        {
          emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                  plus_constant (Pmode, stack_pointer_rtx,
                                                 rounded_size - size)));
          emit_stack_probe (stack_pointer_rtx);
        }

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (Pmode, stack_pointer_rtx,
                                                    PROBE_INTERVAL + dope)));

      release_scratch_register_on_entry (&sr);
    }

  gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);

  /* Even if the stack pointer isn't the CFA register, we need to correctly
     describe the adjustments made to it, in particular differentiate the
     frame-related ones from the frame-unrelated ones.  */
  if (size > 0)
    {
      rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
      XVECEXP (expr, 0, 0)
        = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                       plus_constant (Pmode, stack_pointer_rtx, -size));
      XVECEXP (expr, 0, 1)
        = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                       plus_constant (Pmode, stack_pointer_rtx,
                                      PROBE_INTERVAL + dope + size));
      add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
      RTX_FRAME_RELATED_P (last) = 1;

      cfun->machine->fs.sp_offset += size;
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 7 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 7 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
         it exceeds SIZE.  If only one probe is needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
                                         -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
                                       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

         while (TEST_ADDR != LAST_ADDR)
           {
             TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
             probe at TEST_ADDR
           }

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        emit_stack_probe (plus_constant (Pmode,
                                         gen_rtx_PLUS (Pmode,
                                                       stack_pointer_rtx,
                                                       sr.reg),
                                         rounded_size - size));

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
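
/* Worked example (added commentary): assuming PROBE_INTERVAL is 4096,
   a call like ix86_emit_probe_stack_range (4096, 10000) takes the
   unrolled branch above and probes at sp - 8192, sp - 12288 and
   finally sp - 14096, i.e. at FIRST + N * PROBE_INTERVAL and then at
   FIRST + SIZE.  */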
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Finalize the stack_realign_needed flag, which will guide the
   prologue/epilogue to be generated in correct form.  */
static void
ix86_finalize_stack_realign_flags (void)
{
  /* Check if stack realignment is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_realign = (incoming_stack_boundary
                                < (crtl->is_leaf
                                   ? crtl->max_used_stack_slot_alignment
                                   : crtl->stack_alignment_needed));

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
         change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed, but in the end nothing that
     needed the stack alignment had been spilled, clear frame_pointer_needed
     and say we don't need stack realignment.  */
  if (stack_realign
      && !crtl->need_drap
      && frame_pointer_needed
      && crtl->is_leaf
      && flag_omit_frame_pointer
      && crtl->sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      && !(flag_stack_check && STACK_CHECK_MOVING_SP)
      && !ix86_frame_pointer_required ()
      && get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      HARD_REG_SET set_up_by_prologue, prologue_used;
      basic_block bb;

      CLEAR_HARD_REG_SET (prologue_used);
      CLEAR_HARD_REG_SET (set_up_by_prologue);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
                           HARD_FRAME_POINTER_REGNUM);
      FOR_EACH_BB (bb)
        {
          rtx insn;
          FOR_BB_INSNS (bb, insn)
            if (NONDEBUG_INSN_P (insn)
                && requires_stack_frame_p (insn, prologue_used,
                                           set_up_by_prologue))
              {
                crtl->stack_realign_needed = stack_realign;
                crtl->stack_realign_finalized = true;
                return;
              }
        }

      frame_pointer_needed = false;
      stack_realign = false;
      crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
      crtl->stack_alignment_needed = incoming_stack_boundary;
      crtl->stack_alignment_estimated = incoming_stack_boundary;
      if (crtl->preferred_stack_boundary > incoming_stack_boundary)
        crtl->preferred_stack_boundary = incoming_stack_boundary;
      df_finish_pass (true);
      df_scan_alloc (NULL);
      df_scan_blocks ();
      df_compute_regs_ever_live (true);
      df_analyze ();
    }

  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
}
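
/* Worked example (added commentary): with an ix86_incoming_stack_boundary
   of 128 bits and a function whose stack_alignment_needed is 256 bits
   (say, for an AVX vector spill), stack_realign comes out true above and
   the prologue must realign.  If nothing actually required the larger
   alignment and the frame is otherwise empty, the block above instead
   clears frame_pointer_needed and rescans with df.  */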
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;
  bool sse_registers_saved;

  ix86_finalize_stack_realign_flags ();

  /* DRAP should not coexist with stack_realign_fp.  */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;

  ix86_compute_frame_layout (&frame);

  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
         ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use the profiling-before-
         prologue variant.  If so, sorry.  */
      if (crtl->profile && flag_fentry != 0)
        sorry ("ms_hook_prologue attribute isn%'t compatible "
               "with -mfentry for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
         8b ff     movl.s %edi,%edi
         55        push   %ebp
         8b ec     movl.s %esp,%ebp

         This matches the hookable function prologue in Win32 API
         functions in Microsoft Windows XP Service Pack 2 and newer.
         Wine uses this to enable Windows apps to hook the Win32 API
         functions provided by Wine.

         What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
          && !(crtl->drap_reg && crtl->stack_realign_needed))
        {
          rtx push, mov;

          /* We've decided to use the frame pointer already set up.
             Describe this to the unwinder by pretending that both
             push and mov insns happen right here.

             Putting the unwind info here at the end of the ms_hook
             is done so that we can make absolutely certain we get
             the required byte sequence at the start of the function,
             rather than relying on an assembler that can produce
             the exact encoding required.

             However it does mean (in the unpatched case) that we have
             a 1 insn window where the asynchronous unwind info is
             incorrect.  However, if we placed the unwind info at
             its correct location we would have incorrect unwind info
             in the patched case.  Which is probably all moot since
             I don't expect Wine generates dwarf2 unwind info for the
             system libraries that use this feature.  */

          insn = emit_insn (gen_blockage ());

          push = gen_push (hard_frame_pointer_rtx);
          mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
                             stack_pointer_rtx);
          RTX_FRAME_RELATED_P (push) = 1;
          RTX_FRAME_RELATED_P (mov) = 1;

          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                        gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

          /* Note that gen_push incremented m->fs.cfa_offset, even
             though we didn't emit the push insn here.  */
          m->fs.cfa_reg = hard_frame_pointer_rtx;
          m->fs.fp_offset = m->fs.cfa_offset;
          m->fs.fp_valid = true;
        }
      else
        {
          /* The frame pointer is not needed so pop %ebp again.
             This leaves us with a pristine state.  */
          emit_insn (gen_pop (hard_frame_pointer_rtx));
        }
    }

  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
         only as a stack adjustment.  The real copy of the register as
         a save will be done later, if needed.  */
      t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Emit prologue code to adjust stack alignment and set up the DRAP, in
     case a DRAP is needed and stack realignment is really needed after
     reload.  */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Only need to push the parameter pointer reg if it is caller
         saved.  */
      if (!call_used_regs[REGNO (crtl->drap_reg)])
        {
          /* Push arg pointer reg.  */
          insn = emit_insn (gen_push (crtl->drap_reg));
          RTX_FRAME_RELATED_P (insn) = 1;
        }

      /* Grab the argument pointer.  */
      t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
                                        stack_pointer_rtx,
                                        GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that the return
         address can be reached via the (argp - 1) slot.  This is needed
         to implement the macro RETURN_ADDR_RTX and intrinsic function
         expand_builtin_return_addr etc.  */
      t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (word_mode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
         we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;
    }

  int_registers_saved = (frame.nregs == 0);
  sse_registers_saved = (frame.nsseregs == 0);

  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
         slower on all targets.  Also sdb doesn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Push registers now, before setting the frame pointer
         on SEH target.  */
      if (!int_registers_saved
          && TARGET_SEH
          && !frame.save_regs_using_mov)
        {
          ix86_emit_save_regs ();
          int_registers_saved = true;
          gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
        }

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
        {
          insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
          RTX_FRAME_RELATED_P (insn) = 1;

          if (m->fs.cfa_reg == stack_pointer_rtx)
            m->fs.cfa_reg = hard_frame_pointer_rtx;
          m->fs.fp_offset = m->fs.sp_offset;
          m->fs.fp_valid = true;
        }
    }

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
        {
          ix86_emit_save_regs ();
          int_registers_saved = true;
          gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
        }

      /* When using the red zone we may start register saving before
         allocating the stack frame, saving one cycle of the prologue.
         However, avoid doing this if we have to probe the stack; at least
         on x86_64 the stack probe can turn into a call that clobbers a
         red zone location.  */
      else if (ix86_using_red_zone ()
               && (! TARGET_STACK_PROBE
                   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
        {
          ix86_emit_save_regs_using_mov (frame.reg_save_offset);
          int_registers_saved = true;
        }
    }

  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* The computation of the size of the re-aligned stack frame means
         that we must allocate the size of the register save area before
         performing the actual alignment.  Otherwise we cannot guarantee
         that there's enough storage above the realignment point.  */
      if (m->fs.sp_offset != frame.sse_reg_save_offset)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (m->fs.sp_offset
                                            - frame.sse_reg_save_offset),
                                   -1, false);

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
                                        stack_pointer_rtx,
                                        GEN_INT (-align_bytes)));

      /* For the purposes of register save area addressing, the stack
         pointer is no longer valid.  As for the value of sp_offset,
         see ix86_compute_frame_layout, which we need to match in order
         to pass verification of stack_pointer_offset at the end.  */
      m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
      m->fs.sp_valid = false;
    }

  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
        {
          if (ix86_static_chain_on_stack)
            stack_size += UNITS_PER_WORD;

          if (!call_used_regs[REGNO (crtl->drap_reg)])
            stack_size += UNITS_PER_WORD;

          /* This over-estimates by 1 minimal-stack-alignment-unit but
             mitigates that by counting in the new return address slot.  */
          current_function_dynamic_stack_size
            += crtl->stack_alignment_needed / BITS_PER_UNIT;
        }

      current_function_static_stack_size = stack_size;
    }

  /* On SEH target with very large frame size, allocate an area to save
     SSE registers (as the very large allocation won't be described).  */
  if (TARGET_SEH
      && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
      && !sse_registers_saved)
    {
      HOST_WIDE_INT sse_size =
        frame.sse_reg_save_offset - frame.reg_save_offset;

      gcc_assert (int_registers_saved);

      /* No need to do stack checking as the area will be immediately
         written.  */
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (-sse_size), -1,
                                 m->fs.cfa_reg == stack_pointer_rtx);
      allocate -= sse_size;
      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
      sse_registers_saved = true;
    }

  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      /* We expect the registers to be saved when probes are used.  */
      gcc_assert (int_registers_saved);

      if (STACK_CHECK_MOVING_SP)
        {
          ix86_adjust_stack_and_probe (allocate);
          allocate = 0;
        }
      else
        {
          HOST_WIDE_INT size = allocate;

          if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
            size = 0x80000000 - STACK_CHECK_PROTECT - 1;

          if (TARGET_STACK_PROBE)
            ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
          else
            ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
        }
    }

  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
           || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (-allocate), -1,
                                 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      rtx (*adjust_stack_insn)(rtx, rtx, rtx);
      const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
      bool eax_live = false;
      bool r10_live = false;

      if (TARGET_64BIT)
        r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
      if (!TARGET_64BIT_MS_ABI)
        eax_live = ix86_eax_live_at_start_p ();

      /* Note that SEH directives need to continue tracking the stack
         pointer even after the frame pointer has been set up.  */
      if (eax_live)
        {
          insn = emit_insn (gen_push (eax));
          allocate -= UNITS_PER_WORD;
          if (sp_is_cfa_reg || TARGET_SEH)
            {
              if (sp_is_cfa_reg)
                m->fs.cfa_offset += UNITS_PER_WORD;
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }

      if (r10_live)
        {
          r10 = gen_rtx_REG (Pmode, R10_REG);
          insn = emit_insn (gen_push (r10));
          allocate -= UNITS_PER_WORD;
          if (sp_is_cfa_reg || TARGET_SEH)
            {
              if (sp_is_cfa_reg)
                m->fs.cfa_offset += UNITS_PER_WORD;
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      adjust_stack_insn = (Pmode == DImode
                           ? gen_pro_epilogue_adjust_stack_di_sub
                           : gen_pro_epilogue_adjust_stack_si_sub);

      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
                                           stack_pointer_rtx, eax));

      if (sp_is_cfa_reg || TARGET_SEH)
        {
          if (sp_is_cfa_reg)
            m->fs.cfa_offset += allocate;
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                        gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (Pmode, stack_pointer_rtx,
                                                    -allocate)));
        }
      m->fs.sp_offset += allocate;

      if (r10_live && eax_live)
        {
          t = choose_baseaddr (m->fs.sp_offset - allocate);
          emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
                          gen_frame_mem (word_mode, t));
          t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
          emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
                          gen_frame_mem (word_mode, t));
        }
      else if (eax_live || r10_live)
        {
          t = choose_baseaddr (m->fs.sp_offset - allocate);
          emit_move_insn (gen_rtx_REG (word_mode,
                                       (eax_live ? AX_REG : R10_REG)),
                          gen_frame_mem (word_mode, t));
        }
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);

  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
                            GEN_INT (frame.stack_pointer_offset
                                     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
        m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (!sse_registers_saved)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);

  pic_reg_used = false;
  /* We don't use the pic-register for the pe-coff target.  */
  if (pic_offset_table_rtx
      && !TARGET_PECOFF
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
          || crtl->profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
        SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
        {
          if (ix86_cmodel == CM_LARGE_PIC)
            {
              rtx label, tmp_reg;

              gcc_assert (Pmode == DImode);
              label = gen_label_rtx ();
              emit_label (label);
              LABEL_PRESERVE_P (label) = 1;
              tmp_reg = gen_rtx_REG (Pmode, R11_REG);
              gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
              insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
                                                   label));
              insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
              insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
                                               pic_offset_table_rtx, tmp_reg));
            }
          else
            insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
        }
      else
        {
          insn = emit_insn (gen_set_got (pic_offset_table_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
        }
    }

  /* In the pic_reg_used case, make sure that the got load isn't deleted
     when mcount needs it.  Blockage to avoid call movement across the mcount
     call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
  if (crtl->profile && !flag_fentry && pic_reg_used)
    emit_insn (gen_prologue_use (pic_offset_table_rtx));

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is set up, but after reload it turns out stack realignment
         isn't necessary; here we will emit a prologue to set up the DRAP
         without the stack realignment adjustment.  */
      t = choose_baseaddr (0);
      emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into the register save push
     sequence when access to the redzone area is done through the frame
     pointer.  The offset between the frame pointer and the stack pointer is
     calculated relative to the value of the stack pointer at the end of the
     function prologue, and moving instructions that access the redzone area
     via the frame pointer inside the push sequence violates this
     assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* Emit a cld instruction if stringops are used in the function.  */
  if (TARGET_CLD && ix86_current_function_needs_cld)
    emit_insn (gen_cld ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
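
/* For illustration (added commentary): in the common 32-bit case with a
   frame pointer, the insns emitted above boil down to the familiar

        push    %ebp
        mov     %esp, %ebp
        push    %ebx            # callee-saved registers, if any
        sub     $N, %esp        # frame allocation

   with the DRAP, realignment, probing and SEH paths layered on top only
   when the corresponding conditions fire.  N here stands for the
   "allocate" amount, frame.stack_pointer_offset - m->fs.sp_offset.  */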
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
         like *(%ebp - 8).  We've just popped that value from
         the stack, which means we need to reset the CFA to
         the drap register.  This will remain until we restore
         the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
        {
          m->fs.cfa_reg = stack_pointer_rtx;
          m->fs.cfa_offset -= UNITS_PER_WORD;

          add_reg_note (insn, REG_CFA_DEF_CFA,
                        gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                      GEN_INT (m->fs.cfa_offset)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
}
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}
/* Emit code and notes for the LEAVE instruction.  */

static void
ix86_emit_leave (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
                    plus_constant (Pmode, stack_pointer_rtx,
                                   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
                             m->fs.fp_offset);
}
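
/* Added commentary: "leave" is architecturally equivalent to
   "mov %ebp, %esp; pop %ebp", which is why the code above marks the
   stack pointer valid again, derives sp_offset from fp_offset minus
   one word (the popped slot), and invalidates the frame pointer.  */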
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
        rtx reg = gen_rtx_REG (word_mode, regno);
        rtx insn, mem;

        mem = choose_baseaddr (cfa_offset);
        mem = gen_frame_mem (word_mode, mem);
        insn = emit_move_insn (reg, mem);

        if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
          {
            /* Previously we'd represented the CFA as an expression
               like *(%ebp - 8).  We've just popped that value from
               the stack, which means we need to reset the CFA to
               the drap register.  This will remain until we restore
               the stack pointer.  */
            add_reg_note (insn, REG_CFA_DEF_CFA, reg);
            RTX_FRAME_RELATED_P (insn) = 1;

            /* This means that the DRAP register is valid for addressing.  */
            m->fs.drap_valid = true;
          }
        else
          ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

        cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
        rtx reg = gen_rtx_REG (V4SFmode, regno);
        rtx mem;

        mem = choose_baseaddr (cfa_offset);
        mem = gen_rtx_MEM (V4SFmode, mem);
        set_mem_align (mem, 128);
        emit_move_insn (reg, mem);

        ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

        cfa_offset -= 16;
      }
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  struct machine_frame_state frame_state_save = m->fs;
  struct ix86_frame frame;
  bool restore_regs_via_mov;
  bool using_drap;

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);

  m->fs.sp_valid = (!frame_pointer_needed
                    || (crtl->sp_is_unchanging
                        && !stack_realign_fp));
  gcc_assert (!m->fs.sp_valid
              || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
              || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;

      /* When the register save area is in the aligned portion of
         the stack, determine the maximum runtime displacement that
         matches up with the aligned frame.  */
      if (stack_realign_drap)
        m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
                                  + UNITS_PER_WORD);
    }

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    frame.reg_save_offset -= 2 * UNITS_PER_WORD;

  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid, then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
  else if (!m->fs.sp_valid && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
           && cfun->machine->use_fast_prologue_epilogue
           && (frame.nregs > 1
               || m->fs.sp_offset != frame.reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
           && !frame.nregs
           && m->fs.sp_offset != frame.reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
           && TARGET_USE_LEAVE
           && cfun->machine->use_fast_prologue_epilogue
           && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;

  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
         the stack pointer, if we will restore via sp.  */
      if (TARGET_64BIT
          && m->fs.sp_offset > 0x7fffffff
          && !(m->fs.fp_valid || m->fs.drap_valid)
          && (frame.nsseregs + frame.nregs) != 0)
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     GEN_INT (m->fs.sp_offset
                                              - frame.sse_reg_save_offset),
                                     style,
                                     m->fs.cfa_reg == stack_pointer_rtx);
        }
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
                                          style == 2);

  if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
        ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
        {
          rtx insn, sa = EH_RETURN_STACKADJ_RTX;

          /* Stack realignment doesn't work with eh_return.  */
          gcc_assert (!stack_realign_drap);
          /* Neither do regparm nested functions.  */
          gcc_assert (!ix86_static_chain_on_stack);

          if (frame_pointer_needed)
            {
              t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
              t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
              emit_insn (gen_rtx_SET (VOIDmode, sa, t));

              t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
              insn = emit_move_insn (hard_frame_pointer_rtx, t);

              /* Note that we use SA as a temporary CFA, as the return
                 address is at the proper place relative to it.  We
                 pretend this happens at the FP restore insn because
                 prior to this insn the FP would be stored at the wrong
                 offset relative to SA, and after this insn we have no
                 other reasonable register to use for the CFA.  We don't
                 bother resetting the CFA to the SP for the duration of
                 the return insn.  */
              add_reg_note (insn, REG_CFA_DEF_CFA,
                            plus_constant (Pmode, sa, UNITS_PER_WORD));
              ix86_add_queued_cfa_restore_notes (insn);
              add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
              RTX_FRAME_RELATED_P (insn) = 1;

              m->fs.cfa_reg = sa;
              m->fs.cfa_offset = UNITS_PER_WORD;
              m->fs.fp_valid = false;

              pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
                                         const0_rtx, style, false);
            }
          else
            {
              t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
              insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
              ix86_add_queued_cfa_restore_notes (insn);

              gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
              if (m->fs.cfa_offset != UNITS_PER_WORD)
                {
                  m->fs.cfa_offset = UNITS_PER_WORD;
                  add_reg_note (insn, REG_CFA_DEF_CFA,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               UNITS_PER_WORD));
                  RTX_FRAME_RELATED_P (insn) = 1;
                }
            }
          m->fs.sp_offset = UNITS_PER_WORD;
          m->fs.sp_valid = true;
        }
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
         if necessary, (2) a sequence of pops, and (3) a return or
         jump instruction.  Prevent insns from the function body from
         being scheduled into this sequence.  */
      if (TARGET_SEH)
        {
          /* Prevent a catch region from being adjacent to the standard
             epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
             nor several other flags that would be interesting to test
             are set up yet.  */
          if (flag_non_call_exceptions)
            emit_insn (gen_nops (const1_rtx));
          else
            emit_insn (gen_blockage ());
        }

      /* First step is to deallocate the stack frame so that we can
         pop the registers.  Also do it on SEH target for a very large
         frame, as the emitted instructions aren't allowed by the ABI
         in epilogues.  */
      if (!m->fs.sp_valid
          || (TARGET_SEH
              && (m->fs.sp_offset - frame.reg_save_offset
                  >= SEH_MAX_FRAME_SIZE)))
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
                                     GEN_INT (m->fs.fp_offset
                                              - frame.reg_save_offset),
                                     style, false);
        }
      else if (m->fs.sp_offset != frame.reg_save_offset)
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     GEN_INT (m->fs.sp_offset
                                              - frame.reg_save_offset),
                                     style,
                                     m->fs.cfa_reg == stack_pointer_rtx);
        }

      ix86_emit_restore_regs_using_pop ();
    }

  /* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
         pointer store address, then we only need a pop.  */
      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
        ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
         able to grok it fast.  */
      else if (TARGET_USE_LEAVE
               || optimize_bb_for_size_p (EXIT_BLOCK_PTR)
               || !cfun->machine->use_fast_prologue_epilogue)
        ix86_emit_leave ();
      else
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     const0_rtx, style, !using_drap);
          ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
        }
    }

  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
        param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_regs[REGNO (crtl->drap_reg)])
        param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
                        (VOIDmode, stack_pointer_rtx,
                         gen_rtx_PLUS (Pmode,
                                       crtl->drap_reg,
                                       GEN_INT (-param_ptr_offset))));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
                    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                  GEN_INT (param_ptr_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_regs[REGNO (crtl->drap_reg)])
        ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }

  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
                                 style, true);
    }
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
         address, do an explicit add, and jump indirectly to the caller.  */

      if (crtl->args.pops_args >= 65536)
        {
          rtx ecx = gen_rtx_REG (SImode, CX_REG);
          rtx insn;

          /* There is no "pascal" calling convention in any 64-bit ABI.  */
          gcc_assert (!TARGET_64BIT);

          insn = emit_insn (gen_pop (ecx));
          m->fs.cfa_offset -= UNITS_PER_WORD;
          m->fs.sp_offset -= UNITS_PER_WORD;

          add_reg_note (insn, REG_CFA_ADJUST_CFA,
                        copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
          add_reg_note (insn, REG_CFA_REGISTER,
                        gen_rtx_SET (VOIDmode, ecx, pc_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;

          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     popc, -1, true);
          emit_jump_insn (gen_simple_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_simple_return_internal ());

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
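
/* For illustration (added commentary): the usual shapes of the epilogue
   emitted above are

        leave                   # or: mov %ebp, %esp; pop %ebp
        ret

   for frame-pointer functions, and "add $N, %esp" followed by pops and
   "ret" (or "ret $pops_args" for stdcall-style callees) otherwise; the
   eh_return and DRAP paths replace the stack-pointer restore as coded
   above.  */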
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    rtx deleted_debug_label = NULL_RTX;
    while (insn
           && NOTE_P (insn)
           && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      {
        /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
           notes only; instead set their CODE_LABEL_NUMBER to -1,
           otherwise there would be code generation differences
           between -g and -g0.  */
        if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
          deleted_debug_label = insn;
        insn = PREV_INSN (insn);
      }
    if (insn
        && (LABEL_P (insn)
            || (NOTE_P (insn)
                && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
    else if (deleted_debug_label)
      for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
        if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
          CODE_LABEL_NUMBER (insn) = -1;
  }
#endif
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall, is_thiscall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
                                       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
                     != NULL);
      is_thiscall = (lookup_attribute ("thiscall",
                                       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
                     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
        {
          if (DECL_STATIC_CHAIN (cfun->decl))
            {
              sorry ("-fsplit-stack does not support fastcall with "
                     "nested function");
              return INVALID_REGNUM;
            }
          return AX_REG;
        }
      else if (is_thiscall)
        {
          if (!DECL_STATIC_CHAIN (cfun->decl))
            return DX_REG;
          return AX_REG;
        }
      else if (regparm < 3)
        {
          if (!DECL_STATIC_CHAIN (cfun->decl))
            return CX_REG;
          else
            {
              if (regparm >= 2)
                {
                  sorry ("-fsplit-stack does not support 2 register "
                         "parameters for a nested function");
                  return INVALID_REGNUM;
                }
              return DX_REG;
            }
        }
      else
        {
          /* FIXME: We could make this work by pushing a register
             around the addition and comparison.  */
          sorry ("-fsplit-stack does not support 3 register parameters");
          return INVALID_REGNUM;
        }
    }
}
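
/* Added commentary: in 64-bit mode the answer is always %r11, mirroring
   get_scratch_register_on_entry above; the 32-bit cases exist because
   eax/ecx/edx may already carry parameters or the static chain, and the
   sorry () calls reject the combinations where nothing is left.  */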
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more-stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;
11448 /* Handle -fsplit-stack. These are the first instructions in the
11449 function, even before the regular prologue. */
11452 ix86_expand_split_stack_prologue (void)
11454 struct ix86_frame frame
;
11455 HOST_WIDE_INT allocate
;
11456 unsigned HOST_WIDE_INT args_size
;
11457 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11458 rtx scratch_reg
= NULL_RTX
;
11459 rtx varargs_label
= NULL_RTX
;
11462 gcc_assert (flag_split_stack
&& reload_completed
);
11464 ix86_finalize_stack_realign_flags ();
11465 ix86_compute_frame_layout (&frame
);
11466 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11468 /* This is the label we will branch to if we have enough stack
11469 space. We expect the basic block reordering pass to reverse this
11470 branch if optimizing, so that we branch in the unlikely case. */
11471 label
= gen_label_rtx ();
11473 /* We need to compare the stack pointer minus the frame size with
11474 the stack boundary in the TCB. The stack boundary always gives
11475 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11476 can compare directly. Otherwise we need to do an addition. */
11478 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11479 UNSPEC_STACK_CHECK
);
11480 limit
= gen_rtx_CONST (Pmode
, limit
);
11481 limit
= gen_rtx_MEM (Pmode
, limit
);
11482 if (allocate
< SPLIT_STACK_AVAILABLE
)
11483 current
= stack_pointer_rtx
;
11486 unsigned int scratch_regno
;
11489 /* We need a scratch register to hold the stack pointer minus
11490 the required frame size. Since this is the very start of the
11491 function, the scratch register can be any caller-saved
11492 register which is not used for parameters. */
11493 offset
= GEN_INT (- allocate
);
11494 scratch_regno
= split_stack_prologue_scratch_regno ();
11495 if (scratch_regno
== INVALID_REGNUM
)
11497 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11498 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11500 /* We don't use ix86_gen_add3 in this case because it will
11501 want to split to lea, but when not optimizing the insn
11502 will not be split after this point. */
11503 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11504 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11509 emit_move_insn (scratch_reg
, offset
);
11510 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
11511 stack_pointer_rtx
));
11513 current
= scratch_reg
;
11516 ix86_expand_branch (GEU
, current
, limit
, label
);
11517 jump_insn
= get_last_insn ();
11518 JUMP_LABEL (jump_insn
) = label
;
11520 /* Mark the jump as very likely to be taken. */
11521 add_reg_note (jump_insn
, REG_BR_PROB
,
11522 GEN_INT (REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100));
11524 if (split_stack_fn
== NULL_RTX
)
11525 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11526 fn
= split_stack_fn
;
  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
  call_fusage = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
	 Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
	{
	  rtx rax;

	  rax = gen_rtx_REG (word_mode, AX_REG);
	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
	  use_reg (&call_fusage, rax);
	}
      if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
	{
	  HOST_WIDE_INT argval;

	  gcc_assert (Pmode == DImode);
	  /* When using the large model we need to load the address
	     into a register, and we've run out of registers.  So we
	     switch to a different calling convention, and we call a
	     different function: __morestack_large.  We pass the
	     argument size in the upper 32 bits of r10 and pass the
	     frame size in the lower 32 bits.  */
	  gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
	  gcc_assert ((args_size & 0xffffffff) == args_size);

	  if (split_stack_fn_large == NULL_RTX)
	    split_stack_fn_large =
	      gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");

	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx x;

	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      emit_insn (gen_set_rip_rex64 (reg10, label));
	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
	      emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
				  UNSPEC_GOT);
	      x = gen_rtx_CONST (Pmode, x);
	      emit_move_insn (reg11, x);
	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
	      x = gen_const_mem (Pmode, x);
	      emit_move_insn (reg11, x);
	    }
	  else
	    emit_move_insn (reg11, split_stack_fn_large);

	  fn = reg11;
	  argval = ((args_size << 16) << 16) + allocate;
	  emit_move_insn (reg10, GEN_INT (argval));
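	  /* Worked example (illustrative values): with args_size ==
	     0x20 and allocate == 0x1000, argval is 0x0000002000001000,
	     so %r10 carries the argument size in its upper 32 bits and
	     the frame size in its lower 32 bits, matching the
	     convention described in the comment above.  */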
	}
      else
	{
	  emit_move_insn (reg10, allocate_rtx);
	  emit_move_insn (reg11, GEN_INT (args_size));
	  use_reg (&call_fusage, reg11);
	}

      use_reg (&call_fusage, reg10);
    }
  else
    {
      emit_insn (gen_push (GEN_INT (args_size)));
      emit_insn (gen_push (allocate_rtx));
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
				GEN_INT (UNITS_PER_WORD), constm1_rtx,
				NULL_RTX, false);
  add_function_usage_to (call_insn, call_fusage);
  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
		    gen_rtx_REG (word_mode, AX_REG));
  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
	 fp -> old fp
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, frame_reg,
					    GEN_INT (words * UNITS_PER_WORD))));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;
    }
  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (UNITS_PER_WORD))));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.  */

static void
ix86_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}
/* Determine whether OP is a suitable SUBREG RTX for an address.  */

static bool
ix86_address_subreg_operand (rtx op)
{
  enum machine_mode mode;

  if (!REG_P (op))
    return false;

  mode = GET_MODE (op);

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return false;

  /* Don't allow SUBREGs that span more than a word.  It can lead to spill
     failures when the register is one word out of a two word structure.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    return false;

  /* Allow only SUBREGs of non-eliminable hard registers.  */
  return register_no_elim_operand (op, mode);
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

static int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;
  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
	  && GET_MODE (XEXP (addr, 0)) == SImode)
	{
	  addr = XEXP (addr, 0);
	  if (CONST_INT_P (addr))
	    return 0;
	}
      else if (GET_CODE (addr) == AND
	       && const_32bit_mask (XEXP (addr, 1), DImode))
	{
	  addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
	  if (addr == NULL_RTX)
	    return 0;

	  if (CONST_INT_P (addr))
	    return 0;
	}
    }

  /* Allow SImode subregs of DImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == SImode)
    {
      if (GET_CODE (addr) == SUBREG
	  && GET_MODE (SUBREG_REG (addr)) == DImode)
	{
	  addr = SUBREG_REG (addr);
	  if (CONST_INT_P (addr))
	    return 0;
	}
    }
  if (REG_P (addr))
    base = addr;
  else if (GET_CODE (addr) == SUBREG)
    {
      if (ix86_address_subreg_operand (SUBREG_REG (addr)))
	base = addr;
      else
	return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      /* Collect up to four addends from the nest of PLUS expressions.  */
      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case ASHIFT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      tmp = XEXP (op, 1);
	      if (!CONST_INT_P (tmp))
		return 0;
	      scale = INTVAL (tmp);
	      if ((unsigned HOST_WIDE_INT) scale > 3)
		return 0;
	      scale = 1 << scale;
	      break;

	    case ZERO_EXTEND:
	      op = XEXP (op, 0);
	      if (GET_CODE (op) != UNSPEC)
		return 0;
	      /* FALLTHRU */

	    case UNSPEC:
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = DEFAULT_TLS_SEG_REG;
	      else
		return 0;
	      break;

	    case SUBREG:
	      if (!ix86_address_subreg_operand (SUBREG_REG (op)))
		return 0;
	      /* FALLTHRU */

	    case REG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else if (CONST_INT_P (addr))
    {
      if (!x86_64_immediate_operand (addr, VOIDmode))
	return 0;

      /* Constant addresses are sign extended to 64bit, we have to
	 prevent addresses from 0x80000000 to 0xffffffff in x32 mode.  */
      if (TARGET_X32
	  && val_signbit_known_set_p (SImode, INTVAL (addr)))
	return 0;

      disp = addr;
    }
  else
    disp = addr;			/* displacement */
  if (index)
    {
      if (REG_P (index))
	;
      else if (GET_CODE (index) == SUBREG
	       && ix86_address_subreg_operand (SUBREG_REG (index)))
	;
      else
	return 0;
    }

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != SEG_DEFAULT
      && ((base && GET_MODE (base) != word_mode)
	  || (index && GET_MODE (index) != word_mode)))
    return 0;

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
	return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
	  || index_reg == frame_pointer_rtx
	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp
      && base_reg
      && (base_reg == hard_frame_pointer_rtx
	  || base_reg == frame_pointer_rtx
	  || base_reg == arg_pointer_rtx
	  || (REG_P (base_reg)
	      && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
		  || REGNO (base_reg) == R13_REG))))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
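/* For illustration: the canonical RTX

     (plus:SI (plus:SI (mult:SI (reg:SI %ebx) (const_int 4))
		       (reg:SI %esi))
	      (const_int 12))

   decomposes into base = %esi, index = %ebx, scale = 4, disp = 12,
   i.e. the AT&T-syntax operand 12(%esi,%ebx,4).  */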
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */

static int
ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
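/* For example: the operand (%ebx,%esi) -- base plus index, scale 1, no
   displacement -- matches the third affected mode above and incurs the
   extra K6 cost, while (by the conjecture in the comment) the
   equivalent 0(%ebx,%esi) with an explicit zero displacement would
   not.  */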
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  if (!CONST_INT_P (XEXP (x, 1)))
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))
	return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);
#endif
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
	  && x != CONST0_RTX (TImode)
	  && !TARGET_64BIT)
	return false;
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x))
	return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !ix86_legitimate_constant_p (mode, x);
}
/* Nonzero if the symbol is marked as dllimport, or as stub-variable,
   otherwise zero.  */

static bool
is_imported_p (rtx x)
{
  if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
      || GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
}
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	{
	case LABEL_REF:
	  return true;

	case CONST:
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	    break;
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  if (!CONST_INT_P (op1)
	      || INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	    break;
	  if (GET_CODE (op0) == LABEL_REF)
	    return true;
	  if (GET_CODE (op0) == CONST
	      && GET_CODE (XEXP (op0, 0)) == UNSPEC
	      && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) != SYMBOL_REF)
	    break;
	  /* FALLTHRU */

	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.
	     The dllimported symbol always needs to be resolved.  */
	  if (SYMBOL_REF_TLS_MODEL (op0)
	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
		  && SYMBOL_REF_DLLIMPORT_P (op0)))
	    return false;

	  if (TARGET_PECOFF)
	    {
	      if (is_imported_p (op0))
		return true;

	      if (SYMBOL_REF_FAR_ADDR_P (op0)
		  || !SYMBOL_REF_LOCAL_P (op0))
		break;

	      /* Function-symbols need to be resolved only for
		 the large model.
		 For the small model we don't need to resolve anything
		 here.  */
	      if ((ix86_cmodel != CM_LARGE_PIC
		   && SYMBOL_REF_FUNCTION_P (op0))
		  || ix86_cmodel == CM_SMALL_PIC)
		return true;
	      /* Non-external symbols don't need to be resolved for
		 the large and medium models.  */
	      if ((ix86_cmodel == CM_LARGE_PIC
		   || ix86_cmodel == CM_MEDIUM_PIC)
		  && !SYMBOL_REF_EXTERNAL_P (op0))
		return true;
	      break;
	    }
	  else if (!SYMBOL_REF_FAR_ADDR_P (op0)
		   && SYMBOL_REF_LOCAL_P (op0)
		   && ix86_cmodel != CM_LARGE_PIC)
	    return true;
	  break;

	default:
	  break;
	}
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions; this limits the allowed
	 distance of GOT table references.  We should not need these
	 anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF
	      && XINT (disp, 1) != UNSPEC_PCREL
	      && XINT (disp, 1) != UNSPEC_PLTOFF))
	return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
	return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      /* We need to check for both symbols and labels because VxWorks loads
	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
	 details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI also specifies a 32bit relocation, we don't produce
	 it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	  && !TARGET_64BIT)
	return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}
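/* For example: in 32-bit PIC code the displacement foo@GOTOFF is
   accepted here, and foo@GOT is accepted when no constant offset has
   been added, while a TLS displacement such as foo@NTPOFF is accepted
   only when foo's TLS model matches (local-exec in that case).  */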
/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
   replace the input X, or the original X if no replacement is called for.
   The output parameter *WIN is 1 if the calling macro should goto WIN,
   0 if it should not.  */

bool
ix86_legitimize_reload_address (rtx x,
				enum machine_mode mode ATTRIBUTE_UNUSED,
				int opnum, int type,
				int ind_levels ATTRIBUTE_UNUSED)
{
  /* Reload can generate:

     (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
		       (reg:DI 97))
	      (reg:DI 2 cx))

     This RTX is rejected from ix86_legitimate_address_p due to
     non-strictness of base register 97.  Following this rejection,
     reload pushes all three components into separate registers,
     creating invalid memory address RTX.

     Following code reloads only the invalid part of the
     memory address RTX.  */

  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 1))
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 1)))
    {
      rtx base, index;
      bool something_reloaded = false;

      base = XEXP (XEXP (x, 0), 1);
      if (!REG_OK_FOR_BASE_STRICT_P (base))
	{
	  push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
		       BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  something_reloaded = true;
	}

      index = XEXP (x, 1);
      if (!REG_OK_FOR_INDEX_STRICT_P (index))
	{
	  push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
		       INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  something_reloaded = true;
	}

      gcc_assert (something_reloaded);
      return true;
    }

  return false;
}
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
			   rtx addr, bool strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.  */
  if (base)
    {
      rtx reg;

      if (REG_P (base))
	reg = base;
      else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
	reg = SUBREG_REG (base);
      else
	/* Base is not a register.  */
	return false;

      if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
	return false;

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	/* Base is not valid.  */
	return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg;

      if (REG_P (index))
	reg = index;
      else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
	reg = SUBREG_REG (index);
      else
	/* Index is not a register.  */
	return false;

      if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
	return false;

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	/* Index is not valid.  */
	return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
	/* Scale without index.  */
	return false;

      if (scale != 2 && scale != 4 && scale != 8)
	/* Scale is not a valid multiplier.  */
	return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While the ABI also specifies 32bit relocations, we
	     don't produce them at all and use IP-relative addressing
	     instead.  */
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;

	    /* 64bit address unspec.  */
	    return false;

	  case UNSPEC_GOTPCREL:
	  case UNSPEC_PCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  case UNSPEC_STACK_CHECK:
	    gcc_assert (flag_split_stack);
	    break;

	  default:
	    /* Invalid address unspec.  */
	    return false;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
		   || (TARGET_MACHO
#if TARGET_MACHO
		       && MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp)
#endif
	       )))
	{

	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		/* Non-constant pic memory reference.  */
		return false;
	    }
	  else if ((!TARGET_MACHO || flag_pic)
		   && ! legitimate_pic_address_disp_p (disp))
	    /* Displacement is an invalid pic construct.  */
	    return false;
#if TARGET_MACHO
	  else if (MACHO_DYNAMIC_NO_PIC_P
		   && !ix86_legitimate_constant_p (Pmode, disp))
	    /* displacement must be referenced via non_lazy_pointer */
	    return false;
#endif

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
		 return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && !CONST_INT_P (disp)
	       && (GET_CODE (disp) != CONST
		   || !ix86_legitimate_constant_p (Pmode, disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !ix86_legitimate_constant_p (Pmode, disp)))
	/* Displacement is not constant.  */
	return false;
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	/* Displacement is out of range.  */
	return false;
    }

  /* Everything looks valid.  */
  return true;
}
/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}

/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      used as the address.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;
    }

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if (TARGET_64BIT && !TARGET_PECOFF
	   && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
    {
      rtx tmpreg;

      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      tmpreg = gen_reg_rtx (Pmode);

      emit_move_insn (tmpreg, new_rtx);

      if (reg != 0)
	{
	  new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
					 tmpreg, 1, OPTAB_DIRECT);
	  new_rtx = reg;
	}
      else
	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

      if (reg != 0)
	{
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
	   /* We can't use @GOTOFF for text labels on VxWorks;
	      see gotoff_operand.  */
	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;

      /* For x64 PE-COFF there is no GOT table.  So we use address
	 directly.  */
      if (TARGET_64BIT && TARGET_PECOFF)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly, otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these
	     addresses; instead we CSE addresses from the GOT table,
	     so skip this.  */
	  emit_insn (gen_movsi (reg, new_rtx));
	  new_rtx = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  if (TARGET_64BIT)
	    new_rtx = force_reg (Pmode, new_rtx);
	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
    }
  else
    {
      if (CONST_INT_P (addr)
	  && !x86_64_immediate_operand (addr, VOIDmode))
	{
	  if (reg)
	    {
	      emit_move_insn (reg, addr);
	      new_rtx = reg;
	    }
	  else
	    new_rtx = force_reg (Pmode, addr);
	}
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}

      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					    UNSPEC_GOTOFF);
		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
		  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new_rtx);
		      new_rtx = reg;
		    }
		}
	      else
		{
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);
		      new_rtx
			= gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      rtx base = legitimize_pic_address (op0, reg);
	      enum machine_mode mode = GET_MODE (base);
	      new_rtx
		= legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		{
		  if (INTVAL (new_rtx) < -16*1024*1024
		      || INTVAL (new_rtx) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (new_rtx, mode))
			new_rtx = force_reg (mode, new_rtx);
		      new_rtx
			= gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
		    }
		  else
		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
		}
	      else
		{
		  if (GET_CODE (new_rtx) == PLUS
		      && CONSTANT_P (XEXP (new_rtx, 1)))
		    {
		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		    }
		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
		}
	    }
	}
    }
  return new_rtx;
}
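/* Illustrative assembly for the two reference types described in the
   comment above (32-bit, with %ebx holding the GOT pointer):

     movl	global_var@GOT(%ebx), %eax	# type 1: load address from GOT
     leal	static_var@GOTOFF(%ebx), %eax	# type 2: GOT-relative offset

   On x86-64 with the small PIC model, the type 1 equivalent is
   movq global_var@GOTPCREL(%rip), %rax.  */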
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != tp_mode)
    {
      gcc_assert (GET_MODE (tp) == SImode);
      gcc_assert (tp_mode == DImode);

      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
    }

  if (to_reg)
    tp = copy_to_mode_reg (tp_mode, tp);

  return tp;
}
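/* Illustrative only: on GNU/Linux the thread pointer read through
   UNSPEC_TP is the %fs segment base in 64-bit mode and the %gs segment
   base in 32-bit mode, so e.g. "movq %fs:0, %rax" materializes it into
   a register.  */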
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  return ix86_tls_symbol;
}
/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
	= gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  enum machine_mode tp_mode = Pmode;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic && !TARGET_PECOFF)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (Pmode, true);
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx insns;

	      start_sequence ();
	      emit_call_insn
		(ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      if (GET_MODE (x) != Pmode)
		x = gen_rtx_ZERO_EXTEND (Pmode, x);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;
    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  rtx tmp = ix86_tls_module_base ();

	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (Pmode, true);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL,
			       gen_rtx_MINUS (Pmode, tmp, tp));
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx insns, eqv;

	      start_sequence ();
	      emit_call_insn
		(ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      break;
    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  if (TARGET_SUN_TLS && !TARGET_X32)
	    {
	      /* The Sun linker took the AMD64 TLS spec literally
		 and can only handle %rax as destination of the
		 initial executable code sequence.  */

	      dest = gen_reg_rtx (DImode);
	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
	      return dest;
	    }

	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and linker IE->LE relaxation bug.  */
	  tp_mode = DImode;
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
	off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (tp_mode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (tp_mode, off);
	  return gen_rtx_PLUS (tp_mode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (ix86_gen_sub3 (dest, base, off));
	}
      break;
    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (Pmode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (ix86_gen_sub3 (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
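/* Illustrative x86-64 sequences for the TLS models handled above
   (typical GNU TLS output, small code model):

     global-dynamic:  leaq  x@tlsgd(%rip), %rdi
		      call  __tls_get_addr@PLT
     initial-exec:    movq  x@gottpoff(%rip), %rax
		      movq  %fs:(%rax), ...
     local-exec:      movq  %fs:x@tpoff, ...

   The exact sequences are fixed by the ELF TLS ABI so that the linker
   can relax one model into another.  */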
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL if BEIMPORT is true.  Otherwise create or return the
   unique refptr-DECL symbol corresponding to symbol DECL.  */

static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
  htab_t dllimport_map;
static tree
get_dllimport_decl (tree decl, bool beimport)
{
  struct tree_map *h, in;
  void **loc;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
  h = (struct tree_map *) *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc_tree_map ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
			   VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  if (beimport)
    prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
      ? "*__imp_" : "*__imp__";
  else
    prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
  if (!beimport)
    {
      SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
#ifdef SUB_TARGET_RECORD_STUB
      SUB_TARGET_RECORD_STUB (name);
#endif
    }

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}
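/* For example: with BEIMPORT true, a reference to a dllimport'ed
   symbol foo is redirected through the import-table slot __imp__foo
   (or __imp_foo when there is no user label prefix), so a direct
   "call foo" becomes an indirect "call *__imp__foo".  */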
/* Expand SYMBOL into its corresponding far-address symbol.
   WANT_REG is true if we require the result be a register.  */

static rtx
legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}
/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}
/* Expand SYMBOL into its corresponding dllimport or refptr symbol.  WANT_REG
   is true if we require the result be a register.  */

static rtx
legitimize_pe_coff_symbol (rtx addr, bool inreg)
{
  if (!TARGET_PECOFF)
    return NULL_RTX;

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
	return legitimize_dllimport_symbol (addr, inreg);
      if (GET_CODE (addr) == CONST
	  && GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
	{
	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
	}
    }

  if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
    return NULL_RTX;
  if (GET_CODE (addr) == SYMBOL_REF
      && !is_imported_p (addr)
      && SYMBOL_REF_EXTERNAL_P (addr)
      && SYMBOL_REF_DECL (addr))
    return legitimize_pe_coff_extern_decl (addr, inreg);

  if (GET_CODE (addr) == CONST
      && GET_CODE (XEXP (addr, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
      && !is_imported_p (XEXP (XEXP (addr, 0), 0))
      && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
      && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
    {
      rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
    }

  return NULL_RTX;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */
static rtx
ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (x, true);
      if (tmp)
	return tmp;
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }
) == PLUS
)
13432 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13434 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
13435 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13436 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
13439 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
13440 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
13441 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
13442 GEN_INT (1 << log
));
13445 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
13446 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
13447 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
13450 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
13451 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
13452 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
13453 GEN_INT (1 << log
));
13456 /* Put multiply first if it isn't already. */
13457 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13459 rtx tmp
= XEXP (x
, 0);
13460 XEXP (x
, 0) = XEXP (x
, 1);
      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (Pmode, other,
					       INTVAL (constant)));
	    }
	}
      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13628 /* This used to output parentheses around the expression,
13629 but that does not work on the 386 (either ATT or BSD assembler). */
13630 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13634 if (GET_MODE (x
) == VOIDmode
)
13636 /* We can use %d if the number is <32 bits and positive. */
13637 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
13638 fprintf (file
, "0x%lx%08lx",
13639 (unsigned long) CONST_DOUBLE_HIGH (x
),
13640 (unsigned long) CONST_DOUBLE_LOW (x
));
13642 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
13645 /* We can't handle floating point constants;
13646 TARGET_PRINT_OPERAND must handle them. */
13647 output_operand_lossage ("floating constant misused");
    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;
    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_STACK_CHECK)
	{
	  bool f = i386_asm_output_addr_const_extra (file, x);
	  gcc_assert (f);
	  break;
	}

      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs ("@PLTOFF", file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@gottpoff", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@tpoff", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@tpoff", file);
	  else
	    fputs ("@ntpoff", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@dtpoff", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
	  else
	    fputs ("@gotntpoff", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@indntpoff", file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
13750 We need to emit DTP-relative relocations. */
13752 static void ATTRIBUTE_UNUSED
13753 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
13755 fputs (ASM_LONG
, file
);
13756 output_addr_const (file
, x
);
13757 fputs ("@dtpoff", file
);
13763 fputs (", 0", file
);
13766 gcc_unreachable ();
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

static bool
ix86_pic_register_p (rtx x)
{
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else
    return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != DEFAULT_TLS_SEG_REG
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

static rtx
ix86_delegitimize_address (rtx x)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
13858 if (GET_CODE (x
) == CONST
13859 && GET_CODE (XEXP (x
, 0)) == PLUS
13860 && GET_MODE (XEXP (x
, 0)) == Pmode
13861 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13862 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
13863 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
13865 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
13866 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
13867 if (MEM_P (orig_x
))
13868 x
= replace_equiv_address_nv (orig_x
, x
);
13871 if (GET_CODE (x
) != CONST
13872 || GET_CODE (XEXP (x
, 0)) != UNSPEC
13873 || (XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
13874 && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
)
13875 || (!MEM_P (orig_x
) && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
))
13876 return ix86_delegitimize_tls_address (orig_x
);
13877 x
= XVECEXP (XEXP (x
, 0), 0, 0);
13878 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
13880 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
13888 if (GET_CODE (x
) != PLUS
13889 || GET_CODE (XEXP (x
, 1)) != CONST
)
13890 return ix86_delegitimize_tls_address (orig_x
);
13892 if (ix86_pic_register_p (XEXP (x
, 0)))
13893 /* %ebx + GOT/GOTOFF */
13895 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
13897 /* %ebx + %reg * scale + GOT/GOTOFF */
13898 reg_addend
= XEXP (x
, 0);
13899 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
13900 reg_addend
= XEXP (reg_addend
, 1);
13901 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
13902 reg_addend
= XEXP (reg_addend
, 0);
13905 reg_addend
= NULL_RTX
;
13906 addend
= XEXP (x
, 0);
13910 addend
= XEXP (x
, 0);
13912 x
= XEXP (XEXP (x
, 1), 0);
13913 if (GET_CODE (x
) == PLUS
13914 && CONST_INT_P (XEXP (x
, 1)))
13916 const_addend
= XEXP (x
, 1);
13920 if (GET_CODE (x
) == UNSPEC
13921 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
13922 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
13923 result
= XVECEXP (x
, 0, 0);
13925 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
13926 && !MEM_P (orig_x
))
13927 result
= XVECEXP (x
, 0, 0);
13930 return ix86_delegitimize_tls_address (orig_x
);
13933 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
13935 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
13938 /* If the rest of original X doesn't involve the PIC register, add
13939 addend and subtract pic_offset_table_rtx. This can happen e.g.
13941 leal (%ebx, %ecx, 4), %ecx
13943 movl foo@GOTOFF(%ecx), %edx
13944 in which case we return (%ecx - %ebx) + foo. */
13945 if (pic_offset_table_rtx
)
13946 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
13947 pic_offset_table_rtx
),
13952 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
13954 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
13955 if (result
== NULL_RTX
)
13961 /* If X is a machine specific address (i.e. a symbol or label being
13962 referenced as a displacement from the GOT implemented using an
13963 UNSPEC), then return the base term. Otherwise return X. */
13966 ix86_find_base_term (rtx x
)
13972 if (GET_CODE (x
) != CONST
)
13974 term
= XEXP (x
, 0);
13975 if (GET_CODE (term
) == PLUS
13976 && (CONST_INT_P (XEXP (term
, 1))
13977 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
13978 term
= XEXP (term
, 0);
13979 if (GET_CODE (term
) != UNSPEC
13980 || (XINT (term
, 1) != UNSPEC_GOTPCREL
13981 && XINT (term
, 1) != UNSPEC_PCREL
))
13984 return XVECEXP (term
, 0, 0);
13987 return ix86_delegitimize_address (x
);
13991 put_condition_code (enum rtx_code code
, enum machine_mode mode
, bool reverse
,
13992 bool fp
, FILE *file
)
13994 const char *suffix
;
13996 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
13998 code
= ix86_fp_compare_code_to_integer (code
);
14002 code
= reverse_condition (code
);
14053 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
14057 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14058 Those same assemblers have the same but opposite lossage on cmov. */
14059 if (mode
== CCmode
)
14060 suffix
= fp
? "nbe" : "a";
14061 else if (mode
== CCCmode
)
14064 gcc_unreachable ();
14080 gcc_unreachable ();
14084 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
14101 gcc_unreachable ();
14105 /* ??? As above. */
14106 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
14107 suffix
= fp
? "nb" : "ae";
14110 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
14114 /* ??? As above. */
14115 if (mode
== CCmode
)
14117 else if (mode
== CCCmode
)
14118 suffix
= fp
? "nb" : "ae";
14120 gcc_unreachable ();
14123 suffix
= fp
? "u" : "p";
14126 suffix
= fp
? "nu" : "np";
14129 gcc_unreachable ();
14131 fputs (suffix
, file
);
14134 /* Print the name of register X to FILE based on its machine mode and number.
14135 If CODE is 'w', pretend the mode is HImode.
14136 If CODE is 'b', pretend the mode is QImode.
14137 If CODE is 'k', pretend the mode is SImode.
14138 If CODE is 'q', pretend the mode is DImode.
14139 If CODE is 'x', pretend the mode is V4SFmode.
14140 If CODE is 't', pretend the mode is V8SFmode.
14141 If CODE is 'h', pretend the reg is the 'high' byte register.
14142 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14143 If CODE is 'd', duplicate the operand for AVX instruction.
14147 print_reg (rtx x
, int code
, FILE *file
)
14150 unsigned int regno
;
14151 bool duplicated
= code
== 'd' && TARGET_AVX
;
14153 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14158 gcc_assert (TARGET_64BIT
);
14159 fputs ("rip", file
);
14163 regno
= true_regnum (x
);
14164 gcc_assert (regno
!= ARG_POINTER_REGNUM
14165 && regno
!= FRAME_POINTER_REGNUM
14166 && regno
!= FLAGS_REG
14167 && regno
!= FPSR_REG
14168 && regno
!= FPCR_REG
);
14170 if (code
== 'w' || MMX_REG_P (x
))
14172 else if (code
== 'b')
14174 else if (code
== 'k')
14176 else if (code
== 'q')
14178 else if (code
== 'y')
14180 else if (code
== 'h')
14182 else if (code
== 'x')
14184 else if (code
== 't')
14187 code
= GET_MODE_SIZE (GET_MODE (x
));
14189 /* Irritatingly, AMD extended registers use different naming convention
14190 from the normal registers: "r%d[bwd]" */
14191 if (REX_INT_REGNO_P (regno
))
14193 gcc_assert (TARGET_64BIT
);
14195 fprint_ul (file
, regno
- FIRST_REX_INT_REG
+ 8);
14199 error ("extended registers have no high halves");
14214 error ("unsupported operand size for extended register");
14224 if (STACK_TOP_P (x
))
14233 if (! ANY_FP_REG_P (x
))
14234 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
14239 reg
= hi_reg_name
[regno
];
14242 if (regno
>= ARRAY_SIZE (qi_reg_name
))
14244 reg
= qi_reg_name
[regno
];
14247 if (regno
>= ARRAY_SIZE (qi_high_reg_name
))
14249 reg
= qi_high_reg_name
[regno
];
14254 gcc_assert (!duplicated
);
14256 fputs (hi_reg_name
[regno
] + 1, file
);
14261 gcc_unreachable ();
14267 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14268 fprintf (file
, ", %%%s", reg
);
14270 fprintf (file
, ", %s", reg
);
14274 /* Locate some local-dynamic symbol still in use by this function
14275 so that we can print its name in some tls_local_dynamic_base
14279 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
14283 if (GET_CODE (x
) == SYMBOL_REF
14284 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
14286 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
14293 static const char *
14294 get_some_local_dynamic_name (void)
14298 if (cfun
->machine
->some_ld_name
)
14299 return cfun
->machine
->some_ld_name
;
14301 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14302 if (NONDEBUG_INSN_P (insn
)
14303 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
14304 return cfun
->machine
->some_ld_name
;
14309 /* Meaning of CODE:
14310 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14311 C -- print opcode suffix for set/cmov insn.
14312 c -- like C, but print reversed condition
14313 F,f -- likewise, but for floating-point.
14314 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14316 R -- print the prefix for register names.
14317 z -- print the opcode suffix for the size of the current operand.
14318 Z -- likewise, with special suffixes for x87 instructions.
14319 * -- print a star (in certain assembler syntax)
14320 A -- print an absolute memory reference.
14321 E -- print address with DImode register names if TARGET_64BIT.
14322 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14323 s -- print a shift double count, followed by the assemblers argument
14325 b -- print the QImode name of the register for the indicated operand.
14326 %b0 would print %al if operands[0] is reg 0.
14327 w -- likewise, print the HImode name of the register.
14328 k -- likewise, print the SImode name of the register.
14329 q -- likewise, print the DImode name of the register.
14330 x -- likewise, print the V4SFmode name of the register.
14331 t -- likewise, print the V8SFmode name of the register.
14332 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14333 y -- print "st(0)" instead of "st" as a register.
14334 d -- print duplicated register operand for AVX instruction.
14335 D -- print condition for SSE cmp instruction.
14336 P -- if PIC, print an @PLT suffix.
14337 p -- print raw symbol name.
14338 X -- don't print any sort of PIC '@' suffix for a symbol.
14339 & -- print some in-use local-dynamic symbol name.
14340 H -- print a memory address offset by 8; used for sse high-parts
14341 Y -- print condition for XOP pcom* instruction.
14342 + -- print a branch hint as 'cs' or 'ds' prefix
14343 ; -- print a semicolon (after prefixes due to bug in older gas).
14344 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14345 @ -- print a segment register of thread base pointer load
14346 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14350 ix86_print_operand (FILE *file
, rtx x
, int code
)
14357 switch (ASSEMBLER_DIALECT
)
14364 /* Intel syntax. For absolute addresses, registers should not
14365 be surrounded by braces. */
14369 ix86_print_operand (file
, x
, 0);
14376 gcc_unreachable ();
14379 ix86_print_operand (file
, x
, 0);
14383 /* Wrap address in an UNSPEC to declare special handling. */
14385 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
14387 output_address (x
);
14391 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14396 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14401 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14406 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14411 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14416 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14421 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14422 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
14425 switch (GET_MODE_SIZE (GET_MODE (x
)))
14440 output_operand_lossage
14441 ("invalid operand size for operand code 'O'");
14450 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14452 /* Opcodes don't get size suffixes if using Intel opcodes. */
14453 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14456 switch (GET_MODE_SIZE (GET_MODE (x
)))
14475 output_operand_lossage
14476 ("invalid operand size for operand code 'z'");
14481 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14483 (0, "non-integer operand used with operand code 'z'");
14487 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14488 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14491 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14493 switch (GET_MODE_SIZE (GET_MODE (x
)))
14496 #ifdef HAVE_AS_IX86_FILDS
14506 #ifdef HAVE_AS_IX86_FILDQ
14509 fputs ("ll", file
);
14517 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14519 /* 387 opcodes don't get size suffixes
14520 if the operands are registers. */
14521 if (STACK_REG_P (x
))
14524 switch (GET_MODE_SIZE (GET_MODE (x
)))
14545 output_operand_lossage
14546 ("invalid operand type used with operand code 'Z'");
14550 output_operand_lossage
14551 ("invalid operand size for operand code 'Z'");
14569 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14571 ix86_print_operand (file
, x
, 0);
14572 fputs (", ", file
);
14577 switch (GET_CODE (x
))
14580 fputs ("neq", file
);
14583 fputs ("eq", file
);
14587 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14591 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14595 fputs ("le", file
);
14599 fputs ("lt", file
);
14602 fputs ("unord", file
);
14605 fputs ("ord", file
);
14608 fputs ("ueq", file
);
14611 fputs ("nlt", file
);
14614 fputs ("nle", file
);
14617 fputs ("ule", file
);
14620 fputs ("ult", file
);
14623 fputs ("une", file
);
14626 output_operand_lossage ("operand is not a condition code, "
14627 "invalid operand code 'Y'");
14633 /* Little bit of braindamage here. The SSE compare instructions
14634 does use completely different names for the comparisons that the
14635 fp conditional moves. */
14636 switch (GET_CODE (x
))
14641 fputs ("eq_us", file
);
14645 fputs ("eq", file
);
14650 fputs ("nge", file
);
14654 fputs ("lt", file
);
14659 fputs ("ngt", file
);
14663 fputs ("le", file
);
14666 fputs ("unord", file
);
14671 fputs ("neq_oq", file
);
14675 fputs ("neq", file
);
14680 fputs ("ge", file
);
14684 fputs ("nlt", file
);
14689 fputs ("gt", file
);
14693 fputs ("nle", file
);
14696 fputs ("ord", file
);
14699 output_operand_lossage ("operand is not a condition code, "
14700 "invalid operand code 'D'");
14707 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14708 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14714 if (!COMPARISON_P (x
))
14716 output_operand_lossage ("operand is not a condition code, "
14717 "invalid operand code '%c'", code
);
14720 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
14721 code
== 'c' || code
== 'f',
14722 code
== 'F' || code
== 'f',
14727 if (!offsettable_memref_p (x
))
14729 output_operand_lossage ("operand is not an offsettable memory "
14730 "reference, invalid operand code 'H'");
14733 /* It doesn't actually matter what mode we use here, as we're
14734 only going to use this for printing. */
14735 x
= adjust_address_nv (x
, DImode
, 8);
14736 /* Output 'qword ptr' for intel assembler dialect. */
14737 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14742 gcc_assert (CONST_INT_P (x
));
14744 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
14745 #ifdef HAVE_AS_IX86_HLE
14746 fputs ("xacquire ", file
);
14748 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
14750 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
14751 #ifdef HAVE_AS_IX86_HLE
14752 fputs ("xrelease ", file
);
14754 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
14756 /* We do not want to print value of the operand. */
14760 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14766 const char *name
= get_some_local_dynamic_name ();
14768 output_operand_lossage ("'%%&' used without any "
14769 "local dynamic TLS references");
14771 assemble_name (file
, name
);
14780 || optimize_function_for_size_p (cfun
)
14781 || !TARGET_BRANCH_PREDICTION_HINTS
)
14784 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
14787 int pred_val
= INTVAL (XEXP (x
, 0));
14789 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
14790 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
14792 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
14794 = final_forward_branch_p (current_output_insn
) == 0;
14796 /* Emit hints only in the case default branch prediction
14797 heuristics would fail. */
14798 if (taken
!= cputaken
)
14800 /* We use 3e (DS) prefix for taken branches and
14801 2e (CS) prefix for not taken branches. */
14803 fputs ("ds ; ", file
);
14805 fputs ("cs ; ", file
);
14813 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14819 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14822 /* The kernel uses a different segment register for performance
14823 reasons; a system call would not have to trash the userspace
14824 segment register, which would be expensive. */
14825 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
14826 fputs ("fs", file
);
14828 fputs ("gs", file
);
14832 putc (TARGET_AVX2
? 'i' : 'f', file
);
14836 if (TARGET_64BIT
&& Pmode
!= word_mode
)
14837 fputs ("addr32 ", file
);
14841 output_operand_lossage ("invalid operand code '%c'", code
);
14846 print_reg (x
, code
, file
);
14848 else if (MEM_P (x
))
14850 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14851 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
14852 && GET_MODE (x
) != BLKmode
)
14855 switch (GET_MODE_SIZE (GET_MODE (x
)))
14857 case 1: size
= "BYTE"; break;
14858 case 2: size
= "WORD"; break;
14859 case 4: size
= "DWORD"; break;
14860 case 8: size
= "QWORD"; break;
14861 case 12: size
= "TBYTE"; break;
14863 if (GET_MODE (x
) == XFmode
)
14868 case 32: size
= "YMMWORD"; break;
14870 gcc_unreachable ();
14873 /* Check for explicit size override (codes 'b', 'w', 'k',
14877 else if (code
== 'w')
14879 else if (code
== 'k')
14881 else if (code
== 'q')
14883 else if (code
== 'x')
14886 fputs (size
, file
);
14887 fputs (" PTR ", file
);
14891 /* Avoid (%rip) for call operands. */
14892 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
14893 && !CONST_INT_P (x
))
14894 output_addr_const (file
, x
);
14895 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
14896 output_operand_lossage ("invalid constraints for operand");
14898 output_address (x
);
14901 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
14906 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14907 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
14909 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14911 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14913 fprintf (file
, "0x%08" HOST_LONG_LONG_FORMAT
"x",
14914 (unsigned long long) (int) l
);
14916 fprintf (file
, "0x%08x", (unsigned int) l
);
14919 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
14924 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14925 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
14927 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14929 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
14932 /* These float cases don't actually occur as immediate operands. */
14933 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
14937 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
14938 fputs (dstr
, file
);
14943 /* We have patterns that allow zero sets of memory, for instance.
14944 In 64-bit mode, we should probably support all 8-byte vectors,
14945 since we can in fact encode that into an immediate. */
14946 if (GET_CODE (x
) == CONST_VECTOR
)
14948 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
14952 if (code
!= 'P' && code
!= 'p')
14954 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
14956 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14959 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
14960 || GET_CODE (x
) == LABEL_REF
)
14962 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14965 fputs ("OFFSET FLAT:", file
);
14968 if (CONST_INT_P (x
))
14969 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
14970 else if (flag_pic
|| MACHOPIC_INDIRECT
)
14971 output_pic_addr_const (file
, x
, code
);
14973 output_addr_const (file
, x
);
14978 ix86_print_operand_punct_valid_p (unsigned char code
)
14980 return (code
== '@' || code
== '*' || code
== '+' || code
== '&'
14981 || code
== ';' || code
== '~' || code
== '^');
14984 /* Print a memory operand whose address is ADDR. */
14987 ix86_print_operand_address (FILE *file
, rtx addr
)
14989 struct ix86_address parts
;
14990 rtx base
, index
, disp
;
14996 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
14998 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14999 gcc_assert (parts
.index
== NULL_RTX
);
15000 parts
.index
= XVECEXP (addr
, 0, 1);
15001 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
15002 addr
= XVECEXP (addr
, 0, 0);
15005 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
15007 gcc_assert (TARGET_64BIT
);
15008 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
15012 ok
= ix86_decompose_address (addr
, &parts
);
15017 index
= parts
.index
;
15019 scale
= parts
.scale
;
15027 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15029 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
15032 gcc_unreachable ();
15035 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15036 if (TARGET_64BIT
&& !base
&& !index
)
15040 if (GET_CODE (disp
) == CONST
15041 && GET_CODE (XEXP (disp
, 0)) == PLUS
15042 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
15043 symbol
= XEXP (XEXP (disp
, 0), 0);
15045 if (GET_CODE (symbol
) == LABEL_REF
15046 || (GET_CODE (symbol
) == SYMBOL_REF
15047 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
15050 if (!base
&& !index
)
15052 /* Displacement only requires special attention. */
15054 if (CONST_INT_P (disp
))
15056 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
15057 fputs ("ds:", file
);
15058 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
15061 output_pic_addr_const (file
, disp
, 0);
15063 output_addr_const (file
, disp
);
15067 /* Print SImode register names to force addr32 prefix. */
15068 if (SImode_address_operand (addr
, VOIDmode
))
15070 #ifdef ENABLE_CHECKING
15071 gcc_assert (TARGET_64BIT
);
15072 switch (GET_CODE (addr
))
15075 gcc_assert (GET_MODE (addr
) == SImode
);
15076 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
15080 gcc_assert (GET_MODE (addr
) == DImode
);
15083 gcc_unreachable ();
15086 gcc_assert (!code
);
15092 && CONST_INT_P (disp
)
15093 && INTVAL (disp
) < -16*1024*1024)
15095 /* X32 runs in 64-bit mode, where displacement, DISP, in
15096 address DISP(%r64), is encoded as 32-bit immediate sign-
15097 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15098 address is %r64 + 0xffffffffbffffd00. When %r64 <
15099 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15100 which is invalid for x32. The correct address is %r64
15101 - 0x40000300 == 0xf7ffdd64. To properly encode
15102 -0x40000300(%r64) for x32, we zero-extend negative
15103 displacement by forcing addr32 prefix which truncates
15104 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15105 zero-extend all negative displacements, including -1(%rsp).
15106 However, for small negative displacements, sign-extension
15107 won't cause overflow. We only zero-extend negative
15108 displacements if they < -16*1024*1024, which is also used
15109 to check legitimate address displacements for PIC. */
15113 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15118 output_pic_addr_const (file
, disp
, 0);
15119 else if (GET_CODE (disp
) == LABEL_REF
)
15120 output_asm_label (disp
);
15122 output_addr_const (file
, disp
);
15127 print_reg (base
, code
, file
);
15131 print_reg (index
, vsib
? 0 : code
, file
);
15132 if (scale
!= 1 || vsib
)
15133 fprintf (file
, ",%d", scale
);
15139 rtx offset
= NULL_RTX
;
15143 /* Pull out the offset of a symbol; print any symbol itself. */
15144 if (GET_CODE (disp
) == CONST
15145 && GET_CODE (XEXP (disp
, 0)) == PLUS
15146 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
15148 offset
= XEXP (XEXP (disp
, 0), 1);
15149 disp
= gen_rtx_CONST (VOIDmode
,
15150 XEXP (XEXP (disp
, 0), 0));
15154 output_pic_addr_const (file
, disp
, 0);
15155 else if (GET_CODE (disp
) == LABEL_REF
)
15156 output_asm_label (disp
);
15157 else if (CONST_INT_P (disp
))
15160 output_addr_const (file
, disp
);
15166 print_reg (base
, code
, file
);
15169 if (INTVAL (offset
) >= 0)
15171 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
15175 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
15182 print_reg (index
, vsib
? 0 : code
, file
);
15183 if (scale
!= 1 || vsib
)
15184 fprintf (file
, "*%d", scale
);
15191 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15194 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
15198 if (GET_CODE (x
) != UNSPEC
)
15201 op
= XVECEXP (x
, 0, 0);
15202 switch (XINT (x
, 1))
15204 case UNSPEC_GOTTPOFF
:
15205 output_addr_const (file
, op
);
15206 /* FIXME: This might be @TPOFF in Sun ld. */
15207 fputs ("@gottpoff", file
);
15210 output_addr_const (file
, op
);
15211 fputs ("@tpoff", file
);
15213 case UNSPEC_NTPOFF
:
15214 output_addr_const (file
, op
);
15216 fputs ("@tpoff", file
);
15218 fputs ("@ntpoff", file
);
15220 case UNSPEC_DTPOFF
:
15221 output_addr_const (file
, op
);
15222 fputs ("@dtpoff", file
);
15224 case UNSPEC_GOTNTPOFF
:
15225 output_addr_const (file
, op
);
15227 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
15228 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
15230 fputs ("@gotntpoff", file
);
15232 case UNSPEC_INDNTPOFF
:
15233 output_addr_const (file
, op
);
15234 fputs ("@indntpoff", file
);
15237 case UNSPEC_MACHOPIC_OFFSET
:
15238 output_addr_const (file
, op
);
15240 machopic_output_function_base_name (file
);
15244 case UNSPEC_STACK_CHECK
:
15248 gcc_assert (flag_split_stack
);
15250 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15251 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
15253 gcc_unreachable ();
15256 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
15267 /* Split one or more double-mode RTL references into pairs of half-mode
15268 references. The RTL can be REG, offsettable MEM, integer constant, or
15269 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15270 split and "num" is its length. lo_half and hi_half are output arrays
15271 that parallel "operands". */
15274 split_double_mode (enum machine_mode mode
, rtx operands
[],
15275 int num
, rtx lo_half
[], rtx hi_half
[])
15277 enum machine_mode half_mode
;
15283 half_mode
= DImode
;
15286 half_mode
= SImode
;
15289 gcc_unreachable ();
15292 byte
= GET_MODE_SIZE (half_mode
);
15296 rtx op
= operands
[num
];
15298 /* simplify_subreg refuse to split volatile memory addresses,
15299 but we still have to handle it. */
15302 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
15303 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
15307 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15308 GET_MODE (op
) == VOIDmode
15309 ? mode
: GET_MODE (op
), 0);
15310 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15311 GET_MODE (op
) == VOIDmode
15312 ? mode
: GET_MODE (op
), byte
);
15317 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15318 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15319 is the expression of the binary operation. The output may either be
15320 emitted here, or returned to the caller, like all output_* functions.
15322 There is no guarantee that the operands are the same mode, as they
15323 might be within FLOAT or FLOAT_EXTEND expressions. */
15325 #ifndef SYSV386_COMPAT
15326 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15327 wants to fix the assemblers because that causes incompatibility
15328 with gcc. No-one wants to fix gcc because that causes
15329 incompatibility with assemblers... You can use the option of
15330 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15331 #define SYSV386_COMPAT 1
15335 output_387_binary_op (rtx insn
, rtx
*operands
)
15337 static char buf
[40];
15340 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
15342 #ifdef ENABLE_CHECKING
15343 /* Even if we do not want to check the inputs, this documents input
15344 constraints. Which helps in understanding the following code. */
15345 if (STACK_REG_P (operands
[0])
15346 && ((REG_P (operands
[1])
15347 && REGNO (operands
[0]) == REGNO (operands
[1])
15348 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
15349 || (REG_P (operands
[2])
15350 && REGNO (operands
[0]) == REGNO (operands
[2])
15351 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
15352 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
15355 gcc_assert (is_sse
);
15358 switch (GET_CODE (operands
[3]))
15361 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15362 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15370 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15371 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15379 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15380 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15388 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15389 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15397 gcc_unreachable ();
15404 strcpy (buf
, ssep
);
15405 if (GET_MODE (operands
[0]) == SFmode
)
15406 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
15408 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
15412 strcpy (buf
, ssep
+ 1);
15413 if (GET_MODE (operands
[0]) == SFmode
)
15414 strcat (buf
, "ss\t{%2, %0|%0, %2}");
15416 strcat (buf
, "sd\t{%2, %0|%0, %2}");
15422 switch (GET_CODE (operands
[3]))
15426 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
15428 rtx temp
= operands
[2];
15429 operands
[2] = operands
[1];
15430 operands
[1] = temp
;
15433 /* know operands[0] == operands[1]. */
15435 if (MEM_P (operands
[2]))
15441 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15443 if (STACK_TOP_P (operands
[0]))
15444 /* How is it that we are storing to a dead operand[2]?
15445 Well, presumably operands[1] is dead too. We can't
15446 store the result to st(0) as st(0) gets popped on this
15447 instruction. Instead store to operands[2] (which I
15448 think has to be st(1)). st(1) will be popped later.
15449 gcc <= 2.8.1 didn't have this check and generated
15450 assembly code that the Unixware assembler rejected. */
15451 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15453 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15457 if (STACK_TOP_P (operands
[0]))
15458 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15460 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15465 if (MEM_P (operands
[1]))
15471 if (MEM_P (operands
[2]))
15477 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15480 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15481 derived assemblers, confusingly reverse the direction of
15482 the operation for fsub{r} and fdiv{r} when the
15483 destination register is not st(0). The Intel assembler
15484 doesn't have this brain damage. Read !SYSV386_COMPAT to
15485 figure out what the hardware really does. */
15486 if (STACK_TOP_P (operands
[0]))
15487 p
= "{p\t%0, %2|rp\t%2, %0}";
15489 p
= "{rp\t%2, %0|p\t%0, %2}";
15491 if (STACK_TOP_P (operands
[0]))
15492 /* As above for fmul/fadd, we can't store to st(0). */
15493 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15495 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15500 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15503 if (STACK_TOP_P (operands
[0]))
15504 p
= "{rp\t%0, %1|p\t%1, %0}";
15506 p
= "{p\t%1, %0|rp\t%0, %1}";
15508 if (STACK_TOP_P (operands
[0]))
15509 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15511 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15516 if (STACK_TOP_P (operands
[0]))
15518 if (STACK_TOP_P (operands
[1]))
15519 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15521 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15524 else if (STACK_TOP_P (operands
[1]))
15527 p
= "{\t%1, %0|r\t%0, %1}";
15529 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15535 p
= "{r\t%2, %0|\t%0, %2}";
15537 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15543 gcc_unreachable ();
15550 /* Check if a 256bit AVX register is referenced inside of EXP. */
15553 ix86_check_avx256_register (rtx
*pexp
, void *data ATTRIBUTE_UNUSED
)
15557 if (GET_CODE (exp
) == SUBREG
)
15558 exp
= SUBREG_REG (exp
);
15561 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp
)))
15567 /* Return needed mode for entity in optimize_mode_switching pass. */
15570 ix86_avx_u128_mode_needed (rtx insn
)
15576 /* Needed mode is set to AVX_U128_CLEAN if there are
15577 no 256bit modes used in function arguments. */
15578 for (link
= CALL_INSN_FUNCTION_USAGE (insn
);
15580 link
= XEXP (link
, 1))
15582 if (GET_CODE (XEXP (link
, 0)) == USE
)
15584 rtx arg
= XEXP (XEXP (link
, 0), 0);
15586 if (ix86_check_avx256_register (&arg
, NULL
))
15587 return AVX_U128_ANY
;
15591 return AVX_U128_CLEAN
;
15594 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
15595 changes state only when a 256bit register is written to, but we need
15596 to prevent the compiler from moving optimal insertion point above
15597 eventual read from 256bit register. */
15598 if (for_each_rtx (&PATTERN (insn
), ix86_check_avx256_register
, NULL
))
15599 return AVX_U128_DIRTY
;
15601 return AVX_U128_ANY
;
15604 /* Return mode that i387 must be switched into
15605 prior to the execution of insn. */
15608 ix86_i387_mode_needed (int entity
, rtx insn
)
15610 enum attr_i387_cw mode
;
15612 /* The mode UNINITIALIZED is used to store control word after a
15613 function call or ASM pattern. The mode ANY specify that function
15614 has no requirements on the control word and make no changes in the
15615 bits we are interested in. */
15618 || (NONJUMP_INSN_P (insn
)
15619 && (asm_noperands (PATTERN (insn
)) >= 0
15620 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
15621 return I387_CW_UNINITIALIZED
;
15623 if (recog_memoized (insn
) < 0)
15624 return I387_CW_ANY
;
15626 mode
= get_attr_i387_cw (insn
);
15631 if (mode
== I387_CW_TRUNC
)
15636 if (mode
== I387_CW_FLOOR
)
15641 if (mode
== I387_CW_CEIL
)
15646 if (mode
== I387_CW_MASK_PM
)
15651 gcc_unreachable ();
15654 return I387_CW_ANY
;
15657 /* Return mode that entity must be switched into
15658 prior to the execution of insn. */
15661 ix86_mode_needed (int entity
, rtx insn
)
15666 return ix86_avx_u128_mode_needed (insn
);
15671 return ix86_i387_mode_needed (entity
, insn
);
15673 gcc_unreachable ();
15678 /* Check if a 256bit AVX register is referenced in stores. */
15681 ix86_check_avx256_stores (rtx dest
, const_rtx set ATTRIBUTE_UNUSED
, void *data
)
15683 if (ix86_check_avx256_register (&dest
, NULL
))
15685 bool *used
= (bool *) data
;
15690 /* Calculate mode of upper 128bit AVX registers after the insn. */
15693 ix86_avx_u128_mode_after (int mode
, rtx insn
)
15695 rtx pat
= PATTERN (insn
);
15697 if (vzeroupper_operation (pat
, VOIDmode
)
15698 || vzeroall_operation (pat
, VOIDmode
))
15699 return AVX_U128_CLEAN
;
15701 /* We know that state is clean after CALL insn if there are no
15702 256bit registers used in the function return register. */
15705 bool avx_reg256_found
= false;
15706 note_stores (pat
, ix86_check_avx256_stores
, &avx_reg256_found
);
15707 if (!avx_reg256_found
)
15708 return AVX_U128_CLEAN
;
15711 /* Otherwise, return current mode. Remember that if insn
15712 references AVX 256bit registers, the mode was already changed
15713 to DIRTY from MODE_NEEDED. */
15717 /* Return the mode that an insn results in. */
15720 ix86_mode_after (int entity
, int mode
, rtx insn
)
15725 return ix86_avx_u128_mode_after (mode
, insn
);
15732 gcc_unreachable ();
15737 ix86_avx_u128_mode_entry (void)
15741 /* Entry mode is set to AVX_U128_DIRTY if there are
15742 256bit modes used in function arguments. */
15743 for (arg
= DECL_ARGUMENTS (current_function_decl
); arg
;
15744 arg
= TREE_CHAIN (arg
))
15746 rtx incoming
= DECL_INCOMING_RTL (arg
);
15748 if (incoming
&& ix86_check_avx256_register (&incoming
, NULL
))
15749 return AVX_U128_DIRTY
;
15752 return AVX_U128_CLEAN
;
15755 /* Return a mode that ENTITY is assumed to be
15756 switched to at function entry. */
15759 ix86_mode_entry (int entity
)
15764 return ix86_avx_u128_mode_entry ();
15769 return I387_CW_ANY
;
15771 gcc_unreachable ();
15776 ix86_avx_u128_mode_exit (void)
15778 rtx reg
= crtl
->return_rtx
;
15780 /* Exit mode is set to AVX_U128_DIRTY if there are
15781 256bit modes used in the function return register. */
15782 if (reg
&& ix86_check_avx256_register (®
, NULL
))
15783 return AVX_U128_DIRTY
;
15785 return AVX_U128_CLEAN
;
15788 /* Return a mode that ENTITY is assumed to be
15789 switched to at function exit. */
15792 ix86_mode_exit (int entity
)
15797 return ix86_avx_u128_mode_exit ();
15802 return I387_CW_ANY
;
15804 gcc_unreachable ();
15808 /* Output code to initialize control word copies used by trunc?f?i and
15809 rounding patterns. CURRENT_MODE is set to current control word,
15810 while NEW_MODE is set to new control word. */
15813 emit_i387_cw_initialization (int mode
)
15815 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
15818 enum ix86_stack_slot slot
;
15820 rtx reg
= gen_reg_rtx (HImode
);
15822 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
15823 emit_move_insn (reg
, copy_rtx (stored_mode
));
15825 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
15826 || optimize_insn_for_size_p ())
15830 case I387_CW_TRUNC
:
15831 /* round toward zero (truncate) */
15832 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
15833 slot
= SLOT_CW_TRUNC
;
15836 case I387_CW_FLOOR
:
15837 /* round down toward -oo */
15838 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15839 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
15840 slot
= SLOT_CW_FLOOR
;
15844 /* round up toward +oo */
15845 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15846 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
15847 slot
= SLOT_CW_CEIL
;
15850 case I387_CW_MASK_PM
:
15851 /* mask precision exception for nearbyint() */
15852 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15853 slot
= SLOT_CW_MASK_PM
;
15857 gcc_unreachable ();
15864 case I387_CW_TRUNC
:
15865 /* round toward zero (truncate) */
15866 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
15867 slot
= SLOT_CW_TRUNC
;
15870 case I387_CW_FLOOR
:
15871 /* round down toward -oo */
15872 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
15873 slot
= SLOT_CW_FLOOR
;
15877 /* round up toward +oo */
15878 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
15879 slot
= SLOT_CW_CEIL
;
15882 case I387_CW_MASK_PM
:
15883 /* mask precision exception for nearbyint() */
15884 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15885 slot
= SLOT_CW_MASK_PM
;
15889 gcc_unreachable ();
15893 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
15895 new_mode
= assign_386_stack_local (HImode
, slot
);
15896 emit_move_insn (new_mode
, reg
);
15899 /* Emit vzeroupper. */
15902 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live
)
15906 /* Cancel automatic vzeroupper insertion if there are
15907 live call-saved SSE registers at the insertion point. */
15909 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
15910 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
15914 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
15915 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
15918 emit_insn (gen_avx_vzeroupper ());
15921 /* Generate one or more insns to set ENTITY to MODE. */
15924 ix86_emit_mode_set (int entity
, int mode
, HARD_REG_SET regs_live
)
15929 if (mode
== AVX_U128_CLEAN
)
15930 ix86_avx_emit_vzeroupper (regs_live
);
15936 if (mode
!= I387_CW_ANY
15937 && mode
!= I387_CW_UNINITIALIZED
)
15938 emit_i387_cw_initialization (mode
);
15941 gcc_unreachable ();
15945 /* Output code for INSN to convert a float to a signed int. OPERANDS
15946 are the insn operands. The output may be [HSD]Imode and the input
15947 operand may be [SDX]Fmode. */
15950 output_fix_trunc (rtx insn
, rtx
*operands
, bool fisttp
)
15952 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15953 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
15954 int round_mode
= get_attr_i387_cw (insn
);
15956 /* Jump through a hoop or two for DImode, since the hardware has no
15957 non-popping instruction. We used to do this a different way, but
15958 that was somewhat fragile and broke with post-reload splitters. */
15959 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
15960 output_asm_insn ("fld\t%y1", operands
);
15962 gcc_assert (STACK_TOP_P (operands
[1]));
15963 gcc_assert (MEM_P (operands
[0]));
15964 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
15967 output_asm_insn ("fisttp%Z0\t%0", operands
);
15970 if (round_mode
!= I387_CW_ANY
)
15971 output_asm_insn ("fldcw\t%3", operands
);
15972 if (stack_top_dies
|| dimode_p
)
15973 output_asm_insn ("fistp%Z0\t%0", operands
);
15975 output_asm_insn ("fist%Z0\t%0", operands
);
15976 if (round_mode
!= I387_CW_ANY
)
15977 output_asm_insn ("fldcw\t%2", operands
);
15983 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15984 have the values zero or one, indicates the ffreep insn's operand
15985 from the OPERANDS array. */
15987 static const char *
15988 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
15990 if (TARGET_USE_FFREEP
)
15991 #ifdef HAVE_AS_IX86_FFREEP
15992 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
15995 static char retval
[32];
15996 int regno
= REGNO (operands
[opno
]);
15998 gcc_assert (STACK_REGNO_P (regno
));
16000 regno
-= FIRST_STACK_REG
;
16002 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
16007 return opno
? "fstp\t%y1" : "fstp\t%y0";
16011 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16012 should be used. UNORDERED_P is true when fucom should be used. */
16015 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
16017 int stack_top_dies
;
16018 rtx cmp_op0
, cmp_op1
;
16019 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
16023 cmp_op0
= operands
[0];
16024 cmp_op1
= operands
[1];
16028 cmp_op0
= operands
[1];
16029 cmp_op1
= operands
[2];
16034 if (GET_MODE (operands
[0]) == SFmode
)
16036 return "%vucomiss\t{%1, %0|%0, %1}";
16038 return "%vcomiss\t{%1, %0|%0, %1}";
16041 return "%vucomisd\t{%1, %0|%0, %1}";
16043 return "%vcomisd\t{%1, %0|%0, %1}";
16046 gcc_assert (STACK_TOP_P (cmp_op0
));
16048 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
16050 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
16052 if (stack_top_dies
)
16054 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
16055 return output_387_ffreep (operands
, 1);
16058 return "ftst\n\tfnstsw\t%0";
16061 if (STACK_REG_P (cmp_op1
)
16063 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
16064 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
16066 /* If both the top of the 387 stack dies, and the other operand
16067 is also a stack register that dies, then this must be a
16068 `fcompp' float compare */
16072 /* There is no double popping fcomi variant. Fortunately,
16073 eflags is immune from the fstp's cc clobbering. */
16075 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
16077 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
16078 return output_387_ffreep (operands
, 0);
16083 return "fucompp\n\tfnstsw\t%0";
16085 return "fcompp\n\tfnstsw\t%0";
16090 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16092 static const char * const alt
[16] =
16094 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16095 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16096 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16097 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16099 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16100 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16104 "fcomi\t{%y1, %0|%0, %y1}",
16105 "fcomip\t{%y1, %0|%0, %y1}",
16106 "fucomi\t{%y1, %0|%0, %y1}",
16107 "fucomip\t{%y1, %0|%0, %y1}",
16118 mask
= eflags_p
<< 3;
16119 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
16120 mask
|= unordered_p
<< 1;
16121 mask
|= stack_top_dies
;
16123 gcc_assert (mask
< 16);
16132 ix86_output_addr_vec_elt (FILE *file
, int value
)
16134 const char *directive
= ASM_LONG
;
16138 directive
= ASM_QUAD
;
16140 gcc_assert (!TARGET_64BIT
);
16143 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
16147 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
16149 const char *directive
= ASM_LONG
;
16152 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
16153 directive
= ASM_QUAD
;
16155 gcc_assert (!TARGET_64BIT
);
16157 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16158 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
16159 fprintf (file
, "%s%s%d-%s%d\n",
16160 directive
, LPREFIX
, value
, LPREFIX
, rel
);
16161 else if (HAVE_AS_GOTOFF_IN_DATA
)
16162 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
16164 else if (TARGET_MACHO
)
16166 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
16167 machopic_output_function_base_name (file
);
16172 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
16173 GOT_SYMBOL_NAME
, LPREFIX
, value
);
16176 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16180 ix86_expand_clear (rtx dest
)
16184 /* We play register width games, which are only valid after reload. */
16185 gcc_assert (reload_completed
);
16187 /* Avoid HImode and its attendant prefix byte. */
16188 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
16189 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
16190 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
16192 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16193 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
16195 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16196 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
16202 /* X is an unchanging MEM. If it is a constant pool reference, return
16203 the constant pool rtx, else NULL. */
16206 maybe_get_pool_constant (rtx x
)
16208 x
= ix86_delegitimize_address (XEXP (x
, 0));
16210 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
16211 return get_pool_constant (x
);
16217 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
16220 enum tls_model model
;
16225 if (GET_CODE (op1
) == SYMBOL_REF
)
16229 model
= SYMBOL_REF_TLS_MODEL (op1
);
16232 op1
= legitimize_tls_address (op1
, model
, true);
16233 op1
= force_operand (op1
, op0
);
16236 op1
= convert_to_mode (mode
, op1
, 1);
16238 else if ((tmp
= legitimize_pe_coff_symbol (op1
, false)) != NULL_RTX
)
16241 else if (GET_CODE (op1
) == CONST
16242 && GET_CODE (XEXP (op1
, 0)) == PLUS
16243 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
16245 rtx addend
= XEXP (XEXP (op1
, 0), 1);
16246 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
16249 model
= SYMBOL_REF_TLS_MODEL (symbol
);
16251 tmp
= legitimize_tls_address (symbol
, model
, true);
16253 tmp
= legitimize_pe_coff_symbol (symbol
, true);
16257 tmp
= force_operand (tmp
, NULL
);
16258 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
16259 op0
, 1, OPTAB_DIRECT
);
16262 op1
= convert_to_mode (mode
, tmp
, 1);
16266 if ((flag_pic
|| MACHOPIC_INDIRECT
)
16267 && symbolic_operand (op1
, mode
))
16269 if (TARGET_MACHO
&& !TARGET_64BIT
)
16272 /* dynamic-no-pic */
16273 if (MACHOPIC_INDIRECT
)
16275 rtx temp
= ((reload_in_progress
16276 || ((op0
&& REG_P (op0
))
16278 ? op0
: gen_reg_rtx (Pmode
));
16279 op1
= machopic_indirect_data_reference (op1
, temp
);
16281 op1
= machopic_legitimize_pic_address (op1
, mode
,
16282 temp
== op1
? 0 : temp
);
16284 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
16286 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
16290 if (GET_CODE (op0
) == MEM
)
16291 op1
= force_reg (Pmode
, op1
);
16295 if (GET_CODE (temp
) != REG
)
16296 temp
= gen_reg_rtx (Pmode
);
16297 temp
= legitimize_pic_address (op1
, temp
);
16302 /* dynamic-no-pic */
16308 op1
= force_reg (mode
, op1
);
16309 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
16311 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
16312 op1
= legitimize_pic_address (op1
, reg
);
16315 op1
= convert_to_mode (mode
, op1
, 1);
16322 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
16323 || !push_operand (op0
, mode
))
16325 op1
= force_reg (mode
, op1
);
16327 if (push_operand (op0
, mode
)
16328 && ! general_no_elim_operand (op1
, mode
))
16329 op1
= copy_to_mode_reg (mode
, op1
);
16331 /* Force large constants in 64bit compilation into register
16332 to get them CSEed. */
16333 if (can_create_pseudo_p ()
16334 && (mode
== DImode
) && TARGET_64BIT
16335 && immediate_operand (op1
, mode
)
16336 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
16337 && !register_operand (op0
, mode
)
16339 op1
= copy_to_mode_reg (mode
, op1
);
16341 if (can_create_pseudo_p ()
16342 && FLOAT_MODE_P (mode
)
16343 && GET_CODE (op1
) == CONST_DOUBLE
)
16345 /* If we are loading a floating point constant to a register,
16346 force the value to memory now, since we'll get better code
16347 out the back end. */
16349 op1
= validize_mem (force_const_mem (mode
, op1
));
16350 if (!register_operand (op0
, mode
))
16352 rtx temp
= gen_reg_rtx (mode
);
16353 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
16354 emit_move_insn (op0
, temp
);
16360 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16364 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
16366 rtx op0
= operands
[0], op1
= operands
[1];
16367 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
16369 /* Force constants other than zero into memory. We do not know how
16370 the instructions used to build constants modify the upper 64 bits
16371 of the register, once we have that information we may be able
16372 to handle some of them more efficiently. */
16373 if (can_create_pseudo_p ()
16374 && register_operand (op0
, mode
)
16375 && (CONSTANT_P (op1
)
16376 || (GET_CODE (op1
) == SUBREG
16377 && CONSTANT_P (SUBREG_REG (op1
))))
16378 && !standard_sse_constant_p (op1
))
16379 op1
= validize_mem (force_const_mem (mode
, op1
));
16381 /* We need to check memory alignment for SSE mode since attribute
16382 can make operands unaligned. */
16383 if (can_create_pseudo_p ()
16384 && SSE_REG_MODE_P (mode
)
16385 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
16386 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
16390 /* ix86_expand_vector_move_misalign() does not like constants ... */
16391 if (CONSTANT_P (op1
)
16392 || (GET_CODE (op1
) == SUBREG
16393 && CONSTANT_P (SUBREG_REG (op1
))))
16394 op1
= validize_mem (force_const_mem (mode
, op1
));
16396 /* ... nor both arguments in memory. */
16397 if (!register_operand (op0
, mode
)
16398 && !register_operand (op1
, mode
))
16399 op1
= force_reg (mode
, op1
);
16401 tmp
[0] = op0
; tmp
[1] = op1
;
16402 ix86_expand_vector_move_misalign (mode
, tmp
);
16406 /* Make operand1 a register if it isn't already. */
16407 if (can_create_pseudo_p ()
16408 && !register_operand (op0
, mode
)
16409 && !register_operand (op1
, mode
))
16411 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
16415 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16418 /* Split 32-byte AVX unaligned load and store if needed. */
16421 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
16424 rtx (*extract
) (rtx
, rtx
, rtx
);
16425 rtx (*load_unaligned
) (rtx
, rtx
);
16426 rtx (*store_unaligned
) (rtx
, rtx
);
16427 enum machine_mode mode
;
16429 switch (GET_MODE (op0
))
16432 gcc_unreachable ();
16434 extract
= gen_avx_vextractf128v32qi
;
16435 load_unaligned
= gen_avx_loaddqu256
;
16436 store_unaligned
= gen_avx_storedqu256
;
16440 extract
= gen_avx_vextractf128v8sf
;
16441 load_unaligned
= gen_avx_loadups256
;
16442 store_unaligned
= gen_avx_storeups256
;
16446 extract
= gen_avx_vextractf128v4df
;
16447 load_unaligned
= gen_avx_loadupd256
;
16448 store_unaligned
= gen_avx_storeupd256
;
16455 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
16457 rtx r
= gen_reg_rtx (mode
);
16458 m
= adjust_address (op1
, mode
, 0);
16459 emit_move_insn (r
, m
);
16460 m
= adjust_address (op1
, mode
, 16);
16461 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
16462 emit_move_insn (op0
, r
);
16465 emit_insn (load_unaligned (op0
, op1
));
16467 else if (MEM_P (op0
))
16469 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
16471 m
= adjust_address (op0
, mode
, 0);
16472 emit_insn (extract (m
, op1
, const0_rtx
));
16473 m
= adjust_address (op0
, mode
, 16);
16474 emit_insn (extract (m
, op1
, const1_rtx
));
16477 emit_insn (store_unaligned (op0
, op1
));
16480 gcc_unreachable ();
16483 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16484 straight to ix86_expand_vector_move. */
16485 /* Code generation for scalar reg-reg moves of single and double precision data:
16486 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16490 if (x86_sse_partial_reg_dependency == true)
16495 Code generation for scalar loads of double precision data:
16496 if (x86_sse_split_regs == true)
16497 movlpd mem, reg (gas syntax)
16501 Code generation for unaligned packed loads of single precision data
16502 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16503 if (x86_sse_unaligned_move_optimal)
16506 if (x86_sse_partial_reg_dependency == true)
16518 Code generation for unaligned packed loads of double precision data
16519 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16520 if (x86_sse_unaligned_move_optimal)
16523 if (x86_sse_split_regs == true)
16536 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
16544 && GET_MODE_SIZE (mode
) == 32)
16546 switch (GET_MODE_CLASS (mode
))
16548 case MODE_VECTOR_INT
:
16550 op0
= gen_lowpart (V32QImode
, op0
);
16551 op1
= gen_lowpart (V32QImode
, op1
);
16554 case MODE_VECTOR_FLOAT
:
16555 ix86_avx256_split_vector_move_misalign (op0
, op1
);
16559 gcc_unreachable ();
16567 /* ??? If we have typed data, then it would appear that using
16568 movdqu is the only way to get unaligned data loaded with
16570 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16572 op0
= gen_lowpart (V16QImode
, op0
);
16573 op1
= gen_lowpart (V16QImode
, op1
);
16574 /* We will eventually emit movups based on insn attributes. */
16575 emit_insn (gen_sse2_loaddqu (op0
, op1
));
16577 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16582 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16583 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16584 || optimize_insn_for_size_p ())
16586 /* We will eventually emit movups based on insn attributes. */
16587 emit_insn (gen_sse2_loadupd (op0
, op1
));
16591 /* When SSE registers are split into halves, we can avoid
16592 writing to the top half twice. */
16593 if (TARGET_SSE_SPLIT_REGS
)
16595 emit_clobber (op0
);
16600 /* ??? Not sure about the best option for the Intel chips.
16601 The following would seem to satisfy; the register is
16602 entirely cleared, breaking the dependency chain. We
16603 then store to the upper half, with a dependency depth
16604 of one. A rumor has it that Intel recommends two movsd
16605 followed by an unpacklpd, but this is unconfirmed. And
16606 given that the dependency depth of the unpacklpd would
16607 still be one, I'm not sure why this would be better. */
16608 zero
= CONST0_RTX (V2DFmode
);
16611 m
= adjust_address (op1
, DFmode
, 0);
16612 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
16613 m
= adjust_address (op1
, DFmode
, 8);
16614 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
16619 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16620 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16621 || optimize_insn_for_size_p ())
16623 op0
= gen_lowpart (V4SFmode
, op0
);
16624 op1
= gen_lowpart (V4SFmode
, op1
);
16625 emit_insn (gen_sse_loadups (op0
, op1
));
16629 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
16630 emit_move_insn (op0
, CONST0_RTX (mode
));
16632 emit_clobber (op0
);
16634 if (mode
!= V4SFmode
)
16635 op0
= gen_lowpart (V4SFmode
, op0
);
16637 m
= adjust_address (op1
, V2SFmode
, 0);
16638 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
16639 m
= adjust_address (op1
, V2SFmode
, 8);
16640 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
16643 else if (MEM_P (op0
))
16645 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16647 op0
= gen_lowpart (V16QImode
, op0
);
16648 op1
= gen_lowpart (V16QImode
, op1
);
16649 /* We will eventually emit movups based on insn attributes. */
16650 emit_insn (gen_sse2_storedqu (op0
, op1
));
16652 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16655 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16656 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16657 || optimize_insn_for_size_p ())
16658 /* We will eventually emit movups based on insn attributes. */
16659 emit_insn (gen_sse2_storeupd (op0
, op1
));
16662 m
= adjust_address (op0
, DFmode
, 0);
16663 emit_insn (gen_sse2_storelpd (m
, op1
));
16664 m
= adjust_address (op0
, DFmode
, 8);
16665 emit_insn (gen_sse2_storehpd (m
, op1
));
16670 if (mode
!= V4SFmode
)
16671 op1
= gen_lowpart (V4SFmode
, op1
);
16674 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16675 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16676 || optimize_insn_for_size_p ())
16678 op0
= gen_lowpart (V4SFmode
, op0
);
16679 emit_insn (gen_sse_storeups (op0
, op1
));
16683 m
= adjust_address (op0
, V2SFmode
, 0);
16684 emit_insn (gen_sse_storelps (m
, op1
));
16685 m
= adjust_address (op0
, V2SFmode
, 8);
16686 emit_insn (gen_sse_storehps (m
, op1
));
16691 gcc_unreachable ();
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);

  /* When we push an operand onto stack, it has to be aligned at least
     at the function argument boundary.  However since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
  emit_move_insn (tmp, x);
}
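/* Illustrative only: for a 16-byte mode the expansion above amounts to
   something like

       sub     $16, %rsp               ; or an equivalent lea/add form
       mov     x, (%rsp)

   i.e. an explicit stack-pointer adjustment followed by a plain move,
   rather than a real push instruction.  */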
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
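/* Example of the canonicalization above (a sketch, not emitted code):
   for a commutative operation such as PLUS with

       dst = r1,  src1 = mem,  src2 = r1

   src1 and src2 are swapped so that src1 matches dst, which lets the
   two-address form "addl mem, r1" be used directly.  */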
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp;

      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

      temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
	{
	  src2 = force_reg (mode, src2);
	  src1 = src2;
	}
      else
	src2 = force_reg (mode, src2);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  /* Improve address combine.  */
  if (code == PLUS
      && GET_MODE_CLASS (mode) == MODE_INT
      && MEM_P (src2))
    src2 = force_reg (mode, src2);

  operands[1] = src1;
  operands[2] = src2;

  return dst;
}
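/* A sketch of the combined effect of the fixups above: given the
   invalid three-operand form

       mem1 = mem2 + mem3

   src2 (mem3) is forced into a register, dst is replaced by a fresh
   pseudo because it does not match src1, and the caller then emits a
   final copy from that pseudo back to mem1.  */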
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else if (reload_completed
	   && code == PLUS
	   && !rtx_equal_p (dst, src1))
    {
      /* This is going to be an LEA; avoid splitting it later.  */
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
   the given OPERANDS.  */

void
ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
				     rtx operands[])
{
  rtx op1 = NULL_RTX, op2 = NULL_RTX;
  if (GET_CODE (operands[1]) == SUBREG)
    {
      op1 = operands[1];
      op2 = operands[2];
    }
  else if (GET_CODE (operands[2]) == SUBREG)
    {
      op1 = operands[2];
      op2 = operands[1];
    }
  /* Optimize (__m128i) d | (__m128i) e and similar code
     when d and e are float vectors into float vector logical
     insn.  In C/C++ without using intrinsics there is no other way
     to express vector logical operation on float vectors than
     to cast them temporarily to integer vectors.  */
  if (op1
      && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
      && (GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR)
      && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
      && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
      && SUBREG_BYTE (op1) == 0
      && (GET_CODE (op2) == CONST_VECTOR
	  || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
	      && SUBREG_BYTE (op2) == 0))
      && can_create_pseudo_p ())
    {
      rtx dst;
      switch (GET_MODE (SUBREG_REG (op1)))
	{
	case V4SFmode:
	case V8SFmode:
	case V2DFmode:
	case V4DFmode:
	  dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
	  if (GET_CODE (op2) == CONST_VECTOR)
	    {
	      op2 = gen_lowpart (GET_MODE (dst), op2);
	      op2 = force_reg (GET_MODE (dst), op2);
	    }
	  else
	    {
	      op1 = operands[1];
	      op2 = SUBREG_REG (operands[2]);
	      if (!nonimmediate_operand (op2, GET_MODE (dst)))
		op2 = force_reg (GET_MODE (dst), op2);
	    }
	  op1 = SUBREG_REG (op1);
	  if (!nonimmediate_operand (op1, GET_MODE (dst)))
	    op1 = force_reg (GET_MODE (dst), op1);
	  emit_insn (gen_rtx_SET (VOIDmode, dst,
				  gen_rtx_fmt_ee (code, GET_MODE (dst),
						  op1, op2)));
	  emit_move_insn (operands[0], gen_lowpart (mode, dst));
	  return;
	default:
	  break;
	}
    }
  if (!nonimmediate_operand (operands[1], mode))
    operands[1] = force_reg (mode, operands[1]);
  if (!nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  ix86_fixup_binary_operands_no_copy (code, mode, operands);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_fmt_ee (code, mode, operands[1],
					  operands[2])));
}
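/* The SUBREG special case above targets source like the following
   (illustrative C, written without intrinsics):

       __m128 a, b;
       ... (__m128i) a | (__m128i) b ...

   Instead of punning the V4SF values into an integer vector mode and
   using por, the IOR is emitted directly on the float vectors so that
   orps can be used and no domain-crossing penalty is paid.  */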
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

bool
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
			 rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return false;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return false;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return false;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    /* Support "andhi/andsi/anddi" as a zero-extending move.  */
    return (code == AND
	    && (mode == HImode
		|| mode == SImode
		|| (TARGET_64BIT && mode == DImode))
	    && satisfies_constraint_L (src2));

  return true;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */

void
ix86_split_idivmod (enum machine_mode mode, rtx operands[],
		    bool signed_p)
{
  rtx end_label, qimode_label;
  rtx insn, div, mod;
  rtx scratch, tmp0, tmp1, tmp2;
  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
  rtx (*gen_zero_extend) (rtx, rtx);
  rtx (*gen_test_ccno_1) (rtx, rtx);

  switch (mode)
    {
    case SImode:
      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
      gen_test_ccno_1 = gen_testsi_ccno_1;
      gen_zero_extend = gen_zero_extendqisi2;
      break;
    case DImode:
      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
      gen_test_ccno_1 = gen_testdi_ccno_1;
      gen_zero_extend = gen_zero_extendqidi2;
      break;
    default:
      gcc_unreachable ();
    }

  end_label = gen_label_rtx ();
  qimode_label = gen_label_rtx ();

  scratch = gen_reg_rtx (mode);

  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
  emit_move_insn (scratch, operands[2]);
  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
				 scratch, 1, OPTAB_DIRECT);
  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
			       gen_rtx_LABEL_REF (VOIDmode, qimode_label),
			       pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = qimode_label;

  /* Generate original signed/unsigned divmod.  */
  div = gen_divmod4_1 (operands[0], operands[1],
		       operands[2], operands[3]);
  emit_insn (div);

  /* Branch to the end.  */
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Generate 8bit unsigned divide.  */
  emit_label (qimode_label);
  /* Don't use operands[0] for result of 8bit divide since not all
     registers support QImode ZERO_EXTRACT.  */
  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));

  if (signed_p)
    {
      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
    }
  else
    {
      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
    }

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
  if (REG_P (operands[1]))
    insn = emit_move_insn (operands[1], tmp1);
  else
    {
      /* Need a new scratch register since the old one has result
	 of 8bit divide.  */
      scratch = gen_reg_rtx (mode);
      emit_move_insn (scratch, tmp1);
      insn = emit_move_insn (operands[1], scratch);
    }
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Zero extend quotient from AL.  */
  tmp1 = gen_lowpart (QImode, tmp0);
  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
  set_unique_reg_note (insn, REG_EQUAL, div);

  emit_label (end_label);
}
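/* Illustrative shape of the sequence emitted above (SImode; registers
   and label names invented for the example):

       mov     %esi, %scratch
       or      %edi, %scratch
       test    $-0x100, %scratch
       je      .Lqimode                ; both operands fit in 8 bits
       (i)div  %edi                    ; full-width divide
       jmp     .Lend
   .Lqimode:
       divb    %dil                    ; 8bit divide: AL = quot, AH = rem
   .Lend:                                                              */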
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx prev, rtx next, unsigned int distance)
{
  df_ref *use_rec;
  df_ref *def_rec;

  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
    for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
      if (!DF_REF_IS_ARTIFICIAL (*def_rec)
	  && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
	return distance + (distance & 1) + 2;

  return distance + 1;
}
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

static bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
		  rtx insn)
{
  df_ref *def_rec;

  for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
    if (DF_REF_REG_DEF_P (*def_rec)
	&& !DF_REF_IS_ARTIFICIAL (*def_rec)
	&& (regno1 == DF_REF_REGNO (*def_rec)
	    || regno2 == DF_REF_REGNO (*def_rec)))
      return true;

  return false;
}

/* Function checks if instruction INSN uses register number
   REGNO as a part of address expression.  */

static bool
insn_uses_reg_mem (unsigned int regno, rtx insn)
{
  df_ref *use_rec;

  for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
    if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
      return true;

  return false;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx insn, int distance,
			       rtx start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx prev = start;
  rtx next = NULL;

  *found = false;

  while (prev
	 && prev != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, prev))
	    {
	      if (recog_memoized (prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)
		{
		  *found = true;
		  return distance;
		}
	    }

	  next = prev;
	}
      if (prev == BB_HEAD (bb))
	break;

      prev = PREV_INSN (prev);
    }

  return distance;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbour BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
					      distance, PREV_INSN (insn),
					      &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_non_agu_define_in_bb (regno1, regno2,
						  insn, distance,
						  BB_END (bb), &found);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      int bb_dist
		= distance_non_agu_define_in_bb (regno1, regno2,
						 insn, distance,
						 BB_END (e->src),
						 &found_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  if (!found)
    return -1;

  return distance >> 1;
}
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO0 is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
			rtx insn, int distance, rtx start,
			bool *found, bool *redefined)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx next = start;
  rtx prev = NULL;

  *found = false;
  *redefined = false;

  while (next
	 && next != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	{
	  distance = increase_distance(prev, next, distance);
	  if (insn_uses_reg_mem (regno, next))
	    {
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	      *found = true;
	      return distance;
	    }

	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
	    {
	      /* Return -1 if OP0 is set in NEXT.  */
	      *redefined = true;
	      return -1;
	    }

	  prev = next;
	}

      if (next == BB_END (bb))
	break;

      next = NEXT_INSN (next);
    }

  return distance;
}
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
				       NEXT_INSN (insn),
				       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_agu_use_in_bb (regno0, insn,
					   distance, BB_HEAD (bb),
					   &found, &redefined);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;
	  bool redefined_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int bb_dist
		= distance_agu_use_in_bb (regno0, insn,
					  distance, BB_HEAD (e->dest),
					  &found_in_bb, &redefined_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found || redefined)
    return -1;

  return distance >> 1;
}
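/* Note on the metric used by the two distance_* function families
   above: distances are accumulated in half-cycles and only halved on
   return (the ">> 1" above), so that increase_distance can model dual
   issue -- an independent neighbouring insn costs one half-cycle,
   while a dependent pair rounds up to the next full cycle.  */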
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  Instructions sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
		      unsigned int regno2, int split_cost, bool has_scale)
{
  int dist_define, dist_use;

  /* For Silvermont if using a 2-source or 3-source LEA for
     non-destructive destination purposes, or due to wanting
     ability to use SCALE, the use of LEA is justified.  */
  if (ix86_tune == PROCESSOR_SLM)
    {
      if (has_scale)
	return true;
      if (split_cost < 1)
	return false;
      if (regno0 == regno1 || regno0 == regno2)
	return false;
      return true;
    }

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
	 operand usage and split cost is 0 then both lea
	 and non lea variants have same priority.  Currently
	 we prefer lea for 64 bit code and non lea on 32 bit
	 code.  */
      if (dist_use < 0 && split_cost == 0)
	return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
	return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost exceeds AGU stall.  */
  if (dist_use < 0)
    return dist_define > LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
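/* A worked example of the heuristic above (assuming IX86_LEA_PRIORITY
   == 0): if dist_define == 1, dist_use == 2 and split_cost == 1, then
   dist_define is adjusted to 1 + 1 + 0 == 2, and since 2 >= 2 the
   function returns true, i.e. the lea is kept rather than split.  */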
/* Return true if it is legal to clobber flags by INSN and
   false otherwise.  */

static bool
ix86_ok_to_clobber_flags (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  df_ref *use;
  bitmap live;

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn))
	{
	  for (use = DF_INSN_USES (insn); *use; use++)
	    if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
	      return false;

	  if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
	    return true;
	}

      if (insn == BB_END (bb))
	break;

      insn = NEXT_INSN (insn);
    }

  live = df_get_live_out(bb);
  return !REGNO_REG_SET_P (live, FLAGS_REG);
}
/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_add (rtx insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);
  regno2 = true_regnum (operands[2]);

  /* We need to split only adds with non destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)
    return false;
  else
    return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
}
/* Return true if we should emit lea instruction instead of mov
   instruction.  */

bool
ix86_use_lea_for_mov (rtx insn, rtx operands[])
{
  unsigned int regno0, regno1;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Use lea for reg to reg moves only.  */
  if (!REG_P (operands[0]) || !REG_P (operands[1]))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);

  return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
}
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;
  int split_cost;
  struct ix86_address parts;
  int ok;

  /* Check we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* There should be at least two components in the address.  */
  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
    return false;

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  split_cost = 0;

  /* Compute how many cycles we will add to execution time
     if we split the lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    split_cost += 4;
	  else
	    split_cost += parts.scale;
	}

      /* Have to use add instruction with immediate if
	 disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
	split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
				parts.scale > 1);
}
/* Emit x86 binary operand CODE in mode MODE, where the first operand
   matches destination.  RTX includes clobber of FLAGS_REG.  */

static void
ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
		 rtx dst, rtx src)
{
  rtx op, clob;

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
/* Return true if regno1 def is nearest to the insn.  */

static bool
find_nearest_reg_def (rtx insn, int regno1, int regno2)
{
  rtx prev = insn;
  rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));

  if (insn == start)
    return false;
  while (prev && prev != start)
    {
      if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
	{
	  prev = PREV_INSN (prev);
	  continue;
	}
      if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
	return true;
      else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
	return false;

      prev = PREV_INSN (prev);
    }

  /* None of the regs is defined in the bb.  */
  return false;
}
/* Split lea instructions into a sequence of instructions
   which are executed on ALU to avoid AGU stalls.
   It is assumed that it is allowed to clobber flags register
   at lea position.  */

void
ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
{
  unsigned int regno0, regno1, regno2;
  struct ix86_address parts;
  rtx target, tmp;
  int ok, adds;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  target = gen_lowpart (mode, operands[0]);

  regno0 = true_regnum (target);
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    {
      parts.base = gen_lowpart (mode, parts.base);
      regno1 = true_regnum (parts.base);
    }

  if (parts.index)
    {
      parts.index = gen_lowpart (mode, parts.index);
      regno2 = true_regnum (parts.index);
    }

  if (parts.disp)
    parts.disp = gen_lowpart (mode, parts.disp);

  if (parts.scale > 1)
    {
      /* Case r1 = r1 + ...  */
      if (regno1 == regno0)
	{
	  /* If we have a case r1 = r1 + C * r1 then we
	     should use multiplication which is very
	     expensive.  Assume cost model is wrong if we
	     have such case here.  */
	  gcc_assert (regno2 != regno0);

	  for (adds = parts.scale; adds > 0; adds--)
	    ix86_emit_binop (PLUS, mode, target, parts.index);
	}
      else
	{
	  /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));

	  /* Use shift for scaling.  */
	  ix86_emit_binop (ASHIFT, mode, target,
			   GEN_INT (exact_log2 (parts.scale)));

	  if (parts.base)
	    ix86_emit_binop (PLUS, mode, target, parts.base);

	  if (parts.disp && parts.disp != const0_rtx)
	    ix86_emit_binop (PLUS, mode, target, parts.disp);
	}
    }
  else if (!parts.base && !parts.index)
    {
      gcc_assert(parts.disp);
      emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
    }
  else
    {
      if (!parts.base)
	{
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
	}
      else if (!parts.index)
	{
	  if (regno0 != regno1)
	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
	}
      else
	{
	  if (regno0 == regno1)
	    tmp = parts.index;
	  else if (regno0 == regno2)
	    tmp = parts.base;
	  else
	    {
	      rtx tmp1;

	      /* Find better operand for SET instruction, depending
		 on which definition is farther from the insn.  */
	      if (find_nearest_reg_def (insn, regno1, regno2))
		tmp = parts.index, tmp1 = parts.base;
	      else
		tmp = parts.base, tmp1 = parts.index;

	      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

	      if (parts.disp && parts.disp != const0_rtx)
		ix86_emit_binop (PLUS, mode, target, parts.disp);

	      ix86_emit_binop (PLUS, mode, target, tmp1);
	      return;
	    }

	  ix86_emit_binop (PLUS, mode, target, tmp);
	}

      if (parts.disp && parts.disp != const0_rtx)
	ix86_emit_binop (PLUS, mode, target, parts.disp);
    }
}
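/* For illustration (registers invented for the example), an insn such
   as

       lea     0x4(%rbx,%rcx,4), %rax

   may be split by the code above into the ALU sequence

       mov     %rcx, %rax
       shl     $2, %rax
       add     %rbx, %rax
       add     $0x4, %rax

   trading the AGU-based lea for plain moves, shifts and adds.  */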
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
}
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count))
	{
	  /* Add check since it can be invoked before register
	     allocation in pre-reload schedule.  */
	  if (reload_completed
	      && true_regnum (set_dest) == true_regnum (shift_count))
	    return true;
	  else if (REGNO(set_dest) == REGNO(shift_count))
	    return true;
	}
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
				       PATTERN (use_insn));
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}
/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
	emit_insn (gen_sse_movss (value, value, input));
      else
	emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_fix_truncv4sfv4si2 (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  emit_insn (gen_xorv4si3 (value, value, large));
}
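/* The arithmetic above implements the usual unsigned-conversion trick:
   values below 2**31 are truncated directly, while values in
   [2**31, 2**32) first have 2**31 subtracted (selected by the LE mask)
   and then get bit 31 xored back in.  A small worked example: for
   input 3e9 (> 2**31), 3000000000 - 2**31 == 852516352 is converted
   as a signed value, and the final xor with 0x80000000 restores
   3000000000.  */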
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES_TO_VEC)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
			    gen_rtvec (4, GEN_INT (0x43300000UL),
				       GEN_INT (0x45300000UL),
				       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
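/* A small worked example of the bias trick above: for input
   0x0000000100000002 (hi = 1, lo = 2), the interleave forms the
   doubles 0x1.0p52 + 2 and 0x1.0p84 + 1 * 2**32; subtracting the
   0x1.0p52 / 0x1.0p84 biases leaves 2.0 and 4294967296.0, whose sum
   4294967298.0 is exactly the original 64-bit value.  */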
/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
				  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
			   NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
			   0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */
void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
				NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
				NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
			       0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
			       0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
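/* The code above computes (float) input as hi * 2**16 + lo, where
   lo = input & 0xffff and hi = input >> 16.  Each 16-bit half converts
   to SFmode exactly, so the only rounding happens in the final add;
   e.g. 0x87654321 becomes 0x8765 * 65536.0f + (float) 0x4321.  */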
/* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
   a vector of unsigned ints VAL to vector of floats TARGET.  */

void
ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
{
  rtx tmp[8];
  REAL_VALUE_TYPE TWO16r;
  enum machine_mode intmode = GET_MODE (val);
  enum machine_mode fltmode = GET_MODE (target);
  rtx (*cvt) (rtx, rtx);

  if (intmode == V4SImode)
    cvt = gen_floatv4siv4sf2;
  else
    cvt = gen_floatv8siv8sf2;
  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
  tmp[0] = force_reg (intmode, tmp[0]);
  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
				NULL_RTX, 1, OPTAB_DIRECT);
  tmp[3] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[3], tmp[1]));
  tmp[4] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[4], tmp[2]));
  real_ldexp (&TWO16r, &dconst1, 16);
  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
				OPTAB_DIRECT);
  if (tmp[7] != target)
    emit_move_insn (target, tmp[7]);
}
/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
   pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
   This is done by doing just signed conversion if < 0x1p31, and otherwise by
   subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */

rtx
ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
  REAL_VALUE_TYPE TWO31r;
  rtx two31r, tmp[4];
  enum machine_mode mode = GET_MODE (val);
  enum machine_mode scalarmode = GET_MODE_INNER (mode);
  enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
  rtx (*cmp) (rtx, rtx, rtx, rtx);
  int i;

  for (i = 0; i < 3; i++)
    tmp[i] = gen_reg_rtx (mode);
  real_ldexp (&TWO31r, &dconst1, 31);
  two31r = const_double_from_real_value (TWO31r, scalarmode);
  two31r = ix86_build_const_vector (mode, 1, two31r);
  two31r = force_reg (mode, two31r);
  switch (mode)
    {
    case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
    case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
    case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
    case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
    default: gcc_unreachable ();
    }
  tmp[3] = gen_rtx_LE (mode, two31r, val);
  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
				0, OPTAB_DIRECT);
  if (intmode == V4SImode || TARGET_AVX2)
    *xorp = expand_simple_binop (intmode, ASHIFT,
				 gen_lowpart (intmode, tmp[0]),
				 GEN_INT (31), NULL_RTX, 0,
				 OPTAB_DIRECT);
  else
    {
      rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
      two31 = ix86_build_const_vector (intmode, 1, two31);
      *xorp = expand_simple_binop (intmode, AND,
				   gen_lowpart (intmode, tmp[0]),
				   two31, NULL_RTX, 0,
				   OPTAB_DIRECT);
    }
  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
			      0, OPTAB_DIRECT);
}
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  enum machine_mode scalar_mode;

  switch (mode)
    {
    case V32QImode:
    case V16QImode:
    case V16HImode:
    case V8HImode:
    case V8SImode:
    case V4SImode:
    case V4DImode:
    case V2DImode:
      gcc_assert (vect);
    case V8SFmode:
    case V4SFmode:
    case V4DFmode:
    case V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case V8SImode:
    case V4SImode:
    case V8SFmode:
    case V4SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case V4DImode:
    case V2DImode:
    case V4DFmode:
    case V2DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
	lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	{
	  imode = TImode;
	  lo = 0, hi = (HOST_WIDE_INT)1 << shift;
	}
      else
	{
	  rtvec vec;

	  imode = DImode;
	  lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

	  if (invert)
	    {
	      lo = ~lo, hi = ~hi;
	      v = constm1_rtx;
	    }
	  else
	    v = const0_rtx;

	  mask = immed_double_const (lo, hi, imode);

	  vec = gen_rtvec (2, v, mask);
	  v = gen_rtx_CONST_VECTOR (V2DImode, vec);
	  v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

	  return v;
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
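/* Examples of the masks built above: for V4SFmode with VECT true the
   sign-bit mask is { 0x80000000, 0x80000000, 0x80000000, 0x80000000 },
   and with INVERT set the complement { 0x7fffffff, ... } is produced
   instead, suitable for an andps-based fabs.  */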
/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
				rtx operands[])
{
  rtx mask, set, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode vmode = mode;

  if (vector_mode)
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);

  if (mask)
    {
      rtx use, clob;
      rtvec par;

      use = gen_rtx_USE (VOIDmode, mask);
      if (vector_mode)
	par = gen_rtvec (2, set, use);
      else
	{
	  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
	  par = gen_rtvec (3, set, use, clob);
	}
      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
    }
  else
    emit_insn (set);
}
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
	{
	  if (op0 == CONST0_RTX (mode))
	    op0 = CONST0_RTX (vmode);
	  else
	    {
	      rtx v = ix86_build_const_vector (vmode, false, op0);

	      op0 = force_reg (vmode, v);
	    }
	}
      else if (op0 != CONST0_RTX (mode))
	op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_const;
      else
	copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_var;
      else
	copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
	{
	  x = gen_rtx_AND (vmode, scratch, mask);
	}
      else						/* alternative 2,4 */
	{
	  gcc_assert (REGNO (mask) == REGNO (scratch));
	  op1 = simplify_gen_subreg (vmode, op1, mode, 0);
	  x = gen_rtx_AND (vmode, scratch, op1);
	}
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
	{
	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, nmask);
	}
      else						/* alternative 3,4 */
	{
	  gcc_assert (REGNO (nmask) == REGNO (dest));
	  dest = nmask;
	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, op0);
	}
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
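/* Both copysign splitters above rely on the bit-level identity

       copysign (x, y) = (x & ~signbit) | (y & signbit)

   In the constant variant the (x & ~signbit) half is folded at compile
   time into the absolute-value constant that gets IORed in; the
   variable variant computes both halves with an and/and-not pair
   before the final ior.  */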
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case CCZmode:
      break;

    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
      if (set_mode != req_mode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      else
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == MINUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      else
	return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern does (use flags) and combine may ask us
	 for a proper mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
    return m2;
  else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  gcc_unreachable ();

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCAmode:
	case CCCmode:
	case CCOmode:
	case CCSmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
/* Return a comparison we can do and that it is equivalent to
   swap_condition (code) apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:			/* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:			/* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:			/* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}
/* Return cost of comparison CODE using the best strategy for performance.
   All following functions do use number of instructions as a cost metric.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int arith_cost;

  /* The cost of code using bit-twiddling on %ah.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      arith_cost = 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      arith_cost = TARGET_IEEE_FP ? 5 : 4;
      break;
    case LE:
    case UNGT:
      arith_cost = TARGET_IEEE_FP ? 6 : 4;
      break;
    default:
      gcc_unreachable ();
    }

  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      return arith_cost > 4 ? 3 : 2;
    case IX86_FPCMP_SAHF:
      return arith_cost > 4 ? 4 : 3;
    default:
      return arith_cost;
    }
}
/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* Do fcomi/sahf based test when profitable.  */
  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || (op_mode == XFmode
	      && ! (standard_80387_constant_p (op0) == 1
		    || standard_80387_constant_p (op1) == 1)
	      && GET_CODE (op1) != FLOAT)
	  || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (MEM_P (op0)
	      && ! (standard_80387_constant_p (op1) == 0
		    || MEM_P (op1))))
	{
	  enum rtx_code new_code = ix86_fp_swap_condition (code);
	  if (new_code != UNKNOWN)
	    {
	      rtx tmp;
	      tmp = op0, op0 = op1, op1 = tmp;
	      code = new_code;
	    }
	}

      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  int tmp = standard_80387_constant_p (op1);
	  if (tmp == 0)
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	  else if (tmp == 1)
	    {
	      if (TARGET_CMOVE)
		op1 = force_reg (op_mode, op1);
	    }
	  else
	    op1 = force_reg (op_mode, op1);
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* Do fcomi/sahf based test when profitable.  */
  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                         tmp);
      emit_insn (tmp);
      break;

    case IX86_FPCMP_SAHF:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                         tmp);

      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
      break;

    case IX86_FPCMP_ARITH:
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}
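
/* Illustrative note (added; not from the original source): after fnstsw
   moves the FPU status word through %ax, the condition bits land in %ah as
   C0=0x01, C2=0x04, C3=0x40, so the constant 0x45 used above tests all
   three at once.  For instance the "GT || !TARGET_IEEE_FP" path emits
       testb $0x45, %ah
       je    ...
   which accepts only the C0=C2=C3=0 encoding, i.e. an ordered
   "op0 > op1" (unordered results set C0 and C2 and are rejected).  */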
rtx
ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
    }
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
void
ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx tmp;

  switch (mode)
    {
    case SFmode:
    case DFmode:
    case XFmode:
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, op0, op1);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case DImode:
      if (TARGET_64BIT)
        goto simple;
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;
        enum machine_mode submode;

        if (CONSTANT_P (op0) && !CONSTANT_P (op1))
          {
            tmp = op0, op0 = op1, op1 = tmp;
            code = swap_condition (code);
          }

        split_double_mode (mode, &op0, 1, lo+0, hi+0);
        split_double_mode (mode, &op1, 1, lo+1, hi+1);

        submode = mode == DImode ? SImode : DImode;

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_insn_for_size_p ()
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (submode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_expand_branch (code, tmp, const0_rtx, label);
            return;
          }

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  Similarly for low word -1 and
           less-or-equal-than or greater-than.  */

        if (CONST_INT_P (hi[1]))
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              if (lo[1] == const0_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            case LE: case LEU: case GT: case GTU:
              if (lo[1] == constm1_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = UNKNOWN; code2 = NE;  break;
          case NE:   code2 = UNKNOWN; break;

          default:
            gcc_unreachable ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        if (code1 != UNKNOWN)
          ix86_expand_branch (code1, hi[0], hi[1], label);
        if (code2 != UNKNOWN)
          ix86_expand_branch (code2, hi[0], hi[1], label2);

        ix86_expand_branch (code3, lo[0], lo[1], label);

        if (code2 != UNKNOWN)
          emit_label (label2);
        return;
      }

    default:
      gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
      goto simple;
    }
}
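
/* Worked example (added for illustration): on ia32, the EQ/NE shortcut
   above compiles
       if (a == b) goto L;     -- a, b of type long long --
   into roughly
       xorl  b_lo, a_lo
       xorl  b_hi, a_hi
       orl   a_hi, a_lo
       je    L
   i.e. one extra OR instead of a second compare-and-branch pair.  */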
/* Split branch based on floating point condition.  */

void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
                      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx condition;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (split_branch_probability >= 0)
    add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
}
void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, op0, op1);
  PUT_MODE (ret, QImode);
  emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
}
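
/* Usage sketch (added; not from the original source): a caller expanding
   "x = (a < b)" would do roughly
       ix86_expand_setcc (qi_dest, LT, a, b);
   which first emits the flags-setting compare via ix86_expand_compare and
   then a QImode set from the flags register, i.e. a setcc insn.  */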
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */

static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle double-mode compares that go through special path.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      rtx compare_op, compare_seq;

      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));

      /* Shortcut:  following common codes never translate
         into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
          || code == ORDERED || code == UNORDERED)
        return false;

      /* These comparisons require zero flag; swap operands so they won't
         need it.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
          && !TARGET_IEEE_FP)
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = swap_condition (code);
        }

      /* Try to expand the comparison and verify that we end up with
         carry flag based comparison.  This fails to be true only when
         we decide to expand comparison using arithmetic, which is not
         a common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
      compare_seq = get_insns ();
      end_sequence ();

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
          || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
        code = GET_CODE (compare_op);

      if (code != LTU && code != GEU)
        return false;

      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }

  if (!INTEGRAL_MODE_P (mode))
    return false;

  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
        return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (CONST_INT_P (op1))
        {
          op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We still can swap operands but that
             would force loading of the constant into register.  */
          if (op1 == const0_rtx
              || !x86_64_immediate_operand (op1, GET_MODE (op1)))
            return false;
          code = (code == GTU ? GEU : LTU);
        }
      else
        {
          rtx tmp = op1;
          op1 = op0;
          op0 = tmp;
          code = (code == GTU ? LTU : GEU);
        }
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (!can_create_pseudo_p ())
        return false;
      op0 = force_reg (mode, op0);
    }
  *pop = ix86_expand_compare (code, op0, op1);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}
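
/* Illustrative example (added): the EQ rewrite above turns a test such as
       x = (a == 0) ? ct : cf;
   into the carry-only comparison "(unsigned) a < 1", which later expansion
   can consume without a setcc or branch:
       cmpl  $1, a          ; CF = (a == 0)
       sbbl  %eax, %eax     ; %eax = CF ? -1 : 0
   */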
bool
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (GET_MODE (op0) == TImode
      || (GET_MODE (op0) == DImode
          && !TARGET_64BIT))
    return false;

  start_sequence ();
  compare_op = ix86_expand_compare (code, op0, op1);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((op1 == const0_rtx && (code == GE || code == LT))
      || (op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && CONST_INT_P (operands[2])
      && CONST_INT_P (operands[3]))
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than we do by using
         sbb.  */
      if (sign_bit_compare_p
          || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
        {
          /* Detect overlap between destination and compare sources.  */
          rtx tmp = out;

          if (!sign_bit_compare_p)
            {
              rtx flags;
              bool fpcmp = false;

              compare_code = GET_CODE (compare_op);

              flags = XEXP (compare_op, 0);

              if (GET_MODE (flags) == CCFPmode
                  || GET_MODE (flags) == CCFPUmode)
                {
                  fpcmp = true;
                  compare_code
                    = ix86_fp_compare_code_to_integer (compare_code);
                }

              /* To simplify rest of code, restrict to the GEU case.  */
              if (compare_code == LTU)
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  compare_code = reverse_condition (compare_code);
                  code = reverse_condition (code);
                }
              else
                {
                  if (fpcmp)
                    PUT_CODE (compare_op,
                              reverse_condition_maybe_unordered
                                (GET_CODE (compare_op)));
                  else
                    PUT_CODE (compare_op,
                              reverse_condition (GET_CODE (compare_op)));
                }
              diff = ct - cf;

              if (reg_overlap_mentioned_p (out, op0)
                  || reg_overlap_mentioned_p (out, op1))
                tmp = gen_reg_rtx (mode);

              if (mode == DImode)
                emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
              else
                emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
                                                 flags, compare_op));
            }
          else
            {
              if (code == GT || code == GE)
                code = reverse_condition (code);
              else
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  diff = ct - cf;
                }
              tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
            }

          if (diff == 1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [addl dest, ct]
               *
               * Size 5 - 8.
               */
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (cf == -1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * orl $ct, dest
               *
               * Size 8.
               */
              tmp = expand_simple_binop (mode, IOR,
                                         tmp, GEN_INT (ct),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (diff == -1 && ct)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * notl dest
               * [addl dest, cf]
               *
               * Size 8 - 11.
               */
              tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
              if (cf)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (cf),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [notl dest]
               * andl cf - ct, dest
               * [addl dest, ct]
               *
               * Size 8 - 11.
               */

              if (cf == 0)
                {
                  cf = ct;
                  ct = 0;
                  tmp = expand_simple_unop (mode, NOT, tmp,
                                            copy_rtx (tmp), 1);
                }

              tmp = expand_simple_binop (mode, AND,
                                         copy_rtx (tmp),
                                         gen_int_mode (cf - ct, mode),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }

          if (!rtx_equal_p (tmp, out))
            emit_move_insn (copy_rtx (out), copy_rtx (tmp));

          return true;
        }

      if (diff < 0)
        {
          enum machine_mode cmp_mode = GET_MODE (op0);
          HOST_WIDE_INT tmp;

          tmp = ct, ct = cf, cf = tmp;
          diff = -diff;

          if (SCALAR_FLOAT_MODE_P (cmp_mode))
            {
              gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

              /* We may be reversing unordered compare to normal compare, that
                 is not valid in general (we may convert non-trapping condition
                 to trapping one), however on i386 we currently emit all
                 comparisons unordered.  */
              compare_code = reverse_condition_maybe_unordered (compare_code);
              code = reverse_condition_maybe_unordered (code);
            }
          else
            {
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
        }

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
          && CONST_INT_P (op1))
        {
          if (op1 == const0_rtx
              && (code == LT || code == GE))
            compare_code = code;
          else if (op1 == constm1_rtx)
            {
              if (code == LE)
                compare_code = LT;
              else if (code == GT)
                compare_code = GE;
            }
        }

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
          && GET_MODE (op0) == GET_MODE (out)
          && (cf == -1 || ct == -1))
        {
          /* If lea code below could be used, only optimize
             if it results in a 2 insn sequence.  */
          if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
                 || diff == 3 || diff == 5 || diff == 9)
              || (compare_code == LT && ct == -1)
              || (compare_code == GE && cf == -1))
            {
              /*
               * notl op1      (if necessary)
               * sarl $31, op1
               * orl cf, op1
               */
              if (ct != -1)
                {
                  cf = ct;
                  ct = -1;
                  code = reverse_condition (code);
                }

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);

              out = expand_simple_binop (mode, IOR,
                                         out, GEN_INT (cf),
                                         out, 1, OPTAB_DIRECT);
              if (out != operands[0])
                emit_move_insn (operands[0], out);

              return true;
            }
        }

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
          && (mode != DImode
              || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
        {
          /*
           * xorl dest,dest
           * cmpl op1,op2
           * setcc dest
           * lea cf(dest*(ct-cf)),dest
           *
           * Size 14.
           *
           * This also catches the degenerate setcc-only case.
           */

          rtx tmp;
          int nops;

          out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

          nops = 0;
          /* On x86_64 the lea instruction operates on Pmode, so we need
             to get arithmetics done in proper mode to match.  */
          if (diff == 1)
            tmp = copy_rtx (out);
          else
            {
              rtx out1;
              out1 = copy_rtx (out);
              tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
              nops++;
              if (diff & 1)
                {
                  tmp = gen_rtx_PLUS (mode, tmp, out1);
                  nops++;
                }
            }
          if (cf != 0)
            {
              tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
              nops++;
            }
          if (!rtx_equal_p (tmp, out))
            {
              if (nops == 1)
                out = force_operand (tmp, copy_rtx (out));
              else
                emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out),
                                        copy_rtx (tmp)));
            }
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }

      /*
       * General case:                  Jumpful:
       *   xorl dest,dest               cmpl op1, op2
       *   cmpl op1, op2                movl ct, dest
       *   setcc dest                   jcc 1f
       *   decl dest                    movl cf, dest
       *   andl (cf-ct),dest            1:
       *   addl ct,dest
       *
       * Size 20.                       Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
          && BRANCH_COST (optimize_insn_for_speed_p (),
                          false) >= 2)
        {
          if (cf == 0)
            {
              enum machine_mode cmp_mode = GET_MODE (op0);

              cf = ct;
              ct = 0;

              if (SCALAR_FLOAT_MODE_P (cmp_mode))
                {
                  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

                  /* We may be reversing unordered compare to normal compare,
                     that is not valid in general (we may convert non-trapping
                     condition to trapping one), however on i386 we currently
                     emit all comparisons unordered.  */
                  code = reverse_condition_maybe_unordered (code);
                }
              else
                {
                  code = reverse_condition (code);
                  if (compare_code != UNKNOWN)
                    compare_code = reverse_condition (compare_code);
                }
            }

          if (compare_code != UNKNOWN)
            {
              /* notl op1      (if needed)
                 sarl $31, op1
                 andl (cf-ct), op1
                 addl ct, op1

                 For x < 0 (resp. x <= -1) there will be no notl,
                 so if possible swap the constants to get rid of the
                 complement.
                 True/false will be -1/0 while code below (store flag
                 followed by decrement) is 0/-1, so the constants need
                 to be exchanged once more.  */

              if (compare_code == GE || !cf)
                {
                  code = reverse_condition (code);
                  compare_code = LT;
                }
              else
                {
                  HOST_WIDE_INT tmp = cf;
                  cf = ct;
                  ct = tmp;
                }

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
            }
          else
            {
              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

              out = expand_simple_binop (mode, PLUS, copy_rtx (out),
                                         constm1_rtx,
                                         copy_rtx (out), 1, OPTAB_DIRECT);
            }

          out = expand_simple_binop (mode, AND, copy_rtx (out),
                                     gen_int_mode (cf - ct, mode),
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          if (ct)
            out = expand_simple_binop (mode, PLUS, copy_rtx (out),
                                       GEN_INT (ct),
                                       copy_rtx (out), 1, OPTAB_DIRECT);
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
        return false;

      /* If one of the two operands is an interesting constant, load a
         constant with the above and mask it in with a logical operation.  */

      if (CONST_INT_P (operands[2]))
        {
          var = operands[3];
          if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
            operands[3] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else if (CONST_INT_P (operands[3]))
        {
          var = operands[2];
          if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
            operands[2] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else
        return false;

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return false;

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
                          OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
        emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return true;
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
          || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],
                                                operands[3])));
  return true;
}
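
/* Worked example (added for illustration): with ct=5, cf=2 the sbb-based
   general case above produces a branchless sequence along the lines of
       cmpl  ...              ; carry from the GEU-canonicalized compare
       sbbl  %eax, %eax       ; %eax = CF ? -1 : 0
       andl  $-3, %eax        ; cf - ct = 2 - 5
       addl  $5, %eax         ; + ct  ->  2 when CF, 5 otherwise
   exact constants depend on the reversals performed above.  */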
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
                                  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* AVX supports all the needed comparisons.  */
      if (TARGET_AVX)
        break;
      /* We have no LTGT as an operator.  We could implement it with
         NE & ORDERED, but this requires an extra temporary.  It's
         not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* AVX has 3 operand comparisons, no need to swap anything.  */
      if (TARGET_AVX)
        break;
      /* For commutative operators, try to canonicalize the destination
         operand to be first in the comparison - this helps reload to
         avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
        break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly before AVX, and furthermore
         ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
         comparison operands to transform into something that is
         supported.  */
      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
                           rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}
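
/* Illustrative note (added): the LT/UNGE restriction mirrors the hardware.
   minss/minsd implement exactly "a < b ? a : b" and return the second
   operand when the compare is unordered, so operand order must be
   preserved for NaN and signed-zero correctness.  That is why the
   non-finite-math path above wraps the selection in UNSPEC_IEEE_MIN/MAX
   rather than using the commutative SMIN/SMAX codes.  */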
/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
                     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  enum machine_mode cmp_mode = GET_MODE (cmp_op0);
  rtx x;

  cmp_op0 = force_reg (cmp_mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, cmp_mode))
    cmp_op1 = force_reg (cmp_mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
  if (cmp_mode != mode)
    {
      x = force_reg (cmp_mode, x);
      convert_move (dest, x, false);
    }
  else
    emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx t2, t3, x;

  if (vector_all_ones_operand (op_true, mode)
      && rtx_equal_p (op_false, CONST0_RTX (mode)))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
    }
  else if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_IOR (mode, cmp, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (TARGET_XOP)
    {
      op_true = force_reg (mode, op_true);

      if (!nonimmediate_operand (op_false, mode))
        op_false = force_reg (mode, op_false);

      emit_insn (gen_rtx_SET (mode, dest,
                              gen_rtx_IF_THEN_ELSE (mode, cmp,
                                                    op_true,
                                                    op_false)));
    }
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;

      if (!nonimmediate_operand (op_true, mode))
        op_true = force_reg (mode, op_true);

      op_false = force_reg (mode, op_false);

      switch (mode)
        {
        case V4SFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvps;
          break;
        case V2DFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvpd;
          break;
        case V16QImode:
        case V8HImode:
        case V4SImode:
        case V2DImode:
          if (TARGET_SSE4_1)
            {
              gen = gen_sse4_1_pblendvb;
              dest = gen_lowpart (V16QImode, dest);
              op_false = gen_lowpart (V16QImode, op_false);
              op_true = gen_lowpart (V16QImode, op_true);
              cmp = gen_lowpart (V16QImode, cmp);
            }
          break;
        case V8SFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvps256;
          break;
        case V4DFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvpd256;
          break;
        case V32QImode:
        case V16HImode:
        case V8SImode:
        case V4DImode:
          if (TARGET_AVX2)
            {
              gen = gen_avx2_pblendvb;
              dest = gen_lowpart (V32QImode, dest);
              op_false = gen_lowpart (V32QImode, op_false);
              op_true = gen_lowpart (V32QImode, op_true);
              cmp = gen_lowpart (V32QImode, cmp);
            }
          break;
        default:
          break;
        }

      if (gen != NULL)
        emit_insn (gen (dest, op_false, op_true, cmp));
      else
        {
          op_true = force_reg (mode, op_true);

          t2 = gen_reg_rtx (mode);
          if (optimize)
            t3 = gen_reg_rtx (mode);
          else
            t3 = dest;

          x = gen_rtx_AND (mode, op_true, cmp);
          emit_insn (gen_rtx_SET (VOIDmode, t2, x));

          x = gen_rtx_NOT (mode, cmp);
          x = gen_rtx_AND (mode, x, op_false);
          emit_insn (gen_rtx_SET (VOIDmode, t3, x));

          x = gen_rtx_IOR (mode, t3, t2);
          emit_insn (gen_rtx_SET (VOIDmode, dest, x));
        }
    }
}
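
/* Sketch (added for illustration) of the fallback and/andnot/or path for
   dest = cmp ? op_true : op_false on plain SSE2, in GNU asm terms:
       movaps  cmp, %xmm2
       andps   op_true, %xmm2    ; mask & op_true
       movaps  cmp, %xmm3
       andnps  op_false, %xmm3   ; ~mask & op_false
       orps    %xmm3, %xmm2      ; combine the two halves
   blendvps/pblendvb collapse all of this into one insn on SSE4.1+.  */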
/* Expand a floating-point conditional move.  Return true if successful.  */

bool
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
         allocation just to gain access to it.  Deny movcc when the
         comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
        cmode = GET_MODE (op1);
      if (cmode != mode)
        return false;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)
        return false;

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
                                     operands[2], operands[3]))
        return true;

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
                                 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return true;
    }

  if (GET_MODE (op0) == TImode
      || (GET_MODE (op0) == DImode
          && !TARGET_64BIT))
    return false;

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode, compare_op,
                                                operands[2], operands[3])));

  return true;
}
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                           &operands[4], &operands[5]);
  if (code == UNKNOWN)
    {
      rtx temp;
      switch (GET_CODE (operands[3]))
        {
        case LTGT:
          temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = AND;
          break;
        case UNEQ:
          temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = IOR;
          break;
        default:
          gcc_unreachable ();
        }
      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
                                 OPTAB_DIRECT);
      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
      return true;
    }

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
                                 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
                             operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}
/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode data_mode = GET_MODE (operands[0]);
  enum machine_mode mode = GET_MODE (operands[4]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
  if ((code == LT || code == GE)
      && data_mode == mode
      && cop1 == CONST0_RTX (mode)
      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
      && (GET_MODE_SIZE (data_mode) == 16
          || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
    {
      rtx negop = operands[2 - (code == LT)];
      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
      if (negop == CONST1_RTX (data_mode))
        {
          rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
                                         operands[0], 1, OPTAB_DIRECT);
          if (res != operands[0])
            emit_move_insn (operands[0], res);
          return true;
        }
      else if (GET_MODE_INNER (data_mode) != DImode
               && vector_all_ones_operand (negop, data_mode))
        {
          rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
                                         operands[0], 0, OPTAB_DIRECT);
          if (res != operands[0])
            emit_move_insn (operands[0], res);
          return true;
        }
    }

  if (!nonimmediate_operand (cop1, mode))
    cop1 = force_reg (mode, cop1);
  if (!general_operand (operands[1], data_mode))
    operands[1] = force_reg (data_mode, operands[1]);
  if (!general_operand (operands[2], data_mode))
    operands[2] = force_reg (data_mode, operands[2]);

  /* XOP supports all of the comparisons on all 128-bit vector int types.  */
  if (TARGET_XOP
      && (mode == V16QImode || mode == V8HImode
          || mode == V4SImode || mode == V2DImode))
    ;
  else
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
        {
        case EQ:
        case GT:
        case GTU:
          break;

        case NE:
        case LE:
        case LEU:
          code = reverse_condition (code);
          negate = true;
          break;

        case GE:
        case GEU:
          code = reverse_condition (code);
          negate = true;
          /* FALLTHRU */

        case LT:
        case LTU:
          code = swap_condition (code);
          x = cop0, cop0 = cop1, cop1 = x;
          break;

        default:
          gcc_unreachable ();
        }

      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
      if (mode == V2DImode)
        {
          switch (code)
            {
            case EQ:
              /* SSE4.1 supports EQ.  */
              if (!TARGET_SSE4_1)
                return false;
              break;

            case GT:
            case GTU:
              /* SSE4.2 supports GT/GTU.  */
              if (!TARGET_SSE4_2)
                return false;
              break;

            default:
              gcc_unreachable ();
            }
        }

      /* Unsigned parallel compare is not supported by the hardware.
         Play some tricks to turn this into a signed comparison
         against 0.  */
      if (code == GTU)
        {
          cop0 = force_reg (mode, cop0);

          switch (mode)
            {
            case V8SImode:
            case V4DImode:
            case V4SImode:
            case V2DImode:
              {
                rtx t1, t2, mask;
                rtx (*gen_sub3) (rtx, rtx, rtx);

                switch (mode)
                  {
                  case V8SImode: gen_sub3 = gen_subv8si3; break;
                  case V4DImode: gen_sub3 = gen_subv4di3; break;
                  case V4SImode: gen_sub3 = gen_subv4si3; break;
                  case V2DImode: gen_sub3 = gen_subv2di3; break;
                  default:
                    gcc_unreachable ();
                  }
                /* Subtract (-(INT MAX) - 1) from both operands to make
                   them signed.  */
                mask = ix86_build_signbit_mask (mode, true, false);
                t1 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t1, cop0, mask));

                t2 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t2, cop1, mask));

                cop0 = t1;
                cop1 = t2;
                code = GT;
              }
              break;

            case V32QImode:
            case V16HImode:
            case V16QImode:
            case V8HImode:
              /* Perform a parallel unsigned saturating subtraction.  */
              x = gen_reg_rtx (mode);
              emit_insn (gen_rtx_SET (VOIDmode, x,
                                      gen_rtx_US_MINUS (mode, cop0, cop1)));

              cop0 = x;
              cop1 = CONST0_RTX (mode);
              code = EQ;
              negate = !negate;
              break;

            default:
              gcc_unreachable ();
            }
        }
    }

  /* Allow the comparison to be done in one mode, but the movcc to
     happen in another mode.  */
  if (data_mode == mode)
    {
      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
                               operands[1+negate], operands[2-negate]);
    }
  else
    {
      gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
      x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
                               code, cop0, cop1,
                               operands[1+negate], operands[2-negate]);
      x = gen_lowpart (data_mode, x);
    }

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
                         operands[2-negate]);
  return true;
}
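
/* Worked example (added): the GTU trick above biases both operands by the
   sign-bit constant so an unsigned compare becomes a signed one; for V4SI:
       psubd   .LC_signbit, %xmm0   ; x - 0x80000000
       psubd   .LC_signbit, %xmm1   ; y - 0x80000000
       pcmpgtd %xmm1, %xmm0         ; signed >, now equals unsigned >
   Subtracting 0x80000000 maps [0, 2^32) monotonically onto
   [-2^31, 2^31), so the ordering is preserved.  */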
/* Expand a variable vector permutation.  */

void
ix86_expand_vec_perm (rtx operands[])
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx mask = operands[3];
  rtx t1, t2, t3, t4, vt, vt2, vec[32];
  enum machine_mode mode = GET_MODE (op0);
  enum machine_mode maskmode = GET_MODE (mask);
  int w, e, i;
  bool one_operand_shuffle = rtx_equal_p (op0, op1);

  /* Number of elements in the vector.  */
  w = GET_MODE_NUNITS (mode);
  e = GET_MODE_UNIT_SIZE (mode);
  gcc_assert (w <= 32);

  if (TARGET_AVX2)
    {
      if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
        {
          /* Unfortunately, the VPERMQ and VPERMPD instructions only support
             a constant shuffle operand.  With a tiny bit of effort we can
             use VPERMD instead.  A re-interpretation stall for V4DFmode is
             unfortunate but there's no avoiding it.
             Similarly for V16HImode we don't have instructions for variable
             shuffling, while for V32QImode we can use after preparing suitable
             masks vpshufb; vpshufb; vpermq; vpor.  */

          if (mode == V16HImode)
            {
              maskmode = mode = V32QImode;
              w = 32;
              e = 1;
            }
          else
            {
              maskmode = mode = V8SImode;
              w = 8;
              e = 4;
            }
          t1 = gen_reg_rtx (maskmode);

          /* Replicate the low bits of the V4DImode mask into V8SImode:
               mask = { A B C D }
               t1 = { A A B B C C D D }.  */
          for (i = 0; i < w / 2; ++i)
            vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
          vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
          vt = force_reg (maskmode, vt);
          mask = gen_lowpart (maskmode, mask);
          if (maskmode == V8SImode)
            emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
          else
            emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));

          /* Multiply the shuffle indices by two.  */
          t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
                                    OPTAB_DIRECT);

          /* Add one to the odd shuffle indices:
                t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
          for (i = 0; i < w / 2; ++i)
            {
              vec[i * 2] = const0_rtx;
              vec[i * 2 + 1] = const1_rtx;
            }
          vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
          vt = validize_mem (force_const_mem (maskmode, vt));
          t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
                                    OPTAB_DIRECT);

          /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
          operands[3] = mask = t1;
          target = gen_lowpart (mode, target);
          op0 = gen_lowpart (mode, op0);
          op1 = gen_lowpart (mode, op1);
        }

      switch (mode)
        {
        case V8SImode:
          /* The VPERMD and VPERMPS instructions already properly ignore
             the high bits of the shuffle elements.  No need for us to
             perform an AND ourselves.  */
          if (one_operand_shuffle)
            emit_insn (gen_avx2_permvarv8si (target, op0, mask));
          else
            {
              t1 = gen_reg_rtx (V8SImode);
              t2 = gen_reg_rtx (V8SImode);
              emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
              emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
              goto merge_two;
            }
          return;

        case V8SFmode:
          mask = gen_lowpart (V8SFmode, mask);
          if (one_operand_shuffle)
            emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
          else
            {
              t1 = gen_reg_rtx (V8SFmode);
              t2 = gen_reg_rtx (V8SFmode);
              emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
              emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
              goto merge_two;
            }
          return;

        case V4SImode:
          /* By combining the two 128-bit input vectors into one 256-bit
             input vector, we can use VPERMD and VPERMPS for the full
             two-operand shuffle.  */
          t1 = gen_reg_rtx (V8SImode);
          t2 = gen_reg_rtx (V8SImode);
          emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
          emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
          emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
          emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
          return;

        case V4SFmode:
          t1 = gen_reg_rtx (V8SFmode);
          t2 = gen_reg_rtx (V8SImode);
          mask = gen_lowpart (V4SImode, mask);
          emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
          emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
          emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
          emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
          return;

        case V32QImode:
          t1 = gen_reg_rtx (V32QImode);
          t2 = gen_reg_rtx (V32QImode);
          t3 = gen_reg_rtx (V32QImode);
          vt2 = GEN_INT (128);
          for (i = 0; i < 32; i++)
            vec[i] = vt2;
          vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
          vt = force_reg (V32QImode, vt);
          for (i = 0; i < 32; i++)
            vec[i] = i < 16 ? vt2 : const0_rtx;
          vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
          vt2 = force_reg (V32QImode, vt2);
          /* From mask create two adjusted masks, which contain the same
             bits as mask in the low 7 bits of each vector element.
             The first mask will have the most significant bit clear
             if it requests element from the same 128-bit lane
             and MSB set if it requests element from the other 128-bit lane.
             The second mask will have the opposite values of the MSB,
             and additionally will have its 128-bit lanes swapped.
             E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
             t1   { 07 92 9e 09 ... | 17 19 85 1f ... } and
             t3   { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
             stands for other 12 bytes.  */
          /* The bit whether element is from the same lane or the other
             lane is bit 4, so shift it up by 3 to the MSB position.  */
          emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
                                    gen_lowpart (V4DImode, mask),
                                    GEN_INT (3)));
          /* Clear MSB bits from the mask just in case it had them set.  */
          emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
          /* After this t1 will have MSB set for elements from other lane.  */
          emit_insn (gen_xorv32qi3 (t1, t1, vt2));
          /* Clear bits other than MSB.  */
          emit_insn (gen_andv32qi3 (t1, t1, vt));
          /* Or in the lower bits from mask into t3.  */
          emit_insn (gen_iorv32qi3 (t3, t1, t2));
          /* And invert MSB bits in t1, so MSB is set for elements from the
             same lane.  */
          emit_insn (gen_xorv32qi3 (t1, t1, vt));
          /* Swap 128-bit lanes in t3.  */
          emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
                                          gen_lowpart (V4DImode, t3),
                                          const2_rtx, GEN_INT (3),
                                          const0_rtx, const1_rtx));
          /* And or in the lower bits from mask into t1.  */
          emit_insn (gen_iorv32qi3 (t1, t1, t2));
          if (one_operand_shuffle)
            {
              /* Each of these shuffles will put 0s in places where
                 element from the other 128-bit lane is needed, otherwise
                 will shuffle in the requested value.  */
              emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
              emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
              /* For t3 the 128-bit lanes are swapped again.  */
              emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
                                              gen_lowpart (V4DImode, t3),
                                              const2_rtx, GEN_INT (3),
                                              const0_rtx, const1_rtx));
              /* And oring both together leads to the result.  */
              emit_insn (gen_iorv32qi3 (target, t1, t3));
              return;
            }

          t4 = gen_reg_rtx (V32QImode);
          /* Similarly to the above one_operand_shuffle code,
             just for repeated twice for each operand.  merge_two:
             code will merge the two results together.  */
          emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
          emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
          emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
          emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
          emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
                                          gen_lowpart (V4DImode, t4),
                                          const2_rtx, GEN_INT (3),
                                          const0_rtx, const1_rtx));
          emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
                                          gen_lowpart (V4DImode, t3),
                                          const2_rtx, GEN_INT (3),
                                          const0_rtx, const1_rtx));
          emit_insn (gen_iorv32qi3 (t4, t2, t4));
          emit_insn (gen_iorv32qi3 (t3, t1, t3));
          t1 = t4;
          t2 = t3;
          goto merge_two;

        default:
          gcc_assert (GET_MODE_SIZE (mode) <= 16);
          break;
        }
    }

  if (TARGET_XOP)
    {
      /* The XOP VPPERM insn supports three inputs.  By ignoring the
         one_operand_shuffle special case, we avoid creating another
         set of constant vectors in memory.  */
      one_operand_shuffle = false;

      /* mask = mask & {2*w-1, ...} */
      vt = GEN_INT (2*w - 1);
    }
  else
    {
      /* mask = mask & {w-1, ...} */
      vt = GEN_INT (w - 1);
    }

  for (i = 0; i < w; i++)
    vec[i] = vt;
  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
  mask = expand_simple_binop (maskmode, AND, mask, vt,
                              NULL_RTX, 0, OPTAB_DIRECT);

  /* For non-QImode operations, convert the word permutation control
     into a byte permutation control.  */
  if (mode != V16QImode)
    {
      mask = expand_simple_binop (maskmode, ASHIFT, mask,
                                  GEN_INT (exact_log2 (e)),
                                  NULL_RTX, 0, OPTAB_DIRECT);

      /* Convert mask to vector of chars.  */
      mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));

      /* Replicate each of the input bytes into byte positions:
         (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
         (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
         (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
      for (i = 0; i < 16; ++i)
        vec[i] = GEN_INT (i/e * e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = validize_mem (force_const_mem (V16QImode, vt));
      if (TARGET_XOP)
        emit_insn (gen_xop_pperm (mask, mask, mask, vt));
      else
        emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));

      /* Convert it into the byte positions by doing
         mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...}  */
      for (i = 0; i < 16; ++i)
        vec[i] = GEN_INT (i % e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = validize_mem (force_const_mem (V16QImode, vt));
      emit_insn (gen_addv16qi3 (mask, mask, vt));
    }

  /* The actual shuffle operations all operate on V16QImode.  */
  op0 = gen_lowpart (V16QImode, op0);
  op1 = gen_lowpart (V16QImode, op1);
  target = gen_lowpart (V16QImode, target);

  if (TARGET_XOP)
    {
      emit_insn (gen_xop_pperm (target, op0, op1, mask));
    }
  else if (one_operand_shuffle)
    {
      emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
    }
  else
    {
      rtx xops[6];
      bool ok;

      /* Shuffle the two input vectors independently.  */
      t1 = gen_reg_rtx (V16QImode);
      t2 = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
      emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));

 merge_two:
      /* Then merge them together.  The key is whether any given control
         element contained a bit set that indicates the second word.  */
      mask = operands[3];
      if (maskmode == V2DImode && !TARGET_SSE4_1)
        {
          /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
             more shuffle to convert the V2DI input mask into a V4SI
             input mask.  At which point the masking that expand_int_vcond
             will work as desired.  */
          rtx t3 = gen_reg_rtx (V4SImode);
          emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
                                        const0_rtx, const0_rtx,
                                        const2_rtx, const2_rtx));
          mask = t3;
          maskmode = V4SImode;
          e = w = 4;
        }

      vt = GEN_INT (w);
      for (i = 0; i < w; i++)
        vec[i] = vt;
      vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
      vt = force_reg (maskmode, vt);
      mask = expand_simple_binop (maskmode, AND, mask, vt,
                                  NULL_RTX, 0, OPTAB_DIRECT);

      xops[0] = gen_lowpart (mode, operands[0]);
      xops[1] = gen_lowpart (mode, t2);
      xops[2] = gen_lowpart (mode, t1);
      xops[3] = gen_rtx_EQ (maskmode, mask, vt);
      xops[4] = mask;
      xops[5] = vt;
      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
    }
}
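
/* Usage note (added; not from the original source): for a two-operand
   V16QI shuffle without XOP the code above emits two pshufb's and merges
   them with a vector compare + conditional move, because pshufb can only
   pull bytes from a single source register.  The merge mask is recovered
   from the "second operand" bit of each control byte (value w, e.g. bit 4
   for 16-element vectors), tested with the EQ built just above.  */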
/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (src);
  rtx tmp;

  if (TARGET_SSE4_1)
    {
      rtx (*unpack)(rtx, rtx);
      rtx (*extract)(rtx, rtx) = NULL;
      enum machine_mode halfmode = BLKmode;

      switch (imode)
        {
        case V32QImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv16qiv16hi2;
          else
            unpack = gen_avx2_sign_extendv16qiv16hi2;
          halfmode = V16QImode;
          extract
            = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
          break;
        case V16HImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv8hiv8si2;
          else
            unpack = gen_avx2_sign_extendv8hiv8si2;
          halfmode = V8HImode;
          extract
            = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
          break;
        case V8SImode:
          if (unsigned_p)
            unpack = gen_avx2_zero_extendv4siv4di2;
          else
            unpack = gen_avx2_sign_extendv4siv4di2;
          halfmode = V4SImode;
          extract
            = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
          break;
        case V16QImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv8qiv8hi2;
          else
            unpack = gen_sse4_1_sign_extendv8qiv8hi2;
          break;
        case V8HImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv4hiv4si2;
          else
            unpack = gen_sse4_1_sign_extendv4hiv4si2;
          break;
        case V4SImode:
          if (unsigned_p)
            unpack = gen_sse4_1_zero_extendv2siv2di2;
          else
            unpack = gen_sse4_1_sign_extendv2siv2di2;
          break;
        default:
          gcc_unreachable ();
        }

      if (GET_MODE_SIZE (imode) == 32)
        {
          tmp = gen_reg_rtx (halfmode);
          emit_insn (extract (tmp, src));
        }
      else if (high_p)
        {
          /* Shift higher 8 bytes to lower 8 bytes.  */
          tmp = gen_reg_rtx (imode);
          emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
                                         gen_lowpart (V1TImode, src),
                                         GEN_INT (64)));
        }
      else
        tmp = src;

      emit_insn (unpack (dest, tmp));
    }
  else
    {
      rtx (*unpack)(rtx, rtx, rtx);

      switch (imode)
        {
        case V16QImode:
          if (high_p)
            unpack = gen_vec_interleave_highv16qi;
          else
            unpack = gen_vec_interleave_lowv16qi;
          break;
        case V8HImode:
          if (high_p)
            unpack = gen_vec_interleave_highv8hi;
          else
            unpack = gen_vec_interleave_lowv8hi;
          break;
        case V4SImode:
          if (high_p)
            unpack = gen_vec_interleave_highv4si;
          else
            unpack = gen_vec_interleave_lowv4si;
          break;
        default:
          gcc_unreachable ();
        }

      if (unsigned_p)
        tmp = force_reg (imode, CONST0_RTX (imode));
      else
        tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
                                   src, pc_rtx, pc_rtx);

      emit_insn (unpack (gen_lowpart (imode, dest), src, tmp));
    }
}
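
/* Illustrative example (added): without SSE4.1, sign-extending the low
   V8HI half to V4SI is done by interleaving with a computed high part:
       pxor      %xmm1, %xmm1
       pcmpgtw   %xmm0, %xmm1    ; 0xffff where a source element < 0
       punpcklwd %xmm1, %xmm0    ; pairs {lo, sign} -> 32-bit elements
   In the unsigned case %xmm1 simply stays zero, matching the
   force_reg (imode, CONST0_RTX (imode)) branch above.  */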
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */

bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode
      || GET_MODE (flags) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
        PUT_CODE (compare_op,
                  reverse_condition_maybe_unordered
                    (GET_CODE (compare_op)));
      else
        PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
        {
        case QImode:
          insn = gen_subqi3_carry;
          break;
        case HImode:
          insn = gen_subhi3_carry;
          break;
        case SImode:
          insn = gen_subsi3_carry;
          break;
        case DImode:
          insn = gen_subdi3_carry;
          break;
        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case QImode:
          insn = gen_addqi3_carry;
          break;
        case HImode:
          insn = gen_addhi3_carry;
          break;
        case SImode:
          insn = gen_addsi3_carry;
          break;
        case DImode:
          insn = gen_adddi3_carry;
          break;
        default:
          gcc_unreachable ();
        }
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
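
/* Worked example (added for illustration): for
       x = (a < b) ? x + 1 : x;     -- unsigned a, b --
   the expansion above folds the condition into the carry flag:
       cmpl  b, a        ; CF = (a < b)
       adcl  $0, x       ; x += CF
   and the mirrored decrement case uses sbb instead.  */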
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
        operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, word_mode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
         the operand may actually have a different mode now.  That's
         ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
        split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
        {
          int i;

          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              for (i = 0; i < size; i++)
                parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, SImode, 0);
              parts[0] = operand;
              for (i = 1; i < size; i++)
                parts[i] = adjust_address (operand, SImode, 4 * i);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              switch (mode)
                {
                case TFmode:
                  real_to_target (l, &r, mode);
                  parts[3] = gen_int_mode (l[3], SImode);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case XFmode:
                  /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
                     long double may not be 80-bit.  */
                  real_to_target (l, &r, mode);
                  parts[2] = gen_int_mode (l[2], SImode);
                  break;
                case DFmode:
                  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
                  break;
                default:
                  gcc_unreachable ();
                }
              parts[1] = gen_int_mode (l[1], SImode);
              parts[0] = gen_int_mode (l[0], SImode);
            }
          else
            gcc_unreachable ();
        }
    }
  else
    {
      if (mode == TImode)
        split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
        {
          enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
          if (REG_P (operand))
            {
              gcc_assert (reload_completed);
              parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
              parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
            }
          else if (offsettable_memref_p (operand))
            {
              operand = adjust_address (operand, DImode, 0);
              parts[0] = operand;
              parts[1] = adjust_address (operand, upper_mode, 8);
            }
          else if (GET_CODE (operand) == CONST_DOUBLE)
            {
              REAL_VALUE_TYPE r;
              long l[4];

              REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
              real_to_target (l, &r, mode);

              /* Do not use shift by 32 to avoid warning on 32bit systems.  */
              if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[0]
                  = gen_int_mode
                      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
                       DImode);
              else
                parts[0] = immed_double_const (l[0], l[1], DImode);

              if (upper_mode == SImode)
                parts[1] = gen_int_mode (l[2], SImode);
              else if (HOST_BITS_PER_WIDE_INT >= 64)
                parts[1]
                  = gen_int_mode
                      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
                       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
                       DImode);
              else
                parts[1] = immed_double_const (l[2], l[3], DImode);
            }
          else
            gcc_unreachable ();
        }
    }

  return size;
}
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Operands 2-5 receive the destination parts in the correct order;
   operands 6-9 receive the source parts.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][4];
  int nparts, i, j;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool collisionparts[4];

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */
      if (MEM_P (operands[1])
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], word_mode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
		|| offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      rtx src_base = XEXP (part[1][nparts - 1], 0);

      /* Compensate for the stack decrement by 4.  */
      if (!TARGET_64BIT && nparts == 3
	  && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
	src_base = plus_constant (Pmode, src_base, 4);

      /* src_base refers to the stack pointer and is
	 automatically decreased by emitted push.  */
      for (i = 0; i < nparts; i++)
	part[1][i] = change_address (part[1][i],
				     GET_MODE (part[1][i]), src_base);
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      rtx tmp;

      for (i = 0; i < nparts; i++)
	{
	  collisionparts[i]
	    = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
	  if (collisionparts[i])
	    collisions++;
	}

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3 && collisionparts[1])
	{
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}
      else if (collisions == 1
	       && nparts == 4
	       && (collisionparts[1] || collisionparts[2]))
	{
	  if (collisionparts[1])
	    {
	      tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	      tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	    }
	  else
	    {
	      tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
	      tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
	    }
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  for (i = 1; i < nparts; i++)
	    {
	      tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
	      part[1][i] = replace_equiv_address (part[1][i], tmp);
	    }
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (ix86_gen_add3 (stack_pointer_rtx,
					  stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	  else if (nparts == 4)
	    {
	      emit_move_insn (part[0][3], part[1][3]);
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this
	     is register, it is OK - we will just use larger counterpart.
	     We also retype memory - these come from an attempt to avoid REX
	     prefix on moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      switch (GET_CODE (part[1][1]))
		{
		case MEM:
		  part[1][1] = adjust_address (part[1][1], DImode, 0);
		  break;

		case REG:
		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
		  break;

		default:
		  gcc_unreachable ();
		}

	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))
	   || (nparts == 4
	       && REGNO (part[0][0]) == REGNO (part[1][3]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
	{
	  operands[2 + i] = part[0][j];
	  operands[6 + i] = part[1][j];
	}
    }
  else
    {
      for (i = 0; i < nparts; i++)
	{
	  operands[2 + i] = part[0][i];
	  operands[6 + i] = part[1][i];
	}
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_insn_for_size_p ())
    {
      for (j = 0; j < nparts - 1; j++)
	if (CONST_INT_P (operands[6 + j])
	    && operands[6 + j] != const0_rtx
	    && REG_P (operands[2 + j]))
	  for (i = j; i < nparts - 1; i++)
	    if (CONST_INT_P (operands[7 + i])
		&& INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
	      operands[7 + i] = operands[2 + j];
    }

  for (i = 0; i < nparts; i++)
    emit_move_insn (operands[2 + i], operands[6 + i]);
}
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
	  && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
	emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
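
/* Split a double-word left shift (DImode on 32-bit targets, TImode on
   64-bit ones) into operations on the two half-word registers.  Constant
   counts are expanded directly; 1 << N and -1 << N get branch-free
   special cases; a variable count uses shld on the high half followed by
   a fixup for counts of at least the half width.  */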
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashl3)(rtx, rtx, rtx);
  rtx (*gen_shld)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > half_width)
	    ix86_expand_ashl_const (high[0], count - half_width, mode);
	}
      else
	{
	  gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashl_const (low[0], count, mode);
	}
      return;
    }

  split_double_mode (mode, operands, 1, low, high);

  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode capable registers, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	{
	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	  ix86_expand_clear (low[0]);
	  ix86_expand_clear (high[0]);
	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));

	  d = gen_lowpart (QImode, low[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));

	  d = gen_lowpart (QImode, high[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_NE (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
	}

      /* Otherwise, we can get the same results by manually performing
	 a bit extract operation on bit 5/6, and then performing the two
	 shifts.  The two methods of getting 0/1 into low/high are exactly
	 the same size.  Avoiding the shift in the bit extract case helps
	 pentium4 a bit; no one else seems to care much either way.  */
      else
	{
	  enum machine_mode half_mode;
	  rtx (*gen_lshr3)(rtx, rtx, rtx);
	  rtx (*gen_and3)(rtx, rtx, rtx);
	  rtx (*gen_xor3)(rtx, rtx, rtx);
	  HOST_WIDE_INT bits;
	  rtx x;

	  if (mode == DImode)
	    {
	      half_mode = SImode;
	      gen_lshr3 = gen_lshrsi3;
	      gen_and3 = gen_andsi3;
	      gen_xor3 = gen_xorsi3;
	      bits = 5;
	    }
	  else
	    {
	      half_mode = DImode;
	      gen_lshr3 = gen_lshrdi3;
	      gen_and3 = gen_anddi3;
	      gen_xor3 = gen_xordi3;
	      bits = 6;
	    }

	  if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
	    x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
	  else
	    x = gen_lowpart (half_mode, operands[2]);
	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
	  emit_insn (gen_and3 (high[0], high[0], const1_rtx));
	  emit_move_insn (low[0], high[0]);
	  emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
	}

      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);
      emit_insn (gen_shld (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    {
      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
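
/* Split a double-word arithmetic right shift into half-word operations.
   Constant counts are expanded directly (a count of one less than the
   full width just smears the sign bit); a variable count uses shrd on
   the low half, sar on the high half, and a cmove- or branch-based
   fixup for counts of at least the half width.  */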
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count == GET_MODE_BITSIZE (mode) - 1)
	{
	  emit_move_insn (high[0], high[1]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));
	  emit_move_insn (low[0], high[0]);
	}
      else if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));

	  if (count > half_width)
	    emit_insn (gen_ashr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashr3 (scratch, scratch,
				GEN_INT (half_width - 1)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;

	  emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
	}
    }
}
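
/* Split a double-word logical right shift into half-word operations,
   analogous to ix86_split_ashr above except that the high half is
   zeroed rather than sign-filled in the fixup.  */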
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_lshr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > half_width)
	    emit_insn (gen_lshr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  ix86_expand_clear (scratch);
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

	  emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
	}
    }
}
/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
}
/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes.  If so, jump to the label.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}
/* Decrease COUNTREG by VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
}
/* Zero extend possibly SImode EXP to Pmode register.  */
static rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
}
/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
			    GEN_INT (exact_log2 (scale)),
			    NULL, 1, OPTAB_DIRECT);
  return sc;
}
/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
/* When SRCPTR is non-NULL, output a simple loop to move memory pointed
   to by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times;
   the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
   output the equivalent loop to set memory by VALUE (supposed to be
   in MODE).

   The size is rounded down to whole number of chunk size moved at once.
   SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr, rtx value,
			       rtx count, enum machine_mode mode, int unroll,
			       int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  int piece_size_n = GET_MODE_SIZE (mode) * unroll;
  rtx piece_size = GEN_INT (piece_size_n);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
			      NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
			       true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);

  /* This assert could be relaxed - in this case we'll need to compute
     smallest power of two, containing in PIECE_SIZE_N and pass it to
     offset_address.  */
  gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
  destmem = offset_address (destmem, tmp, piece_size_n);
  destmem = adjust_address (destmem, mode, 0);

  if (srcmem)
    {
      srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
      srcmem = adjust_address (srcmem, mode, 0);

      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using single temporary.
	 Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
	{
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem =
		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
		  srcmem =
		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, srcmem);
	    }
	}
      else
	{
	  rtx tmpreg[4];
	  gcc_assert (unroll <= 4);
	  for (i = 0; i < unroll; i++)
	    {
	      tmpreg[i] = gen_reg_rtx (mode);
	      if (i)
		srcmem =
		  adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
	      emit_move_insn (tmpreg[i], srcmem);
	    }
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		destmem =
		  adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	      emit_move_insn (destmem, tmpreg[i]);
	    }
	}
    }
  else
    for (i = 0; i < unroll; i++)
      {
	if (i)
	  destmem =
	    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
			   true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
	predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
	predict_jump (REG_BR_PROB_BASE - 1);
      else
	predict_jump (REG_BR_PROB_BASE
		      - (REG_BR_PROB_BASE + expected_size / 2)
			/ expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
				 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
	emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
/* Output "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
			   rtx destptr, rtx srcptr,
			   rtx count,
			   enum machine_mode mode)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  /* If the size is known, it is shorter to use rep movs.  */
  if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
						       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
			       GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
    }
  else
    {
      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
    }
  if (CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      srcmem = shallow_copy_rtx (srcmem);
      set_mem_size (destmem, rounded_count);
      set_mem_size (srcmem, rounded_count);
    }
  else
    {
      if (MEM_SIZE_KNOWN_P (destmem))
	clear_mem_size (destmem);
      if (MEM_SIZE_KNOWN_P (srcmem))
	clear_mem_size (srcmem);
    }
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
			  destexp, srcexp));
}
/* Output "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
			    rtx count, enum machine_mode mode,
			    rtx orig_value)
{
  rtx destexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  value = force_reg (mode, gen_lowpart (mode, value));
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
						       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if (orig_value == const0_rtx && CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, rounded_count);
    }
  else if (MEM_SIZE_KNOWN_P (destmem))
    clear_mem_size (destmem);
  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}
/* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
   DESTMEM.
   SRC is passed by pointer to be updated on return.
   Return value is updated DST.  */
static rtx
emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
	     HOST_WIDE_INT size_to_move)
{
  rtx dst = destmem, src = *srcmem, adjust, tempreg;
  enum insn_code code;
  enum machine_mode move_mode;
  int piece_size, i;

  /* Find the widest mode in which we could perform moves.
     Start with the biggest power of 2 less than SIZE_TO_MOVE and half
     it until move of such size is supported.  */
  piece_size = 1 << floor_log2 (size_to_move);
  move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
  code = optab_handler (mov_optab, move_mode);
  while (code == CODE_FOR_nothing && piece_size > 1)
    {
      piece_size >>= 1;
      move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
      code = optab_handler (mov_optab, move_mode);
    }

  /* Find the corresponding vector mode with the same size as MOVE_MODE.
     MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
  if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
    {
      int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
      move_mode = mode_for_vector (word_mode, nunits);
      code = optab_handler (mov_optab, move_mode);
      if (code == CODE_FOR_nothing)
	{
	  move_mode = word_mode;
	  piece_size = GET_MODE_SIZE (move_mode);
	  code = optab_handler (mov_optab, move_mode);
	}
    }
  gcc_assert (code != CODE_FOR_nothing);

  dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
  src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);

  /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
  gcc_assert (size_to_move % piece_size == 0);
  adjust = GEN_INT (piece_size);
  for (i = 0; i < size_to_move; i += piece_size)
    {
      /* We move from memory to memory, so we'll need to do it via
	 a temporary register.  */
      tempreg = gen_reg_rtx (move_mode);
      emit_insn (GEN_FCN (code) (tempreg, src));
      emit_insn (GEN_FCN (code) (dst, tempreg));

      emit_move_insn (destptr,
		      gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
      emit_move_insn (srcptr,
		      gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));

      dst = adjust_automodify_address_nv (dst, move_mode, destptr,
					  piece_size);
      src = adjust_automodify_address_nv (src, move_mode, srcptr,
					  piece_size);
    }

  /* Update DST and SRC rtx.  */
  *srcmem = src;
  return dst;
}
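
/* Illustrative walk-through of the mode selection above: for
   SIZE_TO_MOVE == 16 on a 64-bit target, the first candidate is the
   16-byte integer mode; if that has no mov pattern it is halved to
   DImode and two moves are emitted, and if it does, it is retyped as a
   two-element word_mode vector, falling back to word_mode pieces when
   the vector move is not supported either.  */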
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      HOST_WIDE_INT epilogue_size = countval % max_size;
      int i;

      /* For now MAX_SIZE should be a power of 2.  This assert could be
	 relaxed, but it'll require a bit more complicated epilogue
	 expanding.  */
      gcc_assert ((max_size & (max_size - 1)) == 0);
      for (i = max_size; i >= 1; i >>= 1)
	{
	  if (epilogue_size & i)
	    destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
	}
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count,
				   GEN_INT (max_size - 1),
				   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
				     count, QImode, 1, 4);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.
   */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  src = change_address (srcmem, HImode, srcptr);
	  dest = change_address (destmem, HImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  src = change_address (srcmem, QImode, srcptr);
	  dest = change_address (destmem, QImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, HImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, HImode, tmp);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, QImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, QImode, tmp);
	  emit_move_insn (dest, src);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
				 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
			 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
				 gen_lowpart (QImode, value), count, QImode,
				 1, max_size / 2);
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count,
			int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    {
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
	  offset += 1;
	}
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
/* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
   by ALIGN, to DESIRED_ALIGNMENT.
   Return value is updated DESTMEM.  */
static rtx
expand_movmem_prologue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count,
			int align, int desired_alignment)
{
  int i;
  for (i = 1; i < desired_alignment; i <<= 1)
    {
      if (align <= i)
	{
	  rtx label = ix86_expand_aligntest (destptr, i, false);
	  destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
	  ix86_adjust_counter (count, i);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	  set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
	}
    }
  return destmem;
}
/* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.
   The function updates DST and SRC, namely, it sets proper alignment.
   DST is returned via return value, SRC is updated via pointer SRCP.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
				 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx orig_dst = dst;
  rtx orig_src = src;
  int piece_size = 1;
  int copied_bytes = 0;
  int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;

  for (piece_size = 1;
       piece_size <= desired_align && copied_bytes < align_bytes;
       piece_size <<= 1)
    {
      if (align_bytes & piece_size)
	{
	  dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
	  copied_bytes += piece_size;
	}
    }

  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align;
      for (src_align = desired_align; src_align >= 2; src_align >>= 1)
	{
	  if ((src_align_bytes & (src_align - 1))
	      == (align_bytes & (src_align - 1)))
	    break;
	}
      if (src_align > (unsigned int) desired_align)
	src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  if (MEM_SIZE_KNOWN_P (orig_src))
    set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
  *srcp = src;
  return dst;
}
/* Set enough of DEST to align DEST, known to be aligned by ALIGN, to
   DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Set enough of DST to align DST, known to be aligned by ALIGN, to
   DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
				 int desired_align, int align_bytes)
{
  int off = 0;
  rtx orig_dst = dst;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  return dst;
}
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
	    int *dynamic_check, bool *noalign)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
			     || (memset
				 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable			\
			   || (alg != rep_prefix_1_byte		\
			       && alg != rep_prefix_4_byte	\
			       && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
	  && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
	return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
	return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop, REP is expensive to
     setup.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  /* We get here if the algorithms that were not libcall-based
	     were rep-prefix based and we are unable to use rep prefixes
	     based on global register usage.  Break out of the loop and
	     use the heuristic below.  */
	  if (algs->size[i].max == 0)
	    break;
	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
	    {
	      enum stringop_alg candidate = algs->size[i].alg;

	      if (candidate != libcall && ALG_USABLE_P (candidate))
		alg = candidate;
	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
		 last non-libcall inline algorithm.  */
	      if (TARGET_INLINE_ALL_STRINGOPS)
		{
		  /* When the current size is best to be copied by a libcall,
		     but we are still forced to inline, run the heuristic below
		     that will pick code for medium sized blocks.  */
		  if (alg != libcall)
		    return alg;
		  break;
		}
	      else if (ALG_USABLE_P (candidate))
		{
		  *noalign = algs->size[i].noalign;
		  return candidate;
		}
	    }
	}
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for maximal size of block that is faster to copy by hand and
     take blocks of at most of that size guessing that average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall
	  || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  enum stringop_alg candidate = algs->size[i].alg;
	  any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

	  if (candidate != libcall && candidate
	      && ALG_USABLE_P (candidate))
	    max = algs->size[i].max;
	}
      /* If there aren't any usable algorithms, then recursing on
	 smaller sizes isn't going to find anything.  Just return the
	 simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
	{
	  /* Pick something reasonable.  */
	  if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	    *dynamic_check = 128;
	  return loop_1_byte;
	}
      if (max == -1)
	max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check, noalign);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	*dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
static int
decide_alignment (int align,
		  enum stringop_alg alg,
		  int expected_size,
		  enum machine_mode move_mode)
{
  int desired_align = 0;

  gcc_assert (alg != no_stringop);

  if (alg == libcall)
    return 0;
  if (move_mode == VOIDmode)
    return 0;

  desired_align = GET_MODE_SIZE (move_mode);
  /* PentiumPro has special logic triggering for 8 byte aligned blocks,
     copying whole cacheline at once.  */
  if (TARGET_PENTIUMPRO
      && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
    desired_align = 8;

  if (optimize_size)
    desired_align = 1;
  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;

  return desired_align;
}
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: Conditional that jumps up to epilogues for small
      blocks that can be handled by epilogue alone.  This is faster
      but also needed for correctness, since the prologue assumes the
      block is larger than the desired alignment.

      Optional dynamic check for size and libcall for large
      blocks is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy first few bytes in order to get destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power of two sized
      blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with specified algorithm.

   4) Epilogue: code copying tail of the block that is too small to be
      handled by main body (or up to size guarded by prologue guard).  */
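
/* As an illustration (the numbers are hypothetical; the real choices come
   from decide_alg and decide_alignment): an unrolled_loop copy of a block
   whose size is only known at runtime, with SIZE_NEEDED == 16, first
   branches to the epilogue when the count is below 16 (step 1), then
   copies up to DESIRED_ALIGN - ALIGN bytes to align the destination
   (step 2), runs the 16-bytes-per-iteration loop (step 3), and finally
   copies the remaining count & 15 bytes (step 4).  */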
bool
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx srcreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  int dynamic_check;
  bool need_zero_guard = false;
  bool noalign;
  enum machine_mode move_mode = VOIDmode;
  int unroll_factor = 1;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access on reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care here
     just about destination alignment.  */
  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */
  alg = decide_alg (count, expected_size, false, &dynamic_check, &noalign);
  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);

  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));
  srcreg = copy_addr_to_reg (XEXP (src, 0));

  unroll_factor = 1;
  move_mode = word_mode;
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      need_zero_guard = true;
      move_mode = QImode;
      break;
    case loop:
      need_zero_guard = true;
      break;
    case unrolled_loop:
      need_zero_guard = true;
      unroll_factor = (TARGET_64BIT ? 4 : 2);
      break;
    case vector_loop:
      need_zero_guard = true;
      unroll_factor = 4;
      /* Find the widest supported mode.  */
      move_mode = word_mode;
      while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
	     != CODE_FOR_nothing)
	move_mode = GET_MODE_WIDER_MODE (move_mode);

      /* Find the corresponding vector mode with the same size as MOVE_MODE.
	 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
      if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
	{
	  int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
	  move_mode = mode_for_vector (word_mode, nunits);
	  if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
	    move_mode = word_mode;
	}
      gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
      break;
    case rep_prefix_8_byte:
      move_mode = DImode;
      break;
    case rep_prefix_4_byte:
      move_mode = SImode;
      break;
    case rep_prefix_1_byte:
      move_mode = QImode;
      break;
    }
  size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
  epilogue_size_needed = size_needed;

  desired_align = decide_alignment (align, alg, expected_size, move_mode);
  if (!TARGET_ALIGN_STRINGOPS || noalign)
    align = desired_align;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);

      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size < epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }

  /* Emit code to decide on runtime whether library call or inline should be
     used.  */
  if (dynamic_check != -1)
    {
      if (CONST_INT_P (count_exp))
	{
	  if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
	    {
	      emit_block_move_via_libcall (dst, src, count_exp, false);
	      count_exp = const0_rtx;
	      goto epilogue;
	    }
	}
      else
	{
	  rtx hot_label = gen_label_rtx ();
	  jump_around_label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
				   LEU, 0, GET_MODE (count_exp), 1, hot_label);
	  predict_jump (REG_BR_PROB_BASE * 90 / 100);
	  emit_block_move_via_libcall (dst, src, count_exp, false);
	  emit_jump (jump_around_label);
	  emit_label (hot_label);
	}
    }

  /* Step 2: Alignment prologue.  */

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  src = change_address (src, BLKmode, srcreg);
	  dst = change_address (dst, BLKmode, destreg);
	  dst = expand_movmem_prologue (dst, src, destreg, srcreg, count_exp,
					align, desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
						 desired_align, align_bytes);
	  count_exp = plus_constant (counter_mode (count_exp),
				     count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
    case loop:
    case unrolled_loop:
    case vector_loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, move_mode, unroll_factor,
				     expected_size);
      break;
    case rep_prefix_8_byte:
    case rep_prefix_4_byte:
    case rep_prefix_1_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 move_mode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
					  (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					  (count / size_needed) * size_needed);
    }
  else
    {
      src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
	 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
	 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
	 bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
			    epilogue_size_needed);
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
/* Helper function for memset.  For QImode value 0xXY produce
   0xXYXYXYXY of width specified by MODE.  This is essentially
   a * 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
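
/* Illustrative trace of the shift/or path below for SImode and val 0xAB
   (with TARGET_PARTIAL_REG_STALL, so the insv shortcut is skipped):
     reg  = 0x000000AB
     reg |= reg << 8;    now 0x0000ABAB
     reg |= reg << 16;   now 0xABABABAB
   DImode adds one more step, reg |= reg << 32.  */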
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
	v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
	 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
				  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
	{
	  if (mode == SImode)
	    emit_insn (gen_movsi_insv_1 (reg, reg));
	  else
	    emit_insn (gen_movdi_insv_1 (reg, reg));
	}
      else
	{
	  tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
				     NULL, 1, OPTAB_DIRECT);
	  reg =
	    expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
	}
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
	return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}
/* Duplicate value VAL using promote_duplicated_reg into maximal size that will
   be needed by main loop copying SIZE_NEEDED chunks and prologue getting
   alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
				int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  See expand_movmem comment for explanation of individual
   steps performed.  */
bool
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;
  bool noalign;
  enum machine_mode move_mode = VOIDmode;
  int unroll_factor;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, true, &dynamic_check, &noalign);
  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);

  if (!count)
    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));

  move_mode = word_mode;
  unroll_factor = 1;
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      break;
    case unrolled_loop:
      need_zero_guard = true;
      unroll_factor = 4;
      break;
    case rep_prefix_8_byte:
      move_mode = DImode;
      break;
    case rep_prefix_4_byte:
      move_mode = SImode;
      break;
    case rep_prefix_1_byte:
      move_mode = QImode;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      move_mode = QImode;
      break;
    }
  size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
  epilogue_size_needed = size_needed;

  desired_align = decide_alignment (align, alg, expected_size, move_mode);
  if (!TARGET_ALIGN_STRINGOPS || noalign)
    align = desired_align;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
        {
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
        }
      if (align_bytes == 0)
        {
          enum machine_mode mode = SImode;
          if (TARGET_64BIT && (count & ~0xffffffff))
            mode = DImode;
          count_exp = force_reg (mode, count_exp);
        }
    }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (ie one load of the big constant in the
     front of all code).  */
  if (CONST_INT_P (val_exp))
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);
  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
         Make sure it is power of 2.  */
      epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);

      /* To improve performance of small blocks, we jump around the VAL
         promoting mode.  This means that if the promoted VAL is not constant,
         we might not use it in the epilogue and have to use byte
         loop variant.  */
      if (epilogue_size_needed > 2 && !promoted_val)
        force_loopy_epilogue = true;
      if (count)
        {
          if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
            {
              /* If main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
              if (size_needed == 1)
                desired_align = align;
              else
                goto epilogue;
            }
        }
      else
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1 || expected_size <= epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
        }
    }
  if (dynamic_check != -1)
    {
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                               LEU, 0, counter_mode (count_exp), 1, hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      set_storage_via_libcall (dst, count_exp, val_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);
    }

  /* Step 2: Alignment prologue.  */

  /* Do the expensive promotion once we branched off the small blocks.  */
  if (!promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);
  gcc_assert (desired_align >= 1 && align >= 1);

  if (desired_align > align)
    {
      if (align_bytes == 0)
        {
          /* Except for the first move in epilogue, we no longer know
             constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
          dst = change_address (dst, BLKmode, destreg);
          expand_setmem_prologue (dst, destreg, promoted_val, count_exp,
                                  align, desired_align);
        }
      else
        {
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
                                                 desired_align, align_bytes);
          count_exp = plus_constant (counter_mode (count_exp),
                                     count_exp, -align_bytes);
          count -= align_bytes;
        }
      if (need_zero_guard
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      promoted_val = val_exp;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
    case loop:
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, move_mode, unroll_factor,
                                     expected_size);
      break;
    case rep_prefix_8_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  DImode);
      break;
    case rep_prefix_4_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  SImode);
      break;
    case rep_prefix_1_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  QImode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                        (count / size_needed) * size_needed);
  else
    dst = change_address (dst, BLKmode, destreg);

  /* Step 4: Epilogue to copy the remaining bytes.  */

 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
         while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
         Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
         bytes.  Compensate if needed.  */
      if (size_needed < epilogue_size_needed)
        {
          tmp =
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
                                 OPTAB_DIRECT);
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
        expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
                                         epilogue_size_needed);
      else
        expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
                                epilogue_size_needed);
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
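
/* Illustrative sketch, not part of the original source: the shape of the
   code the expander above emits, written as plain C.  The prologue stores
   single bytes until DST is aligned, the main loop stores promoted
   SIZE_NEEDED-byte chunks, and the epilogue finishes the remaining
   COUNT % SIZE_NEEDED bytes.  */
#if 0
#include <stdint.h>
#include <string.h>

static void
setmem_shape (unsigned char *dst, unsigned char val, size_t count)
{
  uint64_t v = val * 0x0101010101010101ULL;   /* promoted VAL */
  while (((uintptr_t) dst & 7) && count)      /* Step 2: alignment prologue */
    *dst++ = val, count--;
  for (; count >= 8; count -= 8, dst += 8)    /* Step 3: main loop */
    memcpy (dst, &v, 8);
  while (count--)                             /* Step 4: epilogue */
    *dst++ = val;
}
#endif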
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */
static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2?  */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check whether it is aligned to 4 bytes.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2.  */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          emit_insn (ix86_gen_add3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop: it only makes programs bigger and does not help
     speed.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg, tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2, out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes?  */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (ix86_gen_add3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
  emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));

  emit_label (end_0_label);
}
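
/* Worked example of the zero-byte test emitted above (an illustrative
   sketch, not part of the original source): (X - 0x01010101) & ~X &
   0x80808080 is nonzero iff some byte of X is zero, so one test covers
   four bytes per loop iteration.  */
#if 0
static int
has_zero_byte (unsigned int x)
{
  return ((x - 0x01010101U) & ~x & 0x80808080U) != 0;
}
/* has_zero_byte (0x41424344) == 0; has_zero_byte (0x41004344) != 0.  */
#endif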
/* Expand strlen.  */

bool
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_insn_for_size_p ()
      && (!CONST_INT_P (align) || INTVAL (align) < 4))
    return false;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_insn_for_size_p ())
    {
      /* Well it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction are done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
      emit_insn (ix86_gen_sub3 (out, out, addr));
    }
  else
    {
      rtx unspec;

      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
        return false;

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
                                                 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
      emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
    }
  return true;
}
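
/* Sketch (illustrative, not part of the original source) of the length
   computation in the scasb branch above: the counter register is preloaded
   with -1 and "repnz scasb" decrements it once per byte scanned, including
   the terminator, so the length is the one's complement of the final
   counter minus 1.  */
#if 0
static unsigned long
scas_strlen (unsigned long ecx_after_scas)
{
  /* e.g. for "ab": ecx goes -1 -> -4, and ~(-4UL) - 1 == 2.  */
  return ~ecx_after_scas - 1;
}
#endif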
/* For the given symbol (function), construct code to compute the address of
   its PLT entry in the large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp, unspec;

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
  gcc_assert (Pmode == DImode);

  tmp = gen_reg_rtx (Pmode);
  unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx callarg2,
                  rtx pop, bool sibcall)
{
  unsigned int const cregs_size
    = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
  rtx vec[3 + cregs_size];
  rtx use = NULL, call;
  unsigned int vec_len = 0;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
        fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (flag_pic
          && (!TARGET_64BIT
              || (ix86_cmodel == CM_LARGE_PIC
                  && DEFAULT_ABI != MS_ABI))
          && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
          && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
        use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, AX_REG);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (ix86_cmodel == CM_LARGE_PIC
      && !TARGET_PECOFF
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (sibcall
           ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
           : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
    {
      fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
      fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
    }

  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  vec[vec_len++] = call;

  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      vec[vec_len++] = pop;
    }

  if (TARGET_64BIT_MS_ABI
      && (!callarg2 || INTVAL (callarg2) != -2))
    {
      unsigned i;

      vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
                                       UNSPEC_MS_TO_SYSV_CALL);

      for (i = 0; i < cregs_size; i++)
        {
          int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
          enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;

          vec[vec_len++]
            = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
        }
    }

  if (vec_len > 1)
    call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;

  return call;
}
/* Output the assembly for a call instruction.  */

const char *
ix86_output_call_insn (rtx insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      if (direct_p)
        xasm = "jmp\t%P0";
      /* SEH epilogue detection requires the indirect branch case
         to include REX.W.  */
      else if (TARGET_SEH)
        xasm = "rex.W jmp %A0";
      else
        xasm = "jmp\t%A0";

      output_asm_insn (xasm, &call_op);
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx i;

      for (i = NEXT_INSN (insn); i; i = NEXT_INSN (i))
        {
          /* If we get to another real insn, we don't need the nop.  */
          if (INSN_P (i))
            break;

          /* If we get to the epilogue note, prevent a catch region from
             being adjacent to the standard epilogue sequence.  If non-
             call-exceptions, we'll have done this during epilogue
             emission.  */
          if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
              && !flag_non_call_exceptions
              && !can_throw_internal (insn))
            {
              seh_nop_p = true;
              break;
            }
        }

      /* If we didn't find a real insn following the call, prevent the
         unwinder from looking into the next function.  */
      if (i == NULL)
        seh_nop_p = true;
    }

  if (direct_p)
    xasm = "call\t%P0";
  else
    xasm = "call\t%A0";

  output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared_machine_function ();
  f->use_fast_prologue_epilogue_nregs = -1;
  f->call_abi = ix86_abi;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc_stack_local_entry ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (s->rtl);
}
static void
ix86_instantiate_decls (void)
{
  struct stack_local_entry *s;

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->rtl != NULL_RTX)
      instantiate_decl_rtl (s->rtl);
}
/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  We never generate addr32 prefix for LEA insn.  */

int
memory_address_length (rtx addr, bool lea)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  len = (parts.seg == SEG_DEFAULT) ? 0 : 1;

  /* If this is not LEA instruction, add the length of addr32 prefix.  */
  if (TARGET_64BIT && !lea
      && (SImode_address_operand (addr, VOIDmode)
          || (parts.base && GET_MODE (parts.base) == SImode)
          || (parts.index && GET_MODE (parts.index) == SImode)))
    len++;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  if (base && GET_CODE (base) == SUBREG)
    base = SUBREG_REG (base);
  if (index && GET_CODE (index) == SUBREG)
    index = SUBREG_REG (index);

  gcc_assert (base == NULL_RTX || REG_P (base));
  gcc_assert (index == NULL_RTX || REG_P (index));

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
         the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
         code.  */
      if (base == arg_pointer_rtx
          || base == frame_pointer_rtx
          || REGNO (base) == SP_REG
          || REGNO (base) == BP_REG
          || REGNO (base) == R12_REG
          || REGNO (base) == R13_REG)
        len++;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (TARGET_64BIT)
        {
          rtx symbol = disp;

          if (GET_CODE (disp) == CONST)
            symbol = XEXP (disp, 0);
          if (GET_CODE (symbol) == PLUS
              && CONST_INT_P (XEXP (symbol, 1)))
            symbol = XEXP (symbol, 0);

          if (GET_CODE (symbol) != LABEL_REF
              && (GET_CODE (symbol) != SYMBOL_REF
                  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
              && (GET_CODE (symbol) != UNSPEC
                  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
                      && XINT (symbol, 1) != UNSPEC_PCREL
                      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
            len++;
        }
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          if (base && satisfies_constraint_K (disp))
            len += 1;
          else
            len += 4;
        }
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
        len++;

      /* An index requires the two-byte modrm form....  */
      if (index
          /* ...like esp (or r12), which always wants an index.  */
          || base == arg_pointer_rtx
          || base == frame_pointer_rtx
          || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
        len++;
    }

  return len;
}
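
/* Examples of the extra bytes counted above (beyond the opcode and the
   one-byte modrm) -- illustrative, derived from the rules in the code:
     (%eax)         -> 0  (plain modrm)
     (%esp)         -> 1  (SIB byte)
     8(%ebp)        -> 1  (disp8)
     1234(%eax)     -> 4  (disp32)
     1234(,%eax,4)  -> 5  (SIB + disp32)  */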
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
        enum attr_mode mode = get_attr_mode (insn);

        gcc_assert (!len);
        if (shortform && CONST_INT_P (recog_data.operand[i]))
          {
            HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
            switch (mode)
              {
              case MODE_QI:
                len = 1;
                continue;
              case MODE_HI:
                ival = trunc_int_for_mode (ival, HImode);
                break;
              case MODE_SI:
                ival = trunc_int_for_mode (ival, SImode);
                break;
              default:
                break;
              }
            if (IN_RANGE (ival, -128, 127))
              {
                len = 1;
                continue;
              }
          }
        switch (mode)
          {
          case MODE_QI:
            len = 1;
            break;
          case MODE_HI:
            len = 2;
            break;
          case MODE_SI:
            len = 4;
            break;
          /* Immediates for DImode instructions are encoded
             as 32bit sign extended values.  */
          case MODE_DI:
            len = 4;
            break;
          default:
            fatal_insn ("unknown insn mode", insn);
          }
      }
  return len;
}
/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
        set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);

      return memory_address_length (addr, true);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        constrain_operands_cached (reload_completed);
        if (which_alternative != -1)
          {
            const char *constraints = recog_data.constraints[i];
            int alt = which_alternative;

            while (*constraints == '=' || *constraints == '+')
              constraints++;
            while (alt-- > 0)
              while (*constraints++ != ',')
                ;
            /* Skip ignored operands.  */
            if (*constraints == 'X')
              continue;
          }
        return memory_address_length (XEXP (recog_data.operand[i], 0), false);
      }
  return 0;
}
/* Compute default value for "length_vex" attribute.  It includes
   a 2 or 3 byte VEX prefix and 1 opcode byte.  */

int
ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
{
  int i;

  /* Only the 0f opcode can use the 2 byte VEX prefix, and the VEX W bit
     requires the 3 byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use the 2 byte VEX prefix in 32bit mode.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
        /* REX.W bit uses 3 byte VEX prefix.  */
        if (GET_MODE (recog_data.operand[i]) == DImode
            && GENERAL_REG_P (recog_data.operand[i]))
          return 3 + 1;
      }
    else
      {
        /* REX.X or REX.B bits use 3 byte VEX prefix.  */
        if (MEM_P (recog_data.operand[i])
            && x86_extended_reg_mentioned_p (recog_data.operand[i]))
          return 3 + 1;
      }

  return 2 + 1;
}
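
/* For example (illustrative): "vaddps %xmm1, %xmm2, %xmm3" fits the 2-byte
   VEX form, so this returns 2 + 1; a DImode general register operand or an
   extended register mentioned in a memory operand forces the 3-byte form
   and a result of 3 + 1.  */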
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_ATOM:
    case PROCESSOR_SLM:
    case PROCESSOR_K6:
    case PROCESSOR_BTVER2:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_CORE2:
    case PROCESSOR_COREI7:
    case PROCESSOR_HASWELL:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_NOCONA:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BDVER3:
    case PROCESSOR_BTVER1:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static bool
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return false;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return false;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return false;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return false;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return false;

  return true;
}
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */

bool
ix86_agi_dependent (rtx set_insn, rtx use_insn)
{
  int i;
  extract_insn_cached (use_insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        rtx addr = XEXP (recog_data.operand[i], 0);
        return modified_in_p (addr, set_insn) != 0;
      }
  return false;
}
/* Helper function for exact_store_load_dependency.
   Return true if ADDR is found in INSN.  */
static bool
exact_dependency_1 (rtx addr, rtx insn)
{
  enum rtx_code code;
  const char *format_ptr;
  int i, j;

  code = GET_CODE (insn);
  switch (code)
    {
    case MEM:
      if (rtx_equal_p (addr, insn))
        return true;
      break;
    case REG:
    CASE_CONST_ANY:
    case SYMBOL_REF:
    case CODE_LABEL:
    case PC:
    case CC0:
    case EXPR_LIST:
      return false;
    default:
      break;
    }

  format_ptr = GET_RTX_FORMAT (code);
  for (i = 0; i < GET_RTX_LENGTH (code); i++)
    {
      switch (*format_ptr++)
        {
        case 'e':
          if (exact_dependency_1 (addr, XEXP (insn, i)))
            return true;
          break;
        case 'E':
          for (j = 0; j < XVECLEN (insn, i); j++)
            if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
              return true;
          break;
        }
    }
  return false;
}
/* Return true if there exists exact dependency for store & load, i.e.
   the same memory address is used in them.  */
static bool
exact_store_load_dependency (rtx store, rtx load)
{
  rtx set1, set2;

  set1 = single_set (store);
  if (!set1)
    return false;
  if (!MEM_P (SET_DEST (set1)))
    return false;
  set2 = single_set (load);
  if (!set2)
    return false;
  if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
    return true;
  return false;
}
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (insn_type == TYPE_LEA)
        {
          rtx addr = PATTERN (insn);

          if (GET_CODE (addr) == PARALLEL)
            addr = XVECEXP (addr, 0, 0);

          gcc_assert (GET_CODE (addr) == SET);

          addr = SET_SRC (addr);
          if (modified_in_p (addr, dep_insn))
            cost += 1;
        }
      else if (ix86_agi_dependent (dep_insn, insn))
        cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
        cost = 0;

      /* Floating point stores require value to be ready one cycle
         earlier.  */
      if (insn_type == TYPE_FMOV
          && get_attr_memory (insn) == MEMORY_STORE
          && !ix86_agi_dependent (dep_insn, insn))
        cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
          && (set = single_set (dep_insn)) != NULL_RTX
          && (set2 = single_set (insn)) != NULL_RTX
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && MEM_P (SET_DEST (set2)))
        cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          /* Claim moves to take one cycle, as core can issue one load
             at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 1)
            cost--;
        }
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
         finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          /* Claim moves to take one cycle, as core can issue one load
             at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 2)
            cost -= 2;
          else
            cost = 1;
        }
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BDVER3:
    case PROCESSOR_BTVER1:
    case PROCESSOR_BTVER2:
    case PROCESSOR_ATOM:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
         in parallel with previous instruction in case
         previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          enum attr_unit unit = get_attr_unit (insn);
          int loadcost;

          /* Because of the difference between the length of integer and
             floating unit pipeline preparation stages, the memory operands
             for floating point are cheaper.

             ??? For Athlon the difference is most probably 2.  */
          if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
            loadcost = 3;
          else
            loadcost = TARGET_ATHLON ? 2 : 0;

          if (cost >= loadcost)
            cost -= loadcost;
          else
            cost = 0;
        }
      break;

    case PROCESSOR_SLM:
      if (!reload_completed)
        return cost;

      /* Increase cost of integer loads.  */
      memory = get_attr_memory (dep_insn);
      if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
        {
          enum attr_unit unit = get_attr_unit (dep_insn);
          if (unit == UNIT_INTEGER && cost == 1)
            {
              if (memory == MEMORY_LOAD)
                cost = 3;
              else
                {
                  /* Increase cost of ld/st for short int types only
                     because of store forwarding issue.  */
                  rtx set = single_set (dep_insn);
                  if (set && (GET_MODE (SET_DEST (set)) == QImode
                              || GET_MODE (SET_DEST (set)) == HImode))
                    {
                      /* Increase cost of store/load insn if exact
                         dependence exists and it is load insn.  */
                      enum attr_memory insn_memory = get_attr_memory (insn);
                      if (insn_memory == MEMORY_LOAD
                          && exact_store_load_dependency (dep_insn, insn))
                        cost = 3;
                    }
                }
            }
        }
      break;

    default:
      break;
    }

  return cost;
}
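
/* Example of the Pentium AGI penalty modeled above (illustrative): in the
   pair

       add  %eax, %ebx
       mov  (%ebx), %ecx

   the load's address depends on the immediately preceding ALU result, so
   ix86_agi_dependent is true and one cycle is added to the cost.  */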
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_K6:
      return 1;

    case PROCESSOR_CORE2:
    case PROCESSOR_COREI7:
    case PROCESSOR_HASWELL:
    case PROCESSOR_ATOM:
    case PROCESSOR_SLM:
      /* Generally, we want haifa-sched:max_issue() to look ahead as far
         as many instructions can be executed on a cycle, i.e.,
         issue_rate.  I wonder why tuning for many CPUs does not do this.  */
      if (reload_completed)
        return ix86_issue_rate ();
      /* Don't use lookahead for pre-reload schedule to save compile time.  */
      return 0;

    default:
      return 0;
    }
}
/* Try to reorder ready list to take advantage of Atom pipelined IMUL
   execution.  It is applied if
   (1) an IMUL instruction is on the top of the list;
   (2) there is exactly one producer of an independent IMUL instruction in
       the ready list.
   Return index of IMUL producer if it was found and -1 otherwise.  */
static int
do_reorder_for_imul (rtx *ready, int n_ready)
{
  rtx insn, set, insn1, insn2;
  sd_iterator_def sd_it;
  dep_t dep;
  int index = -1;
  int i;

  if (ix86_tune != PROCESSOR_ATOM)
    return index;

  /* Check that IMUL instruction is on the top of ready list.  */
  insn = ready[n_ready - 1];
  set = single_set (insn);
  if (!set)
    return index;
  if (!(GET_CODE (SET_SRC (set)) == MULT
        && GET_MODE (SET_SRC (set)) == SImode))
    return index;

  /* Search for producer of independent IMUL instruction.  */
  for (i = n_ready - 2; i >= 0; i--)
    {
      insn = ready[i];
      if (!NONDEBUG_INSN_P (insn))
        continue;
      /* Skip IMUL instruction.  */
      insn2 = PATTERN (insn);
      if (GET_CODE (insn2) == PARALLEL)
        insn2 = XVECEXP (insn2, 0, 0);
      if (GET_CODE (insn2) == SET
          && GET_CODE (SET_SRC (insn2)) == MULT
          && GET_MODE (SET_SRC (insn2)) == SImode)
        continue;

      FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
        {
          rtx con;
          con = DEP_CON (dep);
          if (!NONDEBUG_INSN_P (con))
            continue;
          insn1 = PATTERN (con);
          if (GET_CODE (insn1) == PARALLEL)
            insn1 = XVECEXP (insn1, 0, 0);

          if (GET_CODE (insn1) == SET
              && GET_CODE (SET_SRC (insn1)) == MULT
              && GET_MODE (SET_SRC (insn1)) == SImode)
            {
              sd_iterator_def sd_it1;
              dep_t dep1;
              /* Check if there is no other dependee for IMUL.  */
              index = i;
              FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
                {
                  rtx pro;
                  pro = DEP_PRO (dep1);
                  if (!NONDEBUG_INSN_P (pro))
                    continue;
                  if (pro != insn)
                    index = -1;
                }
              if (index >= 0)
                break;
            }
        }
      if (index >= 0)
        break;
    }
  return index;
}
/* Try to find the best candidate on the top of the ready list if two insns
   have the same priority - the candidate is best if its dependees were
   scheduled earlier.  Applied for Silvermont only.
   Return true if the top 2 insns must be interchanged.  */
static bool
swap_top_of_ready_list (rtx *ready, int n_ready)
{
  rtx top = ready[n_ready - 1];
  rtx next = ready[n_ready - 2];
  rtx set;
  sd_iterator_def sd_it;
  dep_t dep;
  int clock1 = -1;
  int clock2 = -1;
  #define INSN_TICK(INSN) (HID (INSN)->tick)

  if (ix86_tune != PROCESSOR_SLM)
    return false;

  if (!NONDEBUG_INSN_P (top))
    return false;
  if (!NONJUMP_INSN_P (top))
    return false;
  if (!NONDEBUG_INSN_P (next))
    return false;
  if (!NONJUMP_INSN_P (next))
    return false;
  set = single_set (top);
  if (!set)
    return false;
  set = single_set (next);
  if (!set)
    return false;

  if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
    {
      if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
        return false;
      /* Determine winner more precisely.  */
      FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
        {
          rtx pro;
          pro = DEP_PRO (dep);
          if (!NONDEBUG_INSN_P (pro))
            continue;
          if (INSN_TICK (pro) > clock1)
            clock1 = INSN_TICK (pro);
        }
      FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
        {
          rtx pro;
          pro = DEP_PRO (dep);
          if (!NONDEBUG_INSN_P (pro))
            continue;
          if (INSN_TICK (pro) > clock2)
            clock2 = INSN_TICK (pro);
        }

      if (clock1 == clock2)
        {
          /* Determine winner - load must win.  */
          enum attr_memory memory1, memory2;
          memory1 = get_attr_memory (top);
          memory2 = get_attr_memory (next);
          if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
            return true;
        }
      return (bool) (clock2 < clock1);
    }
  return false;
  #undef INSN_TICK
}
/* Perform possible reordering of the ready list for Atom/Silvermont only.
   Return issue rate.  */
static int
ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
                    int clock_var)
{
  int issue_rate = -1;
  int n_ready = *pn_ready;
  int index;
  rtx insn;
  int i;

  /* Set up issue rate.  */
  issue_rate = ix86_issue_rate ();

  /* Do reordering for Atom/SLM only.  */
  if (ix86_tune != PROCESSOR_ATOM && ix86_tune != PROCESSOR_SLM)
    return issue_rate;

  /* Nothing to do if ready list contains only 1 instruction.  */
  if (n_ready <= 1)
    return issue_rate;

  /* Do reordering for post-reload scheduler only.  */
  if (!reload_completed)
    return issue_rate;

  if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
    {
      if (sched_verbose > 1)
        fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
                 INSN_UID (ready[index]));

      /* Put IMUL producer (ready[index]) at the top of ready list.  */
      insn = ready[index];
      for (i = index; i < n_ready - 1; i++)
        ready[i] = ready[i + 1];
      ready[n_ready - 1] = insn;
      return issue_rate;
    }
  if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
    {
      if (sched_verbose > 1)
        fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
                 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
      /* Swap 2 top elements of ready list.  */
      insn = ready[n_ready - 1];
      ready[n_ready - 1] = ready[n_ready - 2];
      ready[n_ready - 2] = insn;
    }
  return issue_rate;
}
static bool
ix86_class_likely_spilled_p (reg_class_t);

/* Return true if the lhs of INSN is a HW function argument register, and
   set IS_SPILLED to true if it is a likely spilled HW register.  */
static bool
insn_is_function_arg (rtx insn, bool *is_spilled)
{
  rtx dst;

  if (!NONDEBUG_INSN_P (insn))
    return false;
  /* Call instructions are not movable, ignore them.  */
  if (CALL_P (insn))
    return false;
  insn = PATTERN (insn);
  if (GET_CODE (insn) == PARALLEL)
    insn = XVECEXP (insn, 0, 0);
  if (GET_CODE (insn) != SET)
    return false;
  dst = SET_DEST (insn);
  if (REG_P (dst) && HARD_REGISTER_P (dst)
      && ix86_function_arg_regno_p (REGNO (dst)))
    {
      /* Is it a likely spilled HW register?  */
      if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
          && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
        *is_spilled = true;
      return true;
    }
  return false;
}
/* Add output dependencies for a chain of adjacent function arguments, but
   only if there is a move to a likely spilled HW register.  Return the first
   argument if at least one dependence was added, or NULL otherwise.  */
static rtx
add_parameter_dependencies (rtx call, rtx head)
{
  rtx insn;
  rtx last = call;
  rtx first_arg = NULL;
  bool is_spilled = false;

  head = PREV_INSN (head);

  /* Find nearest to call argument passing instruction.  */
  while (true)
    {
      last = PREV_INSN (last);
      if (last == head)
        return NULL;
      if (!NONDEBUG_INSN_P (last))
        continue;
      if (insn_is_function_arg (last, &is_spilled))
        break;
      return NULL;
    }

  first_arg = last;
  while (true)
    {
      insn = PREV_INSN (last);
      if (!INSN_P (insn))
        break;
      if (insn == head)
        break;
      if (!NONDEBUG_INSN_P (insn))
        {
          last = insn;
          continue;
        }
      if (insn_is_function_arg (insn, &is_spilled))
        {
          /* Add output dependence between two function arguments if the
             chain of output arguments contains likely spilled HW
             registers.  */
          if (is_spilled)
            add_dependence (first_arg, insn, REG_DEP_OUTPUT);
          first_arg = last = insn;
        }
      else
        break;
    }
  if (!is_spilled)
    return NULL;
  return first_arg;
}
/* Add output or anti dependency from INSN to FIRST_ARG to restrict its code
   motion.  */
static void
avoid_func_arg_motion (rtx first_arg, rtx insn)
{
  rtx set;
  rtx tmp;

  set = single_set (insn);
  if (!set)
    return;
  tmp = SET_DEST (set);
  if (REG_P (tmp))
    {
      /* Add output dependency to the first function argument.  */
      add_dependence (first_arg, insn, REG_DEP_OUTPUT);
      return;
    }
  /* Add anti dependency.  */
  add_dependence (first_arg, insn, REG_DEP_ANTI);
}
/* Avoid cross block motion of function argument through adding dependency
   from the first non-jump instruction in bb.  */
static void
add_dependee_for_func_arg (rtx arg, basic_block bb)
{
  rtx insn = BB_END (bb);

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
        {
          rtx set = single_set (insn);
          if (set)
            {
              avoid_func_arg_motion (arg, insn);
              return;
            }
        }
      if (insn == BB_HEAD (bb))
        return;
      insn = PREV_INSN (insn);
    }
}
/* Hook for pre-reload schedule - avoid motion of function arguments
   passed in likely spilled HW registers.  */
static void
ix86_dependencies_evaluation_hook (rtx head, rtx tail)
{
  rtx insn;
  rtx first_arg = NULL;
  if (reload_completed)
    return;
  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);
  for (insn = tail; insn != head; insn = PREV_INSN (insn))
    if (INSN_P (insn) && CALL_P (insn))
      {
        first_arg = add_parameter_dependencies (insn, head);
        if (first_arg)
          {
            /* Add dependee for first argument to predecessors, but only if
               the region contains more than one block.  */
            basic_block bb = BLOCK_FOR_INSN (insn);
            int rgn = CONTAINING_RGN (bb->index);
            int nr_blks = RGN_NR_BLOCKS (rgn);
            /* Skip trivial regions and region head blocks that can have
               predecessors outside of region.  */
            if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
              {
                edge e;
                edge_iterator ei;

                /* Assume that region is SCC, i.e. all immediate predecessors
                   of non-head block are in the same region.  */
                FOR_EACH_EDGE (e, ei, bb->preds)
                  {
                    /* Avoid creating loop-carried dependencies by
                       using the topological ordering in the region.  */
                    if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
                      add_dependee_for_func_arg (first_arg, e->src);
                  }
              }
            insn = first_arg;
            if (insn == head)
              break;
          }
      }
    else if (first_arg)
      avoid_func_arg_motion (first_arg, insn);
}
/* Hook for pre-reload schedule - set priority of moves from likely spilled
   HW registers to maximum, to schedule them as soon as possible.  These are
   moves from function argument registers at the top of the function entry
   and moves from function return value registers after call.  */
static int
ix86_adjust_priority (rtx insn, int priority)
{
  rtx set;

  if (reload_completed)
    return priority;

  if (!NONDEBUG_INSN_P (insn))
    return priority;

  set = single_set (insn);
  if (set)
    {
      rtx tmp = SET_SRC (set);
      if (REG_P (tmp)
          && HARD_REGISTER_P (tmp)
          && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
          && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
        return current_sched_info->sched_max_insns_priority;
    }

  return priority;
}
/* Model decoder of Core 2/i7.
   The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */

/* Maximum length of an insn that can be handled by
   a secondary decoder unit.  '8' for Core 2/i7.  */
static int core2i7_secondary_decoder_max_insn_size;

/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
   '16' for Core 2/i7.  */
static int core2i7_ifetch_block_size;

/* Maximum number of instructions decoder can handle per cycle.
   '6' for Core 2/i7.  */
static int core2i7_ifetch_block_max_insns;

typedef struct ix86_first_cycle_multipass_data_ *
  ix86_first_cycle_multipass_data_t;
typedef const struct ix86_first_cycle_multipass_data_ *
  const_ix86_first_cycle_multipass_data_t;

/* A variable to store target state across calls to max_issue within
   one cycle.  */
static struct ix86_first_cycle_multipass_data_
  _ix86_first_cycle_multipass_data,
  *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
/* Initialize DATA.  */
static void
core2i7_first_cycle_multipass_init (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
  data->ready_try_change = NULL;
  data->ready_try_change_size = 0;
}

/* Advancing the cycle; reset ifetch block counts.  */
static void
core2i7_dfa_post_advance_cycle (void)
{
  ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;

  gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
}

static int min_insn_size (rtx);
/* Filter out insns from ready_try that the core will not be able to issue
   on the current cycle due to decoder.  */
static void
core2i7_first_cycle_multipass_filter_ready_try
(const_ix86_first_cycle_multipass_data_t data,
 char *ready_try, int n_ready, bool first_cycle_insn_p)
{
  while (n_ready--)
    {
      rtx insn;
      int insn_size;

      if (ready_try[n_ready])
        continue;

      insn = get_ready_element (n_ready);
      insn_size = min_insn_size (insn);

      if (/* If this insn is too long for a secondary decoder ...  */
          (!first_cycle_insn_p
           && insn_size > core2i7_secondary_decoder_max_insn_size)
          /* ... or it would not fit into the ifetch block ...  */
          || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
          /* ... or the decoder is full already ...  */
          || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
        /* ... mask the insn out.  */
        {
          ready_try[n_ready] = 1;

          if (data->ready_try_change)
            bitmap_set_bit (data->ready_try_change, n_ready);
        }
    }
}
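
/* Illustrative sketch, not part of the original source: the decoder
   constraint applied above, using the Core 2/i7 parameters set later in
   ix86_sched_init_global.  An insn may issue this cycle only if it leaves
   room in the 16-byte ifetch block and one of the 6 decode slots, and insns
   longer than 8 bytes must take the first (complex) decoder.  */
#if 0
static int
fits_decoders (int insn_size, int block_len, int block_n_insns,
               int first_cycle_insn_p)
{
  if (!first_cycle_insn_p && insn_size > 8) /* secondary decoders: <= 8 bytes */
    return 0;
  if (block_len + insn_size > 16)           /* ifetch block: 16 bytes/cycle */
    return 0;
  if (block_n_insns + 1 > 6)                /* at most 6 insns decoded/cycle */
    return 0;
  return 1;
}
#endif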
/* Prepare for a new round of multipass lookahead scheduling.  */
static void
core2i7_first_cycle_multipass_begin (void *_data, char *ready_try,
                                     int n_ready,
                                     bool first_cycle_insn_p)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = ix86_first_cycle_multipass_data;

  /* Restore the state from the end of the previous round.  */
  data->ifetch_block_len = prev_data->ifetch_block_len;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;

  /* Filter instructions that cannot be issued on current cycle due to
     decoder restrictions.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
                                                  first_cycle_insn_p);
}
/* INSN is being issued in current solution.  Account for its impact on
   the decoder model.  */
static void
core2i7_first_cycle_multipass_issue (void *_data, char *ready_try,
                                     int n_ready,
                                     rtx insn, const void *_prev_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = (const_ix86_first_cycle_multipass_data_t) _prev_data;

  int insn_size = min_insn_size (insn);

  data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
  gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
              && data->ifetch_block_n_insns
                 <= core2i7_ifetch_block_max_insns);

  /* Allocate or resize the bitmap for storing INSN's effect on
     ready_try.  */
  if (!data->ready_try_change)
    {
      data->ready_try_change = sbitmap_alloc (n_ready);
      data->ready_try_change_size = n_ready;
    }
  else if (data->ready_try_change_size < n_ready)
    {
      data->ready_try_change = sbitmap_resize (data->ready_try_change,
                                               n_ready, 0);
      data->ready_try_change_size = n_ready;
    }
  bitmap_clear (data->ready_try_change);

  /* Filter out insns from ready_try that the core will not be able to issue
     on current cycle due to decoder.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
                                                  false);
}
/* Revert the effect on ready_try.  */
static void
core2i7_first_cycle_multipass_backtrack (const void *_data,
                                         char *ready_try,
                                         int n_ready ATTRIBUTE_UNUSED)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  unsigned int i = 0;
  sbitmap_iterator sbi;

  gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
  EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
    {
      ready_try[i] = 0;
    }
}
/* Save the result of multipass lookahead scheduling for the next round.  */
static void
core2i7_first_cycle_multipass_end (const void *_data)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  ix86_first_cycle_multipass_data_t next_data
    = ix86_first_cycle_multipass_data;

  if (data != NULL)
    {
      next_data->ifetch_block_len = data->ifetch_block_len;
      next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
    }
}
/* Deallocate target data.  */
static void
core2i7_first_cycle_multipass_fini (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  if (data->ready_try_change)
    {
      sbitmap_free (data->ready_try_change);
      data->ready_try_change = NULL;
      data->ready_try_change_size = 0;
    }
}
/* Prepare for scheduling pass.  */
static void
ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
                        int verbose ATTRIBUTE_UNUSED,
                        int max_uid ATTRIBUTE_UNUSED)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2:
    case PROCESSOR_COREI7:
    case PROCESSOR_HASWELL:
      /* Do not perform multipass scheduling for pre-reload schedule
         to save compile time.  */
      if (reload_completed)
        {
          targetm.sched.dfa_post_advance_cycle
            = core2i7_dfa_post_advance_cycle;
          targetm.sched.first_cycle_multipass_init
            = core2i7_first_cycle_multipass_init;
          targetm.sched.first_cycle_multipass_begin
            = core2i7_first_cycle_multipass_begin;
          targetm.sched.first_cycle_multipass_issue
            = core2i7_first_cycle_multipass_issue;
          targetm.sched.first_cycle_multipass_backtrack
            = core2i7_first_cycle_multipass_backtrack;
          targetm.sched.first_cycle_multipass_end
            = core2i7_first_cycle_multipass_end;
          targetm.sched.first_cycle_multipass_fini
            = core2i7_first_cycle_multipass_fini;

          /* Set decoder parameters.  */
          core2i7_secondary_decoder_max_insn_size = 8;
          core2i7_ifetch_block_size = 16;
          core2i7_ifetch_block_max_insns = 6;
          break;
        }
      /* ... Fall through ...  */
    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
        return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
        return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
           && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
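
/* For example (illustrative): a DFmode constant that would naturally get
   32-bit alignment is bumped to 64 bits here, and, when not optimizing for
   size, a string constant of 31+ bytes is aligned to the word size so block
   operations on it can use word accesses.  */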
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align, bool opt)
{
  int max_align
    = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  if (opt
      && AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
          || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }

  if (!opt)
    return align;

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if ((TYPE_MODE (type) == XCmode
           || TYPE_MODE (type) == TCmode) && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}
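
/* Example of the x86-64 rule above (illustrative): a file-scope
   "char buf[32]" is given 128-bit alignment so aligned SSE stores can be
   used on it, while "char buf[8]" keeps its ordinary alignment.  */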
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
                      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
        align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local
     or global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least
     16 bytes.

     This was added to allow use of aligned SSE instructions at arrays.  This
     rule is meant for static storage (where compiler can not do the analysis
     by itself).  We follow it for automatic variables only when convenient.
     We fully control everything in the function compiled and functions from
     other unit can not rely on the alignment.

     Exclude va_list type.  It is the common case of local array where
     we can not benefit from the alignment.

     TODO: Probably one should optimize for size only when var is not
     escaping.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
          && (va_list_type_node == NULL_TREE
              || (TYPE_MAIN_VARIANT (type)
                  != TYPE_MAIN_VARIANT (va_list_type_node)))
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if ((TYPE_MODE (type) == XCmode
           || TYPE_MODE (type) == TCmode) && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}

/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
                        unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
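
/* Standalone sketch of the rule above (hypothetical helper): with
   -mpreferred-stack-boundary=2 the stack is only 32-bit aligned, so a
   long long (DImode) object that was not explicitly over-aligned by the
   user reports a minimum alignment of 32 bits and therefore does not,
   by itself, force dynamic stack realignment.  */

static unsigned
sketch_minimum_alignment (int is_dimode, int is_user_aligned, unsigned align)
{
  if (align == 64 && is_dimode && !is_user_aligned)
    return 32;                  /* don't realign just for long long */
  return align;
}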

/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
{
  unsigned regno;

  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      tree fntype;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      fntype = TREE_TYPE (fndecl);
      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
        {
          /* Fastcall functions use ecx/edx for arguments, which leaves
             us with EAX for the static chain.
             Thiscall functions use ecx for arguments, which also
             leaves us with EAX for the static chain.  */
          regno = AX_REG;
        }
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
        {
          /* Thiscall functions use ecx for arguments, which leaves
             us with EAX and EDX for the static chain.
             We are using for abi-compatibility EAX.  */
          regno = AX_REG;
        }
      else if (ix86_function_regparm (fntype, fndecl) == 3)
        {
          /* For regparm 3, we have no free call-clobbered registers in
             which to store the static chain.  In order to implement this,
             we have the trampoline push the static chain to the stack.
             However, we can't push a value below the return address when
             we call the nested function directly, so we have to use an
             alternate entry point.  For this we use ESI, and have the
             alternate entry point push ESI, so that things appear the
             same once we're executing the nested function.  */
          if (incoming_p)
            {
              if (fndecl == current_function_decl)
                ix86_static_chain_on_stack = true;
              return gen_frame_mem (SImode,
                                    plus_constant (Pmode,
                                                   arg_pointer_rtx, -8));
            }
          regno = SI_REG;
        }
    }

  return gen_rtx_REG (Pmode, regno);
}
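
/* Sketch of the 32-bit register choice above as a self-contained helper
   (hypothetical; AX_REG, CX_REG and SI_REG are the real register numbers
   from i386.h): fastcall and thiscall burn ECX (and EDX) on arguments,
   so the chain moves to EAX; regparm(3) burns all three, so the chain
   travels on the stack and ESI is used past the alternate entry point;
   otherwise ECX is the default.  */

static unsigned
sketch_static_chain_regno (int fastcall_p, int thiscall_p, int regparm3_p)
{
  if (fastcall_p || thiscall_p)
    return AX_REG;      /* ECX/EDX already carry arguments */
  if (regparm3_p)
    return SI_REG;      /* chain pushed on stack; ESI after entry */
  return CX_REG;        /* default: ECX */
}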

/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load address using
         the shorter movl instead of movabs.  We may want to support
         movq for kernel mode, but kernel does not use trampolines at
         the moment.  FNADDR is a 32bit address and may not be in
         DImode when ptr_mode == SImode.  Always use movl in this
         case.  */
      if (ptr_mode == SImode
          || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
        {
          fnaddr = copy_addr_to_reg (fnaddr);

          mem = adjust_address (m_tramp, HImode, offset);
          emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

          mem = adjust_address (m_tramp, SImode, offset + 2);
          emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
          offset += 6;
        }
      else
        {
          mem = adjust_address (m_tramp, HImode, offset);
          emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

          mem = adjust_address (m_tramp, DImode, offset + 2);
          emit_move_insn (mem, fnaddr);
          offset += 10;
        }

      /* Load static chain using movabs to r10.  Use the shorter movl
         instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
        {
          opcode = 0xba41;
          size = 6;
        }
      else
        {
          opcode = 0xba49;
          size = 10;
        }

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
         pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
         with a constant, or push the constant to the stack.  All of the
         instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);

      if (REG_P (chain))
        switch (REGNO (chain))
          {
          case AX_REG:
            opcode = 0xb8; break;
          case CX_REG:
            opcode = 0xb9; break;
          default:
            gcc_unreachable ();
          }
      else
        opcode = 0x68;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
         In the case in which the trampoline stores the static chain on
         the stack, we need to skip the first insn which pushes the
         (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      disp = expand_binop (SImode, sub_optab, fnaddr,
                           plus_constant (Pmode, XEXP (m_tramp, 0),
                                          offset - (MEM_P (chain) ? 1 : 0)),
                           NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
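
/* A standalone sketch (not GCC API) of the byte image the 64-bit branch
   above emits in the movabs form, written with plain stores so the
   encoding is visible.  Assumes a little-endian buffer, which is what
   the x86 immediate fields require:

     49 BB <imm64>      movabs $fnaddr, %r11
     49 BA <imm64>      movabs $chain,  %r10
     49 FF E3 90        jmp *%r11; nop (pad to a 4-byte store)

   Returns the number of bytes written (24).  The helper name is
   hypothetical.  */

static int
sketch_x86_64_trampoline_bytes (unsigned char *buf,
                                unsigned long long fnaddr,
                                unsigned long long chain)
{
  int offset = 0, i;

  buf[offset++] = 0x49; buf[offset++] = 0xbb;   /* movabs ..., %r11 */
  for (i = 0; i < 8; i++)
    buf[offset++] = (unsigned char) (fnaddr >> (8 * i));

  buf[offset++] = 0x49; buf[offset++] = 0xba;   /* movabs ..., %r10 */
  for (i = 0; i < 8; i++)
    buf[offset++] = (unsigned char) (chain >> (8 * i));

  buf[offset++] = 0x49; buf[offset++] = 0xff;   /* jmp *%r11 */
  buf[offset++] = 0xe3;
  buf[offset++] = 0x90;                         /* nop padding */

  return offset;
}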

/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;

      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
        quals = TYPE_UNQUALIFIED;
      else
        quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
        itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}
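
/* The memoization pattern above in miniature (hypothetical standalone
   helper): a table indexed by type code, a sentinel meaning "not built
   yet", and a builder that fills each slot exactly once.  The real code
   uses a GTY(())-marked tree array with NULL as the sentinel so the
   cache survives garbage collection.  */

static int sketch_type_cache[16];
static int sketch_type_cache_valid[16];

static int
sketch_get_type_lazily (int tcode, int (*build_fn) (int))
{
  if (!sketch_type_cache_valid[tcode])
    {
      sketch_type_cache[tcode] = build_fn (tcode);      /* build once */
      sketch_type_cache_valid[tcode] = 1;
    }
  return sketch_type_cache[tcode];
}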

/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
        {
          atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
          args = tree_cons (NULL, atype, args);
        }

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
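
/* Why the loop above runs backwards: tree_cons prepends, so walking
   ix86_builtin_func_args from the last argument down to the one after
   the return type leaves the final TREE_LIST in left-to-right argument
   order, terminated by void_list_node.  A plain-C analogue with a
   hypothetical singly linked list, using caller-supplied node storage
   so it stays self-contained:  */

struct sketch_arg { int val; struct sketch_arg *next; };

static struct sketch_arg *
sketch_build_arg_list (const int *args, unsigned start, unsigned after,
                       struct sketch_arg *nodes)
{
  struct sketch_arg *list = 0;  /* plays the role of void_list_node */
  unsigned i;

  /* args[start] is the return type; the arguments follow it.  */
  for (i = after - 1; i > start; --i)
    {
      nodes[i - start - 1].val = args[i];       /* prepend, like tree_cons */
      nodes[i - start - 1].next = list;
      list = &nodes[i - start - 1];
    }
  return list;
}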

/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTPS_NR,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTPS_NR,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_EMMS,
  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  IX86_BUILTIN_FXSAVE,
  IX86_BUILTIN_FXRSTOR,
  IX86_BUILTIN_FXSAVE64,
  IX86_BUILTIN_FXRSTOR64,

  IX86_BUILTIN_XSAVE,
  IX86_BUILTIN_XRSTOR,
  IX86_BUILTIN_XSAVE64,
  IX86_BUILTIN_XRSTOR64,

  IX86_BUILTIN_XSAVEOPT,
  IX86_BUILTIN_XSAVEOPT64,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTI64,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,
  IX86_BUILTIN_PAUSE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  /* SSE3.  */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3.  */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  /* SSE4.1.  */
  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_DPPD,
  IX86_BUILTIN_DPPS,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,

  IX86_BUILTIN_FLOORPD,
  IX86_BUILTIN_CEILPD,
  IX86_BUILTIN_TRUNCPD,
  IX86_BUILTIN_RINTPD,
  IX86_BUILTIN_ROUNDPD_AZ,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,

  IX86_BUILTIN_FLOORPS,
  IX86_BUILTIN_CEILPS,
  IX86_BUILTIN_TRUNCPS,
  IX86_BUILTIN_RINTPS,
  IX86_BUILTIN_ROUNDPS_AZ,

  IX86_BUILTIN_FLOORPS_SFIX,
  IX86_BUILTIN_CEILPS_SFIX,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,
  IX86_BUILTIN_VEC_PACK_SFIX256,

  /* SSE4.2.  */
  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  /* AVX */
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256,
  IX86_BUILTIN_CEILPD256,
  IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_ROUNDPD_AZ256,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,

  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,
  IX86_BUILTIN_ROUNDPS_AZ256,

  IX86_BUILTIN_FLOORPS_SFIX256,
  IX86_BUILTIN_CEILPS_SFIX256,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* AVX2 */
  IX86_BUILTIN_MPSADBW256,
  IX86_BUILTIN_PABSB256,
  IX86_BUILTIN_PABSW256,
  IX86_BUILTIN_PABSD256,
  IX86_BUILTIN_PACKSSDW256,
  IX86_BUILTIN_PACKSSWB256,
  IX86_BUILTIN_PACKUSDW256,
  IX86_BUILTIN_PACKUSWB256,
  IX86_BUILTIN_PADDB256,
  IX86_BUILTIN_PADDW256,
  IX86_BUILTIN_PADDD256,
  IX86_BUILTIN_PADDQ256,
  IX86_BUILTIN_PADDSB256,
  IX86_BUILTIN_PADDSW256,
  IX86_BUILTIN_PADDUSB256,
  IX86_BUILTIN_PADDUSW256,
  IX86_BUILTIN_PALIGNR256,
  IX86_BUILTIN_AND256I,
  IX86_BUILTIN_ANDNOT256I,
  IX86_BUILTIN_PAVGB256,
  IX86_BUILTIN_PAVGW256,
  IX86_BUILTIN_PBLENDVB256,
  IX86_BUILTIN_PBLENDVW256,
  IX86_BUILTIN_PCMPEQB256,
  IX86_BUILTIN_PCMPEQW256,
  IX86_BUILTIN_PCMPEQD256,
  IX86_BUILTIN_PCMPEQQ256,
  IX86_BUILTIN_PCMPGTB256,
  IX86_BUILTIN_PCMPGTW256,
  IX86_BUILTIN_PCMPGTD256,
  IX86_BUILTIN_PCMPGTQ256,
  IX86_BUILTIN_PHADDW256,
  IX86_BUILTIN_PHADDD256,
  IX86_BUILTIN_PHADDSW256,
  IX86_BUILTIN_PHSUBW256,
  IX86_BUILTIN_PHSUBD256,
  IX86_BUILTIN_PHSUBSW256,
  IX86_BUILTIN_PMADDUBSW256,
  IX86_BUILTIN_PMADDWD256,
  IX86_BUILTIN_PMAXSB256,
  IX86_BUILTIN_PMAXSW256,
  IX86_BUILTIN_PMAXSD256,
  IX86_BUILTIN_PMAXUB256,
  IX86_BUILTIN_PMAXUW256,
  IX86_BUILTIN_PMAXUD256,
  IX86_BUILTIN_PMINSB256,
  IX86_BUILTIN_PMINSW256,
  IX86_BUILTIN_PMINSD256,
  IX86_BUILTIN_PMINUB256,
  IX86_BUILTIN_PMINUW256,
  IX86_BUILTIN_PMINUD256,
  IX86_BUILTIN_PMOVMSKB256,
  IX86_BUILTIN_PMOVSXBW256,
  IX86_BUILTIN_PMOVSXBD256,
  IX86_BUILTIN_PMOVSXBQ256,
  IX86_BUILTIN_PMOVSXWD256,
  IX86_BUILTIN_PMOVSXWQ256,
  IX86_BUILTIN_PMOVSXDQ256,
  IX86_BUILTIN_PMOVZXBW256,
  IX86_BUILTIN_PMOVZXBD256,
  IX86_BUILTIN_PMOVZXBQ256,
  IX86_BUILTIN_PMOVZXWD256,
  IX86_BUILTIN_PMOVZXWQ256,
  IX86_BUILTIN_PMOVZXDQ256,
  IX86_BUILTIN_PMULDQ256,
  IX86_BUILTIN_PMULHRSW256,
  IX86_BUILTIN_PMULHUW256,
  IX86_BUILTIN_PMULHW256,
  IX86_BUILTIN_PMULLW256,
  IX86_BUILTIN_PMULLD256,
  IX86_BUILTIN_PMULUDQ256,
  IX86_BUILTIN_POR256,
  IX86_BUILTIN_PSADBW256,
  IX86_BUILTIN_PSHUFB256,
  IX86_BUILTIN_PSHUFD256,
  IX86_BUILTIN_PSHUFHW256,
  IX86_BUILTIN_PSHUFLW256,
  IX86_BUILTIN_PSIGNB256,
  IX86_BUILTIN_PSIGNW256,
  IX86_BUILTIN_PSIGND256,
  IX86_BUILTIN_PSLLDQI256,
  IX86_BUILTIN_PSLLWI256,
  IX86_BUILTIN_PSLLW256,
  IX86_BUILTIN_PSLLDI256,
  IX86_BUILTIN_PSLLD256,
  IX86_BUILTIN_PSLLQI256,
  IX86_BUILTIN_PSLLQ256,
  IX86_BUILTIN_PSRAWI256,
  IX86_BUILTIN_PSRAW256,
  IX86_BUILTIN_PSRADI256,
  IX86_BUILTIN_PSRAD256,
  IX86_BUILTIN_PSRLDQI256,
  IX86_BUILTIN_PSRLWI256,
  IX86_BUILTIN_PSRLW256,
  IX86_BUILTIN_PSRLDI256,
  IX86_BUILTIN_PSRLD256,
  IX86_BUILTIN_PSRLQI256,
  IX86_BUILTIN_PSRLQ256,
  IX86_BUILTIN_PSUBB256,
  IX86_BUILTIN_PSUBW256,
  IX86_BUILTIN_PSUBD256,
  IX86_BUILTIN_PSUBQ256,
  IX86_BUILTIN_PSUBSB256,
  IX86_BUILTIN_PSUBSW256,
  IX86_BUILTIN_PSUBUSB256,
  IX86_BUILTIN_PSUBUSW256,
  IX86_BUILTIN_PUNPCKHBW256,
  IX86_BUILTIN_PUNPCKHWD256,
  IX86_BUILTIN_PUNPCKHDQ256,
  IX86_BUILTIN_PUNPCKHQDQ256,
  IX86_BUILTIN_PUNPCKLBW256,
  IX86_BUILTIN_PUNPCKLWD256,
  IX86_BUILTIN_PUNPCKLDQ256,
  IX86_BUILTIN_PUNPCKLQDQ256,
  IX86_BUILTIN_PXOR256,
  IX86_BUILTIN_MOVNTDQA256,
  IX86_BUILTIN_VBROADCASTSS_PS,
  IX86_BUILTIN_VBROADCASTSS_PS256,
  IX86_BUILTIN_VBROADCASTSD_PD256,
  IX86_BUILTIN_VBROADCASTSI256,
  IX86_BUILTIN_PBLENDD256,
  IX86_BUILTIN_PBLENDD128,
  IX86_BUILTIN_PBROADCASTB256,
  IX86_BUILTIN_PBROADCASTW256,
  IX86_BUILTIN_PBROADCASTD256,
  IX86_BUILTIN_PBROADCASTQ256,
  IX86_BUILTIN_PBROADCASTB128,
  IX86_BUILTIN_PBROADCASTW128,
  IX86_BUILTIN_PBROADCASTD128,
  IX86_BUILTIN_PBROADCASTQ128,
  IX86_BUILTIN_VPERMVARSI256,
  IX86_BUILTIN_VPERMDF256,
  IX86_BUILTIN_VPERMVARSF256,
  IX86_BUILTIN_VPERMDI256,
  IX86_BUILTIN_VPERMTI256,
  IX86_BUILTIN_VEXTRACT128I256,
  IX86_BUILTIN_VINSERT128I256,
  IX86_BUILTIN_MASKLOADD,
  IX86_BUILTIN_MASKLOADQ,
  IX86_BUILTIN_MASKLOADD256,
  IX86_BUILTIN_MASKLOADQ256,
  IX86_BUILTIN_MASKSTORED,
  IX86_BUILTIN_MASKSTOREQ,
  IX86_BUILTIN_MASKSTORED256,
  IX86_BUILTIN_MASKSTOREQ256,
  IX86_BUILTIN_PSLLVV4DI,
  IX86_BUILTIN_PSLLVV2DI,
  IX86_BUILTIN_PSLLVV8SI,
  IX86_BUILTIN_PSLLVV4SI,
  IX86_BUILTIN_PSRAVV8SI,
  IX86_BUILTIN_PSRAVV4SI,
  IX86_BUILTIN_PSRLVV4DI,
  IX86_BUILTIN_PSRLVV2DI,
  IX86_BUILTIN_PSRLVV8SI,
  IX86_BUILTIN_PSRLVV4SI,

  IX86_BUILTIN_GATHERSIV2DF,
  IX86_BUILTIN_GATHERSIV4DF,
  IX86_BUILTIN_GATHERDIV2DF,
  IX86_BUILTIN_GATHERDIV4DF,
  IX86_BUILTIN_GATHERSIV4SF,
  IX86_BUILTIN_GATHERSIV8SF,
  IX86_BUILTIN_GATHERDIV4SF,
  IX86_BUILTIN_GATHERDIV8SF,
  IX86_BUILTIN_GATHERSIV2DI,
  IX86_BUILTIN_GATHERSIV4DI,
  IX86_BUILTIN_GATHERDIV2DI,
  IX86_BUILTIN_GATHERDIV4DI,
  IX86_BUILTIN_GATHERSIV4SI,
  IX86_BUILTIN_GATHERSIV8SI,
  IX86_BUILTIN_GATHERDIV4SI,
  IX86_BUILTIN_GATHERDIV8SI,

  /* Alternate 4 element gather for the vectorizer where
     all operands are 32-byte wide.  */
  IX86_BUILTIN_GATHERALTSIV4DF,
  IX86_BUILTIN_GATHERALTDIV8SF,
  IX86_BUILTIN_GATHERALTSIV4DI,
  IX86_BUILTIN_GATHERALTDIV8SI,

  /* TFmode support builtins.  */
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  /* FMA4 instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  /* FMA3 instructions.  */
  IX86_BUILTIN_VFMADDSS3,
  IX86_BUILTIN_VFMADDSD3,

  /* XOP instructions.  */
  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_CLZS,

  /* RTM */
  IX86_BUILTIN_XBEGIN,
  IX86_BUILTIN_XEND,
  IX86_BUILTIN_XABORT,
  IX86_BUILTIN_XTEST,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,
  IX86_BUILTIN_CTZS,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* BMI2 instructions.  */
  IX86_BUILTIN_BZHI32,
  IX86_BUILTIN_BZHI64,
  IX86_BUILTIN_PDEP32,
  IX86_BUILTIN_PDEP64,
  IX86_BUILTIN_PEXT32,
  IX86_BUILTIN_PEXT64,

  /* ADX instructions.  */
  IX86_BUILTIN_ADDCARRYX32,
  IX86_BUILTIN_ADDCARRYX64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* RDSEED instructions.  */
  IX86_BUILTIN_RDSEED16_STEP,
  IX86_BUILTIN_RDSEED32_STEP,
  IX86_BUILTIN_RDSEED64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin */
  IX86_BUILTIN_CFSTRING,

  /* Builtins to get CPU type and supported features.  */
  IX86_BUILTIN_CPU_INIT,
  IX86_BUILTIN_CPU_IS,
  IX86_BUILTIN_CPU_SUPPORTS,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;             /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  HOST_WIDE_INT isa;            /* isa_flags this builtin is defined for */
  bool const_p;                 /* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so, can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
             enum ix86_builtin_func_type tcode,
             enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
          || (mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
          tree type = ix86_get_builtin_func_type (tcode);
          decl = add_builtin_function (name, type, code, BUILT_IN_MD,
                                       NULL, NULL_TREE);
          ix86_builtins[(int) code] = decl;
          ix86_builtins_isa[(int) code].set_and_not_built_p = false;
        }
      else
        {
          ix86_builtins[(int) code] = NULL_TREE;
          ix86_builtins_isa[(int) code].tcode = tcode;
          ix86_builtins_isa[(int) code].name = name;
          ix86_builtins_isa[(int) code].const_p = false;
          ix86_builtins_isa[(int) code].set_and_not_built_p = true;
        }
    }

  return decl;
}
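
/* A hedged usage sketch (hypothetical, not called anywhere): how a
   builtin is typically registered through def_builtin.  The builtin
   name here is made up; the type code and enum value reuse existing
   identifiers purely for illustration.  */

static void
sketch_register_example_builtin (void)
{
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example_fence",
               VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
}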

/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
                   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}

/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
          && ix86_builtins_isa[i].set_and_not_built_p)
        {
          tree decl, type;

          /* Don't define the builtin again.  */
          ix86_builtins_isa[i].set_and_not_built_p = false;

          type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
          decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
                                                 type, i, BUILT_IN_MD, NULL,
                                                 NULL_TREE);

          ix86_builtins[i] = decl;
          if (ix86_builtins_isa[i].const_p)
            TREE_READONLY (decl) = 1;
        }
    }
}

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};

static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
27322 /* Special builtins with variable number of arguments. */
27323 static const struct builtin_description bdesc_special_args
[] =
27325 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
27326 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
27327 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27330 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27333 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27335 /* FXSR, XSAVE and XSAVEOPT */
27336 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27337 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27338 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27339 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27340 { OPTION_MASK_ISA_XSAVEOPT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27342 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27343 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27344 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27345 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27346 { OPTION_MASK_ISA_XSAVEOPT
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27349 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storeups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
27350 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
27351 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
27353 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
27354 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
27355 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
27356 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
27358 /* SSE or 3DNow!A */
27359 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27360 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntq
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
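
  /* SSE3 */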
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
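
  /* SSE4.1 */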
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
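
  /* SSE4A */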
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
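
  /* AVX */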
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
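
  /* AVX2 */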
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
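
  /* LWP */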
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
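
  /* FSGSBASE */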
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
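
  /* RTM */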
  { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
};

/* Builtins with variable number of arguments. */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
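
  /* MMX */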
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
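
  /* 3DNow! */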
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
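
  /* 3DNow!A */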
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
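
  /* SSE */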
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
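
  /* SSE2 */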
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
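
  /* SSE2 MMX */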
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
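
  /* SSE3 */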
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
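
  /* SSSE3 */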
27835 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27836 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
27837 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27838 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
27839 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27840 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
27842 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27843 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27844 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27845 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27846 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27847 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27848 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27849 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27850 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27851 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27852 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27853 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27854 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
27855 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
27856 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27857 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27858 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27859 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27860 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27861 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27862 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27863 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27864 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27865 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27868 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
},
27869 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
},
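
  /* Note (editorial inference, not from the original table): the
     *_INT_CONVERT suffix on the two palignr entries above marks builtins
     whose declared vector argument modes differ from the mode the insn
     pattern actually operates in (TImode/DImode here), so the expander
     re-interprets the operand registers instead of converting values.  */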
  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
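
  /* Note (illustrative, not from the original table): for the three ptest
     entries above, the rtx_code field selects which EFLAGS bit the builtin
     returns.  PTEST sets ZF = ((a & b) == 0) and CF = ((~a & b) == 0), so:

	EQ  -> ptestz   (returns ZF)
	LTU -> ptestc   (returns CF)
	GTU -> ptestnzc (returns ZF == 0 && CF == 0)  */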
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
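
  /* Note (illustrative): the crc32 builtins above expose the SSE4.2 CRC32
     instruction, which computes CRC-32C (the Castagnoli polynomial), not
     the zlib/Ethernet CRC-32.  A minimal usage sketch, folding one byte
     into a running checksum:

	unsigned int c = 0xffffffffu;
	c = __builtin_ia32_crc32qi (c, byte);  */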
  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* PCLMUL */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
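
  /* Note (editorial inference): a null name field (0) in the AES/PCLMUL
     entries above appears to mean the builtin is not registered under a
     name from this table; these builtins are defined elsewhere so that
     their full ISA requirements (AES/PCLMUL on top of SSE2) can be
     expressed, which a single mask field here cannot.  */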
  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
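
  /* Note (illustrative): unlike the legacy psll/psra/psrl forms above,
     which shift every element by one common count, the AVX2 psllv/psrav/
     psrlv builtins take a per-element shift count from the second vector
     operand.  */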
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* BMI */
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* TBM */
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
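
  /* Note (assumption, not stated here): for the TBM bextri builtins the
     second operand is an immediate control word, with the starting bit
     position in bits 7:0 and the field length in bits 15:8, mirroring
     the register-operand control of the BMI bextr builtins above.  */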
  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
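
  /* Note (illustrative): the INT operand of the vcvtps2ph builtins is the
     imm8 rounding control of VCVTPS2PH; on our reading, bits 1:0 pick the
     rounding mode and bit 2 selects MXCSR.RC instead of the immediate.  */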
  /* BMI2 */
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
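
/* Reading aid for the aliases above: an ix86_builtin_func_type name encodes
   the signature as RETURN_FTYPE_ARG1_ARG2_...  For example, MULTI_ARG_3_SF
   is V4SF_FTYPE_V4SF_V4SF_V4SF, i.e. a builtin taking three V4SF operands
   and returning a V4SF.  */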
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int) MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int) MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf, "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3, UNKNOWN, (int) MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df, "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3, UNKNOWN, (int) MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int) MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int) MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int) MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int) MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int) MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int) MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int) MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int) MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int) MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int) MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int) MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int) MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int) MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int) MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int) MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int) MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int) MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int) MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int) MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int) MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int) MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int) MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int) MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int) MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int) MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int) MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int) MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int) MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int) MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int) MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int) MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int) MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int) MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int) MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int) MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int) MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int) MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int) MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int) MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int) MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int) MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int) MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int) MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int) MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int) MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int) MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int) MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int) MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int) MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int) MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int) MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int) MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int) MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int) MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int) MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int) MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int) MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int) MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int) MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int) MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int) MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int) MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int) MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int) MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int) MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int) MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int) MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int) MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int) MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int) MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int) MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int) MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int) MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int) MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int) MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int) MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int) MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int) MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int) MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int) MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int) MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int) MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int) MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int) MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int) MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int) MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int) MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int) MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int) MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int) MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int) MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int) MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int) MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int) MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int) MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int) MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int) MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int) MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int) MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int) MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int) MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int) MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int) MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int) MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int) MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int) MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int) MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int) MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int) MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int) MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int) MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int) MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int) MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int) MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int) MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int) MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int) MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int) MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int) MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int) MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int) MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int) MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int) MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int) MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int) MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int) MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int) MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int) MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int) MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int) MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int) MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int) MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int) MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int) MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int) MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int) MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int) MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int) MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int) MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int) MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int) MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int) MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int) MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int) MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int) MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) MULTI_ARG_4_SF2_SI_I1 },
};
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
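
/* Orientation note (an assumption about the surrounding runtime, not
   something this table enforces): the "__builtin__ITM_*" names above are
   expected to map to the _ITM_* entry points of the GNU transactional-memory
   ABI implemented by libitm, e.g. a 16-byte transactional vector store
   becomes a call to _ITM_WM128.  */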
/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
	}
    }
  return NULL_TREE;
}

/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
	}
    }
  return NULL_TREE;
}
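
/* Minimal usage sketch for the two helpers above (hypothetical caller; in
   the tree they are normally reached through the target's builtin_tm_load
   and builtin_tm_store hooks):

     tree vtype = build_vector_type (float_type_node, 4);   // 128-bit vector
     tree loader = ix86_builtin_tm_load (vtype);    // BUILT_IN_TM_LOAD_M128
     tree storer = ix86_builtin_tm_store (vtype);   // BUILT_IN_TM_STORE_M128

   Non-vector types and unhandled sizes yield NULL_TREE.  */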
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type, attrs, attrs_type;
	  enum built_in_function code = (enum built_in_function) d->code;

	  ftype = (enum ix86_builtin_func_type) d->flag;
	  type = ix86_get_builtin_func_type (ftype);

	  if (BUILTIN_TM_LOAD_P (code))
	    {
	      attrs = attrs_load;
	      attrs_type = attrs_type_load;
	    }
	  else if (BUILTIN_TM_STORE_P (code))
	    {
	      attrs = attrs_store;
	      attrs_type = attrs_type_store;
	    }
	  else
	    {
	      attrs = attrs_log;
	      attrs_type = attrs_type_log;
	    }
	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
				       /* The builtin without the prefix for
					  calling it directly.  */
				       d->name + strlen ("__builtin_"),
				       attrs);
	  /* add_builtin_function () will set the DECL_ATTRIBUTES, now
	     set the TYPE_ATTRIBUTES.  */
	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

	  set_builtin_decl (code, decl, false);
	}
    }
}
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description *d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;
      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;
      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A  */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
	       IX86_BUILTIN_RDRAND64_STEP);
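
  /* Usage sketch for the *_step builtins just defined (hypothetical user
     code; shape follows the INT_FTYPE_PUNSIGNED signature above -- the
     return value reflects the RDRAND carry flag and the random value
     comes back through the pointer):

       unsigned int r;
       while (!__builtin_ia32_rdrand32_step (&r))
         ;  // retry until the hardware delivers a random value
  */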
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
	       IX86_BUILTIN_GATHERSIV2DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
	       IX86_BUILTIN_GATHERSIV4DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
	       IX86_BUILTIN_GATHERDIV2DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
	       IX86_BUILTIN_GATHERDIV4DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
	       IX86_BUILTIN_GATHERSIV4SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
	       V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
	       IX86_BUILTIN_GATHERSIV8SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV4SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV8SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
	       IX86_BUILTIN_GATHERSIV2DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
	       IX86_BUILTIN_GATHERSIV4DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
	       IX86_BUILTIN_GATHERDIV2DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
	       IX86_BUILTIN_GATHERDIV4DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
	       IX86_BUILTIN_GATHERSIV4SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
	       V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
	       IX86_BUILTIN_GATHERSIV8SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV4SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
	       V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV8SI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
	       IX86_BUILTIN_GATHERALTSIV4DF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
	       V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
	       IX86_BUILTIN_GATHERALTDIV8SF);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
	       V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
	       IX86_BUILTIN_GATHERALTSIV4DI);
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
	       V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
	       IX86_BUILTIN_GATHERALTDIV8SI);
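
  /* Orientation note: these gather builtins back the AVX2 _mm*_i32gather
     and _mm*_i64gather intrinsics in avx2intrin.h (an assumption about the
     header, shown only as a sketch):

       #include <immintrin.h>

       __m256d gather4 (double const *base, __m128i idx)
       {
         // All-ones-mask form; the scale must be a constant 1, 2, 4 or 8.
         return _mm256_i32gather_pd (base, idx, 8);
       }
  */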
  def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
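
  /* Usage sketch for the vec_ext/vec_set builtins above (hypothetical user
     code using GCC's generic vector syntax):

       typedef float v4sf __attribute__ ((vector_size (16)));

       float get_lane2 (v4sf v)
       {
         return __builtin_ia32_vec_ext_v4sf (v, 2);     // read lane 2
       }

       v4sf put_lane0 (v4sf v, float x)
       {
         return __builtin_ia32_vec_set_v4sf (v, x, 0);  // replace lane 0
       }
  */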
  def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
  def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
  def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdseed_di_step",
	       INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);

  def_builtin (0, "__builtin_ia32_addcarryx_u32",
	       UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
  def_builtin (OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_addcarryx_u64",
	       UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
	       IX86_BUILTIN_ADDCARRYX64);
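
  /* Usage sketch matching the UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
     signature above (hypothetical user code): chain two 32-bit additions
     through the returned carry byte, as the ADX instructions allow.

       unsigned int lo, hi;
       unsigned char c;
       c = __builtin_ia32_addcarryx_u32 (0, a0, b0, &lo);
       (void) __builtin_ia32_addcarryx_u32 (c, a1, b1, &hi);
  */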
  /* Add FMA4 multi-arg argument instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
	continue;
      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
   to return a pointer to VERSION_DECL if the outcome of the expression
   formed by PREDICATE_CHAIN is true.  This function will be called during
   version dispatch to decide which function version to execute.  It returns
   the basic block at the end, to which more conditions can be added.  */

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
		     tree predicate_chain, basic_block new_bb)
{
  gimple return_stmt;
  tree convert_expr, result_var;
  gimple convert_stmt;
  gimple call_cond_stmt;
  gimple if_else_stmt;

  basic_block bb1, bb2, bb3;
  edge e12, e23;

  tree cond_var, and_expr_var = NULL_TREE;
  gimple_seq gseq;

  tree predicate_decl, predicate_arg;

  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gseq = bb_seq (new_bb);

  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			 build_fold_addr_expr (version_decl));
  result_var = create_tmp_var (ptr_type_node, NULL);
  convert_stmt = gimple_build_assign (result_var, convert_expr);
  return_stmt = gimple_build_return (result_var);

  if (predicate_chain == NULL_TREE)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  while (predicate_chain != NULL)
    {
      cond_var = create_tmp_var (integer_type_node, NULL);
      predicate_decl = TREE_PURPOSE (predicate_chain);
      predicate_arg = TREE_VALUE (predicate_chain);
      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
      gimple_call_set_lhs (call_cond_stmt, cond_var);

      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
      gimple_set_bb (call_cond_stmt, new_bb);
      gimple_seq_add_stmt (&gseq, call_cond_stmt);

      predicate_chain = TREE_CHAIN (predicate_chain);

      if (and_expr_var == NULL)
	and_expr_var = cond_var;
      else
	{
	  gimple assign_stmt;
	  /* Use MIN_EXPR to check if any integer is zero:
	     and_expr_var = min_expr <cond_var, and_expr_var>.  */
	  assign_stmt = gimple_build_assign (and_expr_var,
					     build2 (MIN_EXPR, integer_type_node,
						     cond_var, and_expr_var));

	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
	  gimple_set_bb (assign_stmt, new_bb);
	  gimple_seq_add_stmt (&gseq, assign_stmt);
	}
    }

  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
				    integer_zero_node,
				    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  bb1 = new_bb;
  e12 = split_block (bb1, if_else_stmt);
  bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  e23 = split_block (bb2, return_stmt);

  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR, 0);

  pop_cfun ();

  return bb3;
}
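
/* Shape of the code that add_condition_to_bb emits, as C-like pseudocode
   (a sketch for orientation, not literal output).  For a version guarded
   by predicates P1 (A1) and P2 (A2):

     c1 = P1 (A1);
     c2 = P2 (A2);
     c = MIN (c2, c1);          // zero iff any predicate failed
     if (c > 0)
       return (void *) &version_decl;
     // control falls through to the next version's condition block
*/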
/* This parses the attribute arguments to target in DECL and determines
   the right builtin to use to match the platform specification.
   It returns the priority value for this version decl.  If PREDICATE_LIST
   is not NULL, it stores the list of cpu features that need to be checked
   before dispatching this function.  */

static unsigned int
get_builtin_code_for_version (tree decl, tree *predicate_list)
{
  tree attrs;
  struct cl_target_option cur_target;
  tree target_node;
  struct cl_target_option *new_target;
  const char *arg_str = NULL;
  const char *attrs_str = NULL;
  char *tok_str = NULL;
  char *token;

  /* Priority of i386 features, greater value is higher priority.  This is
     used to decide the order in which function dispatch must happen.  For
     instance, a version specialized for SSE4.2 should be checked for dispatch
     before a version for SSE3, as SSE4.2 implies SSE3.  */
  enum feature_priority
  {
    P_ZERO = 0,
    P_MMX,
    P_SSE,
    P_SSE2,
    P_SSE3,
    P_SSSE3,
    P_PROC_SSSE3,
    P_SSE4_a,
    P_PROC_SSE4_a,
    P_SSE4_1,
    P_SSE4_2,
    P_PROC_SSE4_2,
    P_POPCNT,
    P_AVX,
    P_AVX2,
    P_FMA,
    P_PROC_FMA
  };

  enum feature_priority priority = P_ZERO;

  /* These are the target attribute strings for which a dispatcher is
     available, from fold_builtin_cpu.  */

  static struct _feature_list
    {
      const char *const name;
      const enum feature_priority priority;
    }
  const feature_list[] =
    {
      {"mmx", P_MMX},
      {"sse", P_SSE},
      {"sse2", P_SSE2},
      {"sse3", P_SSE3},
      {"ssse3", P_SSSE3},
      {"sse4.1", P_SSE4_1},
      {"sse4.2", P_SSE4_2},
      {"popcnt", P_POPCNT},
      {"avx", P_AVX},
      {"avx2", P_AVX2}
    };

  static unsigned int NUM_FEATURES
    = sizeof (feature_list) / sizeof (struct _feature_list);

  unsigned int i;

  tree predicate_chain = NULL_TREE;
  tree predicate_decl, predicate_arg;

  attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
  gcc_assert (attrs != NULL);

  attrs = TREE_VALUE (TREE_VALUE (attrs));

  gcc_assert (TREE_CODE (attrs) == STRING_CST);
  attrs_str = TREE_STRING_POINTER (attrs);

  /* Return priority zero for default function.  */
  if (strcmp (attrs_str, "default") == 0)
    return 0;

  /* Handle arch= if specified.  For priority, set it to be 1 more than
     the best instruction set the processor can handle.  For instance, if
     there is a version for atom and a version for ssse3 (the highest ISA
     priority for atom), the atom version must be checked for dispatch
     before the ssse3 version.  */
  if (strstr (attrs_str, "arch=") != NULL)
    {
      cl_target_option_save (&cur_target, &global_options);
      target_node = ix86_valid_target_attribute_tree (attrs);

      gcc_assert (target_node);
      new_target = TREE_TARGET_OPTION (target_node);
      gcc_assert (new_target);

      if (new_target->arch_specified && new_target->arch > 0)
	{
	  switch (new_target->arch)
	    {
	    case PROCESSOR_CORE2:
	      arg_str = "core2";
	      priority = P_PROC_SSSE3;
	      break;
	    case PROCESSOR_COREI7:
	      arg_str = "corei7";
	      priority = P_PROC_SSE4_2;
	      break;
	    case PROCESSOR_ATOM:
	      arg_str = "atom";
	      priority = P_PROC_SSSE3;
	      break;
	    case PROCESSOR_AMDFAM10:
	      arg_str = "amdfam10h";
	      priority = P_PROC_SSE4_a;
	      break;
	    case PROCESSOR_BDVER1:
	      arg_str = "bdver1";
	      priority = P_PROC_FMA;
	      break;
	    case PROCESSOR_BDVER2:
	      arg_str = "bdver2";
	      priority = P_PROC_FMA;
	      break;
	    }
	}

      cl_target_option_restore (&global_options, &cur_target);

      if (predicate_list && arg_str == NULL)
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "No dispatcher found for the versioning attributes");
	  return 0;
	}

      if (predicate_list)
	{
	  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
	  /* For a C string literal the length includes the trailing NULL.  */
	  predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
	  predicate_chain = tree_cons (predicate_decl, predicate_arg,
				       predicate_chain);
	}
    }

  /* Process feature name.  */
  tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
  strcpy (tok_str, attrs_str);
  token = strtok (tok_str, ",");
  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];

  while (token != NULL)
    {
      /* Do not process "arch="  */
      if (strncmp (token, "arch=", 5) == 0)
	{
	  token = strtok (NULL, ",");
	  continue;
	}
      for (i = 0; i < NUM_FEATURES; ++i)
	{
	  if (strcmp (token, feature_list[i].name) == 0)
	    {
	      if (predicate_list)
		{
		  predicate_arg = build_string_literal (
				  strlen (feature_list[i].name) + 1,
				  feature_list[i].name);
		  predicate_chain = tree_cons (predicate_decl, predicate_arg,
					       predicate_chain);
		}
	      /* Find the maximum priority feature.  */
	      if (feature_list[i].priority > priority)
		priority = feature_list[i].priority;
	      break;
	    }
	}
      if (predicate_list && i == NUM_FEATURES)
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "No dispatcher found for %s", token);
	  return 0;
	}
      token = strtok (NULL, ",");
    }

  free (tok_str);

  if (predicate_list && predicate_chain == NULL_TREE)
    {
      error_at (DECL_SOURCE_LOCATION (decl),
		"No dispatcher found for the versioning attributes : %s",
		attrs_str);
      return 0;
    }
  else if (predicate_list)
    {
      predicate_chain = nreverse (predicate_chain);
      *predicate_list = predicate_chain;
    }

  return priority;
}
/* This compares the priority of target features in function DECL1
   and DECL2.  It returns positive value if DECL1 is higher priority,
   negative value if DECL2 is higher priority and 0 if they are the
   same.  */

static int
ix86_compare_version_priority (tree decl1, tree decl2)
{
  unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
  unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);

  return (int) priority1 - (int) priority2;
}

/* V1 and V2 point to function versions with different priorities
   based on the target ISA.  This function compares their priorities.  */

static int
feature_compare (const void *v1, const void *v2)
{
  typedef struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    } function_version_info;

  const function_version_info c1 = *(const function_version_info *) v1;
  const function_version_info c2 = *(const function_version_info *) v2;
  return (c2.dispatch_priority - c1.dispatch_priority);
}
/* This function generates the dispatch function for
   multi-versioned functions.  DISPATCH_DECL is the function which will
   contain the dispatch logic.  FNDECLS are the function choices for
   dispatch, and is a tree chain.  EMPTY_BB is the basic block pointer
   in DISPATCH_DECL in which the dispatch code is generated.  */

static int
dispatch_function_versions (tree dispatch_decl,
			    void *fndecls_p,
			    basic_block *empty_bb)
{
  tree default_decl;
  gimple ifunc_cpu_init_stmt;
  gimple_seq gseq;
  int ix;
  tree ele;
  vec<tree> *fndecls;
  unsigned int num_versions = 0;
  unsigned int actual_versions = 0;
  unsigned int i;

  struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    } *function_version_info;

  gcc_assert (dispatch_decl != NULL
	      && fndecls_p != NULL
	      && empty_bb != NULL);

  /*fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  num_versions = fndecls->length ();
  gcc_assert (num_versions >= 2);

  function_version_info = (struct _function_version_info *)
    XNEWVEC (struct _function_version_info, (num_versions - 1));

  /* The first version in the vector is the default decl.  */
  default_decl = (*fndecls)[0];

  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));

  gseq = bb_seq (*empty_bb);
  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
     constructors, so explicity call __builtin_cpu_init here.  */
  ifunc_cpu_init_stmt = gimple_build_call_vec (
		     ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
  set_bb_seq (*empty_bb, gseq);

  pop_cfun ();

  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      tree version_decl = ele;
      tree predicate_chain = NULL_TREE;
      unsigned int priority;
      /* Get attribute string, parse it and find the right predicate decl.
	 The predicate function could be a lengthy combination of many
	 features, like arch-type and various isa-variants.  */
      priority = get_builtin_code_for_version (version_decl,
					       &predicate_chain);

      if (predicate_chain == NULL_TREE)
	continue;

      function_version_info [actual_versions].version_decl = version_decl;
      function_version_info [actual_versions].predicate_chain
	= predicate_chain;
      function_version_info [actual_versions].dispatch_priority = priority;
      actual_versions++;
    }

  /* Sort the versions according to descending order of dispatch priority.  The
     priority is based on the ISA.  This is not a perfect solution.  There
     could still be ambiguity.  If more than one function version is suitable
     to execute, which one should be dispatched?  In future, allow the user
     to specify a dispatch priority next to the version.  */
  qsort (function_version_info, actual_versions,
	 sizeof (struct _function_version_info), feature_compare);

  for (i = 0; i < actual_versions; ++i)
    *empty_bb = add_condition_to_bb (dispatch_decl,
				     function_version_info[i].version_decl,
				     function_version_info[i].predicate_chain,
				     *empty_bb);

  /* dispatch default version at the end.  */
  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
				   NULL, *empty_bb);

  free (function_version_info);
  return 0;
}
/* Comparator function to be used in qsort routine to sort attribute
   specification strings to "target".  */

static int
attr_strcmp (const void *v1, const void *v2)
{
  const char *c1 = *(char *const*)v1;
  const char *c2 = *(char *const*)v2;
  return strcmp (c1, c2);
}

/* ARGLIST is the argument to target attribute.  This function tokenizes
   the comma separated arguments, sorts them and returns a string which
   is a unique identifier for the comma separated arguments.  It also
   replaces non-identifier characters "=,-" with "_".  */

static char *
sorted_attr_string (tree arglist)
{
  tree arg;
  size_t str_len_sum = 0;
  char **args = NULL;
  char *attr_str, *ret_str;
  char *attr = NULL;
  unsigned int argnum = 1;
  unsigned int i;

  for (arg = arglist; arg; arg = TREE_CHAIN (arg))
    {
      const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
      size_t len = strlen (str);
      str_len_sum += len + 1;
      if (arg != arglist)
        argnum++;
      for (i = 0; i < strlen (str); i++)
        if (str[i] == ',')
          argnum++;
    }

  attr_str = XNEWVEC (char, str_len_sum);
  str_len_sum = 0;
  for (arg = arglist; arg; arg = TREE_CHAIN (arg))
    {
      const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
      size_t len = strlen (str);
      memcpy (attr_str + str_len_sum, str, len);
      attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
      str_len_sum += len + 1;
    }

  /* Replace "=,-" with "_".  */
  for (i = 0; i < strlen (attr_str); i++)
    if (attr_str[i] == '=' || attr_str[i] == '-')
      attr_str[i] = '_';

  if (argnum == 1)
    return attr_str;

  args = XNEWVEC (char *, argnum);

  i = 0;
  attr = strtok (attr_str, ",");
  while (attr != NULL)
    {
      args[i] = attr;
      i++;
      attr = strtok (NULL, ",");
    }

  qsort (args, argnum, sizeof (char *), attr_strcmp);

  ret_str = XNEWVEC (char, str_len_sum);
  str_len_sum = 0;
  for (i = 0; i < argnum; i++)
    {
      size_t len = strlen (args[i]);
      memcpy (ret_str + str_len_sum, args[i], len);
      ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
      str_len_sum += len + 1;
    }

  XDELETEVEC (args);
  XDELETEVEC (attr_str);
  return ret_str;
}

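/* Worked example (illustrative): an attribute argument list
   ("avx","arch=core2") is first flattened to "avx,arch=core2", the '='
   is rewritten to give "avx,arch_core2", and after tokenizing on ','
   and sorting the result is "arch_core2_avx".  */
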
/* This function changes the assembler name for functions that are
   versions.  If DECL is a function version and has a "target"
   attribute, it appends the attribute string to its assembler name.  */

static tree
ix86_mangle_function_version_assembler_name (tree decl, tree id)
{
  tree version_attr;
  const char *orig_name, *version_string;
  char *attr_str, *assembler_name;

  if (DECL_DECLARED_INLINE_P (decl)
      && lookup_attribute ("gnu_inline",
                           DECL_ATTRIBUTES (decl)))
    error_at (DECL_SOURCE_LOCATION (decl),
              "Function versions cannot be marked as gnu_inline,"
              " bodies have to be generated");

  if (DECL_VIRTUAL_P (decl)
      || DECL_VINDEX (decl))
    sorry ("Virtual function multiversioning not supported");

  version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));

  /* target attribute string cannot be NULL.  */
  gcc_assert (version_attr != NULL_TREE);

  orig_name = IDENTIFIER_POINTER (id);
  version_string
    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));

  if (strcmp (version_string, "default") == 0)
    return id;

  attr_str = sorted_attr_string (TREE_VALUE (version_attr));
  assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);

  sprintf (assembler_name, "%s.%s", orig_name, attr_str);

  /* Allow assembler name to be modified if already set.  */
  if (DECL_ASSEMBLER_NAME_SET_P (decl))
    SET_DECL_RTL (decl, NULL);

  tree ret = get_identifier (assembler_name);
  XDELETEVEC (attr_str);
  XDELETEVEC (assembler_name);
  return ret;
}

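/* Thus a version of foo declared with
   __attribute__ ((target ("arch=core2"))) ends up with the assembler
   name "foo.arch_core2", while the "default" version keeps its original
   assembler name.  (Illustrative example.)  */
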
/* This function returns true if FN1 and FN2 are versions of the same function,
   that is, the target strings of the function decls are different.  This
   assumes that FN1 and FN2 have the same signature.  */

bool
ix86_function_versions (tree fn1, tree fn2)
{
  tree attr1, attr2;
  char *target1, *target2;
  bool result;

  if (TREE_CODE (fn1) != FUNCTION_DECL
      || TREE_CODE (fn2) != FUNCTION_DECL)
    return false;

  attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
  attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));

  /* At least one function decl should have the target attribute specified.  */
  if (attr1 == NULL_TREE && attr2 == NULL_TREE)
    return false;

  /* Diagnose missing target attribute if one of the decls is already
     multi-versioned.  */
  if (attr1 == NULL_TREE || attr2 == NULL_TREE)
    {
      if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
        {
          if (attr2 != NULL_TREE)
            {
              tree tem = fn1;
              fn1 = fn2;
              fn2 = tem;
              attr1 = attr2;
            }
          error_at (DECL_SOURCE_LOCATION (fn2),
                    "missing %<target%> attribute for multi-versioned %D",
                    fn2);
          error_at (DECL_SOURCE_LOCATION (fn1),
                    "previous declaration of %D", fn1);
          /* Prevent diagnosing of the same error multiple times.  */
          DECL_ATTRIBUTES (fn2)
            = tree_cons (get_identifier ("target"),
                         copy_node (TREE_VALUE (attr1)),
                         DECL_ATTRIBUTES (fn2));
        }
      return false;
    }

  target1 = sorted_attr_string (TREE_VALUE (attr1));
  target2 = sorted_attr_string (TREE_VALUE (attr2));

  /* The sorted target strings must be different for fn1 and fn2
     to be versions.  */
  if (strcmp (target1, target2) == 0)
    result = false;
  else
    result = true;

  XDELETEVEC (target1);
  XDELETEVEC (target2);

  return result;
}

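/* Because the comparison is done on the sorted strings, declarations
   such as target ("sse4.2,avx") and target ("avx,sse4.2") name the same
   version, while target ("avx") and target ("sse4.2") are distinct
   versions.  (Illustrative example.)  */
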
static tree
ix86_mangle_decl_assembler_name (tree decl, tree id)
{
  /* For function version, add the target suffix to the assembler name.  */
  if (TREE_CODE (decl) == FUNCTION_DECL
      && DECL_FUNCTION_VERSIONED (decl))
    id = ix86_mangle_function_version_assembler_name (decl, id);
#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
  id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
#endif

  return id;
}

/* Return a new name by appending SUFFIX to the DECL name.  If make_unique
   is true, append the full path name of the source file.  */

static char *
make_name (tree decl, const char *suffix, bool make_unique)
{
  char *global_var_name;
  int name_len;
  const char *name;
  const char *unique_name = NULL;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));

  /* Get a unique name that can be used globally without any chances
     of collision at link time.  */
  if (make_unique)
    unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));

  name_len = strlen (name) + strlen (suffix) + 2;

  if (make_unique)
    name_len += strlen (unique_name) + 1;

  global_var_name = XNEWVEC (char, name_len);

  /* Use '.' to concatenate names as it is demangler friendly.  */
  if (make_unique)
    snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
              suffix);
  else
    snprintf (global_var_name, name_len, "%s.%s", name, suffix);

  return global_var_name;
}

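/* E.g. make_name (foo, "resolver", false) yields "foo.resolver"; with
   MAKE_UNIQUE true the result is roughly "foo.<unique file stamp>.resolver",
   keeping the symbol unique at link time.  (Illustrative; the unique part
   comes from get_file_function_name.)  */
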
#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Return the decl created.  */

static tree
make_dispatcher_decl (const tree decl)
{
  tree func_decl;
  char *func_name;
  tree fn_type, func_type;
  bool is_uniq = false;

  if (TREE_PUBLIC (decl) == 0)
    is_uniq = true;

  func_name = make_name (decl, "ifunc", is_uniq);

  fn_type = TREE_TYPE (decl);
  func_type = build_function_type (TREE_TYPE (fn_type),
                                   TYPE_ARG_TYPES (fn_type));

  func_decl = build_fn_decl (func_name, func_type);
  XDELETEVEC (func_name);
  TREE_USED (func_decl) = 1;
  DECL_CONTEXT (func_decl) = NULL_TREE;
  DECL_INITIAL (func_decl) = error_mark_node;
  DECL_ARTIFICIAL (func_decl) = 1;
  /* Mark this func as external, the resolver will flip it again if
     it gets generated.  */
  DECL_EXTERNAL (func_decl) = 1;
  /* IFUNCs have to be externally visible.  */
  TREE_PUBLIC (func_decl) = 1;

  return func_decl;
}

#endif

/* Returns true if decl is multi-versioned and DECL is the default function,
   that is it is not tagged with target specific optimization.  */

static bool
is_function_default_version (const tree decl)
{
  if (TREE_CODE (decl) != FUNCTION_DECL
      || !DECL_FUNCTION_VERSIONED (decl))
    return false;
  tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
  gcc_assert (attr);
  attr = TREE_VALUE (TREE_VALUE (attr));
  return (TREE_CODE (attr) == STRING_CST
          && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
}

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function.  */

static tree
ix86_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;

  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  node = cgraph_get_node (fn);
  gcc_assert (node != NULL);

  node_v = get_cgraph_node_version (node);
  gcc_assert (node_v != NULL);

  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;
  default_version_info = first_v;
  while (default_version_info != NULL)
    {
      if (is_function_default_version
            (default_version_info->this_node->symbol.decl))
        break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node.  */
  if (first_v != default_version_info)
    {
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
        default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->symbol.decl);

      dispatcher_node = cgraph_get_create_node (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
        = insert_new_cgraph_node_version (dispatcher_node);
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->symbol.definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
        {
          it_v->dispatcher_resolver = dispatch_decl;
          it_v = it_v->next;
        }
    }
  else
#endif
    {
      error_at (DECL_SOURCE_LOCATION (default_node->symbol.decl),
                "multiversioning needs ifunc which is not supported "
                "on this target");
    }

  return dispatch_decl;
}

/* Makes a function attribute of the form NAME(ARG_NAME) and chains
   it to CHAIN.  */

static tree
make_attribute (const char *name, const char *arg_name, tree chain)
{
  tree attr_name;
  tree attr_arg_name;
  tree attr_args;
  tree attr;

  attr_name = get_identifier (name);
  attr_arg_name = build_string (strlen (arg_name), arg_name);
  attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
  attr = tree_cons (attr_name, attr_args, chain);
  return attr;
}

/* Make the resolver function decl to dispatch the versions of
   a multi-versioned function, DEFAULT_DECL.  Create an
   empty basic block in the resolver and store the pointer in
   EMPTY_BB.  Return the decl of the resolver function.  */

static tree
make_resolver_func (const tree default_decl,
                    const tree dispatch_decl,
                    basic_block *empty_bb)
{
  char *resolver_name;
  tree decl, type, decl_name, t;
  bool is_uniq = false;

  /* IFUNC's have to be globally visible.  So, if the default_decl is
     not, then the name of the IFUNC should be made unique.  */
  if (TREE_PUBLIC (default_decl) == 0)
    is_uniq = true;

  /* Append the filename to the resolver function if the versions are
     not externally visible.  This is because the resolver function has
     to be externally visible for the loader to find it.  So, appending
     the filename will prevent conflicts with a resolver function from
     another module which is based on the same version name.  */
  resolver_name = make_name (default_decl, "resolver", is_uniq);

  /* The resolver function should return a (void *). */
  type = build_function_type_list (ptr_type_node, NULL_TREE);

  decl = build_fn_decl (resolver_name, type);
  decl_name = get_identifier (resolver_name);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  /* IFUNC resolvers have to be externally visible.  */
  TREE_PUBLIC (decl) = 1;
  DECL_UNINLINABLE (decl) = 0;

  /* Resolver is not external, body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (dispatch_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
         versioned function will put out a resolver.  Ensure it
         is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }
  /* Build result decl and add to function_decl.  */
  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  *empty_bb = init_lowered_empty_function (decl, false);

  cgraph_add_new_function (decl, true);
  cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));

  pop_cfun ();

  gcc_assert (dispatch_decl != NULL);
  /* Mark dispatch_decl as "ifunc" with resolver as resolver_name.  */
  DECL_ATTRIBUTES (dispatch_decl)
    = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));

  /* Create the alias for dispatch to resolver here.  */
  /*cgraph_create_function_alias (dispatch_decl, decl);*/
  cgraph_same_body_alias (NULL, dispatch_decl, decl);
  XDELETEVEC (resolver_name);
  return decl;
}

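/* The pieces fit together roughly as follows (illustrative sketch):

     foo.ifunc    -- the dispatcher decl, carrying attribute
                     ifunc ("foo.resolver")
     foo.resolver -- comdat body, filled in by
                     ix86_generate_version_dispatcher_body; returns the
                     address of the version selected at load time

   The same-body alias created above ties the dispatcher decl to the
   resolver so the IFUNC machinery can find it.  */
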
/* Generate the dispatching code body to dispatch multi-versioned function
   DECL.  The target hook is called to process the "target" attributes and
   provide the code to dispatch the right function at run-time.  NODE points
   to the dispatcher decl whose body will be created.  */

static tree
ix86_generate_version_dispatcher_body (void *node_p)
{
  tree resolver_decl;
  basic_block empty_bb;
  vec<tree> fn_ver_vec = vNULL;
  tree default_ver_decl;
  struct cgraph_node *versn;
  struct cgraph_node *node;

  struct cgraph_function_version_info *node_version_info = NULL;
  struct cgraph_function_version_info *versn_info = NULL;

  node = (cgraph_node *)node_p;

  node_version_info = get_cgraph_node_version (node);
  gcc_assert (node->dispatcher_function
              && node_version_info != NULL);

  if (node_version_info->dispatcher_resolver)
    return node_version_info->dispatcher_resolver;

  /* The first version in the chain corresponds to the default version.  */
  default_ver_decl = node_version_info->next->this_node->symbol.decl;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->symbol.definition = false;

  resolver_decl = make_resolver_func (default_ver_decl,
                                      node->symbol.decl, &empty_bb);

  node_version_info->dispatcher_resolver = resolver_decl;

  push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));

  fn_ver_vec.create (2);

  for (versn_info = node_version_info->next; versn_info;
       versn_info = versn_info->next)
    {
      versn = versn_info->this_node;
      /* Check for virtual functions here again, as by this time it should
         have been determined if this function needs a vtable index or
         not.  This happens for methods in derived classes that override
         virtual methods in base classes but are not explicitly marked as
         virtual.  */
      if (DECL_VINDEX (versn->symbol.decl))
        sorry ("Virtual function multiversioning not supported");

      fn_ver_vec.safe_push (versn->symbol.decl);
    }

  dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
  fn_ver_vec.release ();
  rebuild_cgraph_edges ();
  pop_cfun ();
  return resolver_decl;
}

/* This builds the processor_model struct type defined in
   libgcc/config/i386/cpuinfo.c  */

static tree
build_processor_model_struct (void)
{
  const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
                              "__cpu_features"};
  tree field = NULL_TREE, field_chain = NULL_TREE;
  unsigned int i;
  tree type = make_node (RECORD_TYPE);

  /* The first 3 fields are unsigned int.  */
  for (i = 0; i < 3; ++i)
    {
      field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
                          get_identifier (field_name[i]), unsigned_type_node);
      if (field_chain != NULL_TREE)
        DECL_CHAIN (field) = field_chain;
      field_chain = field;
    }

  /* The last field is an array of unsigned integers of size one.  */
  field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
                      get_identifier (field_name[3]),
                      build_array_type (unsigned_type_node,
                                        build_index_type (size_one_node)));
  if (field_chain != NULL_TREE)
    DECL_CHAIN (field) = field_chain;
  field_chain = field;

  finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
  return type;
}

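/* The layout built above mirrors the struct defined in
   libgcc/config/i386/cpuinfo.c:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };
*/
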
/* Returns an extern, comdat VAR_DECL of type TYPE and name NAME.  */

static tree
make_var_decl (tree type, const char *name)
{
  tree new_decl;

  new_decl = build_decl (UNKNOWN_LOCATION,
                         VAR_DECL,
                         get_identifier(name),
                         type);

  DECL_EXTERNAL (new_decl) = 1;
  TREE_STATIC (new_decl) = 1;
  TREE_PUBLIC (new_decl) = 1;
  DECL_INITIAL (new_decl) = 0;
  DECL_ARTIFICIAL (new_decl) = 0;
  DECL_PRESERVE_P (new_decl) = 1;

  make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
  assemble_variable (new_decl, 0, 0, 0);

  return new_decl;
}

/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
   into an integer defined in libgcc/config/i386/cpuinfo.c  */

static tree
fold_builtin_cpu (tree fndecl, tree *args)
{
  unsigned int i;
  enum ix86_builtins fn_code = (enum ix86_builtins)
                                DECL_FUNCTION_CODE (fndecl);
  tree param_string_cst = NULL;

  /* This is the order of bit-fields in __processor_features in cpuinfo.c */
  enum processor_features
  {
    F_CMOV = 0,
    F_MMX,
    F_POPCNT,
    F_SSE,
    F_SSE2,
    F_SSE3,
    F_SSSE3,
    F_SSE4_1,
    F_SSE4_2,
    F_AVX,
    F_AVX2,
    F_MAX
  };

  /* These are the values for vendor types and cpu types and subtypes
     in cpuinfo.c.  Cpu types and subtypes should be subtracted by
     the corresponding start value.  */
  enum processor_model
  {
    M_INTEL = 1,
    M_AMD,
    M_CPU_TYPE_START,
    M_INTEL_ATOM,
    M_INTEL_CORE2,
    M_INTEL_COREI7,
    M_AMDFAM10H,
    M_AMDFAM15H,
    M_INTEL_SLM,
    M_CPU_SUBTYPE_START,
    M_INTEL_COREI7_NEHALEM,
    M_INTEL_COREI7_WESTMERE,
    M_INTEL_COREI7_SANDYBRIDGE,
    M_AMDFAM10H_BARCELONA,
    M_AMDFAM10H_SHANGHAI,
    M_AMDFAM10H_ISTANBUL,
    M_AMDFAM15H_BDVER1,
    M_AMDFAM15H_BDVER2,
    M_AMDFAM15H_BDVER3
  };

  static struct _arch_names_table
    {
      const char *const name;
      const enum processor_model model;
    }
  const arch_names_table[] =
    {
      {"amd", M_AMD},
      {"intel", M_INTEL},
      {"atom", M_INTEL_ATOM},
      {"slm", M_INTEL_SLM},
      {"core2", M_INTEL_CORE2},
      {"corei7", M_INTEL_COREI7},
      {"nehalem", M_INTEL_COREI7_NEHALEM},
      {"westmere", M_INTEL_COREI7_WESTMERE},
      {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
      {"amdfam10h", M_AMDFAM10H},
      {"barcelona", M_AMDFAM10H_BARCELONA},
      {"shanghai", M_AMDFAM10H_SHANGHAI},
      {"istanbul", M_AMDFAM10H_ISTANBUL},
      {"amdfam15h", M_AMDFAM15H},
      {"bdver1", M_AMDFAM15H_BDVER1},
      {"bdver2", M_AMDFAM15H_BDVER2},
      {"bdver3", M_AMDFAM15H_BDVER3},
    };

  static struct _isa_names_table
    {
      const char *const name;
      const enum processor_features feature;
    }
  const isa_names_table[] =
    {
      {"cmov",   F_CMOV},
      {"mmx",    F_MMX},
      {"popcnt", F_POPCNT},
      {"sse",    F_SSE},
      {"sse2",   F_SSE2},
      {"sse3",   F_SSE3},
      {"ssse3",  F_SSSE3},
      {"sse4.1", F_SSE4_1},
      {"sse4.2", F_SSE4_2},
      {"avx",    F_AVX},
      {"avx2",   F_AVX2}
    };

  tree __processor_model_type = build_processor_model_struct ();
  tree __cpu_model_var = make_var_decl (__processor_model_type,
                                        "__cpu_model");

  varpool_add_new_variable (__cpu_model_var);

  gcc_assert ((args != NULL) && (*args != NULL));

  param_string_cst = *args;
  while (param_string_cst
         && TREE_CODE (param_string_cst) != STRING_CST)
    {
      /* *args must be a expr that can contain other EXPRS leading to a
         STRING_CST.  */
      if (!EXPR_P (param_string_cst))
        {
          error ("Parameter to builtin must be a string constant or literal");
          return integer_zero_node;
        }
      param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
    }

  gcc_assert (param_string_cst);

  if (fn_code == IX86_BUILTIN_CPU_IS)
    {
      tree ref;
      tree field;
      tree final;

      unsigned int field_val = 0;
      unsigned int NUM_ARCH_NAMES
        = sizeof (arch_names_table) / sizeof (struct _arch_names_table);

      for (i = 0; i < NUM_ARCH_NAMES; i++)
        if (strcmp (arch_names_table[i].name,
                    TREE_STRING_POINTER (param_string_cst)) == 0)
          break;

      if (i == NUM_ARCH_NAMES)
        {
          error ("Parameter to builtin not valid: %s",
                 TREE_STRING_POINTER (param_string_cst));
          return integer_zero_node;
        }

      field = TYPE_FIELDS (__processor_model_type);
      field_val = arch_names_table[i].model;

      /* CPU types are stored in the next field.  */
      if (field_val > M_CPU_TYPE_START
          && field_val < M_CPU_SUBTYPE_START)
        {
          field = DECL_CHAIN (field);
          field_val -= M_CPU_TYPE_START;
        }

      /* CPU subtypes are stored in the next field.  */
      if (field_val > M_CPU_SUBTYPE_START)
        {
          field = DECL_CHAIN ( DECL_CHAIN (field));
          field_val -= M_CPU_SUBTYPE_START;
        }

      /* Get the appropriate field in __cpu_model.  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
                    field, NULL_TREE);

      /* Check the value.  */
      final = build2 (EQ_EXPR, unsigned_type_node, ref,
                      build_int_cstu (unsigned_type_node, field_val));
      return build1 (CONVERT_EXPR, integer_type_node, final);
    }
  else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
    {
      tree ref;
      tree array_elt;
      tree field;
      tree final;

      unsigned int field_val = 0;
      unsigned int NUM_ISA_NAMES
        = sizeof (isa_names_table) / sizeof (struct _isa_names_table);

      for (i = 0; i < NUM_ISA_NAMES; i++)
        if (strcmp (isa_names_table[i].name,
                    TREE_STRING_POINTER (param_string_cst)) == 0)
          break;

      if (i == NUM_ISA_NAMES)
        {
          error ("Parameter to builtin not valid: %s",
                 TREE_STRING_POINTER (param_string_cst));
          return integer_zero_node;
        }

      field = TYPE_FIELDS (__processor_model_type);
      /* Get the last field, which is __cpu_features.  */
      while (DECL_CHAIN (field))
        field = DECL_CHAIN (field);

      /* Get the appropriate field: __cpu_model.__cpu_features  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
                    field, NULL_TREE);

      /* Access the 0th element of __cpu_features array.  */
      array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
                          integer_zero_node, NULL_TREE, NULL_TREE);

      field_val = (1 << isa_names_table[i].feature);
      /* Return __cpu_model.__cpu_features[0] & field_val  */
      final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
                      build_int_cstu (unsigned_type_node, field_val));
      return build1 (CONVERT_EXPR, integer_type_node, final);
    }
  gcc_unreachable ();
}

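/* Two worked examples of the folding above (illustrative):

     __builtin_cpu_is ("westmere")
       -> (int) (__cpu_model.__cpu_subtype
                 == M_INTEL_COREI7_WESTMERE - M_CPU_SUBTYPE_START)

     __builtin_cpu_supports ("sse4.2")
       -> (int) (__cpu_model.__cpu_features[0] & (1 << F_SSE4_2))  */
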
static tree
ix86_fold_builtin (tree fndecl, int n_args,
                   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ix86_builtins fn_code = (enum ix86_builtins)
                                   DECL_FUNCTION_CODE (fndecl);
      if (fn_code == IX86_BUILTIN_CPU_IS
          || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
        {
          gcc_assert (n_args == 1);
          return fold_builtin_cpu (fndecl, args);
        }
    }

#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#endif

  return NULL_TREE;
}

/* Make builtins to detect cpu type and features supported.  NAME is
   the builtin name, CODE is the builtin code, and FTYPE is the function
   type of the builtin.  */

static void
make_cpu_type_builtin (const char* name, int code,
                       enum ix86_builtin_func_type ftype, bool is_const)
{
  tree decl;
  tree type;

  type = ix86_get_builtin_func_type (ftype);
  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
                               NULL, NULL_TREE);
  gcc_assert (decl != NULL_TREE);
  ix86_builtins[(int) code] = decl;
  TREE_READONLY (decl) = is_const;
}

/* Make builtins to get CPU type and features supported.  The created
   builtins are:

   __builtin_cpu_init (), to detect cpu type and features,
   __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
   __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
   */

static void
ix86_init_platform_type_builtins (void)
{
  make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
                         INT_FTYPE_VOID, false);
  make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
                         INT_FTYPE_PCCHAR, true);
  make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
                         INT_FTYPE_PCCHAR, true);
}

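/* Typical use from user code (illustrative; helper names are made up):

     if (__builtin_cpu_supports ("sse4.2"))
       use_sse42_path ();
     else
       use_generic_path ();

   __builtin_cpu_init () must have run first; IFUNC resolvers and other
   code that can run before constructors should call it explicitly, as
   dispatch_function_versions does above.  */
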
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
                                      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
                              NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
                              sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}

static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}

static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* Builtins to get CPU type and features.  */
  ix86_init_platform_type_builtins ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal call if SSE isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
                            BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
                            BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();

  if (TARGET_LP64)
    ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}

/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);

  return target;
}

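/* For instance, a two-operand builtin such as __builtin_ia32_paddw128
   is expanded through this routine: both vector arguments are forced
   into operands that satisfy the insn's predicates and a single add
   pattern is emitted.  (Descriptive example; the exact insn code comes
   from the builtin description tables.)  */
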
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
                               enum ix86_builtin_func_type m_type,
                               enum rtx_code sub_code)
{
  rtx pat;
  unsigned int i, nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
        {
          if (!insn_data[icode].operand[i + 1].predicate (op, mode))
            {
              enum insn_code new_icode = icode;
              switch (icode)
                {
                case CODE_FOR_xop_vpermil2v2df3:
                case CODE_FOR_xop_vpermil2v4sf3:
                case CODE_FOR_xop_vpermil2v4df3:
                case CODE_FOR_xop_vpermil2v8sf3:
                  error ("the last argument must be a 2-bit immediate");
                  return gen_reg_rtx (tmode);
                case CODE_FOR_xop_rotlv2di3:
                  new_icode = CODE_FOR_rotlv2di3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv4si3:
                  new_icode = CODE_FOR_rotlv4si3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv8hi3:
                  new_icode = CODE_FOR_rotlv8hi3;
                  goto xop_rotl;
                case CODE_FOR_xop_rotlv16qi3:
                  new_icode = CODE_FOR_rotlv16qi3;
                xop_rotl:
                  if (CONST_INT_P (op))
                    {
                      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
                      op = GEN_INT (INTVAL (op) & mask);
                      gcc_checking_assert
                        (insn_data[icode].operand[i + 1].predicate (op, mode));
                    }
                  else
                    {
                      gcc_checking_assert
                        (nargs == 2
                         && insn_data[new_icode].operand[0].mode == tmode
                         && insn_data[new_icode].operand[1].mode == tmode
                         && insn_data[new_icode].operand[2].mode == mode
                         && insn_data[new_icode].operand[0].predicate
                            == insn_data[icode].operand[0].predicate
                         && insn_data[new_icode].operand[1].predicate
                            == insn_data[icode].operand[1].predicate);
                      icode = new_icode;
                      goto non_constant;
                    }
                  break;
                default:
                  gcc_unreachable ();
                }
            }
        }
      else
        {
        non_constant:
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to be
             generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

          if (optimize
              || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
              || num_memory > 1)
            op = force_reg (mode, op);
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                               GEN_INT ((int)sub_code));
      else if (! comparison_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
        {
          rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
                                       args[0].op,
                                       args[1].op);

          pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
        }
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op,
                             args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
                                    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
                         tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
                      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

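/* The SImode-register/QImode-subreg sequence above (also used by the
   ptest and pcmpestr/pcmpistr expanders below) first zeroes a full
   register, then sets only its low byte from the flags comparison via
   STRICT_LOW_PART, so the rest of the register is known to be zero
   without a separate zero-extension.  */
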
/* Subroutines of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
                       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

static rtx
ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
                                     tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = safe_vector_operand (op0, mode0);
  op1 = safe_vector_operand (op1, mode1);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
                       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          gen_rtx_REG (CCmode, FLAGS_REG),
                                          const0_rtx)));

  return SUBREG_REG (target);
}

/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}

/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}

31216 /* Subroutine of ix86_expand_builtin to take care of insns with
31217 variable number of operands. */
31220 ix86_expand_args_builtin (const struct builtin_description
*d
,
31221 tree exp
, rtx target
)
31223 rtx pat
, real_target
;
31224 unsigned int i
, nargs
;
31225 unsigned int nargs_constant
= 0;
31226 int num_memory
= 0;
31230 enum machine_mode mode
;
31232 bool last_arg_count
= false;
31233 enum insn_code icode
= d
->icode
;
31234 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
31235 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
31236 enum machine_mode rmode
= VOIDmode
;
31238 enum rtx_code comparison
= d
->comparison
;
31240 switch ((enum ix86_builtin_func_type
) d
->flag
)
31242 case V2DF_FTYPE_V2DF_ROUND
:
31243 case V4DF_FTYPE_V4DF_ROUND
:
31244 case V4SF_FTYPE_V4SF_ROUND
:
31245 case V8SF_FTYPE_V8SF_ROUND
:
31246 case V4SI_FTYPE_V4SF_ROUND
:
31247 case V8SI_FTYPE_V8SF_ROUND
:
31248 return ix86_expand_sse_round (d
, exp
, target
);
31249 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
31250 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
31251 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
31252 case INT_FTYPE_V8SF_V8SF_PTEST
:
31253 case INT_FTYPE_V4DI_V4DI_PTEST
:
31254 case INT_FTYPE_V4DF_V4DF_PTEST
:
31255 case INT_FTYPE_V4SF_V4SF_PTEST
:
31256 case INT_FTYPE_V2DI_V2DI_PTEST
:
31257 case INT_FTYPE_V2DF_V2DF_PTEST
:
31258 return ix86_expand_sse_ptest (d
, exp
, target
);
31259 case FLOAT128_FTYPE_FLOAT128
:
31260 case FLOAT_FTYPE_FLOAT
:
31261 case INT_FTYPE_INT
:
31262 case UINT64_FTYPE_INT
:
31263 case UINT16_FTYPE_UINT16
:
31264 case INT64_FTYPE_INT64
:
31265 case INT64_FTYPE_V4SF
:
31266 case INT64_FTYPE_V2DF
:
31267 case INT_FTYPE_V16QI
:
31268 case INT_FTYPE_V8QI
:
31269 case INT_FTYPE_V8SF
:
31270 case INT_FTYPE_V4DF
:
31271 case INT_FTYPE_V4SF
:
31272 case INT_FTYPE_V2DF
:
31273 case INT_FTYPE_V32QI
:
31274 case V16QI_FTYPE_V16QI
:
31275 case V8SI_FTYPE_V8SF
:
31276 case V8SI_FTYPE_V4SI
:
31277 case V8HI_FTYPE_V8HI
:
31278 case V8HI_FTYPE_V16QI
:
31279 case V8QI_FTYPE_V8QI
:
31280 case V8SF_FTYPE_V8SF
:
31281 case V8SF_FTYPE_V8SI
:
31282 case V8SF_FTYPE_V4SF
:
31283 case V8SF_FTYPE_V8HI
:
31284 case V4SI_FTYPE_V4SI
:
31285 case V4SI_FTYPE_V16QI
:
31286 case V4SI_FTYPE_V4SF
:
31287 case V4SI_FTYPE_V8SI
:
31288 case V4SI_FTYPE_V8HI
:
31289 case V4SI_FTYPE_V4DF
:
31290 case V4SI_FTYPE_V2DF
:
31291 case V4HI_FTYPE_V4HI
:
31292 case V4DF_FTYPE_V4DF
:
31293 case V4DF_FTYPE_V4SI
:
31294 case V4DF_FTYPE_V4SF
:
31295 case V4DF_FTYPE_V2DF
:
31296 case V4SF_FTYPE_V4SF
:
31297 case V4SF_FTYPE_V4SI
:
31298 case V4SF_FTYPE_V8SF
:
31299 case V4SF_FTYPE_V4DF
:
31300 case V4SF_FTYPE_V8HI
:
31301 case V4SF_FTYPE_V2DF
:
31302 case V2DI_FTYPE_V2DI
:
31303 case V2DI_FTYPE_V16QI
:
31304 case V2DI_FTYPE_V8HI
:
31305 case V2DI_FTYPE_V4SI
:
31306 case V2DF_FTYPE_V2DF
:
31307 case V2DF_FTYPE_V4SI
:
31308 case V2DF_FTYPE_V4DF
:
31309 case V2DF_FTYPE_V4SF
:
31310 case V2DF_FTYPE_V2SI
:
31311 case V2SI_FTYPE_V2SI
:
31312 case V2SI_FTYPE_V4SF
:
31313 case V2SI_FTYPE_V2SF
:
31314 case V2SI_FTYPE_V2DF
:
31315 case V2SF_FTYPE_V2SF
:
31316 case V2SF_FTYPE_V2SI
:
31317 case V32QI_FTYPE_V32QI
:
31318 case V32QI_FTYPE_V16QI
:
31319 case V16HI_FTYPE_V16HI
:
31320 case V16HI_FTYPE_V8HI
:
31321 case V8SI_FTYPE_V8SI
:
31322 case V16HI_FTYPE_V16QI
:
31323 case V8SI_FTYPE_V16QI
:
31324 case V4DI_FTYPE_V16QI
:
31325 case V8SI_FTYPE_V8HI
:
31326 case V4DI_FTYPE_V8HI
:
31327 case V4DI_FTYPE_V4SI
:
31328 case V4DI_FTYPE_V2DI
:
31331 case V4SF_FTYPE_V4SF_VEC_MERGE
:
31332 case V2DF_FTYPE_V2DF_VEC_MERGE
:
31333 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
31334 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
31335 case V16QI_FTYPE_V16QI_V16QI
:
31336 case V16QI_FTYPE_V8HI_V8HI
:
31337 case V8QI_FTYPE_V8QI_V8QI
:
31338 case V8QI_FTYPE_V4HI_V4HI
:
31339 case V8HI_FTYPE_V8HI_V8HI
:
31340 case V8HI_FTYPE_V16QI_V16QI
:
31341 case V8HI_FTYPE_V4SI_V4SI
:
31342 case V8SF_FTYPE_V8SF_V8SF
:
31343 case V8SF_FTYPE_V8SF_V8SI
:
31344 case V4SI_FTYPE_V4SI_V4SI
:
31345 case V4SI_FTYPE_V8HI_V8HI
:
31346 case V4SI_FTYPE_V4SF_V4SF
:
31347 case V4SI_FTYPE_V2DF_V2DF
:
31348 case V4HI_FTYPE_V4HI_V4HI
:
31349 case V4HI_FTYPE_V8QI_V8QI
:
31350 case V4HI_FTYPE_V2SI_V2SI
:
31351 case V4DF_FTYPE_V4DF_V4DF
:
31352 case V4DF_FTYPE_V4DF_V4DI
:
31353 case V4SF_FTYPE_V4SF_V4SF
:
31354 case V4SF_FTYPE_V4SF_V4SI
:
31355 case V4SF_FTYPE_V4SF_V2SI
:
31356 case V4SF_FTYPE_V4SF_V2DF
:
31357 case V4SF_FTYPE_V4SF_DI
:
31358 case V4SF_FTYPE_V4SF_SI
:
31359 case V2DI_FTYPE_V2DI_V2DI
:
31360 case V2DI_FTYPE_V16QI_V16QI
:
31361 case V2DI_FTYPE_V4SI_V4SI
:
31362 case V2UDI_FTYPE_V4USI_V4USI
:
31363 case V2DI_FTYPE_V2DI_V16QI
:
31364 case V2DI_FTYPE_V2DF_V2DF
:
31365 case V2SI_FTYPE_V2SI_V2SI
:
31366 case V2SI_FTYPE_V4HI_V4HI
:
31367 case V2SI_FTYPE_V2SF_V2SF
:
31368 case V2DF_FTYPE_V2DF_V2DF
:
31369 case V2DF_FTYPE_V2DF_V4SF
:
31370 case V2DF_FTYPE_V2DF_V2DI
:
31371 case V2DF_FTYPE_V2DF_DI
:
31372 case V2DF_FTYPE_V2DF_SI
:
31373 case V2SF_FTYPE_V2SF_V2SF
:
31374 case V1DI_FTYPE_V1DI_V1DI
:
31375 case V1DI_FTYPE_V8QI_V8QI
:
31376 case V1DI_FTYPE_V2SI_V2SI
:
31377 case V32QI_FTYPE_V16HI_V16HI
:
31378 case V16HI_FTYPE_V8SI_V8SI
:
31379 case V32QI_FTYPE_V32QI_V32QI
:
31380 case V16HI_FTYPE_V32QI_V32QI
:
31381 case V16HI_FTYPE_V16HI_V16HI
:
31382 case V8SI_FTYPE_V4DF_V4DF
:
31383 case V8SI_FTYPE_V8SI_V8SI
:
31384 case V8SI_FTYPE_V16HI_V16HI
:
31385 case V4DI_FTYPE_V4DI_V4DI
:
31386 case V4DI_FTYPE_V8SI_V8SI
:
31387 case V4UDI_FTYPE_V8USI_V8USI
:
31388 if (comparison
== UNKNOWN
)
31389 return ix86_expand_binop_builtin (icode
, exp
, target
);
31392 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
31393 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
31394 gcc_assert (comparison
!= UNKNOWN
);
31398 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
31399 case V16HI_FTYPE_V16HI_SI_COUNT
:
31400 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
31401 case V8SI_FTYPE_V8SI_SI_COUNT
:
31402 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
31403 case V4DI_FTYPE_V4DI_INT_COUNT
:
31404 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
31405 case V8HI_FTYPE_V8HI_SI_COUNT
:
31406 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
31407 case V4SI_FTYPE_V4SI_SI_COUNT
:
31408 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
31409 case V4HI_FTYPE_V4HI_SI_COUNT
:
31410 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
31411 case V2DI_FTYPE_V2DI_SI_COUNT
:
31412 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
31413 case V2SI_FTYPE_V2SI_SI_COUNT
:
31414 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
31415 case V1DI_FTYPE_V1DI_SI_COUNT
:
31417 last_arg_count
= true;
31419 case UINT64_FTYPE_UINT64_UINT64
:
31420 case UINT_FTYPE_UINT_UINT
:
31421 case UINT_FTYPE_UINT_USHORT
:
31422 case UINT_FTYPE_UINT_UCHAR
:
31423 case UINT16_FTYPE_UINT16_INT
:
31424 case UINT8_FTYPE_UINT8_INT
:
31427 case V2DI_FTYPE_V2DI_INT_CONVERT
:
31430 nargs_constant
= 1;
31432 case V4DI_FTYPE_V4DI_INT_CONVERT
:
31435 nargs_constant
= 1;
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
    case V16HI_FTYPE_V16HI_INT:
    case V8SI_FTYPE_V8SI_INT:
    case V4DI_FTYPE_V4DI_INT:
    case V2DI_FTYPE_V4DI_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
    case V32QI_FTYPE_V32QI_V32QI_V32QI:
      nargs = 3;
      break;
    case V32QI_FTYPE_V32QI_V32QI_INT:
    case V16HI_FTYPE_V16HI_V16HI_INT:
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V4DI_FTYPE_V4DI_V4DI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V4DI_FTYPE_V4DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
    case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
      nargs = 4;
      break;
    default:
      gcc_unreachable ();
    }
  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }
  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }
  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
	{
	  /* SIMD shift insns take either an 8-bit immediate or
	     register as count.  But builtin functions take int as
	     count.  If count doesn't match, we put it in register.  */
	  if (!match)
	    {
	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
	      if (!insn_p->operand[i + 1].predicate (op, mode))
		op = copy_to_reg (op);
	    }
	}
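      /* Illustration (a hedged sketch, not original commentary): for a
	 SIMD shift builtin such as __builtin_ia32_psllwi128, a call like

	     __v8hi x;
	     int n = get_count ();   /* get_count is hypothetical.  */
	     x = __builtin_ia32_psllwi128 (x, n);

	 passes a count that fails the 8-bit-immediate predicate, so the
	 code above narrows it to SImode and copies it into a register.  */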
      else if ((nargs - i) <= nargs_constant)
	{
	  if (!match)
	    switch (icode)
	      {
	      case CODE_FOR_avx2_inserti128:
	      case CODE_FOR_avx2_extracti128:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_roundsd:
	      case CODE_FOR_sse4_1_roundss:

	      case CODE_FOR_sse4_1_roundpd:
	      case CODE_FOR_sse4_1_roundps:
	      case CODE_FOR_avx_roundpd256:
	      case CODE_FOR_avx_roundps256:

	      case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
	      case CODE_FOR_sse4_1_roundps_sfix:
	      case CODE_FOR_avx_roundpd_vec_pack_sfix256:
	      case CODE_FOR_avx_roundps_sfix256:

	      case CODE_FOR_sse4_1_blendps:
	      case CODE_FOR_avx_blendpd256:
	      case CODE_FOR_avx_vpermilv4df:
		error ("the last argument must be a 4-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_blendpd:
	      case CODE_FOR_avx_vpermilv2df:
	      case CODE_FOR_xop_vpermil2v2df3:
	      case CODE_FOR_xop_vpermil2v4sf3:
	      case CODE_FOR_xop_vpermil2v4df3:
	      case CODE_FOR_xop_vpermil2v8sf3:
		error ("the last argument must be a 2-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vextractf128v4df:
	      case CODE_FOR_avx_vextractf128v8sf:
	      case CODE_FOR_avx_vextractf128v8si:
	      case CODE_FOR_avx_vinsertf128v4df:
	      case CODE_FOR_avx_vinsertf128v8sf:
	      case CODE_FOR_avx_vinsertf128v8si:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vmcmpv2df3:
	      case CODE_FOR_avx_vmcmpv4sf3:
	      case CODE_FOR_avx_cmpv2df3:
	      case CODE_FOR_avx_cmpv4sf3:
	      case CODE_FOR_avx_cmpv4df3:
	      case CODE_FOR_avx_cmpv8sf3:
		error ("the last argument must be a 5-bit immediate");
		return const0_rtx;

	      default:
		switch (nargs_constant)
		  {
		  case 2:
		    if ((nargs - i) == nargs_constant)
		      {
			error ("the next to last argument must be an 8-bit immediate");
			break;
		      }
		  case 1:
		    error ("the last argument must be an 8-bit immediate");
		    break;
		  default:
		    gcc_unreachable ();
		  }
		return const0_rtx;
	      }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	    {
	      if (optimize || !match || num_memory > 1)
		op = copy_to_mode_reg (mode, op);
	    }
	  else
	    {
	      op = copy_to_reg (op);
	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }
  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
				  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;
  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
    case INT_FTYPE_VOID:
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V4DI_FTYPE_PV4DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PLONGLONG_LONGLONG:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SI:
    case V4DF_FTYPE_PCV4DF_V4DI:
    case V4SF_FTYPE_PCV4SF_V4SI:
    case V2DF_FTYPE_PCV2DF_V2DI:
    case V8SI_FTYPE_PCV8SI_V8SI:
    case V4DI_FTYPE_PCV4DI_V4DI:
    case V4SI_FTYPE_PCV4SI_V4SI:
    case V2DI_FTYPE_PCV2DI_V2DI:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SI_V8SF:
    case VOID_FTYPE_PV4DF_V4DI_V4DF:
    case VOID_FTYPE_PV4SF_V4SI_V4SF:
    case VOID_FTYPE_PV2DF_V2DI_V2DF:
    case VOID_FTYPE_PV8SI_V8SI_V8SI:
    case VOID_FTYPE_PV4DI_V4DI_V4DI:
    case VOID_FTYPE_PV4SI_V4SI_V4SI:
    case VOID_FTYPE_PV2DI_V2DI_V2DI:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }
  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
	{
	  op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
	  target = gen_rtx_MEM (tmode, op);
	}
      else
	target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
	  || target == 0
	  || !register_operand (target, tmode)
	  || GET_MODE (target) != tmode)
	target = gen_reg_rtx (tmode);
    }
  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
	{
	  if (!match)
	    {
	      if (icode == CODE_FOR_lwp_lwpvalsi3
		  || icode == CODE_FOR_lwp_lwpinssi3
		  || icode == CODE_FOR_lwp_lwpvaldi3
		  || icode == CODE_FOR_lwp_lwpinsdi3)
		error ("the last argument must be a 32-bit immediate");
	      else
		error ("the last argument must be an 8-bit immediate");
	      return const0_rtx;
	    }
	}
      else
	{
	  if (i == memory)
	    {
	      /* This must be the memory operand.  */
	      op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
	      op = gen_rtx_MEM (mode, op);
	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	    }
	  else
	    {
	      /* This must be register.  */
	      if (VECTOR_MODE_P (mode))
		op = safe_vector_operand (op, mode);

	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	      op = copy_to_mode_reg (mode, op);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }
  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */
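/* For illustration (a hedged sketch, not part of the original sources):
   mmintrin.h maps its setters onto these wrappers, roughly as in

       __m64
       _mm_setr_pi32 (int e0, int e1)
       {
	 return (__m64) __builtin_ia32_vec_init_v2si (e0, e1);
       }

   so the initializer list arrives here as the call's argument list.  */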
static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */
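/* Illustrative use (a sketch, not original code): intrinsic wrappers in
   the style of

       #define _mm_extract_epi16(X, N) \
	 ((int) (unsigned short) \
	  __builtin_ia32_vec_ext_v8hi ((__v8hi)(X), (int)(N)))

   funnel element references into ix86_expand_vec_ext_builtin below.  */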
static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */
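/* Illustration (hedged sketch): an insert wrapper along the lines of

       #define _mm_insert_epi16(X, D, N) \
	 ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(X), (D), (N)))

   delivers the source vector, the new element and the selector as the
   three call arguments handled below.  */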
static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */
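/* Roughly speaking (an illustrative note, not original commentary): a
   source-level call such as

       __builtin_ia32_mwait (0, 0);

   reaches this function as EXP; FCODE then selects either one of the
   hand-expanded cases below or a table-driven expander via the bdesc_*
   dispatch loops at the end of the function.  */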
static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3, arg4;
  rtx op0, op1, op2, op3, op4, pat, insn;
  enum machine_mode mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  /* For CPU builtins that can be folded, fold first and expand the fold.  */
  switch (fcode)
    {
    case IX86_BUILTIN_CPU_INIT:
      {
	/* Make it call __cpu_indicator_init in libgcc. */
	tree call_expr, fndecl, type;
	type = build_function_type_list (integer_type_node, NULL_TREE);
	fndecl = build_fn_decl ("__cpu_indicator_init", type);
	call_expr = build_call_expr (fndecl, 0);
	return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
      }
    case IX86_BUILTIN_CPU_IS:
    case IX86_BUILTIN_CPU_SUPPORTS:
      {
	tree arg0 = CALL_EXPR_ARG (exp, 0);
	tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
	gcc_assert (fold_expr != NULL_TREE);
	return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
      }
    }
  /* Determine whether the builtin function is available under the current ISA.
     Originally the builtin was not created if it wasn't applicable to the
     current ISA based on the command line switches.  With function specific
     options, we need to check in the context of the function making the call
     whether it is supported.  */
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
				       NULL, (enum fpmath_unit) 0, false);

      if (!opts)
	error ("%qE needs unknown isa option", fndecl);
      else
	{
	  gcc_assert (opts != NULL);
	  error ("%qE needs isa option %s", fndecl, opts);
	  free (opts);
	}
      return const0_rtx;
    }
  switch (fcode)
    {
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
      op0 = gen_rtx_MEM (mode1, op0);

      if (!insn_data[icode].operand[0].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[1].predicate (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (!insn_data[icode].operand[2].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);
    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));

      emit_insn (gen_sse2_clflush (op0));
      return 0;
    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;
    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);
    case IX86_BUILTIN_INFQ:
    case IX86_BUILTIN_HUGE_VALQ:
      {
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

	tmp = validize_mem (force_const_mem (mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (mode);

	emit_move_insn (target, tmp);
	return target;
      }
    case IX86_BUILTIN_RDPMC:
    case IX86_BUILTIN_RDTSC:
    case IX86_BUILTIN_RDTSCP:

      op0 = gen_reg_rtx (DImode);
      op1 = gen_reg_rtx (DImode);

      if (fcode == IX86_BUILTIN_RDPMC)
	{
	  arg0 = CALL_EXPR_ARG (exp, 0);
	  op2 = expand_normal (arg0);
	  if (!register_operand (op2, SImode))
	    op2 = copy_to_mode_reg (SImode, op2);

	  insn = (TARGET_64BIT
		  ? gen_rdpmc_rex64 (op0, op1, op2)
		  : gen_rdpmc (op0, op2));
	  emit_insn (insn);
	}
      else if (fcode == IX86_BUILTIN_RDTSC)
	{
	  insn = (TARGET_64BIT
		  ? gen_rdtsc_rex64 (op0, op1)
		  : gen_rdtsc (op0));
	  emit_insn (insn);
	}
      else
	{
	  op2 = gen_reg_rtx (SImode);

	  insn = (TARGET_64BIT
		  ? gen_rdtscp_rex64 (op0, op1, op2)
		  : gen_rdtscp (op0, op2));
	  emit_insn (insn);

	  arg0 = CALL_EXPR_ARG (exp, 0);
	  op4 = expand_normal (arg0);
	  if (!address_operand (op4, VOIDmode))
	    {
	      op4 = convert_memory_address (Pmode, op4);
	      op4 = copy_addr_to_reg (op4);
	    }
	  emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
	}

      if (target == 0)
	{
	  /* mode is VOIDmode if __builtin_rd* has been called
	     without lhs.  */
	  if (mode == VOIDmode)
	    return target;
	  target = gen_reg_rtx (mode);
	}

      if (TARGET_64BIT)
	{
	  op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
				     op1, 1, OPTAB_DIRECT);
	  op0 = expand_simple_binop (DImode, IOR, op0, op1,
				     op0, 1, OPTAB_DIRECT);
	}

      emit_move_insn (target, op0);
      return target;
    case IX86_BUILTIN_FXSAVE:
    case IX86_BUILTIN_FXRSTOR:
    case IX86_BUILTIN_FXSAVE64:
    case IX86_BUILTIN_FXRSTOR64:
      switch (fcode)
	{
	case IX86_BUILTIN_FXSAVE:
	  icode = CODE_FOR_fxsave;
	  break;
	case IX86_BUILTIN_FXRSTOR:
	  icode = CODE_FOR_fxrstor;
	  break;
	case IX86_BUILTIN_FXSAVE64:
	  icode = CODE_FOR_fxsave64;
	  break;
	case IX86_BUILTIN_FXRSTOR64:
	  icode = CODE_FOR_fxrstor64;
	  break;
	default:
	  gcc_unreachable ();
	}

      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);

      if (!address_operand (op0, VOIDmode))
	{
	  op0 = convert_memory_address (Pmode, op0);
	  op0 = copy_addr_to_reg (op0);
	}
      op0 = gen_rtx_MEM (BLKmode, op0);

      pat = GEN_FCN (icode) (op0);
      if (pat)
	emit_insn (pat);
      return 0;
    case IX86_BUILTIN_XSAVE:
    case IX86_BUILTIN_XRSTOR:
    case IX86_BUILTIN_XSAVE64:
    case IX86_BUILTIN_XRSTOR64:
    case IX86_BUILTIN_XSAVEOPT:
    case IX86_BUILTIN_XSAVEOPT64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      if (!address_operand (op0, VOIDmode))
	{
	  op0 = convert_memory_address (Pmode, op0);
	  op0 = copy_addr_to_reg (op0);
	}
      op0 = gen_rtx_MEM (BLKmode, op0);

      op1 = force_reg (DImode, op1);

      if (TARGET_64BIT)
	{
	  op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
				     NULL, 1, OPTAB_DIRECT);
	  switch (fcode)
	    {
	    case IX86_BUILTIN_XSAVE:
	      icode = CODE_FOR_xsave_rex64;
	      break;
	    case IX86_BUILTIN_XRSTOR:
	      icode = CODE_FOR_xrstor_rex64;
	      break;
	    case IX86_BUILTIN_XSAVE64:
	      icode = CODE_FOR_xsave64;
	      break;
	    case IX86_BUILTIN_XRSTOR64:
	      icode = CODE_FOR_xrstor64;
	      break;
	    case IX86_BUILTIN_XSAVEOPT:
	      icode = CODE_FOR_xsaveopt_rex64;
	      break;
	    case IX86_BUILTIN_XSAVEOPT64:
	      icode = CODE_FOR_xsaveopt64;
	      break;
	    default:
	      gcc_unreachable ();
	    }

	  op2 = gen_lowpart (SImode, op2);
	  op1 = gen_lowpart (SImode, op1);
	  pat = GEN_FCN (icode) (op0, op1, op2);
	}
      else
	{
	  switch (fcode)
	    {
	    case IX86_BUILTIN_XSAVE:
	      icode = CODE_FOR_xsave;
	      break;
	    case IX86_BUILTIN_XRSTOR:
	      icode = CODE_FOR_xrstor;
	      break;
	    case IX86_BUILTIN_XSAVEOPT:
	      icode = CODE_FOR_xsaveopt;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  pat = GEN_FCN (icode) (op0, op1);
	}

      if (pat)
	emit_insn (pat);
      return 0;
    case IX86_BUILTIN_LLWPCB:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_lwp_llwpcb;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
      emit_insn (gen_lwp_llwpcb (op0));
      return 0;

    case IX86_BUILTIN_SLWPCB:
      icode = CODE_FOR_lwp_slwpcb;
      if (!target
	  || !insn_data[icode].operand[0].predicate (target, Pmode))
	target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;
    case IX86_BUILTIN_BEXTRI32:
    case IX86_BUILTIN_BEXTRI64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      icode = (fcode == IX86_BUILTIN_BEXTRI32
	       ? CODE_FOR_tbm_bextri_si
	       : CODE_FOR_tbm_bextri_di);
      if (!CONST_INT_P (op1))
	{
	  error ("last argument must be an immediate");
	  return const0_rtx;
	}
      else
	{
	  unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
	  unsigned char lsb_index = INTVAL (op1) & 0xFF;
	  op1 = GEN_INT (length);
	  op2 = GEN_INT (lsb_index);
	  pat = GEN_FCN (icode) (target, op0, op1, op2);
	  if (pat)
	    emit_insn (pat);
	  return target;
	}
    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;

rdrand_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
	{
	  op1 = convert_memory_address (Pmode, op1);
	  op1 = copy_addr_to_reg (op1);
	}
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op1 = gen_reg_rtx (SImode);
      emit_move_insn (op1, CONST1_RTX (SImode));

      /* Emit SImode conditional move.  */
      if (mode0 == HImode)
	{
	  op2 = gen_reg_rtx (SImode);
	  emit_insn (gen_zero_extendhisi2 (op2, op0));
	}
      else if (mode0 == SImode)
	op2 = op0;
      else
	op2 = gen_rtx_SUBREG (SImode, op0, 0);

      if (target == 0)
	target = gen_reg_rtx (SImode);

      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
			 const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
      return target;
    case IX86_BUILTIN_RDSEED16_STEP:
      icode = CODE_FOR_rdseedhi_1;
      mode0 = HImode;
      goto rdseed_step;

    case IX86_BUILTIN_RDSEED32_STEP:
      icode = CODE_FOR_rdseedsi_1;
      mode0 = SImode;
      goto rdseed_step;

    case IX86_BUILTIN_RDSEED64_STEP:
      icode = CODE_FOR_rdseeddi_1;
      mode0 = DImode;

rdseed_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
	{
	  op1 = convert_memory_address (Pmode, op1);
	  op1 = copy_addr_to_reg (op1);
	}
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op2 = gen_reg_rtx (QImode);

      pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
			 const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op2, pat));

      if (target == 0)
	target = gen_reg_rtx (SImode);

      emit_insn (gen_zero_extendqisi2 (target, op2));
      return target;
    case IX86_BUILTIN_ADDCARRYX32:
      icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
      mode0 = SImode;
      goto addcarryx;

    case IX86_BUILTIN_ADDCARRYX64:
      icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
      mode0 = DImode;

addcarryx:
      arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in.  */
      arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1.  */
      arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2.  */
      arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out.  */

      op0 = gen_reg_rtx (QImode);

      /* Generate CF from input operand.  */
      op1 = expand_normal (arg0);
      op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
      emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));

      /* Gen ADCX instruction to compute X+Y+CF.  */
      op2 = expand_normal (arg1);
      op3 = expand_normal (arg2);

      if (!REG_P (op2))
	op2 = copy_to_mode_reg (mode0, op2);
      if (!REG_P (op3))
	op3 = copy_to_mode_reg (mode0, op3);

      op0 = gen_reg_rtx (mode0);

      op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
      pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
      emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));

      /* Store the result.  */
      op4 = expand_normal (arg3);
      if (!address_operand (op4, VOIDmode))
	{
	  op4 = convert_memory_address (Pmode, op4);
	  op4 = copy_addr_to_reg (op4);
	}
      emit_move_insn (gen_rtx_MEM (mode0, op4), op0);

      /* Return current CF value.  */
      if (target == 0)
	target = gen_reg_rtx (QImode);

      PUT_MODE (pat, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, target, pat));
      return target;
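      /* Illustration (not original code): adxintrin.h wraps this roughly as

	     unsigned char
	     _addcarryx_u32 (unsigned char c_in, unsigned int x,
			     unsigned int y, unsigned int *sum_out)
	     {
	       return __builtin_ia32_addcarryx_u32 (c_in, x, y, sum_out);
	     }

	 matching the c_in/src1/src2/sum_out argument comments above.  */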
    case IX86_BUILTIN_GATHERSIV2DF:
      icode = CODE_FOR_avx2_gathersiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DF:
      icode = CODE_FOR_avx2_gatherdiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DF:
      icode = CODE_FOR_avx2_gatherdiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SF:
      icode = CODE_FOR_avx2_gathersiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SF:
      icode = CODE_FOR_avx2_gathersiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SF:
      icode = CODE_FOR_avx2_gatherdiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV2DI:
      icode = CODE_FOR_avx2_gathersiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DI:
      icode = CODE_FOR_avx2_gatherdiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DI:
      icode = CODE_FOR_avx2_gatherdiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SI:
      icode = CODE_FOR_avx2_gathersiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SI:
      icode = CODE_FOR_avx2_gathersiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SI:
      icode = CODE_FOR_avx2_gatherdiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
gather_gen:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      /* Note the arg order is different from the operand order.  */
      mode0 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[3].mode;
      mode3 = insn_data[icode].operand[4].mode;
      mode4 = insn_data[icode].operand[5].mode;

      if (target == NULL_RTX
	  || GET_MODE (target) != insn_data[icode].operand[0].mode)
	subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
      else
	subtarget = target;

      if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
	  || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
	{
	  rtx half = gen_reg_rtx (V4SImode);
	  if (!nonimmediate_operand (op2, V8SImode))
	    op2 = copy_to_mode_reg (V8SImode, op2);
	  emit_insn (gen_vec_extract_lo_v8si (half, op2));
	  op2 = half;
	}
      else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
	       || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
	{
	  rtx (*gen) (rtx, rtx);
	  rtx half = gen_reg_rtx (mode0);
	  if (mode0 == V4SFmode)
	    gen = gen_vec_extract_lo_v8sf;
	  else
	    gen = gen_vec_extract_lo_v8si;
	  if (!nonimmediate_operand (op0, GET_MODE (op0)))
	    op0 = copy_to_mode_reg (GET_MODE (op0), op0);
	  emit_insn (gen (half, op0));
	  op0 = half;
	  if (!nonimmediate_operand (op3, GET_MODE (op3)))
	    op3 = copy_to_mode_reg (GET_MODE (op3), op3);
	  emit_insn (gen (half, op3));
	  op3 = half;
	}

      /* Force memory operand only with base register here.  But we
	 don't want to do it on memory operand for other builtin
	 functions.  */
      op1 = force_reg (Pmode, convert_to_mode (Pmode, op1, 1));

      if (!insn_data[icode].operand[1].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[2].predicate (op1, Pmode))
	op1 = copy_to_mode_reg (Pmode, op1);
      if (!insn_data[icode].operand[3].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      if (!insn_data[icode].operand[4].predicate (op3, mode3))
	op3 = copy_to_mode_reg (mode3, op3);
      if (!insn_data[icode].operand[5].predicate (op4, mode4))
	{
	  error ("last argument must be scale 1, 2, 4, 8");
	  return const0_rtx;
	}
      /* Optimize.  If mask is known to have all high bits set,
	 replace op0 with pc_rtx to signal that the instruction
	 overwrites the whole destination and doesn't use its
	 previous contents.  */
      if (optimize)
	{
	  if (TREE_CODE (arg3) == VECTOR_CST)
	    {
	      unsigned int negative = 0;
	      for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
		{
		  tree cst = VECTOR_CST_ELT (arg3, i);
		  if (TREE_CODE (cst) == INTEGER_CST
		      && tree_int_cst_sign_bit (cst))
		    negative++;
		  else if (TREE_CODE (cst) == REAL_CST
			   && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
		    negative++;
		}
	      if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
		op0 = pc_rtx;
	    }
	  else if (TREE_CODE (arg3) == SSA_NAME)
	    {
	      /* Recognize also when mask is like:
		 __v2df src = _mm_setzero_pd ();
		 __v2df mask = _mm_cmpeq_pd (src, src);
		 or
		 __v8sf src = _mm256_setzero_ps ();
		 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
		 as that is a cheaper way to load all ones into
		 a register than having to load a constant from
		 memory.  */
	      gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
	      if (is_gimple_call (def_stmt))
		{
		  tree fndecl = gimple_call_fndecl (def_stmt);
		  if (fndecl
		      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
		    switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
		      {
		      case IX86_BUILTIN_CMPPD:
		      case IX86_BUILTIN_CMPPS:
		      case IX86_BUILTIN_CMPPD256:
		      case IX86_BUILTIN_CMPPS256:
			if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
			  break;
			/* FALLTHRU */
		      case IX86_BUILTIN_CMPEQPD:
		      case IX86_BUILTIN_CMPEQPS:
			if (initializer_zerop (gimple_call_arg (def_stmt, 0))
			    && initializer_zerop (gimple_call_arg (def_stmt,
								   1)))
			  op0 = pc_rtx;
			break;
		      default:
			break;
		      }
		}
	    }
	}

      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
      if (! pat)
	return const0_rtx;
      emit_insn (pat);
      if (fcode == IX86_BUILTIN_GATHERDIV8SF
	  || fcode == IX86_BUILTIN_GATHERDIV8SI)
	{
	  enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
				    ? V4SFmode : V4SImode;
	  if (target == NULL_RTX)
	    target = gen_reg_rtx (tmode);
	  if (tmode == V4SFmode)
	    emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
	  else
	    emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
	}
      else
	target = subtarget;

      return target;
    case IX86_BUILTIN_XABORT:
      icode = CODE_FOR_xabort;
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;
      if (!insn_data[icode].operand[0].predicate (op0, mode0))
	{
	  error ("the xabort's argument must be an 8-bit immediate");
	  return const0_rtx;
	}
      emit_insn (gen_xabort (op0));
      return 0;

    default:
      break;
    }
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_special_args_builtin (d, exp, target);

  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      switch (fcode)
	{
	case IX86_BUILTIN_FABSQ:
	case IX86_BUILTIN_COPYSIGNQ:
	  if (!TARGET_SSE)
	    /* Emit a normal call if SSE isn't available.  */
	    return expand_call (exp, target, ignore);
	default:
	  return ix86_expand_args_builtin (d, exp, target);
	}

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);

  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);

  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
					    (enum ix86_builtin_func_type)
					    d->flag, d->comparison);

  gcc_unreachable ();
}
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */
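/* For illustration (hedged, not original commentary): with -O3 and -mavx
   a loop like

       void f (double *a, int n)
       {
	 int i;
	 for (i = 0; i < n; i++)
	   a[i] = __builtin_sqrt (a[i]);
       }

   lets the vectorizer query this hook with BUILT_IN_SQRT and a 4 x DFmode
   vector type, receiving the IX86_BUILTIN_SQRTPD256 decl in return.  */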
static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
				  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_SQRTPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_SQRTPD256];
	}
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
	}
      break;

    case BUILT_IN_IFLOOR:
    case BUILT_IN_LFLOOR:
    case BUILT_IN_LLFLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_IFLOORF:
    case BUILT_IN_LFLOORF:
    case BUILT_IN_LLFLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
	}
      break;

    case BUILT_IN_ICEIL:
    case BUILT_IN_LCEIL:
    case BUILT_IN_LLCEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_ICEILF:
    case BUILT_IN_LCEILF:
    case BUILT_IN_LLCEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
	}
      break;

    case BUILT_IN_IRINT:
    case BUILT_IN_LRINT:
    case BUILT_IN_LLRINT:
      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_IRINTF:
    case BUILT_IN_LRINTF:
    case BUILT_IN_LLRINTF:
      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
	}
      break;

    case BUILT_IN_IROUND:
    case BUILT_IN_LROUND:
    case BUILT_IN_LLROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_IROUNDF:
    case BUILT_IN_LROUNDF:
    case BUILT_IN_LLROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
	}
      break;

    case BUILT_IN_COPYSIGN:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
	}
      break;

    case BUILT_IN_COPYSIGNF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
	}
      break;

    case BUILT_IN_FLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD256];
	}
      break;

    case BUILT_IN_FLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS256];
	}
      break;

    case BUILT_IN_CEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CEILPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPD256];
	}
      break;

    case BUILT_IN_CEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CEILPS256];
	}
      break;

    case BUILT_IN_TRUNC:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
	}
      break;

    case BUILT_IN_TRUNCF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
	}
      break;

    case BUILT_IN_RINT:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_RINTPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_RINTPD256];
	}
      break;

    case BUILT_IN_RINTF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_RINTPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_RINTPS256];
	}
      break;

    case BUILT_IN_ROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
	}
      break;

    case BUILT_IN_ROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
	}
      break;

    case BUILT_IN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPD];
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
	}
      break;

    case BUILT_IN_FMAF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPS];
	  if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
	}
      break;

    default:
      break;
    }

  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
				type_in);

  return NULL_TREE;
}
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */
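/* Naming sketch (illustrative): for BUILT_IN_SINF with 4 x SFmode the
   code below builds the external name "vmlsSin4", and for BUILT_IN_SIN
   with 2 x DFmode it builds "vmldSin2"; -mveclibabi=svml selects this
   handler.  */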
static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
    case BUILT_IN_ATAN:
    case BUILT_IN_ATAN2:
    case BUILT_IN_ATANH:
    case BUILT_IN_CBRT:
    case BUILT_IN_SINH:
    case BUILT_IN_SIN:
    case BUILT_IN_ASINH:
    case BUILT_IN_ASIN:
    case BUILT_IN_COSH:
    case BUILT_IN_COS:
    case BUILT_IN_ACOSH:
    case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_EXPF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG10F:
    case BUILT_IN_POWF:
    case BUILT_IN_TANHF:
    case BUILT_IN_TANF:
    case BUILT_IN_ATANF:
    case BUILT_IN_ATAN2F:
    case BUILT_IN_ATANHF:
    case BUILT_IN_CBRTF:
    case BUILT_IN_SINHF:
    case BUILT_IN_SINF:
    case BUILT_IN_ASINHF:
    case BUILT_IN_ASINF:
    case BUILT_IN_COSHF:
    case BUILT_IN_COSF:
    case BUILT_IN_ACOSHF:
    case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));

  if (fn == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (fn == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (el_mode == SFmode)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase. */
  name[4] &= ~0x20;

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */
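/* Naming sketch (illustrative): starting from the "__vr.._" template
   below, BUILT_IN_SIN with 2 x DFmode becomes "__vrd2_sin" and
   BUILT_IN_SINF with 4 x SFmode becomes "__vrs4_sinf";
   -mveclibabi=acml selects this handler.  */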
static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      name[4] = 'd';
      name[5] = '2';
      if (el_mode != DFmode
	  || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_SINF:
    case BUILT_IN_COSF:
    case BUILT_IN_EXPF:
    case BUILT_IN_POWF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG2F:
    case BUILT_IN_LOG10F:
      name[4] = 's';
      name[5] = '4';
      if (el_mode != SFmode
	  || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Returns a decl of a function that implements gather load with
   memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
   Return NULL_TREE if it is not available.  */
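/* Illustration (hedged): for a gather of V4DFmode data indexed by an
   SImode index vector, the switch below hands back the decl for
   IX86_BUILTIN_GATHERALTSIV4DF, which the vectorizer then calls like
   any other target builtin.  */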
static tree
ix86_vectorize_builtin_gather (const_tree mem_vectype,
			       const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  if (! TARGET_AVX2)
    return NULL_TREE;

  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
	  && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*gather* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (mem_vectype))
    {
    case V2DFmode:
      code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
      break;
    case V4DFmode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
      break;
    case V2DImode:
      code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
      break;
    case V4DImode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
      break;
    case V4SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
      break;
    case V8SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
      break;
    case V4SImode:
      code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
      break;
    case V8SImode:
      code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
      break;
    default:
      return NULL_TREE;
    }

  return ix86_builtins[code];
}
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */
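/* Illustration (hedged): under -ffast-math, 1.0f / sqrtf (x) can take
   the rsqrtss/rsqrtps path; this hook maps a sqrt builtin code to its
   reciprocal counterpart so the caller can emit the cheap approximation
   (typically followed elsewhere by a Newton-Raphson refinement step)
   instead of a full square root and divide.  */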
static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
			 bool sqrt ATTRIBUTE_UNUSED)
{
  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
	 && flag_finite_math_only && !flag_trapping_math
	 && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
	/* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS_NR:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];

      case IX86_BUILTIN_SQRTPS_NR256:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];

      default:
	return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
	/* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
	return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
	return NULL_TREE;
      }
}
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */
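/* Worked example (illustrative, not original commentary): for V4SFmode
   the parallel (1 0 3 2) packs each element index into a 2-bit field,
   giving imm8 0xb1, so the function returns 0xb2 (imm8 + 1); the +1
   bias lets a zero return still mean "no match".  */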
int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
	 within the low 128-bit lane, but the high 128-bit lane must
	 mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */
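/* Worked example (illustrative): for V4DFmode the parallel (2 3 4 5)
   selects the high half of operand 1 and the low half of operand 2;
   each half's leading index is divided by nelt2 = 2 and packed into a
   4-bit field, giving imm8 0x21 and a return value of 0x22.  */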
int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
	return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
	return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
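/* Illustration (hedged): with a red zone the operand is simply stored
   at sp - RED_ZONE_SIZE without touching the stack pointer; otherwise
   it is pushed through a PRE_DEC of the stack pointer and the result
   is a MEM at the new stack address.  */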
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (ix86_using_red_zone ())
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		     gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (DImode,
					       gen_rtx_PRE_DEC (DImode,
								stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_double_mode (mode, &operand, 1, operands, operands + 1);
	    emit_insn (
		       gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
								  stack_pointer_rtx)),
				    operands[1]));
	    emit_insn (
		       gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
								  stack_pointer_rtx)),
				    operands[0]));
	  }
	  break;
	case HImode:
	  /* Store HImodes as SImodes.  */
	  operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		     gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (GET_MODE (operand),
					       gen_rtx_PRE_DEC (SImode,
								stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
/* Free operand from the memory.  */

void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!ix86_using_red_zone ())
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}
/* Return a register priority for hard reg REGNO.  */

static int
ix86_register_priority (int hard_regno)
{
  /* ebp and r13 as the base always want a displacement, r12 as the
     base always wants an index.  So discourage their usage in an
     address.  */
  if (hard_regno == R12_REG || hard_regno == R13_REG)
    return 0;
  if (hard_regno == BP_REG)
    return 1;
  /* New x86-64 int registers result in bigger code size.  Discourage
     them.  */
  if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
    return 2;
  /* New x86-64 SSE registers result in bigger code size.  Discourage
     them.  */
  if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
    return 2;
  /* Usage of AX register results in smaller code.  Prefer it.  */
  if (hard_regno == 0)
    return 4;
  return 3;
}

/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
	return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && standard_80387_constant_p (x) > 0)
	{
	  /* Limit class to non-sse.  */
	  if (regclass == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (regclass == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (regclass == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
	    return regclass;
	}

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
	return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
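
/* For example, loading the DFmode constant 1.0 into FLOAT_SSE_REGS on
   an 80387 target without SSE math takes the CONST_DOUBLE branch
   above: the constant is loadable via fld1, so the class is narrowed
   to FLOAT_REGS.  */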

/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
static reg_class_t
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;

  if (X87_FLOAT_MODE_P (mode))
    {
      if (regclass == FP_TOP_SSE_REGS)
	return FP_TOP_REG;
      else if (regclass == FP_SECOND_SSE_REGS)
	return FP_SECOND_REG;
      else
	return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  return regclass;
}

/* Implement TARGET_SECONDARY_RELOAD.  */

static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		       enum machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && INTEGER_CLASS_P (rclass)
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
		    ? CODE_FOR_reload_noff_load
		    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (!TARGET_64BIT
      && !in_p && mode == QImode
      && INTEGER_CLASS_P (rclass)
      && MAYBE_NON_Q_CLASS_P (rclass))
    {
      int regno = -1;

      if (REG_P (x))
	regno = REGNO (x);

      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
	regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
	return Q_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
          (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	                               (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
           (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
	reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
	reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
	reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}

/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case CLOBBERED_REGS:
      case Q_REGS:
      case NON_Q_REGS:
      case SIREG:
      case DIREG:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
	return true;

      default:
	break;
    }

  return false;
}

/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   To optimize register_move_cost performance, allow inline variant.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
				enum machine_mode mode, int strict)
{
  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
    return false;
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict || lra_in_progress);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
	return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  return false;
}

bool
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}

/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      if (mode == XFmode)
	return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
	return (TARGET_64BIT ? 4 : 6);
      else
	return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
  else
    {
      if (COMPLEX_MODE_P (mode))
	return 2;
      else
	return 1;
    }
}
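
/* For example, XFmode occupies 3 words in the integer registers on
   32-bit targets (12 bytes in 4-byte words) but only 2 on 64-bit,
   while any scalar mode in an FP class needs just one register.  */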

/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			       enum reg_class regclass)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
	return true;

      /* Vector registers do not support subreg with nonzero offsets, which
	 are otherwise valid for integer registers.  Since we can't see
	 whether we have a nonzero offset from here, prohibit all
	 nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
	return true;
    }

  return false;
}

/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

static inline int
inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
			 int in)
{
  int cost;

  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (Q_CLASS_P (regclass) || TARGET_64BIT)
	{
	  if (!in)
	    return ix86_cost->int_store[0];
	  if (TARGET_PARTIAL_REG_DEPENDENCY
	      && optimize_function_for_speed_p (cfun))
	    cost = ix86_cost->movzbl_load;
	  else
	    cost = ix86_cost->int_load[0];
	  if (in == 2)
	    return MAX (cost, ix86_cost->int_store[0]);
	  return cost;
	}
      else
	{
	  if (in == 2)
	    return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
	  if (in)
	    return ix86_cost->movzbl_load;
	  else
	    return ix86_cost->int_store[0] + 4;
	}
      break;
    case 2:
      if (in == 2)
	return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      if (in == 2)
	cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]);
      else if (in)
	cost = ix86_cost->int_load[2];
      else
	cost = ix86_cost->int_store[2];
      return (cost * (((int) GET_MODE_SIZE (mode)
		       + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}

static int
ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
		       bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
}

/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
			 reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (inline_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (targetm.class_max_nregs (class1, mode)
	  > targetm.class_max_nregs (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, ix86_cost->mmxsse_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
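
/* For example, a DFmode move between FLOAT_REGS and SSE_REGS needs
   secondary memory, so the cost above comes out as
   1 + MAX (fp_load[1], fp_store[1]) + MAX (sse_load[1], sse_store[1]);
   neither extra penalty applies since both classes cover the mode in
   one register and neither is an MMX class.  */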

/* Return TRUE if hard register REGNO can hold a value of machine-mode
   MODE.  */

bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;
  if (STACK_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  OImode move is available only when AVX is
	 enabled.  */
      return ((TARGET_AVX && mode == OImode)
	      || VALID_AVX256_REG_MODE (mode)
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (ANY_QI_REGNO_P (regno))
	return true;
      if (!TARGET_PARTIAL_REG_STALL)
	return true;
      /* LRA checks if the hard register is OK for the given mode.
	 QImode values can live in non-QI regs, so we allow all
	 registers here.  */
      if (lra_in_progress)
	return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return true;
  else if (VALID_FP_MODE_P (mode))
    return true;
  else if (VALID_DFP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}

/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}

/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 32
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 32
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}

/* Return the cost of moving between two registers of mode MODE.  */

static int
ix86_set_reg_reg_cost (enum machine_mode mode)
{
  unsigned int units = UNITS_PER_WORD;

  switch (GET_MODE_CLASS (mode))
    {
    default:
      break;

    case MODE_CC:
      units = GET_MODE_SIZE (CCmode);
      break;

    case MODE_FLOAT:
      if ((TARGET_SSE && mode == TFmode)
	  || (TARGET_80387 && mode == XFmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_COMPLEX_FLOAT:
      if ((TARGET_SSE && mode == TCmode)
	  || (TARGET_80387 && mode == XCmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      if ((TARGET_AVX && VALID_AVX256_REG_MODE (mode))
	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
	  || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
	units = GET_MODE_SIZE (mode);
      break;
    }

  /* Return the cost of moving between two registers of mode MODE,
     assuming that the move will be in pieces of at most UNITS bytes.  */
  return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
}
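
/* For example, a DImode set on a 32-bit target falls through to the
   default UNITS_PER_WORD == 4, giving COSTS_N_INSNS ((8 + 3) / 4)
   == COSTS_N_INSNS (2); on 64-bit the same mode moves in one piece.  */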

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
		bool speed)
{
  enum rtx_code code = (enum rtx_code) code_i;
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  enum machine_mode mode = GET_MODE (x);
  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;

  switch (code)
    {
    case SET:
      if (register_operand (SET_DEST (x), VOIDmode)
	  && reg_or_0_operand (SET_SRC (x), VOIDmode))
	{
	  *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
	  return true;
	}
      return false;

    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
	*total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
	*total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
	       && (!TARGET_64BIT
		   || (GET_CODE (x) != LABEL_REF
		       && (GET_CODE (x) != SYMBOL_REF
			   || !SYMBOL_REF_LOCAL_P (x)))))
	*total = 1;
      else
	*total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
	{
	  *total = 0;
	  return true;
	}
      switch (standard_80387_constant_p (x))
	{
	case 1: /* 0.0 */
	  *total = 1;
	  return true;
	default: /* Other constants */
	case 2: /* 1.0 */
	  *total = 2;
	  return true;
	case -1:
	  break;
	}
      if (SSE_FLOAT_MODE_P (mode))
	{
    case CONST_VECTOR:
	  switch (standard_sse_constant_p (x))
	    {
	    case 0:
	      break;
	    case 1: /* 0: xor eliminates false dependency */
	      *total = 0;
	      return true;
	    default: /* -1: cmp contains false dependency */
	      *total = 1;
	      return true;
	    }
	}
      /* Fall back to (MEM (SYMBOL_REF)), since that's where
	 it'll probably end up.  Add a penalty for size.  */
      *total = (COSTS_N_INSNS (1)
		+ (flag_pic != 0 && !TARGET_64BIT)
		+ (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
      return true;

    case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, so make
	 it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
	  && GET_MODE (XEXP (x, 0)) == SImode)
	*total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
	*total = cost->add;
      else
	*total = cost->movzx;
      return false;

    case SIGN_EXTEND:
      *total = cost->movsx;
      return false;

    case ASHIFT:
      if (SCALAR_INT_MODE_P (mode)
	  && GET_MODE_SIZE (mode) < UNITS_PER_WORD
	  && CONST_INT_P (XEXP (x, 1)))
	{
	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	  if (value == 1)
	    {
	      *total = cost->add;
	      return false;
	    }
	  if ((value == 2 || value == 3)
	      && cost->lea <= cost->shift_const)
	    {
	      *total = cost->lea;
	      return false;
	    }
	}
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* ??? Should be SSE vector operation cost.  */
	  /* At least for published AMD latencies, this really is the same
	     as the latency for a simple fpu operation like fabs.  */
	  /* V*QImode is emulated with 1-11 insns.  */
	  if (mode == V16QImode || mode == V32QImode)
	    {
	      int count = 11;
	      if (TARGET_XOP && mode == V16QImode)
		{
		  /* For XOP we use vpshab, which requires a broadcast of the
		     value to the variable shift insn.  For constants this
		     means a V16Q const in mem; even when we can perform the
		     shift with one insn set the cost to prefer paddb.  */
		  if (CONSTANT_P (XEXP (x, 1)))
		    {
		      *total = (cost->fabs
				+ rtx_cost (XEXP (x, 0), code, 0, speed)
				+ (speed ? 2 : COSTS_N_BYTES (16)));
		      return true;
		    }
		  count = 3;
		}
	      else if (TARGET_SSSE3)
		count = 7;
	      *total = cost->fabs * count;
	    }
	  else
	    *total = cost->fabs;
	}
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      if (INTVAL (XEXP (x, 1)) > 32)
		*total = cost->shift_const + COSTS_N_INSNS (2);
	      else
		*total = cost->shift_const * 2;
	    }
	  else
	    {
	      if (GET_CODE (XEXP (x, 1)) == AND)
		*total = cost->shift_var * 2;
	      else
		*total = cost->shift_var * 6 + COSTS_N_INSNS (2);
	    }
	}
      else
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    *total = cost->shift_const;
	  else if (GET_CODE (XEXP (x, 1)) == SUBREG
		   && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
	    {
	      /* Return the cost after shift-and truncation.  */
	      *total = cost->shift_var;
	      return true;
	    }
	  else
	    *total = cost->shift_var;
	}
      return false;

    case FMA:
      {
	rtx sub;

	gcc_assert (FLOAT_MODE_P (mode));
	gcc_assert (TARGET_FMA || TARGET_FMA4);

	/* ??? SSE scalar/vector cost should be used here.  */
	/* ??? Bald assumption that fma has the same cost as fmul.  */
	*total = cost->fmul;
	*total += rtx_cost (XEXP (x, 1), FMA, 1, speed);

	/* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
	sub = XEXP (x, 0);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 0, speed);

	sub = XEXP (x, 2);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 2, speed);
	return true;
      }

    case MULT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE scalar cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fmul;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* V*QImode is emulated with 7-13 insns.  */
	  if (mode == V16QImode || mode == V32QImode)
	    {
	      int extra = 11;
	      if (TARGET_XOP && mode == V16QImode)
		extra = 5;
	      else if (TARGET_SSSE3)
		extra = 6;
	      *total = cost->fmul * 2 + cost->fabs * extra;
	    }
	  /* V*DImode is emulated with 5-8 insns.  */
	  else if (mode == V2DImode || mode == V4DImode)
	    {
	      if (TARGET_XOP && mode == V2DImode)
		*total = cost->fmul * 2 + cost->fabs * 3;
	      else
		*total = cost->fmul * 3 + cost->fabs * 5;
	    }
	  /* Without sse4.1, we don't have PMULLD; it's emulated with 7
	     insns, including two PMULUDQ.  */
	  else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
	    *total = cost->fmul * 2 + cost->fabs * 5;
	  else
	    *total = cost->fmul;
	  return false;
	}
      else
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  int nbits;

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	      for (nbits = 0; value != 0; value &= value - 1)
		nbits++;
	    }
	  else
	    /* This is arbitrary.  */
	    nbits = 7;

	  /* Compute costs correctly for widening multiplication.  */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
		 == GET_MODE_SIZE (mode))
	    {
	      int is_mulwiden = 0;
	      enum machine_mode inner_mode = GET_MODE (op0);

	      if (GET_CODE (op0) == GET_CODE (op1))
		is_mulwiden = 1, op1 = XEXP (op1, 0);
	      else if (CONST_INT_P (op1))
		{
		  if (GET_CODE (op0) == SIGN_EXTEND)
		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
				  == INTVAL (op1);
		  else
		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
		}

	      if (is_mulwiden)
		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
	    }

	  *total = (cost->mult_init[MODE_INDEX (mode)]
		    + nbits * cost->mult_bit
		    + rtx_cost (op0, outer_code, opno, speed)
		    + rtx_cost (op1, outer_code, opno, speed));

	  return true;
	}

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fdiv;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fdiv;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fdiv;
      else
	*total = cost->divide[MODE_INDEX (mode)];
      return false;

    case PLUS:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
	{
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
	      && CONSTANT_P (XEXP (x, 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 1),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == MULT
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 0),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
	    {
	      *total = cost->lea;
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0),
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (XEXP (x, 0), 1),
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
	      return true;
	    }
	}
      /* FALLTHRU */

    case MINUS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE cost should be used here.  */
	  *total = cost->fadd;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fadd;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fadd;
	  return false;
	}
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	{
	  *total = (cost->add * 2
		    + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 0)) != DImode))
		    + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 1)) != DImode)));
	  return true;
	}
      /* FALLTHRU */

    case NEG:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE cost should be used here.  */
	  *total = cost->fchs;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fchs;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fchs;
	  return false;
	}
      /* FALLTHRU */

    case NOT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* ??? Should be SSE vector operation cost.  */
	  /* At least for published AMD latencies, this really is the same
	     as the latency for a simple fpu operation like fabs.  */
	  *total = cost->fabs;
	}
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	*total = cost->add * 2;
      else
	*total = cost->add;
      return false;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
	  && XEXP (XEXP (x, 0), 1) == const1_rtx
	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
	  && XEXP (x, 1) == const0_rtx)
	{
	  /* This kind of construct is implemented using test[bwl].
	     Treat it as if we had an AND.  */
	  *total = (cost->add
		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
		    + rtx_cost (const1_rtx, outer_code, opno, speed));
	  return true;
	}
      return false;

    case FLOAT_EXTEND:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
	*total = 0;
      return false;

    case ABS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fabs;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fabs;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fabs;
      return false;

    case SQRT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fsqrt;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fsqrt;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fsqrt;
      return false;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
	*total = 0;
      return false;

    case VEC_SELECT:
    case VEC_CONCAT:
    case VEC_MERGE:
    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
	 recognizable.  In which case they all pretty much have the
	 same cost.  */
      *total = cost->fabs;
      return true;

    default:
      return false;
    }
}
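
/* As a worked example, (mult:SI (reg) (const_int 7)) has three bits
   set in the constant, so the popcount loop above yields nbits == 3
   and the cost is mult_init[MODE_INDEX (SImode)] + 3 * mult_bit plus
   the costs of the two operands.  */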

#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);

  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
#endif /* TARGET_MACHO */

/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
	reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
	reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}

/* Handle a "callee_pop_aggregate_return" attribute; arguments as
   in struct attribute_spec handler.  */
static tree
ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
					 tree args,
					 int flags ATTRIBUTE_UNUSED,
					 bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (is_attribute_p ("callee_pop_aggregate_return", name))
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, 0) != 0
	       && compare_tree_int (cst, 1) != 0)
	{
	  warning (OPT_Wattributes,
		   "argument to %qE attribute is neither zero, nor one",
		   name);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}

/* Handle a "ms_abi" or "sysv" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_abi_attribute (tree *node, tree name,
			   tree args ATTRIBUTE_UNUSED,
			   int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("ms_abi", name))
    {
      if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
	{
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}

      return NULL_TREE;
    }
  else if (is_attribute_p ("sysv_abi", name))
    {
      if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
	{
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}

      return NULL_TREE;
    }

  return NULL_TREE;
}

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static tree
ix86_handle_fndecl_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}

static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}

/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (Pmode, stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (Pmode,
						   stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
					     aggr ? 8 : 4));
}

/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset, const_tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !targetm.binds_local_p (function))
    return false;

  /* Otherwise ok.  */
  return true;
}

/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;
  unsigned int tmp_regno;

  if (TARGET_64BIT)
    tmp_regno = R10_REG;
  else
    {
      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	tmp_regno = AX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	tmp_regno = DX_REG;
      else
	tmp_regno = CX_REG;
    }

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      tmp = gen_rtx_REG (Pmode, tmp_regno);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;

      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || TARGET_PECOFF)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    emit_jump_insn (gen_indirect_jump (fnaddr));
  else
    {
      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
	fnaddr = legitimize_pic_address (fnaddr,
					 gen_rtx_REG (Pmode, tmp_regno));

      if (!sibcall_insn_operand (fnaddr, word_mode))
	{
	  tmp = gen_rtx_REG (word_mode, tmp_regno);
	  if (GET_MODE (fnaddr) != word_mode)
	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
	  emit_move_insn (tmp, fnaddr);
	  fnaddr = tmp;
	}

      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.
     Note that use_thunk calls assemble_start_function et al.  */
  tmp = get_insns ();
  shorten_branches (tmp);
  final_start_function (tmp, file, 1);
  final (tmp, file, 1);
  final_end_function ();
}

static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}

int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (strip_array_types (type));
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}

/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
					 : MCOUNT_NAME);

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      if (!TARGET_PECOFF && flag_pic)
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
	fprintf (file, "\tcall\t%s\n", mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t%s\n", mcount_name);
    }
}

/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   the vast majority of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	case TYPE_OTHER:
	case TYPE_FCMP:
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  return len;
	}

      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1 + l;
  else
    return 2;
}

#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
     */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if (JUMP_P (start) || CALL_P (start))
		    njumps--, isjump = 1;
		  else
		    isjump = 0;
		  nbytes -= min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if (JUMP_P (insn) || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if (JUMP_P (start) || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
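
/* For example, if the window already holds three jumps in nbytes == 14
   bytes (including a 2-byte jump as INSN), padsize == 15 - 14 + 2 == 3,
   and the 3-byte pad pushes the fourth jump into the next 16-byte
   window.  */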

/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      {
		replace = true;
		break;
	      }
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}

/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* Only happen in exit blocks.  */
      if (JUMP_P (insn)
	  && ANY_RETURN_P (PATTERN (insn)))
	break;

      if (NONDEBUG_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER)
	{
	  insn_count++;
	  if (insn_count >= 4)
	    return insn_count;
	}
    }

  return insn_count;
}

/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR)
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR)
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}

/* Pad short function to 4 instructions.  */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      rtx ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}

/* Fix up a Windows system unwinder issue.  If an EH region falls through into
   the epilogue, the Windows system unwinder will apply epilogue logic and
   produce incorrect offsets.  This can be avoided by adding a nop between
   the last insn that can throw and the first insn of the epilogue.  */

static void
ix86_seh_fixup_eh_fallthru (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      rtx insn, next;

      /* Find the beginning of the epilogue.  */
      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
	  break;
      if (insn == NULL)
	continue;

      /* We only care about preceding insns that can throw.  */
      insn = prev_active_insn (insn);
      if (insn == NULL || !can_throw_internal (insn))
	continue;

      /* Do not separate calls from their debug information.  */
      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
	if (NOTE_P (next)
	    && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
		|| NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
	  insn = next;
	else
	  break;

      emit_insn_after (gen_nops (const1_rtx), insn);
    }
}

/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  if (TARGET_SEH && current_function_has_exception_handlers ())
    ix86_seh_fixup_eh_fallthru ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}

/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (GENERAL_REG_P (recog_data.operand[i])
	&& !QI_REGNO_P (REGNO (recog_data.operand[i])))
      return true;
  return false;
}

/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
  unsigned int regno;
  if (!REG_P (*p))
    return 0;
  regno = REGNO (*p);
  return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
		       extended_reg_mentioned_1, NULL);
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case SImode:
    case HImode:
    case QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
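/* Illustrative encodings behind the -128 exception (byte sequences are a
   sketch, not taken from the assembler):

     addl $-4, %eax       83 c0 fc              imm8
     subl $4, %eax        83 e8 04              imm8  (canonical form)

     addl $-128, %eax     83 c0 80              imm8
     subl $128, %eax      81 e8 80 00 00 00     imm32 (why -128 is kept)  */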
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
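/* Host-side C sketch of the expansion above (illustrative; the DImode case
   is shown, and the halving rounds to odd so the final doubling still
   rounds correctly):

     double floatuns (uint64_t u)
     {
       if ((int64_t) u >= 0)
	 return (double) (int64_t) u;	    // fast path: fits in signed
       uint64_t h = (u >> 1) | (u & 1);	    // i0 = (in >> 1) | (in & 1)
       double f = (double) (int64_t) h;	    // f0 = signed conversion
       return f + f;			    // out = f0 + f0
     }  */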
/* AVX2 does support 32-byte integer vector operations,
   thus the longest vector we are faced with is V32QImode.  */
#define MAX_VECT_LEN	32

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};

static bool canonicalize_perm (struct expand_vec_perm_d *d);
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);

/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline enum machine_mode
get_mode_wider_vector (enum machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  enum machine_mode n = GET_MODE_WIDER_MODE (o);
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  bool ok;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V4DFmode:
    case V4DImode:
    case V8SFmode:
    case V8SImode:
    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      {
	rtx insn, dup;

	/* First attempt to recognize VAL as-is.  */
	dup = gen_rtx_VEC_DUPLICATE (mode, val);
	insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
	if (recog_memoized (insn) < 0)
	  {
	    rtx seq;
	    /* If that fails, force VAL into a register.  */

	    start_sequence ();
	    XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
	    seq = get_insns ();
	    end_sequence ();
	    if (seq)
	      emit_insn_before (seq, insn);

	    ok = recog_memoized (insn) >= 0;
	    gcc_assert (ok);
	  }
      }
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  rtx x;

	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      goto widen;

    case V8QImode:
      if (!mmx_ok)
	return false;
      goto widen;

    case V8HImode:
      if (TARGET_SSE2)
	{
	  struct expand_vec_perm_d dperm;
	  rtx tmp1, tmp2;

	permute:
	  memset (&dperm, 0, sizeof (dperm));
	  dperm.target = target;
	  dperm.vmode = mode;
	  dperm.nelt = GET_MODE_NUNITS (mode);
	  dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
	  dperm.one_operand_p = true;

	  /* Extend to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));

	  /* Insert the SImode value as low element of a V4SImode vector.  */
	  tmp2 = gen_lowpart (V4SImode, dperm.op0);
	  emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));

	  ok = (expand_vec_perm_1 (&dperm)
		|| expand_vec_perm_broadcast_1 (&dperm));
	  gcc_assert (ok);
	  return ok;
	}
      goto widen;

    case V16QImode:
      if (TARGET_SSE2)
	goto permute;
      goto widen;

    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      {
	enum machine_mode smode, wsmode, wvmode;
	rtx x;

	smode = GET_MODE_INNER (mode);
	wvmode = get_mode_wider_vector (mode);
	wsmode = GET_MODE_INNER (wvmode);

	val = convert_modes (wsmode, smode, val, true);
	x = expand_simple_binop (wsmode, ASHIFT, val,
				 GEN_INT (GET_MODE_BITSIZE (smode)),
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

	x = gen_lowpart (wvmode, target);
	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
	gcc_assert (ok);
	return ok;
      }

    case V16HImode:
    case V32QImode:
      {
	enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
	rtx x = gen_reg_rtx (hvmode);

	ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
	gcc_assert (ok);

	x = gen_rtx_VEC_CONCAT (mode, x, x);
	emit_insn (gen_rtx_SET (VOIDmode, target, x));
      }
      return true;

    default:
      return false;
    }
}
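/* Example of the "widen" strategy above (a sketch): broadcasting a HImode
   value V into V8HImode without SSE2 first forms the SImode scalar
   (V << 16) | V, then recurses to broadcast that in V4SImode; each round
   halves the number of elements left to replicate.  */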
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;
  bool use_vector_set = false;

  switch (mode)
    {
    case V2DImode:
      /* For SSE4.1, we normally use vector set.  But if the second
	 element is zero and inter-unit moves are OK, we use movq
	 instead.  */
      use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
			&& !(TARGET_INTER_UNIT_MOVES_TO_VEC
			     && one_var == 0));
      break;
    case V16QImode:
    case V4SImode:
    case V4SFmode:
      use_vector_set = TARGET_SSE4_1;
      break;
    case V8HImode:
      use_vector_set = TARGET_SSE2;
      break;
    case V4HImode:
      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V8SFmode:
    case V4DFmode:
      use_vector_set = TARGET_AVX;
      break;
    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      use_vector_set = TARGET_AVX && TARGET_64BIT;
      break;
    default:
      break;
    }

  if (use_vector_set)
    {
      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
      var = force_reg (GET_MODE_INNER (mode), var);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
      return true;
    }

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
	return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
	new_target = gen_reg_rtx (mode);
      else
	new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
	{
	  /* We need to shuffle the value to the correct position, so
	     create a new pseudo to store the intermediate result.  */

	  /* With SSE2, we can use the integer shuffle insns.  */
	  if (mode != V4SFmode && TARGET_SSE2)
	    {
	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
					    const1_rtx,
					    GEN_INT (one_var == 1 ? 0 : 1),
					    GEN_INT (one_var == 2 ? 0 : 1),
					    GEN_INT (one_var == 3 ? 0 : 1)));
	      if (target != new_target)
		emit_move_insn (target, new_target);
	      return true;
	    }

	  /* Otherwise convert the intermediate result to V4SFmode and
	     use the SSE1 shuffle instructions.  */
	  if (mode != V4SFmode)
	    {
	      tmp = gen_reg_rtx (V4SFmode);
	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
	    }
	  else
	    tmp = new_target;

	  emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
					  const1_rtx,
					  GEN_INT (one_var == 1 ? 0 : 1),
					  GEN_INT (one_var == 2 ? 0+4 : 1+4),
					  GEN_INT (one_var == 3 ? 0+4 : 1+4)));

	  if (mode != V4SFmode)
	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
	  else if (tmp != target)
	    emit_move_insn (target, tmp);
	}
      else if (target != new_target)
	emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
	return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
						var, one_var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}

/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      if (!TARGET_64BIT)
	return false;
    case V4DFmode:
    case V8SFmode:
    case V8SImode:
    case V16HImode:
    case V32QImode:
    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      if (TARGET_SSE4_1)
	break;
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   concatenate to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_concat (enum machine_mode mode,
				rtx target, rtx *ops, int n)
{
  enum machine_mode cmode, hmode = VOIDmode;
  rtx first[8], second[4];
  rtvec v;
  int i, j;

  switch (n)
    {
    case 2:
      switch (mode)
	{
	case V8SImode:
	  cmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V4SFmode;
	  break;
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	case V2DImode:
	  cmode = DImode;
	  break;
	case V2SImode:
	  cmode = SImode;
	  break;
	case V2DFmode:
	  cmode = DFmode;
	  break;
	case V2SFmode:
	  cmode = SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (!register_operand (ops[1], cmode))
	ops[1] = force_reg (cmode, ops[1]);
      if (!register_operand (ops[0], cmode))
	ops[0] = force_reg (cmode, ops[0]);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, ops[0],
						  ops[1])));
      break;

    case 4:
      switch (mode)
	{
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

    case 8:
      switch (mode)
	{
	case V8SImode:
	  cmode = V2SImode;
	  hmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V2SFmode;
	  hmode = V4SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

half:
      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
      i = n - 1;
      j = (n >> 1) - 1;
      for (; i > 0; i -= 2, j--)
	{
	  first[j] = gen_reg_rtx (cmode);
	  v = gen_rtvec (2, ops[i - 1], ops[i]);
	  ix86_expand_vector_init (false, first[j],
				   gen_rtx_PARALLEL (cmode, v));
	}

      n >>= 1;
      if (n > 2)
	{
	  gcc_assert (hmode != VOIDmode);
	  for (i = j = 0; i < n; i += 2, j++)
	    {
	      second[j] = gen_reg_rtx (hmode);
	      ix86_expand_vector_init_concat (hmode, second[j],
					      &first[i], 2);
	    }
	  n >>= 1;
	  ix86_expand_vector_init_concat (mode, target, second, n);
	}
      else
	ix86_expand_vector_init_concat (mode, target, first, n);
      break;

    default:
      gcc_unreachable ();
    }
}

/* A subroutine of ix86_expand_vector_init_general.  Use vector
   interleave to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_interleave (enum machine_mode mode,
				    rtx target, rtx *ops, int n)
{
  enum machine_mode first_imode, second_imode, third_imode, inner_mode;
  int i, j;
  rtx op0, op1;
  rtx (*gen_load_even) (rtx, rtx, rtx);
  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);

  switch (mode)
    {
    case V8HImode:
      gen_load_even = gen_vec_setv8hi;
      gen_interleave_first_low = gen_vec_interleave_lowv4si;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      inner_mode = HImode;
      first_imode = V4SImode;
      second_imode = V2DImode;
      third_imode = VOIDmode;
      break;
    case V16QImode:
      gen_load_even = gen_vec_setv16qi;
      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
      gen_interleave_second_low = gen_vec_interleave_lowv4si;
      inner_mode = QImode;
      first_imode = V8HImode;
      second_imode = V4SImode;
      third_imode = V2DImode;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < n; i++)
    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
      op0 = gen_reg_rtx (SImode);
      emit_move_insn (op0, gen_lowpart (SImode, ops[i + i]));

      /* Insert the SImode value as low element of V4SImode vector.  */
      op1 = gen_reg_rtx (V4SImode);
      op0 = gen_rtx_VEC_MERGE (V4SImode,
			       gen_rtx_VEC_DUPLICATE (V4SImode,
						      op0),
			       CONST0_RTX (V4SImode),
			       const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));

      /* Cast the V4SImode vector back to a vector in original mode.  */
      op0 = gen_reg_rtx (mode);
      emit_move_insn (op0, gen_lowpart (mode, op1));

      /* Load even elements into the second position.  */
      emit_insn (gen_load_even (op0,
				force_reg (inner_mode,
					   ops[i + i + 1]),
				const1_rtx));

      /* Cast vector to FIRST_IMODE vector.  */
      ops[i] = gen_reg_rtx (first_imode);
      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
    }

  /* Interleave low FIRST_IMODE vectors.  */
  for (i = j = 0; i < n; i += 2, j++)
    {
      op0 = gen_reg_rtx (first_imode);
      emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));

      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
      ops[j] = gen_reg_rtx (second_imode);
      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
    }

  /* Interleave low SECOND_IMODE vectors.  */
  switch (second_imode)
    {
    case V4SImode:
      for (i = j = 0; i < n / 2; i += 2, j++)
	{
	  op0 = gen_reg_rtx (second_imode);
	  emit_insn (gen_interleave_second_low (op0, ops[i],
						ops[i + 1]));

	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
	     vector.  */
	  ops[j] = gen_reg_rtx (third_imode);
	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
	}
      second_imode = V2DImode;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      /* FALLTHRU */

    case V2DImode:
      op0 = gen_reg_rtx (second_imode);
      emit_insn (gen_interleave_second_low (op0, ops[0],
					    ops[1]));

      /* Cast the SECOND_IMODE vector back to a vector on original
	 mode.  */
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_lowpart (mode, op0)));
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  rtx ops[32], op0, op1;
  enum machine_mode half_mode = VOIDmode;
  int n, i;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V4DFmode:
    case V4DImode:
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_concat (mode, target, ops, n);
      return;

    case V32QImode:
      half_mode = V16QImode;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      goto half;

half:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      op0 = gen_reg_rtx (half_mode);
      op1 = gen_reg_rtx (half_mode);
      ix86_expand_vector_init_interleave (half_mode, op0, ops,
					  n >> 2);
      ix86_expand_vector_init_interleave (half_mode, op1,
					  &ops[n >> 1], n >> 2);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
      return;

    case V16QImode:
      if (!TARGET_SSE4_1)
	break;
      /* FALLTHRU */

    case V8HImode:
      if (!TARGET_SSE2)
	break;

      /* Don't use ix86_expand_vector_init_interleave if we can't
	 move from GPR to SSE register directly.  */
      if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
	break;

      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
      return;

    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
	{
	  rtx word = NULL_RTX;

	  for (j = 0; j < n_elt_per_word; ++j)
	    {
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

	      if (j == 0)
		word = elt;
	      else
		{
		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);
		}
	    }

	  words[i] = word;
	}

      if (n_words == 1)
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	{
	  rtx tmp = gen_reg_rtx (mode);
	  emit_clobber (tmp);
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
	}
      else if (n_words == 4)
	{
	  rtx tmp = gen_reg_rtx (V4SImode);
	  gcc_assert (word_mode == SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	}
      else
	gcc_unreachable ();
    }
}

/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
	    || GET_CODE (x) == CONST_DOUBLE
	    || GET_CODE (x) == CONST_FIXED))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, one_var),
						  one_var))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
	return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  enum machine_mode half_mode;
  bool use_vec_merge = false;
  rtx tmp;
  static rtx (*gen_extract[6][2]) (rtx, rtx)
    = {
	{ gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
	{ gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
	{ gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
	{ gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
	{ gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
	{ gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
      };
  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
    = {
	{ gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
	{ gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
	{ gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
	{ gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
	{ gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
	{ gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
      };
  int i, j, n;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DImode:
      use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
      if (use_vec_merge)
	break;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (false, tmp, target, 1 - elt);
      if (elt == 0)
	tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      else
	tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      return;

    case V2DFmode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const1_rtx, const0_rtx,
					  GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */
	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_merge = TARGET_SSE4_1;
      break;

    case V32QImode:
      half_mode = V16QImode;
      j = 0;
      n = 16;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      j = 1;
      n = 8;
      goto half;

    case V8SImode:
      half_mode = V4SImode;
      j = 2;
      n = 4;
      goto half;

    case V4DImode:
      half_mode = V2DImode;
      j = 3;
      n = 2;
      goto half;

    case V8SFmode:
      half_mode = V4SFmode;
      j = 4;
      n = 4;
      goto half;

    case V4DFmode:
      half_mode = V2DFmode;
      j = 5;
      n = 2;
      goto half;

half:
      /* Compute offset.  */
      i = elt / n;
      elt %= n;

      gcc_assert (i <= 1);

      /* Extract the half.  */
      tmp = gen_reg_rtx (half_mode);
      emit_insn (gen_extract[j][i] (tmp, target));

      /* Put val in tmp at elt.  */
      ix86_expand_vector_set (false, tmp, val, elt);

      /* Put it back.  */
      emit_insn (gen_insert[j][i] (target, target, tmp));
      return;

    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}

void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
					  GEN_INT (elt), GEN_INT (elt),
					  GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_extr = TARGET_SSE4_1;
      break;

    case V8SFmode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V4SFmode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
	  return;
	}
      break;

    case V4DFmode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V2DFmode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
	  return;
	}
      break;

    case V32QImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V16QImode);
	  if (elt < 16)
	    emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 15);
	  return;
	}
      break;

    case V16HImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V8HImode);
	  if (elt < 8)
	    emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 7);
	  return;
	}
      break;

    case V8SImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V4SImode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
	  return;
	}
      break;

    case V4DImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V2DImode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
	  return;
	}
      break;

    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
   to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
   The upper bits of DEST are undefined, though they shouldn't cause
   exceptions (some bits from src or all zeros are ok).  */

static void
emit_reduc_half (rtx dest, rtx src, int i)
{
  rtx tem;
  switch (GET_MODE (src))
    {
    case V4SFmode:
      if (i == 128)
	tem = gen_sse_movhlps (dest, src, src);
      else
	tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
				   GEN_INT (1 + 4), GEN_INT (1 + 4));
      break;
    case V2DFmode:
      tem = gen_vec_interleave_highv2df (dest, src, src);
      break;
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
				gen_lowpart (V1TImode, src),
				GEN_INT (i / 2));
      break;
    case V8SFmode:
      if (i == 256)
	tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
      else
	tem = gen_avx_shufps256 (dest, src, src,
				 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
      break;
    case V4DFmode:
      if (i == 256)
	tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
      else
	tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      if (i == 256)
	tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
				 gen_lowpart (V4DImode, src),
				 gen_lowpart (V4DImode, src),
				 const1_rtx);
      else
	tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
				  gen_lowpart (V2TImode, src),
				  GEN_INT (i / 2));
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (tem);
}
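/* For instance (a sketch): reducing V4SFmode {a,b,c,d} with a PLUS
   pattern takes log2(nelts) halving steps:

     half = {c,d,_,_}        dst = vec + half = {a+c, b+d, _, _}
     half = {b+d,_,_,_}      dst = dst + half = {a+b+c+d, _, _, _}

   where '_' lanes are don't-care, as described above.  */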
/* Expand a vector reduction.  FN is the binary pattern to reduce;
   DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx half, dst, vec = in;
  enum machine_mode mode = GET_MODE (in);
  int i;

  /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
  if (TARGET_SSE4_1
      && mode == V8HImode
      && fn == gen_uminv8hi3)
    {
      emit_insn (gen_sse4_1_phminposuw (dest, in));
      return;
    }

  for (i = GET_MODE_BITSIZE (mode);
       i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
       i >>= 1)
    {
      half = gen_reg_rtx (mode);
      emit_reduc_half (half, vec, i);
      if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
	dst = dest;
      else
	dst = gen_reg_rtx (mode);
      emit_insn (fn (dst, half, vec));
      vec = dst;
    }
}
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}

/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  return clobbers;
}

/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}

/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
	}
      if (STACK_TOP_P (operands[0]))
	return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%Z0\t%y0";
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
	  else
	    return "fst%Z0\t%y0";
	}
    }
  else
    gcc_unreachable ();
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}
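/* The two sequences emitted above correspond roughly to (illustrative
   assembly; C2 of the FPU status word lands in bit 2 of %ah after fnstsw):

     fnstsw  %ax                      fnstsw  %ax
     sahf                             testb   $0x04, %ah
     jp      label                    jne     label

   The sahf form needs the SAHF instruction, which is not universally
   available in 64-bit mode, hence the TARGET_SAHF guard.  */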
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);
  rtx test;

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
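/* C sketch of the split above (illustrative; fyl2xp1/fyl2x stand in for
   the i387 instructions, which compute y * log2(x+1) and y * log2(x)):

     long double log1p (long double x)
     {
       if (fabsl (x) < 0.29289321881345247561810596348408353L)
	 return fyl2xp1 (x, ln2);	// accurate near zero
       else
	 return fyl2x (1.0L + x, ln2);
     }

   The threshold is 1 - sqrt(2)/2, the documented domain limit of
   fyl2xp1.  */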
/* Emit code for round calculation.  */
void ix86_emit_i387_round (rtx op0, rtx op1)
{
  enum machine_mode inmode = GET_MODE (op1);
  enum machine_mode outmode = GET_MODE (op0);
  rtx e1, e2, res, tmp, tmp1, half;
  rtx scratch = gen_reg_rtx (HImode);
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx jump_label = gen_label_rtx ();
  rtx insn;
  rtx (*gen_abs) (rtx, rtx);
  rtx (*gen_neg) (rtx, rtx);

  switch (inmode)
    {
    case SFmode:
      gen_abs = gen_abssf2;
      break;
    case DFmode:
      gen_abs = gen_absdf2;
      break;
    case XFmode:
      gen_abs = gen_absxf2;
      break;
    default:
      gcc_unreachable ();
    }

  switch (outmode)
    {
    case SFmode:
      gen_neg = gen_negsf2;
      break;
    case DFmode:
      gen_neg = gen_negdf2;
      break;
    case XFmode:
      gen_neg = gen_negxf2;
      break;
    case HImode:
      gen_neg = gen_neghi2;
      break;
    case SImode:
      gen_neg = gen_negsi2;
      break;
    case DImode:
      gen_neg = gen_negdi2;
      break;
    default:
      gcc_unreachable ();
    }

  e1 = gen_reg_rtx (inmode);
  e2 = gen_reg_rtx (inmode);
  res = gen_reg_rtx (outmode);

  half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);

  /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */

  /* scratch = fxam(op1) */
  emit_insn (gen_rtx_SET (VOIDmode, scratch,
			  gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
					  UNSPEC_FXAM)));
  /* e1 = fabs(op1) */
  emit_insn (gen_abs (e1, op1));

  /* e2 = e1 + 0.5 */
  half = force_reg (inmode, half);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (inmode, e1, half)));

  /* res = floor(e2) */
  if (inmode != XFmode)
    {
      tmp1 = gen_reg_rtx (XFmode);

      emit_insn (gen_rtx_SET (VOIDmode, tmp1,
			      gen_rtx_FLOAT_EXTEND (XFmode, e2)));
    }
  else
    tmp1 = e2;

  switch (outmode)
    {
    case SFmode:
    case DFmode:
      {
	rtx tmp0 = gen_reg_rtx (XFmode);

	emit_insn (gen_frndintxf2_floor (tmp0, tmp1));

	emit_insn (gen_rtx_SET (VOIDmode, res,
				gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
						UNSPEC_TRUNC_NOOP)));
      }
      break;
    case XFmode:
      emit_insn (gen_frndintxf2_floor (res, tmp1));
      break;
    case HImode:
      emit_insn (gen_lfloorxfhi2 (res, tmp1));
      break;
    case SImode:
      emit_insn (gen_lfloorxfsi2 (res, tmp1));
      break;
    case DImode:
      emit_insn (gen_lfloorxfdi2 (res, tmp1));
      break;
    default:
      gcc_unreachable ();
    }

  /* flags = signbit(a) */
  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));

  /* if (flags) then res = -res */
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
			      gen_rtx_EQ (VOIDmode, flags, const0_rtx),
			      gen_rtx_LABEL_REF (VOIDmode, jump_label),
			      pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = jump_label;

  emit_insn (gen_neg (res, res));

  emit_label (jump_label);
  LABEL_NUSES (jump_label) = 1;

  emit_move_insn (op0, res);
}
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */

  b = force_reg (mode, b);

  /* x0 = rcp(b) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
					  UNSPEC_RCP)));
  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, b)));

  /* e0 = x0 * e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, e0)));

  /* e1 = x0 + x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_PLUS (mode, x0, x0)));

  /* x1 = e1 - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
			  gen_rtx_MINUS (mode, e1, e0)));

  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, a, x1)));
}
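/* The refinement in scalar C (a sketch; rcp() stands in for the hardware
   rcpss/rcpps estimate, which is accurate to roughly 12 bits):

     float swdiv (float a, float b)
     {
       float x0 = rcp (b);
       float x1 = (x0 + x0) - (b * x0 * x0);   // == x0 * (2 - b*x0)
       return a * x1;			       // one Newton-Raphson step
     }

   Writing the step as (x0 + x0) - (b * x0 * x0) exposes two independent
   operations that can be scheduled in parallel.  */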
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
			 bool recip)
{
  rtx x0, e0, e1, e2, e3, mthree, mhalf;
  REAL_VALUE_TYPE r;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  real_from_integer (&r, VOIDmode, -3, -1, 0);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (mode, true, mthree);
      mhalf = ix86_build_const_vector (mode, true, mhalf);
    }

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */

  a = force_reg (mode, a);

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
					  UNSPEC_RSQRT)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero, mask;

      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX (mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
			      gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
			      gen_rtx_AND (mode, x0, mask)));
    }

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3. */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, e2, e3)));
}
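/* The same computation in scalar C (a sketch; rsqrt() stands in for the
   hardware rsqrtss/rsqrtps estimate):

     float swsqrt (float a)
     {
       float x0 = rsqrt (a);
       float e0 = a * x0;
       float e1 = e0 * x0;
       return (e1 - 3.0f) * (e0 * -0.5f);  // == a*x0/2 * (3 - a*x0*x0)
     }

   Replacing the e0 factor in the last line with x0 yields rsqrt(a)
   instead of sqrt(a), matching the 'recip' flag above.  */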
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}

/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}

/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign.  */
static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      enum machine_mode vmode;

      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
      else
	vmode = mode;

      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
	{
	  /* We need to generate a scalar mode mask in this case.  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
	  mask = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
	}
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
			  gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
			  gen_rtx_IOR (mode, abs_value, sgn)));
}
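/* Bit-level view of the operation above (a sketch for DFmode, with
   M = 0x8000000000000000):

     sgn    = bits(sign)      & M;	// isolate the sign bit
     result = bits(abs_value) | sgn;	// abs_value assumed non-negative

   When the caller passes in the fabs mask (which is ~M), the gen_rtx_NOT
   above recovers M.  */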
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode vmode, mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
			  gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
				  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}

/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
			      bool swap_operands)
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;

  emit_insn (insn (mask, op0, op1,
		   gen_rtx_fmt_ee (code, mode, op0, op1)));
  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
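/* Why this constant works (a sketch, assuming round-to-nearest and
   0 <= x < 2**52 for DFmode):

     double t = x + 0x1p52;	// fraction bits rounded out of the mantissa
     double r = t - 0x1p52;	// r is x rounded to the nearest integer

   The SFmode variant uses 2**23, the mantissa width of float.  */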
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       op0 = (long) tmp
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}

/* Expand SSE2 sequence for computing lround from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	return xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
					    freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}

/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
		   const_double_from_real_value (do_floor
						 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
38441 /* Expand SSE sequence for computing round from OPERAND1 storing
38442 into OPERAND0. Sequence that works without relying on DImode truncation
38443 via cvttsd2siq that is only available on 64bit targets. */
38445 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
38447 /* C code for the stuff we expand below.
38448 double xa = fabs (x), xa2, x2;
38449 if (!isless (xa, TWO52))
38451 Using the absolute value and copying back sign makes
38452 -0.0 -> -0.0 correct.
38453 xa2 = xa + TWO52 - TWO52;
38458 else if (dxa > 0.5)
38460 x2 = copysign (xa2, x);
38463 enum machine_mode mode
= GET_MODE (operand0
);
38464 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
38466 TWO52
= ix86_gen_TWO52 (mode
);
38468 /* Temporary for holding the result, initialized to the input
38469 operand to ease control flow. */
38470 res
= gen_reg_rtx (mode
);
38471 emit_move_insn (res
, operand1
);
38473 /* xa = abs (operand1) */
38474 xa
= ix86_expand_sse_fabs (res
, &mask
);
38476 /* if (!isless (xa, TWO52)) goto label; */
38477 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38479 /* xa2 = xa + TWO52 - TWO52; */
38480 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
38481 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
38483 /* dxa = xa2 - xa; */
38484 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
38486 /* generate 0.5, 1.0 and -0.5 */
38487 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
38488 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
38489 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
38493 tmp
= gen_reg_rtx (mode
);
38494 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
38495 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
38496 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38497 gen_rtx_AND (mode
, one
, tmp
)));
38498 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
38499 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
38500 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
38501 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38502 gen_rtx_AND (mode
, one
, tmp
)));
38503 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
38505 /* res = copysign (xa2, operand1) */
38506 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
38508 emit_label (label
);
38509 LABEL_NUSES (label
) = 1;
38511 emit_move_insn (operand0
, res
);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa2 = xa + TWO52 - TWO52;
     Compensate:
        if (xa2 > xa)
          xa2 -= 1.0;
        x2 = copysign (xa2, x);
        return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
                          gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
                             res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        double xa = fabs (x);
        if (!isless (xa, TWO52))
          return x;
        xa = (double)(long)(xa + nextafter (0.5, 0.0));
        return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
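
/* The constant added above is nextafter (0.5, 0.0) rather than 0.5
   itself: with plain 0.5, an input just below 0.5 (e.g. the largest
   double smaller than 0.5) would have x + 0.5 round to exactly 1.0,
   and the truncation would then yield 1 instead of the correct 0.
   Using pred(0.5) keeps x + half strictly below 1.0 for all
   |x| < 0.5.  */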
/* Expand SSE sequence for computing round
   from OP1 storing into OP0 using sse4 round insn.  */
void
ix86_expand_round_sse4 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx e1, e2, res, half;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx (*gen_copysign) (rtx, rtx, rtx);
  rtx (*gen_round) (rtx, rtx, rtx);

  switch (mode)
    {
    case SFmode:
      gen_copysign = gen_copysignsf3;
      gen_round = gen_sse4_1_roundsf2;
      break;
    case DFmode:
      gen_copysign = gen_copysigndf3;
      gen_round = gen_sse4_1_rounddf2;
      break;
    default:
      gcc_unreachable ();
    }

  /* round (a) = trunc (a + copysign (0.5, a)) */

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
  half = const_double_from_real_value (pred_half, mode);

  /* e1 = copysign (0.5, op1) */
  e1 = gen_reg_rtx (mode);
  emit_insn (gen_copysign (e1, half, op1));

  /* e2 = op1 + e1 */
  e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = trunc (e2) */
  res = gen_reg_rtx (mode);
  emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));

  emit_move_insn (op0, res);
}
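
/* Scalar model of the sequence emitted above (an illustrative sketch
   only, not code the compiler runs):

     double round_model (double a)
     {
       double pred_half = nextafter (0.5, 0.0);
       return trunc (a + copysign (pred_half, a));
     }

   copysign lets one sequence handle both signs, and the SSE4.1 round
   insn with the ROUND_TRUNC control performs the final truncation.  */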
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true,  ix86_handle_cconv_attribute,
    true },
  /* The transactional memory builtins are implicitly regparm or fastcall
     depending on the ABI.  Override the generic do-nothing attribute that
     these builtins were declared with.  */
  { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
    true },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true, true, ix86_handle_cconv_attribute, false },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute,
    false },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
    false },
  { "callee_pop_aggregate_return", 1, 1, false, true, true,
    ix86_handle_callee_pop_aggregate_return, true },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};
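
/* For reference, these attributes appear in user code as e.g.

     int __attribute__ ((fastcall)) f (int a, int b);
     struct s { char c; int i; } __attribute__ ((ms_struct));

   the handler functions above validate the arguments and attach the
   attribute to the decl or type.  */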
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                 tree vectype,
                                 int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
      case scalar_stmt:
        return ix86_cost->scalar_stmt_cost;

      case scalar_load:
        return ix86_cost->scalar_load_cost;

      case scalar_store:
        return ix86_cost->scalar_store_cost;

      case vector_stmt:
        return ix86_cost->vec_stmt_cost;

      case vector_load:
        return ix86_cost->vec_align_load_cost;

      case vector_store:
        return ix86_cost->vec_store_cost;

      case vec_to_scalar:
        return ix86_cost->vec_to_scalar_cost;

      case scalar_to_vec:
        return ix86_cost->scalar_to_vec_cost;

      case unaligned_load:
      case unaligned_store:
        return ix86_cost->vec_unalign_load_cost;

      case cond_branch_taken:
        return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
        return ix86_cost->vec_stmt_cost;

      case vec_construct:
        elements = TYPE_VECTOR_SUBPARTS (vectype);
        return elements / 2 + 1;

      default:
        gcc_unreachable ();
    }
}
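
/* E.g. a vec_construct building a 4-element vector from scalars is
   costed as 4 / 2 + 1 = 3, a rough estimate of the number of
   unpck/movlhps style insns needed to assemble it.  */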
/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
   insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
   insn every time.  */

static GTY(()) rtx vselect_insn;

/* Initialize vselect_insn.  */

static void
init_vselect_insn (void)
{
  unsigned i;
  rtx x;

  x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
  for (i = 0; i < MAX_VECT_LEN; ++i)
    XVECEXP (x, 0, i) = const0_rtx;
  x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
                                                        const0_rtx), x);
  x = gen_rtx_SET (VOIDmode, const0_rtx, x);
  start_sequence ();
  vselect_insn = emit_insn (x);
  end_sequence ();
}
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm,
                unsigned nelt, bool testing_p)
{
  unsigned int i;
  rtx x, save_vconcat;
  int icode;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
  PUT_NUM_ELEM (XVEC (x, 0), nelt);
  for (i = 0; i < nelt; ++i)
    XVECEXP (x, 0, i) = GEN_INT (perm[i]);
  save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
  PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
  SET_DEST (PATTERN (vselect_insn)) = target;
  icode = recog_memoized (vselect_insn);

  if (icode >= 0 && !testing_p)
    emit_insn (copy_rtx (PATTERN (vselect_insn)));

  SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
  INSN_CODE (vselect_insn) = -1;

  return icode >= 0;
}
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
                        const unsigned char *perm, unsigned nelt,
                        bool testing_p)
{
  enum machine_mode v2mode;
  rtx x;
  bool ok;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  PUT_MODE (x, v2mode);
  XEXP (x, 0) = op0;
  XEXP (x, 1) = op1;
  ok = expand_vselect (target, x, perm, nelt, testing_p);
  XEXP (x, 0) = const0_rtx;
  XEXP (x, 1) = const0_rtx;
  return ok;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  enum machine_mode vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, x;
  rtx rperm[32], vperm;

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
        return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */
  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
    case V2DFmode:
    case V4SFmode:
    case V8HImode:
    case V8SImode:
      for (i = 0; i < nelt; ++i)
        mask |= (d->perm[i] >= nelt) << i;
      break;

    case V2DImode:
      for (i = 0; i < 2; ++i)
        mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      vmode = V8HImode;
      goto do_subreg;

    case V4SImode:
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8HImode;
      goto do_subreg;

    case V16QImode:
      /* See if bytes move in pairs so we can use pblendw with
         an immediate argument, rather than pblendvb with a vector
         argument.  */
      for (i = 0; i < 16; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          {
          use_pblendvb:
            for (i = 0; i < nelt; ++i)
              rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

          finish_pblendvb:
            vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
            vperm = force_reg (vmode, vperm);

            if (GET_MODE_SIZE (vmode) == 16)
              emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
            else
              emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
            return true;
          }

      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8HImode;
      /* FALLTHRU */

    do_subreg:
      target = gen_lowpart (vmode, target);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    case V32QImode:
      /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
      for (i = 0; i < 32; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          goto use_pblendvb;
      /* See if bytes move in quadruplets.  If yes, vpblendd
         with immediate can be used.  */
      for (i = 0; i < 32; i += 4)
        if (d->perm[i] + 2 != d->perm[i + 2])
          break;
      if (i < 32)
        {
          /* See if bytes move the same in both lanes.  If yes,
             vpblendw with immediate can be used.  */
          for (i = 0; i < 16; i += 2)
            if (d->perm[i] + 16 != d->perm[i + 16])
              goto use_pblendvb;

          /* Use vpblendw.  */
          for (i = 0; i < 16; ++i)
            mask |= (d->perm[i * 2] >= 32) << i;
          vmode = V16HImode;
          goto do_subreg;
        }

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 4] >= 32) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V16HImode:
      /* See if words move in pairs.  If yes, vpblendd can be used.  */
      for (i = 0; i < 16; i += 2)
        if (d->perm[i] + 1 != d->perm[i + 1])
          break;
      if (i < 16)
        {
          /* See if words move the same in both lanes.  If not,
             vpblendvb must be used.  */
          for (i = 0; i < 8; i++)
            if (d->perm[i] + 8 != d->perm[i + 8])
              {
                /* Use vpblendvb.  */
                for (i = 0; i < 32; ++i)
                  rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);

                vmode = V32QImode;
                nelt = 32;
                target = gen_lowpart (vmode, target);
                op0 = gen_lowpart (vmode, op0);
                op1 = gen_lowpart (vmode, op1);
                goto finish_pblendvb;
              }

          /* Use vpblendw.  */
          for (i = 0; i < 16; ++i)
            mask |= (d->perm[i] >= 16) << i;
          break;
        }

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V4DImode:
      /* Use vpblendd.  */
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8SImode;
      goto do_subreg;

    default:
      gcc_unreachable ();
    }

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
  x = gen_rtx_SET (VOIDmode, target, x);
  emit_insn (x);

  return true;
}
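
/* Example of the immediate encoding above: for V8SFmode with
   perm = { 0 9 2 11 4 13 6 15 }, elements 1, 3, 5 and 7 come from
   op1, so mask = 0xaa and a single blendps with that immediate
   implements the whole permutation.  */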
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
        return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
         from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
        e -= (8 + 4);
      else if (e >= 4)
        e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}
/* Return true if permutation D can be performed as VMODE permutation
   instead.  */

static bool
valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
{
  unsigned int i, j, chunk;

  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
    return false;

  if (GET_MODE_NUNITS (vmode) >= d->nelt)
    return true;

  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      return false;
    else
      for (j = 1; j < chunk; ++j)
        if (d->perm[i] + j != d->perm[i + j])
          return false;

  return true;
}
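
/* Example: a V16QImode permutation that moves bytes in aligned pairs,
   such as { 2 3 0 1 6 7 4 5 ... }, passes this test for V8HImode
   (chunk == 2) and can therefore be carried out as the usually
   cheaper word permutation { 1 0 3 2 ... }.  */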
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz, mask;
  unsigned char perm[32];
  enum machine_mode vmode = V16QImode;
  rtx rperm[32], vperm, target, op0, op1;

  nelt = d->nelt;

  if (!d->one_operand_p)
    {
      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
        {
          if (TARGET_AVX2
              && valid_perm_using_mode_p (V2TImode, d))
            {
              if (d->testing_p)
                return true;

              /* Use vperm2i128 insn.  The pattern uses
                 V4DImode instead of V2TImode.  */
              target = gen_lowpart (V4DImode, d->target);
              op0 = gen_lowpart (V4DImode, d->op0);
              op1 = gen_lowpart (V4DImode, d->op1);
              rperm[0]
                = GEN_INT ((d->perm[0] / (nelt / 2))
                           | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
              emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
              return true;
            }
          return false;
        }
    }
  else
    {
      if (GET_MODE_SIZE (d->vmode) == 16)
        {
          if (!TARGET_SSSE3)
            return false;
        }
      else if (GET_MODE_SIZE (d->vmode) == 32)
        {
          if (!TARGET_AVX2)
            return false;

          /* V4DImode should be already handled through
             expand_vselect by vpermq instruction.  */
          gcc_assert (d->vmode != V4DImode);

          vmode = V32QImode;
          if (d->vmode == V8SImode
              || d->vmode == V16HImode
              || d->vmode == V32QImode)
            {
              /* First see if vpermq can be used for
                 V8SImode/V16HImode/V32QImode.  */
              if (valid_perm_using_mode_p (V4DImode, d))
                {
                  for (i = 0; i < 4; i++)
                    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
                  if (d->testing_p)
                    return true;
                  return expand_vselect (gen_lowpart (V4DImode, d->target),
                                         gen_lowpart (V4DImode, d->op0),
                                         perm, 4, false);
                }

              /* Next see if vpermd can be used.  */
              if (valid_perm_using_mode_p (V8SImode, d))
                vmode = V8SImode;
            }
          /* Or if vpermps can be used.  */
          else if (d->vmode == V8SFmode)
            vmode = V8SImode;

          if (vmode == V32QImode)
            {
              /* vpshufb only works intra lanes, it is not
                 possible to shuffle bytes in between the lanes.  */
              for (i = 0; i < nelt; ++i)
                if ((d->perm[i] ^ i) & (nelt / 2))
                  return false;
            }
        }
      else
        return false;
    }

  if (d->testing_p)
    return true;

  if (vmode == V8SImode)
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
  else
    {
      eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
      if (!d->one_operand_p)
        mask = 2 * nelt - 1;
      else if (vmode == V16QImode)
        mask = nelt - 1;
      else
        mask = nelt / 2 - 1;

      for (i = 0; i < nelt; ++i)
        {
          unsigned j, e = d->perm[i] & mask;
          for (j = 0; j < eltsz; ++j)
            rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
        }
    }

  vperm = gen_rtx_CONST_VECTOR (vmode,
                                gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
  vperm = force_reg (vmode, vperm);

  target = gen_lowpart (vmode, d->target);
  op0 = gen_lowpart (vmode, d->op0);
  if (d->one_operand_p)
    {
      if (vmode == V16QImode)
        emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
      else if (vmode == V32QImode)
        emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
      else if (vmode == V8SFmode)
        emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
      else
        emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
    }
  else
    {
      op1 = gen_lowpart (vmode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->one_operand_p)
    {
      int mask = nelt - 1;
      bool identity_perm = true;
      bool broadcast_perm = true;

      for (i = 0; i < nelt; i++)
        {
          perm2[i] = d->perm[i] & mask;
          if (perm2[i] != i)
            identity_perm = false;
          if (perm2[i])
            broadcast_perm = false;
        }

      if (identity_perm)
        {
          if (!d->testing_p)
            emit_move_insn (d->target, d->op0);
          return true;
        }
      else if (broadcast_perm && TARGET_AVX2)
        {
          /* Use vpbroadcast{b,w,d}.  */
          rtx (*gen) (rtx, rtx) = NULL;
          switch (d->vmode)
            {
            case V32QImode:
              gen = gen_avx2_pbroadcastv32qi_1;
              break;
            case V16HImode:
              gen = gen_avx2_pbroadcastv16hi_1;
              break;
            case V8SImode:
              gen = gen_avx2_pbroadcastv8si_1;
              break;
            case V16QImode:
              gen = gen_avx2_pbroadcastv16qi;
              break;
            case V8HImode:
              gen = gen_avx2_pbroadcastv8hi;
              break;
            case V8SFmode:
              gen = gen_avx2_vec_dupv8sf_1;
              break;
            default:
              /* For other modes prefer other shuffles this function creates.  */
              break;
            }
          if (gen != NULL)
            {
              if (!d->testing_p)
                emit_insn (gen (d->target, d->op0));
              return true;
            }
        }

      if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
        return true;

      /* There are plenty of patterns in sse.md that are written for
         SEL+CONCAT and are not replicated for a single op.  Perhaps
         that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
         every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
        {
          perm2[i] = d->perm[i] & mask;
          perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
        }
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
                                  d->testing_p))
        return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
        {
          for (i = 0; i < nelt; i += 4)
            {
              perm2[i + 0] = d->perm[i + 0] & mask;
              perm2[i + 1] = d->perm[i + 1] & mask;
              perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
              perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
            }

          if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
                                      d->testing_p))
            return true;
        }
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
                              d->testing_p))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned e = d->perm[i];
          if (e >= nelt)
            e -= nelt;
          else
            e += nelt;
          perm2[i] = e;
        }

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
                                  d->testing_p))
        return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
     vpshufb, vpermd, vpermps or vpermq variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  return false;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || !d->one_operand_p)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max;
  bool in_order, ok;
  rtx shift;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e < min)
        min = e;
      if (e > max)
        max = e;
    }
  if (min == 0 || max - min >= nelt)
    return false;

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */
  if (d->testing_p)
    return true;

  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
                                  gen_lowpart (TImode, d->op1),
                                  gen_lowpart (TImode, d->op0), shift));

  d->op0 = d->op1 = d->target;
  d->one_operand_p = true;

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i] - min;
      if (e != i)
        in_order = false;
      d->perm[i] = e;
    }

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    return true;

  ok = expand_vec_perm_1 (d);
  gcc_assert (ok);

  return ok;
}
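
/* Example: with nelt = 16 and perm = { 3 4 5 ... 18 }, min = 3 and
   max = 18, so a single palignr by 3 bytes shifts every element into
   place and the in_order test above succeeds with no further shuffle
   needed.  */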
static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned HOST_WIDE_INT contents;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx seq;
  bool ok, same_halves = false;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      if (d->one_operand_p)
        return false;
    }
  else if (GET_MODE_SIZE (d->vmode) == 32)
    {
      if (!TARGET_AVX)
        return false;
      /* For 32-byte modes allow even d->one_operand_p.
         The lack of cross-lane shuffling in some instructions
         might prevent a single insn shuffle.  */
      dfinal = *d;
      dfinal.testing_p = true;
      /* If expand_vec_perm_interleave3 can expand this into
         a 3 insn sequence, give up and let it be expanded as
         3 insn sequence.  While that is one insn longer,
         it doesn't need a memory operand and in the common
         case that both interleave low and high permutations
         with the same operands are adjacent needs 4 insns
         for both after CSE.  */
      if (expand_vec_perm_interleave3 (&dfinal))
        return false;
    }
  else
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      unsigned HOST_WIDE_INT h1, h2, h3, h4;

      /* Split the two input vectors into 4 halves.  */
      h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
      h2 = h1 << nelt2;
      h3 = h2 << nelt2;
      h4 = h3 << nelt2;

      /* If the elements from the low halves use interleave low, and similarly
         for interleave high.  If the elements are from mis-matched halves, we
         can use shufps for V4SF/V4SI or do a DImode shuffle.  */
      if ((contents & (h1 | h3)) == contents)
        {
          /* punpckl* */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i] = i * 2;
              remap[i + nelt] = i * 2 + 1;
              dremap.perm[i * 2] = i;
              dremap.perm[i * 2 + 1] = i + nelt;
            }
          if (!TARGET_SSE2 && d->vmode == V4SImode)
            dremap.vmode = V4SFmode;
        }
      else if ((contents & (h2 | h4)) == contents)
        {
          /* punpckh* */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nelt2] = i * 2;
              remap[i + nelt + nelt2] = i * 2 + 1;
              dremap.perm[i * 2] = i + nelt2;
              dremap.perm[i * 2 + 1] = i + nelt + nelt2;
            }
          if (!TARGET_SSE2 && d->vmode == V4SImode)
            dremap.vmode = V4SFmode;
        }
      else if ((contents & (h1 | h4)) == contents)
        {
          /* shufps */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i] = i;
              remap[i + nelt + nelt2] = i + nelt2;
              dremap.perm[i] = i;
              dremap.perm[i + nelt2] = i + nelt + nelt2;
            }
          if (nelt != 4)
            {
              /* shufpd */
              dremap.vmode = V2DImode;
              dremap.nelt = 2;
              dremap.perm[0] = 0;
              dremap.perm[1] = 3;
            }
        }
      else if ((contents & (h2 | h3)) == contents)
        {
          /* shufps */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nelt2] = i;
              remap[i + nelt] = i + nelt2;
              dremap.perm[i] = i + nelt2;
              dremap.perm[i + nelt2] = i + nelt;
            }
          if (nelt != 4)
            {
              /* shufpd */
              dremap.vmode = V2DImode;
              dremap.nelt = 2;
              dremap.perm[0] = 1;
              dremap.perm[1] = 2;
            }
        }
      else
        return false;
    }
  else
    {
      unsigned int nelt4 = nelt / 4, nzcnt = 0;
      unsigned HOST_WIDE_INT q[8];
      unsigned int nonzero_halves[4];

      /* Split the two input vectors into 8 quarters.  */
      q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
      for (i = 1; i < 8; ++i)
        q[i] = q[0] << (nelt4 * i);
      for (i = 0; i < 4; ++i)
        if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
          {
            nonzero_halves[nzcnt] = i;
            ++nzcnt;
          }

      if (nzcnt == 1)
        {
          gcc_assert (d->one_operand_p);
          nonzero_halves[1] = nonzero_halves[0];
          same_halves = true;
        }
      else if (d->one_operand_p)
        {
          gcc_assert (nonzero_halves[0] == 0);
          gcc_assert (nonzero_halves[1] == 1);
        }

      if (nzcnt <= 2)
        {
          if (d->perm[0] / nelt2 == nonzero_halves[1])
            {
              /* Attempt to increase the likelihood that dfinal
                 shuffle will be intra-lane.  */
              char tmph = nonzero_halves[0];
              nonzero_halves[0] = nonzero_halves[1];
              nonzero_halves[1] = tmph;
            }

          /* vperm2f128 or vperm2i128.  */
          for (i = 0; i < nelt2; ++i)
            {
              remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
              remap[i + nonzero_halves[0] * nelt2] = i;
              dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
              dremap.perm[i] = i + nonzero_halves[0] * nelt2;
            }

          if (d->vmode != V8SFmode
              && d->vmode != V4DFmode
              && d->vmode != V8SImode)
            {
              dremap.vmode = V8SImode;
              dremap.nelt = 8;
              for (i = 0; i < 4; ++i)
                {
                  dremap.perm[i] = i + nonzero_halves[0] * 4;
                  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
                }
            }
        }
      else if (d->one_operand_p)
        return false;
      else if (TARGET_AVX2
               && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
        {
          /* vpunpckl* */
          for (i = 0; i < nelt4; ++i)
            {
              remap[i] = i * 2;
              remap[i + nelt] = i * 2 + 1;
              remap[i + nelt2] = i * 2 + nelt2;
              remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
              dremap.perm[i * 2] = i;
              dremap.perm[i * 2 + 1] = i + nelt;
              dremap.perm[i * 2 + nelt2] = i + nelt2;
              dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
            }
        }
      else if (TARGET_AVX2
               && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
        {
          /* vpunpckh* */
          for (i = 0; i < nelt4; ++i)
            {
              remap[i + nelt4] = i * 2;
              remap[i + nelt + nelt4] = i * 2 + 1;
              remap[i + nelt2 + nelt4] = i * 2 + nelt2;
              remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
              dremap.perm[i * 2] = i + nelt4;
              dremap.perm[i * 2 + 1] = i + nelt + nelt4;
              dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
              dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
            }
        }
      else
        return false;
    }

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      /* If same_halves is true, both halves of the remapped vector are the
         same.  Avoid cross-lane accesses if possible.  */
      if (same_halves && i >= nelt2)
        {
          gcc_assert (e < nelt2);
          dfinal.perm[i] = e + nelt2;
        }
      else
        dfinal.perm[i] = e;
    }
  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a single vector cross-lane permutation into vpermq followed
   by any of the single insn permutations.  */

static bool
expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
  unsigned contents[2];
  bool ok;

  if (!(TARGET_AVX2
        && (d->vmode == V32QImode || d->vmode == V16HImode)
        && d->one_operand_p))
    return false;

  contents[0] = 0;
  contents[1] = 0;
  for (i = 0; i < nelt2; ++i)
    {
      contents[0] |= 1u << (d->perm[i] / nelt4);
      contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
    }

  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
        if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
          return false;
    }

  if (d->testing_p)
    return true;

  dremap = *d;
  dremap.vmode = V4DImode;
  dremap.nelt = 4;
  dremap.target = gen_reg_rtx (V4DImode);
  dremap.op0 = gen_lowpart (V4DImode, d->op0);
  dremap.op1 = dremap.op0;
  dremap.one_operand_p = true;
  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
        if ((contents[i] & (1u << j)) != 0)
          dremap.perm[2 * i + cnt++] = j;
      for (; cnt < 2; ++cnt)
        dremap.perm[2 * i + cnt] = 0;
    }

  dfinal = *d;
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  for (i = 0, j = 0; i < nelt; ++i)
    {
      if (i == nelt2)
        j = 2;
      dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
      if ((d->perm[i] / nelt4) == dremap.perm[j])
        ;
      else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
        dfinal.perm[i] |= nelt4;
      else
        gcc_unreachable ();
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  ok = expand_vec_perm_1 (&dfinal);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to expand
   a vector permutation using two instructions, vperm2f128 resp.
   vperm2i128 followed by any single in-lane permutation.  */

static bool
expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
  bool ok;

  if (!TARGET_AVX
      || GET_MODE_SIZE (d->vmode) != 32
      || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
    return false;

  dsecond = *d;
  dsecond.one_operand_p = false;
  dsecond.testing_p = true;

  /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
     immediate.  For perm < 16 the second permutation uses
     d->op0 as first operand, for perm >= 16 it uses d->op1
     as first operand.  The second operand is the result of
     vperm2[fi]128.  */
  for (perm = 0; perm < 32; perm++)
    {
      /* Ignore permutations which do not move anything cross-lane.  */
      if (perm < 16)
        {
          /* The second shuffle for e.g. V4DFmode has
             0123 and ABCD operands.
             Ignore AB23, as 23 is already in the second lane
             of the first operand.  */
          if ((perm & 0xc) == (1 << 2)) continue;
          /* And 01CD, as 01 is in the first lane of the first
             operand.  */
          if ((perm & 3) == 0) continue;
          /* And 4567, as then the vperm2[fi]128 doesn't change
             anything on the original 4567 second operand.  */
          if ((perm & 0xf) == ((3 << 2) | 2)) continue;
        }
      else
        {
          /* The second shuffle for e.g. V4DFmode has
             4567 and ABCD operands.
             Ignore AB67, as 67 is already in the second lane
             of the first operand.  */
          if ((perm & 0xc) == (3 << 2)) continue;
          /* And 45CD, as 45 is in the first lane of the first
             operand.  */
          if ((perm & 3) == 2) continue;
          /* And 0123, as then the vperm2[fi]128 doesn't change
             anything on the original 0123 first operand.  */
          if ((perm & 0xf) == (1 << 2)) continue;
        }

      for (i = 0; i < nelt; i++)
        {
          j = d->perm[i] / nelt2;
          if (j == ((perm >> (2 * (i >= nelt2))) & 3))
            dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
          else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
            dsecond.perm[i] = d->perm[i] & (nelt - 1);
          else
            break;
        }

      if (i == nelt)
        {
          start_sequence ();
          ok = expand_vec_perm_1 (&dsecond);
          end_sequence ();
        }
      else
        ok = false;

      if (ok)
        {
          if (d->testing_p)
            return true;

          /* Found a usable second shuffle.  dfirst will be
             vperm2f128 on d->op0 and d->op1.  */
          dsecond.testing_p = false;
          dfirst = *d;
          dfirst.target = gen_reg_rtx (d->vmode);
          for (i = 0; i < nelt; i++)
            dfirst.perm[i] = (i & (nelt2 - 1))
                             + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;

          ok = expand_vec_perm_1 (&dfirst);
          gcc_assert (ok);

          /* And dsecond is some single insn shuffle, taking
             d->op0 and result of vperm2f128 (if perm < 16) or
             d->op1 and result of vperm2f128 (otherwise).  */
          dsecond.op1 = dfirst.target;
          if (perm >= 16)
            dsecond.op0 = dfirst.op1;

          ok = expand_vec_perm_1 (&dsecond);
          gcc_assert (ok);

          return true;
        }

      /* For one operand, the only useful vperm2f128 permutation is 0x10.  */
      if (d->one_operand_p)
        return false;
    }

  return false;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation using 2 intra-lane interleave insns
   and cross-lane shuffle for 32-byte vectors.  */

static bool
expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
    ;
  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
    ;
  else
    return false;

  nelt = d->nelt;
  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
    return false;
  for (i = 0; i < nelt; i += 2)
    if (d->perm[i] != d->perm[0] + i / 2
        || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
      return false;

  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V32QImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv32qi;
      else
        gen = gen_vec_interleave_lowv32qi;
      break;
    case V16HImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv16hi;
      else
        gen = gen_vec_interleave_lowv16hi;
      break;
    case V8SImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv8si;
      else
        gen = gen_vec_interleave_lowv8si;
      break;
    case V4DImode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv4di;
      else
        gen = gen_vec_interleave_lowv4di;
      break;
    case V8SFmode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv8sf;
      else
        gen = gen_vec_interleave_lowv8sf;
      break;
    case V4DFmode:
      if (d->perm[0])
        gen = gen_vec_interleave_highv4df;
      else
        gen = gen_vec_interleave_lowv4df;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (d->target, d->op0, d->op1));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement
   a single vector permutation using a single intra-lane vector
   permutation, vperm2f128 swapping the lanes and vblend* insn blending
   the non-swapped and swapped vectors together.  */

static bool
expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
  rtx seq;
  bool ok;
  rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;

  if (!TARGET_AVX
      || TARGET_AVX2
      || (d->vmode != V8SFmode && d->vmode != V4DFmode)
      || !d->one_operand_p)
    return false;

  dfirst = *d;
  for (i = 0; i < nelt; i++)
    dfirst.perm[i] = 0xff;
  for (i = 0, msk = 0; i < nelt; i++)
    {
      j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
      if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
        return false;
      dfirst.perm[j] = d->perm[i];
      if (j != i)
        msk |= (1 << i);
    }
  for (i = 0; i < nelt; i++)
    if (dfirst.perm[i] == 0xff)
      dfirst.perm[i] = i;

  if (!d->testing_p)
    dfirst.target = gen_reg_rtx (dfirst.vmode);

  start_sequence ();
  ok = expand_vec_perm_1 (&dfirst);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  emit_insn (seq);

  dsecond = *d;
  dsecond.op0 = dfirst.target;
  dsecond.op1 = dfirst.target;
  dsecond.one_operand_p = true;
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  for (i = 0; i < nelt; i++)
    dsecond.perm[i] = i ^ nelt2;

  ok = expand_vec_perm_1 (&dsecond);
  gcc_assert (ok);

  blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
  emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement a V4DF
   permutation using two vperm2f128, followed by a vshufpd insn blending
   the two vectors together.  */

static bool
expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond, dthird;
  bool ok;

  if (!TARGET_AVX || (d->vmode != V4DFmode))
    return false;

  if (d->testing_p)
    return true;

  dfirst = *d;
  dsecond = *d;
  dthird = *d;

  dfirst.perm[0] = (d->perm[0] & ~1);
  dfirst.perm[1] = (d->perm[0] & ~1) + 1;
  dfirst.perm[2] = (d->perm[2] & ~1);
  dfirst.perm[3] = (d->perm[2] & ~1) + 1;
  dsecond.perm[0] = (d->perm[1] & ~1);
  dsecond.perm[1] = (d->perm[1] & ~1) + 1;
  dsecond.perm[2] = (d->perm[3] & ~1);
  dsecond.perm[3] = (d->perm[3] & ~1) + 1;
  dthird.perm[0] = (d->perm[0] % 2);
  dthird.perm[1] = (d->perm[1] % 2) + 4;
  dthird.perm[2] = (d->perm[2] % 2) + 2;
  dthird.perm[3] = (d->perm[3] % 2) + 6;

  dfirst.target = gen_reg_rtx (dfirst.vmode);
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  dthird.op0 = dfirst.target;
  dthird.op1 = dsecond.target;
  dthird.one_operand_p = false;

  canonicalize_perm (&dfirst);
  canonicalize_perm (&dsecond);

  ok = expand_vec_perm_1 (&dfirst)
       && expand_vec_perm_1 (&dsecond)
       && expand_vec_perm_1 (&dthird);

  gcc_assert (ok);

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (!d->one_operand_p);

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (e >= nelt)
        e -= nelt;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
          rperm[1-which][i*eltsz + j] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = gen_lowpart (V16QImode, d->target);
  emit_insn (gen_iorv16qi3 (op, l, h));

  return true;
}
/* Implement arbitrary permutation of one V32QImode and V16QImode operand
   with two vpshufb insns, vpermq and vpor.  We should have already failed
   all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, hp, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || !d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
          rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  /* Swap the 128-bit lanes of h into hp.  */
  hp = gen_reg_rtx (V4DImode);
  op = gen_lowpart (V4DImode, h);
  emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
                                  const1_rtx));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V32QImode and V16QImode operand
   with two vpshufb insns, vpor and vpermq.  We should have already
   failed all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, ior, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  for (i = 0; i < d->nelt; ++i)
    if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
      return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  In the first permutation mask
     the first quarter will contain indexes for the first half
     of the op0, the second quarter will contain bit 7 set, third quarter
     will contain indexes for the second half of the op0 and the
     last quarter bit 7 set.  In the second permutation mask
     the first quarter will contain bit 7 set, the second quarter
     indexes for the first half of the op1, the third quarter bit 7 set
     and last quarter indexes for the second half of the op1.
     I.e. the first mask e.g. for V32QImode extract even will be:
     0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
     (all values masked with 0xf except for -128) and second mask
     for extract even will be
     -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = d->perm[i] >= nelt;
      unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
          rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op1);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  ior = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (ior, l, h));

  /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
  op = gen_lowpart (V4DImode, d->target);
  ior = gen_lowpart (V4DImode, ior);
  emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
                                  const1_rtx, GEN_INT (3)));

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3;

  switch (d->vmode)
    {
    case V4DFmode:
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
        t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
        t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SFmode:
      {
        int mask = odd ? 0xdd : 0x88;

        t1 = gen_reg_rtx (V8SFmode);
        t2 = gen_reg_rtx (V8SFmode);
        t3 = gen_reg_rtx (V8SFmode);

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
        emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
                                      GEN_INT (mask)));

        /* Shuffle the lanes around to produce:
           { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
                                            GEN_INT (0x3)));

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
        emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));

        /* Shuffle within the 128-bit lanes to produce:
           { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
        emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));

        /* Shuffle the lanes around to produce:
           { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
                                            GEN_INT (0x20)));
      }
      break;

    case V2DFmode:
    case V4SFmode:
    case V2DImode:
    case V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case V8HImode:
      if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          /* We need 2*log2(N)-1 operations to achieve odd/even
             with interleave. */
          t1 = gen_reg_rtx (V8HImode);
          t2 = gen_reg_rtx (V8HImode);
          emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
          if (odd)
            t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
          else
            t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
          emit_insn (t3);
        }
      break;

    case V16QImode:
      if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          t1 = gen_reg_rtx (V16QImode);
          t2 = gen_reg_rtx (V16QImode);
          t3 = gen_reg_rtx (V16QImode);
          emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
          emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
          if (odd)
            t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
          else
            t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
          emit_insn (t3);
        }
      break;

    case V16HImode:
    case V32QImode:
      return expand_vec_perm_vpshufb2_vpermq_even_odd (d);

    case V4DImode:
      if (!TARGET_AVX2)
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V4DFmode;
          d_copy.target = gen_lowpart (V4DFmode, d->target);
          d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
          d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
          return expand_vec_perm_even_odd_1 (&d_copy, odd);
        }

      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an vpunpck[lh]qdq will produce the result required.  */
      if (odd)
        t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
      else
        t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SImode:
      if (!TARGET_AVX2)
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V8SFmode;
          d_copy.target = gen_lowpart (V8SFmode, d->target);
          d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
          d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
          return expand_vec_perm_even_odd_1 (&d_copy, odd);
        }

      t1 = gen_reg_rtx (V8SImode);
      t2 = gen_reg_rtx (V8SImode);

      /* Shuffle the lanes around into
         { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
                                    gen_lowpart (V4DImode, d->op0),
                                    gen_lowpart (V4DImode, d->op1),
                                    GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
                                    gen_lowpart (V4DImode, d->op0),
                                    gen_lowpart (V4DImode, d->op1),
                                    GEN_INT (0x31)));

      /* Swap the 2nd and 3rd position in each lane into
         { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
      emit_insn (gen_avx2_pshufdv3 (t1, t1,
                                    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
      emit_insn (gen_avx2_pshufdv3 (t2, t2,
                                    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));

      /* Now an vpunpck[lh]qdq will produce
         { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
      if (odd)
        t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
                                           gen_lowpart (V4DImode, t1),
                                           gen_lowpart (V4DImode, t2));
      else
        t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
                                          gen_lowpart (V4DImode, t1),
                                          gen_lowpart (V4DImode, t2));
      emit_insn (t3);
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
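
/* Example: for V8SImode, extract-even is perm = { 0 2 4 6 8 10 12 14 }
   and extract-odd is perm = { 1 3 5 7 9 11 13 15 }; both are handed to
   expand_vec_perm_even_odd_1 with odd == 0 resp. 1.  */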
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  enum machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0;
  bool ok;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
         use the vbroadcast instruction.  They expand to two insns
         if the input happens to be in a register.  */
      gcc_unreachable ();

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case V8HImode:
    case V16QImode:
      /* These can be implemented via interleave.  We save one insn by
         stopping once we have promoted to V4SImode and then use pshufd.  */
      do
        {
          rtx dest;
          rtx (*gen) (rtx, rtx, rtx)
            = vmode == V16QImode ? gen_vec_interleave_lowv16qi
                                 : gen_vec_interleave_lowv8hi;

          if (elt >= nelt2)
            {
              gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
                                       : gen_vec_interleave_highv8hi;
              elt -= nelt2;
            }
          nelt2 /= 2;

          dest = gen_reg_rtx (vmode);
          emit_insn (gen (dest, op0, op0));
          vmode = get_mode_wider_vector (vmode);
          op0 = gen_lowpart (vmode, dest);
        }
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4,
                           d->testing_p);
      gcc_assert (ok);
      return true;

    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      /* For AVX2 broadcasts of the first element vpbroadcast* or
         vpermq should be used by expand_vec_perm_1.  */
      gcc_assert (!TARGET_AVX2 || d->perm[0]);
      return false;

    default:
      gcc_unreachable ();
    }
}
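
/* Example of the interleave ladder above: broadcasting byte 5 of a
   V16QImode vector first interleaves low (5 < 8), duplicating the
   byte within a word; the next step interleaves high (adjusting elt),
   duplicating the word within a dword; once promoted to V4SImode a
   single pshufd with all four indexes equal completes the
   broadcast.  */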
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
/* Implement arbitrary permutation of two V32QImode and V16QImode operands
   with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
   all the shorter instruction sequences.  */

static bool
expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
{
  rtx rperm[4][32], vperm, l[2], h[2], op, m128;
  unsigned int i, nelt, eltsz;
  bool used[4];

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate 4 permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if the element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < 32; ++i)
    {
      rperm[0][i] = m128;
      rperm[1][i] = m128;
      rperm[2][i] = m128;
      rperm[3][i] = m128;
    }
  used[0] = false;
  used[1] = false;
  used[2] = false;
  used[3] = false;
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
      unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);

      for (j = 0; j < eltsz; ++j)
	rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
      used[which] = true;
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i + 1])
	{
	  h[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode,
				    gen_rtvec_v (32, rperm[2 * i + 1]));
      vperm = force_reg (V32QImode, vperm);
      h[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
    }

  /* Swap the 128-bit lanes of h[X].  */
  for (i = 0; i < 2; ++i)
    {
      if (h[i] == NULL_RTX)
	continue;
      op = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
				      const2_rtx, GEN_INT (3), const0_rtx,
				      const1_rtx));
      h[i] = gen_lowpart (V32QImode, op);
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i])
	{
	  l[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
      vperm = force_reg (V32QImode, vperm);
      l[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
    }

  for (i = 0; i < 2; ++i)
    {
      if (h[i] && l[i])
	{
	  op = gen_reg_rtx (V32QImode);
	  emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
	  l[i] = op;
	}
      else if (h[i])
	l[i] = h[i];
    }

  gcc_assert (l[0] && l[1]);
  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
  return true;
}
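
/* Illustrative note (not from the original source): the four masks above
   are indexed as  which = 2*(element comes from op1) + (element crosses a
   128-bit lane).  E.g. for V32QImode, result byte 0 taken from op1's
   byte 20 (selector value 52) has e == 52 & 15 == 4, xlane != 0, so
   which == 3: it is fetched by op1's cross-lane vpshufb mask and moved
   back into place by the following vpermq.  */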
/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  if (expand_vec_perm_vpermq_perm_1 (d))
    return true;

  if (expand_vec_perm_vperm2f128 (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_2vperm2f128_vshuf (d))
    return true;

  if (expand_vec_perm_pshufb2 (d))
    return true;

  if (expand_vec_perm_interleave3 (d))
    return true;

  if (expand_vec_perm_vperm2f128_vblend (d))
    return true;

  /* Try sequences of four instructions.  */

  if (expand_vec_perm_vpshufb2_vpermq (d))
    return true;

  if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly...  */

  if (expand_vec_perm_even_odd (d))
    return true;

  /* Even longer sequences.  */
  if (expand_vec_perm_vpshufb4_vpermq2 (d))
    return true;

  return false;
}
/* If a permutation only uses one operand, make it clear.  Returns true
   if the permutation references both operands.  */

static bool
canonicalize_perm (struct expand_vec_perm_d *d)
{
  int i, which, nelt = d->nelt;

  for (i = which = 0; i < nelt; ++i)
    which |= (d->perm[i] < nelt ? 1 : 2);

  d->one_operand_p = true;
  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d->op0, d->op1))
	{
	  d->one_operand_p = false;
	  break;
	}
      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */

    case 2:
      for (i = 0; i < nelt; ++i)
	d->perm[i] &= nelt - 1;
      d->op0 = d->op1;
      break;

    case 1:
      d->op1 = d->op0;
      break;
    }

  return (which == 3);
}
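
/* Illustrative sketch (not from the original source): with nelt == 4 and
   op0 == op1, the two-operand selector { 0, 5, 2, 7 } is folded by the
   "&= nelt - 1" loop above into the one-operand selector { 0, 1, 2, 3 },
   which here happens to be the identity permutation.  */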
bool
ix86_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt;
  bool two_args;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  two_args = canonicalize_perm (&d);

  if (ix86_expand_vec_perm_const_1 (&d))
    return true;

  /* If the selector says both arguments are needed, but the operands are the
     same, the above tried to expand with one_operand_p and flattened selector.
     If that didn't work, retry without one_operand_p; we succeeded with that
     during testing.  */
  if (two_args && d.one_operand_p)
    {
      d.one_operand_p = false;
      memcpy (d.perm, perm, sizeof (perm));
      return ix86_expand_vec_perm_const_1 (&d);
    }

  return false;
}
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  if (GET_MODE_SIZE (d.vmode) == 16)
    {
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
	return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
	return true;
      /* All implementable with shufpd or unpck[lh]pd.  */
      if (d.vmode == V2DFmode)
	return true;
    }

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Implementable with shufps or pshufd.  */
  if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
void
ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt, base;
  bool ok;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  base = high_p ? nelt / 2 : 0;
  for (i = 0; i < nelt / 2; ++i)
    {
      d.perm[i * 2] = i + base;
      d.perm[i * 2 + 1] = i + base + nelt;
    }

  /* Note that for AVX this isn't one instruction.  */
  ok = ix86_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
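
/* Illustrative sketch (not from the original source): for nelt == 4,
   high_p == false builds the selector { 0, 4, 1, 5 } (interleave low)
   and high_p == true builds { 2, 6, 3, 7 } (interleave high), matching
   the punpckl/punpckh element ordering.  */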
/* Expand a vector operation CODE for a V*QImode in terms of the
   same operation on V*HImode.  */

void
ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
{
  enum machine_mode qimode = GET_MODE (dest);
  enum machine_mode himode;
  rtx (*gen_il) (rtx, rtx, rtx);
  rtx (*gen_ih) (rtx, rtx, rtx);
  rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
  struct expand_vec_perm_d d;
  bool ok, full_interleave;
  bool uns_p = false;
  int i;

  switch (qimode)
    {
    case V16QImode:
      himode = V8HImode;
      gen_il = gen_vec_interleave_lowv16qi;
      gen_ih = gen_vec_interleave_highv16qi;
      break;
    case V32QImode:
      himode = V16HImode;
      gen_il = gen_avx2_interleave_lowv32qi;
      gen_ih = gen_avx2_interleave_highv32qi;
      break;
    default:
      gcc_unreachable ();
    }

  op2_l = op2_h = op2;
  switch (code)
    {
    case MULT:
      /* Unpack data such that we've got a source byte in each low byte of
	 each word.  We don't care what goes into the high byte of each word.
	 Rather than trying to get zero in there, most convenient is to let
	 it be a copy of the low byte.  */
      op2_l = gen_reg_rtx (qimode);
      op2_h = gen_reg_rtx (qimode);
      emit_insn (gen_il (op2_l, op2, op2));
      emit_insn (gen_ih (op2_h, op2, op2));

      op1_l = gen_reg_rtx (qimode);
      op1_h = gen_reg_rtx (qimode);
      emit_insn (gen_il (op1_l, op1, op1));
      emit_insn (gen_ih (op1_h, op1, op1));
      full_interleave = qimode == V16QImode;
      break;

    case ASHIFT:
    case LSHIFTRT:
      uns_p = true;
      /* FALLTHRU */
    case ASHIFTRT:
      op1_l = gen_reg_rtx (himode);
      op1_h = gen_reg_rtx (himode);
      ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
      ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
      full_interleave = true;
      break;
    default:
      gcc_unreachable ();
    }

  /* Perform the operation.  */
  res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
			       1, OPTAB_DIRECT);
  res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
			       1, OPTAB_DIRECT);
  gcc_assert (res_l && res_h);

  /* Merge the data back into the right place.  */
  d.target = dest;
  d.op0 = gen_lowpart (qimode, res_l);
  d.op1 = gen_lowpart (qimode, res_h);
  d.vmode = qimode;
  d.nelt = GET_MODE_NUNITS (qimode);
  d.one_operand_p = false;
  d.testing_p = false;

  if (full_interleave)
    {
      /* For SSE2, we used a full interleave, so the desired
	 results are in the even elements.  */
      for (i = 0; i < 32; ++i)
	d.perm[i] = i * 2;
    }
  else
    {
      /* For AVX, the interleave used above was not cross-lane.  So the
	 extraction is evens but with the second and third quarter swapped.
	 Happily, that is even one insn shorter than even extraction.  */
      for (i = 0; i < 32; ++i)
	d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
    }

  ok = ix86_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_fmt_ee (code, qimode, op1, op2));
}
/* Helper function of ix86_expand_mul_widen_evenodd.  Return true
   if op is CONST_VECTOR with all odd elements equal to their
   preceding element.  */

static bool
const_vector_equal_evenodd_p (rtx op)
{
  enum machine_mode mode = GET_MODE (op);
  int i, nunits = GET_MODE_NUNITS (mode);
  if (GET_CODE (op) != CONST_VECTOR
      || nunits != CONST_VECTOR_NUNITS (op))
    return false;
  for (i = 0; i < nunits; i += 2)
    if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
      return false;
  return true;
}
void
ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
			       bool uns_p, bool odd_p)
{
  enum machine_mode mode = GET_MODE (op1);
  enum machine_mode wmode = GET_MODE (dest);
  rtx x;
  rtx orig_op1 = op1, orig_op2 = op2;

  if (!nonimmediate_operand (op1, mode))
    op1 = force_reg (mode, op1);
  if (!nonimmediate_operand (op2, mode))
    op2 = force_reg (mode, op2);

  /* We only play even/odd games with vectors of SImode.  */
  gcc_assert (mode == V4SImode || mode == V8SImode);

  /* If we're looking for the odd results, shift those members down to
     the even slots.  For some cpus this is faster than a PSHUFD.  */
  if (odd_p)
    {
      /* For XOP use vpmacsdqh, but only for smult, as it is only
	 signed.  */
      if (TARGET_XOP && mode == V4SImode && !uns_p)
	{
	  x = force_reg (wmode, CONST0_RTX (wmode));
	  emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
	  return;
	}

      x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
      if (!const_vector_equal_evenodd_p (orig_op1))
	op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
			    x, NULL, 1, OPTAB_DIRECT);
      if (!const_vector_equal_evenodd_p (orig_op2))
	op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
			    x, NULL, 1, OPTAB_DIRECT);
      op1 = gen_lowpart (mode, op1);
      op2 = gen_lowpart (mode, op2);
    }

  if (mode == V8SImode)
    {
      if (uns_p)
	x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
      else
	x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
    }
  else if (uns_p)
    x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
  else if (TARGET_SSE4_1)
    x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
  else
    {
      rtx s1, s2, t0, t1, t2;

      /* The easiest way to implement this without PMULDQ is to go through
	 the motions as if we are performing a full 64-bit multiply.  With
	 the exception that we need to do less shuffling of the elements.  */

      /* Compute the sign-extension, aka highparts, of the two operands.  */
      s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
				op1, pc_rtx, pc_rtx);
      s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
				op2, pc_rtx, pc_rtx);

      /* Multiply LO(A) * HI(B), and vice-versa.  */
      t1 = gen_reg_rtx (wmode);
      t2 = gen_reg_rtx (wmode);
      emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
      emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));

      /* Multiply LO(A) * LO(B).  */
      t0 = gen_reg_rtx (wmode);
      emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));

      /* Combine and shift the highparts into place.  */
      t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
      t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
			 1, OPTAB_DIRECT);

      /* Combine high and low parts.  */
      force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
      return;
    }
  emit_insn (x);
}
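
/* Illustrative derivation (not from the original source) of the SSE2
   fallback above: reading 32-bit a and b as signed,

     a * b == umul (a, b) - (((a < 0 ? b : 0) + (b < 0 ? a : 0)) << 32)

   s1/s2 are all-ones masks where a (resp. b) is negative, so the
   unsigned products t1 = s1 * b and t2 = s2 * a equal -b resp. -a
   modulo 2^64 once shifted, providing the subtracted correction.  */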
void
ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
			    bool uns_p, bool high_p)
{
  enum machine_mode wmode = GET_MODE (dest);
  enum machine_mode mode = GET_MODE (op1);
  rtx t1, t2, t3, t4, mask;

  switch (mode)
    {
    case V4SImode:
      t1 = gen_reg_rtx (mode);
      t2 = gen_reg_rtx (mode);
      if (TARGET_XOP && !uns_p)
	{
	  /* With XOP, we have pmacsdqh, aka mul_widen_odd.  In this case,
	     shuffle the elements once so that all elements are in the right
	     place for immediate use: { A C B D }.  */
	  emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
					const1_rtx, GEN_INT (3)));
	  emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
					const1_rtx, GEN_INT (3)));
	}
      else
	{
	  /* Put the elements into place for the multiply.  */
	  ix86_expand_vec_interleave (t1, op1, op1, high_p);
	  ix86_expand_vec_interleave (t2, op2, op2, high_p);
	  high_p = false;
	}

      ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
      break;

    case V8SImode:
      /* Shuffle the elements between the lanes.  After this we
	 have { A B E F | C D G H } for each operand.  */
      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
				      const0_rtx, const2_rtx,
				      const1_rtx, GEN_INT (3)));
      emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
				      const0_rtx, const2_rtx,
				      const1_rtx, GEN_INT (3)));

      /* Shuffle the elements within the lanes.  After this we
	 have { A A B B | C C D D } or { E E F F | G G H H }.  */
      t3 = gen_reg_rtx (V8SImode);
      t4 = gen_reg_rtx (V8SImode);
      mask = GEN_INT (high_p
		      ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
		      : 0 + (0 << 2) + (1 << 4) + (1 << 6));
      emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
      emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));

      ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
      break;

    case V8HImode:
    case V16HImode:
      t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
			 uns_p, OPTAB_DIRECT);
      t2 = expand_binop (mode,
			 uns_p ? umul_highpart_optab : smul_highpart_optab,
			 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
      gcc_assert (t1 && t2);

      ix86_expand_vec_interleave (gen_lowpart (mode, dest), t1, t2, high_p);
      break;

    case V16QImode:
    case V32QImode:
      t1 = gen_reg_rtx (wmode);
      t2 = gen_reg_rtx (wmode);
      ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
      ix86_expand_sse_unpack (t2, op2, uns_p, high_p);

      emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
      break;

    default:
      gcc_unreachable ();
    }
}
void
ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
{
  rtx res_1, res_2;

  res_1 = gen_reg_rtx (V4SImode);
  res_2 = gen_reg_rtx (V4SImode);
  ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_1),
				 op1, op2, true, false);
  ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_2),
				 op1, op2, true, true);

  /* Move the results in element 2 down to element 1; we don't care
     what goes in elements 2 and 3.  Then we can merge the parts
     back together with an interleave.

     Note that two other sequences were tried:
     (1) Use interleaves at the start instead of psrldq, which allows
     us to use a single shufps to merge things back at the end.
     (2) Use shufps here to combine the two vectors, then pshufd to
     put the elements in the correct order.
     In both cases the cost of the reformatting stall was too high
     and the overall sequence slower.  */

  emit_insn (gen_sse2_pshufd_1 (res_1, res_1, const0_rtx, const2_rtx,
				const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (res_2, res_2, const0_rtx, const2_rtx,
				const0_rtx, const0_rtx));
  res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));

  set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
}
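
/* Illustrative note (not from the original source): SSE2's pmuludq only
   multiplies the even 32-bit elements, so the V4SImode low-part multiply
   is assembled from an even pass (elements 0 and 2) and an odd pass
   (elements 1 and 3).  Each 64-bit product's low half is the wanted
   32-bit result; the pshufd/punpckldq pair above re-packs them.  */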
void
ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx t1, t2, t3, t4, t5, t6;

  if (TARGET_XOP && mode == V2DImode)
    {
      /* op1: A,B,C,D, op2: E,F,G,H */
      op1 = gen_lowpart (V4SImode, op1);
      op2 = gen_lowpart (V4SImode, op2);

      t1 = gen_reg_rtx (V4SImode);
      t2 = gen_reg_rtx (V4SImode);
      t3 = gen_reg_rtx (V2DImode);
      t4 = gen_reg_rtx (V2DImode);

      /* t1: B,A,D,C */
      emit_insn (gen_sse2_pshufd_1 (t1, op1,
				    GEN_INT (1),
				    GEN_INT (0),
				    GEN_INT (3),
				    GEN_INT (2)));

      /* t2: (B*E),(A*F),(D*G),(C*H) */
      emit_insn (gen_mulv4si3 (t2, t1, op2));

      /* t3: (B*E)+(A*F), (D*G)+(C*H) */
      emit_insn (gen_xop_phadddq (t3, t2));

      /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
      emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));

      /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
      emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
    }
  else
    {
      enum machine_mode nmode;
      rtx (*umul) (rtx, rtx, rtx);

      if (mode == V2DImode)
	{
	  umul = gen_vec_widen_umult_even_v4si;
	  nmode = V4SImode;
	}
      else if (mode == V4DImode)
	{
	  umul = gen_vec_widen_umult_even_v8si;
	  nmode = V8SImode;
	}
      else
	gcc_unreachable ();

      /* Multiply low parts.  */
      t1 = gen_reg_rtx (mode);
      emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));

      /* Shift input vectors right 32 bits so we can multiply high parts.  */
      t6 = GEN_INT (32);
      t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
      t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);

      /* Multiply high parts by low parts.  */
      t4 = gen_reg_rtx (mode);
      t5 = gen_reg_rtx (mode);
      emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
      emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));

      /* Combine and shift the highparts back.  */
      t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
      t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);

      /* Combine high and low parts.  */
      force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
    }

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_MULT (mode, op1, op2));
}
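
/* Illustrative derivation (not from the original source) of the generic
   path above, splitting each 64-bit element into 32-bit halves:

     a * b == lo(a)*lo(b) + ((hi(a)*lo(b) + lo(a)*hi(b)) << 32)  (mod 2^64)

   The hi(a)*hi(b) term is shifted out entirely, which is why only
   three pmuludq multiplies are needed.  */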
/* Expand an insert into a vector register through pinsr insn.
   Return true if successful.  */

bool
ix86_expand_pinsr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[3];

  unsigned int size = INTVAL (operands[1]);
  unsigned int pos = INTVAL (operands[2]);

  if (GET_CODE (dst) == SUBREG)
    {
      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
      dst = SUBREG_REG (dst);
    }

  if (GET_CODE (src) == SUBREG)
    src = SUBREG_REG (src);

  switch (GET_MODE (dst))
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      {
	enum machine_mode srcmode, dstmode;
	rtx (*pinsr)(rtx, rtx, rtx, rtx);

	srcmode = mode_for_size (size, MODE_INT, 0);

	switch (srcmode)
	  {
	  case QImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V16QImode;
	    pinsr = gen_sse4_1_pinsrb;
	    break;

	  case HImode:
	    if (!TARGET_SSE2)
	      return false;
	    dstmode = V8HImode;
	    pinsr = gen_sse2_pinsrw;
	    break;

	  case SImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V4SImode;
	    pinsr = gen_sse4_1_pinsrd;
	    break;

	  case DImode:
	    gcc_assert (TARGET_64BIT);
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V2DImode;
	    pinsr = gen_sse4_1_pinsrq;
	    break;

	  default:
	    return false;
	  }

	dst = gen_lowpart (dstmode, dst);
	src = gen_lowpart (srcmode, src);

	pos /= size;

	emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
	return true;
      }

    default:
      return false;
    }
}
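
/* Illustrative sketch (not from the original source): inserting a 16-bit
   value at bit offset 48 gives size == 16 and pos == 48; after
   pos /= size the element index is 3, so the emitted insn is pinsrw
   with selector GEN_INT (1 << 3).  */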
/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (TREE_CODE (type) == MEM_REF)
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  if (TARGET_64BIT && va_list_type_node != NULL_TREE)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return ms_va_list_type_node;
      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (TARGET_64BIT)
    {
      switch (idx)
	{
	default:
	  break;

	case 0:
	  *ptree = ms_va_list_type_node;
	  *pname = "__builtin_ms_va_list";
	  return 1;

	case 1:
	  *ptree = sysv_va_list_type_node;
	  *pname = "__builtin_sysv_va_list";
	  return 1;
	}
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  ix86_dependencies_evaluation_hook

/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1

#define BIG 100
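
/* Illustrative note (not from the original source): under these limits a
   16-byte window can hold e.g. four 4-byte insns, while a single insn
   carrying two 64-bit immediates already reaches both MAX_IMM_64 and the
   128 bits of MAX_IMM_SIZE, so no further immediate-carrying insn fits
   in that window.  */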
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
enum dispatch_group {
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};

/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number,
   because the number of this kind of operations does not have any
   effect in dispatch window, but we need them for other reasons in
   the table.  */
static unsigned int num_allowable_groups[disp_last] = {
  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
};

char group_name[disp_last + 1][16] = {
  "disp_no_group", "disp_load", "disp_store", "disp_load_store",
  "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
  "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
};

/* Instruction path.  */
enum insn_path
{
  no_path = 0,
  path_single, /* Single micro op.  */
  path_double, /* Double micro op.  */
  path_multi,  /* Instructions with more than 2 micro ops.  */
  last_path
};

/* sched_insn_info defines a window to the instructions scheduled in
   the basic block.  It contains a pointer to the insn_info table and
   the instruction scheduled.

   Windows are allocated for each basic block and are linked
   together.  */
typedef struct sched_insn_info_s {
  rtx insn;
  enum dispatch_group group;
  enum insn_path path;
  int byte_len;
  int imm_bytes;
} sched_insn_info;

/* Linked list of dispatch windows.  This is a two way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
typedef struct dispatch_windows_s {
  int num_insn;            /* Number of insn in the window.  */
  int num_uops;            /* Number of uops in the window.  */
  int window_size;         /* Number of bytes in the window.  */
  int window_num;          /* Window number between 0 or 1.  */
  int num_imm;             /* Number of immediates in an insn.  */
  int num_imm_32;          /* Number of 32 bit immediates in an insn.  */
  int num_imm_64;          /* Number of 64 bit immediates in an insn.  */
  int imm_size;            /* Total immediates in the window.  */
  int num_loads;           /* Total memory loads in the window.  */
  int num_stores;          /* Total memory stores in the window.  */
  int violation;           /* Violation exists in window.  */
  sched_insn_info *window; /* Pointer to the window.  */
  struct dispatch_windows_s *next;
  struct dispatch_windows_s *prev;
} dispatch_windows;

/* Immediate values used in an insn.  */
typedef struct imm_info_s
{
  int imm;
  int imm32;
  int imm64;
} imm_info;

static dispatch_windows *dispatch_window_list;
static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}
/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
	  || type == TYPE_ICMP
	  || type == TYPE_FCMP
	  || GET_CODE (PATTERN (insn)) == COMPARE);
}
/* Return true if a dispatch violation encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}
/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}
/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}
/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();
  init_window (0);
  init_window (1);
}
/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}
/* This function is called when the end of a window processing is reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
		  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}
/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not dispatch
   windows whose sizes are DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
	init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch ( GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
	(imm_values->imm32)++;
      else
	(imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
	{
	  (imm_values->imm)++;
	  (imm_values->imm32)++;
	}
      break;

    default:
      break;
    }

  return 0;
}
42105 find_constant (rtx in_rtx
, imm_info
*imm_values
)
42107 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
42108 (rtx_function
) find_constant_1
, (void *) imm_values
);
/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
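
/* Illustrative sketch (not from the original source): for an insn with
   one SImode immediate and one DImode immediate, the counts come back
   as imm == 2, imm32 == 1, imm64 == 1, and the returned size is
   1*4 + 1*8 == 12 bytes.  */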
/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
			       &num_imm64_operand);
  return false;
}
42148 static enum insn_path
42149 get_insn_path (rtx insn
)
42151 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
42153 if ((int)path
== 0)
42154 return path_single
;
42156 if ((int)path
== 1)
42157 return path_double
;
/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
	  || num_imm_operand + window_list->num_imm > MAX_IMM
	  || (num_imm32_operand > 0
	      && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
		  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
	  || (num_imm64_operand > 0
	      && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
		  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
	  || (window_list->imm_size + imm_size == MAX_IMM_SIZE
	      && num_imm64_operand > 0
	      && ((window_list->num_imm_64 > 0
		   && window_list->num_insn >= 2)
		  || window_list->num_insn >= 3)))
	return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
	   || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
	   || group == disp_prefetch)
	  && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
	  && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
	  || (min_insn_size (insn) + sum) >= 48)
	/* Window 1 is full.  Go for next window.  */
	return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
	 uops.  */
      if (path == path_double
	  && (window_list->num_uops + 2) > MAX_INSN)
	return false;
      else if (path != path_single)
	return false;
    }
  return true;
}
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
	   || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
	  && insn_group == disp_branch)
	{
	  process_end_window ();
	  return;
	}
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
	  || (byte_len + sum) >= 48)
	{
	  process_end_window ();
	  window_list = dispatch_window_list;
	}

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block process.  */
      process_end_window ();
      return;
    }
}
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
	   list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
	   list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
	break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
	       i, group_name[list->window[i].group],
	       i, (void *)list->window[i].insn,
	       i, list->window[i].path,
	       i, list->window[i].byte_len,
	       i, list->window[i].imm_bytes);
    }
}
/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
	   group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to STDOUT the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
	return false;

      case IS_DISPATCH_ON:
	return true;

      case IS_CMP:
	return is_cmp (insn);

      case DISPATCH_VIOLATION:
	return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
	return fits_dispatch_window (insn);
      }

  return false;
}
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.

   Currently parallel reassociation is enabled for Atom
   processors only and we set reassociation width to be 2
   because Atom may issue up to 2 instructions per cycle.

   Return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
			  enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
	return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
/* Return class of registers which could be used for pseudo of MODE
   and of class RCLASS for spilling instead of memory.  Return NO_REGS
   if it is not possible or non-profitable.  */

static reg_class_t
ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
{
  if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
      && (mode == SImode || (TARGET_64BIT && mode == DImode))
      && INTEGER_CLASS_P (rclass))
    return ALL_SSE_REGS;
  return NO_REGS;
}
42657 ix86_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
42659 unsigned *cost
= XNEWVEC (unsigned, 3);
42660 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
42664 /* Implement targetm.vectorize.add_stmt_cost. */
42667 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
42668 struct _stmt_vec_info
*stmt_info
, int misalign
,
42669 enum vect_cost_model_location where
)
42671 unsigned *cost
= (unsigned *) data
;
42672 unsigned retval
= 0;
42674 if (flag_vect_cost_model
)
42676 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
42677 int stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
42679 /* Statements in an inner loop relative to the loop being
42680 vectorized are weighted more heavily. The value here is
42681 arbitrary and could potentially be improved with analysis. */
42682 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
42683 count
*= 50; /* FIXME. */
42685 retval
= (unsigned) (count
* stmt_cost
);
42686 cost
[where
] += retval
;
42692 /* Implement targetm.vectorize.finish_cost. */
42695 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
42696 unsigned *body_cost
, unsigned *epilogue_cost
)
42698 unsigned *cost
= (unsigned *) data
;
42699 *prologue_cost
= cost
[vect_prologue
];
42700 *body_cost
= cost
[vect_body
];
42701 *epilogue_cost
= cost
[vect_epilogue
];
42704 /* Implement targetm.vectorize.destroy_cost_data. */
42707 ix86_destroy_cost_data (void *data
)
42712 /* Validate target specific memory model bits in VAL. */
42714 static unsigned HOST_WIDE_INT
42715 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
42717 unsigned HOST_WIDE_INT model
= val
& MEMMODEL_MASK
;
42720 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
42722 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
42724 warning (OPT_Winvalid_memory_model
,
42725 "Unknown architecture specific memory model");
42726 return MEMMODEL_SEQ_CST
;
42728 strong
= (model
== MEMMODEL_ACQ_REL
|| model
== MEMMODEL_SEQ_CST
);
42729 if (val
& IX86_HLE_ACQUIRE
&& !(model
== MEMMODEL_ACQUIRE
|| strong
))
42731 warning (OPT_Winvalid_memory_model
,
42732 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
42733 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
42735 if (val
& IX86_HLE_RELEASE
&& !(model
== MEMMODEL_RELEASE
|| strong
))
42737 warning (OPT_Winvalid_memory_model
,
42738 "HLE_RELEASE not used with RELEASE or stronger memory model");
42739 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
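
/* Illustrative usage (not from the original source): the extra bits are
   the ones user code sets with e.g.

     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   which is accepted above, while pairing __ATOMIC_HLE_ACQUIRE with
   __ATOMIC_RELAXED is diagnosed and forced to a SEQ_CST model.  */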
42744 /* Initialize the GCC target structure. */
42745 #undef TARGET_RETURN_IN_MEMORY
42746 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
42748 #undef TARGET_LEGITIMIZE_ADDRESS
42749 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
42751 #undef TARGET_ATTRIBUTE_TABLE
42752 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
42753 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
42754 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
42755 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42756 # undef TARGET_MERGE_DECL_ATTRIBUTES
42757 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
42760 #undef TARGET_COMP_TYPE_ATTRIBUTES
42761 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
42763 #undef TARGET_INIT_BUILTINS
42764 #define TARGET_INIT_BUILTINS ix86_init_builtins
42765 #undef TARGET_BUILTIN_DECL
42766 #define TARGET_BUILTIN_DECL ix86_builtin_decl
42767 #undef TARGET_EXPAND_BUILTIN
42768 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
42770 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
42771 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
42772 ix86_builtin_vectorized_function
42774 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
42775 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
42777 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
42778 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
42780 #undef TARGET_VECTORIZE_BUILTIN_GATHER
42781 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
42783 #undef TARGET_BUILTIN_RECIPROCAL
42784 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
42786 #undef TARGET_ASM_FUNCTION_EPILOGUE
42787 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
42789 #undef TARGET_ENCODE_SECTION_INFO
42790 #ifndef SUBTARGET_ENCODE_SECTION_INFO
42791 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
42793 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
42796 #undef TARGET_ASM_OPEN_PAREN
42797 #define TARGET_ASM_OPEN_PAREN ""
42798 #undef TARGET_ASM_CLOSE_PAREN
42799 #define TARGET_ASM_CLOSE_PAREN ""
42801 #undef TARGET_ASM_BYTE_OP
42802 #define TARGET_ASM_BYTE_OP ASM_BYTE
42804 #undef TARGET_ASM_ALIGNED_HI_OP
42805 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
42806 #undef TARGET_ASM_ALIGNED_SI_OP
42807 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
42809 #undef TARGET_ASM_ALIGNED_DI_OP
42810 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
42813 #undef TARGET_PROFILE_BEFORE_PROLOGUE
42814 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
42816 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
42817 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
42819 #undef TARGET_ASM_UNALIGNED_HI_OP
42820 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
42821 #undef TARGET_ASM_UNALIGNED_SI_OP
42822 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
42823 #undef TARGET_ASM_UNALIGNED_DI_OP
42824 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
42826 #undef TARGET_PRINT_OPERAND
42827 #define TARGET_PRINT_OPERAND ix86_print_operand
42828 #undef TARGET_PRINT_OPERAND_ADDRESS
42829 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
42830 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
42831 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
42832 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
42833 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
42835 #undef TARGET_SCHED_INIT_GLOBAL
42836 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
42837 #undef TARGET_SCHED_ADJUST_COST
42838 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
42839 #undef TARGET_SCHED_ISSUE_RATE
42840 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
42841 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
42842 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
42843 ia32_multipass_dfa_lookahead
42845 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
42846 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
42848 #undef TARGET_MEMMODEL_CHECK
42849 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
42852 #undef TARGET_HAVE_TLS
42853 #define TARGET_HAVE_TLS true
42855 #undef TARGET_CANNOT_FORCE_CONST_MEM
42856 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
42857 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
42858 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
42860 #undef TARGET_DELEGITIMIZE_ADDRESS
42861 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
42863 #undef TARGET_MS_BITFIELD_LAYOUT_P
42864 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
42867 #undef TARGET_BINDS_LOCAL_P
42868 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
42870 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42871 #undef TARGET_BINDS_LOCAL_P
42872 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
42875 #undef TARGET_ASM_OUTPUT_MI_THUNK
42876 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
42877 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
42878 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
42880 #undef TARGET_ASM_FILE_START
42881 #define TARGET_ASM_FILE_START x86_file_start
42883 #undef TARGET_OPTION_OVERRIDE
42884 #define TARGET_OPTION_OVERRIDE ix86_option_override
42886 #undef TARGET_REGISTER_MOVE_COST
42887 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
42888 #undef TARGET_MEMORY_MOVE_COST
42889 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
42890 #undef TARGET_RTX_COSTS
42891 #define TARGET_RTX_COSTS ix86_rtx_costs
42892 #undef TARGET_ADDRESS_COST
42893 #define TARGET_ADDRESS_COST ix86_address_cost
42895 #undef TARGET_FIXED_CONDITION_CODE_REGS
42896 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
42897 #undef TARGET_CC_MODES_COMPATIBLE
42898 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
42900 #undef TARGET_MACHINE_DEPENDENT_REORG
42901 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
42903 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
42904 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
42906 #undef TARGET_BUILD_BUILTIN_VA_LIST
42907 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
42909 #undef TARGET_FOLD_BUILTIN
42910 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
42912 #undef TARGET_COMPARE_VERSION_PRIORITY
42913 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
42915 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
42916 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
42917 ix86_generate_version_dispatcher_body
42919 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
42920 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
42921 ix86_get_function_versions_dispatcher
#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
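
/* Calling convention: argument passing, varargs setup and return.  */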
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
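
/* Mode and type support queries.  */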
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type
#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
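
/* Vectorizer hooks: cost model and SIMD mode selection.  */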
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
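
/* Per-function target option ("target" attribute) support.  */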
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
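
/* Register allocation: the port uses the local register allocator (LRA).  */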
#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
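
/* Build the complete target hook vector from the TARGET_* macros defined
   above; any hook not redefined here keeps the default supplied by
   target-def.h.  */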
struct gcc_target targetm = TARGET_INITIALIZER;
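
/* Garbage-collector root tables for this file, generated by gengtype.  */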
#include "gt-i386.h"