/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "diagnostic.h"
#include "tree-pass.h"
#include "tree-flow.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
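/* For example, MODE_INDEX (SImode) is 2, selecting the third slot of the
   five-element multiply and divide/mod cost arrays in the tables below;
   any mode other than QI/HI/SI/DImode falls into the trailing "other"
   slot at index 4.  */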
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
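/* On those assumptions the two scales agree: COSTS_N_BYTES (2) == 4 ==
   COSTS_N_INSNS (1), i.e. one two-byte add weighs the same as one insn,
   so byte counts can be used in the same cost fields as insn counts.  */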
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
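/* A sketch of how to read the stringop_algs initializers below (the
   struct itself is declared in i386.h): the first member names the
   algorithm used when the block size is unknown at compile time, and is
   followed by {max_size, algorithm, noalign} entries for known sizes,
   where a max of -1 terminates the list and covers all larger blocks.
   The dummy entry above thus requests a library call everywhere.  */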
const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),        /* cost of an add instruction */
  COSTS_N_BYTES (3),        /* cost of a lea instruction */
  COSTS_N_BYTES (2),        /* variable shift costs */
  COSTS_N_BYTES (3),        /* constant shift costs */
  {COSTS_N_BYTES (3),       /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),       /* HI */
   COSTS_N_BYTES (3),       /* SI */
   COSTS_N_BYTES (3),       /* DI */
   COSTS_N_BYTES (5)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),       /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),       /* HI */
   COSTS_N_BYTES (3),       /* SI */
   COSTS_N_BYTES (3),       /* DI */
   COSTS_N_BYTES (5)},      /* other */
  COSTS_N_BYTES (3),        /* cost of movsx */
  COSTS_N_BYTES (3),        /* cost of movzx */
  0,                        /* "large" insn */
  2,                        /* cost for loading QImode using movzbl */
  {2, 2, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 2, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 2},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {2, 2, 2},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  3,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {3, 3},                   /* cost of storing MMX registers
                               in SImode and DImode */
  3,                        /* cost of moving SSE register */
  {3, 3, 3},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {3, 3, 3},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of l1 cache */
  0,                        /* size of l2 cache */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  COSTS_N_BYTES (2),        /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),        /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),        /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),        /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),        /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),        /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  1,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  1,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (3),        /* variable shift costs */
  COSTS_N_INSNS (2),        /* constant shift costs */
  {COSTS_N_INSNS (6),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),       /* HI */
   COSTS_N_INSNS (6),       /* SI */
   COSTS_N_INSNS (6),       /* DI */
   COSTS_N_INSNS (6)},      /* other */
  COSTS_N_INSNS (1),        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),      /* HI */
   COSTS_N_INSNS (23),      /* SI */
   COSTS_N_INSNS (23),      /* DI */
   COSTS_N_INSNS (23)},     /* other */
  COSTS_N_INSNS (3),        /* cost of movsx */
  COSTS_N_INSNS (2),        /* cost of movzx */
  15,                       /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {8, 8, 8},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {8, 8, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of l1 cache */
  0,                        /* size of l2 cache */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  COSTS_N_INSNS (23),       /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),       /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),       /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),       /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),      /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (3),        /* variable shift costs */
  COSTS_N_INSNS (2),        /* constant shift costs */
  {COSTS_N_INSNS (12),      /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),      /* HI */
   COSTS_N_INSNS (12),      /* SI */
   COSTS_N_INSNS (12),      /* DI */
   COSTS_N_INSNS (12)},     /* other */
  1,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),      /* HI */
   COSTS_N_INSNS (40),      /* SI */
   COSTS_N_INSNS (40),      /* DI */
   COSTS_N_INSNS (40)},     /* other */
  COSTS_N_INSNS (3),        /* cost of movsx */
  COSTS_N_INSNS (2),        /* cost of movzx */
  15,                       /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {8, 8, 8},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {8, 8, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  4,                        /* size of l1 cache.  The 486 has an 8kB
                               cache shared between code and data, so
                               4kB is not really precise.  */
  4,                        /* size of l2 cache */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  COSTS_N_INSNS (8),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),       /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),       /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (4),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (11),      /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),      /* HI */
   COSTS_N_INSNS (11),      /* SI */
   COSTS_N_INSNS (11),      /* DI */
   COSTS_N_INSNS (11)},     /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),      /* HI */
   COSTS_N_INSNS (25),      /* SI */
   COSTS_N_INSNS (25),      /* DI */
   COSTS_N_INSNS (25)},     /* other */
  COSTS_N_INSNS (3),        /* cost of movsx */
  COSTS_N_INSNS (2),        /* cost of movzx */
  8,                        /* "large" insn */
  6,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  8,                        /* cost of moving MMX register */
  {8, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {8, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  8,                        /* size of l1 cache.  */
  8,                        /* size of l2 cache */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  COSTS_N_INSNS (3),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),       /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (4),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (4),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (4)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),      /* HI */
   COSTS_N_INSNS (17),      /* SI */
   COSTS_N_INSNS (17),      /* DI */
   COSTS_N_INSNS (17)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  2,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 2, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {2, 2, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  8,                        /* size of l1 cache.  */
  256,                      /* size of l2 cache */
  32,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  COSTS_N_INSNS (3),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),       /* cost of FSQRT instruction.  */
  /* The PentiumPro has optimized rep instructions for blocks aligned to
     8 bytes (we ensure the alignment).  For small blocks an inline loop
     is still a noticeable win; for bigger blocks either rep movsl or
     rep movsb is the way to go.  Rep movsb apparently has a more
     expensive startup time in the CPU, but after 4K the difference is
     down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
                        {8192, rep_prefix_4_byte, false},
                        {-1, rep_prefix_1_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop, false},
                        {8192, rep_prefix_4_byte, false},
                        {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (2),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (7),       /* SI */
   COSTS_N_INSNS (7),       /* DI */
   COSTS_N_INSNS (7)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),      /* HI */
   COSTS_N_INSNS (39),      /* SI */
   COSTS_N_INSNS (39),      /* DI */
   COSTS_N_INSNS (39)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  1,                        /* cost for loading QImode using movzbl */
  {1, 1, 1},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {1, 1, 1},                /* cost of storing integer registers */
  1,                        /* cost of reg,reg fld/fst */
  {1, 1, 1},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 6, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  1,                        /* cost of moving MMX register */
  {1, 1},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {1, 1},                   /* cost of storing MMX registers
                               in SImode and DImode */
  1,                        /* cost of moving SSE register */
  {1, 1, 1},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {1, 1, 1},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  1,                        /* MMX or SSE register to integer */
  64,                       /* size of l1 cache.  */
  128,                      /* size of l2 cache.  */
  32,                       /* size of prefetch block */
  1,                        /* number of parallel prefetches */
  COSTS_N_INSNS (6),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),       /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),       /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (3),       /* DI */
   COSTS_N_INSNS (3)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),      /* HI */
   COSTS_N_INSNS (18),      /* SI */
   COSTS_N_INSNS (18),      /* DI */
   COSTS_N_INSNS (18)},     /* other */
  COSTS_N_INSNS (2),        /* cost of movsx */
  COSTS_N_INSNS (2),        /* cost of movzx */
  8,                        /* "large" insn */
  3,                        /* cost for loading QImode using movzbl */
  {4, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 3, 2},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {6, 6, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 4},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {2, 2, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  6,                        /* MMX or SSE register to integer */
  32,                       /* size of l1 cache.  */
  32,                       /* size of l2 cache.  Some models have an
                               integrated l2 cache, but optimizing for
                               the k6 is not important enough to worry
                               about that.  */
  32,                       /* size of prefetch block */
  1,                        /* number of parallel prefetches */
  COSTS_N_INSNS (2),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),       /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (5),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),       /* HI */
   COSTS_N_INSNS (5),       /* SI */
   COSTS_N_INSNS (5),       /* DI */
   COSTS_N_INSNS (5)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),      /* HI */
   COSTS_N_INSNS (42),      /* SI */
   COSTS_N_INSNS (74),      /* DI */
   COSTS_N_INSNS (74)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 4},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 6},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  64,                       /* size of l1 cache.  */
  256,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  COSTS_N_INSNS (4),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),       /* cost of FSQRT instruction.  */
  /* For some reason, the Athlon handles the REP prefix (relative to
     loops) better than the K8 does.  Alignment becomes important after
     8 bytes for memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (5)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),      /* HI */
   COSTS_N_INSNS (42),      /* SI */
   COSTS_N_INSNS (74),      /* DI */
   COSTS_N_INSNS (74)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 3, 6},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  64,                       /* size of l1 cache.  */
  512,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be
     performed immediately, they are queued.  We set the number of
     simultaneous prefetches to a large constant to reflect this (it is
     probably not a good idea to leave the number of prefetches entirely
     unlimited, as their execution also takes some time).  */
  100,                      /* number of parallel prefetches */
  COSTS_N_INSNS (4),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),       /* cost of FSQRT instruction.  */
  /* The K8 has an optimized REP instruction for medium-sized blocks,
     but for very small blocks a loop is better.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false},
              {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
  4,                        /* scalar_stmt_cost.  */
  2,                        /* scalar load_cost.  */
  2,                        /* scalar_store_cost.  */
  5,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  2,                        /* vec_align_load_cost.  */
  3,                        /* vec_unalign_load_cost.  */
  3,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  2,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (5)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),      /* HI */
   COSTS_N_INSNS (51),      /* SI */
   COSTS_N_INSNS (83),      /* DI */
   COSTS_N_INSNS (83)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 3},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
                            /* MOVD reg64, xmmreg Double FSTORE 4
                               MOVD reg32, xmmreg Double FSTORE 4
                               MOVD reg64, xmmreg Double FADD 3
                               MOVD reg32, xmmreg Double FADD 3 */
  64,                       /* size of l1 cache.  */
  512,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be
     performed immediately, they are queued.  We set the number of
     simultaneous prefetches to a large constant to reflect this (it is
     probably not a good idea to leave the number of prefetches entirely
     unlimited, as their execution also takes some time).  */
  100,                      /* number of parallel prefetches */
  COSTS_N_INSNS (4),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),       /* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks,
     but for very small blocks a loop is better.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  4,                        /* scalar_stmt_cost.  */
  2,                        /* scalar load_cost.  */
  2,                        /* scalar_store_cost.  */
  6,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  2,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  2,                        /* vec_store_cost.  */
  2,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (4),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (4),       /* SI */
   COSTS_N_INSNS (6),       /* DI */
   COSTS_N_INSNS (6)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),      /* HI */
   COSTS_N_INSNS (51),      /* SI */
   COSTS_N_INSNS (83),      /* DI */
   COSTS_N_INSNS (83)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {5, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {5, 5, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 4},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 4},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 4},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  2,                        /* MMX or SSE register to integer */
                            /* MOVD reg64, xmmreg Double FSTORE 4
                               MOVD reg32, xmmreg Double FSTORE 4
                               MOVD reg64, xmmreg Double FADD 3
                               MOVD reg32, xmmreg Double FADD 3 */
  16,                       /* size of l1 cache.  */
  2048,                     /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be
     performed immediately, they are queued.  We set the number of
     simultaneous prefetches to a large constant to reflect this (it is
     probably not a good idea to leave the number of prefetches entirely
     unlimited, as their execution also takes some time).  */
  100,                      /* number of parallel prefetches */
  COSTS_N_INSNS (6),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),       /* cost of FSQRT instruction.  */

  /* BDVER1 has an optimized REP instruction for medium-sized blocks,
     but for very small blocks a loop is better.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  6,                        /* scalar_stmt_cost.  */
  4,                        /* scalar load_cost.  */
  4,                        /* scalar_store_cost.  */
  6,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  4,                        /* vec_align_load_cost.  */
  4,                        /* vec_unalign_load_cost.  */
  4,                        /* vec_store_cost.  */
  2,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (4),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (4),       /* SI */
   COSTS_N_INSNS (6),       /* DI */
   COSTS_N_INSNS (6)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),      /* HI */
   COSTS_N_INSNS (51),      /* SI */
   COSTS_N_INSNS (83),      /* DI */
   COSTS_N_INSNS (83)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {5, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {5, 5, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 4},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 4},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 4},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  2,                        /* MMX or SSE register to integer */
                            /* MOVD reg64, xmmreg Double FSTORE 4
                               MOVD reg32, xmmreg Double FSTORE 4
                               MOVD reg64, xmmreg Double FADD 3
                               MOVD reg32, xmmreg Double FADD 3 */
  16,                       /* size of l1 cache.  */
  2048,                     /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be
     performed immediately, they are queued.  We set the number of
     simultaneous prefetches to a large constant to reflect this (it is
     probably not a good idea to leave the number of prefetches entirely
     unlimited, as their execution also takes some time).  */
  100,                      /* number of parallel prefetches */
  COSTS_N_INSNS (6),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),       /* cost of FSQRT instruction.  */

  /* BDVER2 has an optimized REP instruction for medium-sized blocks,
     but for very small blocks a loop is better.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  6,                        /* scalar_stmt_cost.  */
  4,                        /* scalar load_cost.  */
  4,                        /* scalar_store_cost.  */
  6,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  4,                        /* vec_align_load_cost.  */
  4,                        /* vec_unalign_load_cost.  */
  4,                        /* vec_store_cost.  */
  2,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver3_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (4),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (4),       /* SI */
   COSTS_N_INSNS (6),       /* DI */
   COSTS_N_INSNS (6)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),      /* HI */
   COSTS_N_INSNS (51),      /* SI */
   COSTS_N_INSNS (83),      /* DI */
   COSTS_N_INSNS (83)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {5, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {5, 5, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 4},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 4},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 4},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  2,                        /* MMX or SSE register to integer */
  16,                       /* size of l1 cache.  */
  2048,                     /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be
     performed immediately, they are queued.  We set the number of
     simultaneous prefetches to a large constant to reflect this (it is
     probably not a good idea to leave the number of prefetches entirely
     unlimited, as their execution also takes some time).  */
  100,                      /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (6),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),       /* cost of FSQRT instruction.  */

  /* BDVER3 has an optimized REP instruction for medium-sized blocks,
     but for very small blocks a loop is better.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  6,                        /* scalar_stmt_cost.  */
  4,                        /* scalar load_cost.  */
  4,                        /* scalar_store_cost.  */
  6,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  4,                        /* vec_align_load_cost.  */
  4,                        /* vec_unalign_load_cost.  */
  4,                        /* vec_store_cost.  */
  2,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (5)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),      /* HI */
   COSTS_N_INSNS (51),      /* SI */
   COSTS_N_INSNS (83),      /* DI */
   COSTS_N_INSNS (83)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 3},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
                            /* MOVD reg64, xmmreg Double FSTORE 4
                               MOVD reg32, xmmreg Double FSTORE 4
                               MOVD reg64, xmmreg Double FADD 3
                               MOVD reg32, xmmreg Double FADD 3 */
  32,                       /* size of l1 cache.  */
  512,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  100,                      /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (4),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),       /* cost of FSQRT instruction.  */

  /* BTVER1 has an optimized REP instruction for medium-sized blocks,
     but for very small blocks a loop is better.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  4,                        /* scalar_stmt_cost.  */
  2,                        /* scalar load_cost.  */
  2,                        /* scalar_store_cost.  */
  6,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  2,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  2,                        /* vec_store_cost.  */
  2,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver2_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (5)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),      /* HI */
   COSTS_N_INSNS (51),      /* SI */
   COSTS_N_INSNS (83),      /* DI */
   COSTS_N_INSNS (83)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 3},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
                            /* MOVD reg64, xmmreg Double FSTORE 4
                               MOVD reg32, xmmreg Double FSTORE 4
                               MOVD reg64, xmmreg Double FADD 3
                               MOVD reg32, xmmreg Double FADD 3 */
  32,                       /* size of l1 cache.  */
  2048,                     /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  100,                      /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (4),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),       /* cost of FSQRT instruction.  */

  {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
              {-1, rep_prefix_4_byte, false}}},
   {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
              {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
              {-1, libcall, false}}}},
  4,                        /* scalar_stmt_cost.  */
  2,                        /* scalar load_cost.  */
  2,                        /* scalar_store_cost.  */
  6,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  2,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  2,                        /* vec_store_cost.  */
  2,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (3),        /* cost of a lea instruction */
  COSTS_N_INSNS (4),        /* variable shift costs */
  COSTS_N_INSNS (4),        /* constant shift costs */
  {COSTS_N_INSNS (15),      /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),      /* HI */
   COSTS_N_INSNS (15),      /* SI */
   COSTS_N_INSNS (15),      /* DI */
   COSTS_N_INSNS (15)},     /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),      /* HI */
   COSTS_N_INSNS (56),      /* SI */
   COSTS_N_INSNS (56),      /* DI */
   COSTS_N_INSNS (56)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  16,                       /* "large" insn */
  2,                        /* cost for loading QImode using movzbl */
  {4, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 3, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  12,                       /* cost of moving SSE register */
  {12, 12, 12},             /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  10,                       /* MMX or SSE register to integer */
  8,                        /* size of l1 cache.  */
  256,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (5),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),       /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte, false}, {48, loop, false},
              {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (10),      /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),      /* HI */
   COSTS_N_INSNS (10),      /* SI */
   COSTS_N_INSNS (10),      /* DI */
   COSTS_N_INSNS (10)},     /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),      /* HI */
   COSTS_N_INSNS (66),      /* SI */
   COSTS_N_INSNS (66),      /* DI */
   COSTS_N_INSNS (66)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  16,                       /* "large" insn */
  17,                       /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  3,                        /* cost of reg,reg fld/fst */
  {12, 12, 12},             /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 4},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  6,                        /* cost of moving MMX register */
  {12, 12},                 /* cost of loading MMX registers
                               in SImode and DImode */
  {12, 12},                 /* cost of storing MMX registers
                               in SImode and DImode */
  6,                        /* cost of moving SSE register */
  {12, 12, 12},             /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {12, 12, 12},             /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  8,                        /* MMX or SSE register to integer */
  8,                        /* size of l1 cache.  */
  1024,                     /* size of l2 cache.  */
  128,                      /* size of prefetch block */
  8,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  COSTS_N_INSNS (6),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),       /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
   {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
              {100000, unrolled_loop, false}, {-1, libcall, false}}}},
  {{libcall, {{6, loop_1_byte, false}, {48, loop, false},
              {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
   {libcall, {{24, loop, false}, {64, unrolled_loop, false},
              {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
1411 struct processor_costs atom_cost
= {
1412 COSTS_N_INSNS (1), /* cost of an add instruction */
1413 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1414 COSTS_N_INSNS (1), /* variable shift costs */
1415 COSTS_N_INSNS (1), /* constant shift costs */
1416 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1417 COSTS_N_INSNS (4), /* HI */
1418 COSTS_N_INSNS (3), /* SI */
1419 COSTS_N_INSNS (4), /* DI */
1420 COSTS_N_INSNS (2)}, /* other */
1421 0, /* cost of multiply per each bit set */
1422 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1423 COSTS_N_INSNS (26), /* HI */
1424 COSTS_N_INSNS (42), /* SI */
1425 COSTS_N_INSNS (74), /* DI */
1426 COSTS_N_INSNS (74)}, /* other */
1427 COSTS_N_INSNS (1), /* cost of movsx */
1428 COSTS_N_INSNS (1), /* cost of movzx */
1429 8, /* "large" insn */
1430 17, /* MOVE_RATIO */
1431 4, /* cost for loading QImode using movzbl */
1432 {4, 4, 4}, /* cost of loading integer registers
1433 in QImode, HImode and SImode.
1434 Relative to reg-reg move (2). */
1435 {4, 4, 4}, /* cost of storing integer registers */
1436 4, /* cost of reg,reg fld/fst */
1437 {12, 12, 12}, /* cost of loading fp registers
1438 in SFmode, DFmode and XFmode */
1439 {6, 6, 8}, /* cost of storing fp registers
1440 in SFmode, DFmode and XFmode */
1441 2, /* cost of moving MMX register */
1442 {8, 8}, /* cost of loading MMX registers
1443 in SImode and DImode */
1444 {8, 8}, /* cost of storing MMX registers
1445 in SImode and DImode */
1446 2, /* cost of moving SSE register */
1447 {8, 8, 8}, /* cost of loading SSE registers
1448 in SImode, DImode and TImode */
1449 {8, 8, 8}, /* cost of storing SSE registers
1450 in SImode, DImode and TImode */
1451 5, /* MMX or SSE register to integer */
1452 32, /* size of l1 cache. */
1453 256, /* size of l2 cache. */
1454 64, /* size of prefetch block */
1455 6, /* number of parallel prefetches */
1456 3, /* Branch cost */
1457 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1458 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1459 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1460 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1461 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1462 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1463 {{libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1464 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1465 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
1466 {{libcall, {{8, loop, false}, {15, unrolled_loop, false},
1467 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1468 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1469 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
1470 1, /* scalar_stmt_cost. */
1471 1, /* scalar load_cost. */
1472 1, /* scalar_store_cost. */
1473 1, /* vec_stmt_cost. */
1474 1, /* vec_to_scalar_cost. */
1475 1, /* scalar_to_vec_cost. */
1476 1, /* vec_align_load_cost. */
1477 2, /* vec_unalign_load_cost. */
1478 1, /* vec_store_cost. */
1479 3, /* cond_taken_branch_cost. */
1480 1, /* cond_not_taken_branch_cost. */
};
1483 /* Generic64 should produce code tuned for Nocona and K8. */
1485 struct processor_costs generic64_cost = {
1486 COSTS_N_INSNS (1), /* cost of an add instruction */
1487 /* On all chips taken into consideration lea is 2 cycles and more. With
1488 this cost however our current implementation of synth_mult results in
1489 use of unnecessary temporary registers causing regression on several
1490 SPECfp benchmarks. */
1491 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1492 COSTS_N_INSNS (1), /* variable shift costs */
1493 COSTS_N_INSNS (1), /* constant shift costs */
1494 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1495 COSTS_N_INSNS (4), /* HI */
1496 COSTS_N_INSNS (3), /* SI */
1497 COSTS_N_INSNS (4), /* DI */
1498 COSTS_N_INSNS (2)}, /* other */
1499 0, /* cost of multiply per each bit set */
1500 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1501 COSTS_N_INSNS (26), /* HI */
1502 COSTS_N_INSNS (42), /* SI */
1503 COSTS_N_INSNS (74), /* DI */
1504 COSTS_N_INSNS (74)}, /* other */
1505 COSTS_N_INSNS (1), /* cost of movsx */
1506 COSTS_N_INSNS (1), /* cost of movzx */
1507 8, /* "large" insn */
1508 17, /* MOVE_RATIO */
1509 4, /* cost for loading QImode using movzbl */
1510 {4, 4, 4}, /* cost of loading integer registers
1511 in QImode, HImode and SImode.
1512 Relative to reg-reg move (2). */
1513 {4, 4, 4}, /* cost of storing integer registers */
1514 4, /* cost of reg,reg fld/fst */
1515 {12, 12, 12}, /* cost of loading fp registers
1516 in SFmode, DFmode and XFmode */
1517 {6, 6, 8}, /* cost of storing fp registers
1518 in SFmode, DFmode and XFmode */
1519 2, /* cost of moving MMX register */
1520 {8, 8}, /* cost of loading MMX registers
1521 in SImode and DImode */
1522 {8, 8}, /* cost of storing MMX registers
1523 in SImode and DImode */
1524 2, /* cost of moving SSE register */
1525 {8, 8, 8}, /* cost of loading SSE registers
1526 in SImode, DImode and TImode */
1527 {8, 8, 8}, /* cost of storing SSE registers
1528 in SImode, DImode and TImode */
1529 5, /* MMX or SSE register to integer */
1530 32, /* size of l1 cache. */
1531 512, /* size of l2 cache. */
1532 64, /* size of prefetch block */
1533 6, /* number of parallel prefetches */
1534 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1535 value is increased to the perhaps more appropriate value of 5. */
1536 3, /* Branch cost */
1537 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1538 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1539 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1540 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1541 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1542 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1543 {DUMMY_STRINGOP_ALGS,
1544 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1545 {-1, libcall, false}}}},
1546 {DUMMY_STRINGOP_ALGS,
1547 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1548 {-1, libcall, false}}}},
1549 1, /* scalar_stmt_cost. */
1550 1, /* scalar load_cost. */
1551 1, /* scalar_store_cost. */
1552 1, /* vec_stmt_cost. */
1553 1, /* vec_to_scalar_cost. */
1554 1, /* scalar_to_vec_cost. */
1555 1, /* vec_align_load_cost. */
1556 2, /* vec_unalign_load_cost. */
1557 1, /* vec_store_cost. */
1558 3, /* cond_taken_branch_cost. */
1559 1, /* cond_not_taken_branch_cost. */
};
1562 /* core_cost should produce code tuned for the Core family of CPUs. */
1564 struct processor_costs core_cost = {
1565 COSTS_N_INSNS (1), /* cost of an add instruction */
1566 /* On all chips taken into consideration lea is 2 cycles and more. With
1567 this cost however our current implementation of synth_mult results in
1568 use of unnecessary temporary registers causing regression on several
1569 SPECfp benchmarks. */
1570 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1571 COSTS_N_INSNS (1), /* variable shift costs */
1572 COSTS_N_INSNS (1), /* constant shift costs */
1573 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1574 COSTS_N_INSNS (4), /* HI */
1575 COSTS_N_INSNS (3), /* SI */
1576 COSTS_N_INSNS (4), /* DI */
1577 COSTS_N_INSNS (2)}, /* other */
1578 0, /* cost of multiply per each bit set */
1579 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1580 COSTS_N_INSNS (26), /* HI */
1581 COSTS_N_INSNS (42), /* SI */
1582 COSTS_N_INSNS (74), /* DI */
1583 COSTS_N_INSNS (74)}, /* other */
1584 COSTS_N_INSNS (1), /* cost of movsx */
1585 COSTS_N_INSNS (1), /* cost of movzx */
1586 8, /* "large" insn */
1587 17, /* MOVE_RATIO */
1588 4, /* cost for loading QImode using movzbl */
1589 {4, 4, 4}, /* cost of loading integer registers
1590 in QImode, HImode and SImode.
1591 Relative to reg-reg move (2). */
1592 {4, 4, 4}, /* cost of storing integer registers */
1593 4, /* cost of reg,reg fld/fst */
1594 {12, 12, 12}, /* cost of loading fp registers
1595 in SFmode, DFmode and XFmode */
1596 {6, 6, 8}, /* cost of storing fp registers
1597 in SFmode, DFmode and XFmode */
1598 2, /* cost of moving MMX register */
1599 {8, 8}, /* cost of loading MMX registers
1600 in SImode and DImode */
1601 {8, 8}, /* cost of storing MMX registers
1602 in SImode and DImode */
1603 2, /* cost of moving SSE register */
1604 {8, 8, 8}, /* cost of loading SSE registers
1605 in SImode, DImode and TImode */
1606 {8, 8, 8}, /* cost of storing SSE registers
1607 in SImode, DImode and TImode */
1608 5, /* MMX or SSE register to integer */
1609 64, /* size of l1 cache. */
1610 512, /* size of l2 cache. */
1611 64, /* size of prefetch block */
1612 6, /* number of parallel prefetches */
1613 /* FIXME perhaps more appropriate value is 5. */
1614 3, /* Branch cost */
1615 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1616 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1617 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1618 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1619 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1620 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1621 {{libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1622 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1623 {-1, libcall, false}}}},
1624 {{libcall, {{6, loop_1_byte, true},
1626 {8192, rep_prefix_4_byte, true},
1627 {-1, libcall, false}}},
1628 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1629 {-1, libcall, false}}}},
1630 1, /* scalar_stmt_cost. */
1631 1, /* scalar load_cost. */
1632 1, /* scalar_store_cost. */
1633 1, /* vec_stmt_cost. */
1634 1, /* vec_to_scalar_cost. */
1635 1, /* scalar_to_vec_cost. */
1636 1, /* vec_align_load_cost. */
1637 2, /* vec_unalign_load_cost. */
1638 1, /* vec_store_cost. */
1639 3, /* cond_taken_branch_cost. */
1640 1, /* cond_not_taken_branch_cost. */
};
1643 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
Core 2 and K8. */
1646 struct processor_costs generic32_cost = {
1647 COSTS_N_INSNS (1), /* cost of an add instruction */
1648 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1649 COSTS_N_INSNS (1), /* variable shift costs */
1650 COSTS_N_INSNS (1), /* constant shift costs */
1651 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1652 COSTS_N_INSNS (4), /* HI */
1653 COSTS_N_INSNS (3), /* SI */
1654 COSTS_N_INSNS (4), /* DI */
1655 COSTS_N_INSNS (2)}, /* other */
1656 0, /* cost of multiply per each bit set */
1657 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1658 COSTS_N_INSNS (26), /* HI */
1659 COSTS_N_INSNS (42), /* SI */
1660 COSTS_N_INSNS (74), /* DI */
1661 COSTS_N_INSNS (74)}, /* other */
1662 COSTS_N_INSNS (1), /* cost of movsx */
1663 COSTS_N_INSNS (1), /* cost of movzx */
1664 8, /* "large" insn */
1665 17, /* MOVE_RATIO */
1666 4, /* cost for loading QImode using movzbl */
1667 {4, 4, 4}, /* cost of loading integer registers
1668 in QImode, HImode and SImode.
1669 Relative to reg-reg move (2). */
1670 {4, 4, 4}, /* cost of storing integer registers */
1671 4, /* cost of reg,reg fld/fst */
1672 {12, 12, 12}, /* cost of loading fp registers
1673 in SFmode, DFmode and XFmode */
1674 {6, 6, 8}, /* cost of storing fp registers
1675 in SFmode, DFmode and XFmode */
1676 2, /* cost of moving MMX register */
1677 {8, 8}, /* cost of loading MMX registers
1678 in SImode and DImode */
1679 {8, 8}, /* cost of storing MMX registers
1680 in SImode and DImode */
1681 2, /* cost of moving SSE register */
1682 {8, 8, 8}, /* cost of loading SSE registers
1683 in SImode, DImode and TImode */
1684 {8, 8, 8}, /* cost of storing SSE registers
1685 in SImode, DImode and TImode */
1686 5, /* MMX or SSE register to integer */
1687 32, /* size of l1 cache. */
1688 256, /* size of l2 cache. */
1689 64, /* size of prefetch block */
1690 6, /* number of parallel prefetches */
1691 3, /* Branch cost */
1692 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1693 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1694 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1695 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1696 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1697 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1698 {{libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1699 {-1, libcall, false}}},
1700 DUMMY_STRINGOP_ALGS},
1701 {{libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1702 {-1, libcall, false}}},
1703 DUMMY_STRINGOP_ALGS},
1704 1, /* scalar_stmt_cost. */
1705 1, /* scalar load_cost. */
1706 1, /* scalar_store_cost. */
1707 1, /* vec_stmt_cost. */
1708 1, /* vec_to_scalar_cost. */
1709 1, /* scalar_to_vec_cost. */
1710 1, /* vec_align_load_cost. */
1711 2, /* vec_unalign_load_cost. */
1712 1, /* vec_store_cost. */
1713 3, /* cond_taken_branch_cost. */
1714 1, /* cond_not_taken_branch_cost. */
};
1717 /* Set by -mtune. */
1718 const struct processor_costs *ix86_tune_cost = &pentium_cost;
1720 /* Set by -mtune or -Os. */
1721 const struct processor_costs *ix86_cost = &pentium_cost;
1723 /* Processor feature/optimization bitmasks. */
1724 #define m_386 (1<<PROCESSOR_I386)
1725 #define m_486 (1<<PROCESSOR_I486)
1726 #define m_PENT (1<<PROCESSOR_PENTIUM)
1727 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1728 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1729 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1730 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1731 #define m_CORE2 (1<<PROCESSOR_CORE2)
1732 #define m_COREI7 (1<<PROCESSOR_COREI7)
1733 #define m_CORE2I7 (m_CORE2 | m_COREI7)
1734 #define m_ATOM (1<<PROCESSOR_ATOM)
1736 #define m_GEODE (1<<PROCESSOR_GEODE)
1737 #define m_K6 (1<<PROCESSOR_K6)
1738 #define m_K6_GEODE (m_K6 | m_GEODE)
1739 #define m_K8 (1<<PROCESSOR_K8)
1740 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1741 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1742 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1743 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1744 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1745 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
1746 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3)
1747 #define m_BTVER (m_BTVER1 | m_BTVER2)
1748 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1749 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
1750 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
1752 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1753 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1755 /* Generic instruction choice should be common subset of supported CPUs
1756 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1757 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
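/* Editorial illustration (not part of the original source): each m_* mask
   is a single bit indexed by processor_type, so a tuning-table entry is
   tested by ANDing it with the bit of the processor selected by -mtune.
   A minimal sketch, with a hypothetical helper name: */
#if 0
static bool
example_tune_feature_enabled (unsigned int feature_mask,
			      enum processor_type tune)
{
  /* e.g. feature_mask = m_CORE2I7 | m_GENERIC; true when tuning for Core. */
  return (feature_mask & (1U << tune)) != 0;
}
#endif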
1759 /* Feature tests against the various tunings. */
1760 unsigned char ix86_tune_features[X86_TUNE_LAST];
1762 /* Feature tests against the various tunings used to create ix86_tune_features
1763 based on the processor mask. */
1764 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1765 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1766 negatively, so enabling for Generic64 seems like good code size
1767 tradeoff. We can't enable it for 32bit generic because it does not
1768 work well with PPro base chips. */
1769 m_386 | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1771 /* X86_TUNE_PUSH_MEMORY */
1772 m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1774 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1777 /* X86_TUNE_UNROLL_STRLEN */
1778 m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1780 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1781 on simulation result. But after P4 was made, no performance benefit
1782 was observed with branch hints. It also increases the code size.
1783 As a result, icc never generates branch hints. */
1786 /* X86_TUNE_DOUBLE_WITH_ADD */
1789 /* X86_TUNE_USE_SAHF */
1790 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC,
1792 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1793 partial dependencies. */
1794 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1796 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1797 register stalls on Generic32 compilation setting as well. However
1798 in current implementation the partial register stalls are not eliminated
1799 very well - they can be introduced via subregs synthesized by combine
1800 and can happen in caller/callee saving sequences. Because this option
1801 pays back little on PPro based chips and is in conflict with partial reg
1802 dependencies used by Athlon/P4 based chips, it is better to leave it off
1803 for generic32 for now. */
1806 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1807 m_CORE2I7 | m_GENERIC,
1809 /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
1810 * on 16-bit immediate moves into memory on Core2 and Corei7. */
1811 m_CORE2I7 | m_GENERIC,
1813 /* X86_TUNE_USE_HIMODE_FIOP */
1814 m_386 | m_486 | m_K6_GEODE,
1816 /* X86_TUNE_USE_SIMODE_FIOP */
1817 ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
1819 /* X86_TUNE_USE_MOV0 */
1822 /* X86_TUNE_USE_CLTD */
1823 ~(m_PENT | m_ATOM | m_K6),
1825 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1828 /* X86_TUNE_SPLIT_LONG_MOVES */
1831 /* X86_TUNE_READ_MODIFY_WRITE */
1834 /* X86_TUNE_READ_MODIFY */
1837 /* X86_TUNE_PROMOTE_QIMODE */
1838 m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1840 /* X86_TUNE_FAST_PREFIX */
1841 ~(m_386 | m_486 | m_PENT),
1843 /* X86_TUNE_SINGLE_STRINGOP */
1844 m_386 | m_P4_NOCONA,
1846 /* X86_TUNE_QIMODE_MATH */
1849 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1850 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1851 might be considered for Generic32 if our scheme for avoiding partial
1852 stalls was more effective. */
1855 /* X86_TUNE_PROMOTE_QI_REGS */
1858 /* X86_TUNE_PROMOTE_HI_REGS */
1861 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1862 over esp addition. */
1863 m_386 | m_486 | m_PENT | m_PPRO,
1865 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1866 over esp addition. */
1869 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1870 over esp subtraction. */
1871 m_386 | m_486 | m_PENT | m_K6_GEODE,
1873 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1874 over esp subtraction. */
1875 m_PENT | m_K6_GEODE,
1877 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1878 for DFmode copies */
1879 ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
1881 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1882 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
1884 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1885 conflict here in between PPro/Pentium4 based chips that treat 128bit
1886 SSE registers as single units versus K8 based chips that divide SSE
1887 registers into two 64bit halves. This knob promotes all store destinations
1888 to be 128bit to allow register renaming on 128bit SSE units, but usually
1889 results in one extra microop on 64bit SSE units. Experimental results
1890 show that disabling this option on P4 brings over 20% SPECfp regression,
1891 while enabling it on K8 brings roughly 2.4% regression that can be partly
1892 masked by careful scheduling of moves. */
1893 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
1895 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1896 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER,
1898 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1901 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1904 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1905 are resolved on SSE register parts instead of whole registers, so we may
1906 maintain just lower part of scalar values in proper format leaving the
1907 upper part undefined. */
1910 /* X86_TUNE_SSE_TYPELESS_STORES */
1913 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1914 m_PPRO | m_P4_NOCONA,
1916 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1917 m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
1919 /* X86_TUNE_PROLOGUE_USING_MOVE */
1920 m_PPRO | m_ATHLON_K8,
1922 /* X86_TUNE_EPILOGUE_USING_MOVE */
1923 m_PPRO | m_ATHLON_K8,
1925 /* X86_TUNE_SHIFT1 */
1928 /* X86_TUNE_USE_FFREEP */
1931 /* X86_TUNE_INTER_UNIT_MOVES */
1932 ~(m_AMD_MULTIPLE | m_GENERIC),
1934 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1935 ~(m_AMDFAM10 | m_BDVER),
1937 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1938 than 4 branch instructions in the 16 byte window. */
1939 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
1941 /* X86_TUNE_SCHEDULE */
1942 m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1944 /* X86_TUNE_USE_BT */
1945 m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
1947 /* X86_TUNE_USE_INCDEC */
1948 ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),
1950 /* X86_TUNE_PAD_RETURNS */
1951 m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,
1953 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function. */
1956 /* X86_TUNE_EXT_80387_CONSTANTS */
1957 m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
1959 /* X86_TUNE_AVOID_VECTOR_DECODE */
1960 m_CORE2I7 | m_K8 | m_GENERIC64,
1962 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1963 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1966 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1967 vector path on AMD machines. */
1968 m_CORE2I7 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64,
1970 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
machines. */
1972 m_CORE2I7 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64,
1974 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
than a MOV. */
1978 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1979 but one byte longer. */
1982 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1983 operand that cannot be represented using a modRM byte. The XOR
1984 replacement is long decoded, so this split helps here as well. */
1987 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
from FP to FP. */
1989 m_CORE2I7 | m_AMDFAM10 | m_GENERIC,
1991 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1992 from integer to FP. */
1995 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1996 with a subsequent conditional jump instruction into a single
1997 compare-and-branch uop. */
2000 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2001 will impact LEA instruction selection. */
2004 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
instructions. */
2008 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2009 at -O3. For the moment, the prefetching seems badly tuned for Intel
chips. */
2011 m_K6_GEODE | m_AMD_MULTIPLE,
2013 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2014 the auto-vectorizer. */
2017 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2018 during reassociation of integer computation. */
2021 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2022 during reassociation of fp computation. */
2025 /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
2026 regs instead of memory. */
2027 m_COREI7 | m_CORE2I7,
2029 /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
2030 a conditional move. */
};
2034 /* Feature tests against the various architecture variations. */
2035 unsigned char ix86_arch_features[X86_ARCH_LAST];
2037 /* Feature tests against the various architecture variations, used to create
2038 ix86_arch_features based on the processor mask. */
2039 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2040 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2041 ~(m_386 | m_486 | m_PENT | m_K6),
2043 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2046 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2049 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2052 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
};
2056 static const unsigned int x86_accumulate_outgoing_args
2057 = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;
2059 static const unsigned int x86_arch_always_fancy_math_387
2060 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
2062 static const unsigned int x86_avx256_split_unaligned_load
2063 = m_COREI7 | m_GENERIC;
2065 static const unsigned int x86_avx256_split_unaligned_store
2066 = m_COREI7 | m_BDVER | m_GENERIC;
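/* Editorial illustration (not part of the original source): masks like
   x86_avx256_split_unaligned_load are consulted against the -mtune bit to
   turn on the corresponding target flag during option override. A sketch,
   assuming ix86_tune_mask holds (1 << ix86_tune): */
#if 0
  if (x86_avx256_split_unaligned_load & ix86_tune_mask)
    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
  if (x86_avx256_split_unaligned_store & ix86_tune_mask)
    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
#endif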
2068 /* In case the average insn count for single function invocation is
2069 lower than this constant, emit fast (but longer) prologue and
epilogue code. */
2071 #define FAST_PROLOGUE_INSN_COUNT 20
2073 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2074 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2075 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2076 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2078 /* Array of the smallest class containing reg number REGNO, indexed by
2079 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2081 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
2083 /* ax, dx, cx, bx */
2084 AREG, DREG, CREG, BREG,
2085 /* si, di, bp, sp */
2086 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2088 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2089 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2092 /* flags, fpsr, fpcr, frame */
2093 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2095 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2098 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2101 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2102 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2103 /* SSE REX registers */
2104 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
};
2108 /* The "default" register map used in 32bit mode. */
2110 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
2112 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2113 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2114 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2115 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2116 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2117 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2118 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
};
2121 /* The "default" register map used in 64bit mode. */
2123 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
2125 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2126 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2127 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2128 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2129 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2130 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2131 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
};
2134 /* Define the register numbers to be used in Dwarf debugging information.
2135 The SVR4 reference port C compiler uses the following register numbers
2136 in its Dwarf output code:
2137 0 for %eax (gcc regno = 0)
2138 1 for %ecx (gcc regno = 2)
2139 2 for %edx (gcc regno = 1)
2140 3 for %ebx (gcc regno = 3)
2141 4 for %esp (gcc regno = 7)
2142 5 for %ebp (gcc regno = 6)
2143 6 for %esi (gcc regno = 4)
2144 7 for %edi (gcc regno = 5)
2145 The following three DWARF register numbers are never generated by
2146 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2147 believes these numbers have these meanings.
2148 8 for %eip (no gcc equivalent)
2149 9 for %eflags (gcc regno = 17)
2150 10 for %trapno (no gcc equivalent)
2151 It is not at all clear how we should number the FP stack registers
2152 for the x86 architecture. If the version of SDB on x86/svr4 were
2153 a bit less brain dead with respect to floating-point then we would
2154 have a precedent to follow with respect to DWARF register numbers
2155 for x86 FP registers, but the SDB on x86/svr4 is so completely
2156 broken with respect to FP registers that it is hardly worth thinking
2157 of it as something to strive for compatibility with.
2158 The version of x86/svr4 SDB I have at the moment does (partially)
2159 seem to believe that DWARF register number 11 is associated with
2160 the x86 register %st(0), but that's about all. Higher DWARF
2161 register numbers don't seem to be associated with anything in
2162 particular, and even for DWARF regno 11, SDB only seems to under-
2163 stand that it should say that a variable lives in %st(0) (when
2164 asked via an `=' command) if we said it was in DWARF regno 11,
2165 but SDB still prints garbage when asked for the value of the
2166 variable in question (via a `/' command).
2167 (Also note that the labels SDB prints for various FP stack regs
2168 when doing an `x' command are all wrong.)
2169 Note that these problems generally don't affect the native SVR4
2170 C compiler because it doesn't allow the use of -O with -g and
2171 because when it is *not* optimizing, it allocates a memory
2172 location for each floating-point variable, and the memory
2173 location is what gets described in the DWARF AT_location
2174 attribute for the variable in question.
2175 Regardless of the severe mental illness of the x86/svr4 SDB, we
2176 do something sensible here and we use the following DWARF
2177 register numbers. Note that these are all stack-top-relative
2179 11 for %st(0) (gcc regno = 8)
2180 12 for %st(1) (gcc regno = 9)
2181 13 for %st(2) (gcc regno = 10)
2182 14 for %st(3) (gcc regno = 11)
2183 15 for %st(4) (gcc regno = 12)
2184 16 for %st(5) (gcc regno = 13)
2185 17 for %st(6) (gcc regno = 14)
2186 18 for %st(7) (gcc regno = 15)
*/
2188 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
2190 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2191 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2192 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2193 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2194 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2195 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2196 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
};
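/* Editorial illustration (not part of the original source): the maps above
   translate a GCC register number into a debug-format register number, so
   under the SVR4 numbering %ebp (gcc regno 6) is reported as DWARF regno 5: */
#if 0
  int gcc_regno = 6;                                   /* %ebp */
  int dwarf_regno = svr4_dbx_register_map[gcc_regno];  /* == 5 */
#endif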
2199 /* Define parameter passing and return registers. */
2201 static int const x86_64_int_parameter_registers[6] =
{
2203 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};
2206 static int const x86_64_ms_abi_int_parameter_registers[4] =
{
2208 CX_REG, DX_REG, R8_REG, R9_REG
};
2211 static int const x86_64_int_return_registers[4] =
{
2213 AX_REG, DX_REG, DI_REG, SI_REG
};
2216 /* Define the structure for the machine field in struct function. */
2218 struct GTY(()) stack_local_entry {
2219 unsigned short mode;
2222 struct stack_local_entry *next;
};
2225 /* Structure describing stack frame layout.
2226 Stack grows downward:
2232 saved static chain if ix86_static_chain_on_stack
2234 saved frame pointer if frame_pointer_needed
2235 <- HARD_FRAME_POINTER
2241 <- sse_regs_save_offset
2244 [va_arg registers] |
2248 [padding2] | = to_allocate
*/
struct ix86_frame
{
2257 int outgoing_arguments_size;
2259 /* The offsets relative to ARG_POINTER. */
2260 HOST_WIDE_INT frame_pointer_offset;
2261 HOST_WIDE_INT hard_frame_pointer_offset;
2262 HOST_WIDE_INT stack_pointer_offset;
2263 HOST_WIDE_INT hfp_save_offset;
2264 HOST_WIDE_INT reg_save_offset;
2265 HOST_WIDE_INT sse_reg_save_offset;
2267 /* When save_regs_using_mov is set, emit prologue using
2268 move instead of push instructions. */
2269 bool save_regs_using_mov;
};
2272 /* Which cpu are we scheduling for. */
2273 enum attr_cpu ix86_schedule;
2275 /* Which cpu are we optimizing for. */
2276 enum processor_type ix86_tune;
2278 /* Which instruction set architecture to use. */
2279 enum processor_type ix86_arch;
2281 /* True if processor has SSE prefetch instruction. */
2282 unsigned char x86_prefetch_sse;
2284 /* -mstackrealign option */
2285 static const char ix86_force_align_arg_pointer_string[]
2286 = "force_align_arg_pointer";
2288 static rtx (*ix86_gen_leave) (void);
2289 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2290 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2291 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2292 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2293 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2294 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2295 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2296 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2297 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2298 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2299 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2301 /* Preferred alignment for stack boundary in bits. */
2302 unsigned int ix86_preferred_stack_boundary;
2304 /* Alignment for incoming stack boundary in bits specified at
command line. */
2306 static unsigned int ix86_user_incoming_stack_boundary;
2308 /* Default alignment for incoming stack boundary in bits. */
2309 static unsigned int ix86_default_incoming_stack_boundary;
2311 /* Alignment for incoming stack boundary in bits. */
2312 unsigned int ix86_incoming_stack_boundary;
2314 /* Calling abi specific va_list type nodes. */
2315 static GTY(()) tree sysv_va_list_type_node;
2316 static GTY(()) tree ms_va_list_type_node;
2318 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2319 char internal_label_prefix[16];
2320 int internal_label_prefix_len;
2322 /* Fence to use after loop using movnt. */
2325 /* Register class used for passing given 64bit part of the argument.
2326 These represent classes as documented by the PS ABI, with the exception
2327 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2328 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2330 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2331 whenever possible (upper half does contain padding). */
2332 enum x86_64_reg_class
{
2335 X86_64_INTEGER_CLASS,
2336 X86_64_INTEGERSI_CLASS,
2343 X86_64_COMPLEX_X87_CLASS,
};
2347 #define MAX_CLASSES 4
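/* Editorial illustration (not part of the original source): an aggregate
   is classified into at most MAX_CLASSES eightbytes. For example, a
   struct { long l; double d; } yields X86_64_INTEGER_CLASS for the first
   eightbyte and an SSE class for the second, so under the SysV ABI it is
   returned in %rax and %xmm0. */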
2349 /* Table of constants used by fldpi, fldln2, etc.... */
2350 static REAL_VALUE_TYPE ext_80387_constants_table[5];
2351 static bool ext_80387_constants_init = 0;
2354 static struct machine_function * ix86_init_machine_status (void);
2355 static rtx ix86_function_value (const_tree, const_tree, bool);
2356 static bool ix86_function_value_regno_p (const unsigned int);
2357 static unsigned int ix86_function_arg_boundary (enum machine_mode,
const_tree);
2359 static rtx ix86_static_chain (const_tree, bool);
2360 static int ix86_function_regparm (const_tree, const_tree);
2361 static void ix86_compute_frame_layout (struct ix86_frame *);
2362 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
rtx, rtx, int);
2364 static void ix86_add_new_builtins (HOST_WIDE_INT);
2365 static tree ix86_canonical_va_list_type (tree);
2366 static void predict_jump (int);
2367 static unsigned int split_stack_prologue_scratch_regno (void);
2368 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2370 enum ix86_function_specific_strings
{
2372 IX86_FUNCTION_SPECIFIC_ARCH,
2373 IX86_FUNCTION_SPECIFIC_TUNE,
2374 IX86_FUNCTION_SPECIFIC_MAX
};
2377 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2378 const char *, enum fpmath_unit, bool);
2379 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2380 static void ix86_function_specific_save (struct cl_target_option *);
2381 static void ix86_function_specific_restore (struct cl_target_option *);
2382 static void ix86_function_specific_print (FILE *, int,
2383 struct cl_target_option *);
2384 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2385 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2386 struct gcc_options *);
2387 static bool ix86_can_inline_p (tree, tree);
2388 static void ix86_set_current_function (tree);
2389 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2391 static enum calling_abi ix86_function_abi (const_tree);
2394 #ifndef SUBTARGET32_DEFAULT_CPU
2395 #define SUBTARGET32_DEFAULT_CPU "i386"
#endif
2398 /* Whether -mtune= or -march= were specified */
2399 static int ix86_tune_defaulted;
2400 static int ix86_arch_specified;
2402 /* Vectorization library interface and handlers. */
2403 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2405 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2406 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2408 /* Processor target table, indexed by processor number */
static struct ptt
{
2411 const struct processor_costs *cost; /* Processor costs */
2412 const int align_loop; /* Default alignments. */
2413 const int align_loop_max_skip;
2414 const int align_jump;
2415 const int align_jump_max_skip;
2416 const int align_func;
};
2419 static const struct ptt processor_target_table[PROCESSOR_max] =
{
2421 {&i386_cost, 4, 3, 4, 3, 4},
2422 {&i486_cost, 16, 15, 16, 15, 16},
2423 {&pentium_cost, 16, 7, 16, 7, 16},
2424 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2425 {&geode_cost, 0, 0, 0, 0, 0},
2426 {&k6_cost, 32, 7, 32, 7, 32},
2427 {&athlon_cost, 16, 7, 16, 7, 16},
2428 {&pentium4_cost, 0, 0, 0, 0, 0},
2429 {&k8_cost, 16, 7, 16, 7, 16},
2430 {&nocona_cost, 0, 0, 0, 0, 0},
2432 {&core_cost, 16, 10, 16, 10, 16},
2434 {&core_cost, 16, 10, 16, 10, 16},
2435 {&generic32_cost, 16, 7, 16, 7, 16},
2436 {&generic64_cost, 16, 10, 16, 10, 16},
2437 {&amdfam10_cost, 32, 24, 32, 7, 32},
2438 {&bdver1_cost, 32, 24, 32, 7, 32},
2439 {&bdver2_cost, 32, 24, 32, 7, 32},
2440 {&bdver3_cost, 32, 24, 32, 7, 32},
2441 {&btver1_cost, 32, 24, 32, 7, 32},
2442 {&btver2_cost, 32, 24, 32, 7, 32},
2443 {&atom_cost, 16, 15, 16, 7, 16}
};
2446 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
static bool
2479 gate_insert_vzeroupper (void)
{
2481 return TARGET_VZEROUPPER;
}
static unsigned int
2485 rest_of_handle_insert_vzeroupper (void)
{
int i;
2489 /* vzeroupper instructions are inserted immediately after reload to
2490 account for possible spills from 256bit registers. The pass
2491 reuses mode switching infrastructure by re-running mode insertion
2492 pass, so disable entities that have already been processed. */
2493 for (i = 0; i < MAX_386_ENTITIES; i++)
2494 ix86_optimize_mode_switching[i] = 0;
2496 ix86_optimize_mode_switching[AVX_U128] = 1;
2498 /* Call optimize_mode_switching. */
2499 pass_mode_switching.pass.execute ();
return 0;
}
2503 struct rtl_opt_pass pass_insert_vzeroupper =
{
 {
  RTL_PASS,
2507 "vzeroupper", /* name */
2508 OPTGROUP_NONE, /* optinfo_flags */
2509 gate_insert_vzeroupper, /* gate */
2510 rest_of_handle_insert_vzeroupper, /* execute */
2513 0, /* static_pass_number */
2514 TV_NONE, /* tv_id */
2515 0, /* properties_required */
2516 0, /* properties_provided */
2517 0, /* properties_destroyed */
2518 0, /* todo_flags_start */
2519 TODO_df_finish | TODO_verify_rtl_sharing |
2520 0, /* todo_flags_finish */
 }
};
2524 /* Return true if a red-zone is in use. */
static inline bool
2527 ix86_using_red_zone (void)
{
2529 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
2532 /* Return a string that documents the current -m options. The caller is
2533 responsible for freeing the string. */
static char *
2536 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2537 const char *tune, enum fpmath_unit fpmath,
bool add_nl_p)
{
2540 struct ix86_target_opts
{
2542 const char *option; /* option string */
2543 HOST_WIDE_INT mask; /* isa mask options */
};
2546 /* This table is ordered so that options like -msse4.2, which imply
2547 preceding options, match first. */
2548 static struct ix86_target_opts isa_opts[] =
{
2550 { "-mfma4", OPTION_MASK_ISA_FMA4
},
2551 { "-mfma", OPTION_MASK_ISA_FMA
},
2552 { "-mxop", OPTION_MASK_ISA_XOP
},
2553 { "-mlwp", OPTION_MASK_ISA_LWP
},
2554 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2555 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2556 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2557 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2558 { "-msse3", OPTION_MASK_ISA_SSE3
},
2559 { "-msse2", OPTION_MASK_ISA_SSE2
},
2560 { "-msse", OPTION_MASK_ISA_SSE
},
2561 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2562 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2563 { "-mmmx", OPTION_MASK_ISA_MMX
},
2564 { "-mabm", OPTION_MASK_ISA_ABM
},
2565 { "-mbmi", OPTION_MASK_ISA_BMI
},
2566 { "-mbmi2", OPTION_MASK_ISA_BMI2
},
2567 { "-mlzcnt", OPTION_MASK_ISA_LZCNT
},
2568 { "-mhle", OPTION_MASK_ISA_HLE
},
2569 { "-mfxsr", OPTION_MASK_ISA_FXSR
},
2570 { "-mrdseed", OPTION_MASK_ISA_RDSEED
},
2571 { "-mprfchw", OPTION_MASK_ISA_PRFCHW
},
2572 { "-madx", OPTION_MASK_ISA_ADX
},
2573 { "-mtbm", OPTION_MASK_ISA_TBM
},
2574 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2575 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2576 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2577 { "-maes", OPTION_MASK_ISA_AES
},
2578 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2579 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
2580 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
2581 { "-mf16c", OPTION_MASK_ISA_F16C
},
2582 { "-mrtm", OPTION_MASK_ISA_RTM
},
2583 { "-mxsave", OPTION_MASK_ISA_XSAVE
},
2584 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT
},
};
2588 static struct ix86_target_opts flag_opts[] =
{
2590 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2591 { "-mlong-double-64", MASK_LONG_DOUBLE_64
},
2592 { "-m80387", MASK_80387
},
2593 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2594 { "-malign-double", MASK_ALIGN_DOUBLE
},
2595 { "-mcld", MASK_CLD
},
2596 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2597 { "-mieee-fp", MASK_IEEE_FP
},
2598 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2599 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2600 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2601 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2602 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2603 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2604 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2605 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2606 { "-mrecip", MASK_RECIP
},
2607 { "-mrtd", MASK_RTD
},
2608 { "-msseregparm", MASK_SSEREGPARM
},
2609 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2610 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2611 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
2612 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
2613 { "-mvzeroupper", MASK_VZEROUPPER
},
2614 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
},
2615 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE
},
2616 { "-mprefer-avx128", MASK_PREFER_AVX128
},
};
2619 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2622 char target_other[40];
2632 memset (opts, '\0', sizeof (opts));
2634 /* Add -march= option. */
2637 opts[num][0] = "-march=";
2638 opts[num++][1] = arch;
2641 /* Add -mtune= option. */
2644 opts[num][0] = "-mtune=";
2645 opts[num++][1] = tune;
2648 /* Add -m32/-m64/-mx32. */
2649 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2651 if ((isa & OPTION_MASK_ABI_64) != 0)
2655 isa &= ~ (OPTION_MASK_ISA_64BIT
2656 | OPTION_MASK_ABI_64
2657 | OPTION_MASK_ABI_X32);
2661 opts[num++][0] = abi;
2663 /* Pick out the options in isa options. */
2664 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2666 if ((isa & isa_opts[i].mask) != 0)
2668 opts[num++][0] = isa_opts[i].option;
2669 isa &= ~isa_opts[i].mask;
2673 if (isa && add_nl_p)
2675 opts[num++][0] = isa_other;
2676 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
isa);
2680 /* Add flag options. */
2681 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2683 if ((flags & flag_opts[i].mask) != 0)
2685 opts[num++][0] = flag_opts[i].option;
2686 flags &= ~flag_opts[i].mask;
2690 if (flags && add_nl_p)
2692 opts[num++][0] = target_other;
2693 sprintf (target_other, "(other flags: %#x)", flags);
2696 /* Add -fpmath= option. */
2699 opts[num][0] = "-mfpmath=";
2700 switch ((int) fpmath)
2703 opts[num++][1] = "387";
2707 opts[num++][1] = "sse";
2710 case FPMATH_387 | FPMATH_SSE:
2711 opts[num++][1] = "sse+387";
2723 gcc_assert (num < ARRAY_SIZE (opts));
2725 /* Size the string. */
2727 sep_len = (add_nl_p) ? 3 : 1;
2728 for (i = 0; i < num; i++)
2731 for (j = 0; j < 2; j++)
2733 len += strlen (opts[i][j]);
2736 /* Build the string. */
2737 ret = ptr = (char *) xmalloc (len);
2740 for (i = 0; i < num; i++)
2744 for (j = 0; j < 2; j++)
2745 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2752 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2760 for (j = 0; j < 2; j++)
2763 memcpy (ptr, opts[i][j], len2[j]);
2765 line_len += len2[j];
2770 gcc_assert (ret + len >= ptr);
2775 /* Return true, if profiling code should be emitted before
2776 prologue. Otherwise it returns false.
2777 Note: For x86 with "hotfix" it is sorried, i.e. not supported. */
static bool
2779 ix86_profile_before_prologue (void)
{
2781 return flag_fentry != 0;
}
2784 /* Function that is callable from the debugger to print the current
options. */
static void
2787 ix86_debug_options (void)
{
2789 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2790 ix86_arch_string, ix86_tune_string,
ix86_fpmath, true);
2795 fprintf (stderr, "%s\n\n", opts);
2799 fputs ("<no options>\n\n", stderr);
}
2804 /* Override various settings based on options. If MAIN_ARGS_P, the
2805 options are from the command line, otherwise they are from
attribute(target). */
static void
2809 ix86_option_override_internal (bool main_args_p)
{
2812 unsigned int ix86_arch_mask, ix86_tune_mask;
2813 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2818 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2819 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2820 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2821 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2822 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2823 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2824 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2825 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2826 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2827 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2828 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2829 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2830 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2831 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2832 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2833 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2834 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2835 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2836 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2837 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2838 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2839 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2840 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2841 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2842 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2843 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2844 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2845 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2846 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2847 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2848 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2849 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2850 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
2851 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
2852 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
2853 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
2854 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
2855 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
2856 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
2857 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
2859 /* if this reaches 64, need to widen struct pta flags below */
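/* Editorial illustration (not part of the original source): each PTA_* bit
   marks an ISA available on a -march= target; the selection loop at the
   end of this excerpt turns a set bit into the user-visible ISA flag
   unless the user overrode it explicitly, e.g.: */
#if 0
  if (processor_alias_table[i].flags & PTA_SSE2
      && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
    ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
#endif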
static struct pta
{
2863 const char *const name; /* processor name or nickname. */
2864 const enum processor_type processor;
2865 const enum attr_cpu schedule;
2866 const unsigned HOST_WIDE_INT flags;
}
2868 const processor_alias_table[] =
{
2870 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2871 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2872 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2873 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2874 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
2875 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
2876 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2877 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2878 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2879 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2880 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2881 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_FXSR
},
2882 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2883 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
2884 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2885 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
2886 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2887 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2888 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
2889 PTA_MMX
|PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2890 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
2891 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2892 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
2893 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_FXSR
},
2894 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
2895 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2896 | PTA_CX16
| PTA_NO_SAHF
| PTA_FXSR
},
2897 {"core2", PROCESSOR_CORE2
, CPU_CORE2
,
2898 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2899 | PTA_SSSE3
| PTA_CX16
| PTA_FXSR
},
2900 {"corei7", PROCESSOR_COREI7
, CPU_COREI7
,
2901 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2902 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_FXSR
},
2903 {"corei7-avx", PROCESSOR_COREI7
, CPU_COREI7
,
2904 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2905 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2906 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
2907 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
2908 {"core-avx-i", PROCESSOR_COREI7
, CPU_COREI7
,
2909 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2910 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2911 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2912 | PTA_RDRND
| PTA_F16C
| PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
2913 {"core-avx2", PROCESSOR_COREI7
, CPU_COREI7
,
2914 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2915 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
2916 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2917 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
2918 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
| PTA_FXSR
| PTA_XSAVE
2920 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
2921 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2922 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
| PTA_FXSR
},
2923 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
2924 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2925 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
2926 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
2927 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
2928 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
2929 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2930 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
2931 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2932 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
2933 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2934 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
2935 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2936 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
2937 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2938 {"x86-64", PROCESSOR_K8
, CPU_K8
,
2939 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
2940 {"k8", PROCESSOR_K8
, CPU_K8
,
2941 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2942 | PTA_SSE2
| PTA_NO_SAHF
},
2943 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
2944 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2945 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2946 {"opteron", PROCESSOR_K8
, CPU_K8
,
2947 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2948 | PTA_SSE2
| PTA_NO_SAHF
},
2949 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
2950 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2951 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2952 {"athlon64", PROCESSOR_K8
, CPU_K8
,
2953 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2954 | PTA_SSE2
| PTA_NO_SAHF
},
2955 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
2956 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2957 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2958 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
2959 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2960 | PTA_SSE2
| PTA_NO_SAHF
},
2961 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
2962 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2963 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
2964 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
2965 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2966 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
2967 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
2968 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2969 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2970 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
2971 | PTA_XOP
| PTA_LWP
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
2972 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
2973 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2974 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2975 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
2976 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
2977 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
2978 {"bdver3", PROCESSOR_BDVER3
, CPU_BDVER3
,
2979 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2980 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2981 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
2982 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
2983 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
2985 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
2986 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2987 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_PRFCHW
2988 | PTA_FXSR
| PTA_XSAVE
},
2989 {"btver2", PROCESSOR_BTVER2
, CPU_GENERIC64
,
2990 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2991 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_SSE4_1
2992 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
2993 | PTA_BMI
| PTA_F16C
| PTA_MOVBE
| PTA_PRFCHW
2994 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
2996 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
2997 PTA_HLE
/* flags are only used for -march switch. */ },
2998 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3000 | PTA_HLE
/* flags are only used for -march switch. */ },
3003 /* -mrecip options. */
static struct
{
3006 const char *string; /* option name */
3007 unsigned int mask; /* mask bits to set */
}
3009 const recip_options[] =
{
3011 { "all", RECIP_MASK_ALL },
3012 { "none", RECIP_MASK_NONE },
3013 { "div", RECIP_MASK_DIV },
3014 { "sqrt", RECIP_MASK_SQRT },
3015 { "vec-div", RECIP_MASK_VEC_DIV },
3016 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
};
3019 int const pta_size = ARRAY_SIZE (processor_alias_table);
3021 /* Set up prefix/suffix so the error messages refer to either the command
3022 line argument, or the attribute(target). */
3031 prefix = "option(\"";
3036 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3037 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3038 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT)
3039 ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3040 #ifdef TARGET_BI_ARCH
3043 #if TARGET_BI_ARCH == 1
3044 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3045 is on and OPTION_MASK_ABI_X32 is off. We turn off
3046 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3049 ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3051 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3052 on and OPTION_MASK_ABI_64 is off. We turn off
3053 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3056 ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3063 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3064 OPTION_MASK_ABI_64 for TARGET_X32. */
3065 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3066 ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3068 else if (TARGET_LP64)
3070 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3071 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3072 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3073 ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3076 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3077 SUBTARGET_OVERRIDE_OPTIONS;
#endif
3080 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3081 SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
3084 /* -fPIC is the default for x86_64. */
3085 if (TARGET_MACHO && TARGET_64BIT)
3088 /* Need to check -mtune=generic first. */
3089 if (ix86_tune_string)
3091 if (!strcmp (ix86_tune_string, "generic")
3092 || !strcmp (ix86_tune_string, "i686")
3093 /* As special support for cross compilers we read -mtune=native
3094 as -mtune=generic. With native compilers we won't see the
3095 -mtune=native, as it was changed by the driver. */
3096 || !strcmp (ix86_tune_string, "native"))
3099 ix86_tune_string = "generic64";
3101 ix86_tune_string = "generic32";
3103 /* If this call is for setting the option attribute, allow the
3104 generic32/generic64 that was previously set. */
3105 else if (!main_args_p
3106 && (!strcmp (ix86_tune_string, "generic32")
3107 || !strcmp (ix86_tune_string, "generic64")))
3109 else if (!strncmp (ix86_tune_string, "generic", 7))
3110 error ("bad value (%s) for %stune=%s %s",
3111 ix86_tune_string
, prefix
, suffix
, sw
);
3112 else if (!strcmp (ix86_tune_string, "x86-64"))
3113 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3114 "%stune=k8%s or %stune=generic%s instead as appropriate",
3115 prefix, suffix, prefix, suffix, prefix, suffix);
3119 if (ix86_arch_string)
3120 ix86_tune_string = ix86_arch_string;
3121 if (!ix86_tune_string)
3123 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3124 ix86_tune_defaulted = 1;
3127 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3128 need to use a sensible tune option. */
3129 if (!strcmp (ix86_tune_string, "generic")
3130 || !strcmp (ix86_tune_string, "x86-64")
3131 || !strcmp (ix86_tune_string, "i686"))
3134 ix86_tune_string = "generic64";
3136 ix86_tune_string = "generic32";
3140 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3142 /* rep; movq isn't available in 32-bit code. */
3143 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3144 ix86_stringop_alg = no_stringop;
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;
  if (global_options_set.x_ix86_pmode)
    {
      if ((TARGET_LP64 && ix86_pmode == PMODE_SI)
	  || (!TARGET_64BIT && ix86_pmode == PMODE_DI))
	error ("address mode %qs not supported in the %s bit mode",
	       TARGET_64BIT ? "short" : "long",
	       TARGET_64BIT ? "64" : "32");
    }
  else
    ix86_pmode = TARGET_LP64 ? PMODE_DI : PMODE_SI;
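  /* Worked example (added commentary): -mx32 without an explicit
     -maddress-mode= lands in the else arm; TARGET_LP64 is false there,
     so ix86_pmode defaults to PMODE_SI and pointers stay 32 bits wide
     even though the ISA is 64-bit.  */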
  if (!global_options_set.x_ix86_abi)
    ix86_abi = DEFAULT_ABI;
  if (global_options_set.x_ix86_cmodel)
    {
      switch (ix86_cmodel)
	{
	case CM_SMALL:
	case CM_SMALL_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_SMALL_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "small", "32");
	  break;

	case CM_MEDIUM:
	case CM_MEDIUM_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_MEDIUM_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "medium", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "medium");
	  break;

	case CM_LARGE:
	case CM_LARGE_PIC:
	  if (flag_pic)
	    ix86_cmodel = CM_LARGE_PIC;
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "large", "32");
	  else if (TARGET_X32)
	    error ("code model %qs not supported in x32 mode",
		   "large");
	  break;

	case CM_32:
	  if (flag_pic)
	    error ("code model %s does not support PIC mode", "32");
	  if (TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "32", "64");
	  break;

	case CM_KERNEL:
	  if (flag_pic)
	    {
	      error ("code model %s does not support PIC mode", "kernel");
	      ix86_cmodel = CM_32;
	    }
	  if (!TARGET_64BIT)
	    error ("code model %qs not supported in the %s bit mode",
		   "kernel", "32");
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
	 use of rip-relative addressing.  This eliminates fixups that
	 would otherwise be needed if this object is to be placed in a
	 DLL, and is essentially just as efficient as direct addressing.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
      else if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
	ix86_cmodel = CM_32;
    }
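  /* Illustrative defaulting summary (added commentary): with no
     explicit -mcmodel=, 64-bit MS ABI gets CM_SMALL_PIC with flag_pic
     forced on, other 64-bit targets get CM_SMALL_PIC or CM_SMALL
     depending on -fpic, and 32-bit targets always get CM_32.  */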
  if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
    {
      error ("-masm=intel not supported in this configuration");
      ix86_asm_dialect = ASM_ATT;
    }
  if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;

	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
	  ix86_isa_flags |= OPTION_MASK_ISA_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_AVX2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_BMI
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI;
	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
	if (processor_alias_table[i].flags & PTA_TBM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
	  ix86_isa_flags |= OPTION_MASK_ISA_TBM;
	if (processor_alias_table[i].flags & PTA_BMI2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & PTA_RTM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
	  ix86_isa_flags |= OPTION_MASK_ISA_RTM;
	if (processor_alias_table[i].flags & PTA_HLE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
	  ix86_isa_flags |= OPTION_MASK_ISA_HLE;
	if (processor_alias_table[i].flags & PTA_PRFCHW
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
	  ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
	if (processor_alias_table[i].flags & PTA_RDSEED
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
	  ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
	if (processor_alias_table[i].flags & PTA_ADX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
	  ix86_isa_flags |= OPTION_MASK_ISA_ADX;
	if (processor_alias_table[i].flags & PTA_FXSR
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
	  ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
	if (processor_alias_table[i].flags & PTA_XSAVE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
	  ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
	if (processor_alias_table[i].flags & PTA_XSAVEOPT
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
	  ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;

	break;
      }
  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for %stune=%s %s",
	   prefix, suffix, sw);
  else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
    error ("bad value (%s) for %sarch=%s %s",
	   ix86_arch_string, prefix, suffix, sw);
  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
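  /* Demonstration sketch (added commentary, compiled out): how the
     one-hot ix86_arch_mask turns the initial_ix86_arch_features bit
     vectors into per-feature booleans.  The values below are made up
     purely for illustration.  */
#if 0
static void
arch_feature_demo (void)
{
  unsigned int arch = 3;                 /* hypothetical processor id */
  unsigned int mask = 1u << arch;        /* one-hot mask, here 0x8 */
  unsigned int feature_vec = 0x0c;       /* hypothetical feature bits */
  int enabled = !!(feature_vec & mask);  /* 1: arch 3 has this feature */
  (void) enabled;
}
#endif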
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT)
	  {
	    if (!(processor_alias_table[i].flags & PTA_64BIT))
	      {
		if (ix86_tune_defaulted)
		  {
		    ix86_tune_string = "x86-64";
		    for (i = 0; i < pta_size; i++)
		      if (! strcmp (ix86_tune_string,
				    processor_alias_table[i].name))
			break;
		    ix86_schedule = processor_alias_table[i].schedule;
		    ix86_tune = processor_alias_table[i].processor;
		  }
		else
		  error ("CPU you selected does not support x86-64 "
			 "instruction set");
	      }
	  }
	else
	  {
	    /* Adjust tuning when compiling for 32-bit ABI.  */
	    switch (ix86_tune)
	      {
	      case PROCESSOR_GENERIC64:
		ix86_tune = PROCESSOR_GENERIC32;
		ix86_schedule = CPU_PENTIUMPRO;
		break;

	      default:
		break;
	      }
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOV
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;
	break;
      }
  if (ix86_tune_specified && i == pta_size)
    error ("bad value (%s) for %stune=%s %s",
	   ix86_tune_string, prefix, suffix, sw);
  ix86_tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif
  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (flag_asynchronous_unwind_tables == 2)
	flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
  ix86_tune_cost = processor_target_table[ix86_tune].cost;
  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = ix86_tune_cost;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  /* Validate -mregparm= value.  */
  if (global_options_set.x_ix86_regparm)
    {
      if (TARGET_64BIT)
	warning (0, "-mregparm is ignored in 64-bit mode");
      if (ix86_regparm > REGPARM_MAX)
	{
	  error ("-mregparm=%d is not between 0 and %d",
		 ix86_regparm, REGPARM_MAX);
	  ix86_regparm = 0;
	}
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip
	= processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip
	= processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;
  /* Provide default for -mbranch-cost= value.  */
  if (!global_options_set.x_ix86_branch_cost)
    ix86_branch_cost = ix86_cost->branch_cost;
  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);

      if (TARGET_RTD)
	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
    }
  else
    {
      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;

      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;

      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use it when the programmer takes care to keep the stack from being
	 destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;
  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_finite_math_only)
    target_flags &= ~MASK_IEEE_FP;
  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
    target_flags &= ~MASK_NO_FANCY_MATH_387;
  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;
  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
  /* Enable SSE prefetch.  */
  if (TARGET_SSE || TARGET_PRFCHW)
    x86_prefetch_sse = true;
  /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2 || TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
  /* Turn on lzcnt instruction for -mabm.  */
  if (TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
  /* Validate -mpreferred-stack-boundary= value or default it to
     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
  if (global_options_set.x_ix86_preferred_stack_boundary_arg)
    {
      int min = (TARGET_64BIT ? (TARGET_SSE ? 4 : 3) : 2);
      int max = (TARGET_SEH ? 4 : 12);

      if (ix86_preferred_stack_boundary_arg < min
	  || ix86_preferred_stack_boundary_arg > max)
	{
	  if (min == max)
	    error ("-mpreferred-stack-boundary is not supported "
		   "for this target");
	  else
	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
		   ix86_preferred_stack_boundary_arg, min, max);
	}
      else
	ix86_preferred_stack_boundary
	  = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
    }
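  /* Worked example (added commentary): -mpreferred-stack-boundary=4
     yields (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. a 16-byte aligned
     stack; the 64-bit minimum of 4 (3 without SSE) encodes the same
     log2-of-bytes convention.  */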
  /* Set the default value for -mstackrealign.  */
  if (ix86_force_align_arg_pointer == -1)
    ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
  /* Validate -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (global_options_set.x_ix86_incoming_stack_boundary_arg)
    {
      if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
	  || ix86_incoming_stack_boundary_arg > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
      else
	{
	  ix86_user_incoming_stack_boundary
	    = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }
  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && ! TARGET_SSE)
    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
  if (global_options_set.x_ix86_fpmath)
    {
      if (ix86_fpmath & FPMATH_SSE)
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	}
    }
  else
    ix86_fpmath = TARGET_FPMATH_DEFAULT;
  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;
  /* Use external vectorized library in vectorizing intrinsics.  */
  if (global_options_set.x_ix86_veclibabi_type)
    switch (ix86_veclibabi_type)
      {
      case ix86_veclibabi_type_svml:
	ix86_veclib_handler = ix86_veclibabi_svml;
	break;

      case ix86_veclibabi_type_acml:
	ix86_veclib_handler = ix86_veclibabi_acml;
	break;

      default:
	gcc_unreachable ();
      }
  if ((!USE_IX86_FRAME_POINTER
       || (x86_accumulate_outgoing_args & ix86_tune_mask))
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or %saccumulate-outgoing-args%s for correctness",
		 prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
		 "for correctness", prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_tune_cost->simultaneous_prefetches,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ix86_tune_cost->prefetch_block,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			 ix86_tune_cost->l1_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE,
			 ix86_tune_cost->l2_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  /* Enable software prefetching at -O3 for CPUs where prefetching is
     helpful.  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && (optimize >= 3 || flag_profile_use)
      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
    flag_prefetch_loop_arrays = 1;
  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT && !flag_split_stack)
    targetm.expand_builtin_va_start = NULL;
  if (TARGET_64BIT)
    {
      ix86_gen_leave = gen_leave_rex64;
      if (Pmode == DImode)
	{
	  ix86_gen_monitor = gen_sse3_monitor64_di;
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_di;
	}
      else
	{
	  ix86_gen_monitor = gen_sse3_monitor64_si;
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_si;
	}
    }
  else
    {
      ix86_gen_leave = gen_leave;
      ix86_gen_monitor = gen_sse3_monitor;
    }
  if (Pmode == DImode)
    {
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
    }
  else
    {
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
    }
#ifdef USE_IX86_CLD
  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT)
    target_flags |= MASK_CLD & ~target_flags_explicit;
#endif
  if (!TARGET_64BIT && flag_pic)
    {
      if (flag_fentry > 0)
	sorry ("-mfentry isn%'t supported for 32-bit in combination "
	       "with -fpic");
      flag_fentry = 0;
    }
  else if (TARGET_SEH)
    {
      if (flag_fentry == 0)
	sorry ("-mno-fentry isn%'t compatible with SEH");
      flag_fentry = 1;
    }
  else if (flag_fentry < 0)
    {
#if defined(PROFILE_BEFORE_PROLOGUE)
      flag_fentry = 1;
#else
      flag_fentry = 0;
#endif
    }
  if (TARGET_AVX)
    {
      /* When not optimizing for size, enable the vzeroupper optimization
	 for TARGET_AVX with -fexpensive-optimizations and split 32-byte
	 AVX unaligned loads/stores.  */
      if (!optimize_size)
	{
	  if (flag_expensive_optimizations
	      && !(target_flags_explicit & MASK_VZEROUPPER))
	    target_flags |= MASK_VZEROUPPER;
	  if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
	  if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
	  /* Enable 128-bit AVX instruction generation
	     for the auto-vectorizer.  */
	  if (TARGET_AVX128_OPTIMAL
	      && !(target_flags_explicit & MASK_PREFER_AVX128))
	    target_flags |= MASK_PREFER_AVX128;
	}
    }
  else
    {
      /* Disable vzeroupper pass if TARGET_AVX is disabled.  */
      target_flags &= ~MASK_VZEROUPPER;
    }
  if (ix86_recip_name)
    {
      char *p = ASTRDUP (ix86_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = RECIP_MASK_ALL;
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for -mrecip=%s", q);
		  invert = false;
		  mask = RECIP_MASK_NONE;
		}
	    }

	  recip_mask_explicit |= mask;
	  if (invert)
	    recip_mask &= ~mask;
	  else
	    recip_mask |= mask;
	}
    }
  if (TARGET_RECIP)
    recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
  else if (target_flags_explicit & MASK_RECIP)
    recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
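  /* Usage example (added commentary): -mrecip=vec-sqrt,!sqrt first ORs
     RECIP_MASK_VEC_SQRT into recip_mask, then clears RECIP_MASK_SQRT
     for the '!'-prefixed token; both masks are also recorded in
     recip_mask_explicit, so the TARGET_RECIP defaulting just above
     cannot re-enable what the user explicitly negated.  */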
  /* Default long double to 64-bit for Bionic.  */
  if (TARGET_HAS_BIONIC
      && !(target_flags_explicit & MASK_LONG_DOUBLE_64))
    target_flags |= MASK_LONG_DOUBLE_64;
  /* Save the initial options in case the user does function specific
     options.  */
  if (main_args_p)
    target_option_default_node = target_option_current_node
      = build_target_option_node ();
}
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  static struct register_pass_info insert_vzeroupper_info
    = { &pass_insert_vzeroupper.pass, "reload",
	1, PASS_POS_INSERT_AFTER
      };

  ix86_option_override_internal (true);

  /* This needs to be done at start up.  It's convenient to do it here.  */
  register_pass (&insert_vzeroupper_info);
}
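/* Note (added commentary): the pass registration above is one-time
   setup; it places the vzeroupper-insertion pass immediately after
   "reload" in the pass pipeline, as requested by PASS_POS_INSERT_AFTER,
   and the pass then runs per function.  */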
/* Update register usage after having seen the compiler flags.  */

static void
ix86_conditional_register_usage (void)
{
  int i, c_mask;
  unsigned int j;

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* For 32-bit targets, squash the REX registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
    }
  /*  See the definition of CALL_USED_REGISTERS in i386.h.  */
  c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
	    : TARGET_64BIT ? (1 << 2)
	    : (1 << 1));

  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* Set/reset conditionally defined registers from
	 CALL_USED_REGISTERS initializer.  */
      if (call_used_regs[i] > 1)
	call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
	 as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	  && call_used_regs[i])
	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }
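  /* Worked example (added commentary): on a 64-bit SysV target c_mask
     is (1 << 2), so a CALL_USED_REGISTERS entry with the conditional
     value 6 (binary 110) becomes !!(6 & 4) = 1, i.e. call-used under
     this ABI.  */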
  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
}
/* Save the current options.  */

static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
  ptr->ix86_target_flags_explicit = target_flags_explicit;
  ptr->x_recip_mask_explicit = recip_mask_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options.  */

static void
ix86_function_specific_restore (struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  target_flags_explicit = ptr->ix86_target_flags_explicit;
  recip_mask_explicit = ptr->x_recip_mask_explicit;

  /* Recreate the arch feature tests if the arch changed.  */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests.  */
  if (old_tune != ix86_tune)
    {
      ix86_tune_mask = 1u << ix86_tune;
      for (i = 0; i < X86_TUNE_LAST; ++i)
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }
}
/* Print the current options.  */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch,
	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->arch]
	    : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune,
	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->tune]
	    : "<unknown>"));

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively go
   over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;		/* option string */
    size_t len;			/* length of string */
    enum ix86_opt_type type;	/* type of option */
    int opt;			/* option number, if OPT_xxx */
    int mask;			/* mask, if DEF_TARGET_xxx */
  }
  attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
    IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("avx2",	OPT_mavx2),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("fma",	OPT_mfma),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),
    IX86_ATTR_ISA ("rtm",	OPT_mrtm),
    IX86_ATTR_ISA ("hle",	OPT_mhle),
    IX86_ATTR_ISA ("prfchw",	OPT_mprfchw),
    IX86_ATTR_ISA ("rdseed",	OPT_mrdseed),
    IX86_ATTR_ISA ("adx",	OPT_madx),
    IX86_ATTR_ISA ("fxsr",	OPT_mfxsr),
    IX86_ATTR_ISA ("xsave",	OPT_mxsave),
    IX86_ATTR_ISA ("xsaveopt",	OPT_mxsaveopt),

    /* enum options */
    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };
  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    gcc_unreachable ();
  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      ch = *p;
      /* Find the option.  */
      opt = N_OPTS;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}
      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}

      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (&global_options, &global_options_set,
			      &decoded, input_location);
	}

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    target_flags |= mask;
	  else
	    target_flags &= ~mask;
	}

      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}

      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (&global_options, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }

  return ret;
}
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
ix86_valid_target_attribute_tree (tree args)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));
  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings,
					     &enum_opts_set))
    return error_mark_node;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->x_ix86_isa_flags
      || target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
      else if (!TARGET_64BIT && TARGET_SSE)
	{
	  ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	  global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
	}

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node ();

      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      global_options_set.x_ix86_fpmath = orig_fpmath_set;

      /* Free up memory allocated to hold the strings.  */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	free (option_strings[i]);
    }

  return t;
}
/* Hook to validate attribute((target("string"))).  */

static bool
ix86_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       tree args,
			       int ARG_UNUSED (flags))
{
  struct cl_target_option cur_target;
  bool ret = true;
  tree old_optimize = build_optimization_node ();
  tree new_target, new_optimize;
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options);
  new_target = ix86_valid_target_attribute_tree (args);
  new_optimize = build_optimization_node ();

  if (new_target == error_mark_node)
    ret = false;

  else if (fndecl && new_target)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (old_optimize));

  return ret;
}
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
	 function can inline an SSE2 function but an SSE2 function can't inline
	 an SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;

      else if (caller_opts->tune != callee_opts->tune)
	ret = false;

      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
	ret = false;

      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;

      else
	ret = true;
    }

  return ret;
}
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
		       : NULL_TREE);

      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;

      else if (new_tree)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  target_reinit ();
	}

      else if (old_tree)
	{
	  struct cl_target_option *def
	    = TREE_TARGET_OPTION (target_option_current_node);

	  cl_target_option_restore (&global_options, def);
	  target_reinit ();
	}
    }
}
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
				ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
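/* Example output (added commentary): for a 64 KiB object under
   -mcmodel=medium with a smaller section threshold, this emits e.g.
   ".largecomm\tbuf,65536,32" (name, size in bytes, alignment in
   bytes) instead of the usual ".comm" directive.  */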
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  For example, if
     we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
	{
	  if (ix86_function_regparm (type, NULL) >= 3)
	    {
	      /* ??? Need to count the actual number of registers to be used,
		 not the possible number of registers.  Fix later.  */
	      return false;
	    }
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
   and "sseregparm" calling convention attributes;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
			     tree args,
			     int flags ATTRIBUTE_UNUSED,
			     bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall, and thiscall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}

      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("regparm and thiscall attributes are not compatible");
	}

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
		   name, REGPARM_MAX);
	  *no_add_attrs = true;
	}

      return NULL_TREE;
    }

  if (TARGET_64BIT)
    {
      /* Do not warn when emulating the MS ABI.  */
      if ((TREE_CODE (*node) != FUNCTION_TYPE
	   && TREE_CODE (*node) != METHOD_TYPE)
	  || ix86_function_type_abi (*node) != MS_ABI)
	warning (OPT_Wattributes, "%qE attribute ignored",
		 name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and stdcall attributes are not compatible");
	}
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and regparm attributes are not compatible");
	}
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and thiscall attributes are not compatible");
	}
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and fastcall attributes are not compatible");
	}
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and thiscall attributes are not compatible");
	}
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and cdecl attributes are not compatible");
	}
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("cdecl and thiscall attributes are not compatible");
	}
    }
  else if (is_attribute_p ("thiscall", name))
    {
      if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
	warning (OPT_Wattributes, "%qE attribute is used for non-class method",
		 name);
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("stdcall and thiscall attributes are not compatible");
	}
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	{
	  error ("fastcall and thiscall attributes are not compatible");
	}
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	{
	  error ("cdecl and thiscall attributes are not compatible");
	}
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}
/* The transactional memory builtins are implicitly regparm or fastcall
   depending on the ABI.  Override the generic do-nothing attribute that
   these builtins were declared with, and replace it with one of the two
   attributes that we expect elsewhere.  */

static tree
ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree alt;

  /* In no case do we want to add the placeholder attribute.  */
  *no_add_attrs = true;

  /* The 64-bit ABI is unchanged for transactional memory.  */
  if (TARGET_64BIT)
    return NULL_TREE;

  /* ??? Is there a better way to validate 32-bit windows?  We have
     cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
  if (CHECK_STACK_LIMIT > 0)
    alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
  else
    {
      alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
      alt = tree_cons (get_identifier ("regparm"), alt, NULL);
    }
  decl_attributes (node, alt, flags);

  return NULL_TREE;
}
/* This function determines from TYPE the calling-convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparm isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
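/* Usage sketch (added commentary): callers typically test the returned
   bits against the IX86_CALLCVT_* masks, e.g.

     unsigned int ccvt = ix86_get_callcvt (fntype);
     if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
       ... fastcall passes the first two integer args in registers ...

   as ix86_function_regparm below does.  */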
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	{
	  int local_regparm, globals = 0, regno;

	  /* Make sure no regparm register is taken by a
	     fixed register variable.  */
	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
	    if (fixed_regs[local_regparm])
	      break;

	  /* We don't want to use regparm(3) for nested functions as
	     these use a static chain pointer in the third argument.  */
	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
	    local_regparm = 2;

	  /* In 32-bit mode save a register for the split stack.  */
	  if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	    local_regparm = 2;

	  /* Each fixed register usage increases register pressure,
	     so fewer registers should be used for argument passing.
	     This functionality can be overridden by an explicit
	     regparm value.  */
	  for (regno = AX_REG; regno <= DI_REG; regno++)
	    if (fixed_regs[regno])
	      globals++;

	  local_regparm
	    = globals < local_regparm ? local_regparm - globals : 0;

	  if (local_regparm > regparm)
	    regparm = local_regparm;
	}
    }

  return regparm;
}
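
/* Editor's illustration (not part of the original source): for a file-local
     static int sum3 (int a, int b, int c) { return a + b + c; }
   compiled at -O2 -m32 with no fixed registers, the local-function path
   above can raise the effective regparm to 3, so a/b/c arrive in
   %eax/%edx/%ecx; a static chain or -fsplit-stack caps it at 2.  A sketch
   of the intended effect, not a guaranteed outcome.  */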
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
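
/* Editor's illustration (not part of the original source): for
     void __attribute__ ((stdcall)) f (int a, int b);
   SIZE is 8 and the type is not stdarg, so this function returns 8 and the
   callee pops its own arguments with "ret $8"; a variadic signature falls
   through to the final return of 0, leaving the caller to pop.  A sketch
   keyed to the 32-bit rules above.  */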
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */

static bool
ix86_legitimate_combined_insn (rtx insn)
{
  /* Check operand constraints in case hard registers were propagated
     into insn pattern.  This check prevents combine pass from
     generating insn patterns with invalid hard register operands.
     These invalid insns can eventually confuse reload to error out
     with a spill failure.  See also PRs 46829 and 46843.  */
  if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
    {
      int i;

      extract_insn (insn);
      preprocess_constraints ();

      for (i = 0; i < recog_data.n_operands; i++)
	{
	  rtx op = recog_data.operand[i];
	  enum machine_mode mode = GET_MODE (op);
	  struct operand_alternative *op_alt;
	  int offset = 0;
	  bool win;
	  int j;

	  /* A unary operator may be accepted by the predicate, but it
	     is irrelevant for matching constraints.  */
	  if (UNARY_P (op))
	    op = XEXP (op, 0);

	  if (GET_CODE (op) == SUBREG)
	    {
	      if (REG_P (SUBREG_REG (op))
		  && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
		offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
					      GET_MODE (SUBREG_REG (op)),
					      SUBREG_BYTE (op),
					      GET_MODE (op));
	      op = SUBREG_REG (op);
	    }

	  if (!(REG_P (op) && HARD_REGISTER_P (op)))
	    continue;

	  op_alt = recog_op_alt[i];

	  /* Operand has no constraints, anything is OK.  */
	  win = !recog_data.n_alternatives;

	  for (j = 0; j < recog_data.n_alternatives; j++)
	    {
	      if (op_alt[j].anything_ok
		  || (op_alt[j].matches != -1
		      && operands_match_p
			  (recog_data.operand[i],
			   recog_data.operand[op_alt[j].matches]))
		  || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
		{
		  win = true;
		  break;
		}
	    }

	  if (!win)
	    return false;
	}
    }

  return true;
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
ix86_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << (TARGET_LP64 ? 44 : 29);
}
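
/* Editor's note (not part of the original source): AddressSanitizer forms
   a shadow address as roughly

     Shadow = (Addr >> 3) + ix86_asan_shadow_offset ()

   so this hook supplies 1<<44 for LP64 and 1<<29 for 32-bit.  The shift by
   3 lives in target-independent asan code; shown here only for context.  */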
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

static bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
	return (regno < REGPARM_MAX
		|| (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
	return (regno < REGPARM_MAX
		|| (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
		|| (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
	return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
	  && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
	return true;
    }

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */

  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, which depends on
   the ABI in use.  */
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
   call ABI used.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}
static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
	return true;
    }
  return false;
}
/* Return SYSV_ABI or MS_ABI, depending on FNDECL, specifying the
   call ABI used.  */
static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (fndecl == NULL)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}
/* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
   call ABI used.  */
enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
	}
    }
}
extern void init_regs (void);

/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}
/* 64-bit MS and SYSV ABIs have different sets of call-used registers.
   Avoid expensive re-initialization through init_regs each time we switch
   function context, since it is needed only during RTL expansion.  */
static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT &&
      call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_local_info *i;

  memset (cum, 0, sizeof (*cum));

  if (fndecl)
    {
      i = cgraph_local_info (fndecl);
      cum->call_abi = ix86_function_abi (fndecl);
    }
  else
    {
      i = NULL;
      cum->call_abi = ix86_function_type_abi (fntype);
    }

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */

  if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
    sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
	   "or subtarget optimization implying it");
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
		    ? X86_64_REGPARM_MAX
		    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	{
	  cum->sse_nregs = (cum->call_abi == SYSV_ABI
			    ? X86_64_SSE_REGPARM_MAX
			    : X86_64_MS_SSE_REGPARM_MAX);
	}
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     helping K&R code.
     FIXME: once typesystem is fixed, we won't need this code anymore.  */
  if (i && i->local && i->can_change_signature)
    fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_avx = 0;
	  cum->warn_sse = 0;
	  cum->warn_mmx = 0;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }
}
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.  */

static enum machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		if (size == 32 && !TARGET_AVX)
		  {
		    static bool warnedavx;

		    if (cum
			&& !warnedavx
			&& cum->warn_avx)
		      {
			warnedavx = true;
			warning (0, "AVX vector argument without AVX "
				 "enabled changes the ABI");
		      }
		    return TYPE_MODE (type);
		  }
		else if ((size == 8 || size == 16) && !TARGET_SSE)
		  {
		    static bool warnedsse;

		    if (cum
			&& !warnedsse
			&& cum->warn_sse)
		      {
			warnedsse = true;
			warning (0, "SSE vector argument without SSE "
				 "enabled changes the ABI");
		      }
		    return mode;
		  }
		else
		  return mode;
	      }

	  gcc_unreachable ();
	}
    }

  return mode;
}
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
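
/* Editor's illustration (not part of the original source): for
     struct s { int i; float f; };
   classify_argument (below) classifies the single eightbyte as INTEGERSI
   for i and, because f sits at bit offset 32, as SSE for f; merging them
   hits Rule #4 and yields X86_64_INTEGER_CLASS, so the struct is passed in
   one integer register.  A sketch, not an exhaustive walk-through.  */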
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words
    = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  /* Special case check for pointer to shared, on 64-bit target.  */
  if (TARGET_64BIT && mode == TImode
      && type && TREE_CODE (type) == POINTER_TYPE
      && upc_shared_type_p (TREE_TYPE (type)))
    {
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    }

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 32 bytes on the stack.  */
      if (bytes > 32)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signalize memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = (int_bit_position (field)
				+ (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      int pos;

		      type = TREE_TYPE (field);

		      /* Flexible array member is ignored.  */
		      if (TYPE_MODE (type) == BLKmode
			  && TREE_CODE (type) == ARRAY_TYPE
			  && TYPE_SIZE (type) == NULL_TREE
			  && TYPE_DOMAIN (type) != NULL_TREE
			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
			      == NULL_TREE))
			{
			  static bool warned;

			  if (!warned && warn_psabi)
			    {
			      warned = true;
			      inform (input_location,
				      "the ABI of passing struct with"
				      " a flexible array member has"
				      " changed in GCC 4.4");
			    }
			  continue;
			}
		      num = classify_argument (TYPE_MODE (type), type,
					       subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      pos = (int_bit_position (field)
			     + (bit_offset % 64)) / 8 / 8;
		      for (i = 0; i < num && (i + pos) < words; i++)
			classes[i + pos] =
			  merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
		&& !((bit_offset % 64) == 0 && bytes == 4))
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      if (words > 2)
	{
	  /* When size > 16 bytes, if the first one isn't
	     X86_64_SSE_CLASS or any other ones aren't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
	  if (classes[0] != X86_64_SSE_CLASS)
	    return 0;

	  for (i = 1; i < words; i++)
	    if (classes[i] != X86_64_SSEUP_CLASS)
	      return 0;
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && classes[i - 1] != X86_64_SSE_CLASS
	      && classes[i - 1] != X86_64_SSEUP_CLASS)
	    {
	      /* The first one should never be X86_64_SSEUP_CLASS.  */
	      gcc_assert (i != 0);
	      classes[i] = X86_64_SSE_CLASS;
	    }

	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
	     everything should be passed in memory.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (classes[i - 1] != X86_64_X87_CLASS))
	    {
	      static bool warned;

	      /* The first one should never be X86_64_X87UP_CLASS.  */
	      gcc_assert (i != 0);
	      if (!warned && warn_psabi)
		{
		  warned = true;
		  inform (input_location,
			  "the ABI of passing union with long double"
			  " has changed in GCC 4.4");
		}
	      return 0;
	    }
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case SDmode:
    case DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      {
	int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);

	if (size <= 32)
	  {
	    classes[0] = X86_64_INTEGERSI_CLASS;
	    return 1;
	  }
	else if (size <= 64)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    return 1;
	  }
	else if (size <= 64+32)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    classes[1] = X86_64_INTEGERSI_CLASS;
	    return 2;
	  }
	else if (size <= 64+64)
	  {
	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	    return 2;
	  }
	else
	  gcc_unreachable ();
      }
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case COImode:
    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();
    case CTImode:
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  static bool warned;

	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the ABI of passing structure with complex float"
		      " member has changed in GCC 4.4");
	    }
	  classes[1] = X86_64_SSESF_CLASS;
	  return 2;
	}
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V1TImode:
    case V1DImode:
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;

    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
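
/* Editor's illustration (not part of the original source): for
     struct p { double d; long l; };
   the two eightbytes classify independently: d yields X86_64_SSEDF_CLASS
   and l yields X86_64_INTEGER_CLASS, so classify_argument returns 2 with
   classes[] = { SSEDF, INTEGER } and the value travels in one SSE register
   plus one GPR.  A sketch of the common case; construct_container below
   shows how the registers are actually assembled.  */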
/* Examine the argument and set the number of registers required in each
   class.  Return 0 iff parameter should be passed in memory.  */

static int
examine_argument (enum machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode
	      = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes for which we don't have a mode.
	     Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode, *intreg),
				 GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (SFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (DFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  pos = i;
	  switch (n)
	    {
	    case 1:
	      tmpmode = DImode;
	      break;
	    case 2:
	      if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		{
		  tmpmode = TImode;
		  i++;
		}
	      else
		tmpmode = DImode;
	      break;
	    case 4:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS);
	      tmpmode = OImode;
	      i += 3;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (pos*8));
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];

  return ret;
}
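
/* Editor's illustration (not part of the original source): continuing the
   struct p example above, construct_container builds roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. one EXPR_LIST per eightbyte with its byte offset.  A sketch of the
   PARALLEL shape, with register names simplified.  */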
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case DImode:
    case SImode:
    case HImode:
    case QImode:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;

      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cum->regno = 0;
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
    case TImode:
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
}
static void
function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 256bit vector mode parameters are passed on stack.  */
  if (!named && VALID_AVX256_REG_MODE (mode))
    return;

  if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
    }
  else
    {
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = (cum->words + align - 1) & ~(align - 1);
      cum->words += words;
    }
}
static void
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
    }
}
/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
			   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    function_arg_advance_ms_64 (cum, bytes, words);
  else if (TARGET_64BIT)
    function_arg_advance_64 (cum, mode, type, words, named);
  else
    function_arg_advance_32 (cum, mode, type, bytes, words);
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  static bool warnedsse, warnedmmx;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	    {
	      warnedsse = true;
	      warning (0, "SSE vector argument without SSE enabled "
		       "changes the ABI");
	    }
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector argument without MMX enabled "
		       "changes the ABI");
	    }
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }

  return NULL_RTX;
}
static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  switch (mode)
    {
    default:
      break;

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!named)
	return NULL;
      break;
    }

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &x86_64_int_parameter_registers [cum->regno],
			      cum->sse_regno);
}
static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		    enum machine_mode orig_mode, bool named,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
	regno = cum->regno + FIRST_SSE_REG;
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
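
/* Editor's illustration (not part of the original source): under the MS
   x64 convention an unnamed double passed to a varargs function, e.g.
     printf ("%f", 3.14);
   takes the PARALLEL path above and is made available in both %xmm1 and
   %rdx (parameter slot 2), since the callee cannot know which register
   class the consumer will read.  A sketch keyed to the t1/t2 code above.  */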
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   MODE is the argument's machine mode.  TYPE is the data type of the
   argument.  It is null for libcalls where that information may not be
   available.  CUM gives information about the preceding args and about
   the function being called.  NAMED is nonzero if this argument is a
   named parameter (otherwise it is an extra parameter matching an
   ellipsis).  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
		   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  enum machine_mode mode = omode;
  HOST_WIDE_INT bytes, words;
  rtx arg;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type, cum);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
  else if (TARGET_64BIT)
    arg = function_arg_64 (cum, mode, omode, type, named);
  else
    arg = function_arg_32 (cum, mode, omode, type, bytes, words);

  return arg;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* See Windows x64 Software Convention.  */
  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    {
      int msize = (int) GET_MODE_SIZE (mode);
      if (type)
	{
	  /* Arrays are passed by reference.  */
	  if (TREE_CODE (type) == ARRAY_TYPE)
	    return true;

	  if (AGGREGATE_TYPE_P (type))
	    {
	      /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		 are passed by reference.  */
	      msize = int_size_in_bytes (type);
	    }
	}

      /* __m128 is passed by reference.  */
      switch (msize)
	{
	case 1: case 2: case 4: case 8:
	  break;
	default:
	  return true;
	}
    }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
}
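
/* Editor's illustration (not part of the original source): under the MS
   x64 rules above,
     struct s3 { char c[3]; };   // size 3: passed by hidden reference
     struct s8 { long long l; }; // size 8: passed by value in a register
   and any array parameter type is always passed by reference.  A sketch
   of the 1/2/4/8-byte size test.  */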
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}
/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
   XXX: This function is obsolete and is only used for checking psABI
   compatibility with previous versions of GCC.  */

static unsigned int
ix86_compat_function_arg_boundary (enum machine_mode mode,
				   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!ix86_compat_aligned_value_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
ix86_contains_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type);
		 field;
		 field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    return TYPE_ALIGN (type) >= 128;

  return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

static unsigned int
ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "The ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
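
/* Editor's illustration (not part of the original source): on 32-bit,
     void f (long double x);   // XFmode: 4-byte parameter alignment
     void g (__m128 v);        // 16-byte aligned, and requires SSE
   so PARM_BOUNDARY (32 bits) wins unless the type genuinely contains a
   128-bit-aligned value.  A sketch of the rules encoded above.  */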
/* Return true if N is a possible register number of function value.  */

static bool
ix86_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case AX_REG:
      return true;

    case FIRST_FLOAT_REG:
      /* TODO: The function should depend on current function ABI but
	 builtins.c would need updating then.  Therefore we use the
	 default ABI.  */
      if (TARGET_64BIT && ix86_abi == MS_ABI)
	return false;
      return TARGET_FLOAT_RETURNS_IN_80387;

    case FIRST_SSE_REG:
      return TARGET_SSE;

    case FIRST_MMX_REG:
      if (TARGET_MACHO || TARGET_64BIT)
	return false;
      return TARGET_MMX;
    }

  return false;
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  /* Most things go in %eax.  */
  else
    regno = AX_REG;

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
static rtx
function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	case TFmode:
	case SDmode:
	case DDmode:
	case TDmode:
	  regno = FIRST_SSE_REG;
	  break;
	case XFmode:
	case XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	case TCmode:
	  return NULL;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype)
	   && !upc_shared_type_p (TREE_TYPE (valtype)))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
static rtx
function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
	{
	case 16:
	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode))
	    regno = FIRST_SSE_REG;
	  break;
	case 8:
	case 4:
	  if (mode == SFmode || mode == DFmode)
	    regno = FIRST_SSE_REG;
	  break;
	default:
	  break;
	}
    }
  return gen_rtx_REG (orig_mode, regno);
}
static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
		       enum machine_mode orig_mode, enum machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
    return function_value_ms_64 (orig_mode, mode);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}
static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
/* Pointer function arguments and return values are promoted to
   word_mode.  */

static enum machine_mode
ix86_promote_function_mode (const_tree type, enum machine_mode mode,
			    int *punsignedp, const_tree fntype,
			    int for_return)
{
  if (type != NULL_TREE && POINTER_TYPE_P (type))
    {
      if (upc_shared_type_p (TREE_TYPE (type)))
	{
	  *punsignedp = 1;
	  return TYPE_MODE (upc_pts_rep_type_node);
	}
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return word_mode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
					for_return);
}
/* Return true if a structure, union or array with MODE containing FIELD
   should be accessed using BLKmode.  */

static bool
ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
{
  /* Union with XFmode must be in BLKmode.  */
  return (mode == XFmode
	  && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
	      || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
}
rtx
ix86_libcall_value (enum machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
/* Return true iff type is returned in memory.  */

static bool ATTRIBUTE_UNUSED
return_in_memory_32 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size;

  if (mode == BLKmode)
    return true;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return false;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return false;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exist or the ABI prescribes otherwise.  */
      if (size == 8)
	return !TARGET_MMX || TARGET_VECT8_RETURNS;

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return !TARGET_SSE;

      /* AVX values are returned in YMM0, except when it doesn't exist.  */
      if (size == 32)
	return !TARGET_AVX;
    }

  if (mode == XFmode)
    return false;

  if (size > 12)
    return true;

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return false;
}
static bool ATTRIBUTE_UNUSED
return_in_memory_64 (const_tree type, enum machine_mode mode)
{
  int needed_intregs, needed_sseregs;
  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}
static bool ATTRIBUTE_UNUSED
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size = int_size_in_bytes (type);

  /* __m128 is returned in xmm0.  */
  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
      && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
    return false;

  /* Otherwise, the size must be exactly 1, 2, 4, or 8.  */
  return size != 1 && size != 2 && size != 4 && size != 8;
}
static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
#ifdef SUBTARGET_RETURN_IN_MEMORY
  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
  const enum machine_mode mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return return_in_memory_ms_64 (type, mode);
      else
	return return_in_memory_64 (type, mode);
    }
  else
    return return_in_memory_32 (type, mode);
#endif
}
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (!TARGET_64BIT && type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
	{
	  if (mode == TImode
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	    {
	      warnedsse = true;
	      warning (0, "SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	}

      if (!TARGET_MMX && !warnedmmx)
	{
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector return without MMX enabled "
		       "changes the ABI");
	    }
	}
    }

  return NULL;
}
/* Create the va_list data type.  */

/* Returns the calling convention specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */

static tree
ix86_build_builtin_va_list_abi (enum calling_abi abi)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT || abi == MS_ABI)
    return build_pointer_type (char_type_node);

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
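
/* Editor's note (not part of the original source): the record built above
   is the type behind the SysV x86-64 va_list, equivalent to

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   matching the psABI figure; shown for orientation only.  */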
/* Set up the builtin va_list data type and, for 64-bit, the additional
   calling convention specific va_list data types.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree ret = ix86_build_builtin_va_list_abi (ix86_abi);

  /* Initialize abi specific va_list builtin types.  */
  if (TARGET_64BIT)
    {
      tree t;
      if (ix86_abi == MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (SYSV_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      if (ix86_abi != MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (MS_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
    }

  return ret;
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
                         plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
                      gen_rtx_REG (word_mode,
                                   x86_64_int_parameter_registers[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      enum machine_mode smode;
      rtx label, test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
         of SSE parameter registers used to call this function, though all we
         actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
                                      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
         we used movdqa (i.e. TImode) instead?  Perhaps even better would
         be if we could determine the real mode of the data, via a hook
         into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
        crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
        max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
        {
          mem = plus_constant (Pmode, save_area,
                               i * 16 + ix86_varargs_gpr_size);
          mem = gen_rtx_MEM (smode, mem);
          MEM_NOTRAP_P (mem) = 1;
          set_mem_alias_set (mem, set);
          set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

          emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
        }

      emit_label (label);
    }
}
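
/* A sketch of the code emitted above for a SysV varargs function
   (assuming all register slots are live):

       mov  %rdi, 0(save_area)   ...  mov %r9, 40(save_area)
       test %al, %al
       je   .Ldone
       movaps %xmm0, 48(save_area)  ...  movaps %xmm7, 160(save_area)
     .Ldone:

   The test on %al implements the zero/non-zero check described in the
   comment above; the actual count in AX is otherwise ignored.  */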
static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  alias_set_type set = get_varargs_alias_set ();
  int i;

  /* Reset to zero, as there might be a sysv va_arg used
     before.  */
  ix86_varargs_gpr_size = 0;
  ix86_varargs_fpr_size = 0;

  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
                         plus_constant (Pmode, virtual_incoming_args_rtx,
                                        i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}
static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
                             tree type, int *pretend_size ATTRIBUTE_UNUSED,
                             int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
                               true);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
          || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
/* Implement va_start.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
         arguments using internal_arg_pointer, because they may be on
         the old stack.  The split stack prologue will arrange to
         leave a pointer to the old stack arguments in a scratch
         register, which we here copy to a pseudo-register.  The split
         stack prologue can't set the pseudo-register directly because
         it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
        {
          rtx reg, seq;

          reg = gen_reg_rtx (Pmode);
          cfun->machine->split_stack_varargs_pointer = reg;

          start_sequence ();
          emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
          seq = get_insns ();
          end_sequence ();

          push_topmost_sequence ();
          emit_insn_after (seq, entry_of_function ());
          pop_topmost_sequence ();
        }
    }

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
        std_expand_builtin_va_start (valist, nextarg);
      else
        {
          rtx va_r, next;

          va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
          next = expand_binop (ptr_mode, add_optab,
                               cfun->machine->split_stack_varargs_pointer,
                               crtl->args.arg_offset_rtx,
                               NULL_RTX, 0, OPTAB_LIB_WIDEN);
          convert_move (va_r, next, 0);
        }
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
                f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
                f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
                f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
                f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
                  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
                  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
         Prologue of the function saves it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
        t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
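
/* For example, for a function declared f (int a, int b, ...), the code
   above initializes the va_list roughly as (a sketch, not verbatim):

       gp_offset = 2 * 8;                      -- a, b consumed rdi, rsi
       fp_offset = 8 * X86_64_REGPARM_MAX;     -- no named SSE args
       overflow_arg_area = incoming arg ptr;   -- words == 0 here
       reg_save_area = register save area set up by the prologue;

   matching the MODIFY_EXPRs expanded above.  */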
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
                      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;
  unsigned int arg_boundary;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
                build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
  valist = build_va_arg_indirect_ref (valist);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type, NULL);
  switch (nat_mode)
    {
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
        {
          container = NULL;
          break;
        }

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
                                       type, 0, X86_64_REGPARM_MAX,
                                       X86_64_SSE_REGPARM_MAX, intreg,
                                       0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
                   && ((needed_intregs && TYPE_ALIGN (type) > 64)
                       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
        {
          /* Verify that all registers are strictly consecutive  */
          if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 16)
                    need_temp = 1;
                }
            }
          else
            {
              int i;

              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 8)
                    need_temp = 1;
                }
            }
        }
      if (!need_temp)
        {
          int_addr = addr;
          sse_addr = addr;
        }
      else
        {
          int_addr = create_tmp_var (ptr_type_node, "int_addr");
          sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
        }

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
        {
          t = build_int_cst (TREE_TYPE (gpr),
                             (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
          t = build2 (GE_EXPR, boolean_type_node, gpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          t = build_int_cst (TREE_TYPE (fpr),
                             (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
                             + X86_64_REGPARM_MAX * 8);
          t = build2 (GE_EXPR, boolean_type_node, fpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
        {
          /* int_addr = gpr + sav; */
          t = fold_build_pointer_plus (sav, gpr);
          gimplify_assign (int_addr, t, pre_p);
        }
      if (needed_sseregs)
        {
          /* sse_addr = fpr + sav; */
          t = fold_build_pointer_plus (sav, fpr);
          gimplify_assign (sse_addr, t, pre_p);
        }
      if (need_temp)
        {
          int i, prev_size = 0;
          tree temp = create_tmp_var (type, "va_arg_tmp");

          /* addr = &temp; */
          t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
          gimplify_assign (addr, t, pre_p);

          for (i = 0; i < XVECLEN (container, 0); i++)
            {
              rtx slot = XVECEXP (container, 0, i);
              rtx reg = XEXP (slot, 0);
              enum machine_mode mode = GET_MODE (reg);
              tree piece_type;
              tree addr_type;
              tree daddr_type;
              tree src_addr, src;
              int src_offset;
              tree dest_addr, dest;
              int cur_size = GET_MODE_SIZE (mode);

              gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
              prev_size = INTVAL (XEXP (slot, 1));
              if (prev_size + cur_size > size)
                {
                  cur_size = size - prev_size;
                  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
                  if (mode == BLKmode)
                    mode = QImode;
                }
              piece_type = lang_hooks.types.type_for_mode (mode, 1);
              if (mode == GET_MODE (reg))
                addr_type = build_pointer_type (piece_type);
              else
                addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
                                                         true);
              daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
                                                        true);

              if (SSE_REGNO_P (REGNO (reg)))
                {
                  src_addr = sse_addr;
                  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
                }
              else
                {
                  src_addr = int_addr;
                  src_offset = REGNO (reg) * 8;
                }
              src_addr = fold_convert (addr_type, src_addr);
              src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

              dest_addr = fold_convert (daddr_type, addr);
              dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
              if (cur_size == GET_MODE_SIZE (mode))
                {
                  src = build_va_arg_indirect_ref (src_addr);
                  dest = build_va_arg_indirect_ref (dest_addr);

                  gimplify_assign (dest, src, pre_p);
                }
              else
                {
                  tree copy
                    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
                                       3, dest_addr, src_addr,
                                       size_int (cur_size));
                  gimplify_and_add (copy, pre_p);
                }
              prev_size += cur_size;
            }
        }

      if (needed_intregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
                      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
          gimplify_assign (gpr, t, pre_p);
        }

      if (needed_sseregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
                      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
          gimplify_assign (fpr, t, pre_p);
        }

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
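
/* Schematically, the GIMPLE built above for a register-eligible
   argument is (a sketch, with X86_64_REGPARM_MAX == 6 and
   X86_64_SSE_REGPARM_MAX == 8):

       if (gpr >= (6 - needed_intregs + 1) * 8) goto lab_false;
       if (fpr >= (8 - needed_sseregs + 1) * 16 + 48) goto lab_false;
       addr = sav + gpr;
       gpr += needed_intregs * 8;  fpr += needed_sseregs * 16;
       goto lab_over;
     lab_false:
       addr = (ovf aligned to the argument boundary);
       ovf = addr + rsize * UNITS_PER_WORD;
     lab_over:
       result = *(type *) addr;  */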
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

bool
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi  */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
                    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
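
/* These five strings are, in order, log10(2), ln(2), log2(e), log2(10)
   and pi -- exactly the values loadable by the single x87 instructions
   fldlg2, fldln2, fldl2e, fldl2t and fldpi.  */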
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  REAL_VALUE_TYPE r;

  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
        init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
        if (real_identical (&r, &ext_80387_constants_table[i]))
          return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    case 8:
    case 9:
      return "#";
    default:
      gcc_unreachable ();
    }
}
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
                                       XFmode);
}
/* Return 1 if X is all 0s and 2 if X is all 1s
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode))
    switch (mode)
      {
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
        if (TARGET_SSE2)
          return 2;
      case V32QImode:
      case V16HImode:
      case V8SImode:
      case V4DImode:
        if (TARGET_AVX2)
          return 2;
      default:
        break;
      }

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      switch (get_attr_mode (insn))
        {
        case MODE_TI:
          return "%vpxor\t%0, %d0";
        case MODE_V2DF:
          return "%vxorpd\t%0, %d0";
        case MODE_V4SF:
          return "%vxorps\t%0, %d0";

        case MODE_OI:
          return "vpxor\t%x0, %x0, %x0";
        case MODE_V4DF:
          return "vxorpd\t%x0, %x0, %x0";
        case MODE_V8SF:
          return "vxorps\t%x0, %x0, %x0";

        default:
          break;
        }

    case 2:
      if (TARGET_AVX)
        return "vpcmpeqd\t%0, %0, %0";
      else
        return "pcmpeqd\t%0, %0";

    default:
      break;
    }
  gcc_unreachable ();
}
/* Returns true if OP contains a symbol reference */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (op, i) - 1; j >= 0; j--)
            if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
              return true;
        }

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
        return true;
    }

  return false;
}
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return (frame.stack_pointer_offset == UNITS_PER_WORD
          && (frame.nregs + frame.nsseregs) == 0);
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* Win64 SEH, very large frames need a frame-pointer as maximum stack
     allocation is 4GB.  */
  if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!crtl->is_leaf
          || ix86_current_function_calls_tls_descriptor))
    return true;

  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif
static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = AX_REG; regno <= SP_REG; regno++)
    {
      char name[32];
      tree decl;

      if (!(pic_labels_used & (1 << regno)))
        continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                         get_identifier (name),
                         build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
                                       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
        {
          switch_to_section (darwin_sections[text_coal_section]);
          fputs ("\t.weak_definition\t", asm_out_file);
          assemble_name (asm_out_file, name);
          fputs ("\n\t.private_extern\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_OUTPUT_LABEL (asm_out_file, name);
          DECL_WEAK (decl) = 1;
        }
      else
#endif
      if (USE_HIDDEN_LINKONCE)
        {
          DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);

          targetm.asm_out.unique_section (decl, 0);
          switch_to_section (get_named_section (decl, NULL, 0));

          targetm.asm_out.globalize_label (asm_out_file, name);
          fputs ("\t.hidden\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
        }
      else
        {
          switch_to_section (text_section);
          ASM_OUTPUT_LABEL (asm_out_file, name);
        }

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      init_function_start (decl);
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
         as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
        {
          int i = 8;

          while (i--)
            fputs ("\tnop\n", asm_out_file);
        }

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
         Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
         an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (!flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
        ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
#endif

      targetm.asm_out.internal_label (asm_out_file, "L",
                                      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
        ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
      else
        targetm.asm_out.internal_label (asm_out_file, "L",
                                        CODE_LABEL_NUMBER (label));
#endif
    }

  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
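
/* On ELF/i386 with -fpic, the sequence printed above typically reads

       call  __x86.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk (emitted by ix86_code_end above) copies the return
   address, i.e. the current IP, into %ebx.  */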
/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
                      gen_rtx_MEM (word_mode,
                                   gen_rtx_PRE_DEC (Pmode,
                                                    stack_pointer_rtx)),
                      arg);
}

/* Generate a "pop" pattern for input ARG.  */

static rtx
gen_pop (rtx arg)
{
  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
                      arg,
                      gen_rtx_MEM (word_mode,
                                   gen_rtx_POST_INC (Pmode,
                                                     stack_pointer_rtx)));
}
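
/* The RTL produced by the two generators above is, respectively,

       (set (mem:W (pre_dec:P (reg sp))) (reg arg))    -- push
       (set (reg arg) (mem:W (post_inc:P (reg sp))))   -- pop

   which the backend's push and pop insn patterns match directly.  */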
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (crtl->is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
        drap = REGNO (crtl->drap_reg);
      else
        drap = -1;
      for (i = 2; i >= 0; --i)
        if (i != drap && !df_regs_ever_live_p (i))
          return i;
    }

  return INVALID_REGNUM;
}
/* Return TRUE if we need to save REGNO.  */

static bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
          || crtl->profile
          || crtl->calls_eh_return
          || crtl->uses_const_pool))
    return ix86_select_alt_pic_regnum () == INVALID_REGNUM;

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return true;
        }
    }

  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
    return true;

  return (df_regs_ever_live_p (regno)
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}

/* Return number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
             && to == HARD_FRAME_POINTER_REGNUM)
            || (from == FRAME_POINTER_REGNUM
                && to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
static rtx
ix86_builtin_setjmp_frame_value (void)
{
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  unsigned HOST_WIDE_INT stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned HOST_WIDE_INT preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT to_allocate;

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  /* 64-bit MS ABI seems to require stack alignment to be always 16 except for
     function prologues and leaf.  */
  if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
      && (!crtl->is_leaf || cfun->calls_alloca != 0
          || ix86_current_function_calls_tls_descriptor))
    {
      preferred_alignment = 16;
      stack_alignment_needed = 16;
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    cfun->machine->use_fast_prologue_epilogue = false;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  else if (!optimize_function_for_size_p (cfun)
           && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_get_node (current_function_decl);

      cfun->machine->use_fast_prologue_epilogue_nregs = count;

      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

         Be careful about choosing what prologue to emit:  When function takes
         many instructions to execute we may use slow version as well as in
         case function is known to be outside hot spot (this is known with
         feedback only).  Weight the size of function by number of registers
         to save as it is cheap to use one or two push instructions but very
         slow to use many of them.  */
      if (count)
        count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
          || (flag_branch_probabilities
              && node->frequency < NODE_FREQUENCY_HOT))
        cfun->machine->use_fast_prologue_epilogue = false;
      else
        cfun->machine->use_fast_prologue_epilogue
          = !expensive_function_p (count);
    }

  frame->save_regs_using_mov
    = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
       /* If static stack checking is enabled and done with probes,
          the registers need to be saved before allocating the frame.  */
       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);

  /* Skip return address.  */
  offset = UNITS_PER_WORD;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* On SEH target, registers are pushed just before the frame pointer
     location.  */
  if (TARGET_SEH)
    frame->hard_frame_pointer_offset = offset;

  /* Align and set SSE register save area.  */
  if (frame->nsseregs)
    {
      /* The only ABI that has saved SSE registers (Win64) also has a
         16-byte aligned default stack, and thus we don't need to be
         within the re-aligned local stack frame to save them.  */
      gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
      offset = (offset + 16 - 1) & -16;
      offset += frame->nsseregs * 16;
    }
  frame->sse_reg_save_offset = offset;

  /* The re-aligned stack starts here.  Values before this point are not
     directly comparable with values below this point.  In order to make
     sure that no value happens to be the same before and after, force
     the alignment computation below to add a non-zero value.  */
  if (stack_realign_fp)
    offset = (offset + stack_alignment_needed) & -stack_alignment_needed;

  /* Va-arg area */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
  offset += frame->va_arg_size;

  /* Align start of frame for local function.  */
  if (stack_realign_fp
      || offset != frame->sse_reg_save_offset
      || size != 0
      || !crtl->is_leaf
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!crtl->is_leaf || cfun->calls_alloca
          || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!crtl->is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + preferred_alignment - 1) & -preferred_alignment;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && crtl->sp_is_unchanging
      && crtl->is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
        frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      HOST_WIDE_INT diff;

      /* If we can leave the frame pointer where it is, do so.  Also, returns
         the establisher frame for __builtin_frame_address (0).  */
      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff <= SEH_MAX_FRAME_SIZE
          && (diff > 240 || (diff & 15) != 0)
          && !crtl->accesses_prior_frames)
        {
          /* Ideally we'd determine what portion of the local stack frame
             (within the constraint of the lowest 240) is most heavily used.
             But without that complication, simply bias the frame pointer
             by 128 bytes so as to maximize the amount of the local stack
             frame that is addressable with 8-bit offsets.  */
          frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
        }
    }
}
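
/* Putting the offsets computed above together, the resulting frame
   looks roughly like this (a sketch; offsets grow downward from the
   CFA, higher addresses first):

       return address                      <- offset 0 (CFA)
       [static chain] [saved %ebp/%rbp]    <- hfp_save_offset
       saved GP registers                  <- reg_save_offset
       [padding + saved SSE registers]     <- sse_reg_save_offset
       va_arg register save area
       local variables                     <- frame_pointer_offset
       outgoing arguments [red zone]       <- stack_pointer_offset  */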
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  int len = 4;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len++;

  return len;
}
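
/* The lengths returned above mirror x86 ModRM encoding: no displacement
   costs 0 bytes (except %ebp/%r13, which need at least a disp8), a
   disp8 costs 1 byte, a disp32 costs 4, and %esp/%r12 always need an
   extra SIB byte.  So e.g. 8(%rsp) costs 1 + 1 = 2 extra bytes.  */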
/* Return an RTX that points to CFA_OFFSET within the stack frame.
   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset)
{
  const struct machine_function *m = cfun->machine;
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
         opportunities.  Generally FP is valid throughout the function,
         while DRAP must be reloaded within the epilogue.  But choose either
         over the SP due to increased encoding size.  */

      if (m->fs.fp_valid)
        {
          base_reg = hard_frame_pointer_rtx;
          base_offset = m->fs.fp_offset - cfa_offset;
        }
      else if (m->fs.drap_valid)
        {
          base_reg = crtl->drap_reg;
          base_offset = 0 - cfa_offset;
        }
      else if (m->fs.sp_valid)
        {
          base_reg = stack_pointer_rtx;
          base_offset = m->fs.sp_offset - cfa_offset;
        }
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
         With a tie, choose FP > DRAP > SP.  */
      if (m->fs.sp_valid)
        {
          base_reg = stack_pointer_rtx;
          base_offset = m->fs.sp_offset - cfa_offset;
          len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
        }
      if (m->fs.drap_valid)
        {
          toffset = 0 - cfa_offset;
          tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
          if (tlen <= len)
            {
              base_reg = crtl->drap_reg;
              base_offset = toffset;
              len = tlen;
            }
        }
      if (m->fs.fp_valid)
        {
          toffset = m->fs.fp_offset - cfa_offset;
          tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
          if (tlen <= len)
            {
              base_reg = hard_frame_pointer_rtx;
              base_offset = toffset;
              len = tlen;
            }
        }
    }
  gcc_assert (base_reg != NULL);

  return plus_constant (Pmode, base_reg, base_offset);
}
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
                              HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;

  addr = choose_baseaddr (cfa_offset);
  mem = gen_frame_mem (mode, addr);

  /* For SSE saves, we need to indicate the 128-bit alignment.  */
  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
        {
          /* A bit of a hack.  We force the DRAP register to be saved in
             the re-aligned stack frame, which provides us with a copy
             of the CFA that will last past the prologue.  Install it.  */
          gcc_checking_assert (cfun->machine->fs.fp_valid);
          addr = plus_constant (Pmode, hard_frame_pointer_rtx,
                                cfun->machine->fs.fp_offset - cfa_offset);
          mem = gen_rtx_MEM (mode, addr);
          add_reg_note (insn, REG_CFA_DEF_CFA, mem);
        }
      else
        {
          /* The frame pointer is a stable reference within the
             aligned frame.  Use it.  */
          gcc_checking_assert (cfun->machine->fs.fp_valid);
          addr = plus_constant (Pmode, hard_frame_pointer_rtx,
                                cfun->machine->fs.fp_offset - cfa_offset);
          mem = gen_rtx_MEM (mode, addr);
          add_reg_note (insn, REG_CFA_EXPRESSION,
                        gen_rtx_SET (VOIDmode, mem, reg));
        }
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
                            m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
    }
}
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
        cfa_offset -= UNITS_PER_WORD;
      }
}

/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
        cfa_offset -= 16;
      }
}
static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within stack red-zone till return, as unwinders can find the same value
   in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}

/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;
  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
                           int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx insn;
  bool add_frame_related_expr = false;

  if (Pmode == SImode)
    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
  else if (x86_64_immediate_operand (offset, DImode))
    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
  else
    {
      rtx tmp;
      /* r11 is used by indirect sibcall return as well, set before the
         epilogue and used after the epilogue.  */
      if (style)
        tmp = gen_rtx_REG (DImode, R11_REG);
      else
        {
          gcc_assert (src != hard_frame_pointer_rtx
                      && dest != hard_frame_pointer_rtx);
          tmp = hard_frame_pointer_rtx;
        }
      insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
      if (style < 0)
        add_frame_related_expr = true;

      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
    }

  insn = emit_insn (insn);
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (VOIDmode, dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
        {
          rtx r = gen_rtx_PLUS (Pmode, src, offset);
          r = gen_rtx_SET (VOIDmode, dest, r);
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
        }
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;

      if (src == hard_frame_pointer_rtx)
        {
          valid = m->fs.fp_valid;
          ooffset = m->fs.fp_offset;
        }
      else if (src == crtl->drap_reg)
        {
          valid = m->fs.drap_valid;
          ooffset = 0;
        }
      else
        {
          /* Else there are two possibilities: SP itself, which we set
             up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
             taken care of this by hand along the eh_return path.  */
          gcc_checking_assert (src == stack_pointer_rtx
                               || offset == const0_rtx);
        }

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
    }
}
/* Find an available register to be used as dynamic realign argument
   pointer register.  Such a register will be written in prologue and
   used in begin of body, so it must not be
        1. parameter passing register.
        2. GOT pointer.
   We reuse static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has
   shorter encoding.

   Return: the regno of chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  if (TARGET_64BIT)
    {
      /* Use R13 for nested function or function need static chain.
         Since function with tail call may use any caller-saved
         registers in epilogue, DRAP must not use caller-saved
         register in such case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
        return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for nested function or function need static chain.
         Since function with tail call may use any caller-saved
         registers in epilogue, DRAP must not use caller-saved
         register in such case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
        return DI_REG;

      /* Reuse static chain register if it isn't used for parameter
         passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
        {
          unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
          if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
            return CX_REG;
        }
      return DI_REG;
    }
}
/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Prefer the one specified at command line. */
  if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
     if -mstackrealign is used, it isn't used for sibcall check and
     estimated stack alignment is 128bit.  */
  else if (!sibcall
           && !TARGET_64BIT
           && ix86_force_align_arg_pointer
           && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary. */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16byte stack alignment for register save
     area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and returns vDRAP */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx seq, insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
        {
          add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      return drap_vreg;
    }
  else
    return NULL;
}
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}

struct scratch_reg {
  rtx reg;
  bool saved;
};
/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
        = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool thiscall_p
        = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      unsigned int drap_regno
        = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
          for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
          && drap_regno != AX_REG)
        regno = AX_REG;
      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
          for the static chain register.  */
      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
        regno = AX_REG;
      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
        regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !thiscall_p
               && !static_chain_p
               && drap_regno != CX_REG)
        regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true))
        regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
               && ix86_save_reg (SI_REG, true))
        regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true))
        regno = DI_REG;
      else
        {
          regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
          sr->saved = true;
        }
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      struct machine_function *m = cfun->machine;
      rtx x, insn = emit_insn (gen_pop (sr->reg));

      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
      m->fs.sp_offset -= UNITS_PER_WORD;
    }
}
9734 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
/* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */

static void
ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
{
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size), last;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 11 insns in the
     generic case while the compile-time loop is made up of 3+2*(n-1) insns
     for n # of intervals.  */
  if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, adjust;
      bool first_probe = true;

      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it exceeds SIZE.  If only one probe is
	 needed, this will not generate any code.  Then adjust and probe
	 to PROBE_INTERVAL + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  if (first_probe)
	    {
	      adjust = 2 * PROBE_INTERVAL + dope;
	      first_probe = false;
	    }
	  else
	    adjust = PROBE_INTERVAL;

	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				  plus_constant (Pmode, stack_pointer_rtx,
						 -adjust)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      if (first_probe)
	adjust = size + PROBE_INTERVAL + dope;
      else
	adjust = size + PROBE_INTERVAL - i;

      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -adjust)));
      emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    PROBE_INTERVAL + dope)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);

      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;

      /* Step 2: compute initial and final value of the loop counter.  */

      /* SP = SP_0 + PROBE_INTERVAL.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     - (PROBE_INTERVAL + dope))));

      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
      emit_move_insn (sr.reg, GEN_INT (-rounded_size));
      emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
			      gen_rtx_PLUS (Pmode, sr.reg,
					    stack_pointer_rtx)));

      /* Step 3: the loop

	 while (SP != LAST_ADDR)
	   {
	     SP = SP + PROBE_INTERVAL
	     probe at SP
	   }

	 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));

      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
	 assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				  plus_constant (Pmode, stack_pointer_rtx,
						 rounded_size - size)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    PROBE_INTERVAL + dope)));

      release_scratch_register_on_entry (&sr);
    }

  gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);

  /* Even if the stack pointer isn't the CFA register, we need to correctly
     describe the adjustments made to it, in particular differentiate the
     frame-related ones from the frame-unrelated ones.  */
  if (size > 0)
    {
      rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
      XVECEXP (expr, 0, 0)
	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		       plus_constant (Pmode, stack_pointer_rtx, -size));
      XVECEXP (expr, 0, 1)
	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		       plus_constant (Pmode, stack_pointer_rtx,
				      PROBE_INTERVAL + dope + size));
      add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
      RTX_FRAME_RELATED_P (last) = 1;
    }

  cfun->machine->fs.sp_offset += size;

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
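/* A worked instance of the insn-count trade-off above: for a frame of
   3 * PROBE_INTERVAL bytes, unrolling costs 3 + 2*(3-1) = 7 insns
   versus 11 for the run-time loop, so the unrolled form wins for any
   size up to the 5-interval cutoff.  */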
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 7 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 7 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
					 -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
				       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);

      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;

      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;

      /* Step 3: the loop

	 while (TEST_ADDR != LAST_ADDR)
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));

      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode,
					 gen_rtx_PLUS (Pmode,
						       stack_pointer_rtx,
						       sr.reg),
					 rounded_size - size));

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
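/* Unlike ix86_adjust_stack_and_probe, the routine above leaves the stack
   pointer unchanged: it only touches pages below the current stack
   pointer, which is why the offsets are negated before being handed to
   emit_stack_probe.  */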
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
   to be generated in correct form.  */
static void
ix86_finalize_stack_realign_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_realign = (incoming_stack_boundary
				< (crtl->is_leaf
				   ? crtl->max_used_stack_slot_alignment
				   : crtl->stack_alignment_needed));

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed, but in the end nothing that
     needed the stack alignment had been spilled, clear frame_pointer_needed
     and say we don't need stack realignment.  */
  if (stack_realign
      && !crtl->need_drap
      && frame_pointer_needed
      && crtl->is_leaf
      && flag_omit_frame_pointer
      && crtl->sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      && !(flag_stack_check && STACK_CHECK_MOVING_SP)
      && !ix86_frame_pointer_required ()
      && get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      HARD_REG_SET set_up_by_prologue, prologue_used;
      basic_block bb;

      CLEAR_HARD_REG_SET (prologue_used);
      CLEAR_HARD_REG_SET (set_up_by_prologue);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
			   HARD_FRAME_POINTER_REGNUM);
      FOR_EACH_BB (bb)
	{
	  rtx insn;
	  FOR_BB_INSNS (bb, insn)
	    if (NONDEBUG_INSN_P (insn)
		&& requires_stack_frame_p (insn, prologue_used,
					   set_up_by_prologue))
	      {
		crtl->stack_realign_needed = stack_realign;
		crtl->stack_realign_finalized = true;
		return;
	      }
	}

      frame_pointer_needed = false;
      stack_realign = false;
      crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
      crtl->stack_alignment_needed = incoming_stack_boundary;
      crtl->stack_alignment_estimated = incoming_stack_boundary;
      if (crtl->preferred_stack_boundary > incoming_stack_boundary)
	crtl->preferred_stack_boundary = incoming_stack_boundary;
      df_finish_pass (true);
      df_scan_alloc (NULL);
      df_scan_blocks ();
      df_compute_regs_ever_live (true);
    }

  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
}
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;
  bool sse_registers_saved;

  ix86_finalize_stack_realign_flags ();

  /* DRAP should not coexist with stack_realign_fp */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;

  ix86_compute_frame_layout (&frame);
  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
	 ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use profiling before
	 prologue variant.  If so sorry.  */
      if (crtl->profile && flag_fentry != 0)
	sorry ("ms_hook_prologue attribute isn%'t compatible "
	       "with -mfentry for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
	 8b ff     movl.s %edi,%edi
	 55        push   %ebp
	 8b ec     movl.s %esp,%ebp

	 This matches the hookable function prologue in Win32 API
	 functions in Microsoft Windows XP Service Pack 2 and newer.
	 Wine uses this to enable Windows apps to hook the Win32 API
	 functions provided by Wine.

	 What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
	  && !(crtl->drap_reg && crtl->stack_realign_needed))
	{
	  rtx push, mov;

	  /* We've decided to use the frame pointer already set up.
	     Describe this to the unwinder by pretending that both
	     push and mov insns happen right here.

	     Putting the unwind info here at the end of the ms_hook
	     is done so that we can make absolutely certain we get
	     the required byte sequence at the start of the function,
	     rather than relying on an assembler that can produce
	     the exact encoding required.

	     However it does mean (in the unpatched case) that we have
	     a 1 insn window where the asynchronous unwind info is
	     incorrect.  However, if we placed the unwind info at
	     its correct location we would have incorrect unwind info
	     in the patched case.  Which is probably all moot since
	     I don't expect Wine generates dwarf2 unwind info for the
	     system libraries that use this feature.  */

	  insn = emit_insn (gen_blockage ());

	  push = gen_push (hard_frame_pointer_rtx);
	  mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
			     stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (push) = 1;
	  RTX_FRAME_RELATED_P (mov) = 1;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

	  /* Note that gen_push incremented m->fs.cfa_offset, even
	     though we didn't emit the push insn here.  */
	  m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.cfa_offset;
	  m->fs.fp_valid = true;
	}
      else
	/* The frame pointer is not needed so pop %ebp again.
	   This leaves us with a pristine state.  */
	emit_insn (gen_pop (hard_frame_pointer_rtx));
    }
  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
	 only as a stack adjustment.  The real copy of the register as
	 a save will be done later, if needed.  */
      t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  /* Emit prologue code to adjust stack alignment and setup DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Only need to push parameter pointer reg if it is caller saved.  */
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	{
	  /* Push arg pointer reg */
	  insn = emit_insn (gen_push (crtl->drap_reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Grab the argument pointer.  */
      t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that return
	 address can be reached via (argp - 1) slot.  This is needed
	 to implement macro RETURN_ADDR_RTX and intrinsic function
	 expand_builtin_return_addr etc.  */
      t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (word_mode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
	 we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;
    }
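  /* For example, with a 32-byte requested alignment the insn emitted via
     ix86_gen_andsp above is "and $-32, %esp"; the copy of the return
     address pushed just afterwards is what keeps RETURN_ADDR_RTX working
     from the realigned frame.  */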
  int_registers_saved = (frame.nregs == 0);
  sse_registers_saved = (frame.nsseregs == 0);

  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
	 slower on all targets.  Also sdb doesn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Push registers now, before setting the frame pointer
	 on SEH target.  */
      if (!int_registers_saved
	  && TARGET_SEH
	  && !frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
	{
	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.sp_offset;
	  m->fs.fp_valid = true;
	}
    }
  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      /* When using red zone we may start register saving before allocating
	 the stack frame saving one cycle of the prologue.  However, avoid
	 doing this if we have to probe the stack; at least on x86_64 the
	 stack probe can turn into a call that clobbers a red zone location. */
      else if (ix86_using_red_zone ()
	       && (! TARGET_STACK_PROBE
		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
	{
	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
	  int_registers_saved = true;
	}
    }
  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* The computation of the size of the re-aligned stack frame means
	 that we must allocate the size of the register save area before
	 performing the actual alignment.  Otherwise we cannot guarantee
	 that there's enough storage above the realignment point.  */
      if (m->fs.sp_offset != frame.sse_reg_save_offset)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (m->fs.sp_offset
					    - frame.sse_reg_save_offset),
				   -1, false);

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));

      /* For the purposes of register save area addressing, the stack
	 pointer is no longer valid.  As for the value of sp_offset,
	 see ix86_compute_frame_layout, which we need to match in order
	 to pass verification of stack_pointer_offset at the end.  */
      m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
      m->fs.sp_valid = false;
    }
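  /* The expression (sp_offset + align_bytes) & -align_bytes above rounds
     sp_offset up to the next multiple of align_bytes; e.g. an offset of
     44 with 32-byte alignment becomes 64, the worst case the "and" can
     produce at run time.  */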
  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
	{
	  if (ix86_static_chain_on_stack)
	    stack_size += UNITS_PER_WORD;

	  if (!call_used_regs[REGNO (crtl->drap_reg)])
	    stack_size += UNITS_PER_WORD;

	  /* This over-estimates by 1 minimal-stack-alignment-unit but
	     mitigates that by counting in the new return address slot.  */
	  current_function_dynamic_stack_size
	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
	}

      current_function_static_stack_size = stack_size;
    }
  /* On SEH target with very large frame size, allocate an area to save
     SSE registers (as the very large allocation won't be described).  */
  if (TARGET_SEH
      && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
      && !sse_registers_saved)
    {
      HOST_WIDE_INT sse_size =
	frame.sse_reg_save_offset - frame.reg_save_offset;

      gcc_assert (int_registers_saved);

      /* No need to do stack checking as the area will be immediately
	 written.  */
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-sse_size), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
      allocate -= sse_size;
      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
      sse_registers_saved = true;
    }
  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      /* We expect the registers to be saved when probes are used.  */
      gcc_assert (int_registers_saved);

      if (STACK_CHECK_MOVING_SP)
	{
	  ix86_adjust_stack_and_probe (allocate);
	  allocate = 0;
	}
      else
	{
	  HOST_WIDE_INT size = allocate;

	  if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
	    size = 0x80000000 - STACK_CHECK_PROTECT - 1;

	  if (TARGET_STACK_PROBE)
	    ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
	  else
	    ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
	}
    }
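  /* The clamp above keeps SIZE + STACK_CHECK_PROTECT representable in the
     signed 32-bit displacements the probe insns use (it works out to
     exactly 0x7fffffff); a larger frame is only probed up to that
     limit.  */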
  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-allocate), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      rtx (*adjust_stack_insn)(rtx, rtx, rtx);
      const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
      bool eax_live = false;
      bool r10_live = false;

      if (TARGET_64BIT)
	r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
      if (!TARGET_64BIT_MS_ABI)
	eax_live = ix86_eax_live_at_start_p ();

      /* Note that SEH directives need to continue tracking the stack
	 pointer even after the frame pointer has been set up.  */
      if (eax_live)
	{
	  insn = emit_insn (gen_push (eax));
	  allocate -= UNITS_PER_WORD;
	  if (sp_is_cfa_reg || TARGET_SEH)
	    {
	      if (sp_is_cfa_reg)
		m->fs.cfa_offset += UNITS_PER_WORD;
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}

      if (r10_live)
	{
	  r10 = gen_rtx_REG (Pmode, R10_REG);
	  insn = emit_insn (gen_push (r10));
	  allocate -= UNITS_PER_WORD;
	  if (sp_is_cfa_reg || TARGET_SEH)
	    {
	      if (sp_is_cfa_reg)
		m->fs.cfa_offset += UNITS_PER_WORD;
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      adjust_stack_insn = (Pmode == DImode
			   ? gen_pro_epilogue_adjust_stack_di_sub
			   : gen_pro_epilogue_adjust_stack_si_sub);

      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
					   stack_pointer_rtx, eax));

      if (sp_is_cfa_reg || TARGET_SEH)
	{
	  if (sp_is_cfa_reg)
	    m->fs.cfa_offset += allocate;
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    -allocate)));
	}
      m->fs.sp_offset += allocate;

      if (r10_live && eax_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
			  gen_frame_mem (word_mode, t));
	  t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
	  emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
			  gen_frame_mem (word_mode, t));
	}
      else if (eax_live || r10_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn (gen_rtx_REG (word_mode,
				       (eax_live ? AX_REG : R10_REG)),
			  gen_frame_mem (word_mode, t));
	}
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
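  /* Pushing %eax (and %r10) above both preserves their values across the
     allocation and counts toward the allocation itself, which is why
     ALLOCATE is reduced by a word for each push; the values are reloaded
     from those slots once the frame exists.  */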
  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (frame.stack_pointer_offset
				     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (!sse_registers_saved)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || crtl->profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
	{
	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx label, tmp_reg;

	      gcc_assert (Pmode == DImode);
	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      tmp_reg = gen_rtx_REG (Pmode, R11_REG);
	      gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
	      insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
						   label));
	      insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
	      insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
					       pic_offset_table_rtx, tmp_reg));
	    }
	  else
	    insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
	}
      else
	{
	  insn = emit_insn (gen_set_got (pic_offset_table_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
	}
    }

  /* In the pic_reg_used case, make sure that the got load isn't deleted
     when mcount needs it.  Blockage to avoid call movement across mcount
     call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
  if (crtl->profile && !flag_fentry && pic_reg_used)
    emit_insn (gen_prologue_use (pic_offset_table_rtx));

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is setup but after reload it turns out stack realign
	 isn't necessary, here we will emit prologue to setup DRAP
	 without stack realign adjustment */
      t = choose_baseaddr (0);
      emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into register save push
     sequence when access to the redzone area is done through frame pointer.
     The offset between the frame pointer and the stack pointer is calculated
     relative to the value of the stack pointer at the end of the function
     prologue, and moving instructions that access redzone area via frame
     pointer inside push sequence violates this assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* Emit cld instruction if stringops are used in the function.  */
  if (TARGET_CLD && ix86_current_function_needs_cld)
    emit_insn (gen_cld ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				      GEN_INT (m->fs.cfa_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}
/* Emit code and notes for the LEAVE instruction.  */

static void
ix86_emit_leave (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
			     m->fs.fp_offset);
}
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
				  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (word_mode, regno);
	rtx insn, mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_frame_mem (word_mode, mem);
	insn = emit_move_insn (reg, mem);

	if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
	  {
	    /* Previously we'd represented the CFA as an expression
	       like *(%ebp - 8).  We've just popped that value from
	       the stack, which means we need to reset the CFA to
	       the drap register.  This will remain until we restore
	       the stack pointer.  */
	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
	    RTX_FRAME_RELATED_P (insn) = 1;

	    /* This means that the DRAP register is valid for addressing.  */
	    m->fs.drap_valid = true;
	  }
	else
	  ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
				      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (V4SFmode, regno);
	rtx mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_rtx_MEM (V4SFmode, mem);
	set_mem_align (mem, 128);
	emit_move_insn (reg, mem);

	ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= 16;
      }
}
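/* Using V4SFmode with the alignment forced to 128 lets these restores be
   emitted as aligned SSE moves; the save area itself was laid out with
   16-byte slots, which is why cfa_offset steps by 16 rather than
   UNITS_PER_WORD.  */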
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  struct machine_frame_state frame_state_save = m->fs;
  struct ix86_frame frame;
  bool restore_regs_via_mov;
  bool using_drap;

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);

  m->fs.sp_valid = (!frame_pointer_needed
		    || (crtl->sp_is_unchanging
			&& !stack_realign_fp));
  gcc_assert (!m->fs.sp_valid
	      || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());
  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;

      /* When the register save area is in the aligned portion of
	 the stack, determine the maximum runtime displacement that
	 matches up with the aligned frame.  */
      if (stack_realign_drap)
	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
				  + UNITS_PER_WORD);
    }

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    frame.reg_save_offset -= 2 * UNITS_PER_WORD;

  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
  else if (!m->fs.sp_valid && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && (frame.nregs > 1
	       || m->fs.sp_offset != frame.reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && !frame.nregs
	   && m->fs.sp_offset != frame.reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && TARGET_USE_LEAVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;
  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
	 the stack pointer, if we will restore via sp.  */
      if (TARGET_64BIT
	  && m->fs.sp_offset > 0x7fffffff
	  && !(m->fs.fp_valid || m->fs.drap_valid)
	  && (frame.nsseregs + frame.nregs) != 0)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.sse_reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
					  style == 2);

  if (restore_regs_via_mov)
    {
      rtx t;

      ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx insn, sa = EH_RETURN_STACKADJ_RTX;

	  /* Stack align doesn't work with eh_return.  */
	  gcc_assert (!stack_realign_drap);
	  /* Neither do regparm nested functions.  */
	  gcc_assert (!ix86_static_chain_on_stack);

	  if (frame_pointer_needed)
	    {
	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, t));

	      t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
	      insn = emit_move_insn (hard_frame_pointer_rtx, t);

	      /* Note that we use SA as a temporary CFA, as the return
		 address is at the proper place relative to it.  We
		 pretend this happens at the FP restore insn because
		 prior to this insn the FP would be stored at the wrong
		 offset relative to SA, and after this insn we have no
		 other reasonable register to use for the CFA.  We don't
		 bother resetting the CFA to the SP for the duration of
		 the return insn.  */
	      add_reg_note (insn, REG_CFA_DEF_CFA,
			    plus_constant (Pmode, sa, UNITS_PER_WORD));
	      ix86_add_queued_cfa_restore_notes (insn);
	      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
	      RTX_FRAME_RELATED_P (insn) = 1;

	      m->fs.cfa_reg = sa;
	      m->fs.cfa_offset = UNITS_PER_WORD;
	      m->fs.fp_valid = false;

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style, false);
	    }
	  else
	    {
	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
	      insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
	      ix86_add_queued_cfa_restore_notes (insn);

	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	      if (m->fs.cfa_offset != UNITS_PER_WORD)
		{
		  m->fs.cfa_offset = UNITS_PER_WORD;
		  add_reg_note (insn, REG_CFA_DEF_CFA,
				plus_constant (Pmode, stack_pointer_rtx,
					       UNITS_PER_WORD));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	    }
	  m->fs.sp_offset = UNITS_PER_WORD;
	  m->fs.sp_valid = true;
	}
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
	 if necessary, (2) a sequence of pops, and (3) a return or
	 jump instruction.  Prevent insns from the function body from
	 being scheduled into this sequence.  */
      if (TARGET_SEH)
	{
	  /* Prevent a catch region from being adjacent to the standard
	     epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
	     nor several other flags that would be interesting to test
	     are set up yet.  */
	  if (flag_non_call_exceptions)
	    emit_insn (gen_nops (const1_rtx));
	  else
	    emit_insn (gen_blockage ());
	}

      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  Also do it on SEH target for very large
	 frame as the emitted instructions aren't allowed by the ABI
	 in epilogues.  */
      if (!m->fs.sp_valid
	  || (TARGET_SEH
	      && (m->fs.sp_offset - frame.reg_save_offset
		  >= SEH_MAX_FRAME_SIZE)))
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
				     GEN_INT (m->fs.fp_offset
					      - frame.reg_save_offset),
				     style, false);
	}
      else if (m->fs.sp_offset != frame.reg_save_offset)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}

      ix86_emit_restore_regs_using_pop ();
    }
  /* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
	 pointer store address, then we only need a pop.  */
      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
	 able to grok it fast.  */
      else if (TARGET_USE_LEAVE
	       || optimize_function_for_size_p (cfun)
	       || !cfun->machine->use_fast_prologue_epilogue)
	ix86_emit_leave ();
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style, !using_drap);
	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
	}
    }
  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
	param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
			(VOIDmode, stack_pointer_rtx,
			 gen_rtx_PLUS (Pmode,
				       crtl->drap_reg,
				       GEN_INT (-param_ptr_offset))));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				  GEN_INT (param_ptr_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_regs[REGNO (crtl->drap_reg)])
	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }
  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
			       style, true);
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }
  if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
	 address, do explicit add, and jump indirectly to the caller.  */
      if (crtl->args.pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
	  rtx insn;

	  /* There is no "pascal" calling convention in any 64bit ABI.  */
	  gcc_assert (!TARGET_64BIT);

	  insn = emit_insn (gen_pop (ecx));
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  m->fs.sp_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
	  add_reg_note (insn, REG_CFA_REGISTER,
			gen_rtx_SET (VOIDmode, ecx, pc_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     popc, -1, true);
	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_simple_return_internal ());

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
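/* The 64K limit above comes from the encoding of "ret $imm16", whose
   immediate is only 16 bits wide; hence the pop/add/indirect-jump
   fallback for pops_args >= 65536.  */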
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    rtx deleted_debug_label = NULL_RTX;
    while (insn
	   && NOTE_P (insn)
	   && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      {
	/* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	   notes only, instead set their CODE_LABEL_NUMBER to -1,
	   otherwise there would be code generation differences
	   in between -g and -g0.  */
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  deleted_debug_label = insn;
	insn = PREV_INSN (insn);
      }
    if (insn
	&& (LABEL_P (insn)
	    || (NOTE_P (insn)
		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
    else if (deleted_debug_label)
      for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  CODE_LABEL_NUMBER (insn) = -1;
  }
#endif
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall, is_thiscall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		     != NULL);
      is_thiscall = (lookup_attribute ("thiscall",
				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
	{
	  if (DECL_STATIC_CHAIN (cfun->decl))
	    {
	      sorry ("-fsplit-stack does not support fastcall with "
		     "nested function");
	      return INVALID_REGNUM;
	    }
	  return AX_REG;
	}
      else if (is_thiscall)
	{
	  if (!DECL_STATIC_CHAIN (cfun->decl))
	    return DX_REG;
	  return AX_REG;
	}
      else if (regparm < 3)
	{
	  if (!DECL_STATIC_CHAIN (cfun->decl))
	    return CX_REG;
	  else
	    {
	      if (regparm >= 2)
		{
		  sorry ("-fsplit-stack does not support 2 register "
			 "parameters for a nested function");
		  return INVALID_REGNUM;
		}
	      return DX_REG;
	    }
	}
      else
	{
	  /* FIXME: We could make this work by pushing a register
	     around the addition and comparison.  */
	  sorry ("-fsplit-stack does not support 3 register parameters");
	  return INVALID_REGNUM;
	}
    }
}
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;

/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  */

void
ix86_expand_split_stack_prologue (void)
{
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
  rtx scratch_reg = NULL_RTX;
  rtx varargs_label = NULL_RTX;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
			  UNSPEC_STACK_CHECK);
  limit = gen_rtx_CONST (Pmode, limit);
  limit = gen_rtx_MEM (Pmode, limit);
  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
	 the required frame size.  Since this is the very start of the
	 function, the scratch register can be any caller-saved
	 register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
	return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
	{
	  /* We don't use ix86_gen_add3 in this case because it will
	     want to split to lea, but when not optimizing the insn
	     will not be split after this point.  */
	  emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
						offset)));
	}
      else
	{
	  emit_move_insn (scratch_reg, offset);
	  emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
				    stack_pointer_rtx));
	}
      current = scratch_reg;
    }

  ix86_expand_branch (GEU, current, limit, label);
  jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_note (jump_insn, REG_BR_PROB,
		GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
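  /* REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 encodes a 99% probability
     that the branch is taken, i.e. that no call to __morestack is
     needed.  */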
  if (split_stack_fn == NULL_RTX)
    split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
  call_fusage = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
	 Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
	{
	  rtx rax;

	  rax = gen_rtx_REG (word_mode, AX_REG);
	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
	  use_reg (&call_fusage, rax);
	}

      if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
	{
	  HOST_WIDE_INT argval;

	  gcc_assert (Pmode == DImode);
	  /* When using the large model we need to load the address
	     into a register, and we've run out of registers.  So we
	     switch to a different calling convention, and we call a
	     different function: __morestack_large.  We pass the
	     argument size in the upper 32 bits of r10 and pass the
	     frame size in the lower 32 bits.  */
	  gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
	  gcc_assert ((args_size & 0xffffffff) == args_size);

	  if (split_stack_fn_large == NULL_RTX)
	    split_stack_fn_large =
	      gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");

	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx label, x;

	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      emit_insn (gen_set_rip_rex64 (reg10, label));
	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
	      emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
				  UNSPEC_GOT);
	      x = gen_rtx_CONST (Pmode, x);
	      emit_move_insn (reg11, x);
	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
	      x = gen_const_mem (Pmode, x);
	      emit_move_insn (reg11, x);
	    }
	  else
	    emit_move_insn (reg11, split_stack_fn_large);

	  fn = reg11;

	  argval = ((args_size << 16) << 16) + allocate;
	  emit_move_insn (reg10, GEN_INT (argval));
	}
      else
	{
	  emit_move_insn (reg10, allocate_rtx);
	  emit_move_insn (reg11, GEN_INT (args_size));
	  use_reg (&call_fusage, reg11);
	}

      use_reg (&call_fusage, reg10);
    }
  else
    {
      emit_insn (gen_push (GEN_INT (args_size)));
      emit_insn (gen_push (allocate_rtx));
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
				GEN_INT (UNITS_PER_WORD), constm1_rtx,
				NULL_RTX, false);
  add_function_usage_to (call_insn, call_fusage);
  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling _morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
		    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
	 fp -> old fp value
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp value
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, frame_reg,
					    GEN_INT (words * UNITS_PER_WORD))));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;

      emit_barrier ();
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (UNITS_PER_WORD))));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.  */

static void
ix86_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}
/* Determine if OP is a suitable SUBREG RTX for an address.  */

static bool
ix86_address_subreg_operand (rtx op)
{
  enum machine_mode mode;

  if (!REG_P (op))
    return false;

  mode = GET_MODE (op);

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return false;

  /* Don't allow SUBREGs that span more than a word.  It can lead to spill
     failures when the register is one word out of a two word structure.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    return false;

  /* Allow only SUBREGs of non-eliminable hard registers.  */
  return register_no_elim_operand (op, mode);
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
	  && GET_MODE (XEXP (addr, 0)) == SImode)
	{
	  addr = XEXP (addr, 0);
	  if (CONST_INT_P (addr))
	    return 0;
	}
      else if (GET_CODE (addr) == AND
	       && const_32bit_mask (XEXP (addr, 1), DImode))
	{
	  addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
	  if (addr == NULL_RTX)
	    return 0;

	  if (CONST_INT_P (addr))
	    return 0;
	}
    }

  /* Allow SImode subregs of DImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == SImode)
    {
      if (GET_CODE (addr) == SUBREG
	  && GET_MODE (SUBREG_REG (addr)) == DImode)
	{
	  addr = SUBREG_REG (addr);
	  if (CONST_INT_P (addr))
	    return 0;
	}
    }

  if (REG_P (addr))
    base = addr;
  else if (GET_CODE (addr) == SUBREG)
    {
      if (ix86_address_subreg_operand (SUBREG_REG (addr)))
	base = addr;
      else
	return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case ASHIFT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      tmp = XEXP (op, 1);
	      if (!CONST_INT_P (tmp))
		return 0;
	      scale = INTVAL (tmp);
	      if ((unsigned HOST_WIDE_INT) scale > 3)
		return 0;
	      scale = 1 << scale;
	      break;

	    case CONST:
	      op = XEXP (op, 0);
	      if (GET_CODE (op) != UNSPEC)
		return 0;
	      /* FALLTHRU */

	    case UNSPEC:
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case SUBREG:
	      if (!ix86_address_subreg_operand (SUBREG_REG (op)))
		return 0;
	      /* FALLTHRU */

	    case REG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else if (CONST_INT_P (addr))
    {
      if (!x86_64_immediate_operand (addr, VOIDmode))
	return 0;

      /* Constant addresses are sign extended to 64bit, we have to
	 prevent addresses from 0x80000000 to 0xffffffff in x32 mode.  */
      if (TARGET_X32
	  && val_signbit_known_set_p (SImode, INTVAL (addr)))
	return 0;

      disp = addr;
    }
  else
    disp = addr;			/* displacement */

  if (index)
    {
      if (REG_P (index))
	;
      else if (GET_CODE (index) == SUBREG
	       && ix86_address_subreg_operand (SUBREG_REG (index)))
	;
      else
	return 0;
    }

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != SEG_DEFAULT
      && ((base && GET_MODE (base) != word_mode)
	  || (index && GET_MODE (index) != word_mode)))
    return 0;

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
	return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
	  || index_reg == frame_pointer_rtx
	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      /* Swap base and index.  */
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp
      && base_reg
      && (base_reg == hard_frame_pointer_rtx
	  || base_reg == frame_pointer_rtx
	  || base_reg == arg_pointer_rtx
	  || (REG_P (base_reg)
	      && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
		  || REGNO (base_reg) == R13_REG))))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
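
/* Illustrative sketch (not part of GCC): the decomposition above targets
   the x86 effective-address form  base + index*scale + disp, where the
   SIB byte can only encode scale factors 1, 2, 4 and 8, i.e. an ASHIFT
   count of 0..3.  The helper below restates that constraint in plain C;
   the function name is ours, introduced only for this example.  */

static int
ix86_doc_example_scale_from_shift (long long shift_count,
				   long long *scale_out)
{
  /* Reject shift counts that cannot be encoded in the SIB byte.  */
  if (shift_count < 0 || shift_count > 3)
    return 0;
  /* A shift by N is the same as a multiply by 1 << N.  */
  *scale_out = 1LL << shift_count;
  return 1;
}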
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */

static int
ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     the memory address, but I don't have an AMD-K6 machine handy to check
     this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  if (!CONST_INT_P (XEXP (x, 1)))
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))
	return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);
#endif
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
	  && x != CONST0_RTX (TImode)
	  && !TARGET_64BIT)
	return false;
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x))
	return false;
      break;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !ix86_legitimate_constant_p (mode, x);
}
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	{
	case LABEL_REF:
	  return true;

	case CONST:
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	    break;
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  if (!CONST_INT_P (op1)
	      || INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	    break;
	  if (GET_CODE (op0) == LABEL_REF)
	    return true;
	  if (GET_CODE (op0) == CONST
	      && GET_CODE (XEXP (op0, 0)) == UNSPEC
	      && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) != SYMBOL_REF)
	    break;
	  /* FALLTHRU */

	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (SYMBOL_REF_TLS_MODEL (op0))
	    return false;
	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
	      && ix86_cmodel != CM_LARGE_PIC)
	    return true;
	  break;

	default:
	  break;
	}
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions here; they would limit
	 the allowed distance of GOT table entries, and we should not
	 need them anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF
	      && XINT (disp, 1) != UNSPEC_PCREL
	      && XINT (disp, 1) != UNSPEC_PLTOFF))
	return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
	return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      /* We need to check for both symbols and labels because VxWorks loads
	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
	 details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI also specifies a 32-bit relocation, we don't produce
	 it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	  && !TARGET_64BIT)
	return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}
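
/* Illustrative sketch (not part of GCC): several of the 64-bit cases above
   accept a symbol+offset displacement only when the offset stays within
   +/-16 MB, a conservative window inside the +/-2 GB reach of RIP-relative
   addressing.  A plain-C restatement of that range test, using our own
   helper name: */

static int
ix86_doc_example_offset_in_range (long long offset)
{
  return offset >= -16 * 1024 * 1024 && offset < 16 * 1024 * 1024;
}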
/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
   replace the input X, or the original X if no replacement is called for.
   The output parameter *WIN is 1 if the calling macro should goto WIN,
   0 if it should not.  */

bool
ix86_legitimize_reload_address (rtx x,
				enum machine_mode mode ATTRIBUTE_UNUSED,
				int opnum, int type,
				int ind_levels ATTRIBUTE_UNUSED)
{
  /* Reload can generate:

     (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
		       (reg:DI 97))
	      (reg:DI 2 cx))

     This RTX is rejected from ix86_legitimate_address_p due to
     non-strictness of base register 97.  Following this rejection,
     reload pushes all three components into separate registers,
     creating an invalid memory address RTX.

     The following code reloads only the invalid part of the
     memory address RTX.  */

  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 1))
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 1)))
    {
      rtx base, index;
      bool something_reloaded = false;

      base = XEXP (XEXP (x, 0), 1);
      if (!REG_OK_FOR_BASE_STRICT_P (base))
	{
	  push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
		       BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  something_reloaded = true;
	}

      index = XEXP (x, 1);
      if (!REG_OK_FOR_INDEX_STRICT_P (index))
	{
	  push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
		       INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  something_reloaded = true;
	}

      gcc_assert (something_reloaded);
      return true;
    }

  return false;
}
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
			   rtx addr, bool strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.  */
  if (base)
    {
      rtx reg;

      if (REG_P (base))
	reg = base;
      else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
	reg = SUBREG_REG (base);
      else
	/* Base is not a register.  */
	return false;

      if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
	return false;

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	/* Base is not valid.  */
	return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg;

      if (REG_P (index))
	reg = index;
      else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
	reg = SUBREG_REG (index);
      else
	/* Index is not a register.  */
	return false;

      if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
	return false;

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	/* Index is not valid.  */
	return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
	/* Scale without index.  */
	return false;

      if (scale != 2 && scale != 4 && scale != 8)
	/* Scale is not a valid multiplier.  */
	return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While the ABI also specifies 32-bit relocations,
	     we don't produce them at all and use IP relative instead.  */
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;

	    /* 64bit address unspec.  */
	    return false;

	  case UNSPEC_GOTPCREL:
	  case UNSPEC_PCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  case UNSPEC_STACK_CHECK:
	    gcc_assert (flag_split_stack);
	    break;

	  default:
	    /* Invalid address unspec.  */
	    return false;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
		   || (TARGET_MACHO
#if TARGET_MACHO
		       && MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp)
#endif
	       )))
	{

	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		/* Non-constant pic memory reference.  */
		return false;
	    }
	  else if ((!TARGET_MACHO || flag_pic)
		   && ! legitimate_pic_address_disp_p (disp))
	    /* Displacement is an invalid pic construct.  */
	    return false;
#if TARGET_MACHO
	  else if (MACHO_DYNAMIC_NO_PIC_P
		   && !ix86_legitimate_constant_p (Pmode, disp))
	    /* displacement must be referenced via non_lazy_pointer */
	    return false;
#endif

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is a good idea, unfortunately these constructs may
	     be created by the "adds using lea" optimization for incorrect
	     code.  That code is nonsensical, but results in addressing the
	     GOT table with a pic_offset_table_rtx base.  We can't just
	     refuse it easily, since it gets matched by the "addsi3"
	     pattern, which later gets split to lea in the case the output
	     register differs from the input.  While this can be handled
	     by a separate addsi pattern for this case that never results
	     in lea, disabling this test seems to be the easier and
	     correct fix for the crash.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && !CONST_INT_P (disp)
	       && (GET_CODE (disp) != CONST
		   || !ix86_legitimate_constant_p (Pmode, disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !ix86_legitimate_constant_p (Pmode, disp)))
	/* Displacement is not constant.  */
	return false;
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	/* Displacement is out of range.  */
	return false;
    }

  /* Everything looks valid.  */
  return true;
}
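
/* Illustrative sketch (not part of GCC): the strict/non-strict split used
   above boils down to whether an unallocated pseudo register (a regno at
   or beyond FIRST_PSEUDO_REGISTER) may still stand in for a base or index.
   A plain-C restatement under that assumption, with a made-up stand-in
   constant since the real value is target-configuration dependent: */

enum { IX86_DOC_EXAMPLE_FIRST_PSEUDO = 76 };	/* hypothetical value */

static int
ix86_doc_example_reg_ok (unsigned int regno, int strict)
{
  /* Before reload (non-strict) pseudos are acceptable placeholders;
     after reload (strict) only hard registers remain valid.  */
  if (regno >= IX86_DOC_EXAMPLE_FIRST_PSEUDO)
    return !strict;
  return 1;
}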
/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}
/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
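
/* Illustrative sketch (not part of GCC): ix86_GOT_alias_set uses the usual
   once-only memoization idiom for a lazily created handle.  The same shape
   in plain C, with doc_example_create standing in for new_alias_set (both
   names below are ours): */

static int
ix86_doc_example_create (void)
{
  return 42;	/* stand-in for the real allocator */
}

static int
ix86_doc_example_lazy_handle (void)
{
  static int handle = -1;	/* -1 marks "not created yet" */
  if (handle == -1)
    handle = ix86_doc_example_create ();
  return handle;
}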
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if (TARGET_64BIT
	   && ix86_cmodel != CM_SMALL_PIC
	   && gotoff_operand (addr, Pmode))
    {
      rtx tmpreg;

      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      tmpreg = gen_reg_rtx (Pmode);
      emit_move_insn (tmpreg, new_rtx);

      if (reg != 0)
	{
	  new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
					 tmpreg, 1, OPTAB_DIRECT);
	  new_rtx = reg;
	}
      else
	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

      if (reg != 0)
	{
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
	   /* We can't use @GOTOFF for text labels on VxWorks;
	      see gotoff_operand.  */
	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
	{
	  if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
	    return legitimize_dllimport_symbol (addr, true);
	  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
	      && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
	    {
	      rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0),
						   true);
	      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
	    }
	}

      /* For x64 PE-COFF there is no GOT table.  So we use the address
	 directly.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
				    UNSPEC_GOTPCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly, otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these
	     addresses; instead we CSE addresses from the GOT table,
	     so skip this.  */
	  emit_insn (gen_movsi (reg, new_rtx));
	  new_rtx = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  if (TARGET_64BIT)
	    new_rtx = force_reg (Pmode, new_rtx);
	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
    }
  else
    {
      if (CONST_INT_P (addr)
	  && !x86_64_immediate_operand (addr, VOIDmode))
	{
	  if (reg)
	    {
	      emit_move_insn (reg, addr);
	      new_rtx = reg;
	    }
	  else
	    new_rtx = force_reg (Pmode, addr);
	}
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}

      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					    UNSPEC_GOTOFF);
		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
		  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
					  new_rtx);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new_rtx);
		      new_rtx = reg;
		    }
		}
	      else
		{
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);
		      new_rtx
			= gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      rtx base = legitimize_pic_address (op0, reg);
	      enum machine_mode mode = GET_MODE (base);
	      new_rtx
		= legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		{
		  if (INTVAL (new_rtx) < -16*1024*1024
		      || INTVAL (new_rtx) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (new_rtx, mode))
			new_rtx = force_reg (mode, new_rtx);
		      new_rtx
			= gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
		    }
		  else
		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
		}
	      else
		{
		  if (GET_CODE (new_rtx) == PLUS
		      && CONSTANT_P (XEXP (new_rtx, 1)))
		    {
		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		    }
		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
		}
	    }
	}
    }
  return new_rtx;
}
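
/* Illustrative sketch (not part of GCC): the function above picks between
   two PIC strategies -- local symbols become PIC-register-relative offsets
   (@GOTOFF, or RIP-relative on x86-64), while global symbols are loaded
   through the GOT.  A plain-C restatement of that decision; the assembly
   snippets returned are descriptive strings of our own, not emitted code: */

static const char *
ix86_doc_example_pic_strategy (int symbol_is_local, int is_64bit)
{
  if (symbol_is_local)
    /* The address is computed directly, no memory load needed.  */
    return is_64bit ? "lea sym(%rip), %reg" : "lea sym@GOTOFF(%ebx), %reg";
  /* The address is loaded from the symbol's GOT slot.  */
  return is_64bit ? "mov sym@GOTPCREL(%rip), %reg" : "mov sym@GOT(%ebx), %reg";
}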
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != tp_mode)
    {
      gcc_assert (GET_MODE (tp) == SImode);
      gcc_assert (tp_mode == DImode);

      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
    }

  if (to_reg)
    tp = copy_to_mode_reg (tp_mode, tp);

  return tp;
}
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  return ix86_tls_symbol;
}
/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

static rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
	= gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  enum machine_mode tp_mode = Pmode;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (Pmode, true);
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx insns;

	      start_sequence ();
	      emit_call_insn
		(ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      if (GET_MODE (x) != Pmode)
		x = gen_rtx_ZERO_EXTEND (Pmode, x);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  rtx tmp = ix86_tls_module_base ();

	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (Pmode, true);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL,
			       gen_rtx_MINUS (Pmode, tmp, tp));
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx insns, eqv;

	      start_sequence ();
	      emit_call_insn
		(ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  if (TARGET_SUN_TLS && !TARGET_X32)
	    {
	      /* The Sun linker took the AMD64 TLS spec literally
		 and can only handle %rax as destination of the
		 initial executable code sequence.  */

	      dest = gen_reg_rtx (DImode);
	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
	      return dest;
	    }

	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and linker IE->LE relaxation bug.  */
	  tp_mode = DImode;
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
	off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (tp_mode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (tp_mode, off);
	  return gen_rtx_PLUS (tp_mode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (ix86_gen_sub3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (Pmode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (ix86_gen_sub3 (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
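
/* Illustrative sketch (not part of GCC): the switch above implements the
   four classic TLS dispatch models.  A plain-C summary of what each model
   costs at run time; the helper name and description strings are ours: */

static const char *
ix86_doc_example_tls_model_summary (enum tls_model model)
{
  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      return "call __tls_get_addr with a per-symbol module/offset pair";
    case TLS_MODEL_LOCAL_DYNAMIC:
      return "one __tls_get_addr call for the module, then constant offsets";
    case TLS_MODEL_INITIAL_EXEC:
      return "load the symbol's offset from the GOT, add the thread pointer";
    case TLS_MODEL_LOCAL_EXEC:
      return "constant offset from the thread pointer, no calls or loads";
    default:
      return "no TLS access required";
    }
}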
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL.  */

static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
  htab_t dllimport_map;

static tree
get_dllimport_decl (tree decl)
{
  struct tree_map *h, in;
  void **loc;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
  h = (struct tree_map *) *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc_tree_map ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
			   VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
    ? "*__imp_" : "*__imp__";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}
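
/* Illustrative sketch (not part of GCC): the naming logic above prepends
   "*__imp_" (or "*__imp__" when no user label prefix applies) to the
   stripped symbol name.  A self-contained restatement using malloc
   instead of alloca/GC memory; strlen and memcpy come from the headers
   already pulled in via system.h, and the helper name is ours: */

static char *
ix86_doc_example_imp_name (const char *name, int extra_underscore)
{
  const char *prefix = extra_underscore ? "*__imp__" : "*__imp_";
  size_t prefixlen = strlen (prefix);
  size_t namelen = strlen (name);
  char *imp = (char *) malloc (prefixlen + namelen + 1);

  if (imp)
    {
      memcpy (imp, prefix, prefixlen);
      memcpy (imp + prefixlen, name, namelen + 1);	/* includes NUL */
    }
  return imp;	/* caller frees */
}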
/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

static rtx
ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
	return legitimize_dllimport_symbol (x, true);
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
	{
	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
	}
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (Pmode, other,
					       INTVAL (constant)));
	    }
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
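
/* Illustrative sketch (not part of GCC): the "canonicalize shifts into
   multiply" steps above rely on the identity  x << n == x * (1 << n)  for
   n in 0..3, so an (ashift reg n) can feed the SIB scale field as
   (mult reg (1 << n)).  In plain C, with a helper name of our own: */

static long long
ix86_doc_example_shift_as_mult (long long x, int n)
{
  /* Valid for the address-scale range handled above (n = 0..3).  */
  return x * (1LL << n);
}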
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   TARGET_PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_STACK_CHECK)
	{
	  bool f = i386_asm_output_addr_const_extra (file, x);
	  gcc_assert (f);
	  break;
	}

      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs ("@PLTOFF", file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@gottpoff", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@tpoff", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@tpoff", file);
	  else
	    fputs ("@ntpoff", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@dtpoff", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
	  else
	    fputs ("@gotntpoff", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@indntpoff", file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void ATTRIBUTE_UNUSED
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@dtpoff", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
   }
}
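
/* Illustrative sketch (not part of GCC): the emitter above writes
   ".long sym@dtpoff" and, for an 8-byte request on a 32-bit host, pads
   with a zero upper word.  A standalone restatement writing to any stdio
   stream; the hard-coded ".long" directive stands in for ASM_LONG and the
   helper name is ours: */

static void
ix86_doc_example_dtprel (FILE *f, const char *sym, int size)
{
  fprintf (f, "\t.long\t%s@dtpoff", sym);
  if (size == 8)
    fputs (", 0", f);	/* zero-extend the relocation to 8 bytes */
  fputc ('\n', f);
}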
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

static bool
ix86_pic_register_p (rtx x)
{
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else
    return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

static rtx
ix86_delegitimize_address (rtx x)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_MODE (XEXP (x, 0)) == Pmode
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
	{
	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
	  if (MEM_P (orig_x))
	    x = replace_equiv_address_nv (orig_x, x);
	  return x;
	}
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	      && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
	  || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
	return ix86_delegitimize_tls_address (orig_x);
      x = XVECEXP (XEXP (x, 0), 0, 0);
      if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
	{
	  x = simplify_gen_subreg (GET_MODE (orig_x), x,
				   GET_MODE (x), 0);
	  if (x == NULL_RTX)
	    return orig_x;
	}
      return x;
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
      else
	{
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
	}
    }
  else
    addend = XEXP (x, 0);

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
    result = XVECEXP (x, 0, 0);

  if (TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (!result)
    return ix86_delegitimize_tls_address (orig_x);

  if (const_addend)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result,
						 const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
	 for code like:
	 leal (%ebx, %ecx, 4), %ecx
	 ...
	 movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo.  */
      if (pic_offset_table_rtx)
	result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
						     pic_offset_table_rtx),
			       result);
      else
	return orig_x;
    }
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
      if (result == NULL_RTX)
	return orig_x;
    }
  return result;
}
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (CONST_INT_P (XEXP (term, 1))
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || (XINT (term, 1) != UNSPEC_GOTPCREL
	      && XINT (term, 1) != UNSPEC_PCREL))
	return x;

      return XVECEXP (term, 0, 0);
    }

  return ix86_delegitimize_address (x);
}
static void
put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
		    bool fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      suffix = "e";
      break;

    case NE:
      suffix = "ne";
      break;

    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;

    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a";
      else if (mode == CCCmode)
	suffix = "b";
      else
	gcc_unreachable ();
      break;

    case LT:
      switch (mode)
	{
	case CCNOmode:
	case CCGOCmode:
	  suffix = "s";
	  break;

	case CCmode:
	case CCGCmode:
	  suffix = "l";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case LTU:
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = "b";
      break;

    case GE:
      switch (mode)
	{
	case CCNOmode:
	case CCGOCmode:
	  suffix = "ns";
	  break;

	case CCmode:
	case CCGCmode:
	  suffix = "ge";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    case GEU:
      /* ??? As above.  */
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = fp ? "nb" : "ae";
      break;

    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;

    case LEU:
      /* ??? As above.  */
      if (mode == CCmode)
	suffix = "be";
      else if (mode == CCCmode)
	suffix = fp ? "nb" : "ae";
      else
	gcc_unreachable ();
      break;

    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;

    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;

    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
 */

void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  unsigned int regno;
  bool duplicated = code == 'd' && TARGET_AVX;

  if (ASSEMBLER_DIALECT == ASM_ATT)
    putc ('%', file);

  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  regno = true_regnum (x);
  gcc_assert (regno != ARG_POINTER_REGNUM
	      && regno != FRAME_POINTER_REGNUM
	      && regno != FLAGS_REG
	      && regno != FPSR_REG
	      && regno != FPCR_REG);

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'x')
    code = 16;
  else if (code == 't')
    code = 32;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers: "r%d[bwd]"  */
  if (REX_INT_REGNO_P (regno))
    {
      gcc_assert (TARGET_64BIT);
      putc ('r', file);
      fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
      switch (code)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  putc ('b', file);
	  break;
	case 2:
	  putc ('w', file);
	  break;
	case 4:
	  putc ('d', file);
	  break;
	case 8:
	  /* no suffix */
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }

  reg = NULL;
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  reg = "st(0)";
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      reg = hi_reg_name[regno];
      break;
    case 1:
      if (regno >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      reg = qi_reg_name[regno];
      break;
    case 0:
      if (regno >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      reg = qi_high_reg_name[regno];
      break;
    case 32:
      if (SSE_REG_P (x))
	{
	  gcc_assert (!duplicated);
	  putc ('y', file);
	  fputs (hi_reg_name[regno] + 1, file);
	  return;
	}
      break;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (file, ", %%%s", reg);
      else
	fprintf (file, ", %s", reg);
    }
}
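
/* Illustrative sketch (not part of GCC): print_reg first folds the
   operand-code letters into a byte size before choosing a register name
   table.  The mapping restated in plain C, with a helper name of ours: */

static int
ix86_doc_example_code_to_size (int code)
{
  switch (code)
    {
    case 'b': return 1;		/* QImode: %al, %bl, ...  */
    case 'w': return 2;		/* HImode: %ax, %bx, ...  */
    case 'k': return 4;		/* SImode: %eax, %ebx, ...  */
    case 'q': return 8;		/* DImode: %rax, %rbx, ...  */
    case 'x': return 16;	/* V4SFmode: %xmm0, ...  */
    case 't': return 32;	/* V8SFmode: %ymm0, ...  */
    default:  return 0;		/* not a size-forcing code */
    }
}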
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  return NULL;
}
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   E -- print address with DImode register names if TARGET_64BIT.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w --  likewise, print the HImode name of the register.
   k --  likewise, print the SImode name of the register.
   q --  likewise, print the DImode name of the register.
   x --  likewise, print the V4SFmode name of the register.
   t --  likewise, print the V8SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   p -- print raw symbol name.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   @ -- print a segment register of thread base pointer load
   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
 */
void
ix86_print_operand (FILE *file, rtx x, int code)
{
  if (code)
    {
      switch (code)
	{
	case 'A':
	  switch (ASSEMBLER_DIALECT)
	    {
	    case ASM_ATT:
	      putc ('*', file);
	      break;

	    case ASM_INTEL:
	      /* Intel syntax.  For absolute addresses, registers should not
		 be surrounded by braces.  */
	      if (!REG_P (x))
		{
		  putc ('[', file);
		  ix86_print_operand (file, x, 0);
		  putc (']', file);
		  return;
		}
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  ix86_print_operand (file, x, 0);
	  return;

	case 'E':
	  /* Wrap address in an UNSPEC to declare special handling.  */
	  if (TARGET_64BIT)
	    x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);

	  output_address (x);
	  return;

	case 'L':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'W':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('w', file);
	  return;

	case 'B':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('b', file);
	  return;

	case 'Q':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('l', file);
	  return;

	case 'S':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('s', file);
	  return;

	case 'T':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('t', file);
	  return;

	case 'O':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT != ASM_ATT)
	    return;

	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 2:
	      putc ('w', file);
	      break;
	    case 4:
	      putc ('l', file);
	      break;
	    case 8:
	      putc ('q', file);
	      break;
	    default:
	      output_operand_lossage
		("invalid operand size for operand code 'O'");
	      return;
	    }

	  putc ('.', file);
#endif
	  return;

	case 'z':
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    {
	      /* Opcodes don't get size suffixes if using Intel opcodes.  */
	      if (ASSEMBLER_DIALECT == ASM_INTEL)
		return;

	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 1:
		  putc ('b', file);
		  return;
		case 2:
		  putc ('w', file);
		  return;
		case 4:
		  putc ('l', file);
		  return;
		case 8:
		  putc ('q', file);
		  return;
		default:
		  output_operand_lossage
		    ("invalid operand size for operand code 'z'");
		  return;
		}
	    }

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	    warning
	      (0, "non-integer operand used with operand code 'z'");
	  /* FALLTHRU */

	case 'Z':
	  /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
	  if (ASSEMBLER_DIALECT == ASM_INTEL)
	    return;

	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    {
	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 2:
#ifdef HAVE_AS_IX86_FILDS
		  putc ('s', file);
#endif
		  return;
		case 4:
		  putc ('l', file);
		  return;
		case 8:
#ifdef HAVE_AS_IX86_FILDQ
		  putc ('q', file);
#else
		  fputs ("ll", file);
#endif
		  return;
		default:
		  break;
		}
	    }
	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	    {
	      /* 387 opcodes don't get size suffixes
		 if the operands are registers.  */
	      if (STACK_REG_P (x))
		return;

	      switch (GET_MODE_SIZE (GET_MODE (x)))
		{
		case 4:
		  putc ('s', file);
		  return;
		case 8:
		  putc ('l', file);
		  return;
		case 12:
		case 16:
		  putc ('t', file);
		  return;
		default:
		  break;
		}
	    }
	  else
	    {
	      output_operand_lossage
		("invalid operand type used with operand code 'Z'");
	      return;
	    }

	  output_operand_lossage
	    ("invalid operand size for operand code 'Z'");
	  return;

	case 's':
	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
	    {
	      ix86_print_operand (file, x, 0);
	      fputs (", ", file);
	    }
	  return;

	case 'Y':
	  switch (GET_CODE (x))
	    {
	    case NE:
	      fputs ("neq", file);
	      break;
	    case EQ:
	      fputs ("eq", file);
	      break;
	    case GE:
	    case GEU:
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
	      break;
	    case GT:
	    case GTU:
	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
	      break;
	    case LE:
	    case LEU:
	      fputs ("le", file);
	      break;
	    case LT:
	    case LTU:
	      fputs ("lt", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    case UNEQ:
	      fputs ("ueq", file);
	      break;
	    case UNGE:
	      fputs ("nlt", file);
	      break;
	    case UNGT:
	      fputs ("nle", file);
	      break;
	    case UNLE:
	      fputs ("ule", file);
	      break;
	    case UNLT:
	      fputs ("ult", file);
	      break;
	    case LTGT:
	      fputs ("une", file);
	      break;
	    default:
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code 'Y'");
	      return;
	    }
	  return;

	case 'D':
	  /* Little bit of braindamage here.  The SSE compare instructions
	     use completely different names for the comparisons than the
	     fp conditional moves do.  */
	  switch (GET_CODE (x))
	    {
	    case UNEQ:
	      if (TARGET_AVX)
		{
		  fputs ("eq_us", file);
		  break;
		}
	    case EQ:
	      fputs ("eq", file);
	      break;
	    case UNLT:
	      if (TARGET_AVX)
		{
		  fputs ("nge", file);
		  break;
		}
	    case LT:
	      fputs ("lt", file);
	      break;
	    case UNLE:
	      if (TARGET_AVX)
		{
		  fputs ("ngt", file);
		  break;
		}
	    case LE:
	      fputs ("le", file);
	      break;
	    case UNORDERED:
	      fputs ("unord", file);
	      break;
	    case NE:
	      if (TARGET_AVX)
		{
		  fputs ("neq_oq", file);
		  break;
		}
	      fputs ("neq", file);
	      break;
	    case GE:
	      if (TARGET_AVX)
		{
		  fputs ("ge", file);
		  break;
		}
	    case UNGE:
	      fputs ("nlt", file);
	      break;
	    case GT:
	      if (TARGET_AVX)
		{
		  fputs ("gt", file);
		  break;
		}
	    case UNGT:
	      fputs ("nle", file);
	      break;
	    case ORDERED:
	      fputs ("ord", file);
	      break;
	    default:
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code 'D'");
	      return;
	    }
	  return;

	case 'F':
	case 'f':
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('.', file);
#endif
	  /* FALLTHRU */

	case 'C':
	case 'c':
	  if (!COMPARISON_P (x))
	    {
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code '%c'", code);
	      return;
	    }
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      code == 'c' || code == 'f',
			      code == 'F' || code == 'f',
			      file);
	  return;

	case 'H':
	  if (!offsettable_memref_p (x))
	    {
	      output_operand_lossage ("operand is not an offsettable memory "
				      "reference, invalid operand code 'H'");
	      return;
	    }
	  /* It doesn't actually matter what mode we use here, as we're
	     only going to use this for printing.  */
	  x = adjust_address_nv (x, DImode, 8);
	  break;

	case 'K':
	  gcc_assert (CONST_INT_P (x));

	  if (INTVAL (x) & IX86_HLE_ACQUIRE)
#ifdef HAVE_AS_IX86_HLE
	    fputs ("xacquire ", file);
#else
	    fputs ("\n" ASM_BYTE "0xf2\n\t", file);
#endif
	  else if (INTVAL (x) & IX86_HLE_RELEASE)
#ifdef HAVE_AS_IX86_HLE
	    fputs ("xrelease ", file);
#else
	    fputs ("\n" ASM_BYTE "0xf3\n\t", file);
#endif
	  /* We do not want to print value of the operand.  */
	  return;

	case '*':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('*', file);
	  return;

	case '&':
	  {
	    const char *name = get_some_local_dynamic_name ();
	    if (name == NULL)
	      output_operand_lossage ("'%%&' used without any "
				      "local dynamic TLS references");
	    else
	      assemble_name (file, name);
	    return;
	  }

	case '+':
	  {
	    rtx x;

	    if (!optimize
		|| optimize_function_for_size_p (cfun)
		|| !TARGET_BRANCH_PREDICTION_HINTS)
	      return;

	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	    if (x)
	      {
		int pred_val = INTVAL (XEXP (x, 0));

		if (pred_val < REG_BR_PROB_BASE * 45 / 100
		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  {
		    bool taken = pred_val > REG_BR_PROB_BASE / 2;
		    bool cputaken
		      = final_forward_branch_p (current_output_insn) == 0;

		    /* Emit hints only in the case default branch prediction
		       heuristics would fail.  */
		    if (taken != cputaken)
		      {
			/* We use 3e (DS) prefix for taken branches and
			   2e (CS) prefix for not taken branches.  */
			if (taken)
			  fputs ("ds ; ", file);
			else
			  fputs ("cs ; ", file);
		      }
		  }
	      }
	    return;
	  }
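
	  /* Worked example of the thresholds above (REG_BR_PROB_BASE is
	     10000): a REG_BR_PROB note of 9000 exceeds the 55% bound of
	     5500, so the branch counts as predicted taken; if the
	     hardware's static heuristic (backward taken, forward not
	     taken) disagrees, the "ds ; " prefix is printed before the
	     jump.  Illustrative numbers only.  */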
	case ';':
#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
	  putc (';', file);
#endif
	  return;

	case '@':
	  if (ASSEMBLER_DIALECT == ASM_ATT)
	    putc ('%', file);

	  /* The kernel uses a different segment register for performance
	     reasons; a system call would not have to trash the userspace
	     segment register, which would be expensive.  */
	  if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
	    fputs ("fs", file);
	  else
	    fputs ("gs", file);
	  return;

	case '~':
	  putc (TARGET_AVX2 ? 'i' : 'f', file);
	  return;

	case '^':
	  if (TARGET_64BIT && Pmode != word_mode)
	    fputs ("addr32 ", file);
	  return;

	default:
	  output_operand_lossage ("invalid operand code '%c'", code);
	}
    }

  if (REG_P (x))
    print_reg (x, code, file);

  else if (MEM_P (x))
    {
      /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
	  && GET_MODE (x) != BLKmode)
	{
	  const char *size;

	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    {
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "TBYTE"; break;
	    case 16:
	      if (GET_MODE (x) == XFmode)
		size = "TBYTE";
	      else
		size = "XMMWORD";
	      break;
	    case 32: size = "YMMWORD"; break;
	    default:
	      gcc_unreachable ();
	    }

	  /* Check for explicit size override (codes 'b', 'w', 'k',
	     'q' and 'x').  */
	  if (code == 'b')
	    size = "BYTE";
	  else if (code == 'w')
	    size = "WORD";
	  else if (code == 'k')
	    size = "DWORD";
	  else if (code == 'q')
	    size = "QWORD";
	  else if (code == 'x')
	    size = "XMMWORD";

	  fputs (size, file);
	  fputs (" PTR ", file);
	}

      x = XEXP (x, 0);
      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
	  && !CONST_INT_P (x))
	output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
      else
	output_address (x);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
    {
      REAL_VALUE_TYPE r;
      long l;

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      /* Sign extend 32bit SFmode immediate to 8 bytes.  */
      if (code == 'q')
	fprintf (file, "0x%08llx", (unsigned long long) (int) l);
      else
	fprintf (file, "0x%08x", (unsigned int) l);
    }

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
    {
      REAL_VALUE_TYPE r;
      long l[2];

      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_DOUBLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('$', file);
      fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
    }

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
    {
      char dstr[30];

      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fputs (dstr, file);
    }

  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
	 In 64-bit mode, we should probably support all 8-byte vectors,
	 since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
	{
	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
	  x = const0_rtx;
	}

      if (code != 'P' && code != 'p')
	{
	  if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (CONST_INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic || MACHOPIC_INDIRECT)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}

static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '*' || code == '+' || code == '&'
	  || code == ';' || code == '~' || code == '^');
}
/* Print a memory operand whose address is ADDR.  */

static void
ix86_print_operand_address (FILE *file, rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok;
  bool vsib = false;
  int code = 0;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
      vsib = true;
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      code = 'q';
    }
  else
    ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  switch (parts.seg)
    {
    case SEG_DEFAULT:
      break;
    case SEG_FS:
    case SEG_GS:
      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('%', file);
      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      break;
    default:
      gcc_unreachable ();
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index)
    {
      rtx symbol = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	symbol = XEXP (XEXP (disp, 0), 0);

      if (GET_CODE (symbol) == LABEL_REF
	  || (GET_CODE (symbol) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
	base = pc_rtx;
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */
      if (CONST_INT_P (disp))
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);
    }
  else
    {
      /* Print SImode register names to force addr32 prefix.  */
      if (SImode_address_operand (addr, VOIDmode))
	{
#ifdef ENABLE_CHECKING
	  gcc_assert (TARGET_64BIT);
	  switch (GET_CODE (addr))
	    {
	    case SUBREG:
	      gcc_assert (GET_MODE (addr) == SImode);
	      gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
	      break;
	    case ZERO_EXTEND:
	    case AND:
	      gcc_assert (GET_MODE (addr) == DImode);
	      break;
	    default:
	      gcc_unreachable ();
	    }
#endif
	  gcc_assert (!code);
	  code = 'k';
	}
      else if (code == 0
	       && TARGET_X32
	       && disp
	       && CONST_INT_P (disp)
	       && INTVAL (disp) < -16*1024*1024)
	{
	  /* X32 runs in 64-bit mode, where displacement, DISP, in
	     address DISP(%r64), is encoded as 32-bit immediate sign-
	     extended from 32-bit to 64-bit.  For -0x40000300(%r64),
	     address is %r64 + 0xffffffffbffffd00.  When %r64 <
	     0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
	     which is invalid for x32.  The correct address is %r64
	     - 0x40000300 == 0xf7ffdd64.  To properly encode
	     -0x40000300(%r64) for x32, we zero-extend negative
	     displacement by forcing addr32 prefix which truncates
	     0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
	     zero-extend all negative displacements, including -1(%rsp).
	     However, for small negative displacements, sign-extension
	     won't cause overflow.  We only zero-extend negative
	     displacements if they < -16*1024*1024, which is also used
	     to check legitimate address displacements for PIC.  */
	  code = 'k';
	}

      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, code, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (CONST_INT_P (disp))
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, code, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@tpoff", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@tpoff", file);
      else
	fputs ("@ntpoff", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
      else
	fputs ("@gotntpoff", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
      break;
#if TARGET_MACHO
    case UNSPEC_MACHOPIC_OFFSET:
      output_addr_const (file, op);
      putc ('-', file);
      machopic_output_function_base_name (file);
      break;
#endif

    case UNSPEC_STACK_CHECK:
      {
	int offset;

	gcc_assert (flag_split_stack);

#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
	offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
#else
	gcc_unreachable ();
#endif

	fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
      }
      break;

    default:
      return false;
    }

  return true;
}
/* Split one or more double-mode RTL references into pairs of half-mode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

void
split_double_mode (enum machine_mode mode, rtx operands[],
		   int num, rtx lo_half[], rtx hi_half[])
{
  enum machine_mode half_mode;
  unsigned int byte;

  switch (mode)
    {
    case TImode:
      half_mode = DImode;
      break;
    case DImode:
      half_mode = SImode;
      break;
    default:
      gcc_unreachable ();
    }

  byte = GET_MODE_SIZE (half_mode);

  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
      if (MEM_P (op))
	{
	  lo_half[num] = adjust_address (op, half_mode, 0);
	  hi_half[num] = adjust_address (op, half_mode, byte);
	}
      else
	{
	  lo_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), byte);
	}
    }
}
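
/* A minimal usage sketch (hypothetical operand): splitting a single
   DImode register R with this routine yields
     lo_half[0] = (subreg:SI (reg:DI R) 0)
     hi_half[0] = (subreg:SI (reg:DI R) 4)
   since byte = GET_MODE_SIZE (SImode) = 4 on this little-endian
   target.  */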
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif
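
/* Illustrative sketch of the incompatibility this flag papers over
   (see the SYSV386_COMPAT blocks in output_387_binary_op below): on
   AT&T-derived assemblers, fsub/fsubr and fdiv/fdivr spellings swap
   their sense when the destination is not %st, so the AT&T and Intel
   template columns below deliberately disagree about which mnemonic
   carries the 'r'.  No particular assembler version is implied.  */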
const char *
output_387_binary_op (rtx insn, rtx *operands)
{
  static char buf[40];
  const char *p;
  const char *ssep;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1])
    || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
    ; /* ok */
  else
    gcc_assert (is_sse);
#endif

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fiadd";
      else
	p = "fadd";
      ssep = "vadd";
      break;

    case MINUS:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fisub";
      else
	p = "fsub";
      ssep = "vsub";
      break;

    case MULT:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fimul";
      else
	p = "fmul";
      ssep = "vmul";
      break;

    case DIV:
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
	p = "fidiv";
      else
	p = "fdiv";
      ssep = "vdiv";
      break;

    default:
      gcc_unreachable ();
    }

  if (is_sse)
    {
      if (TARGET_AVX)
	{
	  strcpy (buf, ssep);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
	  else
	    strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
	}
      else
	{
	  strcpy (buf, ssep + 1);
	  if (GET_MODE (operands[0]) == SFmode)
	    strcat (buf, "ss\t{%2, %0|%0, %2}");
	  else
	    strcat (buf, "sd\t{%2, %0|%0, %2}");
	}
      return buf;
    }
  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	{
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;
	}

      /* We know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (MEM_P (operands[1]))
	{
	  p = "r%Z1\t%1";
	  break;
	}

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	}
      break;

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
/* Check if a 256bit AVX register is referenced inside of EXP.  */

static int
ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
{
  rtx exp = *pexp;

  if (GET_CODE (exp) == SUBREG)
    exp = SUBREG_REG (exp);

  if (REG_P (exp)
      && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
    return 1;

  return 0;
}

/* Return needed mode for entity in optimize_mode_switching pass.  */

static int
ix86_avx_u128_mode_needed (rtx insn)
{
  if (CALL_P (insn))
    {
      rtx link;

      /* Needed mode is set to AVX_U128_CLEAN if there are
	 no 256bit modes used in function arguments.  */
      for (link = CALL_INSN_FUNCTION_USAGE (insn);
	   link;
	   link = XEXP (link, 1))
	{
	  if (GET_CODE (XEXP (link, 0)) == USE)
	    {
	      rtx arg = XEXP (XEXP (link, 0), 0);

	      if (ix86_check_avx256_register (&arg, NULL))
		return AVX_U128_ANY;
	    }
	}

      return AVX_U128_CLEAN;
    }

  /* Require DIRTY mode if a 256bit AVX register is referenced.  Hardware
     changes state only when a 256bit register is written to, but we need
     to prevent the compiler from moving optimal insertion point above
     eventual read from 256bit register.  */
  if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
    return AVX_U128_DIRTY;

  return AVX_U128_ANY;
}
/* Return mode that i387 must be switched into
   prior to the execution of insn.  */

static int
ix86_i387_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */
  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
	return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
	return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
	return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
	return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}

/* Return mode that entity must be switched into
   prior to the execution of insn.  */

int
ix86_mode_needed (int entity, rtx insn)
{
  switch (entity)
    {
    case AVX_U128:
      return ix86_avx_u128_mode_needed (insn);
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return ix86_i387_mode_needed (entity, insn);
    default:
      gcc_unreachable ();
    }
  return 0;
}
/* Check if a 256bit AVX register is referenced in stores.  */

static void
ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED,
			  void *data)
{
  if (ix86_check_avx256_register (&dest, NULL))
    {
      bool *used = (bool *) data;
      *used = true;
    }
}

/* Calculate mode of upper 128bit AVX registers after the insn.  */

static int
ix86_avx_u128_mode_after (int mode, rtx insn)
{
  rtx pat = PATTERN (insn);

  if (vzeroupper_operation (pat, VOIDmode)
      || vzeroall_operation (pat, VOIDmode))
    return AVX_U128_CLEAN;

  /* We know that state is clean after CALL insn if there are no
     256bit registers used in the function return register.  */
  if (CALL_P (insn))
    {
      bool avx_reg256_found = false;
      note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);

      if (!avx_reg256_found)
	return AVX_U128_CLEAN;
    }

  /* Otherwise, return current mode.  Remember that if insn
     references AVX 256bit registers, the mode was already changed
     to DIRTY from MODE_NEEDED.  */
  return mode;
}

/* Return the mode that an insn results in.  */

int
ix86_mode_after (int entity, int mode, rtx insn)
{
  switch (entity)
    {
    case AVX_U128:
      return ix86_avx_u128_mode_after (mode, insn);
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return mode;
    default:
      gcc_unreachable ();
    }
}

static int
ix86_avx_u128_mode_entry (void)
{
  tree arg;

  /* Entry mode is set to AVX_U128_DIRTY if there are
     256bit modes used in function arguments.  */
  for (arg = DECL_ARGUMENTS (current_function_decl); arg;
       arg = TREE_CHAIN (arg))
    {
      rtx incoming = DECL_INCOMING_RTL (arg);

      if (incoming && ix86_check_avx256_register (&incoming, NULL))
	return AVX_U128_DIRTY;
    }

  return AVX_U128_CLEAN;
}

/* Return a mode that ENTITY is assumed to be
   switched to at function entry.  */

int
ix86_mode_entry (int entity)
{
  switch (entity)
    {
    case AVX_U128:
      return ix86_avx_u128_mode_entry ();
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return I387_CW_ANY;
    default:
      gcc_unreachable ();
    }
}

static int
ix86_avx_u128_mode_exit (void)
{
  rtx reg = crtl->return_rtx;

  /* Exit mode is set to AVX_U128_DIRTY if there are
     256bit modes used in the function return register.  */
  if (reg && ix86_check_avx256_register (&reg, NULL))
    return AVX_U128_DIRTY;

  return AVX_U128_CLEAN;
}

/* Return a mode that ENTITY is assumed to be
   switched to at function exit.  */

int
ix86_mode_exit (int entity)
{
  switch (entity)
    {
    case AVX_U128:
      return ix86_avx_u128_mode_exit ();
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return I387_CW_ANY;
    default:
      gcc_unreachable ();
    }
}
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

static void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
      || optimize_function_for_size_p (cfun))
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
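
/* Worked example of the bit arithmetic above: bits 10-11 of the x87
   control word form the rounding-control field, so OR with 0x0c00
   sets RC = 11b (truncate), while AND ~0x0c00 followed by OR 0x0400
   or 0x0800 selects RC = 01b (round down) or RC = 10b (round up).
   Bit 5 (0x0020) is the precision-exception mask used for
   nearbyint().  */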
/* Emit vzeroupper.  */

static void
ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
{
  int i;

  /* Cancel automatic vzeroupper insertion if there are
     live call-saved SSE registers at the insertion point.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
      return;

  if (TARGET_64BIT)
    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
      if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
	return;

  emit_insn (gen_avx_vzeroupper ());
}

/* Generate one or more insns to set ENTITY to MODE.  */

void
ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
{
  switch (entity)
    {
    case AVX_U128:
      if (mode == AVX_U128_CLEAN)
	ix86_avx_emit_vzeroupper (regs_live);
      break;
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      if (mode != I387_CW_ANY
	  && mode != I387_CW_UNINITIALIZED)
	emit_i387_cw_initialization (mode);
      break;
    default:
      gcc_unreachable ();
    }
}
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
    output_asm_insn ("fisttp%Z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%Z0\t%0", operands);
      else
	output_asm_insn ("fist%Z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
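
/* An illustrative emitted sequence for a DImode truncation where the
   value stays live (the stack top does not die), assuming the
   truncating control word lives in operand 3 and the original in
   operand 2:
	fld	%st(0)
	fldcw	<%3 slot>
	fistpll	<%0 slot>	(or fistpq, with assembler support)
	fldcw	<%2 slot>
   The extra fld compensates for fistp always popping the stack.  */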
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (STACK_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "%vucomiss\t{%1, %0|%0, %1}";
	else
	  return "%vcomiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "%vucomisd\t{%1, %0|%0, %1}";
	else
	  return "%vcomisd\t{%1, %0|%0, %1}";
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
	{
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return output_387_ffreep (operands, 1);
	}
      else
	return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */
      if (eflags_p)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return output_387_ffreep (operands, 0);
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
      static const char * const alt[16] =
      {
	"fcom%Z2\t%y2\n\tfnstsw\t%0",
	"fcomp%Z2\t%y2\n\tfnstsw\t%0",
	"fucom%Z2\t%y2\n\tfnstsw\t%0",
	"fucomp%Z2\t%y2\n\tfnstsw\t%0",

	"ficom%Z2\t%y2\n\tfnstsw\t%0",
	"ficomp%Z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}
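
/* Worked example of the mask encoding above: an fcomi-style compare
   (eflags_p = 1) of two float stack registers, unordered, where the
   stack top dies, gives mask = (1 << 3) | (0 << 2) | (1 << 1) | 1
   = 11, selecting "fucomip\t{%y1, %0|%0, %y1}" from the table.  */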
void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks;
     see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
		 GOT_SYMBOL_NAME, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
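
/* Illustrative result: clearing a QImode or HImode hard register, say
   %ax, is widened to SImode first, so the emitted insn is the
   flag-clobbering "xorl %eax, %eax" rather than "xorw" with its extra
   0x66 operand-size prefix, or "movl $0, %eax" when xor's flags
   clobber is unwanted.  Register chosen for illustration only.  */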
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
	{
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);
	  if (op1 == op0)
	    return;
	  op1 = convert_to_mode (mode, op1, 1);
	}
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (op1))
	op1 = legitimize_dllimport_symbol (op1, false);
    }
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      rtx addend = XEXP (XEXP (op1, 0), 1);
      rtx symbol = XEXP (XEXP (op1, 0), 0);
      rtx tmp = NULL;

      model = SYMBOL_REF_TLS_MODEL (symbol);
      if (model)
	tmp = legitimize_tls_address (symbol, model, true);
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (symbol))
	tmp = legitimize_dllimport_symbol (symbol, true);

      if (tmp)
	{
	  tmp = force_operand (tmp, NULL);
	  tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
				     op0, 1, OPTAB_DIRECT);
	  if (tmp == op0)
	    return;
	  op1 = convert_to_mode (mode, tmp, 1);
	}
    }

  if ((flag_pic || MACHOPIC_INDIRECT)
      && symbolic_operand (op1, mode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
	{
#if TARGET_MACHO
	  /* dynamic-no-pic */
	  if (MACHOPIC_INDIRECT)
	    {
	      rtx temp = ((reload_in_progress
			   || ((op0 && REG_P (op0))
			       && mode == Pmode))
			  ? op0 : gen_reg_rtx (Pmode));
	      op1 = machopic_indirect_data_reference (op1, temp);
	      if (MACHOPIC_PURE)
		op1 = machopic_legitimize_pic_address (op1, mode,
						       temp == op1 ? 0 : temp);
	    }
	  if (op0 != op1 && GET_CODE (op0) != MEM)
	    {
	      rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
	      emit_insn (insn);
	      return;
	    }
	  if (GET_CODE (op0) == MEM)
	    op1 = force_reg (Pmode, op1);
	  else
	    {
	      rtx temp = op0;
	      if (GET_CODE (temp) != REG)
		temp = gen_reg_rtx (Pmode);
	      temp = legitimize_pic_address (op1, temp);
	      if (temp == op0)
		return;
	      op1 = temp;
	    }
	  /* dynamic-no-pic */
#endif
	}
      else
	{
	  if (MEM_P (op0))
	    op1 = force_reg (mode, op1);
	  else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
	    {
	      rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
	      op1 = legitimize_pic_address (op1, reg);
	      if (op0 == op1)
		return;
	      op1 = convert_to_mode (mode, op1, 1);
	    }
	}
    }
  else
    {
      if (MEM_P (op0)
	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	  && MEM_P (op1))
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
	 to get them CSEed.  */
      if (can_create_pseudo_p ()
	  && (mode == DImode) && TARGET_64BIT
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	  && optimize)
	op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
	  && FLOAT_MODE_P (mode)
	  && GET_CODE (op1) == CONST_DOUBLE)
	{
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */
	  op1 = validize_mem (force_const_mem (mode, op1));
	  if (!register_operand (op0, mode))
	    {
	      rtx temp = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
	      emit_move_insn (op0, temp);
	      return;
	    }
	}
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
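
/* Illustrative effect of the large-constant handling above: for a
   64-bit store such as "*p = 0x1234567890abcdef" there is no
   mem <- imm64 move, so the constant is first loaded into a pseudo
   (a movabsq at the assembly level) and then stored, which also lets
   CSE reuse the register for repeated uses of the same constant.
   Sketch only; the exact assembly depends on register allocation.  */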
void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
      && !standard_sse_constant_p (op1))
    op1 = validize_mem (force_const_mem (mode, op1));

  /* We need to check memory alignment for SSE mode since attribute
     can make operands unaligned.  */
  if (can_create_pseudo_p ()
      && SSE_REG_MODE_P (mode)
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      rtx tmp[2];

      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
	op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
	  && !register_operand (op1, mode))
	op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
/* Split 32-byte AVX unaligned load and store if needed.  */

static void
ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
{
  rtx m;
  rtx (*extract) (rtx, rtx, rtx);
  rtx (*load_unaligned) (rtx, rtx);
  rtx (*store_unaligned) (rtx, rtx);
  enum machine_mode mode;

  switch (GET_MODE (op0))
    {
    default:
      gcc_unreachable ();
    case V32QImode:
      extract = gen_avx_vextractf128v32qi;
      load_unaligned = gen_avx_loaddqu256;
      store_unaligned = gen_avx_storedqu256;
      mode = V16QImode;
      break;
    case V8SFmode:
      extract = gen_avx_vextractf128v8sf;
      load_unaligned = gen_avx_loadups256;
      store_unaligned = gen_avx_storeups256;
      mode = V4SFmode;
      break;
    case V4DFmode:
      extract = gen_avx_vextractf128v4df;
      load_unaligned = gen_avx_loadupd256;
      store_unaligned = gen_avx_storeupd256;
      mode = V2DFmode;
      break;
    }

  if (MEM_P (op1))
    {
      if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
	{
	  rtx r = gen_reg_rtx (mode);
	  m = adjust_address (op1, mode, 0);
	  emit_move_insn (r, m);
	  m = adjust_address (op1, mode, 16);
	  r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
	  emit_move_insn (op0, r);
	}
      else
	emit_insn (load_unaligned (op0, op1));
    }
  else if (MEM_P (op0))
    {
      if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
	{
	  m = adjust_address (op0, mode, 0);
	  emit_insn (extract (m, op1, const0_rtx));
	  m = adjust_address (op0, mode, 16);
	  emit_insn (extract (m, op1, const1_rtx));
	}
      else
	emit_insn (store_unaligned (op0, op1));
    }
  else
    gcc_unreachable ();
}
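
/* Illustrative code produced by the split-load path above for a V8SF
   unaligned load (AT&T syntax, registers chosen arbitrarily):
	vmovups	(%rax), %xmm0
	vinsertf128	$1, 16(%rax), %ymm0, %ymm0
   The low 16 bytes are loaded first and the VEC_CONCAT with the high
   half becomes a vinsertf128; the split-store path instead emits two
   vextractf128 halves.  */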
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
	 xorps  reg, reg
	 movlps mem, reg
	 movhps mem+8, reg
       }
     else
       {
	 movlps mem, reg
	 movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
	 movlpd mem, reg
	 movhpd mem+8, reg
       }
     else
       {
	 movsd  mem, reg
	 movhpd mem+8, reg
       }
 */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (TARGET_AVX
      && GET_MODE_SIZE (mode) == 32)
    {
      switch (GET_MODE_CLASS (mode))
	{
	case MODE_VECTOR_INT:
	case MODE_INT:
	  op0 = gen_lowpart (V32QImode, op0);
	  op1 = gen_lowpart (V32QImode, op1);
	  /* FALLTHRU */

	case MODE_VECTOR_FLOAT:
	  ix86_avx256_split_vector_move_misalign (op0, op1);
	  break;

	default:
	  gcc_unreachable ();
	}

      return;
    }

  if (MEM_P (op1))
    {
      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  /* We will eventually emit movups based on insn attributes.  */
	  emit_insn (gen_sse2_loaddqu (op0, op1));
	}
      else if (TARGET_SSE2 && mode == V2DFmode)
	{
	  rtx zero;

	  if (TARGET_AVX
	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_function_for_size_p (cfun))
	    {
	      /* We will eventually emit movups based on insn attributes.  */
	      emit_insn (gen_sse2_loadupd (op0, op1));
	      return;
	    }

	  /* When SSE registers are split into halves, we can avoid
	     writing to the top half twice.  */
	  if (TARGET_SSE_SPLIT_REGS)
	    {
	      emit_clobber (op0);
	      zero = op0;
	    }
	  else
	    {
	      /* ??? Not sure about the best option for the Intel chips.
		 The following would seem to satisfy; the register is
		 entirely cleared, breaking the dependency chain.  We
		 then store to the upper half, with a dependency depth
		 of one.  A rumor has it that Intel recommends two movsd
		 followed by an unpacklpd, but this is unconfirmed.  And
		 given that the dependency depth of the unpacklpd would
		 still be one, I'm not sure why this would be better.  */
	      zero = CONST0_RTX (V2DFmode);
	    }

	  m = adjust_address (op1, DFmode, 0);
	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
	  m = adjust_address (op1, DFmode, 8);
	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
	}
      else
	{
	  if (TARGET_AVX
	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_function_for_size_p (cfun))
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      op1 = gen_lowpart (V4SFmode, op1);
	      emit_insn (gen_sse_loadups (op0, op1));
	      return;
	    }

	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
	    emit_move_insn (op0, CONST0_RTX (mode));
	  else
	    emit_clobber (op0);

	  if (mode != V4SFmode)
	    op0 = gen_lowpart (V4SFmode, op0);

	  m = adjust_address (op1, V2SFmode, 0);
	  emit_insn (gen_sse_loadlps (op0, op0, m));
	  m = adjust_address (op1, V2SFmode, 8);
	  emit_insn (gen_sse_loadhps (op0, op0, m));
	}
    }
  else if (MEM_P (op0))
    {
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  /* We will eventually emit movups based on insn attributes.  */
	  emit_insn (gen_sse2_storedqu (op0, op1));
	}
      else if (TARGET_SSE2 && mode == V2DFmode)
	{
	  if (TARGET_AVX
	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_function_for_size_p (cfun))
	    /* We will eventually emit movups based on insn attributes.  */
	    emit_insn (gen_sse2_storeupd (op0, op1));
	  else
	    {
	      m = adjust_address (op0, DFmode, 0);
	      emit_insn (gen_sse2_storelpd (m, op1));
	      m = adjust_address (op0, DFmode, 8);
	      emit_insn (gen_sse2_storehpd (m, op1));
	    }
	}
      else
	{
	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);

	  if (TARGET_AVX
	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_function_for_size_p (cfun))
	    {
	      op0 = gen_lowpart (V4SFmode, op0);
	      emit_insn (gen_sse_storeups (op0, op1));
	    }
	  else
	    {
	      m = adjust_address (op0, V2SFmode, 0);
	      emit_insn (gen_sse_storelps (m, op1));
	      m = adjust_address (op0, V2SFmode, 8);
	      emit_insn (gen_sse_storehps (m, op1));
	    }
	}
    }
  else
    gcc_unreachable ();
}
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);

  /* When we push an operand onto stack, it has to be aligned at least
     at the function argument boundary.  However since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
  emit_move_insn (tmp, x);
}
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
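
/* Worked example: for "a = b + a" with a commutative PLUS, dst equals
   src2, so the predicate returns true and the caller swaps the
   sources, turning the expression into the two-address friendly
   "a = a + b" -- a single "addl %ebx, %eax" instead of an extra copy.
   Registers are illustrative.  */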
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp;

      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

      temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
	{
	  src2 = force_reg (mode, src2);
	  src1 = src2;
	}
      else
	src2 = force_reg (mode, src2);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  /* Improve address combine.  */
  if (code == PLUS
      && GET_MODE_CLASS (mode) == MODE_INT
      && MEM_P (src2))
    src2 = force_reg (mode, src2);

  operands[1] = src1;
  operands[2] = src2;
  return dst;
}
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}

/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
			     rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */
  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else if (reload_completed
	   && code == PLUS
	   && !rtx_equal_p (dst, src1))
    {
      /* This is going to be an LEA; avoid splitting it later.  */
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
   the given OPERANDS.  */

void
ix86_expand_vector_logical_operator (enum rtx_code code,
				     enum machine_mode mode, rtx operands[])
{
  rtx op1 = NULL_RTX, op2 = NULL_RTX;

  if (GET_CODE (operands[1]) == SUBREG)
    {
      op1 = operands[1];
      op2 = operands[2];
    }
  else if (GET_CODE (operands[2]) == SUBREG)
    {
      op1 = operands[2];
      op2 = operands[1];
    }

  /* Optimize (__m128i) d | (__m128i) e and similar code
     when d and e are float vectors into float vector logical
     insn.  In C/C++ without using intrinsics there is no other way
     to express vector logical operation on float vectors than
     to cast them temporarily to integer vectors.  */
  if (op1
      && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
      && (GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR)
      && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
      && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
      && SUBREG_BYTE (op1) == 0
      && (GET_CODE (op2) == CONST_VECTOR
	  || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
	      && SUBREG_BYTE (op2) == 0))
      && can_create_pseudo_p ())
    {
      rtx dst;

      switch (GET_MODE (SUBREG_REG (op1)))
	{
	case V4SFmode:
	case V8SFmode:
	case V2DFmode:
	case V4DFmode:
	  dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
	  if (GET_CODE (op2) == CONST_VECTOR)
	    {
	      op2 = gen_lowpart (GET_MODE (dst), op2);
	      op2 = force_reg (GET_MODE (dst), op2);
	    }
	  else
	    {
	      op1 = operands[1];
	      op2 = SUBREG_REG (operands[2]);
	      if (!nonimmediate_operand (op2, GET_MODE (dst)))
		op2 = force_reg (GET_MODE (dst), op2);
	    }
	  op1 = SUBREG_REG (op1);
	  if (!nonimmediate_operand (op1, GET_MODE (dst)))
	    op1 = force_reg (GET_MODE (dst), op1);
	  emit_insn (gen_rtx_SET (VOIDmode, dst,
				  gen_rtx_fmt_ee (code, GET_MODE (dst),
						  op1, op2)));
	  emit_move_insn (operands[0], gen_lowpart (mode, dst));
	  return;
	default:
	  break;
	}
    }

  if (!nonimmediate_operand (operands[1], mode))
    operands[1] = force_reg (mode, operands[1]);
  if (!nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  ix86_fixup_binary_operands_no_copy (code, mode, operands);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_fmt_ee (code, mode, operands[1],
					  operands[2])));
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

bool
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
			 rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return false;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return false;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return false;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    /* Support "andhi/andsi/anddi" as a zero-extending move.  */
    return (code == AND
	    && (mode == HImode
		|| mode == SImode
		|| (TARGET_64BIT && mode == DImode))
	    && satisfies_constraint_L (src2));

  return true;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
			    rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
      else
	dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */
  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */

void
ix86_split_idivmod (enum machine_mode mode, rtx operands[],
		    bool signed_p)
{
  rtx end_label, qimode_label;
  rtx insn, div, mod;
  rtx scratch, tmp0, tmp1, tmp2;
  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
  rtx (*gen_zero_extend) (rtx, rtx);
  rtx (*gen_test_ccno_1) (rtx, rtx);

  switch (mode)
    {
    case SImode:
      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
      gen_test_ccno_1 = gen_testsi_ccno_1;
      gen_zero_extend = gen_zero_extendqisi2;
      break;
    case DImode:
      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
      gen_test_ccno_1 = gen_testdi_ccno_1;
      gen_zero_extend = gen_zero_extendqidi2;
      break;
    default:
      gcc_unreachable ();
    }

  end_label = gen_label_rtx ();
  qimode_label = gen_label_rtx ();

  scratch = gen_reg_rtx (mode);

  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
  emit_move_insn (scratch, operands[2]);
  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
				 scratch, 1, OPTAB_DIRECT);
  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
			       gen_rtx_LABEL_REF (VOIDmode, qimode_label),
			       pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = qimode_label;

  /* Generate original signed/unsigned divmod.  */
  div = gen_divmod4_1 (operands[0], operands[1],
		       operands[2], operands[3]);
  emit_insn (div);

  /* Branch to the end.  */
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Generate 8bit unsigned divide.  */
  emit_label (qimode_label);
  /* Don't use operands[0] for result of 8bit divide since not all
     registers support QImode ZERO_EXTRACT.  */
  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));

  if (signed_p)
    {
      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
    }
  else
    {
      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
    }

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
  if (REG_P (operands[1]))
    insn = emit_move_insn (operands[1], tmp1);
  else
    {
      /* Need a new scratch register since the old one has result
	 of 8bit divide.  */
      scratch = gen_reg_rtx (mode);
      emit_move_insn (scratch, tmp1);
      insn = emit_move_insn (operands[1], scratch);
    }
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Zero extend quotient from AL.  */
  tmp1 = gen_lowpart (QImode, tmp0);
  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
  set_unique_reg_note (insn, REG_EQUAL, div);

  emit_label (end_label);
}
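
/* For illustration, a worked instance of the range test above (not part
   of the generated code): with dividend 200 and divisor 10,
   200 | 10 == 0xCA and 0xCA & -0x100 == 0, so both fit in 8 bits and
   the cheap 8-bit divide path is taken.  With dividend 300 (0x12C),
   0x12C & -0x100 == 0x100 != 0, so the full-width divide runs.  */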
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx prev, rtx next, unsigned int distance)
{
  df_ref *use_rec;
  df_ref *def_rec;

  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
    for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
      if (!DF_REF_IS_ARTIFICIAL (*def_rec)
	  && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
	return distance + (distance & 1) + 2;

  return distance + 1;
}
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

static bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
		  rtx insn)
{
  df_ref *def_rec;

  for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
    if (DF_REF_REG_DEF_P (*def_rec)
	&& !DF_REF_IS_ARTIFICIAL (*def_rec)
	&& (regno1 == DF_REF_REGNO (*def_rec)
	    || regno2 == DF_REF_REGNO (*def_rec)))
      return true;

  return false;
}
/* Function checks if instruction INSN uses register number
   REGNO as a part of address expression.  */

static bool
insn_uses_reg_mem (unsigned int regno, rtx insn)
{
  df_ref *use_rec;

  for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
    if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
      return true;

  return false;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx insn, int distance,
			       rtx start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx prev = start;
  rtx next = NULL;

  *found = false;

  while (prev
	 && prev != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, prev))
	    {
	      if (recog_memoized (prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)
		{
		  *found = true;
		  return distance;
		}
	    }

	  next = prev;
	}
      if (prev == BB_HEAD (bb))
	break;

      prev = PREV_INSN (prev);
    }

  return distance;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbour BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
					      distance, PREV_INSN (insn),
					      &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_non_agu_define_in_bb (regno1, regno2,
						  insn, distance,
						  BB_END (bb), &found);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      int bb_dist
		= distance_non_agu_define_in_bb (regno1, regno2,
						 insn, distance,
						 BB_END (e->src),
						 &found_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  if (!found)
    return -1;

  return distance >> 1;
}
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO0 is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
			rtx insn, int distance, rtx start,
			bool *found, bool *redefined)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx next = start;
  rtx prev = NULL;

  *found = false;
  *redefined = false;

  while (next
	 && next != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	{
	  distance = increase_distance(prev, next, distance);
	  if (insn_uses_reg_mem (regno, next))
	    {
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	      *found = true;
	      return distance;
	    }

	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
	    {
	      /* Return -1 if OP0 is set in NEXT.  */
	      *redefined = true;
	      return -1;
	    }

	  prev = next;
	}

      if (next == BB_END (bb))
	break;

      next = NEXT_INSN (next);
    }

  return distance;
}
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
				       NEXT_INSN (insn),
				       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_agu_use_in_bb (regno0, insn,
					   distance, BB_HEAD (bb),
					   &found, &redefined);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;
	  bool redefined_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int bb_dist
		= distance_agu_use_in_bb (regno0, insn,
					  distance, BB_HEAD (e->dest),
					  &found_in_bb, &redefined_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found || redefined)
    return -1;

  return distance >> 1;
}
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA.
   Zero: Neutral.
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  Instructions sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
		      unsigned int regno2, int split_cost)
{
  int dist_define, dist_use;

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
	 operand usage and split cost is 0 then both lea
	 and non lea variants have same priority.  Currently
	 we prefer lea for 64 bit code and non lea on 32 bit
	 code.  */
      if (dist_use < 0 && split_cost == 0)
	return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
	return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost exceeds AGU stall.  */
  if (dist_use < 0)
    return dist_define > LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
/* Return true if it is legal to clobber flags by INSN and
   false otherwise.  */

static bool
ix86_ok_to_clobber_flags (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  df_ref *use;
  bitmap live;

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn))
	{
	  for (use = DF_INSN_USES (insn); *use; use++)
	    if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
	      return false;

	  if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
	    return true;
	}

      if (insn == BB_END (bb))
	break;

      insn = NEXT_INSN (insn);
    }

  live = df_get_live_out(bb);
  return !REGNO_REG_SET_P (live, FLAGS_REG);
}
/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_add (rtx insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);
  regno2 = true_regnum (operands[2]);

  /* We need to split only adds with non destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)
    return false;
  else
    return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
}
/* Return true if we should emit lea instruction instead of mov
   instruction.  */

bool
ix86_use_lea_for_mov (rtx insn, rtx operands[])
{
  unsigned int regno0, regno1;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Use lea for reg to reg moves only.  */
  if (!REG_P (operands[0]) || !REG_P (operands[1]))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);

  return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0);
}
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;
  int split_cost;
  struct ix86_address parts;
  int ok;

  /* Check we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* There should be at least two components in the address.  */
  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
    return false;

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  regno0 = true_regnum (operands[0]) ;
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  split_cost = 0;

  /* Compute how many cycles we will add to execution time
     if split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    split_cost += 4;
	  else
	    split_cost += parts.scale;
	}

      /* Have to use add instruction with immediate if
	 disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
	split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
}
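
/* Worked instance of the split-cost accounting above (illustrative
   only): for "lea 0x4(%rbx,%rsi,2), %rax" we pay 1 for the initial
   mov (the destination matches neither source), 1 for adding the
   base, 1 for the shift replacing scale 2, and 1 for the
   displacement add, minus 1 for the lea itself: split_cost == 3.  */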
/* Emit x86 binary operand CODE in mode MODE, where the first operand
   matches destination.  RTX includes clobber of FLAGS_REG.  */

static void
ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
		 rtx dst, rtx src)
{
  rtx op, clob;

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
/* Return true if regno1 def is nearest to the insn.  */

static bool
find_nearest_reg_def (rtx insn, int regno1, int regno2)
{
  rtx prev = insn;
  rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));

  if (insn == start)
    return false;
  while (prev && prev != start)
    {
      if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
	{
	  prev = PREV_INSN (prev);
	  continue;
	}
      if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
	return true;
      else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
	return false;
      prev = PREV_INSN (prev);
    }

  /* None of the regs is defined in the bb.  */
  return false;
}
/* Split lea instructions into a sequence of instructions
   which are executed on ALU to avoid AGU stalls.
   It is assumed that it is allowed to clobber flags register
   at lea position.  */

void
ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
{
  unsigned int regno0, regno1, regno2;
  struct ix86_address parts;
  rtx target, tmp;
  int ok, adds;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  target = gen_lowpart (mode, operands[0]);

  regno0 = true_regnum (target);
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    {
      parts.base = gen_lowpart (mode, parts.base);
      regno1 = true_regnum (parts.base);
    }

  if (parts.index)
    {
      parts.index = gen_lowpart (mode, parts.index);
      regno2 = true_regnum (parts.index);
    }

  if (parts.disp)
    parts.disp = gen_lowpart (mode, parts.disp);

  if (parts.scale > 1)
    {
      /* Case r1 = r1 + ...  */
      if (regno1 == regno0)
	{
	  /* If we have a case r1 = r1 + C * r1 then we
	     should use multiplication which is very
	     expensive.  Assume cost model is wrong if we
	     have such case here.  */
	  gcc_assert (regno2 != regno0);

	  for (adds = parts.scale; adds > 0; adds--)
	    ix86_emit_binop (PLUS, mode, target, parts.index);
	}
      else
	{
	  /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));

	  /* Use shift for scaling.  */
	  ix86_emit_binop (ASHIFT, mode, target,
			   GEN_INT (exact_log2 (parts.scale)));

	  if (parts.base)
	    ix86_emit_binop (PLUS, mode, target, parts.base);

	  if (parts.disp && parts.disp != const0_rtx)
	    ix86_emit_binop (PLUS, mode, target, parts.disp);
	}
    }
  else if (!parts.base && !parts.index)
    {
      gcc_assert(parts.disp);
      emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
    }
  else
    {
      if (!parts.base)
	{
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
	}
      else if (!parts.index)
	{
	  if (regno0 != regno1)
	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
	}
      else
	{
	  if (regno0 == regno1)
	    tmp = parts.index;
	  else if (regno0 == regno2)
	    tmp = parts.base;
	  else
	    {
	      rtx tmp1;

	      /* Find better operand for SET instruction, depending
		 on which definition is farther from the insn.  */
	      if (find_nearest_reg_def (insn, regno1, regno2))
		tmp = parts.index, tmp1 = parts.base;
	      else
		tmp = parts.base, tmp1 = parts.index;

	      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));

	      if (parts.disp && parts.disp != const0_rtx)
		ix86_emit_binop (PLUS, mode, target, parts.disp);

	      ix86_emit_binop (PLUS, mode, target, tmp1);
	      return;
	    }

	  ix86_emit_binop (PLUS, mode, target, tmp);
	}

      if (parts.disp && parts.disp != const0_rtx)
	ix86_emit_binop (PLUS, mode, target, parts.disp);
    }
}
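
/* An illustrative split produced by the code above (not from the
   original sources): "lea 0x4(%rbx,%rsi,2), %rax" becomes

	mov  %rsi, %rax		; regno0 != regno2
	shl  $1,   %rax		; scale 2 == 1 << 1
	add  %rbx, %rax		; add the base
	add  $0x4, %rax		; add the displacement

   four ALU instructions that never touch the AGU.  */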
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
}
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count))
	{
	  /* Add check since it can be invoked before register
	     allocation in pre-reload schedule.  */
	  if (reload_completed
	      && true_regnum (set_dest) == true_regnum (shift_count))
	    return true;
	  else if (REGNO(set_dest) == REGNO(shift_count))
	    return true;
	}
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
				       PATTERN (use_insn));
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}

/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
	emit_insn (gen_sse_movss (value, value, input));
      else
	emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_fix_truncv4sfv4si2 (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  emit_insn (gen_xorv4si3 (value, value, large));
}
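
/* Numeric sketch of the flow above (illustrative): converting 3e9
   (> 2^31) from DFmode, large becomes an all-ones mask since
   2^31 <= 3e9, zero_or_two31 keeps 2^31, and value is reduced to
   3e9 - 2^31 = 852516352.0.  The truncating convert yields 852516352,
   and xoring with the mask shifted into bit 31 adds 2^31 back,
   giving 3000000000.  For inputs below 2^31 the mask is zero and the
   plain signed convert result passes through unchanged.  */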
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
			    gen_rtvec (4, GEN_INT (0x43300000UL),
				       GEN_INT (0x45300000UL),
				       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
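
/* Worked example of the bias trick above (illustrative): for input
   2^32 + 5, the low word 5 glued under exponent pattern 0x43300000
   is the double 2^52 + 5, and the high word 1 glued under 0x45300000
   is the double 2^84 + 2^32.  Subtracting the biases 2^52 and 2^84
   leaves exactly 5.0 and 2^32, and the final add (or haddpd on SSE3)
   produces 4294967301.0.  */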
/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
				  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
			   NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
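
/* Illustrative arithmetic for the code above: for input 0x80000005
   (2147483653), adding INT32_MIN wraps to 5, which the signed convert
   represents exactly as 5.0, and adding 2^31 back yields
   2147483653.0.  Inputs below 2^31 become negative after the wrap and
   the final add restores them just the same.  */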
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
			   0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
				NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
				NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
			       0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
			       0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
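
/* Illustrative arithmetic for the code above: input 0x12345678 splits
   into int_hi = 0x1234 and int_lo = 0x5678, both of which fit in 16
   bits and therefore convert to SFmode exactly; the reassembly
   0x1234 * 2^16 + 0x5678 then incurs a single rounding in the final
   add.  */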
/* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
   a vector of unsigned ints VAL to vector of floats TARGET.  */

void
ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
{
  rtx tmp[8];
  REAL_VALUE_TYPE TWO16r;
  enum machine_mode intmode = GET_MODE (val);
  enum machine_mode fltmode = GET_MODE (target);
  rtx (*cvt) (rtx, rtx);

  if (intmode == V4SImode)
    cvt = gen_floatv4siv4sf2;
  else
    cvt = gen_floatv8siv8sf2;
  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
  tmp[0] = force_reg (intmode, tmp[0]);
  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
				NULL_RTX, 1, OPTAB_DIRECT);
  tmp[3] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[3], tmp[1]));
  tmp[4] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[4], tmp[2]));
  real_ldexp (&TWO16r, &dconst1, 16);
  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
				OPTAB_DIRECT);
  if (tmp[7] != target)
    emit_move_insn (target, tmp[7]);
}
/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
   pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
   This is done by doing just signed conversion if < 0x1p31, and otherwise by
   subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */

rtx
ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
  REAL_VALUE_TYPE TWO31r;
  rtx two31r, tmp[4];
  enum machine_mode mode = GET_MODE (val);
  enum machine_mode scalarmode = GET_MODE_INNER (mode);
  enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
  rtx (*cmp) (rtx, rtx, rtx, rtx);
  int i;

  for (i = 0; i < 3; i++)
    tmp[i] = gen_reg_rtx (mode);
  real_ldexp (&TWO31r, &dconst1, 31);
  two31r = const_double_from_real_value (TWO31r, scalarmode);
  two31r = ix86_build_const_vector (mode, 1, two31r);
  two31r = force_reg (mode, two31r);
  switch (mode)
    {
    case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
    case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
    case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
    case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
    default: gcc_unreachable ();
    }
  tmp[3] = gen_rtx_LE (mode, two31r, val);
  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
				0, OPTAB_DIRECT);
  if (intmode == V4SImode || TARGET_AVX2)
    *xorp = expand_simple_binop (intmode, ASHIFT,
				 gen_lowpart (intmode, tmp[0]),
				 GEN_INT (31), NULL_RTX, 0,
				 OPTAB_DIRECT);
  else
    {
      rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
      two31 = ix86_build_const_vector (intmode, 1, two31);
      *xorp = expand_simple_binop (intmode, AND,
				   gen_lowpart (intmode, tmp[0]),
				   two31, NULL_RTX, 0,
				   OPTAB_DIRECT);
    }
  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
			      0, OPTAB_DIRECT);
}
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  enum machine_mode scalar_mode;

  switch (mode)
    {
    case V32QImode:
    case V16QImode:
    case V16HImode:
    case V8HImode:
    case V8SImode:
    case V4SImode:
    case V4DImode:
    case V2DImode:
      gcc_assert (vect);
    case V8SFmode:
    case V4SFmode:
    case V4DFmode:
    case V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case V8SImode:
    case V4SImode:
    case V8SFmode:
    case V4SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case V4DImode:
    case V2DImode:
    case V4DFmode:
    case V2DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
	lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	{
	  imode = TImode;
	  lo = 0, hi = (HOST_WIDE_INT)1 << shift;
	}
      else
	{
	  rtvec vec;

	  imode = DImode;
	  lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
	  if (invert)
	    {
	      lo = ~lo, hi = ~hi;
	      v = constm1_rtx;
	    }
	  else
	    v = const0_rtx;

	  mask = immed_double_const (lo, hi, imode);

	  vec = gen_rtvec (2, v, mask);
	  v = gen_rtx_CONST_VECTOR (V2DImode, vec);
	  v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

	  return v;
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
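
/* For illustration: in V4SFmode the returned constant is four copies
   of 0x80000000 for a sign mask, or of 0x7fffffff when INVERT, so NEG
   can be a single xorps with the former and ABS a single andps with
   the latter.  */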
/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
				rtx operands[])
{
  rtx mask, set, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode vmode = mode;

  if (vector_mode)
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);

  if (mask)
    {
      rtx use, clob;
      rtvec par;

      use = gen_rtx_USE (VOIDmode, mask);
      if (vector_mode)
	par = gen_rtvec (2, set, use);
      else
	{
	  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
	  par = gen_rtvec (3, set, use, clob);
	}
      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
    }
  else
    emit_insn (set);
}
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
	{
	  if (op0 == CONST0_RTX (mode))
	    op0 = CONST0_RTX (vmode);
	  else
	    {
	      rtx v = ix86_build_const_vector (vmode, false, op0);

	      op0 = force_reg (vmode, v);
	    }
	}
      else if (op0 != CONST0_RTX (mode))
	op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_const;
      else
	copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_var;
      else
	copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
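
/* The identity behind the expansion (illustrative): copysign(x, y) is
   (x & ~signmask) | (y & signmask), with signmask == 0x80000000 in
   SFmode; when x is a constant it is pre-masked to |x| above, so only
   the OR with the masked sign of y remains for the splitters below.  */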
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
	{
	  x = gen_rtx_AND (vmode, scratch, mask);
	}
      else						/* alternative 2,4 */
	{
	  gcc_assert (REGNO (mask) == REGNO (scratch));
	  op1 = simplify_gen_subreg (vmode, op1, mode, 0);
	  x = gen_rtx_AND (vmode, scratch, op1);
	}
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
	{
	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, nmask);
	}
      else						/* alternative 3,4 */
	{
	  gcc_assert (REGNO (nmask) == REGNO (dest));
	  dest = nmask;
	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, op0);
	}
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case CCZmode:
      break;

    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
      if (set_mode != req_mode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      else
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == MINUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      else
	return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
    return m2;
  else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCAmode:
	case CCCmode:
	case CCOmode:
	case CCSmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
/* Return a comparison we can do and that it is equivalent to
   swap_condition (code) apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:			/* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:			/* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:			/* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}
/* Return cost of comparison CODE using the best strategy for performance.
   All following functions do use number of instructions as a cost metrics.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int arith_cost;

  /* The cost of code using bit-twiddling on %ah.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      arith_cost = 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      arith_cost = TARGET_IEEE_FP ? 5 : 4;
      break;
    case LE:
    case UNGT:
      arith_cost = TARGET_IEEE_FP ? 6 : 4;
      break;
    default:
      gcc_unreachable ();
    }

  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      return arith_cost > 4 ? 3 : 2;
    case IX86_FPCMP_SAHF:
      return arith_cost > 4 ? 4 : 3;
    default:
      return arith_cost;
    }
}

/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* Do fcomi/sahf based test when profitable.  */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || (op_mode == XFmode
	      && ! (standard_80387_constant_p (op0) == 1
		    || standard_80387_constant_p (op1) == 1)
	      && GET_CODE (op1) != FLOAT)
	  || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (MEM_P (op0)
	      && ! (standard_80387_constant_p (op1) == 0
		    || MEM_P (op1))))
	{
	  enum rtx_code new_code = ix86_fp_swap_condition (code);
	  if (new_code != UNKNOWN)
	    {
	      rtx tmp;
	      tmp = op0, op0 = op1, op1 = tmp;
	      code = new_code;
	    }
	}

      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  int tmp = standard_80387_constant_p (op1);
	  if (tmp == 0)
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	  else if (tmp == 1)
	    {
	      if (TARGET_CMOVE)
		op1 = force_reg (op_mode, op1);
	    }
	  else
	    op1 = force_reg (op_mode, op1);
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* Do fcomi/sahf based test when profitable.  */
  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			 tmp);
      emit_insn (tmp);
      break;

    case IX86_FPCMP_SAHF:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			 tmp);

      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
      break;

    case IX86_FPCMP_ARITH:
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable();
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
static rtx
ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);

  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
    }
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
void
ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx tmp;

  switch (mode)
    {
    case SFmode:
    case DFmode:
    case XFmode:
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, op0, op1);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case DImode:
      if (TARGET_64BIT)
	goto simple;
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;
	enum machine_mode submode;

	if (CONSTANT_P (op0) && !CONSTANT_P (op1))
	  {
	    tmp = op0, op0 = op1, op1 = tmp;
	    code = swap_condition (code);
	  }

	split_double_mode (mode, &op0, 1, lo+0, hi+0);
	split_double_mode (mode, &op1, 1, lo+1, hi+1);

	submode = mode == DImode ? SImode : DImode;

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_insn_for_size_p ()
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    ix86_expand_branch (code, tmp, const0_rtx, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  Similarly for low word -1 and
	   less-or-equal-than or greater-than.  */

	if (CONST_INT_P (hi[1]))
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      if (lo[1] == const0_rtx)
		{
		  ix86_expand_branch (code, hi[0], hi[1], label);
		  return;
		}
	      break;
	    case LE: case LEU: case GT: case GTU:
	      if (lo[1] == constm1_rtx)
		{
		  ix86_expand_branch (code, hi[0], hi[1], label);
		  return;
		}
	      break;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
	  case NE:   code2 = UNKNOWN; break;

	  default:
	    gcc_unreachable ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	if (code1 != UNKNOWN)
	  ix86_expand_branch (code1, hi[0], hi[1], label);
	if (code2 != UNKNOWN)
	  ix86_expand_branch (code2, hi[0], hi[1], label2);

	ix86_expand_branch (code3, lo[0], lo[1], label);

	if (code2 != UNKNOWN)
	  emit_label (label2);
	return;
      }

    default:
      gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
      goto simple;
    }
}
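
/* Illustrative instance of the equality shortcut above: for
   "(a == b)" in DImode on a 32-bit target, the emitted sequence
   computes (hi(a) ^ hi(b)) | (lo(a) ^ lo(b)) in SImode and branches
   once on the result being zero, instead of comparing and branching
   on each word separately.  */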
/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx condition;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (split_branch_probability >= 0)
    add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
}
void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, op0, op1);
  PUT_MODE (ret, QImode);
  emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
}
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle double-mode compares that go through special path.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      rtx compare_op, compare_seq;

      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));

      /* Shortcut: the following common codes never translate
	 into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
	  || code == ORDERED || code == UNORDERED)
	return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
	  && !TARGET_IEEE_FP)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = swap_condition (code);
	}

      /* Try to expand the comparison and verify that we end up with
	 a carry flag based comparison.  This fails to be true only when
	 we decide to expand the comparison using arithmetic, which is
	 not a common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
      compare_seq = get_insns ();
      end_sequence ();

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
	code = GET_CODE (compare_op);

      if (code != LTU && code != GEU)
	return false;

      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }

  if (!INTEGRAL_MODE_P (mode))
    return false;

  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
	return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (CONST_INT_P (op1))
	{
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into a register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	    return false;
	  code = (code == GTU ? GEU : LTU);
	}
      else
	{
	  rtx tmp = op1;
	  op1 = op0;
	  op0 = tmp;
	  code = (code == GTU ? LTU : GEU);
	}
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;

    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }

  /* Swapping operands may cause a constant to appear as the first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (!can_create_pseudo_p ())
	return false;
      op0 = force_reg (mode, op0);
    }

  *pop = ix86_expand_compare (code, op0, op1);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}
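/* A sketch of the payoff (illustrative, not the exact emitted insns):
   a == 0 is rewritten above into (unsigned) a < 1, which on ia32 is

	cmpl	$1, %eax	 carry flag = (a == 0)
	sbbl	%edx, %edx	 %edx = a == 0 ? -1 : 0

   so the whole test lives in the carry flag, ready for sbb/adc.  */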
bool
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (GET_MODE (op0) == TImode
      || (GET_MODE (op0) == DImode
	  && !TARGET_64BIT))
    return false;

  start_sequence ();
  compare_op = ix86_expand_compare (code, op0, op1);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((op1 == const0_rtx && (code == GE || code == LT))
      || (op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && CONST_INT_P (operands[2])
      && CONST_INT_P (operands[3]))
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than we do by using
	 sbb.  */
      if (sign_bit_compare_p
	  || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
	{
	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  if (!sign_bit_compare_p)
	    {
	      rtx flags;
	      bool fpcmp = false;

	      compare_code = GET_CODE (compare_op);

	      flags = XEXP (compare_op, 0);

	      if (GET_MODE (flags) == CCFPmode
		  || GET_MODE (flags) == CCFPUmode)
		{
		  fpcmp = true;
		  compare_code
		    = ix86_fp_compare_code_to_integer (compare_code);
		}

	      /* To simplify rest of code, restrict to the GEU case.  */
	      if (compare_code == LTU)
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		}
	      else
		{
		  if (fpcmp)
		    PUT_CODE (compare_op,
			      reverse_condition_maybe_unordered
			        (GET_CODE (compare_op)));
		  else
		    PUT_CODE (compare_op,
			      reverse_condition (GET_CODE (compare_op)));
		}
	      diff = ct - cf;

	      if (reg_overlap_mentioned_p (out, op0)
		  || reg_overlap_mentioned_p (out, op1))
		tmp = gen_reg_rtx (mode);

	      if (mode == DImode)
		emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
	      else
		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
						 flags, compare_op));
	    }
	  else
	    {
	      if (code == GT || code == GE)
		code = reverse_condition (code);
	      else
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  diff = ct - cf;
		}
	      tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
	    }

	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       */
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       */
	      tmp = expand_simple_binop (mode, IOR,
					 copy_rtx (tmp), GEN_INT (ct),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * notl dest
	       * [addl dest, cf]
	       */
	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
	      if (cf)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (cf),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [notl dest]
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       */
	      if (cf == 0)
		{
		  cf = ct;
		  ct = 0;
		  tmp = expand_simple_unop (mode, NOT, tmp,
					    copy_rtx (tmp), 1);
		}

	      tmp = expand_simple_binop (mode, AND,
					 copy_rtx (tmp),
					 gen_int_mode (cf - ct, mode),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }

	  if (!rtx_equal_p (tmp, out))
	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));

	  return true;
	}

      if (diff < 0)
	{
	  enum machine_mode cmp_mode = GET_MODE (op0);
	  HOST_WIDE_INT tmp;

	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;

	  if (SCALAR_FLOAT_MODE_P (cmp_mode))
	    {
	      gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
	  && CONST_INT_P (op1))
	{
	  if (op1 == const0_rtx
	      && (code == LT || code == GE))
	    compare_code = code;
	  else if (op1 == constm1_rtx)
	    {
	      if (code == LE)
		compare_code = LT;
	      else if (code == GT)
		compare_code = GE;
	    }
	}

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
	  && GET_MODE (op0) == GET_MODE (out)
	  && (cf == -1 || ct == -1))
	{
	  /* If lea code below could be used, only optimize
	     if it results in a 2 insn sequence.  */

	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
		 || diff == 3 || diff == 5 || diff == 9)
	      || (compare_code == LT && ct == -1)
	      || (compare_code == GE && cf == -1))
	    {
	      /*
	       * notl op1	(if necessary)
	       * sarl $31, op1
	       * orl cf, op1
	       */
	      if (ct != -1)
		{
		  cf = ct;
		  ct = -1;
		  code = reverse_condition (code);
		}

	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);

	      out = expand_simple_binop (mode, IOR,
					 out, GEN_INT (cf),
					 out, 1, OPTAB_DIRECT);
	      if (out != operands[0])
		emit_move_insn (operands[0], out);

	      return true;
	    }
	}

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
	   || diff == 3 || diff == 5 || diff == 9)
	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
	  && (mode != DImode
	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get arithmetics done in proper mode to match.  */
	  if (diff == 1)
	    tmp = copy_rtx (out);
	  else
	    {
	      rtx out1;
	      out1 = copy_rtx (out);
	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (mode, tmp, out1);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (!rtx_equal_p (tmp, out))
	    {
	      if (nops == 1)
		out = force_operand (tmp, copy_rtx (out));
	      else
		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out),
					copy_rtx (tmp)));
	    }
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return true;
	}

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
	  && BRANCH_COST (optimize_insn_for_speed_p (),
			  false) >= 2)
	{
	  if (cf == 0)
	    {
	      enum machine_mode cmp_mode = GET_MODE (op0);

	      cf = ct;
	      ct = 0;

	      if (SCALAR_FLOAT_MODE_P (cmp_mode))
		{
		  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

		  /* We may be reversing unordered compare to normal compare,
		     that is not valid in general (we may convert non-trapping
		     condition to trapping one), however on i386 we currently
		     emit all comparisons unordered.  */
		  code = reverse_condition_maybe_unordered (code);
		}
	      else
		{
		  code = reverse_condition (code);
		  if (compare_code != UNKNOWN)
		    compare_code = reverse_condition (compare_code);
		}
	    }

	  if (compare_code != UNKNOWN)
	    {
	      /* notl op1	(if needed)
		 sarl $31, op1
		 andl (cf-ct), op1
		 addl ct, op1

		 For x < 0 (resp. x <= -1) there will be no notl,
		 so if possible swap the constants to get rid of the
		 complement.
		 True/false will be -1/0 while code below (store flag
		 followed by decrement) is 0/-1, so the constants need
		 to be exchanged once more.  */

	      if (compare_code == GE || !cf)
		{
		  code = reverse_condition (code);
		  compare_code = LT;
		}
	      else
		{
		  HOST_WIDE_INT tmp = cf;
		  cf = ct;
		  ct = tmp;
		}

	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
	    }
	  else
	    {
	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

	      out = expand_simple_binop (mode, PLUS, copy_rtx (out),
					 constm1_rtx,
					 copy_rtx (out), 1, OPTAB_DIRECT);
	    }

	  out = expand_simple_binop (mode, AND, copy_rtx (out),
				     gen_int_mode (cf - ct, mode),
				     copy_rtx (out), 1, OPTAB_DIRECT);
	  if (ct)
	    out = expand_simple_binop (mode, PLUS, copy_rtx (out),
				       GEN_INT (ct),
				       copy_rtx (out), 1, OPTAB_DIRECT);
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return true;
	}
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
	return false;

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (CONST_INT_P (operands[2]))
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return false;
	}
      else if (CONST_INT_P (operands[3]))
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return false;
	}
      else
	return false;

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return false;

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return true;
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
	  || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
						operands[3])));
  return true;
}
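/* Pulling the pieces above together, the carry-flag path for
   dest = (a < b ? ct : cf) with unsigned operands is, as a sketch:

	cmpl	%ebx, %eax	 CF = (a < b)
	sbbl	%edx, %edx	 %edx = a < b ? -1 : 0
	andl	$(ct - cf), %edx
	addl	$cf, %edx

   (illustrative register choices; the actual sequence depends on the
   diff == ct - cf special cases handled above).  */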
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
				  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* AVX supports all the needed comparisons.  */
      if (TARGET_AVX)
	break;
      /* We have no LTGT as an operator.  We could implement it with
	 NE & ORDERED, but this requires an extra temporary.  It's
	 not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* AVX has 3 operand comparisons, no need to swap anything.  */
      if (TARGET_AVX)
	break;
      /* For commutative operators, try to canonicalize the destination
	 operand to be first in the comparison - this helps reload to
	 avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
	break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly before AVX, and furthermore
	 ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
	 comparison operands to transform into something that is
	 supported.  */
      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
			   rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}
/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
		     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  enum machine_mode cmp_mode = GET_MODE (cmp_op0);
  rtx x;

  cmp_op0 = force_reg (cmp_mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, cmp_mode))
    cmp_op1 = force_reg (cmp_mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
  if (cmp_mode != mode)
    {
      x = force_reg (cmp_mode, x);
      convert_move (dest, x, false);
    }
  else
    emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}
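/* The mask produced here is all-ones or all-zeros per element.  E.g.
   (a sketch) comparing {1.0, 5.0, 3.0, 4.0} < {2.0, 4.0, 9.0, 0.0}
   with cmpltps yields {0xffffffff, 0, 0xffffffff, 0}.  */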
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx t2, t3, x;

  if (vector_all_ones_operand (op_true, mode)
      && rtx_equal_p (op_false, CONST0_RTX (mode)))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
    }
  else if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_IOR (mode, cmp, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (TARGET_XOP)
    {
      op_true = force_reg (mode, op_true);

      if (!nonimmediate_operand (op_false, mode))
	op_false = force_reg (mode, op_false);

      emit_insn (gen_rtx_SET (mode, dest,
			      gen_rtx_IF_THEN_ELSE (mode, cmp,
						    op_true, op_false)));
    }
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;

      if (!nonimmediate_operand (op_true, mode))
	op_true = force_reg (mode, op_true);

      op_false = force_reg (mode, op_false);

      switch (mode)
	{
	case V4SFmode:
	  if (TARGET_SSE4_1)
	    gen = gen_sse4_1_blendvps;
	  break;
	case V2DFmode:
	  if (TARGET_SSE4_1)
	    gen = gen_sse4_1_blendvpd;
	  break;
	case V16QImode:
	case V8HImode:
	case V4SImode:
	case V2DImode:
	  if (TARGET_SSE4_1)
	    {
	      gen = gen_sse4_1_pblendvb;
	      dest = gen_lowpart (V16QImode, dest);
	      op_false = gen_lowpart (V16QImode, op_false);
	      op_true = gen_lowpart (V16QImode, op_true);
	      cmp = gen_lowpart (V16QImode, cmp);
	    }
	  break;
	case V8SFmode:
	  if (TARGET_AVX)
	    gen = gen_avx_blendvps256;
	  break;
	case V4DFmode:
	  if (TARGET_AVX)
	    gen = gen_avx_blendvpd256;
	  break;
	case V32QImode:
	case V16HImode:
	case V8SImode:
	case V4DImode:
	  if (TARGET_AVX2)
	    {
	      gen = gen_avx2_pblendvb;
	      dest = gen_lowpart (V32QImode, dest);
	      op_false = gen_lowpart (V32QImode, op_false);
	      op_true = gen_lowpart (V32QImode, op_true);
	      cmp = gen_lowpart (V32QImode, cmp);
	    }
	  break;
	default:
	  break;
	}

      if (gen != NULL)
	emit_insn (gen (dest, op_false, op_true, cmp));
      else
	{
	  op_true = force_reg (mode, op_true);

	  t2 = gen_reg_rtx (mode);
	  if (optimize)
	    t3 = gen_reg_rtx (mode);
	  else
	    t3 = dest;

	  x = gen_rtx_AND (mode, op_true, cmp);
	  emit_insn (gen_rtx_SET (VOIDmode, t2, x));

	  x = gen_rtx_NOT (mode, cmp);
	  x = gen_rtx_AND (mode, x, op_false);
	  emit_insn (gen_rtx_SET (VOIDmode, t3, x));

	  x = gen_rtx_IOR (mode, t3, t2);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
	}
    }
}
/* Expand a floating-point conditional move.  Return true if successful.  */

bool
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
	 allocation just to gain access to it.  Deny movcc when the
	 comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
	cmode = GET_MODE (op1);
      if (cmode != mode)
	return false;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)
	return false;

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
				     operands[2], operands[3]))
	return true;

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
				 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return true;
    }

  if (GET_MODE (op0) == TImode
      || (GET_MODE (op0) == DImode
	  && !TARGET_64BIT))
    return false;

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
						operands[2], operands[3])));

  return true;
}
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					   &operands[4], &operands[5]);
  if (code == UNKNOWN)
    {
      rtx temp;
      switch (GET_CODE (operands[3]))
	{
	case LTGT:
	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
				      operands[5], operands[0], operands[0]);
	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
				     operands[5], operands[1], operands[2]);
	  code = AND;
	  break;
	case UNEQ:
	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
				      operands[5], operands[0], operands[0]);
	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
				     operands[5], operands[1], operands[2]);
	  code = IOR;
	  break;
	default:
	  gcc_unreachable ();
	}
      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
				 OPTAB_DIRECT);
      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
      return true;
    }

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
				 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
			     operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}
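/* E.g. (a sketch of the UNKNOWN path above): LTGT is composed as
   (a ORDERED b) & (a != b), and UNEQ as (a UNORDERED b) | (a == b),
   each half produced by ix86_expand_sse_cmp and then combined with
   the AND resp. IOR chosen in the switch.  */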
/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode data_mode = GET_MODE (operands[0]);
  enum machine_mode mode = GET_MODE (operands[4]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
  if ((code == LT || code == GE)
      && data_mode == mode
      && cop1 == CONST0_RTX (mode)
      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
      && (GET_MODE_SIZE (data_mode) == 16
	  || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
    {
      rtx negop = operands[2 - (code == LT)];
      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
      if (negop == CONST1_RTX (data_mode))
	{
	  rtx res = expand_simple_binop (mode, LSHIFTRT, cop0,
					 GEN_INT (shift),
					 operands[0], 1, OPTAB_DIRECT);
	  if (res != operands[0])
	    emit_move_insn (operands[0], res);
	  return true;
	}
      else if (GET_MODE_INNER (data_mode) != DImode
	       && vector_all_ones_operand (negop, data_mode))
	{
	  rtx res = expand_simple_binop (mode, ASHIFTRT, cop0,
					 GEN_INT (shift),
					 operands[0], 0, OPTAB_DIRECT);
	  if (res != operands[0])
	    emit_move_insn (operands[0], res);
	  return true;
	}
    }

  if (!nonimmediate_operand (cop1, mode))
    cop1 = force_reg (mode, cop1);
  if (!general_operand (operands[1], data_mode))
    operands[1] = force_reg (data_mode, operands[1]);
  if (!general_operand (operands[2], data_mode))
    operands[2] = force_reg (data_mode, operands[2]);

  /* XOP supports all of the comparisons on all 128-bit vector int types.  */
  if (TARGET_XOP
      && (mode == V16QImode || mode == V8HImode
	  || mode == V4SImode || mode == V2DImode))
    ;
  else
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
	{
	case EQ:
	case GT:
	case GTU:
	  break;

	case NE:
	case LE:
	case LEU:
	  code = reverse_condition (code);
	  negate = true;
	  break;

	case GE:
	case GEU:
	  code = reverse_condition (code);
	  negate = true;
	  /* FALLTHRU */

	case LT:
	case LTU:
	  code = swap_condition (code);
	  x = cop0, cop0 = cop1, cop1 = x;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Only SSE4.1/SSE4.2 support V2DImode.  */
      if (mode == V2DImode)
	{
	  switch (code)
	    {
	    case EQ:
	      /* SSE4.1 supports EQ.  */
	      if (!TARGET_SSE4_1)
		return false;
	      break;

	    case GT:
	    case GTU:
	      /* SSE4.2 supports GT/GTU.  */
	      if (!TARGET_SSE4_2)
		return false;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}

      /* Unsigned parallel compare is not supported by the hardware.
	 Play some tricks to turn this into a signed comparison
	 against 0.  */
      if (code == GTU)
	{
	  cop0 = force_reg (mode, cop0);

	  switch (mode)
	    {
	    case V8SImode:
	    case V4DImode:
	    case V4SImode:
	    case V2DImode:
	      {
		rtx t1, t2, mask;
		rtx (*gen_sub3) (rtx, rtx, rtx);

		switch (mode)
		  {
		  case V8SImode: gen_sub3 = gen_subv8si3; break;
		  case V4DImode: gen_sub3 = gen_subv4di3; break;
		  case V4SImode: gen_sub3 = gen_subv4si3; break;
		  case V2DImode: gen_sub3 = gen_subv2di3; break;
		  default:
		    gcc_unreachable ();
		  }
		/* Subtract (-(INT MAX) - 1) from both operands to make
		   them signed.  */
		mask = ix86_build_signbit_mask (mode, true, false);
		t1 = gen_reg_rtx (mode);
		emit_insn (gen_sub3 (t1, cop0, mask));

		t2 = gen_reg_rtx (mode);
		emit_insn (gen_sub3 (t2, cop1, mask));

		cop0 = t1;
		cop1 = t2;
		code = GT;
	      }
	      break;

	    case V32QImode:
	    case V16HImode:
	    case V16QImode:
	    case V8HImode:
	      /* Perform a parallel unsigned saturating subtraction.  */
	      x = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, x,
				      gen_rtx_US_MINUS (mode, cop0, cop1)));

	      cop0 = x;
	      cop1 = CONST0_RTX (mode);
	      code = EQ;
	      negate = !negate;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
    }

  /* Allow the comparison to be done in one mode, but the movcc to
     happen in another mode.  */
  if (data_mode == mode)
    {
      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
			       operands[1+negate], operands[2-negate]);
    }
  else
    {
      gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
      x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
			       code, cop0, cop1,
			       operands[1+negate], operands[2-negate]);
      x = gen_lowpart (data_mode, x);
    }

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
			 operands[2-negate]);
  return true;
}
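/* A sketch of the GTU trick above for SImode elements: biasing both
   operands by the sign bit turns the unsigned comparison into a
   signed one, since for x' = x - 0x80000000 and y' = y - 0x80000000,
   (x > y unsigned) == (x' > y' signed), and pcmpgtd handles the
   signed form directly.  */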
/* Expand a variable vector permutation.  */

void
ix86_expand_vec_perm (rtx operands[])
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx mask = operands[3];
  rtx t1, t2, t3, t4, vt, vt2, vec[32];
  enum machine_mode mode = GET_MODE (op0);
  enum machine_mode maskmode = GET_MODE (mask);
  int w, e, i;
  bool one_operand_shuffle = rtx_equal_p (op0, op1);

  /* Number of elements in the vector.  */
  w = GET_MODE_NUNITS (mode);
  e = GET_MODE_UNIT_SIZE (mode);
  gcc_assert (w <= 32);

  if (TARGET_AVX2)
    {
      if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
	{
	  /* Unfortunately, the VPERMQ and VPERMPD instructions only support
	     a constant shuffle operand.  With a tiny bit of effort we can
	     use VPERMD instead.  A re-interpretation stall for V4DFmode is
	     unfortunate but there's no avoiding it.
	     Similarly for V16HImode we don't have instructions for variable
	     shuffling, while for V32QImode we can, after preparing suitable
	     masks, use vpshufb; vpshufb; vpermq; vpor.  */

	  if (mode == V16HImode)
	    {
	      maskmode = mode = V32QImode;
	      w = 32;
	      e = 1;
	    }
	  else
	    {
	      maskmode = mode = V8SImode;
	      w = 8;
	      e = 4;
	    }
	  t1 = gen_reg_rtx (maskmode);

	  /* Replicate the low bits of the V4DImode mask into V8SImode:
	       mask = { A B C D }
	       t1   = { A A B B C C D D }.  */
	  for (i = 0; i < w / 2; ++i)
	    vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
	  vt = force_reg (maskmode, vt);
	  mask = gen_lowpart (maskmode, mask);
	  if (maskmode == V8SImode)
	    emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
	  else
	    emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));

	  /* Multiply the shuffle indices by two.  */
	  t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
				    OPTAB_DIRECT);

	  /* Add one to the odd shuffle indices:
		t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
	  for (i = 0; i < w / 2; ++i)
	    {
	      vec[i * 2] = const0_rtx;
	      vec[i * 2 + 1] = const1_rtx;
	    }
	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
	  vt = force_const_mem (maskmode, vt);
	  t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
				    OPTAB_DIRECT);

	  /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
	  operands[3] = mask = t1;
	  target = gen_lowpart (mode, target);
	  op0 = gen_lowpart (mode, op0);
	  op1 = gen_lowpart (mode, op1);
	}

      switch (mode)
	{
	case V8SImode:
	  /* The VPERMD and VPERMPS instructions already properly ignore
	     the high bits of the shuffle elements.  No need for us to
	     perform an AND ourselves.  */
	  if (one_operand_shuffle)
	    emit_insn (gen_avx2_permvarv8si (target, op0, mask));
	  else
	    {
	      t1 = gen_reg_rtx (V8SImode);
	      t2 = gen_reg_rtx (V8SImode);
	      emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
	      emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
	      goto merge_two;
	    }
	  return;

	case V8SFmode:
	  mask = gen_lowpart (V8SImode, mask);
	  if (one_operand_shuffle)
	    emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
	  else
	    {
	      t1 = gen_reg_rtx (V8SFmode);
	      t2 = gen_reg_rtx (V8SFmode);
	      emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
	      emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
	      goto merge_two;
	    }
	  return;

	case V4SImode:
	  /* By combining the two 128-bit input vectors into one 256-bit
	     input vector, we can use VPERMD and VPERMPS for the full
	     two-operand shuffle.  */
	  t1 = gen_reg_rtx (V8SImode);
	  t2 = gen_reg_rtx (V8SImode);
	  emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
	  emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
	  emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
	  return;

	case V4SFmode:
	  t1 = gen_reg_rtx (V8SFmode);
	  t2 = gen_reg_rtx (V8SImode);
	  mask = gen_lowpart (V4SImode, mask);
	  emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
	  emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
	  emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
	  return;

	case V32QImode:
	  t1 = gen_reg_rtx (V32QImode);
	  t2 = gen_reg_rtx (V32QImode);
	  t3 = gen_reg_rtx (V32QImode);
	  vt2 = GEN_INT (128);
	  for (i = 0; i < 32; i++)
	    vec[i] = vt2;
	  vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
	  vt = force_reg (V32QImode, vt);
	  for (i = 0; i < 32; i++)
	    vec[i] = i < 16 ? vt2 : const0_rtx;
	  vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
	  vt2 = force_reg (V32QImode, vt2);
	  /* From mask create two adjusted masks, which contain the same
	     bits as mask in the low 7 bits of each vector element.
	     The first mask will have the most significant bit clear
	     if it requests element from the same 128-bit lane
	     and MSB set if it requests element from the other 128-bit lane.
	     The second mask will have the opposite values of the MSB,
	     and additionally will have its 128-bit lanes swapped.
	     E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
	     t1   { 07 92 9e 09 ... | 17 19 85 1f ... } and
	     t3   { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
	     stands for other 12 bytes.  */
	  /* The bit whether element is from the same lane or the other
	     lane is bit 4, so shift it up by 3 to the MSB position.  */
	  emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
				    gen_lowpart (V4DImode, mask),
				    GEN_INT (3)));
	  /* Clear MSB bits from the mask just in case it had them set.  */
	  emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
	  /* After this t1 will have MSB set for elements from other lane.  */
	  emit_insn (gen_xorv32qi3 (t1, t1, vt2));
	  /* Clear bits other than MSB.  */
	  emit_insn (gen_andv32qi3 (t1, t1, vt));
	  /* Or in the lower bits from mask into t3.  */
	  emit_insn (gen_iorv32qi3 (t3, t1, t2));
	  /* And invert MSB bits in t1, so MSB is set for elements from the
	     same lane.  */
	  emit_insn (gen_xorv32qi3 (t1, t1, vt));
	  /* Swap 128-bit lanes in t3.  */
	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					  gen_lowpart (V4DImode, t3),
					  const2_rtx, GEN_INT (3),
					  const0_rtx, const1_rtx));
	  /* And or in the lower bits from mask into t1.  */
	  emit_insn (gen_iorv32qi3 (t1, t1, t2));
	  if (one_operand_shuffle)
	    {
	      /* Each of these shuffles will put 0s in places where
		 element from the other 128-bit lane is needed, otherwise
		 will shuffle in the requested value.  */
	      emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
	      emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
	      /* For t3 the 128-bit lanes are swapped again.  */
	      emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					      gen_lowpart (V4DImode, t3),
					      const2_rtx, GEN_INT (3),
					      const0_rtx, const1_rtx));
	      /* And oring both together leads to the result.  */
	      emit_insn (gen_iorv32qi3 (target, t1, t3));
	      return;
	    }

	  t4 = gen_reg_rtx (V32QImode);
	  /* Similarly to the above one_operand_shuffle code,
	     just repeated twice for each operand.  The code at
	     merge_two: will merge the two results together.  */
	  emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
	  emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
	  emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
	  emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
					  gen_lowpart (V4DImode, t4),
					  const2_rtx, GEN_INT (3),
					  const0_rtx, const1_rtx));
	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					  gen_lowpart (V4DImode, t3),
					  const2_rtx, GEN_INT (3),
					  const0_rtx, const1_rtx));
	  emit_insn (gen_iorv32qi3 (t4, t2, t4));
	  emit_insn (gen_iorv32qi3 (t3, t1, t3));
	  t1 = t4;
	  t2 = t3;
	  goto merge_two;

	default:
	  gcc_assert (GET_MODE_SIZE (mode) <= 16);
	  break;
	}
    }

  if (TARGET_XOP)
    {
      /* The XOP VPPERM insn supports three inputs.  By ignoring the
	 one_operand_shuffle special case, we avoid creating another
	 set of constant vectors in memory.  */
      one_operand_shuffle = false;

      /* mask = mask & {2*w-1, ...} */
      vt = GEN_INT (2*w - 1);
    }
  else
    {
      /* mask = mask & {w-1, ...} */
      vt = GEN_INT (w - 1);
    }

  for (i = 0; i < w; i++)
    vec[i] = vt;
  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
  mask = expand_simple_binop (maskmode, AND, mask, vt,
			      NULL_RTX, 0, OPTAB_DIRECT);

  /* For non-QImode operations, convert the word permutation control
     into a byte permutation control.  */
  if (mode != V16QImode)
    {
      mask = expand_simple_binop (maskmode, ASHIFT, mask,
				  GEN_INT (exact_log2 (e)),
				  NULL_RTX, 0, OPTAB_DIRECT);

      /* Convert mask to vector of chars.  */
      mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));

      /* Replicate each of the input bytes into byte positions:
	 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
	 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
	 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
      for (i = 0; i < 16; ++i)
	vec[i] = GEN_INT (i/e * e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      if (TARGET_XOP)
	emit_insn (gen_xop_pperm (mask, mask, mask, vt));
      else
	emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));

      /* Convert it into the byte positions by doing
	 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...}  */
      for (i = 0; i < 16; ++i)
	vec[i] = GEN_INT (i % e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      emit_insn (gen_addv16qi3 (mask, mask, vt));
    }

  /* The actual shuffle operations all operate on V16QImode.  */
  op0 = gen_lowpart (V16QImode, op0);
  op1 = gen_lowpart (V16QImode, op1);
  target = gen_lowpart (V16QImode, target);

  if (TARGET_XOP)
    {
      emit_insn (gen_xop_pperm (target, op0, op1, mask));
    }
  else if (one_operand_shuffle)
    {
      emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
    }
  else
    {
      rtx xops[6];
      bool ok;

      /* Shuffle the two input vectors independently.  */
      t1 = gen_reg_rtx (V16QImode);
      t2 = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
      emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));

 merge_two:
      /* Then merge them together.  The key is whether any given control
	 element contained a bit set that indicates the second word.  */
      mask = operands[3];
      vt = GEN_INT (w);
      if (maskmode == V2DImode && !TARGET_SSE4_1)
	{
	  /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
	     more shuffle to convert the V2DI input mask into a V4SI
	     input mask.  At that point the masking done by
	     ix86_expand_int_vcond will work as desired.  */
	  rtx t3 = gen_reg_rtx (V4SImode);
	  emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
					const0_rtx, const0_rtx,
					const2_rtx, const2_rtx));
	  mask = t3;
	  maskmode = V4SImode;
	  e = w = 4;
	}

      for (i = 0; i < w; i++)
	vec[i] = vt;
      vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
      vt = force_reg (maskmode, vt);
      mask = expand_simple_binop (maskmode, AND, mask, vt,
				  NULL_RTX, 0, OPTAB_DIRECT);

      xops[0] = gen_lowpart (mode, operands[0]);
      xops[1] = gen_lowpart (mode, t2);
      xops[2] = gen_lowpart (mode, t1);
      xops[3] = gen_rtx_EQ (maskmode, mask, vt);
      xops[4] = mask;
      xops[5] = vt;
      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
    }
}
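/* Worked example of the control conversion above (a sketch) for
   V4SImode, where e == 4: a word index of 2 is first shifted to
   2 << log2(4) == 8, replicated across its lane to {8,8,8,8} by the
   first pshufb, and then incremented by {0,1,2,3} to give the byte
   indices {8,9,10,11} that the final pshufb needs.  */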
/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (src);
  rtx tmp;

  if (TARGET_SSE4_1)
    {
      rtx (*unpack)(rtx, rtx);
      rtx (*extract)(rtx, rtx) = NULL;
      enum machine_mode halfmode = BLKmode;

      switch (imode)
	{
	case V32QImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv16qiv16hi2;
	  else
	    unpack = gen_avx2_sign_extendv16qiv16hi2;
	  halfmode = V16QImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
	  break;
	case V16HImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv8hiv8si2;
	  else
	    unpack = gen_avx2_sign_extendv8hiv8si2;
	  halfmode = V8HImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
	  break;
	case V8SImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv4siv4di2;
	  else
	    unpack = gen_avx2_sign_extendv4siv4di2;
	  halfmode = V4SImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
	  break;
	case V16QImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv8qiv8hi2;
	  else
	    unpack = gen_sse4_1_sign_extendv8qiv8hi2;
	  break;
	case V8HImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv4hiv4si2;
	  else
	    unpack = gen_sse4_1_sign_extendv4hiv4si2;
	  break;
	case V4SImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv2siv2di2;
	  else
	    unpack = gen_sse4_1_sign_extendv2siv2di2;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (GET_MODE_SIZE (imode) == 32)
	{
	  tmp = gen_reg_rtx (halfmode);
	  emit_insn (extract (tmp, src));
	}
      else if (high_p)
	{
	  /* Shift higher 8 bytes to lower 8 bytes.  */
	  tmp = gen_reg_rtx (imode);
	  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
					 gen_lowpart (V1TImode, src),
					 GEN_INT (64)));
	}
      else
	tmp = src;

      emit_insn (unpack (dest, tmp));
    }
  else
    {
      rtx (*unpack)(rtx, rtx, rtx);

      switch (imode)
	{
	case V16QImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv16qi;
	  else
	    unpack = gen_vec_interleave_lowv16qi;
	  break;
	case V8HImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv8hi;
	  else
	    unpack = gen_vec_interleave_lowv8hi;
	  break;
	case V4SImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv4si;
	  else
	    unpack = gen_vec_interleave_lowv4si;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (unsigned_p)
	tmp = force_reg (imode, CONST0_RTX (imode));
      else
	tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
				   src, pc_rtx, pc_rtx);

      emit_insn (unpack (gen_lowpart (imode, dest), src, tmp));
    }
}
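/* E.g. (a sketch of the pre-SSE4.1 path): sign-extending the low half
   of a V8HImode SRC uses the comparison 0 > SRC to build a vector of
   per-element sign masks, then interleaves

	dest = interleave_low (src, signmask)

   so each 16-bit element is followed by 16 copies of its sign bit,
   yielding the V4SImode sign extension.  */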
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode
      || GET_MODE (flags) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
	{
	case QImode:
	  insn = gen_subqi3_carry;
	  break;
	case HImode:
	  insn = gen_subhi3_carry;
	  break;
	case SImode:
	  insn = gen_subsi3_carry;
	  break;
	case DImode:
	  insn = gen_subdi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode:
	  insn = gen_addqi3_carry;
	  break;
	case HImode:
	  insn = gen_addhi3_carry;
	  break;
	case SImode:
	  insn = gen_addsi3_carry;
	  break;
	case DImode:
	  insn = gen_adddi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
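/* The resulting sequence is compact; e.g. dest = a + (b < c) with
   unsigned operands comes out as a sketch like

	cmpl	%ecx, %ebx	 CF = (b < c)
	adcl	$0, %eax	 a += CF

   (illustrative registers; the sbb form handles the decrement case).  */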
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most four parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, word_mode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
	 the operand may actually have a different mode now.  That's
	 ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
	{
	  int i;

	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      for (i = 0; i < size; i++)
		parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      for (i = 1; i < size; i++)
		parts[i] = adjust_address (operand, SImode, 4 * i);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case TFmode:
		  real_to_target (l, &r, mode);
		  parts[3] = gen_int_mode (l[3], SImode);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case XFmode:
		  /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
		     long double may not be 80-bit.  */
		  real_to_target (l, &r, mode);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  gcc_unreachable ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }
  else
    {
      if (mode == TImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);

	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);

	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
		parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  return size;
}
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Operands 2-4 contain the input values in the correct order; operands
   5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][4];
  int nparts, i, j;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool collisionparts[4];

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (MEM_P (operands[1])
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], word_mode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
		|| offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      rtx src_base = XEXP (part[1][nparts - 1], 0);

      /* Compensate for the stack decrement by 4.  */
      if (!TARGET_64BIT && nparts == 3
	  && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
	src_base = plus_constant (Pmode, src_base, 4);

      /* src_base refers to the stack pointer and is
	 automatically decreased by emitted push.  */
      for (i = 0; i < nparts; i++)
	part[1][i] = change_address (part[1][i],
				     GET_MODE (part[1][i]), src_base);
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      rtx tmp;

      for (i = 0; i < nparts; i++)
	{
	  collisionparts[i]
	    = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
	  if (collisionparts[i])
	    collisions++;
	}

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3 && collisionparts[1])
	{
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}
      else if (collisions == 1
	       && nparts == 4
	       && (collisionparts[1] || collisionparts[2]))
	{
	  if (collisionparts[1])
	    {
	      tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	      tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	    }
	  else
	    {
	      tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
	      tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
	    }
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  for (i = 1; i < nparts; i++)
	    {
	      tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
	      part[1][i] = replace_equiv_address (part[1][i], tmp);
	    }
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (ix86_gen_add3 (stack_pointer_rtx,
					  stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	  else if (nparts == 4)
	    {
	      emit_move_insn (part[0][3], part[1][3]);
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this
	     is a register, it is OK - we will just use the larger
	     counterpart.  We also retype memory - these come from an attempt
	     to avoid the REX prefix on moving the second half of a TFmode
	     value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      switch (GET_CODE (part[1][1]))
		{
		case MEM:
		  part[1][1] = adjust_address (part[1][1], DImode, 0);
		  break;

		case REG:
		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
		  break;

		default:
		  gcc_unreachable ();
		}

	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))
	   || (nparts == 4
	       && REGNO (part[0][0]) == REGNO (part[1][3]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
	{
	  operands[2 + i] = part[0][j];
	  operands[6 + i] = part[1][j];
	}
    }
  else
    {
      for (i = 0; i < nparts; i++)
	{
	  operands[2 + i] = part[0][i];
	  operands[6 + i] = part[1][i];
	}
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_insn_for_size_p ())
    {
      for (j = 0; j < nparts - 1; j++)
	if (CONST_INT_P (operands[6 + j])
	    && operands[6 + j] != const0_rtx
	    && REG_P (operands[2 + j]))
	  for (i = j; i < nparts - 1; i++)
	    if (CONST_INT_P (operands[7 + i])
		&& INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
	      operands[7 + i] = operands[2 + j];
    }

  for (i = 0; i < nparts; i++)
    emit_move_insn (operands[2 + i], operands[6 + i]);
}
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
	  && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
	emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
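/* E.g. (a sketch) a left shift of a 32-bit half by 2 may be emitted as

	addl	%eax, %eax
	addl	%eax, %eax

   whenever 2 * ix86_cost->add <= ix86_cost->shift_const and we are
   not optimizing for size.  */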
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashl3)(rtx, rtx, rtx);
  rtx (*gen_shld)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > half_width)
	    ix86_expand_ashl_const (high[0], count - half_width, mode);
	}
      else
	{
	  gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashl_const (low[0], count, mode);
	}
      return;
    }

  split_double_mode (mode, operands, 1, low, high);

  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode capable registers, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	{
	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	  ix86_expand_clear (low[0]);
	  ix86_expand_clear (high[0]);
	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));

	  d = gen_lowpart (QImode, low[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));

	  d = gen_lowpart (QImode, high[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_NE (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
	}

      /* Otherwise, we can get the same results by manually performing
	 a bit extract operation on bit 5/6, and then performing the two
	 shifts.  The two methods of getting 0/1 into low/high are exactly
	 the same size.  Avoiding the shift in the bit extract case helps
	 pentium4 a bit; no one else seems to care much either way.  */
      else
	{
	  enum machine_mode half_mode;
	  rtx (*gen_lshr3)(rtx, rtx, rtx);
	  rtx (*gen_and3)(rtx, rtx, rtx);
	  rtx (*gen_xor3)(rtx, rtx, rtx);
	  HOST_WIDE_INT bits;
	  rtx x;

	  if (mode == DImode)
	    {
	      half_mode = SImode;
	      gen_lshr3 = gen_lshrsi3;
	      gen_and3 = gen_andsi3;
	      gen_xor3 = gen_xorsi3;
	      bits = 5;
	    }
	  else
	    {
	      half_mode = DImode;
	      gen_lshr3 = gen_lshrdi3;
	      gen_and3 = gen_anddi3;
	      gen_xor3 = gen_xordi3;
	      bits = 6;
	    }

	  if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
	    x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
	  else
	    x = gen_lowpart (half_mode, operands[2]);
	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
	  emit_insn (gen_and3 (high[0], high[0], const1_rtx));
	  emit_move_insn (low[0], high[0]);
	  emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
	}

      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);
      emit_insn (gen_shld (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    {
      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
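/* For DImode 1 << N on ia32, the QImode-register path above emits a
   sketch like

	xorl	%eax, %eax
	xorl	%edx, %edx
	testb	$32, %cl
	sete	%al
	setne	%dl
	shll	%cl, %eax	 32-bit shift uses %cl mod 32
	shll	%cl, %edx

   so exactly one of the halves receives the single set bit.  */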
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count == GET_MODE_BITSIZE (mode) - 1)
	{
	  emit_move_insn (high[0], high[1]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));
	  emit_move_insn (low[0], high[0]);
	}
      else if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));

	  if (count > half_width)
	    emit_insn (gen_ashr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashr3 (scratch, scratch,
				GEN_INT (half_width - 1)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;

	  emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
	}
    }
}
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_lshr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;
  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > half_width)
	    emit_insn (gen_lshr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  ix86_expand_clear (scratch);
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

	  emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
	}
    }
}
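
/* Illustrative sketch (hypothetical operand names): a variable DImode
   logical shift right on a 32-bit target is essentially

       shrd %cl, high, low     ; low  = (high:low) >> (cl & 31)
       shr  %cl, high          ; high >>= (cl & 31)

   followed by a fixup for cl & 32: either a cmove from a zeroed scratch
   (x86_shift_adj_1) or a conditional branch (x86_shift_adj_2) that moves
   high into low and clears high.  */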
/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
}
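
/* REG_BR_PROB_BASE represents probability 1, so for example
   predict_jump (REG_BR_PROB_BASE * 90 / 100) marks the jump just
   emitted as taken with 90% probability.  */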
/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes; if so, emit a jump to the returned label.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}
/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
}
/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
}
/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
			    GEN_INT (exact_log2 (scale)),
			    NULL, 1, OPTAB_DIRECT);
  return sc;
}
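
/* For instance, scale_counter (countreg, 4) emits a single logical right
   shift by exact_log2 (4) == 2, while a constant count of 25 folds to
   GEN_INT (6), the number of whole 4-byte chunks.  */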
/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
/* When SRCPTR is non-NULL, output a simple loop that moves memory from
   SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall
   size is COUNT bytes.  When SRCPTR is NULL, output the equivalent loop
   that sets memory to VALUE (supposed to be in MODE).

   The size is rounded down to a whole number of the chunk size moved at
   once.  SRCMEM and DESTMEM provide MEM rtxen to feed proper aliasing
   info.  */
static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr, rtx value,
			       rtx count, enum machine_mode mode, int unroll,
			       int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  rtx x_addr;
  rtx y_addr;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
			      NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
			       true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

  if (srcmem)
    {
      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using a single temporary.
	 Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
	{
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem =
		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
		  srcmem =
		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, srcmem);
	    }
	}
      else
	{
	  rtx tmpreg[4];
	  gcc_assert (unroll <= 4);
	  for (i = 0; i < unroll; i++)
	    {
	      tmpreg[i] = gen_reg_rtx (mode);
	      if (i)
		srcmem =
		  adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
	      emit_move_insn (tmpreg[i], srcmem);
	    }
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		destmem =
		  adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	      emit_move_insn (destmem, tmpreg[i]);
	    }
	}
    }
  else
    for (i = 0; i < unroll; i++)
      {
	if (i)
	  destmem =
	    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
			   true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
	predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
	predict_jump (REG_BR_PROB_BASE - 1);
      else
	predict_jump (REG_BR_PROB_BASE
		      - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
				 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
	emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
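
/* As an illustrative sketch, for mode == SImode, unroll == 4 and a move
   (SRCPTR non-NULL) the RTL emitted above corresponds to

       size = count & ~15;
       iter = 0;
     top:
       t0 = src[iter];     t1 = src[iter+4];      loads first, stores after,
       t2 = src[iter+8];   t3 = src[iter+12];     to help chips that reorder
       dst[iter]   = t0;   dst[iter+4]  = t1;     memory reads and writes
       dst[iter+8] = t2;   dst[iter+12] = t3;
       iter += 16;
       if (iter < size) goto top;
     out:
       destptr += iter;  srcptr += iter;  */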
/* Output "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
			   rtx destptr, rtx srcptr,
			   rtx count,
			   enum machine_mode mode)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  /* If the size is known, it is shorter to use rep movs.  */
  if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
						       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
			       GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
    }
  else
    {
      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
    }
  if (CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      srcmem = shallow_copy_rtx (srcmem);
      set_mem_size (destmem, rounded_count);
      set_mem_size (srcmem, rounded_count);
    }
  else
    {
      if (MEM_SIZE_KNOWN_P (destmem))
	clear_mem_size (destmem);
      if (MEM_SIZE_KNOWN_P (srcmem))
	clear_mem_size (srcmem);
    }
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
			  destexp, srcexp));
}
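
/* The DESTEXP/SRCEXP expressions describe the final pointer values so the
   rep_mov pattern can expose them to the RTL optimizers: for SImode,
   countreg holds the number of dwords and the final destination pointer
   is destptr + (countreg << 2), i.e. exactly what "rep movsd" leaves in
   the register.  */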
/* Output "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
			    rtx count, enum machine_mode mode,
			    rtx orig_value)
{
  rtx destexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  value = force_reg (mode, gen_lowpart (mode, value));
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
						       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if (orig_value == const0_rtx && CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, rounded_count);
    }
  else if (MEM_SIZE_KNOWN_P (destmem))
    clear_mem_size (destmem);
  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}
static void
emit_strmov (rtx destmem, rtx srcmem,
	     rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
	    }
	  else
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
	  else
	    {
	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
	  offset += 1;
	}
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
				   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
				     count, QImode, 1, 4);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from the preceding rep operation) and using x86 addressing
     modes.  */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  src = change_address (srcmem, HImode, srcptr);
	  dest = change_address (destmem, HImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  src = change_address (srcmem, QImode, srcptr);
	  dest = change_address (destmem, QImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, HImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, HImode, tmp);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, QImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, QImode, tmp);
	  emit_move_insn (dest, src);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
}
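
/* With a constant count the epilogue above degenerates to a straight-line,
   jump-free sequence.  For example (hypothetical count), count & 15 == 11
   on a 64-bit target emits one DImode, one HImode and one QImode string
   move at offsets 0, 8 and 10.  */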
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
				 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
			 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
				 gen_lowpart (QImode, value), count, QImode,
				 1, max_size / 2);
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    {
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
	  offset += 1;
	}
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
/* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
   to ALIGN, to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
				 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx orig_dst = dst;
  rtx orig_src = src;
  int off = 0;
  int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
	  && (src_align_bytes & 1) == (align_bytes & 1)
	  && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
	set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
	{
	  unsigned int src_align = 0;
	  if ((src_align_bytes & 3) == (align_bytes & 3))
	    src_align = 4;
	  else if ((src_align_bytes & 1) == (align_bytes & 1))
	    src_align = 2;
	  if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	    set_mem_align (src, src_align * BITS_PER_UNIT);
	}
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
	src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
	src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
	src_align = 2;
      if (src_align > (unsigned int) desired_align)
	src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  if (MEM_SIZE_KNOWN_P (orig_src))
    set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
  *srcp = src;
  return dst;
}
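
/* For example (hypothetical numbers), desired_align == 8 and
   align_bytes == 3 emit one QImode and one HImode string move (3 bytes
   total); the BLKmode replacement above then records the new 8-byte
   destination alignment and the remaining size shrunk by 3, so later
   passes keep accurate aliasing information.  */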
/* Store enough bytes at DEST to align DEST, known to be aligned to ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Store enough bytes at DST to align DST to DESIRED_ALIGN.  ALIGN_BYTES
   is how many bytes need to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
				 int desired_align, int align_bytes)
{
  int off = 0;
  rtx orig_dst = dst;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  return dst;
}
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
	    int *dynamic_check, bool *noalign)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
			     || (memset
				 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
  *noalign = false;

#define ALG_USABLE_P(alg) (rep_prefix_usable			\
			   || (alg != rep_prefix_1_byte		\
			       && alg != rep_prefix_4_byte	\
			       && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
	  && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
	return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
	return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  /* We get here if the algorithms that were not libcall-based
	     were rep-prefix based and we are unable to use rep prefixes
	     based on global register usage.  Break out of the loop and
	     use the heuristic below.  */
	  if (algs->size[i].max == 0)
	    break;
	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
	    {
	      enum stringop_alg candidate = algs->size[i].alg;

	      if (candidate != libcall && ALG_USABLE_P (candidate))
		alg = candidate;
	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
		 last non-libcall inline algorithm.  */
	      if (TARGET_INLINE_ALL_STRINGOPS)
		{
		  /* When the current size is best to be copied by a libcall,
		     but we are still forced to inline, run the heuristic below
		     that will pick code for medium sized blocks.  */
		  if (alg != libcall)
		    return alg;
		  break;
		}
	      else if (ALG_USABLE_P (candidate))
		{
		  *noalign = algs->size[i].noalign;
		  return candidate;
		}
	    }
	}
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for the maximal size of block that is faster to copy by hand
     and take blocks of at most that size, guessing that the average size
     will be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  enum stringop_alg candidate = algs->size[i].alg;
	  any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

	  if (candidate != libcall && candidate
	      && ALG_USABLE_P (candidate))
	    max = algs->size[i].max;
	}
      /* If there aren't any usable algorithms, then recursing on
	 smaller sizes isn't going to find anything.  Just return the
	 simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
	{
	  /* Pick something reasonable.  */
	  if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	    *dynamic_check = 128;
	  return loop_1_byte;
	}
      if (max == -1)
	max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check, noalign);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	*dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}
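
/* The stringop_algs tables searched above are the per-processor cost-table
   entries; as a hedged, hypothetical example, an entry of the form

       {libcall, {{256, loop, false}, {-1, libcall, false}}}

   would make decide_alg pick "loop" for known or expected sizes up to 256
   bytes and fall back to a library call otherwise.  */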
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
static int
decide_alignment (int align,
		  enum stringop_alg alg,
		  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
      case no_stringop:
	gcc_unreachable ();
      case loop:
      case unrolled_loop:
	desired_align = GET_MODE_SIZE (Pmode);
	break;
      case rep_prefix_8_byte:
	desired_align = 8;
	break;
      case rep_prefix_4_byte:
	/* PentiumPro has special logic triggering for 8 byte aligned blocks,
	   copying whole cachelines at once.  */
	if (TARGET_PENTIUMPRO)
	  desired_align = 8;
	else
	  desired_align = 4;
	break;
      case rep_prefix_1_byte:
	/* PentiumPro has special logic triggering for 8 byte aligned blocks,
	   copying whole cachelines at once.  */
	if (TARGET_PENTIUMPRO)
	  desired_align = 8;
	else
	  desired_align = 1;
	break;
      case loop_1_byte:
	desired_align = 1;
	break;
      case libcall:
	return 0;
    }

  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: Conditional that jumps up to epilogues for small
      blocks that can be handled by epilogue alone.  This is faster
      but also needed for correctness, since the prologue assumes the
      block is larger than the desired alignment.

      Optional dynamic check for size and libcall for large
      blocks is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy first few bytes in order to get destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power of two sized
      blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with specified algorithm.

   4) Epilogue: code copying tail of the block that is too small to be
      handled by main body (or up to size guarded by prologue guard).  */

bool
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx srcreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  int dynamic_check;
  bool need_zero_guard = false;
  bool noalign;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access on reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care here
     just about destination alignment.  */
  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, false, &dynamic_check, &noalign);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS || noalign)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));
  srcreg = copy_addr_to_reg (XEXP (src, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode) * (TARGET_64BIT ? 4 : 2);
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }

  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size < epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }

  /* Emit code to decide on runtime whether library call or inline should be
     used.  */
  if (dynamic_check != -1)
    {
      if (CONST_INT_P (count_exp))
	{
	  if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
	    {
	      emit_block_move_via_libcall (dst, src, count_exp, false);
	      count_exp = const0_rtx;
	      goto epilogue;
	    }
	}
      else
	{
	  rtx hot_label = gen_label_rtx ();
	  jump_around_label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
				   LEU, 0, GET_MODE (count_exp), 1, hot_label);
	  predict_jump (REG_BR_PROB_BASE * 90 / 100);
	  emit_block_move_via_libcall (dst, src, count_exp, false);
	  emit_jump (jump_around_label);
	  emit_label (hot_label);
	}
    }

  /* Step 2: Alignment prologue.  */

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  src = change_address (src, BLKmode, srcreg);
	  dst = change_address (dst, BLKmode, destreg);
	  expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
				  desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
						 desired_align, align_bytes);
	  count_exp = plus_constant (counter_mode (count_exp),
				     count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, word_mode, 1, expected_size);
      break;
    case unrolled_loop:
      /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
	 registers for 4 temporaries anyway.  */
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, word_mode, TARGET_64BIT ? 4 : 2,
				     expected_size);
      break;
    case rep_prefix_8_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 DImode);
      break;
    case rep_prefix_4_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 SImode);
      break;
    case rep_prefix_1_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 QImode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
					  (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					  (count / size_needed) * size_needed);
    }
  else
    {
      src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
	 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
	 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
	 bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
			    epilogue_size_needed);
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
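
/* As an illustrative sketch, a profitable rep_prefix_4_byte expansion of
   memcpy (dst, src, n) with unknown n produces roughly:

       if (n < 4) goto epilogue;            step 1: prologue guard
       copy 1-3 bytes until dst % 4 == 0;   step 2: alignment prologue
       ecx = n >> 2; rep movsd;             step 3: main body
     epilogue:
       copy n & 3 tail bytes;               step 4: epilogue  */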
/* Helper function for memcpy.  For QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x10101010, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
	v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
	 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
				  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
	if (mode == SImode)
	  emit_insn (gen_movsi_insv_1 (reg, reg));
	else
	  emit_insn (gen_movdi_insv_1 (reg, reg));
      else
	{
	  tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
				     NULL, 1, OPTAB_DIRECT);
	  reg =
	    expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
	}
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
	return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}
/* Duplicate value VAL using promote_duplicated_reg into the maximal size that
   will be needed by the main loop copying SIZE_NEEDED chunks and the prologue
   getting alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  See expand_movmem comment for explanation of individual
   steps performed.  */
bool
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;
  bool noalign;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access on reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, true, &dynamic_check, &noalign);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS || noalign)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode) * 4;
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }
  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	{
	  enum machine_mode mode = SImode;
	  if (TARGET_64BIT && (count & ~0xffffffff))
	    mode = DImode;
	  count_exp = force_reg (mode, count_exp);
	}
    }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in the
     front of all code.  */
  if (CONST_INT_P (val_exp))
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      /* To improve performance of small blocks, we jump around the VAL
	 promoting mode.  This means that if the promoted VAL is not constant,
	 we might not use it in the epilogue and have to use byte
	 loop variant.  */
      if (epilogue_size_needed > 2 && !promoted_val)
	force_loopy_epilogue = true;
      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size <= epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }
  if (dynamic_check != -1)
    {
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
			       LEU, 0, counter_mode (count_exp), 1, hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      set_storage_via_libcall (dst, count_exp, val_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);
    }

  /* Step 2: Alignment prologue.  */

  /* Do the expensive promotion once we branched off the small blocks.  */
  if (!promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  gcc_assert (desired_align >= 1 && align >= 1);

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  dst = change_address (dst, BLKmode, destreg);
	  expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
				  desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
						 desired_align, align_bytes);
	  count_exp = plus_constant (counter_mode (count_exp),
				     count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      promoted_val = val_exp;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, word_mode, 1, expected_size);
      break;
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, word_mode, 4, expected_size);
      break;
    case rep_prefix_8_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  DImode, val_exp);
      break;
    case rep_prefix_4_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  SImode, val_exp);
      break;
    case rep_prefix_1_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  QImode, val_exp);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					(count / size_needed) * size_needed);
  else
    dst = change_address (dst, BLKmode, destreg);

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
	 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
	 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
	 bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
	expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
					 epilogue_size_needed);
      else
	expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
				epilogue_size_needed);
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check if it is aligned to 4 bytes.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          emit_insn (ix86_gen_add3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
    }

  /* Generate a loop to check 4 bytes at a time.  It is not a good idea
     to align this loop: it only enlarges the program and does not
     improve speed.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);
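
  /* Worked example (illustration only, not from the original sources):
     for scratch = 0x00636261, i.e. "abc" with the terminating zero in
     the top byte,
        scratch + (-0x01010101)  = 0xff626160
        ~scratch                 = 0xff9c9d9e
        AND of the two           = 0xff000100
        masked with 0x80808080   = 0x80000000,
     which is nonzero, and the surviving bit marks the zero byte.  When
     no byte of scratch is zero, the borrow never propagates into bit 7
     of a byte, the mask yields zero, and we loop back to
     align_4_label.  */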
  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg,
                                                    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2,
                                                    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (ix86_gen_add3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid a branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
  emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));

  emit_label (end_0_label);
}
/* Expand strlen.  */

bool
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_insn_for_size_p ()
      && (!CONST_INT_P (align) || INTVAL (align) < 4))
    return false;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_insn_for_size_p ())
    {
      /* Well it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction is done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
      emit_insn (ix86_gen_sub3 (out, out, addr));
    }
  else
    {
      rtx unspec;

      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
        return false;

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
                                                 scratch4), UNSPEC_SCAS);
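
      /* The three insns below recover the length from what repnz scas
         leaves behind.  Worked example (illustration only): the count
         register starts at -1 and is decremented once per byte scanned,
         including the terminator, so for "abc" it ends at -5; scratch1
         receives that value, ~(-5) = 4, and adding -1 yields the
         length 3.  */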
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
      emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
    }
  return true;
}
/* For a given symbol (function), construct code to compute the address
   of its PLT entry in the large x86-64 PIC model.  */

static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp, unspec;

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC);
  gcc_assert (Pmode == DImode);

  tmp = gen_reg_rtx (Pmode);
  unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx callarg2,
                  rtx pop, bool sibcall)
{
  /* We need to represent that SI and DI registers are clobbered
     by SYSV calls.  */
  static int clobbered_registers[] = {
        XMM6_REG, XMM7_REG, XMM8_REG,
        XMM9_REG, XMM10_REG, XMM11_REG,
        XMM12_REG, XMM13_REG, XMM14_REG,
        XMM15_REG, SI_REG, DI_REG
  };
  rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
  rtx use = NULL, call;
  unsigned int vec_len;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
        fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
          && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
          && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
        use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, AX_REG);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (ix86_cmodel == CM_LARGE_PIC
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (sibcall
           ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
           : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
    {
      fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
      fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
    }

  vec_len = 0;
  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  vec[vec_len++] = call;

  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      vec[vec_len++] = pop;
    }

  if (TARGET_64BIT_MS_ABI
      && (!callarg2 || INTVAL (callarg2) != -2))
    {
      unsigned i;

      vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
                                       UNSPEC_MS_TO_SYSV_CALL);

      for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
        vec[vec_len++]
          = gen_rtx_CLOBBER (VOIDmode,
                             gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
                                          ? TImode : DImode,
                                          clobbered_registers[i]));
    }

  if (vec_len > 1)
    call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;

  return call;
}
/* Output the assembly for a call instruction.  */

const char *
ix86_output_call_insn (rtx insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      if (direct_p)
        xasm = "jmp\t%P0";
      /* SEH epilogue detection requires the indirect branch case
         to include REX.W.  */
      else if (TARGET_SEH)
        xasm = "rex.W jmp %A0";
      else
        xasm = "jmp\t%A0";

      output_asm_insn (xasm, &call_op);
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx i;

      for (i = NEXT_INSN (insn); i; i = NEXT_INSN (i))
        {
          /* If we get to another real insn, we don't need the nop.  */
          if (INSN_P (i))
            break;

          /* If we get to the epilogue note, prevent a catch region from
             being adjacent to the standard epilogue sequence.  With non-
             call-exceptions, we'll have done this during epilogue emission.  */
          if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
              && !flag_non_call_exceptions
              && !can_throw_internal (insn))
            {
              seh_nop_p = true;
              break;
            }
        }

      /* If we didn't find a real insn following the call, prevent the
         unwinder from looking into the next function.  */
      if (i == NULL)
        seh_nop_p = true;
    }

  if (direct_p)
    xasm = "call\t%P0";
  else
    xasm = "call\t%A0";

  output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared_machine_function ();
  f->use_fast_prologue_epilogue_nregs = -1;
  f->call_abi = ix86_abi;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc_stack_local_entry ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (s->rtl);
}
static void
ix86_instantiate_decls (void)
{
  struct stack_local_entry *s;

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->rtl != NULL_RTX)
      instantiate_decl_rtl (s->rtl);
}
/* Calculate the length of the memory address in the instruction encoding.
   Includes the addr32 prefix, but does not include the one-byte modrm,
   opcode, or other prefixes.  We never generate an addr32 prefix for LEA
   insns.  */

int
memory_address_length (rtx addr, bool lea)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  len = (parts.seg == SEG_DEFAULT) ? 0 : 1;

  /* If this is not an LEA instruction, add the length of the addr32
     prefix.  */
  if (TARGET_64BIT && !lea
      && (SImode_address_operand (addr, VOIDmode)
          || (parts.base && GET_MODE (parts.base) == SImode)
          || (parts.index && GET_MODE (parts.index) == SImode)))
    len++;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  if (base && GET_CODE (base) == SUBREG)
    base = SUBREG_REG (base);
  if (index && GET_CODE (index) == SUBREG)
    index = SUBREG_REG (index);

  gcc_assert (base == NULL_RTX || REG_P (base));
  gcc_assert (index == NULL_RTX || REG_P (index));

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
         the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
         code.  */
      if (base == arg_pointer_rtx
          || base == frame_pointer_rtx
          || REGNO (base) == SP_REG
          || REGNO (base) == BP_REG
          || REGNO (base) == R12_REG
          || REGNO (base) == R13_REG)
        len++;
    }
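
  /* Illustration (not part of the original code): a plain (%eax) address
     costs no bytes beyond the modrm counted by our caller, so len stays
     unchanged, while (%esp) and (%r12) pick up a SIB byte and (%ebp) and
     (%r13) a mandatory disp8 -- the len++ just above.  */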
  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32 a
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (TARGET_64BIT)
        {
          rtx symbol = disp;

          if (GET_CODE (disp) == CONST)
            symbol = XEXP (disp, 0);
          if (GET_CODE (symbol) == PLUS
              && CONST_INT_P (XEXP (symbol, 1)))
            symbol = XEXP (symbol, 0);

          if (GET_CODE (symbol) != LABEL_REF
              && (GET_CODE (symbol) != SYMBOL_REF
                  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
              && (GET_CODE (symbol) != UNSPEC
                  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
                      && XINT (symbol, 1) != UNSPEC_PCREL
                      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
            len++;
        }
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          if (base && satisfies_constraint_K (disp))
            len += 1;
          else
            len += 4;
        }
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
        len++;

      /* An index requires the two-byte modrm form....  */
      if (index
          /* ...like esp (or r12), which always wants an index.  */
          || base == arg_pointer_rtx
          || base == frame_pointer_rtx
          || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
        len++;
    }

  return len;
}
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */

int
ix86_attr_length_immediate_default (rtx insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
        enum attr_mode mode = get_attr_mode (insn);

        gcc_assert (!len);
        if (shortform && CONST_INT_P (recog_data.operand[i]))
          {
            HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
            switch (mode)
              {
              case MODE_QI:
                len = 1;
                continue;
              case MODE_HI:
                ival = trunc_int_for_mode (ival, HImode);
                break;
              case MODE_SI:
                ival = trunc_int_for_mode (ival, SImode);
                break;
              default:
                break;
              }
            if (IN_RANGE (ival, -128, 127))
              {
                len = 1;
                continue;
              }
          }
        switch (mode)
          {
          case MODE_QI:
            len = 1;
            break;
          case MODE_HI:
            len = 2;
            break;
          case MODE_SI:
            len = 4;
            break;
          /* Immediates for DImode instructions are encoded
             as 32-bit sign-extended values.  */
          case MODE_DI:
            len = 4;
            break;
          default:
            fatal_insn ("unknown insn mode", insn);
          }
      }
  return len;
}
/* Compute the default value for the "length_address" attribute.  */

int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
        set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);

      return memory_address_length (addr, true);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        constrain_operands_cached (reload_completed);
        if (which_alternative != -1)
          {
            const char *constraints = recog_data.constraints[i];
            int alt = which_alternative;

            while (*constraints == '=' || *constraints == '+')
              constraints++;
            while (alt-- > 0)
              while (*constraints++ != ',')
                ;
            /* Skip ignored operands.  */
            if (*constraints == 'X')
              continue;
          }
        return memory_address_length (XEXP (recog_data.operand[i], 0), false);
      }
  return 0;
}
/* Compute the default value for the "length_vex" attribute.  It includes
   the 2- or 3-byte VEX prefix and 1 opcode byte.  */

int
ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
{
  int i;

  /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX W
     bit requires the 3-byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
  if (!TARGET_64BIT)
    return 2 + 1;
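
  /* Illustration (not part of the original code): vaddps %xmm1, %xmm2,
     %xmm3 encodes as c5 e8 58 d9, a two-byte VEX prefix plus one opcode
     byte, giving a length of 3; forms needing VEX.W or the extended
     REX.X/REX.B register bits fall back to the three-byte c4 prefix and
     a length of 4, as computed below.  */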
  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
        /* The REX.W bit requires the 3-byte VEX prefix.  */
        if (GET_MODE (recog_data.operand[i]) == DImode
            && GENERAL_REG_P (recog_data.operand[i]))
          return 3 + 1;
      }
    else
      {
        /* The REX.X or REX.B bits require the 3-byte VEX prefix.  */
        if (MEM_P (recog_data.operand[i])
            && x86_extended_reg_mentioned_p (recog_data.operand[i]))
          return 3 + 1;
      }

  return 2 + 1;
}
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_ATOM:
    case PROCESSOR_K6:
    case PROCESSOR_BTVER2:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_CORE2:
    case PROCESSOR_COREI7:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_NOCONA:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BDVER3:
    case PROCESSOR_BTVER1:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static bool
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return false;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
           && XVECLEN (PATTERN (dep_insn), 0) == 2
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
           && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return false;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return false;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return false;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return false;

  return true;
}
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */

bool
ix86_agi_dependent (rtx set_insn, rtx use_insn)
{
  int i;
  extract_insn_cached (use_insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
        rtx addr = XEXP (recog_data.operand[i], 0);
        return modified_in_p (addr, set_insn) != 0;
      }
  return false;
}
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
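      /* Illustrative example (not from the original sources): in the pair

           add %eax, %ebx
           mov (%ebx), %ecx

         the load's address depends on the add issued just before it, so
         the original Pentium stalls address generation for one cycle;
         the cost adjustments below model exactly that.  */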
      if (insn_type == TYPE_LEA)
        {
          rtx addr = PATTERN (insn);

          if (GET_CODE (addr) == PARALLEL)
            addr = XVECEXP (addr, 0, 0);

          gcc_assert (GET_CODE (addr) == SET);

          addr = SET_SRC (addr);
          if (modified_in_p (addr, dep_insn))
            cost += 1;
        }
      else if (ix86_agi_dependent (dep_insn, insn))
        cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
        cost = 0;

      /* Floating point stores require the value to be ready one cycle
         earlier.  */
      if (insn_type == TYPE_FMOV
          && get_attr_memory (insn) == MEMORY_STORE
          && !ix86_agi_dependent (dep_insn, insn))
        cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
          && (set = single_set (dep_insn)) != NULL_RTX
          && (set2 = single_set (insn)) != NULL_RTX
          && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && MEM_P (SET_DEST (set2)))
        cost += 1;

      /* Show the ability of the reorder buffer to hide the latency of a
         load by executing it in parallel with the previous instruction,
         in case the previous instruction is not needed to compute the
         address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          /* Claim moves to take one cycle, as the core can issue one load
             at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 1)
            cost--;
        }
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
         finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
          && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
        return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;

      /* Show the ability of the reorder buffer to hide the latency of a
         load by executing it in parallel with the previous instruction,
         in case the previous instruction is not needed to compute the
         address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          /* Claim moves to take one cycle, as the core can issue one load
             at a time and the next load can start a cycle later.  */
          if (dep_insn_type == TYPE_IMOV
              || dep_insn_type == TYPE_FMOV)
            cost = 1;
          else if (cost > 2)
            cost -= 2;
          else
            cost = 1;
        }
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BDVER3:
    case PROCESSOR_BTVER1:
    case PROCESSOR_BTVER2:
    case PROCESSOR_ATOM:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
      memory = get_attr_memory (insn);

      /* Show the ability of the reorder buffer to hide the latency of a
         load by executing it in parallel with the previous instruction,
         in case the previous instruction is not needed to compute the
         address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
          && !ix86_agi_dependent (dep_insn, insn))
        {
          enum attr_unit unit = get_attr_unit (insn);
          int loadcost = 3;

          /* Because of the difference between the length of integer and
             floating unit pipeline preparation stages, the memory operands
             for floating point are cheaper.

             ??? For Athlon the difference is most probably 2.  */
          if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
            loadcost = 3;
          else
            loadcost = TARGET_ATHLON ? 2 : 0;

          if (cost >= loadcost)
            cost -= loadcost;
          else
            cost = 0;
        }

    default:
      break;
    }

  return cost;
}
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_K6:
      return 1;

    case PROCESSOR_CORE2:
    case PROCESSOR_COREI7:
    case PROCESSOR_ATOM:
      /* Generally, we want haifa-sched:max_issue() to look ahead as far
         as many instructions can be executed on a cycle, i.e.,
         issue_rate.  I wonder why tuning for many CPUs does not do this.  */
      if (reload_completed)
        return ix86_issue_rate ();
      /* Don't use lookahead for pre-reload schedule to save compile time.  */
      return 0;

    default:
      return 0;
    }
}
/* Try to reorder the ready list to take advantage of Atom pipelined IMUL
   execution.  It is applied if
   (1) an IMUL instruction is on the top of the list;
   (2) there is exactly one producer of an independent IMUL instruction
       in the ready list;
   and then it puts the found producer on the top of the ready list.
   Returns the issue rate.  */

static int
ix86_sched_reorder(FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
                   int clock_var ATTRIBUTE_UNUSED)
{
  static int issue_rate = -1;
  int n_ready = *pn_ready;
  rtx insn, insn1, insn2;
  int i;
  sd_iterator_def sd_it;
  dep_t dep;
  int index = -1;

  /* Set up the issue rate.  */
  issue_rate = ix86_issue_rate();

  /* Do reordering for Atom only.  */
  if (ix86_tune != PROCESSOR_ATOM)
    return issue_rate;
  /* Do not perform ready list reordering for the pre-reload schedule pass.  */
  if (!reload_completed)
    return issue_rate;
  /* Nothing to do if the ready list contains only 1 instruction.  */
  if (n_ready <= 1)
    return issue_rate;

  /* Check that an IMUL instruction is on the top of the ready list.  */
  insn = ready[n_ready - 1];
  if (!NONDEBUG_INSN_P (insn))
    return issue_rate;
  insn = PATTERN (insn);
  if (GET_CODE (insn) == PARALLEL)
    insn = XVECEXP (insn, 0, 0);
  if (GET_CODE (insn) != SET)
    return issue_rate;
  if (!(GET_CODE (SET_SRC (insn)) == MULT
        && GET_MODE (SET_SRC (insn)) == SImode))
    return issue_rate;

  /* Search for a producer of an independent IMUL instruction.  */
  for (i = n_ready - 2; i >= 0; i--)
    {
      insn = ready[i];
      if (!NONDEBUG_INSN_P (insn))
        continue;
      /* Skip IMUL instructions.  */
      insn2 = PATTERN (insn);
      if (GET_CODE (insn2) == PARALLEL)
        insn2 = XVECEXP (insn2, 0, 0);
      if (GET_CODE (insn2) == SET
          && GET_CODE (SET_SRC (insn2)) == MULT
          && GET_MODE (SET_SRC (insn2)) == SImode)
        continue;

      FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
        {
          rtx con;
          con = DEP_CON (dep);
          if (!NONDEBUG_INSN_P (con))
            continue;
          insn1 = PATTERN (con);
          if (GET_CODE (insn1) == PARALLEL)
            insn1 = XVECEXP (insn1, 0, 0);

          if (GET_CODE (insn1) == SET
              && GET_CODE (SET_SRC (insn1)) == MULT
              && GET_MODE (SET_SRC (insn1)) == SImode)
            {
              sd_iterator_def sd_it1;
              dep_t dep1;
              /* Check that there is no other dependee of the IMUL.  */
              index = i;
              FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
                {
                  rtx pro;
                  pro = DEP_PRO (dep1);
                  if (!NONDEBUG_INSN_P (pro))
                    continue;
                  if (pro != insn)
                    index = -1;
                }
              if (index >= 0)
                break;
            }
        }
      if (index >= 0)
        break;
    }
  if (index < 0)
    return issue_rate; /* Didn't find the IMUL producer.  */

  if (sched_verbose > 1)
    fprintf(dump, ";;\tatom sched_reorder: swap %d and %d insns\n",
            INSN_UID (ready[index]), INSN_UID (ready[n_ready - 1]));

  /* Put the IMUL producer (ready[index]) at the top of the ready list.  */
  insn1 = ready[index];
  for (i = index; i < n_ready - 1; i++)
    ready[i] = ready[i + 1];
  ready[n_ready - 1] = insn1;

  return issue_rate;
}
static bool
ix86_class_likely_spilled_p (reg_class_t);

/* Return true if the lhs of INSN is a HW function argument register, and
   set IS_SPILLED to true if it is a likely-spilled HW register.  */
static bool
insn_is_function_arg (rtx insn, bool* is_spilled)
{
  rtx dst;

  if (!NONDEBUG_INSN_P (insn))
    return false;
  /* Call instructions are not movable; ignore them.  */
  if (CALL_P (insn))
    return false;
  insn = PATTERN (insn);
  if (GET_CODE (insn) == PARALLEL)
    insn = XVECEXP (insn, 0, 0);
  if (GET_CODE (insn) != SET)
    return false;
  dst = SET_DEST (insn);
  if (REG_P (dst) && HARD_REGISTER_P (dst)
      && ix86_function_arg_regno_p (REGNO (dst)))
    {
      /* Is it a likely-spilled HW register?  */
      if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
          && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
        *is_spilled = true;
      return true;
    }
  return false;
}
/* Add output dependencies for a chain of adjacent function arguments,
   but only if there is a move to a likely-spilled HW register.  Return
   the first argument if at least one dependence was added, or NULL
   otherwise.  */
static rtx
add_parameter_dependencies (rtx call, rtx head)
{
  rtx insn;
  rtx last = call;
  rtx first_arg = NULL;
  bool is_spilled = false;

  head = PREV_INSN (head);

  /* Find the argument-passing instruction nearest to the call.  */
  while (true)
    {
      last = PREV_INSN (last);
      if (last == head)
        return NULL;
      if (!NONDEBUG_INSN_P (last))
        continue;
      if (insn_is_function_arg (last, &is_spilled))
        break;
      return NULL;
    }

  first_arg = last;
  while (true)
    {
      insn = PREV_INSN (last);
      if (!INSN_P (insn))
        break;
      if (insn == head)
        break;
      if (!NONDEBUG_INSN_P (insn))
        {
          last = insn;
          continue;
        }
      if (insn_is_function_arg (insn, &is_spilled))
        {
          /* Add an output dependence between two function arguments if
             the chain of output arguments contains likely-spilled HW
             registers.  */
          if (is_spilled)
            add_dependence (last, insn, REG_DEP_OUTPUT);
          first_arg = last = insn;
        }
      else
        break;
    }
  if (!is_spilled)
    return NULL;
  return first_arg;
}
/* Add an output or anti dependency from INSN to FIRST_ARG to restrict
   its code motion.  */
static void
avoid_func_arg_motion (rtx first_arg, rtx insn)
{
  rtx set;
  rtx tmp;

  set = single_set (insn);
  if (!set)
    return;
  tmp = SET_DEST (set);
  if (REG_P (tmp))
    {
      /* Add an output dependency to the first function argument.  */
      add_dependence (first_arg, insn, REG_DEP_OUTPUT);
      return;
    }
  /* Add an anti dependency.  */
  add_dependence (first_arg, insn, REG_DEP_ANTI);
}
/* Avoid cross-block motion of a function argument by adding a dependency
   from the first non-jump instruction in BB.  */
static void
add_dependee_for_func_arg (rtx arg, basic_block bb)
{
  rtx insn = BB_END (bb);

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
        {
          rtx set = single_set (insn);
          if (set)
            {
              avoid_func_arg_motion (arg, insn);
              return;
            }
        }
      if (insn == BB_HEAD (bb))
        return;
      insn = PREV_INSN (insn);
    }
}
/* Hook for the pre-reload schedule - avoid motion of function arguments
   passed in likely-spilled HW registers.  */
static void
ix86_dependencies_evaluation_hook (rtx head, rtx tail)
{
  rtx insn;
  rtx first_arg = NULL;
  if (reload_completed)
    return;
  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);
  for (insn = tail; insn != head; insn = PREV_INSN (insn))
    if (INSN_P (insn) && CALL_P (insn))
      {
        first_arg = add_parameter_dependencies (insn, head);
        if (first_arg)
          {
            /* Add a dependee for the first argument to predecessors,
               but only if the region contains more than one block.  */
            basic_block bb = BLOCK_FOR_INSN (insn);
            int rgn = CONTAINING_RGN (bb->index);
            int nr_blks = RGN_NR_BLOCKS (rgn);
            /* Skip trivial regions and region head blocks that can have
               predecessors outside of the region.  */
            if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
              {
                edge e;
                edge_iterator ei;
                /* Assume that the region is an SCC, i.e. all immediate
                   predecessors of a non-head block are in the same
                   region.  */
                FOR_EACH_EDGE (e, ei, bb->preds)
                  {
                    /* Avoid creating loop-carried dependencies by
                       using the topological ordering in the region.  */
                    if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
                      add_dependee_for_func_arg (first_arg, e->src);
                  }
              }
            insn = first_arg;
            if (insn == head)
              break;
          }
      }
    else if (first_arg)
      avoid_func_arg_motion (first_arg, insn);
}
/* Hook for the pre-reload schedule - set the priority of moves from
   likely-spilled HW registers to the maximum, to schedule them as soon as
   possible.  These are moves from function argument registers at the top
   of the function entry and moves from function return value registers
   after a call.  */
static int
ix86_adjust_priority (rtx insn, int priority)
{
  rtx set;

  if (reload_completed)
    return priority;

  if (!NONDEBUG_INSN_P (insn))
    return priority;

  set = single_set (insn);
  if (set)
    {
      rtx tmp = SET_SRC (set);
      if (REG_P (tmp)
          && HARD_REGISTER_P (tmp)
          && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
          && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
        return current_sched_info->sched_max_insns_priority;
    }

  return priority;
}
/* Model the decoder of Core 2/i7.
   The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */

/* Maximum length of an insn that can be handled by
   a secondary decoder unit.  '8' for Core 2/i7.  */
static int core2i7_secondary_decoder_max_insn_size;

/* Ifetch block size, i.e., the number of bytes the decoder reads per
   cycle.  '16' for Core 2/i7.  */
static int core2i7_ifetch_block_size;

/* Maximum number of instructions the decoder can handle per cycle.
   '6' for Core 2/i7.  */
static int core2i7_ifetch_block_max_insns;
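
/* Illustration (not part of the original code): with a 16-byte ifetch
   block and a 6-insn decode limit, one cycle can accept e.g. six 2-byte
   instructions (12 bytes), but only three 5-byte instructions, since a
   fourth would overflow the 16-byte block; and any instruction longer
   than 8 bytes must be steered to the first decoder.  */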
typedef struct ix86_first_cycle_multipass_data_ *
  ix86_first_cycle_multipass_data_t;
typedef const struct ix86_first_cycle_multipass_data_ *
  const_ix86_first_cycle_multipass_data_t;

/* A variable to store target state across calls to max_issue within
   one cycle.  */
static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
  *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;

/* Initialize DATA.  */
static void
core2i7_first_cycle_multipass_init (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
  data->ready_try_change = NULL;
  data->ready_try_change_size = 0;
}
/* Advancing the cycle; reset ifetch block counts.  */
static void
core2i7_dfa_post_advance_cycle (void)
{
  ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;

  gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
}

static int min_insn_size (rtx);
/* Filter out insns from ready_try that the core will not be able to issue
   on the current cycle due to the decoder.  */
static void
core2i7_first_cycle_multipass_filter_ready_try
(const_ix86_first_cycle_multipass_data_t data,
 char *ready_try, int n_ready, bool first_cycle_insn_p)
{
  while (n_ready--)
    {
      rtx insn;
      int insn_size;

      if (ready_try[n_ready])
        continue;

      insn = get_ready_element (n_ready);
      insn_size = min_insn_size (insn);

      if (/* If this is too long an insn for a secondary decoder ...  */
          (!first_cycle_insn_p
           && insn_size > core2i7_secondary_decoder_max_insn_size)
          /* ... or it would not fit into the ifetch block ...  */
          || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
          /* ... or the decoder is full already ...  */
          || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
        /* ... mask the insn out.  */
        {
          ready_try[n_ready] = 1;

          if (data->ready_try_change)
            bitmap_set_bit (data->ready_try_change, n_ready);
        }
    }
}
24748 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
24749 bool first_cycle_insn_p
)
24751 ix86_first_cycle_multipass_data_t data
24752 = (ix86_first_cycle_multipass_data_t
) _data
;
24753 const_ix86_first_cycle_multipass_data_t prev_data
24754 = ix86_first_cycle_multipass_data
;
24756 /* Restore the state from the end of the previous round. */
24757 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
24758 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
24760 /* Filter instructions that cannot be issued on current cycle due to
24761 decoder restrictions. */
24762 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24763 first_cycle_insn_p
);
/* INSN is being issued in the current solution.  Account for its impact on
   the decoder model.  */
static void
core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
                                     rtx insn, const void *_prev_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = (const_ix86_first_cycle_multipass_data_t) _prev_data;

  int insn_size = min_insn_size (insn);

  data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
  gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
              && data->ifetch_block_n_insns
                 <= core2i7_ifetch_block_max_insns);

  /* Allocate or resize the bitmap for storing INSN's effect on ready_try.  */
  if (!data->ready_try_change)
    {
      data->ready_try_change = sbitmap_alloc (n_ready);
      data->ready_try_change_size = n_ready;
    }
  else if (data->ready_try_change_size < n_ready)
    {
      data->ready_try_change = sbitmap_resize (data->ready_try_change,
                                               n_ready, 0);
      data->ready_try_change_size = n_ready;
    }
  bitmap_clear (data->ready_try_change);

  /* Filter out insns from ready_try that the core will not be able to issue
     on the current cycle due to the decoder.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
                                                  false);
}
/* Revert the effect on ready_try.  */
static void
core2i7_first_cycle_multipass_backtrack (const void *_data,
                                         char *ready_try,
                                         int n_ready ATTRIBUTE_UNUSED)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  unsigned int i = 0;
  sbitmap_iterator sbi;

  gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
  EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
    {
      ready_try[i] = 0;
    }
}
/* Save the result of multipass lookahead scheduling for the next round.  */
static void
core2i7_first_cycle_multipass_end (const void *_data)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  ix86_first_cycle_multipass_data_t next_data
    = ix86_first_cycle_multipass_data;

  if (data != NULL)
    {
      next_data->ifetch_block_len = data->ifetch_block_len;
      next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
    }
}
/* Deallocate target data.  */
static void
core2i7_first_cycle_multipass_fini (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  if (data->ready_try_change)
    {
      sbitmap_free (data->ready_try_change);
      data->ready_try_change = NULL;
      data->ready_try_change_size = 0;
    }
}
/* Prepare for the scheduling pass.  */
static void
ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
                        int verbose ATTRIBUTE_UNUSED,
                        int max_uid ATTRIBUTE_UNUSED)
{
  /* Install scheduling hooks for the current CPU.  Some of these hooks are
     used in time-critical parts of the scheduler, so we only set them up
     when they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2:
    case PROCESSOR_COREI7:
      /* Do not perform multipass scheduling for the pre-reload schedule
         to save compile time.  */
      if (reload_completed)
        {
          targetm.sched.dfa_post_advance_cycle
            = core2i7_dfa_post_advance_cycle;
          targetm.sched.first_cycle_multipass_init
            = core2i7_first_cycle_multipass_init;
          targetm.sched.first_cycle_multipass_begin
            = core2i7_first_cycle_multipass_begin;
          targetm.sched.first_cycle_multipass_issue
            = core2i7_first_cycle_multipass_issue;
          targetm.sched.first_cycle_multipass_backtrack
            = core2i7_first_cycle_multipass_backtrack;
          targetm.sched.first_cycle_multipass_end
            = core2i7_first_cycle_multipass_end;
          targetm.sched.first_cycle_multipass_fini
            = core2i7_first_cycle_multipass_fini;

          /* Set decoder parameters.  */
          core2i7_secondary_decoder_max_insn_size = 8;
          core2i7_ifetch_block_size = 16;
          core2i7_ifetch_block_max_insns = 6;
          break;
        }
      /* ... Fall through ... */
    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
        return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
        return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
           && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
          || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if ((TYPE_MODE (type) == XCmode
           || TYPE_MODE (type) == TCmode) && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
                      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for a caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
        align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  The exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions on arrays.  This
     rule is meant for static storage (where the compiler cannot do the
     analysis by itself).  We follow it for automatic variables only when
     convenient.  We fully control everything in the function being compiled,
     and functions from other units cannot rely on the alignment.

     Exclude the va_list type.  It is the common case of a local array where
     we cannot benefit from the alignment.  */
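
  /* Illustrative consequence (not part of the original code): in a
     function optimized for speed on x86-64, a local "char buf[16]" is
     bumped to 128-bit alignment by the test below, enabling aligned
     16-byte SSE accesses, while a 15-byte array keeps its ordinary
     alignment.  */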
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun))
    {
      if (AGGREGATE_TYPE_P (type)
          && (va_list_type_node == NULL_TREE
              || (TYPE_MAIN_VARIANT (type)
                  != TYPE_MAIN_VARIANT (va_list_type_node)))
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
              || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
        return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if ((TYPE_MODE (type) == XCmode
           || TYPE_MODE (type) == TCmode) && align < 128)
        return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
            || TREE_CODE (type) == UNION_TYPE
            || TREE_CODE (type) == QUAL_UNION_TYPE)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }
  return align;
}
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
                        unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
{
  unsigned regno;

  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      tree fntype;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      fntype = TREE_TYPE (fndecl);
      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
        {
          /* Fastcall functions use ecx/edx for arguments, which leaves
             us with EAX for the static chain.
             Thiscall functions use ecx for arguments, which also
             leaves us with EAX for the static chain.  */
          regno = AX_REG;
        }
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
        {
          /* Thiscall functions use ecx for arguments, which leaves
             us with EAX and EDX for the static chain.
             For ABI compatibility we use EAX.  */
          regno = AX_REG;
        }
      else if (ix86_function_regparm (fntype, fndecl) == 3)
        {
          /* For regparm 3, we have no free call-clobbered registers in
             which to store the static chain.  In order to implement this,
             we have the trampoline push the static chain to the stack.
             However, we can't push a value below the return address when
             we call the nested function directly, so we have to use an
             alternate entry point.  For this we use ESI, and have the
             alternate entry point push ESI, so that things appear the
             same once we're executing the nested function.  */
          if (incoming_p)
            {
              if (fndecl == current_function_decl)
                ix86_static_chain_on_stack = true;
              return gen_frame_mem (SImode,
                                    plus_constant (Pmode,
                                                   arg_pointer_rtx, -8));
            }
          regno = SI_REG;
        }
    }

  return gen_rtx_REG (Pmode, regno);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load the address using
         the shorter movl instead of movabs.  We may want to support
         movq for kernel mode, but the kernel does not use trampolines at
         the moment.  FNADDR is a 32-bit address and may not be in
         DImode when ptr_mode == SImode.  Always use movl in this
         case.  */
      if (ptr_mode == SImode
          || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
        {
          fnaddr = copy_addr_to_reg (fnaddr);

          mem = adjust_address (m_tramp, HImode, offset);
          emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

          mem = adjust_address (m_tramp, SImode, offset + 2);
          emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
          offset += 6;
        }
      else
        {
          mem = adjust_address (m_tramp, HImode, offset);
          emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

          mem = adjust_address (m_tramp, DImode, offset + 2);
          emit_move_insn (mem, fnaddr);

          offset += 10;
        }

      /* Load the static chain using movabs to r10.  Use the shorter movl
         instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
        {
          opcode = 0xba41;
          size = 6;
        }
      else
        {
          opcode = 0xba49;
          size = 10;
        }

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
         pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
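
      /* Decoding the stored word (illustration, not part of the original
         code): SImode stores are little-endian, so 0x90e3ff49 emits the
         bytes 49 ff e3 90, i.e. "rex.W jmp *%r11" followed by the padding
         nop mentioned above.  */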
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
         with a constant, or push the constant to the stack.  All of the
         instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
        {
          switch (REGNO (chain))
            {
            case AX_REG:
              opcode = 0xb8; break;
            case CX_REG:
              opcode = 0xb9; break;
            default:
              gcc_unreachable ();
            }
        }
      else
        opcode = 0x68;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute the offset from the end of the jmp to the target function.
         In the case in which the trampoline stores the static chain on
         the stack, we need to skip the first insn which pushes the
         (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      disp = expand_binop (SImode, sub_optab, fnaddr,
                           plus_constant (Pmode, XEXP (m_tramp, 0),
                                          offset - (MEM_P (chain) ? 1 : 0)),
                           NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;

      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
        quals = TYPE_UNQUALIFIED;
      else
        quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
        itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}
/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
        {
          atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
          args = tree_cons (NULL, atype, args);
        }

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTPS_NR,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTPS_NR,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  IX86_BUILTIN_FXSAVE,
  IX86_BUILTIN_FXRSTOR,
  IX86_BUILTIN_FXSAVE64,
  IX86_BUILTIN_FXRSTOR64,

  IX86_BUILTIN_XSAVE,
  IX86_BUILTIN_XRSTOR,
  IX86_BUILTIN_XSAVE64,
  IX86_BUILTIN_XRSTOR64,

  IX86_BUILTIN_XSAVEOPT,
  IX86_BUILTIN_XSAVEOPT64,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTI64,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,
  IX86_BUILTIN_PAUSE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  /* SSE3 */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3 */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  /* SSE4.1 */
  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_DPPD,
  IX86_BUILTIN_DPPS,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,

  IX86_BUILTIN_FLOORPD,
  IX86_BUILTIN_CEILPD,
  IX86_BUILTIN_TRUNCPD,
  IX86_BUILTIN_RINTPD,
  IX86_BUILTIN_ROUNDPD_AZ,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,

  IX86_BUILTIN_FLOORPS,
  IX86_BUILTIN_CEILPS,
  IX86_BUILTIN_TRUNCPS,
  IX86_BUILTIN_RINTPS,
  IX86_BUILTIN_ROUNDPS_AZ,

  IX86_BUILTIN_FLOORPS_SFIX,
  IX86_BUILTIN_CEILPS_SFIX,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,
  IX86_BUILTIN_VEC_PACK_SFIX256,

  /* SSE4.2 */
  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  /* AVX */
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256,
  IX86_BUILTIN_CEILPD256,
  IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_ROUNDPD_AZ256,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,

  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,
  IX86_BUILTIN_ROUNDPS_AZ256,

  IX86_BUILTIN_FLOORPS_SFIX256,
  IX86_BUILTIN_CEILPS_SFIX256,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* AVX2 */
  IX86_BUILTIN_MPSADBW256,
  IX86_BUILTIN_PABSB256,
  IX86_BUILTIN_PABSW256,
  IX86_BUILTIN_PABSD256,
  IX86_BUILTIN_PACKSSDW256,
  IX86_BUILTIN_PACKSSWB256,
  IX86_BUILTIN_PACKUSDW256,
  IX86_BUILTIN_PACKUSWB256,
  IX86_BUILTIN_PADDB256,
  IX86_BUILTIN_PADDW256,
  IX86_BUILTIN_PADDD256,
  IX86_BUILTIN_PADDQ256,
  IX86_BUILTIN_PADDSB256,
  IX86_BUILTIN_PADDSW256,
  IX86_BUILTIN_PADDUSB256,
  IX86_BUILTIN_PADDUSW256,
  IX86_BUILTIN_PALIGNR256,
  IX86_BUILTIN_AND256I,
  IX86_BUILTIN_ANDNOT256I,
  IX86_BUILTIN_PAVGB256,
  IX86_BUILTIN_PAVGW256,
  IX86_BUILTIN_PBLENDVB256,
  IX86_BUILTIN_PBLENDVW256,
  IX86_BUILTIN_PCMPEQB256,
  IX86_BUILTIN_PCMPEQW256,
  IX86_BUILTIN_PCMPEQD256,
  IX86_BUILTIN_PCMPEQQ256,
  IX86_BUILTIN_PCMPGTB256,
  IX86_BUILTIN_PCMPGTW256,
  IX86_BUILTIN_PCMPGTD256,
  IX86_BUILTIN_PCMPGTQ256,
  IX86_BUILTIN_PHADDW256,
  IX86_BUILTIN_PHADDD256,
  IX86_BUILTIN_PHADDSW256,
  IX86_BUILTIN_PHSUBW256,
  IX86_BUILTIN_PHSUBD256,
  IX86_BUILTIN_PHSUBSW256,
  IX86_BUILTIN_PMADDUBSW256,
  IX86_BUILTIN_PMADDWD256,
  IX86_BUILTIN_PMAXSB256,
  IX86_BUILTIN_PMAXSW256,
  IX86_BUILTIN_PMAXSD256,
  IX86_BUILTIN_PMAXUB256,
  IX86_BUILTIN_PMAXUW256,
  IX86_BUILTIN_PMAXUD256,
  IX86_BUILTIN_PMINSB256,
  IX86_BUILTIN_PMINSW256,
  IX86_BUILTIN_PMINSD256,
  IX86_BUILTIN_PMINUB256,
  IX86_BUILTIN_PMINUW256,
  IX86_BUILTIN_PMINUD256,
  IX86_BUILTIN_PMOVMSKB256,
  IX86_BUILTIN_PMOVSXBW256,
  IX86_BUILTIN_PMOVSXBD256,
  IX86_BUILTIN_PMOVSXBQ256,
  IX86_BUILTIN_PMOVSXWD256,
  IX86_BUILTIN_PMOVSXWQ256,
  IX86_BUILTIN_PMOVSXDQ256,
  IX86_BUILTIN_PMOVZXBW256,
  IX86_BUILTIN_PMOVZXBD256,
  IX86_BUILTIN_PMOVZXBQ256,
  IX86_BUILTIN_PMOVZXWD256,
  IX86_BUILTIN_PMOVZXWQ256,
  IX86_BUILTIN_PMOVZXDQ256,
  IX86_BUILTIN_PMULDQ256,
  IX86_BUILTIN_PMULHRSW256,
  IX86_BUILTIN_PMULHUW256,
  IX86_BUILTIN_PMULHW256,
  IX86_BUILTIN_PMULLW256,
  IX86_BUILTIN_PMULLD256,
  IX86_BUILTIN_PMULUDQ256,
  IX86_BUILTIN_POR256,
  IX86_BUILTIN_PSADBW256,
  IX86_BUILTIN_PSHUFB256,
  IX86_BUILTIN_PSHUFD256,
  IX86_BUILTIN_PSHUFHW256,
  IX86_BUILTIN_PSHUFLW256,
  IX86_BUILTIN_PSIGNB256,
  IX86_BUILTIN_PSIGNW256,
  IX86_BUILTIN_PSIGND256,
  IX86_BUILTIN_PSLLDQI256,
  IX86_BUILTIN_PSLLWI256,
  IX86_BUILTIN_PSLLW256,
  IX86_BUILTIN_PSLLDI256,
  IX86_BUILTIN_PSLLD256,
  IX86_BUILTIN_PSLLQI256,
  IX86_BUILTIN_PSLLQ256,
  IX86_BUILTIN_PSRAWI256,
  IX86_BUILTIN_PSRAW256,
  IX86_BUILTIN_PSRADI256,
  IX86_BUILTIN_PSRAD256,
  IX86_BUILTIN_PSRLDQI256,
  IX86_BUILTIN_PSRLWI256,
  IX86_BUILTIN_PSRLW256,
  IX86_BUILTIN_PSRLDI256,
  IX86_BUILTIN_PSRLD256,
  IX86_BUILTIN_PSRLQI256,
  IX86_BUILTIN_PSRLQ256,
  IX86_BUILTIN_PSUBB256,
  IX86_BUILTIN_PSUBW256,
  IX86_BUILTIN_PSUBD256,
  IX86_BUILTIN_PSUBQ256,
  IX86_BUILTIN_PSUBSB256,
  IX86_BUILTIN_PSUBSW256,
  IX86_BUILTIN_PSUBUSB256,
  IX86_BUILTIN_PSUBUSW256,
  IX86_BUILTIN_PUNPCKHBW256,
  IX86_BUILTIN_PUNPCKHWD256,
  IX86_BUILTIN_PUNPCKHDQ256,
  IX86_BUILTIN_PUNPCKHQDQ256,
  IX86_BUILTIN_PUNPCKLBW256,
  IX86_BUILTIN_PUNPCKLWD256,
  IX86_BUILTIN_PUNPCKLDQ256,
  IX86_BUILTIN_PUNPCKLQDQ256,
  IX86_BUILTIN_PXOR256,
  IX86_BUILTIN_MOVNTDQA256,
  IX86_BUILTIN_VBROADCASTSS_PS,
  IX86_BUILTIN_VBROADCASTSS_PS256,
  IX86_BUILTIN_VBROADCASTSD_PD256,
  IX86_BUILTIN_VBROADCASTSI256,
  IX86_BUILTIN_PBLENDD256,
  IX86_BUILTIN_PBLENDD128,
  IX86_BUILTIN_PBROADCASTB256,
  IX86_BUILTIN_PBROADCASTW256,
  IX86_BUILTIN_PBROADCASTD256,
  IX86_BUILTIN_PBROADCASTQ256,
  IX86_BUILTIN_PBROADCASTB128,
  IX86_BUILTIN_PBROADCASTW128,
  IX86_BUILTIN_PBROADCASTD128,
  IX86_BUILTIN_PBROADCASTQ128,
  IX86_BUILTIN_VPERMVARSI256,
  IX86_BUILTIN_VPERMDF256,
  IX86_BUILTIN_VPERMVARSF256,
  IX86_BUILTIN_VPERMDI256,
  IX86_BUILTIN_VPERMTI256,
  IX86_BUILTIN_VEXTRACT128I256,
  IX86_BUILTIN_VINSERT128I256,
  IX86_BUILTIN_MASKLOADD,
  IX86_BUILTIN_MASKLOADQ,
  IX86_BUILTIN_MASKLOADD256,
  IX86_BUILTIN_MASKLOADQ256,
  IX86_BUILTIN_MASKSTORED,
  IX86_BUILTIN_MASKSTOREQ,
  IX86_BUILTIN_MASKSTORED256,
  IX86_BUILTIN_MASKSTOREQ256,
  IX86_BUILTIN_PSLLVV4DI,
  IX86_BUILTIN_PSLLVV2DI,
  IX86_BUILTIN_PSLLVV8SI,
  IX86_BUILTIN_PSLLVV4SI,
  IX86_BUILTIN_PSRAVV8SI,
  IX86_BUILTIN_PSRAVV4SI,
  IX86_BUILTIN_PSRLVV4DI,
  IX86_BUILTIN_PSRLVV2DI,
  IX86_BUILTIN_PSRLVV8SI,
  IX86_BUILTIN_PSRLVV4SI,

  IX86_BUILTIN_GATHERSIV2DF,
  IX86_BUILTIN_GATHERSIV4DF,
  IX86_BUILTIN_GATHERDIV2DF,
  IX86_BUILTIN_GATHERDIV4DF,
  IX86_BUILTIN_GATHERSIV4SF,
  IX86_BUILTIN_GATHERSIV8SF,
  IX86_BUILTIN_GATHERDIV4SF,
  IX86_BUILTIN_GATHERDIV8SF,
  IX86_BUILTIN_GATHERSIV2DI,
  IX86_BUILTIN_GATHERSIV4DI,
  IX86_BUILTIN_GATHERDIV2DI,
  IX86_BUILTIN_GATHERDIV4DI,
  IX86_BUILTIN_GATHERSIV4SI,
  IX86_BUILTIN_GATHERSIV8SI,
  IX86_BUILTIN_GATHERDIV4SI,
  IX86_BUILTIN_GATHERDIV8SI,

  /* Alternate 4 element gather for the vectorizer where
     all operands are 32-byte wide.  */
  IX86_BUILTIN_GATHERALTSIV4DF,
  IX86_BUILTIN_GATHERALTDIV8SF,
  IX86_BUILTIN_GATHERALTSIV4DI,
  IX86_BUILTIN_GATHERALTDIV8SI,

  /* TFmode support builtins.  */
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  /* FMA4 instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  /* FMA3 instructions.  */
  IX86_BUILTIN_VFMADDSS3,
  IX86_BUILTIN_VFMADDSD3,

  /* XOP instructions.  */
  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_CLZS,

  /* RTM */
  IX86_BUILTIN_XBEGIN,
  IX86_BUILTIN_XEND,
  IX86_BUILTIN_XABORT,
  IX86_BUILTIN_XTEST,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,
  IX86_BUILTIN_CTZS,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* BMI2 instructions.  */
  IX86_BUILTIN_BZHI32,
  IX86_BUILTIN_BZHI64,
  IX86_BUILTIN_PDEP32,
  IX86_BUILTIN_PDEP64,
  IX86_BUILTIN_PEXT32,
  IX86_BUILTIN_PEXT64,

  /* ADX instructions.  */
  IX86_BUILTIN_ADDCARRYX32,
  IX86_BUILTIN_ADDCARRYX64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* RDSEED instructions.  */
  IX86_BUILTIN_RDSEED16_STEP,
  IX86_BUILTIN_RDSEED32_STEP,
  IX86_BUILTIN_RDSEED64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin */
  IX86_BUILTIN_CFSTRING,

  /* Builtins to get CPU type and supported features.  */
  IX86_BUILTIN_CPU_INIT,
  IX86_BUILTIN_CPU_IS,
  IX86_BUILTIN_CPU_SUPPORTS,

  IX86_BUILTIN_MAX
};
/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;			/* function name */
  enum ix86_builtin_func_type tcode;	/* type to use in the declaration */
  HOST_WIDE_INT isa;			/* isa_flags this builtin is defined for */
  bool const_p;				/* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */
static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
	     enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
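
/* Illustrative sketch (not part of the original sources): a typical
   registration call, as it might appear in an init routine such as
   ix86_init_mmx_sse_builtins, would look like

       def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
		    VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);

   When SSE is not in the active ISA set, no decl is built; instead the
   name/type/mask are parked in ix86_builtins_isa[IX86_BUILTIN_LDMXCSR]
   with set_and_not_built_p = true, to be materialized later by
   ix86_add_new_builtins if the ISA becomes available.  */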
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
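
/* Illustrative sketch (assumed call site, not taken from this file):
   side-effect-free arithmetic builtins go through the "const" variant so
   the middle end may CSE calls with identical arguments, e.g.

       def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd",
			  V2DF_FTYPE_V2DF, IX86_BUILTIN_SQRTPD);  */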
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
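
/* Illustrative flow (a sketch, not from this file): when the active ISA
   set grows, e.g. while processing __attribute__((target("avx"))), the
   target code can call

       ix86_add_new_builtins (ix86_isa_flags);

   and every parked entry whose mask now matches is declared exactly once;
   set_and_not_built_p flips to false, so a second call is a no-op.  */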
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
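
/* Illustrative sketch (assumed entry shape, not taken from the tables in
   this file): a comparison that is only available with the operands
   reversed sets BUILTIN_DESC_SWAP_OPERANDS in .flag, e.g. a cmpgt builtin
   implemented as a swapped cmplt:

       { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3,
	 "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS,
	 LT, BUILTIN_DESC_SWAP_OPERANDS },

   The expander reads the flag and exchanges the two input operands before
   emitting the pattern.  */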
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
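
/* Illustrative note (a sketch of the mapping, not from the sources): the
   rtx comparison codes above encode NaN behavior.  __builtin_ia32_comieq
   pairs with UNEQ because COMISS reports "equal" for unordered inputs.
   A user-level call, compiled with -msse, might look like

       int
       heads_equal (__m128 a, __m128 b)
       {
	 return __builtin_ia32_comieq ((__v4sf) a, (__v4sf) b);
       }
*/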
static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* FXSR, XSAVE and XSAVEOPT */
  { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
  { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
  { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },

  { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
  { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
  { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RTM */
  { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
};
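
/* Usage sketch (illustrative, not part of the table above): once
   registered, these special builtins are directly callable from user code.
   With -msse, the following emits an sfence instruction through the
   IX86_BUILTIN_SFENCE row and its CODE_FOR_sse_sfence pattern:

       void
       publish_flag (volatile int *p)
       {
	 *p = 1;
	 __builtin_ia32_sfence ();	/- store fence -/
       }
*/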
/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27030 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
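
  /* 3DNow! */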
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
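
  /* 3DNow!A */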
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
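
  /* SSE */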
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
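
  /* SSE2 */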
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
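
  /* SSE2 MMX */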
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
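
  /* SSE3 */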
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
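
  /* SSSE3 */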
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
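
  /* SSE4.1 */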
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
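
  /* SSE4.1 round and ptest patterns, gated by the shared ROUND ISA mask.  */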
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
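
  /* SSE4.2 */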
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
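
  /* SSE4A */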
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
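
  /* AES */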
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
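
  /* PCLMUL */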
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
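
  /* AVX */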
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
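
  /* VTESTP[SD] and 256-bit PTEST variants; as with the 128-bit PTEST
     builtins above, the comparison code selects the flag tested.  */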
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
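
  /* AVX2 */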
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
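
  /* BMI */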
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
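
  /* TBM */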
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
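
  /* F16C */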
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
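
  /* BMI2 */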
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
};
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
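
/* The multi-argument (FMA4/XOP) builtin table below uses the same entry
   layout as bdesc_args: ISA mask, insn code, name, builtin code,
   comparison code, and function prototype index.  */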
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
    "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
    "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
    UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
    UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
    UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
    UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
    UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
27980 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
27981 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27982 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27983 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
27984 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
27985 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
27986 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
27988 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
27989 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27990 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27991 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
27992 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
27993 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
27994 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
27996 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
27997 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27998 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27999 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
28000 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
28001 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
28002 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
28004 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
28005 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28006 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28007 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
28008 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
28009 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
28010 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
28012 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
28013 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28014 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28015 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
28016 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
28017 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
28018 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
28020 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28021 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28022 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28023 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28024 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28025 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28026 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28027 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28029 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28030 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28031 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28032 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28033 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28034 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28035 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28036 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28038 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
28039 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
28040 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
28041 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
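
/* For illustration only: user code normally reaches the XOP builtins in
   the table above through the wrappers in xopintrin.h, e.g. (assuming the
   _mm_comeq_epi8 wrapper; compile with -mxop):

     #include <x86intrin.h>
     __m128i
     cmp_bytes (__m128i a, __m128i b)
     {
       return _mm_comeq_epi8 (a, b);   // expands to __builtin_ia32_vpcomeqb
     }
*/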
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
        {
        case 64:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
        case 128:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
        case 256:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
        }
    }
  return NULL_TREE;
}
/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
        {
        case 64:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
        case 128:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
        case 256:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
        }
    }
  return NULL_TREE;
}
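
/* For illustration only: a sketch of the user-level code these TM hooks
   service (assuming -mavx -fgnu-tm; which builtin is picked depends on the
   vector size computed above):

     typedef float v8sf __attribute__ ((vector_size (32)));
     v8sf g;
     void copy_in_txn (v8sf *p)
     {
       __transaction_atomic { g = *p; }  // may become _ITM_RM256/_ITM_WM256
     }
*/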
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  if (!flag_tm)
    return;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
          tree type, attrs, attrs_type;
          enum built_in_function code = (enum built_in_function) d->code;

          ftype = (enum ix86_builtin_func_type) d->flag;
          type = ix86_get_builtin_func_type (ftype);

          if (BUILTIN_TM_LOAD_P (code))
            {
              attrs = attrs_load;
              attrs_type = attrs_type_load;
            }
          else if (BUILTIN_TM_STORE_P (code))
            {
              attrs = attrs_store;
              attrs_type = attrs_type_store;
            }
          else
            {
              attrs = attrs_log;
              attrs_type = attrs_type_log;
            }
          decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
                                       /* The builtin without the prefix for
                                          calling it directly.  */
                                       d->name + strlen ("__builtin_"),
                                       attrs);
          /* add_builtin_function () will set the DECL_ATTRIBUTES, now
             set the TYPE_ATTRIBUTES.  */
          decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

          set_builtin_decl (code, decl, false);
        }
    }
}
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA, to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
        ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
        ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
        ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
        ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
        ftype = INT_FTYPE_V2DF_V2DF;
      else
        ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
               UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
               "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
               IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
               VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
               VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
                            VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3 */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
               VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
               VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
                     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
                     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
                     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* RDRND */
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
               INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
               INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
               "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
               IX86_BUILTIN_RDRAND64_STEP);
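
  /* For illustration only: the rdrand step builtins defined above return
     nonzero on success and store the random value through the pointer;
     immintrin.h wraps them as _rdrand16_step etc.  A sketch (compile with
     -mrdrnd):

       unsigned int get_random (void)
       {
         unsigned int v = 0;
         while (!__builtin_ia32_rdrand32_step (&v))
           ;  // retry until the hardware returns a valid value
         return v;
       }
  */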
  /* AVX2 */
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
               V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
               IX86_BUILTIN_GATHERSIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
               V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
               IX86_BUILTIN_GATHERSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
               V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
               IX86_BUILTIN_GATHERDIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
               V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
               IX86_BUILTIN_GATHERDIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
               V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
               IX86_BUILTIN_GATHERSIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
               V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
               IX86_BUILTIN_GATHERSIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
               V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
               IX86_BUILTIN_GATHERDIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
               V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
               IX86_BUILTIN_GATHERDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
               V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
               IX86_BUILTIN_GATHERSIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
               V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
               IX86_BUILTIN_GATHERSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
               V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
               IX86_BUILTIN_GATHERDIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
               V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
               IX86_BUILTIN_GATHERDIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
               V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
               IX86_BUILTIN_GATHERSIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
               V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
               IX86_BUILTIN_GATHERSIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
               V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
               IX86_BUILTIN_GATHERDIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
               V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
               IX86_BUILTIN_GATHERDIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
               V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
               IX86_BUILTIN_GATHERALTSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
               V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
               IX86_BUILTIN_GATHERALTDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
               V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
               IX86_BUILTIN_GATHERALTSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
               V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
               IX86_BUILTIN_GATHERALTDIV8SI);
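
  /* For illustration only: the AVX2 gather builtins above are normally
     reached via avx2intrin.h, e.g. (assuming the _mm256_i32gather_pd
     wrapper, which expands to __builtin_ia32_gathersiv4df; compile with
     -mavx2):

       #include <immintrin.h>
       __m256d gather4 (const double *base, __m128i idx)
       {
         return _mm256_i32gather_pd (base, idx, 8);  // scale = sizeof (double)
       }
  */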
  /* RTM.  */
  def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
                     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
                     V4HI_FTYPE_HI_HI_HI_HI,
                     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
                     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
                     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
                     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
                     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
                     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
                     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
                     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_ext_v4hi",
                     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
                     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
                     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
                     "__builtin_ia32_vec_set_v2di",
                     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
                     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
                     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
                     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_set_v4hi",
                     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
                     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);

  /* RDSEED */
  def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
               INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
  def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
               INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
  def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
               "__builtin_ia32_rdseed_di_step",
               INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);

  /* ADCX */
  def_builtin (0, "__builtin_ia32_addcarryx_u32",
               UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
  def_builtin (OPTION_MASK_ISA_64BIT,
               "__builtin_ia32_addcarryx_u64",
               UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
               IX86_BUILTIN_ADDCARRYX64);

  /* Add FMA4 multi-arg argument instructions */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
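
/* For illustration only: the vec_ext builtins defined above underlie the
   scalar-extract intrinsics; e.g. xmmintrin.h implements _mm_cvtss_f32
   roughly as:

     float first_lane (__m128 v)
     {
       return __builtin_ia32_vec_ext_v4sf ((__v4sf) v, 0);
     }
*/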
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
   to return a pointer to VERSION_DECL if the outcome of the expression
   formed by PREDICATE_CHAIN is true.  This function will be called during
   version dispatch to decide which function version to execute.  It returns
   the basic block at the end, to which more conditions can be added.  */

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
                     tree predicate_chain, basic_block new_bb)
{
  gimple return_stmt;
  tree convert_expr, result_var;
  gimple convert_stmt;
  gimple call_cond_stmt;
  gimple if_else_stmt;

  basic_block bb1, bb2, bb3;
  edge e12, e23;

  tree cond_var, and_expr_var = NULL_TREE;
  gimple_seq gseq;

  tree predicate_decl, predicate_arg;

  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gseq = bb_seq (new_bb);

  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
                         build_fold_addr_expr (version_decl));
  result_var = create_tmp_var (ptr_type_node, NULL);
  convert_stmt = gimple_build_assign (result_var, convert_expr);
  return_stmt = gimple_build_return (result_var);

  if (predicate_chain == NULL_TREE)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  while (predicate_chain != NULL)
    {
      cond_var = create_tmp_var (integer_type_node, NULL);
      predicate_decl = TREE_PURPOSE (predicate_chain);
      predicate_arg = TREE_VALUE (predicate_chain);
      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
      gimple_call_set_lhs (call_cond_stmt, cond_var);

      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
      gimple_set_bb (call_cond_stmt, new_bb);
      gimple_seq_add_stmt (&gseq, call_cond_stmt);

      predicate_chain = TREE_CHAIN (predicate_chain);

      if (and_expr_var == NULL)
        and_expr_var = cond_var;
      else
        {
          gimple assign_stmt;
          /* Use MIN_EXPR to check if any integer is zero.
             and_expr_var = min_expr <cond_var, and_expr_var>  */
          assign_stmt
            = gimple_build_assign (and_expr_var,
                                   build2 (MIN_EXPR, integer_type_node,
                                           cond_var, and_expr_var));

          gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
          gimple_set_bb (assign_stmt, new_bb);
          gimple_seq_add_stmt (&gseq, assign_stmt);
        }
    }

  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
                                    integer_zero_node,
                                    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  bb1 = new_bb;
  e12 = split_block (bb1, if_else_stmt);
  bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  e23 = split_block (bb2, return_stmt);

  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR, 0);

  pop_cfun ();

  return bb3;
}
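
/* For illustration only: the GIMPLE appended to NEW_BB has roughly this
   shape (a sketch; the names are invented for readability):

     cond_1 = __builtin_cpu_is ("corei7");     // one call per chain entry
     cond_2 = __builtin_cpu_supports ("avx");
     and_var = MIN_EXPR <cond_2, cond_1>;      // zero iff any check failed
     if (and_var > 0)
       return (void *) &foo.arch_corei7_avx;
     // otherwise fall through to the next version's condition block
*/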
/* This parses the attribute arguments to target in DECL and determines
   the right builtin to use to match the platform specification.
   It returns the priority value for this version decl.  If PREDICATE_LIST
   is not NULL, it stores the list of cpu features that need to be checked
   before dispatching this function.  */

static unsigned int
get_builtin_code_for_version (tree decl, tree *predicate_list)
{
  tree attrs;
  struct cl_target_option cur_target;
  tree target_node;
  struct cl_target_option *new_target;
  const char *arg_str = NULL;
  const char *attrs_str = NULL;
  char *tok_str = NULL;
  char *token;

  /* Priority of i386 features, greater value is higher priority.  This is
     used to decide the order in which function dispatch must happen.  For
     instance, a version specialized for SSE4.2 should be checked for dispatch
     before a version for SSE3, as SSE4.2 implies SSE3.  */
  enum feature_priority
  {
    P_ZERO = 0,
    P_MMX,
    P_SSE,
    P_SSE2,
    P_SSE3,
    P_SSSE3,
    P_PROC_SSSE3,
    P_SSE4_a,
    P_PROC_SSE4_a,
    P_SSE4_1,
    P_SSE4_2,
    P_PROC_SSE4_2,
    P_POPCNT,
    P_AVX,
    P_AVX2,
    P_FMA,
    P_PROC_FMA
  };

  enum feature_priority priority = P_ZERO;

  /* These are the target attribute strings for which a dispatcher is
     available, from fold_builtin_cpu.  */

  static struct _feature_list
    {
      const char *const name;
      const enum feature_priority priority;
    }
  const feature_list[] =
    {
      {"mmx", P_MMX},
      {"sse", P_SSE},
      {"sse2", P_SSE2},
      {"sse3", P_SSE3},
      {"ssse3", P_SSSE3},
      {"sse4.1", P_SSE4_1},
      {"sse4.2", P_SSE4_2},
      {"popcnt", P_POPCNT},
      {"avx", P_AVX},
      {"avx2", P_AVX2}
    };

  static unsigned int NUM_FEATURES
    = sizeof (feature_list) / sizeof (struct _feature_list);

  unsigned int i;

  tree predicate_chain = NULL_TREE;
  tree predicate_decl, predicate_arg;

  attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
  gcc_assert (attrs != NULL);

  attrs = TREE_VALUE (TREE_VALUE (attrs));

  gcc_assert (TREE_CODE (attrs) == STRING_CST);
  attrs_str = TREE_STRING_POINTER (attrs);

  /* Handle arch= if specified.  For priority, set it to be 1 more than
     the best instruction set the processor can handle.  For instance, if
     there is a version for atom and a version for ssse3 (the highest ISA
     priority for atom), the atom version must be checked for dispatch
     before the ssse3 version.  */
  if (strstr (attrs_str, "arch=") != NULL)
    {
      cl_target_option_save (&cur_target, &global_options);
      target_node = ix86_valid_target_attribute_tree (attrs);

      gcc_assert (target_node);
      new_target = TREE_TARGET_OPTION (target_node);
      gcc_assert (new_target);

      if (new_target->arch_specified && new_target->arch > 0)
        {
          switch (new_target->arch)
            {
            case PROCESSOR_CORE2:
              arg_str = "core2";
              priority = P_PROC_SSSE3;
              break;
            case PROCESSOR_COREI7:
              arg_str = "corei7";
              priority = P_PROC_SSE4_2;
              break;
            case PROCESSOR_ATOM:
              arg_str = "atom";
              priority = P_PROC_SSSE3;
              break;
            case PROCESSOR_AMDFAM10:
              arg_str = "amdfam10h";
              priority = P_PROC_SSE4_a;
              break;
            case PROCESSOR_BDVER1:
              arg_str = "bdver1";
              priority = P_PROC_FMA;
              break;
            case PROCESSOR_BDVER2:
              arg_str = "bdver2";
              priority = P_PROC_FMA;
              break;
            }
        }

      cl_target_option_restore (&global_options, &cur_target);

      if (predicate_list && arg_str == NULL)
        {
          error_at (DECL_SOURCE_LOCATION (decl),
                    "No dispatcher found for the versioning attributes");
          return 0;
        }

      if (predicate_list)
        {
          predicate_decl = ix86_builtins[(int) IX86_BUILTIN_CPU_IS];
          /* For a C string literal the length includes the trailing NULL.  */
          predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
          predicate_chain = tree_cons (predicate_decl, predicate_arg,
                                       predicate_chain);
        }
    }

  /* Process feature name.  */
  tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
  strcpy (tok_str, attrs_str);
  token = strtok (tok_str, ",");
  predicate_decl = ix86_builtins[(int) IX86_BUILTIN_CPU_SUPPORTS];

  while (token != NULL)
    {
      /* Do not process "arch="  */
      if (strncmp (token, "arch=", 5) == 0)
        {
          token = strtok (NULL, ",");
          continue;
        }
      for (i = 0; i < NUM_FEATURES; ++i)
        {
          if (strcmp (token, feature_list[i].name) == 0)
            {
              if (predicate_list)
                {
                  predicate_arg = build_string_literal (
                                  strlen (feature_list[i].name) + 1,
                                  feature_list[i].name);
                  predicate_chain = tree_cons (predicate_decl, predicate_arg,
                                               predicate_chain);
                }
              /* Find the maximum priority feature.  */
              if (feature_list[i].priority > priority)
                priority = feature_list[i].priority;
              break;
            }
        }
      if (predicate_list && i == NUM_FEATURES)
        {
          error_at (DECL_SOURCE_LOCATION (decl),
                    "No dispatcher found for %s", token);
          return 0;
        }
      token = strtok (NULL, ",");
    }
  free (tok_str);

  if (predicate_list && predicate_chain == NULL_TREE)
    {
      error_at (DECL_SOURCE_LOCATION (decl),
                "No dispatcher found for the versioning attributes : %s",
                attrs_str);
      return 0;
    }
  else if (predicate_list)
    {
      predicate_chain = nreverse (predicate_chain);
      *predicate_list = predicate_chain;
    }

  return priority;
}
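
/* For illustration only: the attribute strings parsed above come from
   function multiversioning declarations such as (a sketch; in GCC of this
   vintage the feature is exercised from C++):

     __attribute__ ((target ("default")))     int foo (void) { return 0; }
     __attribute__ ((target ("arch=corei7"))) int foo (void) { return 1; }
     __attribute__ ((target ("avx")))         int foo (void) { return 2; }

   For the "arch=corei7" version this returns P_PROC_SSE4_2 and, when
   PREDICATE_LIST is set, records a __builtin_cpu_is ("corei7") check.  */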
/* This compares the priority of target features in function DECL1
   and DECL2.  It returns positive value if DECL1 is higher priority,
   negative value if DECL2 is higher priority and 0 if they are the
   same.  */

static int
ix86_compare_version_priority (tree decl1, tree decl2)
{
  unsigned int priority1 = 0;
  unsigned int priority2 = 0;

  if (lookup_attribute ("target", DECL_ATTRIBUTES (decl1)) != NULL)
    priority1 = get_builtin_code_for_version (decl1, NULL);

  if (lookup_attribute ("target", DECL_ATTRIBUTES (decl2)) != NULL)
    priority2 = get_builtin_code_for_version (decl2, NULL);

  return (int)priority1 - (int)priority2;
}
/* V1 and V2 point to function versions with different priorities
   based on the target ISA.  This function compares their priorities.  */

static int
feature_compare (const void *v1, const void *v2)
{
  typedef struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    } function_version_info;

  const function_version_info c1 = *(const function_version_info *)v1;
  const function_version_info c2 = *(const function_version_info *)v2;
  return (c2.dispatch_priority - c1.dispatch_priority);
}
/* This function generates the dispatch function for
   multi-versioned functions.  DISPATCH_DECL is the function which will
   contain the dispatch logic.  FNDECLS are the function choices for
   dispatch, and is a tree chain.  EMPTY_BB is the basic block pointer
   in DISPATCH_DECL in which the dispatch code is generated.  */

static int
dispatch_function_versions (tree dispatch_decl,
                            void *fndecls_p,
                            basic_block *empty_bb)
{
  tree default_decl;
  gimple ifunc_cpu_init_stmt;
  gimple_seq gseq;
  int ix;
  tree ele;
  vec<tree> *fndecls;
  unsigned int num_versions = 0;
  unsigned int actual_versions = 0;
  unsigned int i;

  struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    } *function_version_info;

  gcc_assert (dispatch_decl != NULL
              && fndecls_p != NULL
              && empty_bb != NULL);

  /* fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  num_versions = fndecls->length ();
  gcc_assert (num_versions >= 2);

  function_version_info = (struct _function_version_info *)
    XNEWVEC (struct _function_version_info, (num_versions - 1));

  /* The first version in the vector is the default decl.  */
  default_decl = (*fndecls)[0];

  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));

  gseq = bb_seq (*empty_bb);
  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
     constructors, so explicitly call __builtin_cpu_init here.  */
  ifunc_cpu_init_stmt
    = gimple_build_call_vec (ix86_builtins[(int) IX86_BUILTIN_CPU_INIT],
                             vNULL);
  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
  set_bb_seq (*empty_bb, gseq);

  pop_cfun ();

  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      tree version_decl = ele;
      tree predicate_chain = NULL_TREE;
      unsigned int priority;
      /* Get attribute string, parse it and find the right predicate decl.
         The predicate function could be a lengthy combination of many
         features, like arch-type and various isa-variants.  */
      priority = get_builtin_code_for_version (version_decl,
                                               &predicate_chain);

      if (predicate_chain == NULL_TREE)
        continue;

      actual_versions++;
      function_version_info [ix - 1].version_decl = version_decl;
      function_version_info [ix - 1].predicate_chain = predicate_chain;
      function_version_info [ix - 1].dispatch_priority = priority;
    }

  /* Sort the versions according to descending order of dispatch priority.  The
     priority is based on the ISA.  This is not a perfect solution.  There
     could still be ambiguity.  If more than one function version is suitable
     to execute, which one should be dispatched?  In future, allow the user
     to specify a dispatch priority next to the version.  */
  qsort (function_version_info, actual_versions,
         sizeof (struct _function_version_info), feature_compare);

  for (i = 0; i < actual_versions; ++i)
    *empty_bb = add_condition_to_bb (dispatch_decl,
                                     function_version_info[i].version_decl,
                                     function_version_info[i].predicate_chain,
                                     *empty_bb);

  /* dispatch default version at the end.  */
  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
                                   NULL, *empty_bb);

  free (function_version_info);
  return 0;
}
/* Comparator function to be used in qsort routine to sort attribute
   specification strings to "target".  */

static int
attr_strcmp (const void *v1, const void *v2)
{
  const char *c1 = *(char *const*)v1;
  const char *c2 = *(char *const*)v2;
  return strcmp (c1, c2);
}
/* STR is the argument to target attribute.  This function tokenizes
   the comma separated arguments, sorts them and returns a string which
   is a unique identifier for the comma separated arguments.  It also
   replaces non-identifier characters "=,-" with "_".  */

static char *
sorted_attr_string (const char *str)
{
  char **args = NULL;
  char *attr_str, *ret_str;
  char *attr = NULL;
  unsigned int argnum = 1;
  unsigned int i;

  for (i = 0; i < strlen (str); i++)
    if (str[i] == ',')
      argnum++;

  attr_str = (char *)xmalloc (strlen (str) + 1);
  strcpy (attr_str, str);

  /* Replace "=,-" with "_".  */
  for (i = 0; i < strlen (attr_str); i++)
    if (attr_str[i] == '=' || attr_str[i] == '-')
      attr_str[i] = '_';

  if (argnum == 1)
    return attr_str;

  args = XNEWVEC (char *, argnum);

  i = 0;
  attr = strtok (attr_str, ",");
  while (attr != NULL)
    {
      args[i] = attr;
      i++;
      attr = strtok (NULL, ",");
    }

  qsort (args, argnum, sizeof (char*), attr_strcmp);

  ret_str = (char *)xmalloc (strlen (str) + 1);
  strcpy (ret_str, args[0]);
  for (i = 1; i < argnum; i++)
    {
      strcat (ret_str, "_");
      strcat (ret_str, args[i]);
    }

  XDELETEVEC (args);
  XDELETEVEC (attr_str);
  return ret_str;
}
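
/* For illustration only: sorted_attr_string ("sse4.2,arch=corei7") first
   rewrites '=' and '-' to '_' ("sse4.2,arch_corei7"), tokenizes at the
   commas, sorts the tokens, and joins them with '_', yielding
   "arch_corei7_sse4.2".  */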
/* This function changes the assembler name for functions that are
   versions.  If DECL is a function version and has a "target"
   attribute, it appends the attribute string to its assembler name.  */

static tree
ix86_mangle_function_version_assembler_name (tree decl, tree id)
{
  tree version_attr;
  const char *orig_name, *version_string, *attr_str;
  char *assembler_name;

  if (DECL_DECLARED_INLINE_P (decl)
      && lookup_attribute ("gnu_inline",
                           DECL_ATTRIBUTES (decl)))
    error_at (DECL_SOURCE_LOCATION (decl),
              "Function versions cannot be marked as gnu_inline,"
              " bodies have to be generated");

  if (DECL_VIRTUAL_P (decl)
      || DECL_VINDEX (decl))
    error_at (DECL_SOURCE_LOCATION (decl),
              "Virtual function versioning not supported");

  version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));

  /* target attribute string is NULL for default functions.  */
  if (version_attr == NULL_TREE)
    return id;

  orig_name = IDENTIFIER_POINTER (id);
  version_string
    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));

  attr_str = sorted_attr_string (version_string);
  assembler_name = (char *) xmalloc (strlen (orig_name)
                                     + strlen (attr_str) + 2);

  sprintf (assembler_name, "%s.%s", orig_name, attr_str);

  /* Allow assembler name to be modified if already set.  */
  if (DECL_ASSEMBLER_NAME_SET_P (decl))
    SET_DECL_RTL (decl, NULL);

  return get_identifier (assembler_name);
}
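
/* For illustration only: a version of "foo" declared with
   __attribute__ ((target ("avx,sse4.2"))) is emitted under the assembler
   name "foo.avx_sse4.2", i.e. the sorted attribute string appended after
   a '.'.  */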
/* This function returns true if FN1 and FN2 are versions of the same function,
   that is, the target strings of the function decls are different.  This
   assumes that FN1 and FN2 have the same signature.  */

static bool
ix86_function_versions (tree fn1, tree fn2)
{
  tree attr1, attr2;
  const char *attr_str1, *attr_str2;
  char *target1, *target2;
  bool result;

  if (TREE_CODE (fn1) != FUNCTION_DECL
      || TREE_CODE (fn2) != FUNCTION_DECL)
    return false;

  attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
  attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));

  /* At least one function decl should have the target attribute specified.  */
  if (attr1 == NULL_TREE && attr2 == NULL_TREE)
    return false;

  /* If one function does not have a target attribute, these are versions.  */
  if (attr1 == NULL_TREE || attr2 == NULL_TREE)
    return true;

  attr_str1 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr1)));
  attr_str2 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr2)));

  target1 = sorted_attr_string (attr_str1);
  target2 = sorted_attr_string (attr_str2);

  /* The sorted target strings must be different for fn1 and fn2
     to be versions.  */
  if (strcmp (target1, target2) == 0)
    result = false;
  else
    result = true;

  XDELETEVEC (target1);
  XDELETEVEC (target2);

  return result;
}
/* This target supports function multiversioning.  */

static bool
ix86_supports_function_versions (void)
{
  return true;
}

static tree
ix86_mangle_decl_assembler_name (tree decl, tree id)
{
  /* For function version, add the target suffix to the assembler name.  */
  if (TREE_CODE (decl) == FUNCTION_DECL
      && DECL_FUNCTION_VERSIONED (decl))
    id = ix86_mangle_function_version_assembler_name (decl, id);
#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
  id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
#endif

  return id;
}
/* Return a new name by appending SUFFIX to the DECL name.  If MAKE_UNIQUE
   is true, also append the full path name of the source file.  */

static char *
make_name (tree decl, const char *suffix, bool make_unique)
{
  char *global_var_name;
  int name_len;
  const char *name;
  const char *unique_name = NULL;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));

  /* Get a unique name that can be used globally without any chances
     of collision at link time.  */
  if (make_unique)
    unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));

  name_len = strlen (name) + strlen (suffix) + 2;

  if (make_unique)
    name_len += strlen (unique_name) + 1;
  global_var_name = XNEWVEC (char, name_len);

  /* Use '.' to concatenate names as it is demangler friendly.  */
  if (make_unique)
    snprintf (global_var_name, name_len, "%s.%s.%s", name,
              unique_name, suffix);
  else
    snprintf (global_var_name, name_len, "%s.%s", name, suffix);

  return global_var_name;
}
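
/* For illustration only: for a public function whose assembler name is
   "foo", make_name (decl, "resolver", false) returns "foo.resolver"; with
   MAKE_UNIQUE true the file-scope identifier from get_file_function_name
   is spliced in between, so resolvers for static versions in different
   translation units cannot collide.  */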
#if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Return the decl created.  */

static tree
make_dispatcher_decl (const tree decl)
{
  tree func_decl;
  char *func_name, *resolver_name;
  tree fn_type, func_type;
  bool is_uniq = false;

  if (TREE_PUBLIC (decl) == 0)
    is_uniq = true;

  func_name = make_name (decl, "ifunc", is_uniq);
  resolver_name = make_name (decl, "resolver", is_uniq);
  gcc_assert (resolver_name);

  fn_type = TREE_TYPE (decl);
  func_type = build_function_type (TREE_TYPE (fn_type),
                                   TYPE_ARG_TYPES (fn_type));

  func_decl = build_fn_decl (func_name, func_type);
  TREE_USED (func_decl) = 1;
  DECL_CONTEXT (func_decl) = NULL_TREE;
  DECL_INITIAL (func_decl) = error_mark_node;
  DECL_ARTIFICIAL (func_decl) = 1;
  /* Mark this func as external, the resolver will flip it again if
     it gets generated.  */
  DECL_EXTERNAL (func_decl) = 1;
  /* IFUNCs have to be externally visible.  */
  TREE_PUBLIC (func_decl) = 1;

  return func_decl;
}

#endif
/* Returns true if DECL is multi-versioned and is the default function,
   that is, it is not tagged with target-specific optimization.  */

static bool
is_function_default_version (const tree decl)
{
  return (TREE_CODE (decl) == FUNCTION_DECL
          && DECL_FUNCTION_VERSIONED (decl)
          && lookup_attribute ("target", DECL_ATTRIBUTES (decl)) == NULL_TREE);
}
/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function.  */

static tree
ix86_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

#if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
  struct cgraph_function_version_info *it_v = NULL;
  struct cgraph_node *dispatcher_node = NULL;
  struct cgraph_function_version_info *dispatcher_version_info = NULL;
#endif

  struct cgraph_function_version_info *default_version_info = NULL;

  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  node = cgraph_get_node (fn);
  gcc_assert (node != NULL);

  node_v = get_cgraph_node_version (node);
  gcc_assert (node_v != NULL);

  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;
  default_version_info = first_v;
  while (default_version_info != NULL)
    {
      if (is_function_default_version
            (default_version_info->this_node->symbol.decl))
        break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node.  */
  if (first_v != default_version_info)
    {
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
        default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
  /* Right now, the dispatching is done via ifunc.  */
  dispatch_decl = make_dispatcher_decl (default_node->symbol.decl);

  dispatcher_node = cgraph_get_create_node (dispatch_decl);
  gcc_assert (dispatcher_node != NULL);
  dispatcher_node->dispatcher_function = 1;
  dispatcher_version_info
    = insert_new_cgraph_node_version (dispatcher_node);
  dispatcher_version_info->next = default_version_info;
  dispatcher_node->local.finalized = 1;

  /* Set the dispatcher for all the versions.  */
  it_v = default_version_info;
  while (it_v != NULL)
    {
      it_v->dispatcher_resolver = dispatch_decl;
      it_v = it_v->next;
    }
#else
  error_at (DECL_SOURCE_LOCATION (default_node->symbol.decl),
            "multiversioning needs ifunc which is not supported "
            "in this configuration");
#endif
  return dispatch_decl;
}
/* Makes a function attribute of the form NAME(ARG_NAME) and chains
   it to CHAIN.  */

static tree
make_attribute (const char *name, const char *arg_name, tree chain)
{
  tree attr_name;
  tree attr_arg_name;
  tree attr_args;
  tree attr;

  attr_name = get_identifier (name);
  attr_arg_name = build_string (strlen (arg_name), arg_name);
  attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
  attr = tree_cons (attr_name, attr_args, chain);
  return attr;
}
/* Make the resolver function decl to dispatch the versions of
   a multi-versioned function, DEFAULT_DECL.  Create an
   empty basic block in the resolver and store the pointer in
   EMPTY_BB.  Return the decl of the resolver function.  */

static tree
make_resolver_func (const tree default_decl,
                    const tree dispatch_decl,
                    basic_block *empty_bb)
{
  char *resolver_name;
  tree decl, type, decl_name, t;
  bool is_uniq = false;

  /* IFUNC's have to be globally visible.  So, if the default_decl is
     not, then the name of the IFUNC should be made unique.  */
  if (TREE_PUBLIC (default_decl) == 0)
    is_uniq = true;

  /* Append the filename to the resolver function if the versions are
     not externally visible.  This is because the resolver function has
     to be externally visible for the loader to find it.  So, appending
     the filename will prevent conflicts with a resolver function from
     another module which is based on the same version name.  */
  resolver_name = make_name (default_decl, "resolver", is_uniq);

  /* The resolver function should return a (void *).  */
  type = build_function_type_list (ptr_type_node, NULL_TREE);

  decl = build_fn_decl (resolver_name, type);
  decl_name = get_identifier (resolver_name);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  /* IFUNC resolvers have to be externally visible.  */
  TREE_PUBLIC (decl) = 1;
  DECL_UNINLINABLE (decl) = 0;

  /* Resolver is not external, body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (dispatch_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
         versioned function will put out a resolver.  Ensure it
         is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }
  /* Build result decl and add to function_decl.  */
  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  *empty_bb = init_lowered_empty_function (decl, false);

  cgraph_add_new_function (decl, true);
  cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));

  pop_cfun ();

  gcc_assert (dispatch_decl != NULL);
  /* Mark dispatch_decl as "ifunc" with resolver as resolver_name.  */
  DECL_ATTRIBUTES (dispatch_decl)
    = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));

  /* Create the alias for dispatch to resolver here.  */
  /*cgraph_create_function_alias (dispatch_decl, decl);*/
  cgraph_same_body_alias (NULL, dispatch_decl, decl);

  return decl;
}
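
/* For illustration only: the net effect at the source level is roughly
   (a sketch; the "foo" names are hypothetical):

     void *foo.resolver (void);                // body filled in by
                                               // dispatch_function_versions
     int foo.ifunc (void) __attribute__ ((ifunc ("foo.resolver")));

   i.e. the dispatcher becomes a GNU indirect function whose resolver
   picks a version once, at load time.  */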
/* Generate the dispatching code body to dispatch multi-versioned function
   DECL.  The target hook is called to process the "target" attributes and
   provide the code to dispatch the right function at run-time.  NODE points
   to the dispatcher decl whose body will be created.  */

static tree
ix86_generate_version_dispatcher_body (void *node_p)
{
  tree resolver_decl;
  basic_block empty_bb;
  vec<tree> fn_ver_vec = vNULL;
  tree default_ver_decl;
  struct cgraph_node *versn;
  struct cgraph_node *node;

  struct cgraph_function_version_info *node_version_info = NULL;
  struct cgraph_function_version_info *versn_info = NULL;

  node = (cgraph_node *)node_p;

  node_version_info = get_cgraph_node_version (node);
  gcc_assert (node->dispatcher_function
              && node_version_info != NULL);

  if (node_version_info->dispatcher_resolver)
    return node_version_info->dispatcher_resolver;

  /* The first version in the chain corresponds to the default version.  */
  default_ver_decl = node_version_info->next->this_node->symbol.decl;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->local.finalized = false;

  resolver_decl = make_resolver_func (default_ver_decl,
                                      node->symbol.decl, &empty_bb);

  node_version_info->dispatcher_resolver = resolver_decl;

  push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));

  fn_ver_vec.create (2);

  for (versn_info = node_version_info->next; versn_info;
       versn_info = versn_info->next)
    {
      versn = versn_info->this_node;
      /* Check for virtual functions here again, as by this time it should
         have been determined if this function needs a vtable index or
         not.  This happens for methods in derived classes that override
         virtual methods in base classes but are not explicitly marked as
         virtual.  */
      if (DECL_VINDEX (versn->symbol.decl))
        error_at (DECL_SOURCE_LOCATION (versn->symbol.decl),
                  "Virtual function multiversioning not supported");
      fn_ver_vec.safe_push (versn->symbol.decl);
    }

  dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);

  rebuild_cgraph_edges ();
  pop_cfun ();
  return resolver_decl;
}
29469 /* This builds the processor_model struct type defined in
29470 libgcc/config/i386/cpuinfo.c */
29473 build_processor_model_struct (void)
29475 const char *field_name
[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
29477 tree field
= NULL_TREE
, field_chain
= NULL_TREE
;
29479 tree type
= make_node (RECORD_TYPE
);
29481 /* The first 3 fields are unsigned int. */
29482 for (i
= 0; i
< 3; ++i
)
29484 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
29485 get_identifier (field_name
[i
]), unsigned_type_node
);
29486 if (field_chain
!= NULL_TREE
)
29487 DECL_CHAIN (field
) = field_chain
;
29488 field_chain
= field
;
29491 /* The last field is an array of unsigned integers of size one. */
29492 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
29493 get_identifier (field_name
[3]),
29494 build_array_type (unsigned_type_node
,
29495 build_index_type (size_one_node
)));
29496 if (field_chain
!= NULL_TREE
)
29497 DECL_CHAIN (field
) = field_chain
;
29498 field_chain
= field
;
29500 finish_builtin_struct (type
, "__processor_model", field_chain
, NULL_TREE
);
29504 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
29507 make_var_decl (tree type
, const char *name
)
29511 new_decl
= build_decl (UNKNOWN_LOCATION
,
29513 get_identifier(name
),
29516 DECL_EXTERNAL (new_decl
) = 1;
29517 TREE_STATIC (new_decl
) = 1;
29518 TREE_PUBLIC (new_decl
) = 1;
29519 DECL_INITIAL (new_decl
) = 0;
29520 DECL_ARTIFICIAL (new_decl
) = 0;
29521 DECL_PRESERVE_P (new_decl
) = 1;
29523 make_decl_one_only (new_decl
, DECL_ASSEMBLER_NAME (new_decl
));
29524 assemble_variable (new_decl
, 0, 0, 0);
29529 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
29530 into an integer defined in libgcc/config/i386/cpuinfo.c */
29533 fold_builtin_cpu (tree fndecl
, tree
*args
)
29536 enum ix86_builtins fn_code
= (enum ix86_builtins
)
29537 DECL_FUNCTION_CODE (fndecl
);
29538 tree param_string_cst
= NULL
;
29540 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
29541 enum processor_features
29557 /* These are the values for vendor types and cpu types and subtypes
29558 in cpuinfo.c. Cpu types and subtypes should be subtracted by
29559 the corresponding start value. */
29560 enum processor_model
29570 M_CPU_SUBTYPE_START
,
29571 M_INTEL_COREI7_NEHALEM
,
29572 M_INTEL_COREI7_WESTMERE
,
29573 M_INTEL_COREI7_SANDYBRIDGE
,
29574 M_AMDFAM10H_BARCELONA
,
29575 M_AMDFAM10H_SHANGHAI
,
29576 M_AMDFAM10H_ISTANBUL
,
29577 M_AMDFAM15H_BDVER1
,
29578 M_AMDFAM15H_BDVER2
,
29582 static struct _arch_names_table
29584 const char *const name
;
29585 const enum processor_model model
;
29587 const arch_names_table
[] =
29590 {"intel", M_INTEL
},
29591 {"atom", M_INTEL_ATOM
},
29592 {"core2", M_INTEL_CORE2
},
29593 {"corei7", M_INTEL_COREI7
},
29594 {"nehalem", M_INTEL_COREI7_NEHALEM
},
29595 {"westmere", M_INTEL_COREI7_WESTMERE
},
29596 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE
},
29597 {"amdfam10h", M_AMDFAM10H
},
29598 {"barcelona", M_AMDFAM10H_BARCELONA
},
29599 {"shanghai", M_AMDFAM10H_SHANGHAI
},
29600 {"istanbul", M_AMDFAM10H_ISTANBUL
},
29601 {"amdfam15h", M_AMDFAM15H
},
29602 {"bdver1", M_AMDFAM15H_BDVER1
},
29603 {"bdver2", M_AMDFAM15H_BDVER2
},
29604 {"bdver3", M_AMDFAM15H_BDVER3
},
29607 static struct _isa_names_table
29609 const char *const name
;
29610 const enum processor_features feature
;
29612 const isa_names_table
[] =
29616 {"popcnt", F_POPCNT
},
29620 {"ssse3", F_SSSE3
},
29621 {"sse4.1", F_SSE4_1
},
29622 {"sse4.2", F_SSE4_2
},
29627 tree __processor_model_type
= build_processor_model_struct ();
29628 tree __cpu_model_var
= make_var_decl (__processor_model_type
,
29631 gcc_assert ((args
!= NULL
) && (*args
!= NULL
));
29633 param_string_cst
= *args
;
29634 while (param_string_cst
29635 && TREE_CODE (param_string_cst
) != STRING_CST
)
29637 /* *args must be a expr that can contain other EXPRS leading to a
29639 if (!EXPR_P (param_string_cst
))
29641 error ("Parameter to builtin must be a string constant or literal");
29642 return integer_zero_node
;
29644 param_string_cst
= TREE_OPERAND (EXPR_CHECK (param_string_cst
), 0);
29647 gcc_assert (param_string_cst
);
29649 if (fn_code
== IX86_BUILTIN_CPU_IS
)
29655 unsigned int field_val
= 0;
29656 unsigned int NUM_ARCH_NAMES
29657 = sizeof (arch_names_table
) / sizeof (struct _arch_names_table
);
29659 for (i
= 0; i
< NUM_ARCH_NAMES
; i
++)
29660 if (strcmp (arch_names_table
[i
].name
,
29661 TREE_STRING_POINTER (param_string_cst
)) == 0)
29664 if (i
== NUM_ARCH_NAMES
)
29666 error ("Parameter to builtin not valid: %s",
29667 TREE_STRING_POINTER (param_string_cst
));
29668 return integer_zero_node
;
29671 field
= TYPE_FIELDS (__processor_model_type
);
29672 field_val
= arch_names_table
[i
].model
;
29674 /* CPU types are stored in the next field. */
29675 if (field_val
> M_CPU_TYPE_START
29676 && field_val
< M_CPU_SUBTYPE_START
)
29678 field
= DECL_CHAIN (field
);
29679 field_val
-= M_CPU_TYPE_START
;
29682 /* CPU subtypes are stored in the next field. */
29683 if (field_val
> M_CPU_SUBTYPE_START
)
29685 field
= DECL_CHAIN ( DECL_CHAIN (field
));
29686 field_val
-= M_CPU_SUBTYPE_START
;
29689 /* Get the appropriate field in __cpu_model. */
29690 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
29693 /* Check the value. */
29694 final
= build2 (EQ_EXPR
, unsigned_type_node
, ref
,
29695 build_int_cstu (unsigned_type_node
, field_val
));
29696 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
29698 else if (fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
29705 unsigned int field_val
= 0;
29706 unsigned int NUM_ISA_NAMES
29707 = sizeof (isa_names_table
) / sizeof (struct _isa_names_table
);
29709 for (i
= 0; i
< NUM_ISA_NAMES
; i
++)
29710 if (strcmp (isa_names_table
[i
].name
,
29711 TREE_STRING_POINTER (param_string_cst
)) == 0)
29714 if (i
== NUM_ISA_NAMES
)
29716 error ("Parameter to builtin not valid: %s",
29717 TREE_STRING_POINTER (param_string_cst
));
29718 return integer_zero_node
;
29721 field
= TYPE_FIELDS (__processor_model_type
);
29722 /* Get the last field, which is __cpu_features. */
29723 while (DECL_CHAIN (field
))
29724 field
= DECL_CHAIN (field
);
29726 /* Get the appropriate field: __cpu_model.__cpu_features */
29727 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
29730 /* Access the 0th element of __cpu_features array. */
29731 array_elt
= build4 (ARRAY_REF
, unsigned_type_node
, ref
,
29732 integer_zero_node
, NULL_TREE
, NULL_TREE
);
29734 field_val
= (1 << isa_names_table
[i
].feature
);
29735 /* Return __cpu_model.__cpu_features[0] & field_val */
29736 final
= build2 (BIT_AND_EXPR
, unsigned_type_node
, array_elt
,
29737 build_int_cstu (unsigned_type_node
, field_val
));
29738 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
29740 gcc_unreachable ();
29744 ix86_fold_builtin (tree fndecl
, int n_args
,
29745 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
29747 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
29749 enum ix86_builtins fn_code
= (enum ix86_builtins
)
29750 DECL_FUNCTION_CODE (fndecl
);
29751 if (fn_code
== IX86_BUILTIN_CPU_IS
29752 || fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
29754 gcc_assert (n_args
== 1);
29755 return fold_builtin_cpu (fndecl
, args
);
29759 #ifdef SUBTARGET_FOLD_BUILTIN
29760 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
29766 /* Make builtins to detect cpu type and features supported. NAME is
29767 the builtin name, CODE is the builtin code, and FTYPE is the function
29768 type of the builtin. */
29771 make_cpu_type_builtin (const char* name
, int code
,
29772 enum ix86_builtin_func_type ftype
, bool is_const
)
29777 type
= ix86_get_builtin_func_type (ftype
);
29778 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
29780 gcc_assert (decl
!= NULL_TREE
);
29781 ix86_builtins
[(int) code
] = decl
;
29782 TREE_READONLY (decl
) = is_const
;
29785 /* Make builtins to get CPU type and features supported. The created
29788 __builtin_cpu_init (), to detect cpu type and features,
29789 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
29790 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
29794 ix86_init_platform_type_builtins (void)
29796 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT
,
29797 INT_FTYPE_VOID
, false);
29798 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS
,
29799 INT_FTYPE_PCCHAR
, true);
29800 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS
,
29801 INT_FTYPE_PCCHAR
, true);
29804 /* Internal method for ix86_init_builtins. */
29807 ix86_init_builtins_va_builtins_abi (void)
29809 tree ms_va_ref
, sysv_va_ref
;
29810 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
29811 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
29812 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
29813 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
29817 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
29818 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
29819 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
29821 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
29824 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
29825 fnvoid_va_start_ms
=
29826 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
29827 fnvoid_va_end_sysv
=
29828 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
29829 fnvoid_va_start_sysv
=
29830 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
29832 fnvoid_va_copy_ms
=
29833 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
29835 fnvoid_va_copy_sysv
=
29836 build_function_type_list (void_type_node
, sysv_va_ref
,
29837 sysv_va_ref
, NULL_TREE
);
29839 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
29840 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29841 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
29842 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29843 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
29844 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29845 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
29846 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
29847 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
29848 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
29849 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
29850 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
29854 ix86_init_builtin_types (void)
29856 tree float128_type_node
, float80_type_node
;
29858 /* The __float80 type. */
29859 float80_type_node
= long_double_type_node
;
29860 if (TYPE_MODE (float80_type_node
) != XFmode
)
29862 /* The __float80 type. */
29863 float80_type_node
= make_node (REAL_TYPE
);
29865 TYPE_PRECISION (float80_type_node
) = 80;
29866 layout_type (float80_type_node
);
29868 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
29870 /* The __float128 type. */
29871 float128_type_node
= make_node (REAL_TYPE
);
29872 TYPE_PRECISION (float128_type_node
) = 128;
29873 layout_type (float128_type_node
);
29874 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
29876 /* This macro is built by i386-builtin-types.awk. */
29877 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
29881 ix86_init_builtins (void)
29885 ix86_init_builtin_types ();
29887 /* Builtins to get CPU type and features. */
29888 ix86_init_platform_type_builtins ();
29890 /* TFmode support builtins. */
29891 def_builtin_const (0, "__builtin_infq",
29892 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
29893 def_builtin_const (0, "__builtin_huge_valq",
29894 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
29896 /* We will expand them to normal call if SSE isn't available since
29897 they are used by libgcc. */
29898 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
29899 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
29900 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
29901 TREE_READONLY (t
) = 1;
29902 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
29904 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
29905 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
29906 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
29907 TREE_READONLY (t
) = 1;
29908 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
29910 ix86_init_tm_builtins ();
29911 ix86_init_mmx_sse_builtins ();
29914 ix86_init_builtins_va_builtins_abi ();
29916 #ifdef SUBTARGET_INIT_BUILTINS
29917 SUBTARGET_INIT_BUILTINS
;
29921 /* Return the ix86 builtin for CODE. */
29924 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
29926 if (code
>= IX86_BUILTIN_MAX
)
29927 return error_mark_node
;
29929 return ix86_builtins
[code
];
29932 /* Errors in the source file can cause expand_expr to return const0_rtx
29933 where we expect a vector. To avoid crashing, use one of the vector
29934 clear instructions. */
29936 safe_vector_operand (rtx x
, enum machine_mode mode
)
29938 if (x
== const0_rtx
)
29939 x
= CONST0_RTX (mode
);
29943 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
29946 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
29949 tree arg0
= CALL_EXPR_ARG (exp
, 0);
29950 tree arg1
= CALL_EXPR_ARG (exp
, 1);
29951 rtx op0
= expand_normal (arg0
);
29952 rtx op1
= expand_normal (arg1
);
29953 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
29954 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
29955 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
29957 if (VECTOR_MODE_P (mode0
))
29958 op0
= safe_vector_operand (op0
, mode0
);
29959 if (VECTOR_MODE_P (mode1
))
29960 op1
= safe_vector_operand (op1
, mode1
);
29962 if (optimize
|| !target
29963 || GET_MODE (target
) != tmode
29964 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
29965 target
= gen_reg_rtx (tmode
);
29967 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
29969 rtx x
= gen_reg_rtx (V4SImode
);
29970 emit_insn (gen_sse2_loadd (x
, op1
));
29971 op1
= gen_lowpart (TImode
, x
);
29974 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
29975 op0
= copy_to_mode_reg (mode0
, op0
);
29976 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
29977 op1
= copy_to_mode_reg (mode1
, op1
);
29979 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
29988 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
29991 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
29992 enum ix86_builtin_func_type m_type
,
29993 enum rtx_code sub_code
)
29998 bool comparison_p
= false;
30000 bool last_arg_constant
= false;
30001 int num_memory
= 0;
30004 enum machine_mode mode
;
30007 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30011 case MULTI_ARG_4_DF2_DI_I
:
30012 case MULTI_ARG_4_DF2_DI_I1
:
30013 case MULTI_ARG_4_SF2_SI_I
:
30014 case MULTI_ARG_4_SF2_SI_I1
:
30016 last_arg_constant
= true;
30019 case MULTI_ARG_3_SF
:
30020 case MULTI_ARG_3_DF
:
30021 case MULTI_ARG_3_SF2
:
30022 case MULTI_ARG_3_DF2
:
30023 case MULTI_ARG_3_DI
:
30024 case MULTI_ARG_3_SI
:
30025 case MULTI_ARG_3_SI_DI
:
30026 case MULTI_ARG_3_HI
:
30027 case MULTI_ARG_3_HI_SI
:
30028 case MULTI_ARG_3_QI
:
30029 case MULTI_ARG_3_DI2
:
30030 case MULTI_ARG_3_SI2
:
30031 case MULTI_ARG_3_HI2
:
30032 case MULTI_ARG_3_QI2
:
30036 case MULTI_ARG_2_SF
:
30037 case MULTI_ARG_2_DF
:
30038 case MULTI_ARG_2_DI
:
30039 case MULTI_ARG_2_SI
:
30040 case MULTI_ARG_2_HI
:
30041 case MULTI_ARG_2_QI
:
30045 case MULTI_ARG_2_DI_IMM
:
30046 case MULTI_ARG_2_SI_IMM
:
30047 case MULTI_ARG_2_HI_IMM
:
30048 case MULTI_ARG_2_QI_IMM
:
30050 last_arg_constant
= true;
30053 case MULTI_ARG_1_SF
:
30054 case MULTI_ARG_1_DF
:
30055 case MULTI_ARG_1_SF2
:
30056 case MULTI_ARG_1_DF2
:
30057 case MULTI_ARG_1_DI
:
30058 case MULTI_ARG_1_SI
:
30059 case MULTI_ARG_1_HI
:
30060 case MULTI_ARG_1_QI
:
30061 case MULTI_ARG_1_SI_DI
:
30062 case MULTI_ARG_1_HI_DI
:
30063 case MULTI_ARG_1_HI_SI
:
30064 case MULTI_ARG_1_QI_DI
:
30065 case MULTI_ARG_1_QI_SI
:
30066 case MULTI_ARG_1_QI_HI
:
30070 case MULTI_ARG_2_DI_CMP
:
30071 case MULTI_ARG_2_SI_CMP
:
30072 case MULTI_ARG_2_HI_CMP
:
30073 case MULTI_ARG_2_QI_CMP
:
30075 comparison_p
= true;
30078 case MULTI_ARG_2_SF_TF
:
30079 case MULTI_ARG_2_DF_TF
:
30080 case MULTI_ARG_2_DI_TF
:
30081 case MULTI_ARG_2_SI_TF
:
30082 case MULTI_ARG_2_HI_TF
:
30083 case MULTI_ARG_2_QI_TF
:
30089 gcc_unreachable ();
30092 if (optimize
|| !target
30093 || GET_MODE (target
) != tmode
30094 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30095 target
= gen_reg_rtx (tmode
);
30097 gcc_assert (nargs
<= 4);
30099 for (i
= 0; i
< nargs
; i
++)
30101 tree arg
= CALL_EXPR_ARG (exp
, i
);
30102 rtx op
= expand_normal (arg
);
30103 int adjust
= (comparison_p
) ? 1 : 0;
30104 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
30106 if (last_arg_constant
&& i
== nargs
- 1)
30108 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
30110 enum insn_code new_icode
= icode
;
30113 case CODE_FOR_xop_vpermil2v2df3
:
30114 case CODE_FOR_xop_vpermil2v4sf3
:
30115 case CODE_FOR_xop_vpermil2v4df3
:
30116 case CODE_FOR_xop_vpermil2v8sf3
:
30117 error ("the last argument must be a 2-bit immediate");
30118 return gen_reg_rtx (tmode
);
30119 case CODE_FOR_xop_rotlv2di3
:
30120 new_icode
= CODE_FOR_rotlv2di3
;
30122 case CODE_FOR_xop_rotlv4si3
:
30123 new_icode
= CODE_FOR_rotlv4si3
;
30125 case CODE_FOR_xop_rotlv8hi3
:
30126 new_icode
= CODE_FOR_rotlv8hi3
;
30128 case CODE_FOR_xop_rotlv16qi3
:
30129 new_icode
= CODE_FOR_rotlv16qi3
;
30131 if (CONST_INT_P (op
))
30133 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
30134 op
= GEN_INT (INTVAL (op
) & mask
);
30135 gcc_checking_assert
30136 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
30140 gcc_checking_assert
30142 && insn_data
[new_icode
].operand
[0].mode
== tmode
30143 && insn_data
[new_icode
].operand
[1].mode
== tmode
30144 && insn_data
[new_icode
].operand
[2].mode
== mode
30145 && insn_data
[new_icode
].operand
[0].predicate
30146 == insn_data
[icode
].operand
[0].predicate
30147 && insn_data
[new_icode
].operand
[1].predicate
30148 == insn_data
[icode
].operand
[1].predicate
);
30154 gcc_unreachable ();
30161 if (VECTOR_MODE_P (mode
))
30162 op
= safe_vector_operand (op
, mode
);
30164 /* If we aren't optimizing, only allow one memory operand to be
30166 if (memory_operand (op
, mode
))
30169 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
30172 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
30174 op
= force_reg (mode
, op
);
30178 args
[i
].mode
= mode
;
30184 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
30189 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
30190 GEN_INT ((int)sub_code
));
30191 else if (! comparison_p
)
30192 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
30195 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
30199 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
30204 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
30208 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
30212 gcc_unreachable ();
30222 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
30223 insns with vec_merge. */
30226 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
30230 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30231 rtx op1
, op0
= expand_normal (arg0
);
30232 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30233 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
30235 if (optimize
|| !target
30236 || GET_MODE (target
) != tmode
30237 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30238 target
= gen_reg_rtx (tmode
);
30240 if (VECTOR_MODE_P (mode0
))
30241 op0
= safe_vector_operand (op0
, mode0
);
30243 if ((optimize
&& !register_operand (op0
, mode0
))
30244 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30245 op0
= copy_to_mode_reg (mode0
, op0
);
30248 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
30249 op1
= copy_to_mode_reg (mode0
, op1
);
30251 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
30258 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
30261 ix86_expand_sse_compare (const struct builtin_description
*d
,
30262 tree exp
, rtx target
, bool swap
)
30265 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30266 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30267 rtx op0
= expand_normal (arg0
);
30268 rtx op1
= expand_normal (arg1
);
30270 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30271 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30272 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
30273 enum rtx_code comparison
= d
->comparison
;
30275 if (VECTOR_MODE_P (mode0
))
30276 op0
= safe_vector_operand (op0
, mode0
);
30277 if (VECTOR_MODE_P (mode1
))
30278 op1
= safe_vector_operand (op1
, mode1
);
30280 /* Swap operands if we have a comparison that isn't available in
30284 rtx tmp
= gen_reg_rtx (mode1
);
30285 emit_move_insn (tmp
, op1
);
30290 if (optimize
|| !target
30291 || GET_MODE (target
) != tmode
30292 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30293 target
= gen_reg_rtx (tmode
);
30295 if ((optimize
&& !register_operand (op0
, mode0
))
30296 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
30297 op0
= copy_to_mode_reg (mode0
, op0
);
30298 if ((optimize
&& !register_operand (op1
, mode1
))
30299 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
30300 op1
= copy_to_mode_reg (mode1
, op1
);
30302 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
30303 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
30310 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
30313 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
30317 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30318 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30319 rtx op0
= expand_normal (arg0
);
30320 rtx op1
= expand_normal (arg1
);
30321 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
30322 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
30323 enum rtx_code comparison
= d
->comparison
;
30325 if (VECTOR_MODE_P (mode0
))
30326 op0
= safe_vector_operand (op0
, mode0
);
30327 if (VECTOR_MODE_P (mode1
))
30328 op1
= safe_vector_operand (op1
, mode1
);
30330 /* Swap operands if we have a comparison that isn't available in
30332 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
30339 target
= gen_reg_rtx (SImode
);
30340 emit_move_insn (target
, const0_rtx
);
30341 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30343 if ((optimize
&& !register_operand (op0
, mode0
))
30344 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30345 op0
= copy_to_mode_reg (mode0
, op0
);
30346 if ((optimize
&& !register_operand (op1
, mode1
))
30347 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30348 op1
= copy_to_mode_reg (mode1
, op1
);
30350 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
30354 emit_insn (gen_rtx_SET (VOIDmode
,
30355 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30356 gen_rtx_fmt_ee (comparison
, QImode
,
30360 return SUBREG_REG (target
);
30363 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
30366 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
30370 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30371 rtx op1
, op0
= expand_normal (arg0
);
30372 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30373 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30375 if (optimize
|| target
== 0
30376 || GET_MODE (target
) != tmode
30377 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30378 target
= gen_reg_rtx (tmode
);
30380 if (VECTOR_MODE_P (mode0
))
30381 op0
= safe_vector_operand (op0
, mode0
);
30383 if ((optimize
&& !register_operand (op0
, mode0
))
30384 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30385 op0
= copy_to_mode_reg (mode0
, op0
);
30387 op1
= GEN_INT (d
->comparison
);
30389 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
30397 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description
*d
,
30398 tree exp
, rtx target
)
30401 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30402 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30403 rtx op0
= expand_normal (arg0
);
30404 rtx op1
= expand_normal (arg1
);
30406 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30407 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30408 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
30410 if (optimize
|| target
== 0
30411 || GET_MODE (target
) != tmode
30412 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30413 target
= gen_reg_rtx (tmode
);
30415 op0
= safe_vector_operand (op0
, mode0
);
30416 op1
= safe_vector_operand (op1
, mode1
);
30418 if ((optimize
&& !register_operand (op0
, mode0
))
30419 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30420 op0
= copy_to_mode_reg (mode0
, op0
);
30421 if ((optimize
&& !register_operand (op1
, mode1
))
30422 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30423 op1
= copy_to_mode_reg (mode1
, op1
);
30425 op2
= GEN_INT (d
->comparison
);
30427 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
30434 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
30437 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
30441 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30442 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30443 rtx op0
= expand_normal (arg0
);
30444 rtx op1
= expand_normal (arg1
);
30445 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
30446 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
30447 enum rtx_code comparison
= d
->comparison
;
30449 if (VECTOR_MODE_P (mode0
))
30450 op0
= safe_vector_operand (op0
, mode0
);
30451 if (VECTOR_MODE_P (mode1
))
30452 op1
= safe_vector_operand (op1
, mode1
);
30454 target
= gen_reg_rtx (SImode
);
30455 emit_move_insn (target
, const0_rtx
);
30456 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30458 if ((optimize
&& !register_operand (op0
, mode0
))
30459 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30460 op0
= copy_to_mode_reg (mode0
, op0
);
30461 if ((optimize
&& !register_operand (op1
, mode1
))
30462 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30463 op1
= copy_to_mode_reg (mode1
, op1
);
30465 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
30469 emit_insn (gen_rtx_SET (VOIDmode
,
30470 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30471 gen_rtx_fmt_ee (comparison
, QImode
,
30475 return SUBREG_REG (target
);
30478 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
30481 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
30482 tree exp
, rtx target
)
30485 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30486 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30487 tree arg2
= CALL_EXPR_ARG (exp
, 2);
30488 tree arg3
= CALL_EXPR_ARG (exp
, 3);
30489 tree arg4
= CALL_EXPR_ARG (exp
, 4);
30490 rtx scratch0
, scratch1
;
30491 rtx op0
= expand_normal (arg0
);
30492 rtx op1
= expand_normal (arg1
);
30493 rtx op2
= expand_normal (arg2
);
30494 rtx op3
= expand_normal (arg3
);
30495 rtx op4
= expand_normal (arg4
);
30496 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
30498 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
30499 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
30500 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
30501 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
30502 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
30503 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
30504 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
30506 if (VECTOR_MODE_P (modev2
))
30507 op0
= safe_vector_operand (op0
, modev2
);
30508 if (VECTOR_MODE_P (modev4
))
30509 op2
= safe_vector_operand (op2
, modev4
);
30511 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
30512 op0
= copy_to_mode_reg (modev2
, op0
);
30513 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
30514 op1
= copy_to_mode_reg (modei3
, op1
);
30515 if ((optimize
&& !register_operand (op2
, modev4
))
30516 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
30517 op2
= copy_to_mode_reg (modev4
, op2
);
30518 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
30519 op3
= copy_to_mode_reg (modei5
, op3
);
30521 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
30523 error ("the fifth argument must be an 8-bit immediate");
30527 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
30529 if (optimize
|| !target
30530 || GET_MODE (target
) != tmode0
30531 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
30532 target
= gen_reg_rtx (tmode0
);
30534 scratch1
= gen_reg_rtx (tmode1
);
30536 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
30538 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
30540 if (optimize
|| !target
30541 || GET_MODE (target
) != tmode1
30542 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
30543 target
= gen_reg_rtx (tmode1
);
30545 scratch0
= gen_reg_rtx (tmode0
);
30547 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
30551 gcc_assert (d
->flag
);
30553 scratch0
= gen_reg_rtx (tmode0
);
30554 scratch1
= gen_reg_rtx (tmode1
);
30556 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
30566 target
= gen_reg_rtx (SImode
);
30567 emit_move_insn (target
, const0_rtx
);
30568 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30571 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30572 gen_rtx_fmt_ee (EQ
, QImode
,
30573 gen_rtx_REG ((enum machine_mode
) d
->flag
,
30576 return SUBREG_REG (target
);
30583 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
30586 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
30587 tree exp
, rtx target
)
30590 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30591 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30592 tree arg2
= CALL_EXPR_ARG (exp
, 2);
30593 rtx scratch0
, scratch1
;
30594 rtx op0
= expand_normal (arg0
);
30595 rtx op1
= expand_normal (arg1
);
30596 rtx op2
= expand_normal (arg2
);
30597 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
30599 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
30600 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
30601 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
30602 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
30603 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
30605 if (VECTOR_MODE_P (modev2
))
30606 op0
= safe_vector_operand (op0
, modev2
);
30607 if (VECTOR_MODE_P (modev3
))
30608 op1
= safe_vector_operand (op1
, modev3
);
30610 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
30611 op0
= copy_to_mode_reg (modev2
, op0
);
30612 if ((optimize
&& !register_operand (op1
, modev3
))
30613 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
30614 op1
= copy_to_mode_reg (modev3
, op1
);
30616 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
30618 error ("the third argument must be an 8-bit immediate");
30622 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
30624 if (optimize
|| !target
30625 || GET_MODE (target
) != tmode0
30626 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
30627 target
= gen_reg_rtx (tmode0
);
30629 scratch1
= gen_reg_rtx (tmode1
);
30631 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
30633 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
30635 if (optimize
|| !target
30636 || GET_MODE (target
) != tmode1
30637 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
30638 target
= gen_reg_rtx (tmode1
);
30640 scratch0
= gen_reg_rtx (tmode0
);
30642 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
30646 gcc_assert (d
->flag
);
30648 scratch0
= gen_reg_rtx (tmode0
);
30649 scratch1
= gen_reg_rtx (tmode1
);
30651 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
30661 target
= gen_reg_rtx (SImode
);
30662 emit_move_insn (target
, const0_rtx
);
30663 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30666 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30667 gen_rtx_fmt_ee (EQ
, QImode
,
30668 gen_rtx_REG ((enum machine_mode
) d
->flag
,
30671 return SUBREG_REG (target
);
30677 /* Subroutine of ix86_expand_builtin to take care of insns with
30678 variable number of operands. */
30681 ix86_expand_args_builtin (const struct builtin_description
*d
,
30682 tree exp
, rtx target
)
30684 rtx pat
, real_target
;
30685 unsigned int i
, nargs
;
30686 unsigned int nargs_constant
= 0;
30687 int num_memory
= 0;
30691 enum machine_mode mode
;
30693 bool last_arg_count
= false;
30694 enum insn_code icode
= d
->icode
;
30695 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
30696 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
30697 enum machine_mode rmode
= VOIDmode
;
30699 enum rtx_code comparison
= d
->comparison
;
30701 switch ((enum ix86_builtin_func_type
) d
->flag
)
30703 case V2DF_FTYPE_V2DF_ROUND
:
30704 case V4DF_FTYPE_V4DF_ROUND
:
30705 case V4SF_FTYPE_V4SF_ROUND
:
30706 case V8SF_FTYPE_V8SF_ROUND
:
30707 case V4SI_FTYPE_V4SF_ROUND
:
30708 case V8SI_FTYPE_V8SF_ROUND
:
30709 return ix86_expand_sse_round (d
, exp
, target
);
30710 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
30711 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
30712 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
30713 case INT_FTYPE_V8SF_V8SF_PTEST
:
30714 case INT_FTYPE_V4DI_V4DI_PTEST
:
30715 case INT_FTYPE_V4DF_V4DF_PTEST
:
30716 case INT_FTYPE_V4SF_V4SF_PTEST
:
30717 case INT_FTYPE_V2DI_V2DI_PTEST
:
30718 case INT_FTYPE_V2DF_V2DF_PTEST
:
30719 return ix86_expand_sse_ptest (d
, exp
, target
);
30720 case FLOAT128_FTYPE_FLOAT128
:
30721 case FLOAT_FTYPE_FLOAT
:
30722 case INT_FTYPE_INT
:
30723 case UINT64_FTYPE_INT
:
30724 case UINT16_FTYPE_UINT16
:
30725 case INT64_FTYPE_INT64
:
30726 case INT64_FTYPE_V4SF
:
30727 case INT64_FTYPE_V2DF
:
30728 case INT_FTYPE_V16QI
:
30729 case INT_FTYPE_V8QI
:
30730 case INT_FTYPE_V8SF
:
30731 case INT_FTYPE_V4DF
:
30732 case INT_FTYPE_V4SF
:
30733 case INT_FTYPE_V2DF
:
30734 case INT_FTYPE_V32QI
:
30735 case V16QI_FTYPE_V16QI
:
30736 case V8SI_FTYPE_V8SF
:
30737 case V8SI_FTYPE_V4SI
:
30738 case V8HI_FTYPE_V8HI
:
30739 case V8HI_FTYPE_V16QI
:
30740 case V8QI_FTYPE_V8QI
:
30741 case V8SF_FTYPE_V8SF
:
30742 case V8SF_FTYPE_V8SI
:
30743 case V8SF_FTYPE_V4SF
:
30744 case V8SF_FTYPE_V8HI
:
30745 case V4SI_FTYPE_V4SI
:
30746 case V4SI_FTYPE_V16QI
:
30747 case V4SI_FTYPE_V4SF
:
30748 case V4SI_FTYPE_V8SI
:
30749 case V4SI_FTYPE_V8HI
:
30750 case V4SI_FTYPE_V4DF
:
30751 case V4SI_FTYPE_V2DF
:
30752 case V4HI_FTYPE_V4HI
:
30753 case V4DF_FTYPE_V4DF
:
30754 case V4DF_FTYPE_V4SI
:
30755 case V4DF_FTYPE_V4SF
:
30756 case V4DF_FTYPE_V2DF
:
30757 case V4SF_FTYPE_V4SF
:
30758 case V4SF_FTYPE_V4SI
:
30759 case V4SF_FTYPE_V8SF
:
30760 case V4SF_FTYPE_V4DF
:
30761 case V4SF_FTYPE_V8HI
:
30762 case V4SF_FTYPE_V2DF
:
30763 case V2DI_FTYPE_V2DI
:
30764 case V2DI_FTYPE_V16QI
:
30765 case V2DI_FTYPE_V8HI
:
30766 case V2DI_FTYPE_V4SI
:
30767 case V2DF_FTYPE_V2DF
:
30768 case V2DF_FTYPE_V4SI
:
30769 case V2DF_FTYPE_V4DF
:
30770 case V2DF_FTYPE_V4SF
:
30771 case V2DF_FTYPE_V2SI
:
30772 case V2SI_FTYPE_V2SI
:
30773 case V2SI_FTYPE_V4SF
:
30774 case V2SI_FTYPE_V2SF
:
30775 case V2SI_FTYPE_V2DF
:
30776 case V2SF_FTYPE_V2SF
:
30777 case V2SF_FTYPE_V2SI
:
30778 case V32QI_FTYPE_V32QI
:
30779 case V32QI_FTYPE_V16QI
:
30780 case V16HI_FTYPE_V16HI
:
30781 case V16HI_FTYPE_V8HI
:
30782 case V8SI_FTYPE_V8SI
:
30783 case V16HI_FTYPE_V16QI
:
30784 case V8SI_FTYPE_V16QI
:
30785 case V4DI_FTYPE_V16QI
:
30786 case V8SI_FTYPE_V8HI
:
30787 case V4DI_FTYPE_V8HI
:
30788 case V4DI_FTYPE_V4SI
:
30789 case V4DI_FTYPE_V2DI
:
30792 case V4SF_FTYPE_V4SF_VEC_MERGE
:
30793 case V2DF_FTYPE_V2DF_VEC_MERGE
:
30794 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
30795 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
30796 case V16QI_FTYPE_V16QI_V16QI
:
30797 case V16QI_FTYPE_V8HI_V8HI
:
30798 case V8QI_FTYPE_V8QI_V8QI
:
30799 case V8QI_FTYPE_V4HI_V4HI
:
30800 case V8HI_FTYPE_V8HI_V8HI
:
30801 case V8HI_FTYPE_V16QI_V16QI
:
30802 case V8HI_FTYPE_V4SI_V4SI
:
30803 case V8SF_FTYPE_V8SF_V8SF
:
30804 case V8SF_FTYPE_V8SF_V8SI
:
30805 case V4SI_FTYPE_V4SI_V4SI
:
30806 case V4SI_FTYPE_V8HI_V8HI
:
30807 case V4SI_FTYPE_V4SF_V4SF
:
30808 case V4SI_FTYPE_V2DF_V2DF
:
30809 case V4HI_FTYPE_V4HI_V4HI
:
30810 case V4HI_FTYPE_V8QI_V8QI
:
30811 case V4HI_FTYPE_V2SI_V2SI
:
30812 case V4DF_FTYPE_V4DF_V4DF
:
30813 case V4DF_FTYPE_V4DF_V4DI
:
30814 case V4SF_FTYPE_V4SF_V4SF
:
30815 case V4SF_FTYPE_V4SF_V4SI
:
30816 case V4SF_FTYPE_V4SF_V2SI
:
30817 case V4SF_FTYPE_V4SF_V2DF
:
30818 case V4SF_FTYPE_V4SF_DI
:
30819 case V4SF_FTYPE_V4SF_SI
:
30820 case V2DI_FTYPE_V2DI_V2DI
:
30821 case V2DI_FTYPE_V16QI_V16QI
:
30822 case V2DI_FTYPE_V4SI_V4SI
:
30823 case V2UDI_FTYPE_V4USI_V4USI
:
30824 case V2DI_FTYPE_V2DI_V16QI
:
30825 case V2DI_FTYPE_V2DF_V2DF
:
30826 case V2SI_FTYPE_V2SI_V2SI
:
30827 case V2SI_FTYPE_V4HI_V4HI
:
30828 case V2SI_FTYPE_V2SF_V2SF
:
30829 case V2DF_FTYPE_V2DF_V2DF
:
30830 case V2DF_FTYPE_V2DF_V4SF
:
30831 case V2DF_FTYPE_V2DF_V2DI
:
30832 case V2DF_FTYPE_V2DF_DI
:
30833 case V2DF_FTYPE_V2DF_SI
:
30834 case V2SF_FTYPE_V2SF_V2SF
:
30835 case V1DI_FTYPE_V1DI_V1DI
:
30836 case V1DI_FTYPE_V8QI_V8QI
:
30837 case V1DI_FTYPE_V2SI_V2SI
:
30838 case V32QI_FTYPE_V16HI_V16HI
:
30839 case V16HI_FTYPE_V8SI_V8SI
:
30840 case V32QI_FTYPE_V32QI_V32QI
:
30841 case V16HI_FTYPE_V32QI_V32QI
:
30842 case V16HI_FTYPE_V16HI_V16HI
:
30843 case V8SI_FTYPE_V4DF_V4DF
:
30844 case V8SI_FTYPE_V8SI_V8SI
:
30845 case V8SI_FTYPE_V16HI_V16HI
:
30846 case V4DI_FTYPE_V4DI_V4DI
:
30847 case V4DI_FTYPE_V8SI_V8SI
:
30848 case V4UDI_FTYPE_V8USI_V8USI
:
30849 if (comparison
== UNKNOWN
)
30850 return ix86_expand_binop_builtin (icode
, exp
, target
);
30853 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
30854 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
30855 gcc_assert (comparison
!= UNKNOWN
);
30859 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
30860 case V16HI_FTYPE_V16HI_SI_COUNT
:
30861 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
30862 case V8SI_FTYPE_V8SI_SI_COUNT
:
30863 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
30864 case V4DI_FTYPE_V4DI_INT_COUNT
:
30865 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
30866 case V8HI_FTYPE_V8HI_SI_COUNT
:
30867 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
30868 case V4SI_FTYPE_V4SI_SI_COUNT
:
30869 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
30870 case V4HI_FTYPE_V4HI_SI_COUNT
:
30871 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
30872 case V2DI_FTYPE_V2DI_SI_COUNT
:
30873 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
30874 case V2SI_FTYPE_V2SI_SI_COUNT
:
30875 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
30876 case V1DI_FTYPE_V1DI_SI_COUNT
:
30878 last_arg_count
= true;
30880 case UINT64_FTYPE_UINT64_UINT64
:
30881 case UINT_FTYPE_UINT_UINT
:
30882 case UINT_FTYPE_UINT_USHORT
:
30883 case UINT_FTYPE_UINT_UCHAR
:
30884 case UINT16_FTYPE_UINT16_INT
:
30885 case UINT8_FTYPE_UINT8_INT
:
30888 case V2DI_FTYPE_V2DI_INT_CONVERT
:
30891 nargs_constant
= 1;
30893 case V4DI_FTYPE_V4DI_INT_CONVERT
:
30896 nargs_constant
= 1;
30898 case V8HI_FTYPE_V8HI_INT
:
30899 case V8HI_FTYPE_V8SF_INT
:
30900 case V8HI_FTYPE_V4SF_INT
:
30901 case V8SF_FTYPE_V8SF_INT
:
30902 case V4SI_FTYPE_V4SI_INT
:
30903 case V4SI_FTYPE_V8SI_INT
:
30904 case V4HI_FTYPE_V4HI_INT
:
30905 case V4DF_FTYPE_V4DF_INT
:
30906 case V4SF_FTYPE_V4SF_INT
:
30907 case V4SF_FTYPE_V8SF_INT
:
30908 case V2DI_FTYPE_V2DI_INT
:
30909 case V2DF_FTYPE_V2DF_INT
:
30910 case V2DF_FTYPE_V4DF_INT
:
30911 case V16HI_FTYPE_V16HI_INT
:
30912 case V8SI_FTYPE_V8SI_INT
:
30913 case V4DI_FTYPE_V4DI_INT
:
30914 case V2DI_FTYPE_V4DI_INT
:
30916 nargs_constant
= 1;
30918 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
30919 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
30920 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
30921 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
30922 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
30923 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
30926 case V32QI_FTYPE_V32QI_V32QI_INT
:
30927 case V16HI_FTYPE_V16HI_V16HI_INT
:
30928 case V16QI_FTYPE_V16QI_V16QI_INT
:
30929 case V4DI_FTYPE_V4DI_V4DI_INT
:
30930 case V8HI_FTYPE_V8HI_V8HI_INT
:
30931 case V8SI_FTYPE_V8SI_V8SI_INT
:
30932 case V8SI_FTYPE_V8SI_V4SI_INT
:
30933 case V8SF_FTYPE_V8SF_V8SF_INT
:
30934 case V8SF_FTYPE_V8SF_V4SF_INT
:
30935 case V4SI_FTYPE_V4SI_V4SI_INT
:
30936 case V4DF_FTYPE_V4DF_V4DF_INT
:
30937 case V4DF_FTYPE_V4DF_V2DF_INT
:
30938 case V4SF_FTYPE_V4SF_V4SF_INT
:
30939 case V2DI_FTYPE_V2DI_V2DI_INT
:
30940 case V4DI_FTYPE_V4DI_V2DI_INT
:
30941 case V2DF_FTYPE_V2DF_V2DF_INT
:
30943 nargs_constant
= 1;
30945 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
30948 nargs_constant
= 1;
30950 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
30953 nargs_constant
= 1;
30955 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
30958 nargs_constant
= 1;
30960 case V2DI_FTYPE_V2DI_UINT_UINT
:
30962 nargs_constant
= 2;
30964 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
30965 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
30966 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
30967 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
30969 nargs_constant
= 1;
30971 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
30973 nargs_constant
= 2;
30975 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
:
30976 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
:
30980 gcc_unreachable ();
30983 gcc_assert (nargs
<= ARRAY_SIZE (args
));
30985 if (comparison
!= UNKNOWN
)
30987 gcc_assert (nargs
== 2);
30988 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
30991 if (rmode
== VOIDmode
|| rmode
== tmode
)
30995 || GET_MODE (target
) != tmode
30996 || !insn_p
->operand
[0].predicate (target
, tmode
))
30997 target
= gen_reg_rtx (tmode
);
30998 real_target
= target
;
31002 target
= gen_reg_rtx (rmode
);
31003 real_target
= simplify_gen_subreg (tmode
, target
, rmode
, 0);
31006 for (i
= 0; i
< nargs
; i
++)
31008 tree arg
= CALL_EXPR_ARG (exp
, i
);
31009 rtx op
= expand_normal (arg
);
31010 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
31011 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
31013 if (last_arg_count
&& (i
+ 1) == nargs
)
31015 /* SIMD shift insns take either an 8-bit immediate or
31016 register as count. But builtin functions take int as
31017 count. If count doesn't match, we put it in register. */
31020 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
31021 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
31022 op
= copy_to_reg (op
);
31025 else if ((nargs
- i
) <= nargs_constant
)
31030 case CODE_FOR_avx2_inserti128
:
31031 case CODE_FOR_avx2_extracti128
:
31032 error ("the last argument must be an 1-bit immediate");
31035 case CODE_FOR_sse4_1_roundsd
:
31036 case CODE_FOR_sse4_1_roundss
:
31038 case CODE_FOR_sse4_1_roundpd
:
31039 case CODE_FOR_sse4_1_roundps
:
31040 case CODE_FOR_avx_roundpd256
:
31041 case CODE_FOR_avx_roundps256
:
31043 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
31044 case CODE_FOR_sse4_1_roundps_sfix
:
31045 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
31046 case CODE_FOR_avx_roundps_sfix256
:
31048 case CODE_FOR_sse4_1_blendps
:
31049 case CODE_FOR_avx_blendpd256
:
31050 case CODE_FOR_avx_vpermilv4df
:
31051 error ("the last argument must be a 4-bit immediate");
31054 case CODE_FOR_sse4_1_blendpd
:
31055 case CODE_FOR_avx_vpermilv2df
:
31056 case CODE_FOR_xop_vpermil2v2df3
:
31057 case CODE_FOR_xop_vpermil2v4sf3
:
31058 case CODE_FOR_xop_vpermil2v4df3
:
31059 case CODE_FOR_xop_vpermil2v8sf3
:
31060 error ("the last argument must be a 2-bit immediate");
31063 case CODE_FOR_avx_vextractf128v4df
:
31064 case CODE_FOR_avx_vextractf128v8sf
:
31065 case CODE_FOR_avx_vextractf128v8si
:
31066 case CODE_FOR_avx_vinsertf128v4df
:
31067 case CODE_FOR_avx_vinsertf128v8sf
:
31068 case CODE_FOR_avx_vinsertf128v8si
:
31069 error ("the last argument must be a 1-bit immediate");
31072 case CODE_FOR_avx_vmcmpv2df3
:
31073 case CODE_FOR_avx_vmcmpv4sf3
:
31074 case CODE_FOR_avx_cmpv2df3
:
31075 case CODE_FOR_avx_cmpv4sf3
:
31076 case CODE_FOR_avx_cmpv4df3
:
31077 case CODE_FOR_avx_cmpv8sf3
:
31078 error ("the last argument must be a 5-bit immediate");
31082 switch (nargs_constant
)
31085 if ((nargs
- i
) == nargs_constant
)
31087 error ("the next to last argument must be an 8-bit immediate");
31091 error ("the last argument must be an 8-bit immediate");
31094 gcc_unreachable ();
31101 if (VECTOR_MODE_P (mode
))
31102 op
= safe_vector_operand (op
, mode
);
31104 /* If we aren't optimizing, only allow one memory operand to
31106 if (memory_operand (op
, mode
))
31109 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
31111 if (optimize
|| !match
|| num_memory
> 1)
31112 op
= copy_to_mode_reg (mode
, op
);
31116 op
= copy_to_reg (op
);
31117 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
31122 args
[i
].mode
= mode
;
31128 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
31131 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
31134 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31138 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31139 args
[2].op
, args
[3].op
);
31142 gcc_unreachable ();
31152 /* Subroutine of ix86_expand_builtin to take care of special insns
31153 with variable number of operands. */
31156 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
31157 tree exp
, rtx target
)
31161 unsigned int i
, nargs
, arg_adjust
, memory
;
31165 enum machine_mode mode
;
31167 enum insn_code icode
= d
->icode
;
31168 bool last_arg_constant
= false;
31169 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
31170 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
31171 enum { load
, store
} klass
;
31173 switch ((enum ix86_builtin_func_type
) d
->flag
)
31175 case VOID_FTYPE_VOID
:
31176 emit_insn (GEN_FCN (icode
) (target
));
31178 case VOID_FTYPE_UINT64
:
31179 case VOID_FTYPE_UNSIGNED
:
31185 case INT_FTYPE_VOID
:
31186 case UINT64_FTYPE_VOID
:
31187 case UNSIGNED_FTYPE_VOID
:
31192 case UINT64_FTYPE_PUNSIGNED
:
31193 case V2DI_FTYPE_PV2DI
:
31194 case V4DI_FTYPE_PV4DI
:
31195 case V32QI_FTYPE_PCCHAR
:
31196 case V16QI_FTYPE_PCCHAR
:
31197 case V8SF_FTYPE_PCV4SF
:
31198 case V8SF_FTYPE_PCFLOAT
:
31199 case V4SF_FTYPE_PCFLOAT
:
31200 case V4DF_FTYPE_PCV2DF
:
31201 case V4DF_FTYPE_PCDOUBLE
:
31202 case V2DF_FTYPE_PCDOUBLE
:
31203 case VOID_FTYPE_PVOID
:
31208 case VOID_FTYPE_PV2SF_V4SF
:
31209 case VOID_FTYPE_PV4DI_V4DI
:
31210 case VOID_FTYPE_PV2DI_V2DI
:
31211 case VOID_FTYPE_PCHAR_V32QI
:
31212 case VOID_FTYPE_PCHAR_V16QI
:
31213 case VOID_FTYPE_PFLOAT_V8SF
:
31214 case VOID_FTYPE_PFLOAT_V4SF
:
31215 case VOID_FTYPE_PDOUBLE_V4DF
:
31216 case VOID_FTYPE_PDOUBLE_V2DF
:
31217 case VOID_FTYPE_PLONGLONG_LONGLONG
:
31218 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
31219 case VOID_FTYPE_PINT_INT
:
31222 /* Reserve memory operand for target. */
31223 memory
= ARRAY_SIZE (args
);
31225 case V4SF_FTYPE_V4SF_PCV2SF
:
31226 case V2DF_FTYPE_V2DF_PCDOUBLE
:
31231 case V8SF_FTYPE_PCV8SF_V8SI
:
31232 case V4DF_FTYPE_PCV4DF_V4DI
:
31233 case V4SF_FTYPE_PCV4SF_V4SI
:
31234 case V2DF_FTYPE_PCV2DF_V2DI
:
31235 case V8SI_FTYPE_PCV8SI_V8SI
:
31236 case V4DI_FTYPE_PCV4DI_V4DI
:
31237 case V4SI_FTYPE_PCV4SI_V4SI
:
31238 case V2DI_FTYPE_PCV2DI_V2DI
:
31243 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
31244 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
31245 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
31246 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
31247 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
31248 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
31249 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
31250 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
31253 /* Reserve memory operand for target. */
31254 memory
= ARRAY_SIZE (args
);
31256 case VOID_FTYPE_UINT_UINT_UINT
:
31257 case VOID_FTYPE_UINT64_UINT_UINT
:
31258 case UCHAR_FTYPE_UINT_UINT_UINT
:
31259 case UCHAR_FTYPE_UINT64_UINT_UINT
:
31262 memory
= ARRAY_SIZE (args
);
31263 last_arg_constant
= true;
31266 gcc_unreachable ();
31269 gcc_assert (nargs
<= ARRAY_SIZE (args
));
31271 if (klass
== store
)
31273 arg
= CALL_EXPR_ARG (exp
, 0);
31274 op
= expand_normal (arg
);
31275 gcc_assert (target
== 0);
31278 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
31279 target
= gen_rtx_MEM (tmode
, op
);
31282 target
= force_reg (tmode
, op
);
31290 || !register_operand (target
, tmode
)
31291 || GET_MODE (target
) != tmode
)
31292 target
= gen_reg_rtx (tmode
);
31295 for (i
= 0; i
< nargs
; i
++)
31297 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
31300 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
31301 op
= expand_normal (arg
);
31302 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
31304 if (last_arg_constant
&& (i
+ 1) == nargs
)
31308 if (icode
== CODE_FOR_lwp_lwpvalsi3
31309 || icode
== CODE_FOR_lwp_lwpinssi3
31310 || icode
== CODE_FOR_lwp_lwpvaldi3
31311 || icode
== CODE_FOR_lwp_lwpinsdi3
)
31312 error ("the last argument must be a 32-bit immediate");
31314 error ("the last argument must be an 8-bit immediate");
31322 /* This must be the memory operand. */
31323 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
31324 op
= gen_rtx_MEM (mode
, op
);
31325 gcc_assert (GET_MODE (op
) == mode
31326 || GET_MODE (op
) == VOIDmode
);
31330 /* This must be register. */
31331 if (VECTOR_MODE_P (mode
))
31332 op
= safe_vector_operand (op
, mode
);
31334 gcc_assert (GET_MODE (op
) == mode
31335 || GET_MODE (op
) == VOIDmode
);
31336 op
= copy_to_mode_reg (mode
, op
);
31341 args
[i
].mode
= mode
;
31347 pat
= GEN_FCN (icode
) (target
);
31350 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
31353 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
31356 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
31359 gcc_unreachable ();
31365 return klass
== store
? 0 : target
;
31368 /* Return the integer constant in ARG. Constrain it to be in the range
31369 of the subparts of VEC_TYPE; issue an error if not. */
31372 get_element_number (tree vec_type
, tree arg
)
31374 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
31376 if (!host_integerp (arg
, 1)
31377 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
31379 error ("selector must be an integer constant in the range 0..%wi", max
);
31386 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31387 ix86_expand_vector_init. We DO have language-level syntax for this, in
31388 the form of (type){ init-list }. Except that since we can't place emms
31389 instructions from inside the compiler, we can't allow the use of MMX
31390 registers unless the user explicitly asks for it. So we do *not* define
31391 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
31392 we have builtins invoked by mmintrin.h that gives us license to emit
31393 these sorts of instructions. */
31396 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
31398 enum machine_mode tmode
= TYPE_MODE (type
);
31399 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
31400 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
31401 rtvec v
= rtvec_alloc (n_elt
);
31403 gcc_assert (VECTOR_MODE_P (tmode
));
31404 gcc_assert (call_expr_nargs (exp
) == n_elt
);
31406 for (i
= 0; i
< n_elt
; ++i
)
31408 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
31409 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
31412 if (!target
|| !register_operand (target
, tmode
))
31413 target
= gen_reg_rtx (tmode
);
31415 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
31419 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31420 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
31421 had a language-level syntax for referencing vector elements. */
31424 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
31426 enum machine_mode tmode
, mode0
;
31431 arg0
= CALL_EXPR_ARG (exp
, 0);
31432 arg1
= CALL_EXPR_ARG (exp
, 1);
31434 op0
= expand_normal (arg0
);
31435 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
31437 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
31438 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
31439 gcc_assert (VECTOR_MODE_P (mode0
));
31441 op0
= force_reg (mode0
, op0
);
31443 if (optimize
|| !target
|| !register_operand (target
, tmode
))
31444 target
= gen_reg_rtx (tmode
);
31446 ix86_expand_vector_extract (true, target
, op0
, elt
);
31451 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31452 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
31453 a language-level syntax for referencing vector elements. */
31456 ix86_expand_vec_set_builtin (tree exp
)
31458 enum machine_mode tmode
, mode1
;
31459 tree arg0
, arg1
, arg2
;
31461 rtx op0
, op1
, target
;
31463 arg0
= CALL_EXPR_ARG (exp
, 0);
31464 arg1
= CALL_EXPR_ARG (exp
, 1);
31465 arg2
= CALL_EXPR_ARG (exp
, 2);
31467 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
31468 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
31469 gcc_assert (VECTOR_MODE_P (tmode
));
31471 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
31472 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
31473 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
31475 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
31476 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
31478 op0
= force_reg (tmode
, op0
);
31479 op1
= force_reg (mode1
, op1
);
31481 /* OP0 is the source of these builtin functions and shouldn't be
31482 modified. Create a copy, use it and return it as target. */
31483 target
= gen_reg_rtx (tmode
);
31484 emit_move_insn (target
, op0
);
31485 ix86_expand_vector_set (true, target
, op1
, elt
);
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
                     enum machine_mode mode ATTRIBUTE_UNUSED,
                     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3, arg4;
  rtx op0, op1, op2, op3, op4, pat, insn;
  enum machine_mode mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* For CPU builtins that can be folded, fold first and expand the fold.  */
  switch (fcode)
    {
    case IX86_BUILTIN_CPU_INIT:
      {
        /* Make it call __cpu_indicator_init in libgcc.  */
        tree call_expr, fndecl, type;
        type = build_function_type_list (integer_type_node, NULL_TREE);
        fndecl = build_fn_decl ("__cpu_indicator_init", type);
        call_expr = build_call_expr (fndecl, 0);
        return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
      }
    case IX86_BUILTIN_CPU_IS:
    case IX86_BUILTIN_CPU_SUPPORTS:
      {
        tree arg0 = CALL_EXPR_ARG (exp, 0);
        tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
        gcc_assert (fold_expr != NULL_TREE);
        return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
      }
    default:
      break;
    }
  /* Determine whether the builtin function is available under the current ISA.
     Originally the builtin was not created if it wasn't applicable to the
     current ISA based on the command line switches.  With function specific
     options, we need to check in the context of the function making the call
     whether it is supported.  */
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
                                       NULL, (enum fpmath_unit) 0, false);

      if (!opts)
        error ("%qE needs unknown isa option", fndecl);
      else
        {
          gcc_assert (opts != NULL);
          error ("%qE needs isa option %s", fndecl, opts);
          free (opts);
        }
      return const0_rtx;
    }
  switch (fcode)
    {
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
               ? CODE_FOR_mmx_maskmovq
               : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
      op0 = gen_rtx_MEM (mode1, op0);

      if (!insn_data[icode].operand[0].predicate (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[1].predicate (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (!insn_data[icode].operand[2].predicate (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (!pat)
        return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
        op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
        op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
        op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
        op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
        op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);
    case IX86_BUILTIN_INFQ:
    case IX86_BUILTIN_HUGE_VALQ:
      {
        REAL_VALUE_TYPE inf;
        rtx tmp;

        real_inf (&inf);
        tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

        tmp = validize_mem (force_const_mem (mode, tmp));

        if (target == 0)
          target = gen_reg_rtx (mode);

        emit_move_insn (target, tmp);
        return target;
      }
    case IX86_BUILTIN_RDPMC:
    case IX86_BUILTIN_RDTSC:
    case IX86_BUILTIN_RDTSCP:

      op0 = gen_reg_rtx (DImode);
      op1 = gen_reg_rtx (DImode);

      if (fcode == IX86_BUILTIN_RDPMC)
        {
          arg0 = CALL_EXPR_ARG (exp, 0);
          op2 = expand_normal (arg0);
          if (!register_operand (op2, SImode))
            op2 = copy_to_mode_reg (SImode, op2);

          insn = (TARGET_64BIT
                  ? gen_rdpmc_rex64 (op0, op1, op2)
                  : gen_rdpmc (op0, op2));
          emit_insn (insn);
        }
      else if (fcode == IX86_BUILTIN_RDTSC)
        {
          insn = (TARGET_64BIT
                  ? gen_rdtsc_rex64 (op0, op1)
                  : gen_rdtsc (op0));
          emit_insn (insn);
        }
      else
        {
          op2 = gen_reg_rtx (SImode);

          insn = (TARGET_64BIT
                  ? gen_rdtscp_rex64 (op0, op1, op2)
                  : gen_rdtscp (op0, op2));
          emit_insn (insn);

          arg0 = CALL_EXPR_ARG (exp, 0);
          op4 = expand_normal (arg0);
          if (!address_operand (op4, VOIDmode))
            {
              op4 = convert_memory_address (Pmode, op4);
              op4 = copy_addr_to_reg (op4);
            }
          emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
        }

      if (target == 0)
        target = gen_reg_rtx (mode);

      if (TARGET_64BIT)
        {
          op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
                                     op1, 1, OPTAB_DIRECT);
          op0 = expand_simple_binop (DImode, IOR, op0, op1,
                                     op0, 1, OPTAB_DIRECT);
        }

      emit_move_insn (target, op0);
      return target;
    case IX86_BUILTIN_FXSAVE:
    case IX86_BUILTIN_FXRSTOR:
    case IX86_BUILTIN_FXSAVE64:
    case IX86_BUILTIN_FXRSTOR64:
      switch (fcode)
        {
        case IX86_BUILTIN_FXSAVE:
          icode = CODE_FOR_fxsave;
          break;
        case IX86_BUILTIN_FXRSTOR:
          icode = CODE_FOR_fxrstor;
          break;
        case IX86_BUILTIN_FXSAVE64:
          icode = CODE_FOR_fxsave64;
          break;
        case IX86_BUILTIN_FXRSTOR64:
          icode = CODE_FOR_fxrstor64;
          break;
        default:
          gcc_unreachable ();
        }

      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);

      if (!address_operand (op0, VOIDmode))
        {
          op0 = convert_memory_address (Pmode, op0);
          op0 = copy_addr_to_reg (op0);
        }
      op0 = gen_rtx_MEM (BLKmode, op0);

      pat = GEN_FCN (icode) (op0);
      if (pat)
        emit_insn (pat);
      return 0;
    case IX86_BUILTIN_XSAVE:
    case IX86_BUILTIN_XRSTOR:
    case IX86_BUILTIN_XSAVE64:
    case IX86_BUILTIN_XRSTOR64:
    case IX86_BUILTIN_XSAVEOPT:
    case IX86_BUILTIN_XSAVEOPT64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      if (!address_operand (op0, VOIDmode))
        {
          op0 = convert_memory_address (Pmode, op0);
          op0 = copy_addr_to_reg (op0);
        }
      op0 = gen_rtx_MEM (BLKmode, op0);

      op1 = force_reg (DImode, op1);

      if (TARGET_64BIT)
        {
          op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
                                     NULL, 1, OPTAB_DIRECT);
          switch (fcode)
            {
            case IX86_BUILTIN_XSAVE:
              icode = CODE_FOR_xsave_rex64;
              break;
            case IX86_BUILTIN_XRSTOR:
              icode = CODE_FOR_xrstor_rex64;
              break;
            case IX86_BUILTIN_XSAVE64:
              icode = CODE_FOR_xsave64;
              break;
            case IX86_BUILTIN_XRSTOR64:
              icode = CODE_FOR_xrstor64;
              break;
            case IX86_BUILTIN_XSAVEOPT:
              icode = CODE_FOR_xsaveopt_rex64;
              break;
            case IX86_BUILTIN_XSAVEOPT64:
              icode = CODE_FOR_xsaveopt64;
              break;
            default:
              gcc_unreachable ();
            }

          op2 = gen_lowpart (SImode, op2);
          op1 = gen_lowpart (SImode, op1);
          pat = GEN_FCN (icode) (op0, op1, op2);
        }
      else
        {
          switch (fcode)
            {
            case IX86_BUILTIN_XSAVE:
              icode = CODE_FOR_xsave;
              break;
            case IX86_BUILTIN_XRSTOR:
              icode = CODE_FOR_xrstor;
              break;
            case IX86_BUILTIN_XSAVEOPT:
              icode = CODE_FOR_xsaveopt;
              break;
            default:
              gcc_unreachable ();
            }
          pat = GEN_FCN (icode) (op0, op1);
        }

      if (pat)
        emit_insn (pat);
      return 0;
    case IX86_BUILTIN_LLWPCB:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_lwp_llwpcb;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
        op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
      emit_insn (gen_lwp_llwpcb (op0));
      return 0;

    case IX86_BUILTIN_SLWPCB:
      icode = CODE_FOR_lwp_slwpcb;
      if (!target
          || !insn_data[icode].operand[0].predicate (target, Pmode))
        target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;
    case IX86_BUILTIN_BEXTRI32:
    case IX86_BUILTIN_BEXTRI64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      icode = (fcode == IX86_BUILTIN_BEXTRI32
               ? CODE_FOR_tbm_bextri_si
               : CODE_FOR_tbm_bextri_di);
      if (!CONST_INT_P (op1))
        {
          error ("last argument must be an immediate");
          return const0_rtx;
        }
      else
        {
          unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
          unsigned char lsb_index = INTVAL (op1) & 0xFF;
          op1 = GEN_INT (length);
          op2 = GEN_INT (lsb_index);
          pat = GEN_FCN (icode) (target, op0, op1, op2);
          if (pat)
            emit_insn (pat);
          return target;
        }
    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;

rdrand_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
        {
          op1 = convert_memory_address (Pmode, op1);
          op1 = copy_addr_to_reg (op1);
        }
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op1 = gen_reg_rtx (SImode);
      emit_move_insn (op1, CONST1_RTX (SImode));

      /* Emit SImode conditional move.  */
      if (mode0 == HImode)
        {
          op2 = gen_reg_rtx (SImode);
          emit_insn (gen_zero_extendhisi2 (op2, op0));
        }
      else if (mode0 == SImode)
        op2 = op0;
      else
        op2 = gen_rtx_SUBREG (SImode, op0, 0);

      if (target == 0)
        target = gen_reg_rtx (SImode);

      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
                         const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
      return target;
    case IX86_BUILTIN_RDSEED16_STEP:
      icode = CODE_FOR_rdseedhi_1;
      mode0 = HImode;
      goto rdseed_step;

    case IX86_BUILTIN_RDSEED32_STEP:
      icode = CODE_FOR_rdseedsi_1;
      mode0 = SImode;
      goto rdseed_step;

    case IX86_BUILTIN_RDSEED64_STEP:
      icode = CODE_FOR_rdseeddi_1;
      mode0 = DImode;

rdseed_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
        {
          op1 = convert_memory_address (Pmode, op1);
          op1 = copy_addr_to_reg (op1);
        }
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op2 = gen_reg_rtx (QImode);

      pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
                         const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op2, pat));

      if (target == 0)
        target = gen_reg_rtx (SImode);

      emit_insn (gen_zero_extendqisi2 (target, op2));
      return target;
    case IX86_BUILTIN_ADDCARRYX32:
      icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
      mode0 = SImode;
      goto addcarryx;

    case IX86_BUILTIN_ADDCARRYX64:
      icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
      mode0 = DImode;

addcarryx:
      arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in.  */
      arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1.  */
      arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2.  */
      arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out.  */

      op0 = gen_reg_rtx (QImode);

      /* Generate CF from input operand.  */
      op1 = expand_normal (arg0);
      op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
      emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));

      /* Gen ADCX instruction to compute X+Y+CF.  */
      op2 = expand_normal (arg1);
      op3 = expand_normal (arg2);

      if (!REG_P (op2))
        op2 = copy_to_mode_reg (mode0, op2);
      if (!REG_P (op3))
        op3 = copy_to_mode_reg (mode0, op3);

      op0 = gen_reg_rtx (mode0);

      op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
      pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
      emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));

      /* Store the result.  */
      op4 = expand_normal (arg3);
      if (!address_operand (op4, VOIDmode))
        {
          op4 = convert_memory_address (Pmode, op4);
          op4 = copy_addr_to_reg (op4);
        }
      emit_move_insn (gen_rtx_MEM (mode0, op4), op0);

      /* Return current CF value.  */
      if (target == 0)
        target = gen_reg_rtx (QImode);

      PUT_MODE (pat, QImode);
      emit_insn (gen_rtx_SET (VOIDmode, target, pat));
      return target;
    case IX86_BUILTIN_GATHERSIV2DF:
      icode = CODE_FOR_avx2_gathersiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DF:
      icode = CODE_FOR_avx2_gatherdiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DF:
      icode = CODE_FOR_avx2_gatherdiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SF:
      icode = CODE_FOR_avx2_gathersiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SF:
      icode = CODE_FOR_avx2_gathersiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SF:
      icode = CODE_FOR_avx2_gatherdiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV2DI:
      icode = CODE_FOR_avx2_gathersiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DI:
      icode = CODE_FOR_avx2_gatherdiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DI:
      icode = CODE_FOR_avx2_gatherdiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SI:
      icode = CODE_FOR_avx2_gathersiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SI:
      icode = CODE_FOR_avx2_gathersiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SI:
      icode = CODE_FOR_avx2_gatherdiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
    gather_gen:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      /* Note the arg order is different from the operand order.  */
      mode0 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[3].mode;
      mode3 = insn_data[icode].operand[4].mode;
      mode4 = insn_data[icode].operand[5].mode;

      if (target == NULL_RTX
          || GET_MODE (target) != insn_data[icode].operand[0].mode)
        subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
      else
        subtarget = target;

      if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
          || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
        {
          rtx half = gen_reg_rtx (V4SImode);
          if (!nonimmediate_operand (op2, V8SImode))
            op2 = copy_to_mode_reg (V8SImode, op2);
          emit_insn (gen_vec_extract_lo_v8si (half, op2));
          op2 = half;
        }
      else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
               || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
        {
          rtx (*gen) (rtx, rtx);
          rtx half = gen_reg_rtx (mode0);
          if (mode0 == V4SFmode)
            gen = gen_vec_extract_lo_v8sf;
          else
            gen = gen_vec_extract_lo_v8si;
          if (!nonimmediate_operand (op0, GET_MODE (op0)))
            op0 = copy_to_mode_reg (GET_MODE (op0), op0);
          emit_insn (gen (half, op0));
          op0 = half;
          if (!nonimmediate_operand (op3, GET_MODE (op3)))
            op3 = copy_to_mode_reg (GET_MODE (op3), op3);
          emit_insn (gen (half, op3));
          op3 = half;
        }
      /* Force memory operand only with base register here.  But we
         don't want to do it on memory operand for other builtin
         functions.  */
      op1 = force_reg (Pmode, convert_to_mode (Pmode, op1, 1));

      if (!insn_data[icode].operand[1].predicate (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[2].predicate (op1, Pmode))
        op1 = copy_to_mode_reg (Pmode, op1);
      if (!insn_data[icode].operand[3].predicate (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      if (!insn_data[icode].operand[4].predicate (op3, mode3))
        op3 = copy_to_mode_reg (mode3, op3);
      if (!insn_data[icode].operand[5].predicate (op4, mode4))
        {
          error ("last argument must be scale 1, 2, 4, 8");
          return const0_rtx;
        }
      /* Optimize.  If mask is known to have all high bits set,
         replace op0 with pc_rtx to signal that the instruction
         overwrites the whole destination and doesn't use its
         previous contents.  */
      if (optimize)
        {
          if (TREE_CODE (arg3) == VECTOR_CST)
            {
              unsigned int negative = 0;
              for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
                {
                  tree cst = VECTOR_CST_ELT (arg3, i);
                  if (TREE_CODE (cst) == INTEGER_CST
                      && tree_int_cst_sign_bit (cst))
                    negative++;
                  else if (TREE_CODE (cst) == REAL_CST
                           && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
                    negative++;
                }
              if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
                op0 = pc_rtx;
            }
          else if (TREE_CODE (arg3) == SSA_NAME)
            {
              /* Recognize also when mask is like:
                 __v2df src = _mm_setzero_pd ();
                 __v2df mask = _mm_cmpeq_pd (src, src);
                 or
                 __v8sf src = _mm256_setzero_ps ();
                 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
                 as that is a cheaper way to load all ones into
                 a register than having to load a constant from
                 memory.  */
              gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
              if (is_gimple_call (def_stmt))
                {
                  tree fndecl = gimple_call_fndecl (def_stmt);
                  if (fndecl
                      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
                    switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
                      {
                      case IX86_BUILTIN_CMPPD:
                      case IX86_BUILTIN_CMPPS:
                      case IX86_BUILTIN_CMPPD256:
                      case IX86_BUILTIN_CMPPS256:
                        if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
                          break;
                        /* FALLTHRU */
                      case IX86_BUILTIN_CMPEQPD:
                      case IX86_BUILTIN_CMPEQPS:
                        if (initializer_zerop (gimple_call_arg (def_stmt, 0))
                            && initializer_zerop (gimple_call_arg (def_stmt,
                                                                   1)))
                          op0 = pc_rtx;
                        break;
                      default:
                        break;
                      }
                }
            }
        }
      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
      if (! pat)
        return const0_rtx;
      emit_insn (pat);

      if (fcode == IX86_BUILTIN_GATHERDIV8SF
          || fcode == IX86_BUILTIN_GATHERDIV8SI)
        {
          enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
                                    ? V4SFmode : V4SImode;
          if (target == NULL_RTX)
            target = gen_reg_rtx (tmode);
          if (tmode == V4SFmode)
            emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
          else
            emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
        }
      else
        target = subtarget;

      return target;
    case IX86_BUILTIN_XABORT:
      icode = CODE_FOR_xabort;
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;
      if (!insn_data[icode].operand[0].predicate (op0, mode0))
        {
          error ("the xabort's argument must be an 8-bit immediate");
          return const0_rtx;
        }
      emit_insn (gen_xabort (op0));
      return 0;

    default:
      break;
    }
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_special_args_builtin (d, exp, target);

  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      switch (fcode)
        {
        case IX86_BUILTIN_FABSQ:
        case IX86_BUILTIN_COPYSIGNQ:
          if (!TARGET_SSE)
            /* Emit a normal call if SSE isn't available.  */
            return expand_call (exp, target, ignore);
        default:
          return ix86_expand_args_builtin (d, exp, target);
        }

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);

  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);

  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
                                            (enum ix86_builtin_func_type)
                                            d->flag, d->comparison);

  gcc_unreachable ();
}
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
                                  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_SQRTPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_SQRTPD256];
        }
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
        }
      break;

    case BUILT_IN_IFLOOR:
    case BUILT_IN_LFLOOR:
    case BUILT_IN_LLFLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IFLOORF:
    case BUILT_IN_LFLOORF:
    case BUILT_IN_LLFLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
        }
      break;

    case BUILT_IN_ICEIL:
    case BUILT_IN_LCEIL:
    case BUILT_IN_LLCEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_ICEILF:
    case BUILT_IN_LCEILF:
    case BUILT_IN_LLCEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
        }
      break;

    case BUILT_IN_IRINT:
    case BUILT_IN_LRINT:
    case BUILT_IN_LLRINT:
      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IRINTF:
    case BUILT_IN_LRINTF:
    case BUILT_IN_LLRINTF:
      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
        }
      break;

    case BUILT_IN_IROUND:
    case BUILT_IN_LROUND:
    case BUILT_IN_LLROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
          else if (out_n == 8 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
        }
      break;

    case BUILT_IN_IROUNDF:
    case BUILT_IN_LROUNDF:
    case BUILT_IN_LLROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
        }
      break;

    case BUILT_IN_COPYSIGN:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
        }
      break;

    case BUILT_IN_COPYSIGNF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
        }
      break;

    case BUILT_IN_FLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_FLOORPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPD256];
        }
      break;

    case BUILT_IN_FLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_FLOORPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_FLOORPS256];
        }
      break;

    case BUILT_IN_CEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_CEILPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPD256];
        }
      break;

    case BUILT_IN_CEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_CEILPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_CEILPS256];
        }
      break;

    case BUILT_IN_TRUNC:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_TRUNCPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
        }
      break;

    case BUILT_IN_TRUNCF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_TRUNCPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
        }
      break;

    case BUILT_IN_RINT:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_RINTPD];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_RINTPD256];
        }
      break;

    case BUILT_IN_RINTF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_RINTPS];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_RINTPS256];
        }
      break;

    case BUILT_IN_ROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
          else if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
        }
      break;

    case BUILT_IN_ROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
        break;

      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
          else if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
        }
      break;

    case BUILT_IN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
            return ix86_builtins[IX86_BUILTIN_VFMADDPD];
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
        }
      break;

    case BUILT_IN_FMAF:
      if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
            return ix86_builtins[IX86_BUILTIN_VFMADDPS];
          if (out_n == 8 && in_n == 8)
            return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
        }
      break;

    default:
      break;
    }

  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
                                type_in);

  return NULL_TREE;
}
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
    case BUILT_IN_ATAN:
    case BUILT_IN_ATAN2:
    case BUILT_IN_ATANH:
    case BUILT_IN_CBRT:
    case BUILT_IN_SINH:
    case BUILT_IN_SIN:
    case BUILT_IN_ASINH:
    case BUILT_IN_ASIN:
    case BUILT_IN_COSH:
    case BUILT_IN_COS:
    case BUILT_IN_ACOSH:
    case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
        return NULL_TREE;
      break;

    case BUILT_IN_EXPF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG10F:
    case BUILT_IN_POWF:
    case BUILT_IN_TANHF:
    case BUILT_IN_TANF:
    case BUILT_IN_ATANF:
    case BUILT_IN_ATAN2F:
    case BUILT_IN_ATANHF:
    case BUILT_IN_CBRTF:
    case BUILT_IN_SINHF:
    case BUILT_IN_SINF:
    case BUILT_IN_ASINHF:
    case BUILT_IN_ASINF:
    case BUILT_IN_COSHF:
    case BUILT_IN_COSF:
    case BUILT_IN_ACOSHF:
    case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));

  if (fn == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (fn == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      name[4] = 'd';
      name[5] = '2';
      if (el_mode != DFmode
          || n != 2)
        return NULL_TREE;
      break;

    case BUILT_IN_SINF:
    case BUILT_IN_COSF:
    case BUILT_IN_EXPF:
    case BUILT_IN_POWF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG2F:
    case BUILT_IN_LOG10F:
      name[4] = 's';
      name[5] = '4';
      if (el_mode != SFmode
          || n != 4)
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Returns a decl of a function that implements gather load with
   memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_gather (const_tree mem_vectype,
                               const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  if (! TARGET_AVX2)
    return NULL_TREE;

  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
          && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*gather* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (mem_vectype))
    {
    case V2DFmode:
      code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
      break;
    case V4DFmode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
      break;
    case V2DImode:
      code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
      break;
    case V4DImode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
      break;
    case V4SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
      break;
    case V8SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
      break;
    case V4SImode:
      code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
      break;
    case V8SImode:
      code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
      break;
    default:
      return NULL_TREE;
    }

  return ix86_builtins[code];
}
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
                         bool sqrt ATTRIBUTE_UNUSED)
{
  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
         && flag_finite_math_only && !flag_trapping_math
         && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
        /* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS_NR:
        return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];

      case IX86_BUILTIN_SQRTPS_NR256:
        return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];

      default:
        return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
        /* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
        return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
        return NULL_TREE;
      }
}
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
        return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
         a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
        {
          if (ipar[i] >= 2)
            return 0;
          mask |= ipar[i] << i;
        }
      for (i = 2; i < 4; ++i)
        {
          if (ipar[i] < 2)
            return 0;
          mask |= (ipar[i] - 2) << i;
        }
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
         within the low 128-bit lane, but the high 128-bit lane must
         mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
        if (ipar[i] + 4 != ipar[i + 4])
          return 0;
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
         the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
        mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
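/* Worked example: for V4SFmode the parallel [1 0 3 2] (swap pairs) uses
   two mask bits per element, so the loop above computes
   mask = 1 | (0 << 2) | (3 << 4) | (2 << 6) = 0xb1 and the function
   returns 0xb1 + 1.  */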
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
        return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
        return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
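/* Worked example: for V8SFmode the element sources are numbered 0-7 for
   operand 1 and 8-15 for operand 2, in 128-bit halves.  The parallel
   [8 9 10 11 0 1 2 3] selects half #2 then half #0, so the loop builds
   mask = 2 | (0 << 4) = 0x02 and the function returns 0x02 + 1.  */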
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */

rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (ix86_using_red_zone ())
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
                      gen_rtx_SET (VOIDmode,
                                   gen_rtx_MEM (DImode,
                                                gen_rtx_PRE_DEC (DImode,
                                                        stack_pointer_rtx)),
                                   operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_double_mode (mode, &operand, 1, operands, operands + 1);
            emit_insn (
                        gen_rtx_SET (VOIDmode,
                                     gen_rtx_MEM (SImode,
                                                  gen_rtx_PRE_DEC (Pmode,
                                                        stack_pointer_rtx)),
                                     operands[1]));
            emit_insn (
                        gen_rtx_SET (VOIDmode,
                                     gen_rtx_MEM (SImode,
                                                  gen_rtx_PRE_DEC (Pmode,
                                                        stack_pointer_rtx)),
                                     operands[0]));
          }
          break;
        case HImode:
          /* Store HImodes as SImodes.  */
          operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
                      gen_rtx_SET (VOIDmode,
                                   gen_rtx_MEM (GET_MODE (operand),
                                                gen_rtx_PRE_DEC (SImode,
                                                        stack_pointer_rtx)),
                                   operand));
          break;
        default:
          gcc_unreachable ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
33183 ix86_free_from_memory (enum machine_mode mode
)
33185 if (!ix86_using_red_zone ())
33189 if (mode
== DImode
|| TARGET_64BIT
)
33193 /* Use LEA to deallocate stack space. In peephole2 it will be converted
33194 to pop or add instruction if registers are available. */
33195 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
33196 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
/* Return a register priority for hard reg REGNO.  */
static int
ix86_register_priority (int hard_regno)
{
  /* ebp and r13 as the base always wants a displacement, r12 as the
     base always wants an index.  So discourage their usage in an
     address.  */
  if (hard_regno == R12_REG || hard_regno == R13_REG)
    return 0;
  if (hard_regno == BP_REG)
    return 1;
  /* New x86-64 int registers result in bigger code size.  Discourage
     them.  */
  if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
    return 2;
  /* New x86-64 SSE registers result in bigger code size.  Discourage
     them.  */
  if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
    return 2;
  /* Usage of AX register results in smaller code.  Prefer it.  */
  if (hard_regno == 0)
    return 4;
  return 3;
}
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
        return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
         zero above.  We only want to wind up preferring 80387 registers if
         we plan on doing computation with them.  */
      if (TARGET_80387
          && standard_80387_constant_p (x) > 0)
        {
          /* Limit class to non-sse.  */
          if (regclass == FLOAT_SSE_REGS)
            return FLOAT_REGS;
          if (regclass == FP_TOP_SSE_REGS)
            return FP_TOP_REG;
          if (regclass == FP_SECOND_SSE_REGS)
            return FP_SECOND_REG;
          if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
            return regclass;
        }

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
        return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
        return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
static reg_class_t
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  mode = GET_MODE (x);
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;

  if (X87_FLOAT_MODE_P (mode))
    {
      if (regclass == FP_TOP_SSE_REGS)
        return FP_TOP_REG;
      else if (regclass == FP_SECOND_SSE_REGS)
        return FP_SECOND_REG;
      else
        return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  return regclass;
}
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
                       enum machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && rclass == GENERAL_REGS
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
                    ? CODE_FOR_reload_noff_load
                    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (!TARGET_64BIT
      && !in_p && mode == QImode
      && (rclass == GENERAL_REGS
          || rclass == LEGACY_REGS
          || rclass == NON_Q_REGS
          || rclass == SIREG
          || rclass == DIREG
          || rclass == INDEX_REGS))
    {
      int regno;

      if (REG_P (x))
        regno = REGNO (x);
      else
        regno = -1;

      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
        regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
        return Q_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
          (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
                                       (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
           (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case CLOBBERED_REGS:
      case Q_REGS:
      case SIREG:
      case DIREG:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
        return true;

      default:
        break;
    }

  return false;
}
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   To optimize register_move_cost performance, allow inline variant.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                                enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict || lra_in_progress);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
        return true;

      /* If the target says that inter-unit moves are more expensive
         than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES)
        return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        return true;
    }

  return false;
}

bool
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
                              enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      if (mode == XFmode)
        return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
        return (TARGET_64BIT ? 4 : 6);
      else
        return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
  else
    {
      if (COMPLEX_MODE_P (mode))
        return 2;
      else
        return 1;
    }
}
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
                               enum reg_class regclass)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
         disallow a change to these modes, reload will assume it's ok to
         drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
         the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
        return true;

      /* Vector registers do not support subreg with nonzero offsets, which
         are otherwise valid for integer registers.  Since we can't see
         whether we have a nonzero offset from here, prohibit all
         nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
        return true;
    }

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

static inline int
inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
                         int in)
{
  int cost;
  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
        {
        case SFmode:
          index = 0;
          break;
        case DFmode:
          index = 1;
          break;
        case XFmode:
          index = 2;
          break;
        default:
          return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        case 16:
          index = 2;
          break;
        default:
          return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 4:
          index = 0;
          break;
        case 8:
          index = 1;
          break;
        default:
          return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (Q_CLASS_P (regclass) || TARGET_64BIT)
        {
          if (!in)
            return ix86_cost->int_store[0];
          if (TARGET_PARTIAL_REG_DEPENDENCY
              && optimize_function_for_speed_p (cfun))
            cost = ix86_cost->movzbl_load;
          else
            cost = ix86_cost->int_load[0];
          if (in == 2)
            return MAX (cost, ix86_cost->int_store[0]);
          return cost;
        }
      else
        {
          if (in == 2)
            return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
          if (in)
            return ix86_cost->movzbl_load;
          else
            return ix86_cost->int_store[0] + 4;
        }
      break;
    case 2:
      if (in == 2)
        return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
        mode = XFmode;
      if (in == 2)
        cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
      else if (in)
        cost = ix86_cost->int_load[2];
      else
        cost = ix86_cost->int_store[2];
      return (cost * (((int) GET_MODE_SIZE (mode)
                      + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}
static int
ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
                       bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
                         reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
  if (inline_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (targetm.class_max_nregs (class1, mode)
          > targetm.class_max_nregs (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, ix86_cost->mmxsse_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
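
/* A rough worked example (editorial, not from the original source):
   an SImode move between GENERAL_REGS and SSE_REGS needs no secondary
   memory, so control reaches the MMX/SSE-vs-integer check above and
   returns MAX (8, ix86_cost->mmxsse_to_integer).  The floor of 8 keeps
   the allocator from bouncing values between units even when a cost
   table claims cheap cross-unit moves.  */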
/* Return TRUE if hard register REGNO can hold a value of machine-mode
   MODE.  */

bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;
  if (STACK_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
         out of SSE registers, even when no operation instructions
         are available.  OImode move is available only when AVX is
         enabled.  */
      return ((TARGET_AVX && mode == OImode)
              || VALID_AVX256_REG_MODE (mode)
              || VALID_SSE_REG_MODE (mode)
              || VALID_SSE2_REG_MODE (mode)
              || VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
         so if the register is available at all, then we can move data of
         the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
         but then they do cause partial register stalls.  */
      if (TARGET_64BIT || QI_REGNO_P (regno))
        return true;
      if (!TARGET_PARTIAL_REG_STALL)
        return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return true;
  else if (VALID_FP_MODE_P (mode))
    return true;
  else if (VALID_DFP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}

/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 32
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 32
            && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
            && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
            && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
/* Return the cost of moving between two registers of mode MODE.  */

static int
ix86_set_reg_reg_cost (enum machine_mode mode)
{
  unsigned int units = UNITS_PER_WORD;

  switch (GET_MODE_CLASS (mode))
    {
    default:
      break;

    case MODE_CC:
      units = GET_MODE_SIZE (CCmode);
      break;

    case MODE_FLOAT:
      if ((TARGET_SSE && mode == TFmode)
          || (TARGET_80387 && mode == XFmode)
          || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
          || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
        units = GET_MODE_SIZE (mode);
      break;

    case MODE_COMPLEX_FLOAT:
      if ((TARGET_SSE && mode == TCmode)
          || (TARGET_80387 && mode == XCmode)
          || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
          || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
        units = GET_MODE_SIZE (mode);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      if ((TARGET_AVX && VALID_AVX256_REG_MODE (mode))
          || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
          || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
          || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
        units = GET_MODE_SIZE (mode);
    }

  /* Return the cost of moving between two registers of mode MODE,
     assuming that the move will be in pieces of at most UNITS bytes.  */
  return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
}
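
/* A rough worked example (editorial, not from the original source):
   on 32-bit (UNITS_PER_WORD == 4) a DImode register move is done in
   two word-size pieces, so this returns
   COSTS_N_INSNS ((8 + 4 - 1) / 4) == COSTS_N_INSNS (2), while SImode
   stays at COSTS_N_INSNS (1).  With TARGET_SSE2, DFmode gets
   units == 8 and is likewise priced as a single insn.  */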
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
                bool speed)
{
  enum rtx_code code = (enum rtx_code) code_i;
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  enum machine_mode mode = GET_MODE (x);
  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;

  switch (code)
    {
    case SET:
      if (register_operand (SET_DEST (x), VOIDmode)
          && reg_or_0_operand (SET_SRC (x), VOIDmode))
        {
          *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
          return true;
        }
      return false;

    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
        *total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
        *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
               && (!TARGET_64BIT
                   || (GET_CODE (x) != LABEL_REF
                       && (GET_CODE (x) != SYMBOL_REF
                           || !SYMBOL_REF_LOCAL_P (x)))))
        *total = 1;
      else
        *total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
        {
          *total = 0;
          return true;
        }
      switch (standard_80387_constant_p (x))
        {
        case 1: /* 0.0 */
          *total = 1;
          return true;
        default: /* Other constants */
          *total = 2;
          return true;
        case 0:
        case -1:
          break;
        }
      if (SSE_FLOAT_MODE_P (mode))
        {
    case CONST_VECTOR:
          switch (standard_sse_constant_p (x))
            {
            case 0:
              break;
            case 1:  /* 0: xor eliminates false dependency */
              *total = 0;
              return true;
            default: /* -1: cmp contains false dependency */
              *total = 1;
              return true;
            }
        }
      /* Fall back to (MEM (SYMBOL_REF)), since that's where
         it'll probably end up.  Add a penalty for size.  */
      *total = (COSTS_N_INSNS (1)
                + (flag_pic != 0 && !TARGET_64BIT)
                + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
      return true;

    case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = cost->add;
      else
        *total = cost->movzx;
      return false;

    case SIGN_EXTEND:
      *total = cost->movsx;
      return false;

    case ASHIFT:
      if (SCALAR_INT_MODE_P (mode)
          && GET_MODE_SIZE (mode) < UNITS_PER_WORD
          && CONST_INT_P (XEXP (x, 1)))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = cost->add;
              return false;
            }
          if ((value == 2 || value == 3)
              && cost->lea <= cost->shift_const)
            {
              *total = cost->lea;
              return false;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          /* ??? Should be SSE vector operation cost.  */
          /* At least for published AMD latencies, this really is the same
             as the latency for a simple fpu operation like fabs.  */
          /* V*QImode is emulated with 1-11 insns.  */
          if (mode == V16QImode || mode == V32QImode)
            {
              int count = 11;
              if (TARGET_XOP && mode == V16QImode)
                {
                  /* For XOP we use vpshab, which requires a broadcast of the
                     value to the variable shift insn.  For constants this
                     means a V16Q const in mem; even when we can perform the
                     shift with one insn set the cost to prefer paddb.  */
                  if (CONSTANT_P (XEXP (x, 1)))
                    {
                      *total = (cost->fabs
                                + rtx_cost (XEXP (x, 0), code, 0, speed)
                                + (speed ? 2 : COSTS_N_BYTES (16)));
                      return true;
                    }
                  count = 3;
                }
              else if (TARGET_SSSE3)
                count = 7;
              *total = cost->fabs * count;
            }
          else
            *total = cost->fabs;
        }
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        {
          if (CONST_INT_P (XEXP (x, 1)))
            {
              if (INTVAL (XEXP (x, 1)) > 32)
                *total = cost->shift_const + COSTS_N_INSNS (2);
              else
                *total = cost->shift_const * 2;
            }
          else
            {
              if (GET_CODE (XEXP (x, 1)) == AND)
                *total = cost->shift_var * 2;
              else
                *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
            }
        }
      else
        {
          if (CONST_INT_P (XEXP (x, 1)))
            *total = cost->shift_const;
          else
            *total = cost->shift_var;
        }
      return false;

    case FMA:
      {
        rtx sub;

        gcc_assert (FLOAT_MODE_P (mode));
        gcc_assert (TARGET_FMA || TARGET_FMA4);

        /* ??? SSE scalar/vector cost should be used here.  */
        /* ??? Bald assumption that fma has the same cost as fmul.  */
        *total = cost->fmul;
        *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);

        /* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
        sub = XEXP (x, 0);
        if (GET_CODE (sub) == NEG)
          sub = XEXP (sub, 0);
        *total += rtx_cost (sub, FMA, 0, speed);

        sub = XEXP (x, 2);
        if (GET_CODE (sub) == NEG)
          sub = XEXP (sub, 0);
        *total += rtx_cost (sub, FMA, 2, speed);
        return true;
      }

    case MULT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        {
          /* ??? SSE scalar cost should be used here.  */
          *total = cost->fmul;
          return false;
        }
      else if (X87_FLOAT_MODE_P (mode))
        {
          *total = cost->fmul;
          return false;
        }
      else if (FLOAT_MODE_P (mode))
        {
          /* ??? SSE vector cost should be used here.  */
          *total = cost->fmul;
          return false;
        }
      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          /* V*QImode is emulated with 7-13 insns.  */
          if (mode == V16QImode || mode == V32QImode)
            {
              int extra = 11;
              if (TARGET_XOP && mode == V16QImode)
                extra = 5;
              else if (TARGET_SSSE3)
                extra = 6;
              *total = cost->fmul * 2 + cost->fabs * extra;
            }
          /* V*DImode is emulated with 5-8 insns.  */
          else if (mode == V2DImode || mode == V4DImode)
            {
              if (TARGET_XOP && mode == V2DImode)
                *total = cost->fmul * 2 + cost->fabs * 3;
              else
                *total = cost->fmul * 3 + cost->fabs * 5;
            }
          /* Without sse4.1, we don't have PMULLD; it's emulated with 7
             insns, including two PMULUDQ.  */
          else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
            *total = cost->fmul * 2 + cost->fabs * 5;
          else
            *total = cost->fmul;
          return false;
        }
      else
        {
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);
          int nbits;
          if (CONST_INT_P (XEXP (x, 1)))
            {
              unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
              for (nbits = 0; value != 0; value &= value - 1)
                nbits++;
            }
          else
            /* This is arbitrary.  */
            nbits = 7;

          /* Compute costs correctly for widening multiplication.  */
          if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
              && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
                 == GET_MODE_SIZE (mode))
            {
              int is_mulwiden = 0;
              enum machine_mode inner_mode = GET_MODE (op0);

              if (GET_CODE (op0) == GET_CODE (op1))
                is_mulwiden = 1, op1 = XEXP (op1, 0);
              else if (CONST_INT_P (op1))
                {
                  if (GET_CODE (op0) == SIGN_EXTEND)
                    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
                                  == INTVAL (op1);
                  else
                    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
                }

              if (is_mulwiden)
                op0 = XEXP (op0, 0), mode = GET_MODE (op0);
            }

          *total = (cost->mult_init[MODE_INDEX (mode)]
                    + nbits * cost->mult_bit
                    + rtx_cost (op0, outer_code, opno, speed)
                    + rtx_cost (op1, outer_code, opno, speed));
          return true;
        }
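
      /* A rough worked example (editorial, not from the original source):
         for x * 0x101 the loop above counts the set bits of the constant
         (nbits == 2), so the multiply is priced as
         mult_init[MODE_INDEX (mode)] + 2 * mult_bit plus the operand
         costs; constants with many set bits are priced closer to a full
         multiply, matching how shift-and-add synthesis scales.  */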
    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        /* ??? SSE cost should be used here.  */
        *total = cost->fdiv;
      else if (X87_FLOAT_MODE_P (mode))
        *total = cost->fdiv;
      else if (FLOAT_MODE_P (mode))
        /* ??? SSE vector cost should be used here.  */
        *total = cost->fdiv;
      else
        *total = cost->divide[MODE_INDEX (mode)];
      return false;

    case PLUS:
      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = cost->lea;
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1),
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = cost->lea;
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0),
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              *total = cost->lea;
              *total += rtx_cost (XEXP (XEXP (x, 0), 0),
                                  outer_code, opno, speed);
              *total += rtx_cost (XEXP (XEXP (x, 0), 1),
                                  outer_code, opno, speed);
              *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
              return true;
            }
        }
      /* FALLTHRU */

    case MINUS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        {
          /* ??? SSE cost should be used here.  */
          *total = cost->fadd;
          return false;
        }
      else if (X87_FLOAT_MODE_P (mode))
        {
          *total = cost->fadd;
          return false;
        }
      else if (FLOAT_MODE_P (mode))
        {
          /* ??? SSE vector cost should be used here.  */
          *total = cost->fadd;
          return false;
        }
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        {
          *total = (cost->add * 2
                    + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        {
          /* ??? SSE cost should be used here.  */
          *total = cost->fchs;
          return false;
        }
      else if (X87_FLOAT_MODE_P (mode))
        {
          *total = cost->fchs;
          return false;
        }
      else if (FLOAT_MODE_P (mode))
        {
          /* ??? SSE vector cost should be used here.  */
          *total = cost->fchs;
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          /* ??? Should be SSE vector operation cost.  */
          /* At least for published AMD latencies, this really is the same
             as the latency for a simple fpu operation like fabs.  */
          *total = cost->fabs;
        }
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        *total = cost->add * 2;
      else
        *total = cost->add;
      return false;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
          && XEXP (XEXP (x, 0), 1) == const1_rtx
          && CONST_INT_P (XEXP (XEXP (x, 0), 2))
          && XEXP (x, 1) == const0_rtx)
        {
          /* This kind of construct is implemented using test[bwl].
             Treat it as if we had an AND.  */
          *total = (cost->add
                    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
                    + rtx_cost (const1_rtx, outer_code, opno, speed));
          return true;
        }
      return false;

    case FLOAT_EXTEND:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
        *total = 0;
      return false;

    case ABS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        /* ??? SSE cost should be used here.  */
        *total = cost->fabs;
      else if (X87_FLOAT_MODE_P (mode))
        *total = cost->fabs;
      else if (FLOAT_MODE_P (mode))
        /* ??? SSE vector cost should be used here.  */
        *total = cost->fabs;
      return false;

    case SQRT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        /* ??? SSE cost should be used here.  */
        *total = cost->fsqrt;
      else if (X87_FLOAT_MODE_P (mode))
        *total = cost->fsqrt;
      else if (FLOAT_MODE_P (mode))
        /* ??? SSE vector cost should be used here.  */
        *total = cost->fsqrt;
      return false;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
        *total = 0;
      return false;

    case VEC_SELECT:
    case VEC_CONCAT:
    case VEC_MERGE:
    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
         recognizable.  In which case they all pretty much have the
         same cost.  */
      *total = cost->fabs;
      return true;

    default:
      return false;
    }
}
#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* PIC stub.  */
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);

  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
#endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
/* Handle a "callee_pop_aggregate_return" attribute; arguments as
   in struct attribute_spec handler.  */
static tree
ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
					 tree args,
					 int flags ATTRIBUTE_UNUSED,
					 bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (is_attribute_p ("callee_pop_aggregate_return", name))
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, 0) != 0
	       && compare_tree_int (cst, 1) != 0)
	{
	  warning (OPT_Wattributes,
		   "argument to %qE attribute is neither zero, nor one",
		   name);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}
/* Handle a "ms_abi" or "sysv" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_abi_attribute (tree *node, tree name,
			   tree args ATTRIBUTE_UNUSED,
			   int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("ms_abi", name))
    {
      if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
        {
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}

      return NULL_TREE;
    }
  else if (is_attribute_p ("sysv_abi", name))
    {
      if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
        {
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}

      return NULL_TREE;
    }

  return NULL_TREE;
}
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */
static tree
ix86_handle_struct_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
static tree
ix86_handle_fndecl_attribute (tree *node, tree name,
                              tree args ATTRIBUTE_UNUSED,
                              int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}

static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
          || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
        parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
        parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (Pmode, stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (Pmode,
						   stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
					     aggr ? 8 : 4));
}
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset, const_tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !targetm.binds_local_p (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;
  unsigned int tmp_regno;

  if (TARGET_64BIT)
    tmp_regno = R10_REG;
  else
    {
      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	tmp_regno = AX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	tmp_regno = DX_REG;
      else
	tmp_regno = CX_REG;
    }

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      tmp = gen_rtx_REG (Pmode, tmp_regno);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;

      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || cfun->machine->call_abi == MS_ABI)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    emit_jump_insn (gen_indirect_jump (fnaddr));
  else
    {
      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
	fnaddr = legitimize_pic_address (fnaddr,
					 gen_rtx_REG (Pmode, tmp_regno));

      if (!sibcall_insn_operand (fnaddr, word_mode))
	{
	  tmp = gen_rtx_REG (word_mode, tmp_regno);
	  if (GET_MODE (fnaddr) != word_mode)
	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
	  emit_move_insn (tmp, fnaddr);
	  fnaddr = tmp;
	}

      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.
     Note that use_thunk calls assemble_start_function et al.  */
  tmp = get_insns ();
  shorten_branches (tmp);
  final_start_function (tmp, file, 1);
  final (tmp, file, 1);
  final_end_function ();
}
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (strip_array_types (type));
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
					 : MCOUNT_NAME);

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      if (DEFAULT_ABI == SYSV_ABI && flag_pic)
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
	fprintf (file, "\tcall\t%s\n", mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t%s\n", mcount_name);
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (JUMP_TABLE_DATA_P (insn))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	case TYPE_OTHER:
	case TYPE_FCMP:
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  return len;
	}

      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1+l;
  else
    return 2;
}
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
     */
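
  /* A rough worked example (editorial, not from the original source):
     if INSN would be the 4th jump in the window and the interval
     [START, INSN] measures nbytes == 13 < 16, all four jumps could
     share one 16-byte page, so a pad of 15 - 13 + sizeof (INSN) bytes
     is emitted before INSN to push its first byte into the next
     page.  */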
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start)
		       && GET_CODE (PATTERN (start)) != ADDR_VEC
		       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
		      || CALL_P (start))
		    njumps--, isjump = 1;
		  else
		    isjump = 0;
		  nbytes -= min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn)
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start)
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      {
		replace = true;
		break;
	      }
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* Only happen in exit blocks.  */
      if (JUMP_P (insn)
	  && ANY_RETURN_P (PATTERN (insn)))
	break;

      if (NONDEBUG_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER)
	{
	  insn_count++;
	  if (insn_count >= 4)
	    return insn_count;
	}
    }

  return insn_count;
}
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR)
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR)
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
/* Pad short function to 4 instructions.   */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      rtx ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
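
/* A rough worked example (editorial, not from the original source):
   a function whose entry path counts insn_count == 1 gets
   2 * (4 - 1) == 6 NOPs emitted before the epilogue; since two NOPs
   are treated as one instruction, the padding contributes the 3
   missing instructions toward the 4-instruction minimum that
   TARGET_PAD_SHORT_FUNCTION cores prefer.  */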
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;

  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (GENERAL_REG_P (recog_data.operand[i])
	&& !QI_REGNO_P (REGNO (recog_data.operand[i])))
       return true;
  return false;
}

/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
   unsigned int regno;
   if (!REG_P (*p))
     return 0;
   regno = REGNO (*p);
   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
		       extended_reg_mentioned_1, NULL);
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case SImode:
    case HImode:
    case QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
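
/* A rough worked example (editorial, not from the original source):
   for (plus:SI reg -4) the constant is negated so the caller can emit
   `subl $4, %eax' instead of `addl $-4, %eax'.  The -128/128 exception
   goes the other way: -128 fits a sign-extended imm8 while 128 does
   not, so -128 is kept as-is and 128 is rewritten to -128.  */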
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
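
/* Editorial sketch (not in the original source) of the negative branch
   above, written as the equivalent C for a uint64_t -> double
   conversion:

     double f (uint64_t u)
     {
       if ((int64_t) u >= 0)
         return (double) (int64_t) u;       // signed convert suffices
       uint64_t half = (u >> 1) | (u & 1);  // halve, rounding to odd
       double d = (double) (int64_t) half;
       return d + d;                        // double it back
     }

   ORing in the low bit keeps the rounding of the final addition
   correct for values whose low bit would otherwise be lost.  */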
/* AVX2 does support 32-byte integer vector operations,
   thus the longest vector we are faced with is V32QImode.  */
#define MAX_VECT_LEN	32

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};

static bool canonicalize_perm (struct expand_vec_perm_d *d);
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);

/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline enum machine_mode
get_mode_wider_vector (enum machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  enum machine_mode n = GET_MODE_WIDER_MODE (o);
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
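
/* A rough example (editorial, not from the original source):
   get_mode_wider_vector (V16QImode) yields V8HImode, and
   get_mode_wider_vector (V8HImode) yields V4SImode -- the same
   16-byte vector with half as many elements, each twice as wide --
   which is exactly the step the `widen' recursion below takes.  */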
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  bool ok;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V4DFmode:
    case V4DImode:
    case V8SFmode:
    case V8SImode:
    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      {
	rtx insn, dup;

	/* First attempt to recognize VAL as-is.  */
	dup = gen_rtx_VEC_DUPLICATE (mode, val);
	insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
	if (recog_memoized (insn) < 0)
	  {
	    rtx seq;
	    /* If that fails, force VAL into a register.  */

	    start_sequence ();
	    XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
	    seq = get_insns ();
	    end_sequence ();
	    if (seq)
	      emit_insn_before (seq, insn);

	    ok = recog_memoized (insn) >= 0;
	    gcc_assert (ok);
	  }
      }
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  rtx x;

	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      goto widen;

    case V8QImode:
      if (!mmx_ok)
	return false;
      goto widen;

    case V8HImode:
      if (TARGET_SSE2)
	{
	  struct expand_vec_perm_d dperm;
	  rtx tmp1, tmp2;

	permute:
	  memset (&dperm, 0, sizeof (dperm));
	  dperm.target = target;
	  dperm.vmode = mode;
	  dperm.nelt = GET_MODE_NUNITS (mode);
	  dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
	  dperm.one_operand_p = true;

	  /* Extend to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));

	  /* Insert the SImode value as low element of a V4SImode vector.  */
	  tmp2 = gen_lowpart (V4SImode, dperm.op0);
	  emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));

	  ok = (expand_vec_perm_1 (&dperm)
		|| expand_vec_perm_broadcast_1 (&dperm));
	  gcc_assert (ok);
	  return ok;
	}
      goto widen;

    case V16QImode:
      if (TARGET_SSE2)
	goto permute;
      goto widen;

    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      {
	enum machine_mode smode, wsmode, wvmode;
	rtx x;

	smode = GET_MODE_INNER (mode);
	wvmode = get_mode_wider_vector (mode);
	wsmode = GET_MODE_INNER (wvmode);

	val = convert_modes (wsmode, smode, val, true);
	x = expand_simple_binop (wsmode, ASHIFT, val,
				 GEN_INT (GET_MODE_BITSIZE (smode)),
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

	x = gen_lowpart (wvmode, target);
	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
	gcc_assert (ok);
	return ok;
      }

    case V16HImode:
    case V32QImode:
      {
	enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
	rtx x = gen_reg_rtx (hvmode);

	ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
	gcc_assert (ok);

	x = gen_rtx_VEC_CONCAT (mode, x, x);
	emit_insn (gen_rtx_SET (VOIDmode, target, x));
      }
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;
  bool use_vector_set = false;

  switch (mode)
    {
    case V2DImode:
      /* For SSE4.1, we normally use vector set.  But if the second
	 element is zero and inter-unit moves are OK, we use movq
	 instead.  */
      use_vector_set = (TARGET_64BIT
			&& TARGET_SSE4_1
			&& !(TARGET_INTER_UNIT_MOVES
			     && one_var == 0));
      break;
    case V16QImode:
    case V4SImode:
    case V4SFmode:
      use_vector_set = TARGET_SSE4_1;
      break;
    case V8HImode:
      use_vector_set = TARGET_SSE2;
      break;
    case V4HImode:
      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V8SFmode:
    case V4DFmode:
      use_vector_set = TARGET_AVX;
      break;
    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      use_vector_set = TARGET_AVX && TARGET_64BIT;
      break;
    default:
      break;
    }

  if (use_vector_set)
    {
      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
      var = force_reg (GET_MODE_INNER (mode), var);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
      return true;
    }

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
	return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
	new_target = gen_reg_rtx (mode);
      else
	new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
	{
	  /* We need to shuffle the value to the correct position, so
	     create a new pseudo to store the intermediate result.  */

	  /* With SSE2, we can use the integer shuffle insns.  */
	  if (mode != V4SFmode && TARGET_SSE2)
	    {
	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
					    const1_rtx,
					    GEN_INT (one_var == 1 ? 0 : 1),
					    GEN_INT (one_var == 2 ? 0 : 1),
					    GEN_INT (one_var == 3 ? 0 : 1)));
	      if (target != new_target)
		emit_move_insn (target, new_target);
	      return true;
	    }

	  /* Otherwise convert the intermediate result to V4SFmode and
	     use the SSE1 shuffle instructions.  */
	  if (mode != V4SFmode)
	    {
	      tmp = gen_reg_rtx (V4SFmode);
	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
	    }
	  else
	    tmp = new_target;

	  emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
					  const1_rtx,
					  GEN_INT (one_var == 1 ? 0 : 1),
					  GEN_INT (one_var == 2 ? 0+4 : 1+4),
					  GEN_INT (one_var == 3 ? 0+4 : 1+4)));

	  if (mode != V4SFmode)
	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
	  else if (tmp != target)
	    emit_move_insn (target, tmp);
	}
      else if (target != new_target)
	emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
	return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
						var, one_var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      if (!TARGET_64BIT)
	return false;
    case V4DFmode:
    case V8SFmode:
    case V8SImode:
    case V16HImode:
    case V32QImode:
    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      if (TARGET_SSE4_1)
	break;
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   concatenate to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_concat (enum machine_mode mode,
				rtx target, rtx *ops, int n)
{
  enum machine_mode cmode, hmode = VOIDmode;
  rtx first[8], second[4];
  rtvec v;
  int i, j;

  switch (n)
    {
    case 2:
      switch (mode)
	{
	case V8SImode:
	  cmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V4SFmode;
	  break;
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	case V2DImode:
	  cmode = DImode;
	  break;
	case V2SImode:
	  cmode = SImode;
	  break;
	case V2DFmode:
	  cmode = DFmode;
	  break;
	case V2SFmode:
	  cmode = SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (!register_operand (ops[1], cmode))
	ops[1] = force_reg (cmode, ops[1]);
      if (!register_operand (ops[0], cmode))
	ops[0] = force_reg (cmode, ops[0]);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, ops[0],
						  ops[1])));
      break;

    case 4:
      switch (mode)
	{
	case V4DImode:
	  cmode = V2DImode;
	  break;
	case V4DFmode:
	  cmode = V2DFmode;
	  break;
	case V4SImode:
	  cmode = V2SImode;
	  break;
	case V4SFmode:
	  cmode = V2SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

    case 8:
      switch (mode)
	{
	case V8SImode:
	  cmode = V2SImode;
	  hmode = V4SImode;
	  break;
	case V8SFmode:
	  cmode = V2SFmode;
	  hmode = V4SFmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      goto half;

half:
      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
      i = n - 1;
      j = (n >> 1) - 1;
      for (; i > 0; i -= 2, j--)
	{
	  first[j] = gen_reg_rtx (cmode);
	  v = gen_rtvec (2, ops[i - 1], ops[i]);
	  ix86_expand_vector_init (false, first[j],
				   gen_rtx_PARALLEL (cmode, v));
	}

      n >>= 1;
      if (n > 2)
	{
	  gcc_assert (hmode != VOIDmode);
	  for (i = j = 0; i < n; i += 2, j++)
	    {
	      second[j] = gen_reg_rtx (hmode);
	      ix86_expand_vector_init_concat (hmode, second [j],
					      &first [i], 2);
	    }
	  n >>= 1;
	  ix86_expand_vector_init_concat (mode, target, second, n);
	}
      else
	ix86_expand_vector_init_concat (mode, target, first, n);
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   interleave to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_interleave (enum machine_mode mode,
				    rtx target, rtx *ops, int n)
{
  enum machine_mode first_imode, second_imode, third_imode, inner_mode;
  int i, j;
  rtx op0, op1;
  rtx (*gen_load_even) (rtx, rtx, rtx);
  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);

  switch (mode)
    {
    case V8HImode:
      gen_load_even = gen_vec_setv8hi;
      gen_interleave_first_low = gen_vec_interleave_lowv4si;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      inner_mode = HImode;
      first_imode = V4SImode;
      second_imode = V2DImode;
      third_imode = VOIDmode;
      break;
    case V16QImode:
      gen_load_even = gen_vec_setv16qi;
      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
      gen_interleave_second_low = gen_vec_interleave_lowv4si;
      inner_mode = QImode;
      first_imode = V8HImode;
      second_imode = V4SImode;
      third_imode = V2DImode;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < n; i++)
    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
      op0 = gen_reg_rtx (SImode);
      emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));

      /* Insert the SImode value as low element of V4SImode vector.  */
      op1 = gen_reg_rtx (V4SImode);
      op0 = gen_rtx_VEC_MERGE (V4SImode,
			       gen_rtx_VEC_DUPLICATE (V4SImode,
						      op0),
			       CONST0_RTX (V4SImode),
			       const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));

      /* Cast the V4SImode vector back to a vector in original mode.  */
      op0 = gen_reg_rtx (mode);
      emit_move_insn (op0, gen_lowpart (mode, op1));

      /* Load even elements into the second position.  */
      emit_insn (gen_load_even (op0,
				force_reg (inner_mode,
					   ops [i + i + 1]),
				const1_rtx));

      /* Cast vector to FIRST_IMODE vector.  */
      ops[i] = gen_reg_rtx (first_imode);
      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
    }

  /* Interleave low FIRST_IMODE vectors.  */
  for (i = j = 0; i < n; i += 2, j++)
    {
      op0 = gen_reg_rtx (first_imode);
      emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));

      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
      ops[j] = gen_reg_rtx (second_imode);
      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
    }

  /* Interleave low SECOND_IMODE vectors.  */
  switch (second_imode)
    {
    case V4SImode:
      for (i = j = 0; i < n / 2; i += 2, j++)
	{
	  op0 = gen_reg_rtx (second_imode);
	  emit_insn (gen_interleave_second_low (op0, ops[i],
						ops[i + 1]));

	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
	     vector.  */
	  ops[j] = gen_reg_rtx (third_imode);
	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
	}
      second_imode = V2DImode;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      /* FALLTHRU */

    case V2DImode:
      op0 = gen_reg_rtx (second_imode);
      emit_insn (gen_interleave_second_low (op0, ops[0],
					    ops[1]));

      /* Cast the SECOND_IMODE vector back to a vector on original
	 mode.  */
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_lowpart (mode, op0)));
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals)
{
  rtx ops[32], op0, op1;
  enum machine_mode half_mode = VOIDmode;
  int n, i;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V4DFmode:
    case V4DImode:
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_concat (mode, target, ops, n);
      return;

    case V32QImode:
      half_mode = V16QImode;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      goto half;

half:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      op0 = gen_reg_rtx (half_mode);
      op1 = gen_reg_rtx (half_mode);
      ix86_expand_vector_init_interleave (half_mode, op0, ops,
					  n >> 2);
      ix86_expand_vector_init_interleave (half_mode, op1,
					  &ops [n >> 1], n >> 2);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
      return;

    case V16QImode:
      if (!TARGET_SSE4_1)
	break;
      /* FALLTHRU */

    case V8HImode:
      if (!TARGET_SSE2)
	break;

      /* Don't use ix86_expand_vector_init_interleave if we can't
	 move from GPR to SSE register directly.  */
      if (!TARGET_INTER_UNIT_MOVES)
	break;

      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
	ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
      return;

    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
	{
	  rtx word = NULL_RTX;

	  for (j = 0; j < n_elt_per_word; ++j)
	    {
	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
	      elt = convert_modes (word_mode, inner_mode, elt, true);

	      if (j == 0)
		word = elt;
	      else
		{
		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
					      word, 1, OPTAB_LIB_WIDEN);
		  word = expand_simple_binop (word_mode, IOR, word, elt,
					      word, 1, OPTAB_LIB_WIDEN);
		}
	    }

	  words[i] = word;
	}

      if (n_words == 1)
	emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
	{
	  rtx tmp = gen_reg_rtx (mode);
	  emit_clobber (tmp);
	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
	  emit_move_insn (target, tmp);
	}
      else if (n_words == 4)
	{
	  rtx tmp = gen_reg_rtx (V4SImode);
	  gcc_assert (word_mode == SImode);
	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
	  emit_move_insn (target, gen_lowpart (mode, tmp));
	}
      else
	gcc_unreachable ();
    }
}
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
	    || GET_CODE (x) == CONST_DOUBLE
	    || GET_CODE (x) == CONST_FIXED))
	n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
					    XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
						  XVECEXP (vals, 0, one_var),
						  one_var))
	return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
	return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
/* Set element ELT of vector TARGET to VAL.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  enum machine_mode half_mode;
  bool use_vec_merge = false;
  rtx tmp;
  static rtx (*gen_extract[6][2]) (rtx, rtx)
    = {
	{ gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
	{ gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
	{ gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
	{ gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
	{ gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
	{ gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
      };
  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
    = {
	{ gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
	{ gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
	{ gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
	{ gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
	{ gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
	{ gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
      };
  int i, j, n;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
	{
	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
	  if (elt == 0)
	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
	  else
	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
	  return;
	}
      break;

    case V2DImode:
      use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
      if (use_vec_merge)
	break;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (false, tmp, target, 1 - elt);
      if (elt == 0)
	tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      else
	tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      return;

    case V2DFmode:
      {
	rtx op0, op1;

	/* For the two element vectors, we implement a VEC_CONCAT with
	   the extraction of the other element.  */

	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

	if (elt == 0)
	  op0 = val, op1 = tmp;
	else
	  op0 = tmp, op1 = val;

	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      switch (elt)
	{
	case 0:
	  use_vec_merge = true;
	  break;

	case 1:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* target = A A B B */
	  emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
	  /* target = X A B B */
	  ix86_expand_vector_set (false, target, val, 0);
	  /* target = A X C D */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const1_rtx, const0_rtx,
					  GEN_INT (2+4), GEN_INT (3+4)));
	  return;

	case 2:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B X D */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (0+4), GEN_INT (3+4)));
	  return;

	case 3:
	  /* tmp = target = A B C D */
	  tmp = copy_to_reg (target);
	  /* tmp = X B C D */
	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X */
	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
					  const0_rtx, const1_rtx,
					  GEN_INT (2+4), GEN_INT (0+4)));
	  return;

	default:
	  gcc_unreachable ();
	}
      break;

    case V4SImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
	break;

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
	{
	  use_vec_merge = true;
	  break;
	}

      if (TARGET_SSE2)
	{
	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
	     store into element 0, then shuffle them back.  */
	  rtx order[4];

	  order[0] = GEN_INT (elt);
	  order[1] = const1_rtx;
	  order[2] = const2_rtx;
	  order[3] = GEN_INT (3);
	  order[elt] = const0_rtx;

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));

	  ix86_expand_vector_set (false, target, val, 0);

	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
					order[1], order[2], order[3]));
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
				  gen_lowpart (SFmode, val), elt);
	}
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_merge = TARGET_SSE4_1;
      break;

    case V8QImode:
      break;

    case V32QImode:
      half_mode = V16QImode;
      j = 0;
      n = 16;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      j = 1;
      n = 8;
      goto half;

    case V8SImode:
      half_mode = V4SImode;
      j = 2;
      n = 4;
      goto half;

    case V4DImode:
      half_mode = V2DImode;
      j = 3;
      n = 2;
      goto half;

    case V8SFmode:
      half_mode = V4SFmode;
      j = 4;
      n = 4;
      goto half;

    case V4DFmode:
      half_mode = V2DFmode;
      j = 5;
      n = 2;
      goto half;

half:
      /* Compute offset.  */
      i = elt / n;
      elt %= n;

      gcc_assert (i <= 1);

      /* Extract the half.  */
      tmp = gen_reg_rtx (half_mode);
      emit_insn (gen_extract[j][i] (tmp, target));

      /* Put val in tmp at elt.  */
      ix86_expand_vector_set (false, tmp, val, elt);

      /* Put it back.  */
      emit_insn (gen_insert[j][i] (target, target, tmp));
      return;

    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
/* Extract element ELT of vector VEC into TARGET.  Suppress the use of
   MMX instructions unless MMX_OK is true.  */

void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      switch (elt)
	{
	case 0:
	  tmp = vec;
	  break;

	case 1:
	case 3:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
					  GEN_INT (elt), GEN_INT (elt),
					  GEN_INT (elt+4), GEN_INT (elt+4)));
	  break;

	case 2:
	  tmp = gen_reg_rtx (mode);
	  emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
	  break;

	default:
	  gcc_unreachable ();
	}
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
	break;

      if (TARGET_SSE2)
	{
	  switch (elt)
	    {
	    case 0:
	      tmp = vec;
	      break;

	    case 1:
	    case 3:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
					    GEN_INT (elt), GEN_INT (elt),
					    GEN_INT (elt), GEN_INT (elt)));
	      break;

	    case 2:
	      tmp = gen_reg_rtx (mode);
	      emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
	      break;

	    default:
	      gcc_unreachable ();
	    }
	  vec = tmp;
	  use_vec_extr = true;
	  elt = 0;
	}
      else
	{
	  /* For SSE1, we have to reuse the V4SF code.  */
	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
				      gen_lowpart (V4SFmode, vec), elt);
	  return;
	}
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_extr = TARGET_SSE4_1;
      break;

    case V8SFmode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V4SFmode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
	  return;
	}
      break;

    case V4DFmode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V2DFmode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
	  return;
	}
      break;

    case V32QImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V16QImode);
	  if (elt < 16)
	    emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 15);
	  return;
	}
      break;

    case V16HImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V8HImode);
	  if (elt < 8)
	    emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 7);
	  return;
	}
      break;

    case V8SImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V4SImode);
	  if (elt < 4)
	    emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
	  return;
	}
      break;

    case V4DImode:
      if (TARGET_AVX)
	{
	  tmp = gen_reg_rtx (V2DImode);
	  if (elt < 2)
	    emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
	  else
	    emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
	  return;
	}
      break;

    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
	{
	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
	  target = gen_lowpart (SImode, target);
	}

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
   to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
   The upper bits of DEST are undefined, though they shouldn't cause
   exceptions (some bits from src or all zeros are ok).  */

static void
emit_reduc_half (rtx dest, rtx src, int i)
{
  rtx tem;
  switch (GET_MODE (src))
    {
    case V4SFmode:
      if (i == 128)
	tem = gen_sse_movhlps (dest, src, src);
      else
	tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
				   GEN_INT (1 + 4), GEN_INT (1 + 4));
      break;
    case V2DFmode:
      tem = gen_vec_interleave_highv2df (dest, src, src);
      break;
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
				gen_lowpart (V1TImode, src),
				GEN_INT (i / 2));
      break;
    case V8SFmode:
      if (i == 256)
	tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
      else
	tem = gen_avx_shufps256 (dest, src, src,
				 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
      break;
    case V4DFmode:
      if (i == 256)
	tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
      else
	tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      if (i == 256)
	tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
				 gen_lowpart (V4DImode, src),
				 gen_lowpart (V4DImode, src),
				 const1_rtx);
      else
	tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
				  gen_lowpart (V2TImode, src),
				  GEN_INT (i / 2));
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (tem);
}
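/* Note that for the integer vector modes handled above, a plain
   128-bit (V1TImode) or 256-bit (V2TImode) logical right shift of the
   whole register by I/2 bits implements the "copy high half to low
   half" operation, which is why the wide shift patterns are used there
   instead of mode-specific shuffles.  */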
/* Expand a vector reduction.  FN is the binary pattern to reduce;
   DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx half, dst, vec = in;
  enum machine_mode mode = GET_MODE (in);
  int i;

  /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
  if (TARGET_SSE4_1
      && mode == V8HImode
      && fn == gen_uminv8hi3)
    {
      emit_insn (gen_sse4_1_phminposuw (dest, in));
      return;
    }

  for (i = GET_MODE_BITSIZE (mode);
       i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
       i >>= 1)
    {
      half = gen_reg_rtx (mode);
      emit_reduc_half (half, vec, i);
      if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
	dst = dest;
      else
	dst = gen_reg_rtx (mode);
      emit_insn (fn (dst, half, vec));
      vec = dst;
    }
}
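/* A worked example: reducing V4SFmode with FN = gen_addv4sf3 iterates
   I = 128, 64.  At I = 128 the upper two elements are combined onto
   the lower two; at I = 64 element 1 is combined onto element 0.  The
   latter is the final step (I equals twice the inner mode bitsize),
   so its result goes directly into DEST; only element 0 of DEST is
   meaningful afterwards.  */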
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
		      tree inputs ATTRIBUTE_UNUSED,
		      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
			clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
			clobbers);
  return clobbers;
}
/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
	  ? reverse_condition (code)
	  : reverse_condition_maybe_unordered (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
	}
      if (STACK_TOP_P (operands[0]))
	return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%Z0\t%y0";
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
	  else
	    return "fst%Z0\t%y0";
	}
    }
  else
    gcc_unreachable ();
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);
  rtx test;

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
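/* The magic threshold above is 1 - sqrt(2)/2 ~= 0.29289321881...:
   per Intel's documentation, FYL2XP1 is only defined for |x| below
   that bound, where it computes y * log2(1 + x) with good accuracy.
   For larger |op1| the code therefore falls back to fyl2x on
   (op1 + 1.0); with y loaded as ln(2) via fldln2, both paths yield
   the natural log1p.  */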
/* Emit code for round calculation.  */
void ix86_emit_i387_round (rtx op0, rtx op1)
{
  enum machine_mode inmode = GET_MODE (op1);
  enum machine_mode outmode = GET_MODE (op0);
  rtx e1, e2, res, tmp, tmp1, half;
  rtx scratch = gen_reg_rtx (HImode);
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx jump_label = gen_label_rtx ();
  rtx insn;
  rtx (*gen_abs) (rtx, rtx);
  rtx (*gen_neg) (rtx, rtx);

  switch (inmode)
    {
    case SFmode:
      gen_abs = gen_abssf2;
      break;
    case DFmode:
      gen_abs = gen_absdf2;
      break;
    case XFmode:
      gen_abs = gen_absxf2;
      break;
    default:
      gcc_unreachable ();
    }

  switch (outmode)
    {
    case SFmode:
      gen_neg = gen_negsf2;
      break;
    case DFmode:
      gen_neg = gen_negdf2;
      break;
    case XFmode:
      gen_neg = gen_negxf2;
      break;
    case HImode:
      gen_neg = gen_neghi2;
      break;
    case SImode:
      gen_neg = gen_negsi2;
      break;
    case DImode:
      gen_neg = gen_negdi2;
      break;
    default:
      gcc_unreachable ();
    }

  e1 = gen_reg_rtx (inmode);
  e2 = gen_reg_rtx (inmode);
  res = gen_reg_rtx (outmode);

  half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);

  /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */

  /* scratch = fxam(op1) */
  emit_insn (gen_rtx_SET (VOIDmode, scratch,
			  gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
					  UNSPEC_FXAM)));
  /* e1 = fabs(op1) */
  emit_insn (gen_abs (e1, op1));

  /* e2 = e1 + 0.5 */
  half = force_reg (inmode, half);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (inmode, e1, half)));

  /* res = floor(e2) */
  if (inmode != XFmode)
    {
      tmp1 = gen_reg_rtx (XFmode);

      emit_insn (gen_rtx_SET (VOIDmode, tmp1,
			      gen_rtx_FLOAT_EXTEND (XFmode, e2)));
    }
  else
    tmp1 = e2;

  switch (outmode)
    {
    case SFmode:
    case DFmode:
      {
	rtx tmp0 = gen_reg_rtx (XFmode);

	emit_insn (gen_frndintxf2_floor (tmp0, tmp1));

	emit_insn (gen_rtx_SET (VOIDmode, res,
				gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
						UNSPEC_TRUNC_NOOP)));
      }
      break;
    case XFmode:
      emit_insn (gen_frndintxf2_floor (res, tmp1));
      break;
    case HImode:
      emit_insn (gen_lfloorxfhi2 (res, tmp1));
      break;
    case SImode:
      emit_insn (gen_lfloorxfsi2 (res, tmp1));
      break;
    case DImode:
      emit_insn (gen_lfloorxfdi2 (res, tmp1));
      break;
    default:
      gcc_unreachable ();
    }

  /* flags = signbit(a) */
  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));

  /* if (flags) then res = -res */
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
			      gen_rtx_EQ (VOIDmode, flags, const0_rtx),
			      gen_rtx_LABEL_REF (VOIDmode, jump_label),
			      pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = jump_label;

  emit_insn (gen_neg (res, res));

  emit_label (jump_label);
  LABEL_NUSES (jump_label) = 1;

  emit_move_insn (op0, res);
}
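/* The sign is captured with fxam *before* fabs/floor destroy it, so
   negative inputs (including -0.0) can be negated back at the end.
   After fnstsw, the fxam C1 condition bit (the operand's sign) sits
   in bit 1 of the high byte of the status word, which is what the
   0x02 test above examines; the 0x04 test in
   ix86_emit_fp_unordered_jump similarly picks out C2.  */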
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */

  b = force_reg (mode, b);

  /* x0 = rcp(b) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
					  UNSPEC_RCP)));
  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, b)));

  /* e0 = x0 * e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, e0)));

  /* e1 = x0 + x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_PLUS (mode, x0, x0)));

  /* x1 = e1 - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
			  gen_rtx_MINUS (mode, e1, e0)));

  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, a, x1)));
}
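/* This is one Newton-Raphson step for the reciprocal: with x0 ~= 1/b,
   x1 = x0 * (2 - b * x0) = 2*x0 - b*x0*x0, which the code forms as
   e1 - e0 with e1 = x0 + x0 and e0 = b * x0 * x0.  Each step roughly
   doubles the number of correct bits of the roughly 12-bit rcpss
   estimate, so a single step is close to single-precision accuracy.  */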
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
			 bool recip)
{
  rtx x0, e0, e1, e2, e3, mthree, mhalf;
  REAL_VALUE_TYPE r;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  real_from_integer (&r, VOIDmode, -3, -1, 0);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (mode, true, mthree);
      mhalf = ix86_build_const_vector (mode, true, mhalf);
    }

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */

  a = force_reg (mode, a);

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
					  UNSPEC_RSQRT)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero, mask;

      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX(mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
			      gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
			      gen_rtx_AND (mode, x0, mask)));
    }

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3. */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
			    gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_MULT (mode, e2, e3)));
}
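/* Likewise one Newton-Raphson step for 1/sqrt(a): with x0 ~= 1/sqrt(a),
   x1 = 0.5 * x0 * (3 - a*x0*x0) = -0.5 * x0 * (a*x0*x0 - 3), which is
   e3 * e2 with e2 = a*x0*x0 - 3 and e3 = -0.5 * x0.  For the
   non-reciprocal case the extra factor of a folds in via
   sqrt(a) = a / sqrt(a), i.e. e3 = -0.5 * (a * x0) = -0.5 * e0.  */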
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign-bit.  */
static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      enum machine_mode vmode;

      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
      else
	vmode = mode;

      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
	{
	  /* We need to generate a scalar mode mask in this case.  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
	  mask = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
	}
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
			  gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
			  gen_rtx_IOR (mode, abs_value, sgn)));
}
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode vmode, mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;
  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
			  gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
				  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
			      bool swap_operands)
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;

  emit_insn (insn (mask, op0, op1,
		   gen_rtx_fmt_ee (code, mode, op0, op1)));
  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
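/* 2**52 (2**23 for SFmode) is the magic constant behind the
   "xa = xa + TWO52 - TWO52" sequences below: once a nonnegative
   double reaches 2**52 its ulp is 1.0, so the addition forces the
   FPU to round away all fraction bits (in the current rounding mode)
   and the subtraction recovers the rounded-to-integer value.  */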
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       op0 = (long)tmp
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
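/* nextafter (0.5, 0.0) rather than exactly 0.5 is added so that values
   just below a halfway point are not dragged over it: e.g. for the
   double 0.49999999999999994, adding 0.5 rounds (to nearest/even) to
   1.0 and would give lround == 1, while adding the predecessor of 0.5
   correctly gives 0.  pred_half is computed as 0.5 - 2**(-p-1), where
   p is the precision of MODE.  */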
/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
   storing into OPERAND0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	return xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
					    freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
		   const_double_from_real_value (do_floor
						 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	  return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
     Compensate.
	dxa = xa2 - xa;
	if (dxa <= -0.5)
	  xa2 += 1;
	else if (dxa > 0.5)
	  xa2 -= 1;
	x2 = copysign (xa2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
			       0, OPTAB_DIRECT);

  /* Compensate.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
     Compensate:
	if (xa2 > xa)
	  xa2 -= 1.0;
	x2 = copysign (xa2, x);
	return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round
   from OP1 storing into OP0 using sse4 round insn.  */
void
ix86_expand_round_sse4 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx e1, e2, res, half;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx (*gen_copysign) (rtx, rtx, rtx);
  rtx (*gen_round) (rtx, rtx, rtx);

  switch (mode)
    {
    case SFmode:
      gen_copysign = gen_copysignsf3;
      gen_round = gen_sse4_1_roundsf2;
      break;
    case DFmode:
      gen_copysign = gen_copysigndf3;
      gen_round = gen_sse4_1_rounddf2;
      break;
    default:
      gcc_unreachable ();
    }

  /* round (a) = trunc (a + copysign (0.5, a)) */

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
  half = const_double_from_real_value (pred_half, mode);

  /* e1 = copysign (0.5, op1) */
  e1 = gen_reg_rtx (mode);
  emit_insn (gen_copysign (e1, half, op1));

  /* e2 = op1 + e1 */
  e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = trunc (e2) */
  res = gen_reg_rtx (mode);
  emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));

  emit_move_insn (op0, res);
}
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
    true },
  /* The transactional memory builtins are implicitly regparm or fastcall
     depending on the ABI.  Override the generic do-nothing attribute that
     these builtins were declared with.  */
  { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
    true },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute, false },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute,
    false },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
    false },
  { "callee_pop_aggregate_return", 1, 1, false, true, true,
    ix86_handle_callee_pop_aggregate_return, true },
  { NULL, 0, 0, false, false, false, NULL, false }
};
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				 tree vectype,
				 int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
      case scalar_stmt:
	return ix86_cost->scalar_stmt_cost;

      case scalar_load:
	return ix86_cost->scalar_load_cost;

      case scalar_store:
	return ix86_cost->scalar_store_cost;

      case vector_stmt:
	return ix86_cost->vec_stmt_cost;

      case vector_load:
	return ix86_cost->vec_align_load_cost;

      case vector_store:
	return ix86_cost->vec_store_cost;

      case vec_to_scalar:
	return ix86_cost->vec_to_scalar_cost;

      case scalar_to_vec:
	return ix86_cost->scalar_to_vec_cost;

      case unaligned_load:
      case unaligned_store:
	return ix86_cost->vec_unalign_load_cost;

      case cond_branch_taken:
	return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
	return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
	return ix86_cost->vec_stmt_cost;

      case vec_construct:
	elements = TYPE_VECTOR_SUBPARTS (vectype);
	return elements / 2 + 1;

      default:
	gcc_unreachable ();
    }
}
/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
   insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
   insn every time.  */

static GTY(()) rtx vselect_insn;

/* Initialize vselect_insn.  */

static void
init_vselect_insn (void)
{
  unsigned i;
  rtx x;

  x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
  for (i = 0; i < MAX_VECT_LEN; ++i)
    XVECEXP (x, 0, i) = const0_rtx;
  x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
							const0_rtx), x);
  x = gen_rtx_SET (VOIDmode, const0_rtx, x);
  start_sequence ();
  vselect_insn = emit_insn (x);
  end_sequence ();
}
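/* The start_sequence/end_sequence pair keeps the cached insn out of
   any real insn stream: expand_vselect below mutates this template in
   place, asks recog_memoized whether the result matches an enabled
   pattern, and only then emits a copy of the pattern, restoring the
   template to a neutral state afterwards.  */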
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm,
		unsigned nelt, bool testing_p)
{
  unsigned int i;
  rtx x, save_vconcat;
  int icode;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
  PUT_NUM_ELEM (XVEC (x, 0), nelt);
  for (i = 0; i < nelt; ++i)
    XVECEXP (x, 0, i) = GEN_INT (perm[i]);
  save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
  PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
  SET_DEST (PATTERN (vselect_insn)) = target;
  icode = recog_memoized (vselect_insn);

  if (icode >= 0 && !testing_p)
    emit_insn (copy_rtx (PATTERN (vselect_insn)));

  SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
  INSN_CODE (vselect_insn) = -1;

  return icode >= 0;
}
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
			const unsigned char *perm, unsigned nelt,
			bool testing_p)
{
  enum machine_mode v2mode;
  rtx x;
  bool ok;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  PUT_MODE (x, v2mode);
  XEXP (x, 0) = op0;
  XEXP (x, 1) = op1;
  ok = expand_vselect (target, x, perm, nelt, testing_p);
  XEXP (x, 0) = const0_rtx;
  XEXP (x, 1) = const0_rtx;
  return ok;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  enum machine_mode vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, x;
  rtx rperm[32], vperm;

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
	return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */
  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
    case V2DFmode:
    case V4SFmode:
    case V8HImode:
    case V8SImode:
      for (i = 0; i < nelt; ++i)
	mask |= (d->perm[i] >= nelt) << i;
      break;

    case V2DImode:
      for (i = 0; i < 2; ++i)
	mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      vmode = V8HImode;
      goto do_subreg;

    case V4SImode:
      for (i = 0; i < 4; ++i)
	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8HImode;
      goto do_subreg;

    case V16QImode:
      /* See if bytes move in pairs so we can use pblendw with
	 an immediate argument, rather than pblendvb with a vector
	 argument.  */
      for (i = 0; i < 16; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  {
	  use_pblendvb:
	    for (i = 0; i < nelt; ++i)
	      rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

	  finish_pblendvb:
	    vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
	    vperm = force_reg (vmode, vperm);

	    if (GET_MODE_SIZE (vmode) == 16)
	      emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
	    else
	      emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
	    return true;
	  }

      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8HImode;
      /* FALLTHRU */

    do_subreg:
      target = gen_lowpart (vmode, target);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    case V32QImode:
      /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
      for (i = 0; i < 32; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  goto use_pblendvb;
      /* See if bytes move in quadruplets.  If yes, vpblendd
	 with immediate can be used.  */
      for (i = 0; i < 32; i += 4)
	if (d->perm[i] + 2 != d->perm[i + 2])
	  break;
      if (i < 32)
	{
	  /* See if bytes move the same in both lanes.  If yes,
	     vpblendw with immediate can be used.  */
	  for (i = 0; i < 16; i += 2)
	    if (d->perm[i] + 16 != d->perm[i + 16])
	      goto use_pblendvb;

	  /* Use vpblendw.  */
	  for (i = 0; i < 16; ++i)
	    mask |= (d->perm[i * 2] >= 32) << i;
	  vmode = V16HImode;
	  goto do_subreg;
	}

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 4] >= 32) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V16HImode:
      /* See if words move in pairs.  If yes, vpblendd can be used.  */
      for (i = 0; i < 16; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  break;
      if (i < 16)
	{
	  /* See if words move the same in both lanes.  If not,
	     vpblendvb must be used.  */
	  for (i = 0; i < 8; i++)
	    if (d->perm[i] + 8 != d->perm[i + 8])
	      {
		/* Use vpblendvb.  */
		for (i = 0; i < 32; ++i)
		  rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);

		vmode = V32QImode;
		nelt = 32;
		target = gen_lowpart (vmode, target);
		op0 = gen_lowpart (vmode, op0);
		op1 = gen_lowpart (vmode, op1);
		goto finish_pblendvb;
	      }

	  /* Use vpblendw.  */
	  for (i = 0; i < 16; ++i)
	    mask |= (d->perm[i] >= 16) << i;
	  break;
	}

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V4DImode:
      /* Use vpblendd.  */
      for (i = 0; i < 4; ++i)
	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8SImode;
      goto do_subreg;

    default:
      gcc_unreachable ();
    }

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
  x = gen_rtx_SET (VOIDmode, target, x);
  emit_insn (x);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
	return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
	 from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
	e -= (8 + 4);
      else if (e >= 4)
	e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}
/* Return true if permutation D can be performed as VMODE permutation
   instead of the specified D->VMODE permutation.  */

static bool
valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
{
  unsigned int i, j, chunk;

  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
    return false;

  if (GET_MODE_NUNITS (vmode) >= d->nelt)
    return true;

  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      return false;
    else
      for (j = 1; j < chunk; ++j)
	if (d->perm[i] + j != d->perm[i + j])
	  return false;

  return true;
}
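/* E.g. a V16QImode permutation { 2, 3, 0, 1, 6, 7, 4, 5, ... } moves
   bytes in aligned pairs (chunk == 2), so it is also performable as the
   V8HImode permutation { 1, 0, 3, 2, ... }.  */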
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz, mask;
  unsigned char perm[32];
  enum machine_mode vmode = V16QImode;
  rtx rperm[32], vperm, target, op0, op1;

  nelt = d->nelt;

  if (!d->one_operand_p)
    {
      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
	{
	  if (TARGET_AVX2
	      && valid_perm_using_mode_p (V2TImode, d))
	    {
	      if (d->testing_p)
		return true;

	      /* Use vperm2i128 insn.  The pattern uses
		 V4DImode instead of V2TImode.  */
	      target = gen_lowpart (V4DImode, d->target);
	      op0 = gen_lowpart (V4DImode, d->op0);
	      op1 = gen_lowpart (V4DImode, d->op1);
	      rperm[0]
		= GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
			   || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
	      emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
	      return true;
	    }
	  return false;
	}
    }
  else
    {
      if (GET_MODE_SIZE (d->vmode) == 16)
	{
	  if (!TARGET_SSSE3)
	    return false;
	}
      else if (GET_MODE_SIZE (d->vmode) == 32)
	{
	  if (!TARGET_AVX2)
	    return false;

	  /* V4DImode should be already handled through
	     expand_vselect by vpermq instruction.  */
	  gcc_assert (d->vmode != V4DImode);

	  vmode = V32QImode;
	  if (d->vmode == V8SImode
	      || d->vmode == V16HImode
	      || d->vmode == V32QImode)
	    {
	      /* First see if vpermq can be used for
		 V8SImode/V16HImode/V32QImode.  */
	      if (valid_perm_using_mode_p (V4DImode, d))
		{
		  for (i = 0; i < 4; i++)
		    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
		  if (d->testing_p)
		    return true;
		  return expand_vselect (gen_lowpart (V4DImode, d->target),
					 gen_lowpart (V4DImode, d->op0),
					 perm, 4, false);
		}

	      /* Next see if vpermd can be used.  */
	      if (valid_perm_using_mode_p (V8SImode, d))
		vmode = V8SImode;
	    }
	  /* Or if vpermps can be used.  */
	  else if (d->vmode == V8SFmode)
	    vmode = V8SImode;

	  if (vmode == V32QImode)
	    {
	      /* vpshufb only works intra lanes, it is not
		 possible to shuffle bytes in between the lanes.  */
	      for (i = 0; i < nelt; ++i)
		if ((d->perm[i] ^ i) & (nelt / 2))
		  return false;
	    }
	}
      else
	return false;
    }

  if (d->testing_p)
    return true;

  if (vmode == V8SImode)
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
  else
    {
      eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
      if (!d->one_operand_p)
	mask = 2 * nelt - 1;
      else if (vmode == V16QImode)
	mask = nelt - 1;
      else
	mask = nelt / 2 - 1;

      for (i = 0; i < nelt; ++i)
	{
	  unsigned j, e = d->perm[i] & mask;
	  for (j = 0; j < eltsz; ++j)
	    rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
	}
    }

  vperm = gen_rtx_CONST_VECTOR (vmode,
				gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
  vperm = force_reg (vmode, vperm);

  target = gen_lowpart (vmode, d->target);
  op0 = gen_lowpart (vmode, d->op0);
  if (d->one_operand_p)
    {
      if (vmode == V16QImode)
	emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
      else if (vmode == V32QImode)
	emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
      else if (vmode == V8SFmode)
	emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
      else
	emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
    }
  else
    {
      op1 = gen_lowpart (vmode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->one_operand_p)
    {
      int mask = nelt - 1;
      bool identity_perm = true;
      bool broadcast_perm = true;

      for (i = 0; i < nelt; i++)
	{
	  perm2[i] = d->perm[i] & mask;
	  if (perm2[i] != i)
	    identity_perm = false;
	  if (perm2[i])
	    broadcast_perm = false;
	}

      if (identity_perm)
	{
	  if (!d->testing_p)
	    emit_move_insn (d->target, d->op0);
	  return true;
	}
      else if (broadcast_perm && TARGET_AVX2)
	{
	  /* Use vpbroadcast{b,w,d}.  */
	  rtx (*gen) (rtx, rtx) = NULL;
	  switch (d->vmode)
	    {
	    case V32QImode:
	      gen = gen_avx2_pbroadcastv32qi_1;
	      break;
	    case V16HImode:
	      gen = gen_avx2_pbroadcastv16hi_1;
	      break;
	    case V8SImode:
	      gen = gen_avx2_pbroadcastv8si_1;
	      break;
	    case V16QImode:
	      gen = gen_avx2_pbroadcastv16qi;
	      break;
	    case V8HImode:
	      gen = gen_avx2_pbroadcastv8hi;
	      break;
	    case V8SFmode:
	      gen = gen_avx2_vec_dupv8sf_1;
	      break;
	    /* For other modes prefer other shuffles this function creates.  */
	    default:
	      break;
	    }
	  if (gen != NULL)
	    {
	      if (!d->testing_p)
		emit_insn (gen (d->target, d->op0));
	      return true;
	    }
	}

      if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
	return true;

      /* There are plenty of patterns in sse.md that are written for
	 SEL+CONCAT and are not replicated for a single op.  Perhaps
	 that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
	 every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
	{
	  perm2[i] = d->perm[i] & mask;
	  perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
	}
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
				  d->testing_p))
	return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
	{
	  for (i = 0; i < nelt; i += 4)
	    {
	      perm2[i + 0] = d->perm[i + 0] & mask;
	      perm2[i + 1] = d->perm[i + 1] & mask;
	      perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
	      perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
	    }

	  if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
				      d->testing_p))
	    return true;
	}
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
			      d->testing_p))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned e = d->perm[i];
	  if (e >= nelt)
	    e -= nelt;
	  else
	    e += nelt;
	  perm2[i] = e;
	}

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
				  d->testing_p))
	return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
     vpshufb, vpermd, vpermps or vpermq variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  return false;
}
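/* E.g. for one-operand V4SImode with d->perm { 0, 0, 1, 1 }, the
   interleave recognition above builds perm2 = { 0, 4, 1, 5 }, which
   expand_vselect_vconcat can match as punpckldq of op0 with itself.  */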
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || !d->one_operand_p)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
  gcc_assert (ok);

  return true;
}
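/* E.g. d->perm { 3, 2, 1, 0, 7, 6, 5, 4 } passes the lane checks and is
   handled as pshuflw with selector { 3, 2, 1, 0, 4, 5, 6, 7 } followed
   by pshufhw with selector { 0, 1, 2, 3, 7, 6, 5, 4 }.  */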
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max;
  bool in_order, ok;
  rtx shift;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e < min)
	min = e;
      if (e > max)
	max = e;
    }
  if (min == 0 || max - min >= nelt)
    return false;

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */
  if (d->testing_p)
    return true;

  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
				  gen_lowpart (TImode, d->op1),
				  gen_lowpart (TImode, d->op0), shift));

  d->op0 = d->op1 = d->target;
  d->one_operand_p = true;

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i] - min;
      if (e != i)
	in_order = false;
      d->perm[i] = e;
    }

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    return true;

  ok = expand_vec_perm_1 (d);
  gcc_assert (ok);

  return ok;
}
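/* E.g. a V16QImode permutation whose indices all lie in [3, 18] has
   min == 3 and max - min < 16, so the palignr above shifts everything
   down by 3 bytes and the residual indices in [0, 15] form a
   single-operand permutation for pshufb.  */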
static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned HOST_WIDE_INT contents;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx seq;
  bool ok, same_halves = false;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      if (d->one_operand_p)
	return false;
    }
  else if (GET_MODE_SIZE (d->vmode) == 32)
    {
      if (!TARGET_AVX)
	return false;
      /* For 32-byte modes allow even d->one_operand_p.
	 The lack of cross-lane shuffling in some instructions
	 might prevent a single insn shuffle.  */
      dfinal = *d;
      dfinal.testing_p = true;
      /* If expand_vec_perm_interleave3 can expand this into
	 a 3 insn sequence, give up and let it be expanded as
	 3 insn sequence.  While that is one insn longer,
	 it doesn't need a memory operand and in the common
	 case that both interleave low and high permutations
	 with the same operands are adjacent needs 4 insns
	 for both after CSE.  */
      if (expand_vec_perm_interleave3 (&dfinal))
	return false;
    }
  else
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      unsigned HOST_WIDE_INT h1, h2, h3, h4;

      /* Split the two input vectors into 4 halves.  */
      h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
      h2 = h1 << nelt2;
      h3 = h2 << nelt2;
      h4 = h3 << nelt2;

      /* If the elements from the low halves use interleave low, and similarly
	 for interleave high.  If the elements are from mis-matched halves, we
	 can use shufps for V4SF/V4SI or do a DImode shuffle.  */
      if ((contents & (h1 | h3)) == contents)
	{
	  /* punpckl* */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i] = i * 2;
	      remap[i + nelt] = i * 2 + 1;
	      dremap.perm[i * 2] = i;
	      dremap.perm[i * 2 + 1] = i + nelt;
	    }
	  if (!TARGET_SSE2 && d->vmode == V4SImode)
	    dremap.vmode = V4SFmode;
	}
      else if ((contents & (h2 | h4)) == contents)
	{
	  /* punpckh* */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nelt2] = i * 2;
	      remap[i + nelt + nelt2] = i * 2 + 1;
	      dremap.perm[i * 2] = i + nelt2;
	      dremap.perm[i * 2 + 1] = i + nelt + nelt2;
	    }
	  if (!TARGET_SSE2 && d->vmode == V4SImode)
	    dremap.vmode = V4SFmode;
	}
      else if ((contents & (h1 | h4)) == contents)
	{
	  /* shufps */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i] = i;
	      remap[i + nelt + nelt2] = i + nelt2;
	      dremap.perm[i] = i;
	      dremap.perm[i + nelt2] = i + nelt + nelt2;
	    }
	  if (nelt != 4)
	    {
	      /* shufpd */
	      dremap.vmode = V2DImode;
	      dremap.nelt = 2;
	      dremap.perm[0] = 0;
	      dremap.perm[1] = 3;
	    }
	}
      else if ((contents & (h2 | h3)) == contents)
	{
	  /* shufps */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nelt2] = i;
	      remap[i + nelt] = i + nelt2;
	      dremap.perm[i] = i + nelt2;
	      dremap.perm[i + nelt2] = i + nelt;
	    }
	  if (nelt != 4)
	    {
	      /* shufpd */
	      dremap.vmode = V2DImode;
	      dremap.nelt = 2;
	      dremap.perm[0] = 1;
	      dremap.perm[1] = 2;
	    }
	}
      else
	return false;
    }
  else
    {
      unsigned int nelt4 = nelt / 4, nzcnt = 0;
      unsigned HOST_WIDE_INT q[8];
      unsigned int nonzero_halves[4];

      /* Split the two input vectors into 8 quarters.  */
      q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
      for (i = 1; i < 8; ++i)
	q[i] = q[0] << (nelt4 * i);
      for (i = 0; i < 4; ++i)
	if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
	  {
	    nonzero_halves[nzcnt] = i;
	    ++nzcnt;
	  }

      if (nzcnt == 1)
	{
	  gcc_assert (d->one_operand_p);
	  nonzero_halves[1] = nonzero_halves[0];
	  same_halves = true;
	}
      else if (d->one_operand_p)
	{
	  gcc_assert (nonzero_halves[0] == 0);
	  gcc_assert (nonzero_halves[1] == 1);
	}

      if (nzcnt <= 2)
	{
	  if (d->perm[0] / nelt2 == nonzero_halves[1])
	    {
	      /* Attempt to increase the likelihood that dfinal
		 shuffle will be intra-lane.  */
	      char tmph = nonzero_halves[0];
	      nonzero_halves[0] = nonzero_halves[1];
	      nonzero_halves[1] = tmph;
	    }

	  /* vperm2f128 or vperm2i128.  */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
	      remap[i + nonzero_halves[0] * nelt2] = i;
	      dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
	      dremap.perm[i] = i + nonzero_halves[0] * nelt2;
	    }

	  if (d->vmode != V8SFmode
	      && d->vmode != V4DFmode
	      && d->vmode != V8SImode)
	    {
	      dremap.vmode = V8SImode;
	      dremap.nelt = 8;
	      for (i = 0; i < 4; ++i)
		{
		  dremap.perm[i] = i + nonzero_halves[0] * 4;
		  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
		}
	    }
	}
      else if (d->one_operand_p)
	return false;
      else if (TARGET_AVX2
	       && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
	{
	  /* vpunpckl* */
	  for (i = 0; i < nelt4; ++i)
	    {
	      remap[i] = i * 2;
	      remap[i + nelt] = i * 2 + 1;
	      remap[i + nelt2] = i * 2 + nelt2;
	      remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
	      dremap.perm[i * 2] = i;
	      dremap.perm[i * 2 + 1] = i + nelt;
	      dremap.perm[i * 2 + nelt2] = i + nelt2;
	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
	    }
	}
      else if (TARGET_AVX2
	       && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
	{
	  /* vpunpckh* */
	  for (i = 0; i < nelt4; ++i)
	    {
	      remap[i + nelt4] = i * 2;
	      remap[i + nelt + nelt4] = i * 2 + 1;
	      remap[i + nelt2 + nelt4] = i * 2 + nelt2;
	      remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
	      dremap.perm[i * 2] = i + nelt4;
	      dremap.perm[i * 2 + 1] = i + nelt + nelt4;
	      dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
	    }
	}
      else
	return false;
    }

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      /* If same_halves is true, both halves of the remapped vector are the
	 same.  Avoid cross-lane accesses if possible.  */
      if (same_halves && i >= nelt2)
	{
	  gcc_assert (e < nelt2);
	  dfinal.perm[i] = e + nelt2;
	}
      else
	dfinal.perm[i] = e;
    }
  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
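/* E.g. for V4SImode with d->perm { 0, 4, 1, 5 }, contents has only the
   two low-half bits set (h1 | h3), so dremap becomes the punpckldq
   permutation { 0, 4, 1, 5 } and the final remap is the identity, which
   expand_vec_perm_1 matches trivially.  */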
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a single vector cross-lane permutation into vpermq followed
   by any of the single insn permutations.  */

static bool
expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
  unsigned contents[2];
  bool ok;

  if (!(TARGET_AVX2
	&& (d->vmode == V32QImode || d->vmode == V16HImode)
	&& d->one_operand_p))
    return false;

  contents[0] = 0;
  contents[1] = 0;
  for (i = 0; i < nelt2; ++i)
    {
      contents[0] |= 1u << (d->perm[i] / nelt4);
      contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
    }

  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
	if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
	  return false;
    }

  if (d->testing_p)
    return true;

  dremap = *d;
  dremap.vmode = V4DImode;
  dremap.nelt = 4;
  dremap.target = gen_reg_rtx (V4DImode);
  dremap.op0 = gen_lowpart (V4DImode, d->op0);
  dremap.op1 = dremap.op0;
  dremap.one_operand_p = true;
  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
	if ((contents[i] & (1u << j)) != 0)
	  dremap.perm[2 * i + cnt++] = j;
      for (; cnt < 2; ++cnt)
	dremap.perm[2 * i + cnt] = 0;
    }

  dfinal = *d;
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  for (i = 0, j = 0; i < nelt; ++i)
    {
      if (i == nelt2)
	j = 2;
      dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
      if ((d->perm[i] / nelt4) == dremap.perm[j])
	;
      else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
	dfinal.perm[i] |= nelt4;
      else
	gcc_unreachable ();
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  ok = expand_vec_perm_1 (&dfinal);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to expand
   a vector permutation using two instructions, vperm2f128 resp.
   vperm2i128 followed by any single in-lane permutation.  */

static bool
expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
  bool ok;

  if (!TARGET_AVX
      || GET_MODE_SIZE (d->vmode) != 32
      || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
    return false;

  dsecond = *d;
  dsecond.one_operand_p = false;
  dsecond.testing_p = true;

  /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
     immediate.  For perm < 16 the second permutation uses
     d->op0 as first operand, for perm >= 16 it uses d->op1
     as first operand.  The second operand is the result of
     vperm2[fi]128.  */
  for (perm = 0; perm < 32; perm++)
    {
      /* Ignore permutations which do not move anything cross-lane.  */
      if (perm < 16)
	{
	  /* The second shuffle for e.g. V4DFmode has
	     0123 and ABCD operands.
	     Ignore AB23, as 23 is already in the second lane
	     of the first operand.  */
	  if ((perm & 0xc) == (1 << 2)) continue;
	  /* And 01CD, as 01 is in the first lane of the first
	     operand.  */
	  if ((perm & 3) == 0) continue;
	  /* And 4567, as then the vperm2[fi]128 doesn't change
	     anything on the original 4567 second operand.  */
	  if ((perm & 0xf) == ((3 << 2) | 2)) continue;
	}
      else
	{
	  /* The second shuffle for e.g. V4DFmode has
	     4567 and ABCD operands.
	     Ignore AB67, as 67 is already in the second lane
	     of the first operand.  */
	  if ((perm & 0xc) == (3 << 2)) continue;
	  /* And 45CD, as 45 is in the first lane of the first
	     operand.  */
	  if ((perm & 3) == 2) continue;
	  /* And 0123, as then the vperm2[fi]128 doesn't change
	     anything on the original 0123 first operand.  */
	  if ((perm & 0xf) == (1 << 2)) continue;
	}

      for (i = 0; i < nelt; i++)
	{
	  j = d->perm[i] / nelt2;
	  if (j == ((perm >> (2 * (i >= nelt2))) & 3))
	    dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
	  else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
	    dsecond.perm[i] = d->perm[i] & (nelt - 1);
	  else
	    break;
	}

      if (i == nelt)
	{
	  start_sequence ();
	  ok = expand_vec_perm_1 (&dsecond);
	  end_sequence ();
	}
      else
	ok = false;

      if (ok)
	{
	  if (d->testing_p)
	    return true;

	  /* Found a usable second shuffle.  dfirst will be
	     vperm2f128 on d->op0 and d->op1.  */
	  dsecond.testing_p = false;
	  dfirst = *d;
	  dfirst.target = gen_reg_rtx (d->vmode);
	  for (i = 0; i < nelt; i++)
	    dfirst.perm[i] = (i & (nelt2 - 1))
			     + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;

	  ok = expand_vec_perm_1 (&dfirst);
	  gcc_assert (ok);

	  /* And dsecond is some single insn shuffle, taking
	     d->op0 and result of vperm2f128 (if perm < 16) or
	     d->op1 and result of vperm2f128 (otherwise).  */
	  dsecond.op1 = dfirst.target;
	  if (perm >= 16)
	    dsecond.op0 = dfirst.op1;

	  ok = expand_vec_perm_1 (&dsecond);
	  gcc_assert (ok);

	  return true;
	}

      /* For one operand, the only useful vperm2f128 permutation is 0x10.  */
      if (d->one_operand_p)
	return false;
    }

  return false;
}
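/* E.g. for V4DFmode and perm == 6 (binary 0110), the dfirst permutation
   built above is { 4, 5, 2, 3 }: the low lane of the vperm2f128 result
   comes from the low lane of d->op1 and the high lane from the high
   lane of d->op0.  */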
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation using 2 intra-lane interleave insns
   and cross-lane shuffle for 32-byte vectors.  */

static bool
expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
    ;
  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
    ;
  else
    return false;

  nelt = d->nelt;
  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
    return false;
  for (i = 0; i < nelt; i += 2)
    if (d->perm[i] != d->perm[0] + i / 2
	|| d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
      return false;

  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V32QImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv32qi;
      else
	gen = gen_vec_interleave_lowv32qi;
      break;
    case V16HImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv16hi;
      else
	gen = gen_vec_interleave_lowv16hi;
      break;
    case V8SImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8si;
      else
	gen = gen_vec_interleave_lowv8si;
      break;
    case V4DImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4di;
      else
	gen = gen_vec_interleave_lowv4di;
      break;
    case V8SFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8sf;
      else
	gen = gen_vec_interleave_lowv8sf;
      break;
    case V4DFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4df;
      else
	gen = gen_vec_interleave_lowv4df;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (d->target, d->op0, d->op1));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement
   a single vector permutation using a single intra-lane vector
   permutation, vperm2f128 swapping the lanes and vblend* insn blending
   the non-swapped and swapped vectors together.  */

static bool
expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
  rtx seq;
  bool ok;
  rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;

  if (!TARGET_AVX
      || TARGET_AVX2
      || (d->vmode != V8SFmode && d->vmode != V4DFmode)
      || !d->one_operand_p)
    return false;

  dfirst = *d;
  for (i = 0; i < nelt; i++)
    dfirst.perm[i] = 0xff;
  for (i = 0, msk = 0; i < nelt; i++)
    {
      j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
      if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
	return false;
      dfirst.perm[j] = d->perm[i];
      if (j != i)
	msk |= (1 << i);
    }
  for (i = 0; i < nelt; i++)
    if (dfirst.perm[i] == 0xff)
      dfirst.perm[i] = i;

  if (!d->testing_p)
    dfirst.target = gen_reg_rtx (dfirst.vmode);

  start_sequence ();
  ok = expand_vec_perm_1 (&dfirst);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  emit_insn (seq);

  dsecond = *d;
  dsecond.op0 = dfirst.target;
  dsecond.op1 = dfirst.target;
  dsecond.one_operand_p = true;
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  for (i = 0; i < nelt; i++)
    dsecond.perm[i] = i ^ nelt2;

  ok = expand_vec_perm_1 (&dsecond);
  gcc_assert (ok);

  blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
  emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement a V4DF
   permutation using two vperm2f128, followed by a vshufpd insn blending
   the two vectors together.  */

static bool
expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond, dthird;
  bool ok;

  if (!TARGET_AVX || (d->vmode != V4DFmode))
    return false;

  if (d->testing_p)
    return true;

  dfirst = *d;
  dsecond = *d;
  dthird = *d;

  dfirst.perm[0] = (d->perm[0] & ~1);
  dfirst.perm[1] = (d->perm[0] & ~1) + 1;
  dfirst.perm[2] = (d->perm[2] & ~1);
  dfirst.perm[3] = (d->perm[2] & ~1) + 1;
  dsecond.perm[0] = (d->perm[1] & ~1);
  dsecond.perm[1] = (d->perm[1] & ~1) + 1;
  dsecond.perm[2] = (d->perm[3] & ~1);
  dsecond.perm[3] = (d->perm[3] & ~1) + 1;
  dthird.perm[0] = (d->perm[0] % 2);
  dthird.perm[1] = (d->perm[1] % 2) + 4;
  dthird.perm[2] = (d->perm[2] % 2) + 2;
  dthird.perm[3] = (d->perm[3] % 2) + 6;

  dfirst.target = gen_reg_rtx (dfirst.vmode);
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  dthird.op0 = dfirst.target;
  dthird.op1 = dsecond.target;
  dthird.one_operand_p = false;

  canonicalize_perm (&dfirst);
  canonicalize_perm (&dsecond);

  ok = expand_vec_perm_1 (&dfirst)
       && expand_vec_perm_1 (&dsecond)
       && expand_vec_perm_1 (&dthird);

  gcc_assert (ok);

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (!d->one_operand_p);

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (e >= nelt)
	e -= nelt;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
	  rperm[1-which][i*eltsz + j] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = gen_lowpart (V16QImode, d->target);
  emit_insn (gen_iorv16qi3 (op, l, h));

  return true;
}
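/* E.g. for the two-operand V8HImode extract-even permutation
   { 0, 2, 4, 6, 8, 10, 12, 14 } with eltsz == 2, the first byte mask
   becomes { 0, 1, 4, 5, 8, 9, 12, 13, -128 x 8 } and the second
   { -128 x 8, 0, 1, 4, 5, 8, 9, 12, 13 }.  */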
/* Implement arbitrary permutation of one V32QImode and V16QImode operand
   with two vpshufb insns, vpermq and vpor.  We should have already failed
   all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, hp, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || !d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from
     the other lane, force a zero by setting bit 7 in the permutation
     mask.  The other mask has non-negative elements if the element is
     requested from the other lane, but is also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
	  rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  /* Swap the 128-bit lanes of h into hp.  */
  hp = gen_reg_rtx (V4DImode);
  op = gen_lowpart (V4DImode, h);
  emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
				  const1_rtx));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V32QImode and V16QImode operand
   with two vpshufb insns, vpor and vpermq.  We should have already
   failed all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, ior, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  for (i = 0; i < d->nelt; ++i)
    if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
      return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  In the first permutation mask
     the first quarter will contain indexes for the first half
     of the op0, the second quarter will contain bit 7 set, third quarter
     will contain indexes for the second half of the op0 and the
     last quarter bit 7 set.  In the second permutation mask
     the first quarter will contain bit 7 set, the second quarter
     indexes for the first half of the op1, the third quarter bit 7 set
     and last quarter indexes for the second half of the op1.
     I.e. the first mask e.g. for V32QImode extract even will be:
     0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
     (all values masked with 0xf except for -128) and second mask
     for extract even will be
     -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = d->perm[i] >= nelt;
      unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
	  rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op1);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  ior = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (ior, l, h));

  /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
  op = gen_lowpart (V4DImode, d->target);
  ior = gen_lowpart (V4DImode, ior);
  emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
				  const1_rtx, GEN_INT (3)));

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3;

  switch (d->vmode)
    {
    case V4DFmode:
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
	t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
	t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SFmode:
      {
	int mask = odd ? 0xdd : 0x88;

	t1 = gen_reg_rtx (V8SFmode);
	t2 = gen_reg_rtx (V8SFmode);
	t3 = gen_reg_rtx (V8SFmode);

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
	emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
				      GEN_INT (mask)));

	/* Shuffle the lanes around to produce:
	   { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
					    GEN_INT (0x3)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
	emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
	emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));

	/* Shuffle the lanes around to produce:
	   { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
					    GEN_INT (0x20)));
      }
      break;

    case V2DFmode:
    case V4SFmode:
    case V2DImode:
    case V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case V8HImode:
      if (TARGET_SSSE3)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  /* We need 2*log2(N)-1 operations to achieve odd/even
	     with interleave. */
	  t1 = gen_reg_rtx (V8HImode);
	  t2 = gen_reg_rtx (V8HImode);
	  emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
	  if (odd)
	    t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
	  else
	    t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
	  emit_insn (t3);
	}
      break;

    case V16QImode:
      if (TARGET_SSSE3)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  t1 = gen_reg_rtx (V16QImode);
	  t2 = gen_reg_rtx (V16QImode);
	  t3 = gen_reg_rtx (V16QImode);
	  emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
	  emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
	  if (odd)
	    t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
	  else
	    t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
	  emit_insn (t3);
	}
      break;

    case V16HImode:
    case V32QImode:
      return expand_vec_perm_vpshufb2_vpermq_even_odd (d);

    case V4DImode:
      if (!TARGET_AVX2)
	{
	  struct expand_vec_perm_d d_copy = *d;
	  d_copy.vmode = V4DFmode;
	  d_copy.target = gen_lowpart (V4DFmode, d->target);
	  d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
	  d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
	}

      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now a vpunpck[lh]qdq will produce the result required.  */
      if (odd)
	t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
      else
	t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SImode:
      if (!TARGET_AVX2)
	{
	  struct expand_vec_perm_d d_copy = *d;
	  d_copy.vmode = V8SFmode;
	  d_copy.target = gen_lowpart (V8SFmode, d->target);
	  d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
	  d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
	}

      t1 = gen_reg_rtx (V8SImode);
      t2 = gen_reg_rtx (V8SImode);

      /* Shuffle the lanes around into
	 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
				    gen_lowpart (V4DImode, d->op0),
				    gen_lowpart (V4DImode, d->op1),
				    GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
				    gen_lowpart (V4DImode, d->op0),
				    gen_lowpart (V4DImode, d->op1),
				    GEN_INT (0x31)));

      /* Swap the 2nd and 3rd position in each lane into
	 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
      emit_insn (gen_avx2_pshufdv3 (t1, t1,
				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
      emit_insn (gen_avx2_pshufdv3 (t2, t2,
				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));

      /* Now a vpunpck[lh]qdq will produce
	 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
      if (odd)
	t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
					   gen_lowpart (V4DImode, t1),
					   gen_lowpart (V4DImode, t2));
      else
	t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
					  gen_lowpart (V4DImode, t1),
					  gen_lowpart (V4DImode, t2));
      emit_insn (t3);
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
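/* E.g. with nelt == 4, d->perm { 1, 3, 5, 7 } gives odd == 1 and each
   d->perm[i] equals 2 * i + 1, so the extract-odd expander is used.  */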
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  enum machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0;
  bool ok;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
	 use the vbroadcast instruction.  They expand to two insns
	 if the input happens to be in a register.  */
      gcc_unreachable ();

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case V8HImode:
    case V16QImode:
      /* These can be implemented via interleave.  We save one insn by
	 stopping once we have promoted to V4SImode and then use pshufd.  */
      do
	{
	  rtx dest;
	  rtx (*gen) (rtx, rtx, rtx)
	    = vmode == V16QImode ? gen_vec_interleave_lowv16qi
				 : gen_vec_interleave_lowv8hi;

	  if (elt >= nelt2)
	    {
	      gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
				       : gen_vec_interleave_highv8hi;
	      elt -= nelt2;
	    }
	  nelt2 /= 2;

	  dest = gen_reg_rtx (vmode);
	  emit_insn (gen (dest, op0, op0));
	  vmode = get_mode_wider_vector (vmode);
	  op0 = gen_lowpart (vmode, dest);
	}
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4,
			   d->testing_p);
      gcc_assert (ok);
      return true;

    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      /* For AVX2 broadcasts of the first element vpbroadcast* or
	 vpermq should be used by expand_vec_perm_1.  */
      gcc_assert (!TARGET_AVX2 || d->perm[0]);
      return false;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
/* Implement arbitrary permutation of two V32QImode and V16QImode operands
   with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
   all the shorter instruction sequences.  */

static bool
expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
{
  rtx rperm[4][32], vperm, l[2], h[2], op, m128;
  unsigned int i, nelt, eltsz;
  bool used[4];

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate 4 permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from
     the other lane, force a zero by setting bit 7 in the permutation
     mask.  The other mask has non-negative elements if the element is
     requested from the other lane, but is also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < 32; ++i)
    {
      rperm[0][i] = m128;
      rperm[1][i] = m128;
      rperm[2][i] = m128;
      rperm[3][i] = m128;
    }
  used[0] = false;
  used[1] = false;
  used[2] = false;
  used[3] = false;
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
      unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);

      for (j = 0; j < eltsz; ++j)
	rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
      used[which] = true;
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i + 1])
	{
	  h[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode,
				    gen_rtvec_v (32, rperm[2 * i + 1]));
      vperm = force_reg (V32QImode, vperm);
      h[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
    }

  /* Swap the 128-bit lanes of h[X].  */
  for (i = 0; i < 2; ++i)
    {
      if (h[i] == NULL_RTX)
	continue;
      op = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
				      const2_rtx, GEN_INT (3), const0_rtx,
				      const1_rtx));
      h[i] = gen_lowpart (V32QImode, op);
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i])
	{
	  l[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
      vperm = force_reg (V32QImode, vperm);
      l[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
    }

  for (i = 0; i < 2; ++i)
    {
      if (h[i] && l[i])
	{
	  op = gen_reg_rtx (V32QImode);
	  emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
	  l[i] = op;
	}
      else if (h[i])
	l[i] = h[i];
    }

  gcc_assert (l[0] && l[1]);
  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
  return true;
}
/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  if (expand_vec_perm_vpermq_perm_1 (d))
    return true;

  if (expand_vec_perm_vperm2f128 (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_2vperm2f128_vshuf (d))
    return true;

  if (expand_vec_perm_pshufb2 (d))
    return true;

  if (expand_vec_perm_interleave3 (d))
    return true;

  if (expand_vec_perm_vperm2f128_vblend (d))
    return true;

  /* Try sequences of four instructions.  */

  if (expand_vec_perm_vpshufb2_vpermq (d))
    return true;

  if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly... */

  if (expand_vec_perm_even_odd (d))
    return true;

  /* Even longer sequences.  */
  if (expand_vec_perm_vpshufb4_vpermq2 (d))
    return true;

  return false;
}
/* If a permutation only uses one operand, make it clear.  Returns true
   if the permutation references both operands.  */

static bool
canonicalize_perm (struct expand_vec_perm_d *d)
{
  int i, which, nelt = d->nelt;

  for (i = which = 0; i < nelt; ++i)
    which |= (d->perm[i] < nelt ? 1 : 2);

  d->one_operand_p = true;
  switch (which)
    {
    default:
      gcc_unreachable();

    case 3:
      if (!rtx_equal_p (d->op0, d->op1))
	{
	  d->one_operand_p = false;
	  break;
	}
      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */

    case 2:
      for (i = 0; i < nelt; ++i)
	d->perm[i] &= nelt - 1;
      d->op0 = d->op1;
      break;

    case 1:
      d->op1 = d->op0;
      break;
    }

  return (which == 3);
}
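/* E.g. with nelt == 4, a permutation { 4, 5, 6, 7 } only references the
   second operand (which == 2), so it is folded to { 0, 1, 2, 3 } on that
   operand and the function returns false.  */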
bool
ix86_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt;
  bool two_args;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  two_args = canonicalize_perm (&d);

  if (ix86_expand_vec_perm_const_1 (&d))
    return true;

  /* If the selector says both arguments are needed, but the operands are the
     same, the above tried to expand with one_operand_p and flattened selector.
     If that didn't work, retry without one_operand_p; we succeeded with that
     during testing.  */
  if (two_args && d.one_operand_p)
    {
      d.one_operand_p = false;
      memcpy (d.perm, perm, sizeof (perm));
      return ix86_expand_vec_perm_const_1 (&d);
    }

  return false;
}
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  if (GET_MODE_SIZE (d.vmode) == 16)
    {
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
	return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
	return true;
      /* All implementable with shufpd or unpck[lh]pd.  */
      if (d.nelt == 2)
	return true;
    }

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Implementable with shufps or pshufd.  */
  if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
void
ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt, base;
  bool ok;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  base = high_p ? nelt / 2 : 0;
  for (i = 0; i < nelt / 2; ++i)
    {
      d.perm[i * 2] = i + base;
      d.perm[i * 2 + 1] = i + base + nelt;
    }

  /* Note that for AVX this isn't one instruction.  */
  ok = ix86_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
/* Expand a vector operation CODE for a V*QImode in terms of the
   same operation on V*HImode.  */

void
ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
{
  enum machine_mode qimode = GET_MODE (dest);
  enum machine_mode himode;
  rtx (*gen_il) (rtx, rtx, rtx);
  rtx (*gen_ih) (rtx, rtx, rtx);
  rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
  struct expand_vec_perm_d d;
  bool ok, full_interleave;
  bool uns_p = false;
  int i;

  switch (qimode)
    {
    case V16QImode:
      himode = V8HImode;
      gen_il = gen_vec_interleave_lowv16qi;
      gen_ih = gen_vec_interleave_highv16qi;
      break;
    case V32QImode:
      himode = V16HImode;
      gen_il = gen_avx2_interleave_lowv32qi;
      gen_ih = gen_avx2_interleave_highv32qi;
      break;
    default:
      gcc_unreachable ();
    }

  op2_l = op2_h = op2;
  switch (code)
    {
    case MULT:
      /* Unpack data such that we've got a source byte in each low byte of
	 each word.  We don't care what goes into the high byte of each word.
	 Rather than trying to get zero in there, most convenient is to let
	 it be a copy of the low byte.  */
      op2_l = gen_reg_rtx (qimode);
      op2_h = gen_reg_rtx (qimode);
      emit_insn (gen_il (op2_l, op2, op2));
      emit_insn (gen_ih (op2_h, op2, op2));

      op1_l = gen_reg_rtx (qimode);
      op1_h = gen_reg_rtx (qimode);
      emit_insn (gen_il (op1_l, op1, op1));
      emit_insn (gen_ih (op1_h, op1, op1));
      full_interleave = qimode == V16QImode;
      break;

    case ASHIFT:
    case LSHIFTRT:
      uns_p = true;
      /* FALLTHRU */
    case ASHIFTRT:
      op1_l = gen_reg_rtx (himode);
      op1_h = gen_reg_rtx (himode);
      ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
      ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
      full_interleave = true;
      break;
    default:
      gcc_unreachable ();
    }

  /* Perform the operation.  */
  res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
			       1, OPTAB_DIRECT);
  res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
			       1, OPTAB_DIRECT);
  gcc_assert (res_l && res_h);

  /* Merge the data back into the right place.  */
  d.target = dest;
  d.op0 = gen_lowpart (qimode, res_l);
  d.op1 = gen_lowpart (qimode, res_h);
  d.vmode = qimode;
  d.nelt = GET_MODE_NUNITS (qimode);
  d.one_operand_p = false;
  d.testing_p = false;

  if (full_interleave)
    {
      /* For SSE2, we used a full interleave, so the desired
	 results are in the even elements.  */
      for (i = 0; i < 32; ++i)
	d.perm[i] = i * 2;
    }
  else
    {
      /* For AVX, the interleave used above was not cross-lane.  So the
	 extraction is evens but with the second and third quarter swapped.
	 Happily, that is even one insn shorter than even extraction.  */
      for (i = 0; i < 32; ++i)
	d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
    }

  ok = ix86_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_fmt_ee (code, qimode, op1, op2));
}
void
ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
			       bool uns_p, bool odd_p)
{
  enum machine_mode mode = GET_MODE (op1);
  enum machine_mode wmode = GET_MODE (dest);
  rtx x;

  /* We only play even/odd games with vectors of SImode.  */
  gcc_assert (mode == V4SImode || mode == V8SImode);

  /* If we're looking for the odd results, shift those members down to
     the even slots.  For some cpus this is faster than a PSHUFD.  */
  if (odd_p)
    {
      if (TARGET_XOP && mode == V4SImode)
	{
	  x = force_reg (wmode, CONST0_RTX (wmode));
	  emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
	  return;
	}

      x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
      op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
			  x, NULL, 1, OPTAB_DIRECT);
      op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
			  x, NULL, 1, OPTAB_DIRECT);
      op1 = gen_lowpart (mode, op1);
      op2 = gen_lowpart (mode, op2);
    }

  if (mode == V8SImode)
    {
      if (uns_p)
	x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
      else
	x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
    }
  else if (uns_p)
    x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
  else if (TARGET_SSE4_1)
    x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
  else
    {
      rtx s1, s2, t0, t1, t2;

      /* The easiest way to implement this without PMULDQ is to go through
	 the motions as if we are performing a full 64-bit multiply.  With
	 the exception that we need to do less shuffling of the elements.  */

      /* Compute the sign-extension, aka highparts, of the two operands.  */
      s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
				op1, pc_rtx, pc_rtx);
      s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
				op2, pc_rtx, pc_rtx);

      /* Multiply LO(A) * HI(B), and vice-versa.  */
      t1 = gen_reg_rtx (wmode);
      t2 = gen_reg_rtx (wmode);
      emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
      emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));

      /* Multiply LO(A) * LO(B).  */
      t0 = gen_reg_rtx (wmode);
      emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));

      /* Combine and shift the highparts into place.  */
      t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
      t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
			 1, OPTAB_DIRECT);

      /* Combine high and low parts.  */
      force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
      return;
    }
  emit_insn (x);
}
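/* The signed fallback above rests on the identity, for 32-bit values
   a, b with sign bits sa, sb:
     (a - 2^32*sa) * (b - 2^32*sb)
       == a*b - 2^32*(a*sb + b*sa)  (mod 2^64).
   The all-ones comparison masks s1/s2, fed through the unsigned
   widening multiplies and the shift by 32, contribute exactly the
   -2^32*(a*sb + b*sa) correction term.  */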
void
ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
			    bool uns_p, bool high_p)
{
  enum machine_mode wmode = GET_MODE (dest);
  enum machine_mode mode = GET_MODE (op1);
  rtx t1, t2, t3, t4, mask;

  switch (mode)
    {
    case V4SImode:
      t1 = gen_reg_rtx (mode);
      t2 = gen_reg_rtx (mode);
      if (TARGET_XOP && !uns_p)
	{
	  /* With XOP, we have pmacsdqh, aka mul_widen_odd.  In this case,
	     shuffle the elements once so that all elements are in the right
	     place for immediate use: { A C B D }.  */
	  emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
					const1_rtx, GEN_INT (3)));
	  emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
					const1_rtx, GEN_INT (3)));
	}
      else
	{
	  /* Put the elements into place for the multiply.  */
	  ix86_expand_vec_interleave (t1, op1, op1, high_p);
	  ix86_expand_vec_interleave (t2, op2, op2, high_p);
	  high_p = false;
	}
      ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
      break;

    case V8SImode:
      /* Shuffle the elements between the lanes.  After this we
	 have { A B E F | C D G H } for each operand.  */
      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
				      const0_rtx, const2_rtx,
				      const1_rtx, GEN_INT (3)));
      emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
				      const0_rtx, const2_rtx,
				      const1_rtx, GEN_INT (3)));

      /* Shuffle the elements within the lanes.  After this we
	 have { A A B B | C C D D } or { E E F F | G G H H }.  */
      t3 = gen_reg_rtx (V8SImode);
      t4 = gen_reg_rtx (V8SImode);
      mask = GEN_INT (high_p
		      ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
		      : 0 + (0 << 2) + (1 << 4) + (1 << 6));
      emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
      emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));

      ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
      break;

    case V8HImode:
    case V16HImode:
      t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
			 uns_p, OPTAB_DIRECT);
      t2 = expand_binop (mode,
			 uns_p ? umul_highpart_optab : smul_highpart_optab,
			 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
      gcc_assert (t1 && t2);

      ix86_expand_vec_interleave (gen_lowpart (mode, dest), t1, t2, high_p);
      break;

    case V16QImode:
    case V32QImode:
      t1 = gen_reg_rtx (wmode);
      t2 = gen_reg_rtx (wmode);
      ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
      ix86_expand_sse_unpack (t2, op2, uns_p, high_p);

      emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
      break;

    default:
      gcc_unreachable ();
    }
}
void
ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
{
  rtx res_1, res_2;

  res_1 = gen_reg_rtx (V4SImode);
  res_2 = gen_reg_rtx (V4SImode);
  ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_1),
                                 op1, op2, true, false);
  ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_2),
                                 op1, op2, true, true);

  /* Move the results in element 2 down to element 1; we don't care
     what goes in elements 2 and 3.  Then we can merge the parts
     back together with an interleave.

     Note that two other sequences were tried:
     (1) Use interleaves at the start instead of psrldq, which allows
         us to use a single shufps to merge things back at the end.
     (2) Use shufps here to combine the two vectors, then pshufd to
         put the elements in the correct order.
     In both cases the cost of the reformatting stall was too high
     and the overall sequence slower.  */

  emit_insn (gen_sse2_pshufd_1 (res_1, res_1, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (res_2, res_2, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));
  res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));

  set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
}
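/* Illustrative sketch (not part of the original source): the sequence above
   builds a V4SImode multiply out of two unsigned even/odd widening
   multiplies, keeping only the low 32 bits of each 64-bit product.  The
   per-lane scalar equivalent is:

     static void
     mulv4si (unsigned int r[4], const unsigned int a[4],
              const unsigned int b[4])
     {
       int i;
       for (i = 0; i < 4; i++)
         r[i] = (unsigned int) ((unsigned long long) a[i] * b[i]);
     }
*/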
void
ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx t1, t2, t3, t4, t5, t6;

  if (TARGET_XOP && mode == V2DImode)
    {
      /* op1: A,B,C,D, op2: E,F,G,H */
      op1 = gen_lowpart (V4SImode, op1);
      op2 = gen_lowpart (V4SImode, op2);

      t1 = gen_reg_rtx (V4SImode);
      t2 = gen_reg_rtx (V4SImode);
      t3 = gen_reg_rtx (V2DImode);
      t4 = gen_reg_rtx (V2DImode);

      /* t1: B,A,D,C */
      emit_insn (gen_sse2_pshufd_1 (t1, op1,
                                    GEN_INT (1), GEN_INT (0),
                                    GEN_INT (3), GEN_INT (2)));

      /* t2: (B*E),(A*F),(D*G),(C*H) */
      emit_insn (gen_mulv4si3 (t2, t1, op2));

      /* t3: (B*E)+(A*F), (D*G)+(C*H) */
      emit_insn (gen_xop_phadddq (t3, t2));

      /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
      emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));

      /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
      emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
    }
  else
    {
      enum machine_mode nmode;
      rtx (*umul) (rtx, rtx, rtx);

      if (mode == V2DImode)
        {
          umul = gen_vec_widen_umult_even_v4si;
          nmode = V4SImode;
        }
      else if (mode == V4DImode)
        {
          umul = gen_vec_widen_umult_even_v8si;
          nmode = V8SImode;
        }
      else
        gcc_unreachable ();

      /* Multiply low parts.  */
      t1 = gen_reg_rtx (mode);
      emit_insn (umul (t1, gen_lowpart (nmode, op1),
                       gen_lowpart (nmode, op2)));

      /* Shift input vectors right 32 bits so we can multiply high parts.  */
      t6 = GEN_INT (32);
      t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
      t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);

      /* Multiply high parts by low parts.  */
      t4 = gen_reg_rtx (mode);
      t5 = gen_reg_rtx (mode);
      emit_insn (umul (t4, gen_lowpart (nmode, t2),
                       gen_lowpart (nmode, op2)));
      emit_insn (umul (t5, gen_lowpart (nmode, t3),
                       gen_lowpart (nmode, op1)));

      /* Combine and shift the highparts back.  */
      t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
      t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);

      /* Combine high and low parts.  */
      force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
    }

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
                       gen_rtx_MULT (mode, op1, op2));
}
/* Expand an insert into a vector register through pinsr insn.
   Return true if successful.  */

bool
ix86_expand_pinsr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[3];

  unsigned int size = INTVAL (operands[1]);
  unsigned int pos = INTVAL (operands[2]);

  if (GET_CODE (dst) == SUBREG)
    {
      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
      dst = SUBREG_REG (dst);
    }

  if (GET_CODE (src) == SUBREG)
    src = SUBREG_REG (src);

  switch (GET_MODE (dst))
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      {
        enum machine_mode srcmode, dstmode;
        rtx (*pinsr)(rtx, rtx, rtx, rtx);

        srcmode = mode_for_size (size, MODE_INT, 0);

        switch (srcmode)
          {
          case QImode:
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V16QImode;
            pinsr = gen_sse4_1_pinsrb;
            break;

          case HImode:
            if (!TARGET_SSE2)
              return false;
            dstmode = V8HImode;
            pinsr = gen_sse2_pinsrw;
            break;

          case SImode:
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V4SImode;
            pinsr = gen_sse4_1_pinsrd;
            break;

          case DImode:
            gcc_assert (TARGET_64BIT);
            if (!TARGET_SSE4_1)
              return false;
            dstmode = V2DImode;
            pinsr = gen_sse4_1_pinsrq;
            break;

          default:
            return false;
          }

        dst = gen_lowpart (dstmode, dst);
        src = gen_lowpart (srcmode, src);

        pos /= size;

        emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
        return true;
      }

    default:
      return false;
    }
}
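/* Illustrative example (not part of the original source): inserting an
   HImode value at bit position 32 of a V8HImode destination gives
   size == 16 and pos == 32, so pos /= size selects element 2 and the
   emitted pattern is pinsrw with selector GEN_INT (1 << 2).  */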
/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (TREE_CODE (type) == MEM_REF)
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  if (TARGET_64BIT && va_list_type_node != NULL_TREE)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return va_list_type_node;

      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return sysv_va_list_type_node;

      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return ms_va_list_type_node;
    }

  return std_canonical_va_list_type (type);
}
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (TARGET_64BIT)
    {
      switch (idx)
        {
        default:
          break;

        case 0:
          *ptree = ms_va_list_type_node;
          *pname = "__builtin_ms_va_list";
          return 1;

        case 1:
          *ptree = sysv_va_list_type_node;
          *pname = "__builtin_sysv_va_list";
          return 1;
        }
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  ix86_dependencies_evaluation_hook
/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1

#define BIG 100
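/* Illustrative sketch (not part of the original source): the limits above
   describe a 48-byte budget, i.e. MAX_DISPATCH_WINDOWS (3) windows of
   DISPATCH_WINDOW_SIZE (16) bytes.  The fullness test applied below to a
   window pair can be written as:

     static int
     window_pair_full_p (int window0_bytes, int window1_bytes,
                         int new_insn_bytes)
     {
       int sum = window0_bytes + window1_bytes;
       return sum == 32 || new_insn_bytes + sum >= 48;
     }
*/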
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
enum dispatch_group
{
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};

/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number,
   because the number of these kind of operations does not have any
   effect in dispatch window, but we need them for other reasons in
   the table.  */
static unsigned int num_allowable_groups[disp_last] = {
  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
};

char group_name[disp_last + 1][16] = {
  "disp_no_group", "disp_load", "disp_store", "disp_load_store",
  "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
  "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
};
/* Instruction path.  */
enum insn_path
{
  no_path = 0,
  path_single, /* Single micro op.  */
  path_double, /* Double micro op.  */
  path_multi,  /* Instructions with more than 2 micro ops.  */
  last_path
};
/* sched_insn_info defines a window to the instructions scheduled in
   the basic block.  It contains a pointer to the insn_info table and
   the instruction scheduled.

   Windows are allocated for each basic block and are linked
   together.  */
typedef struct sched_insn_info_s
{
  rtx insn;
  enum dispatch_group group;
  enum insn_path path;
  int byte_len;
  int imm_bytes;
} sched_insn_info;

/* Linked list of dispatch windows.  This is a two way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
typedef struct dispatch_windows_s
{
  int num_insn;            /* Number of insns in the window.  */
  int num_uops;            /* Number of uops in the window.  */
  int window_size;         /* Number of bytes in the window.  */
  int window_num;          /* Window number, 0 or 1.  */
  int num_imm;             /* Number of immediates in an insn.  */
  int num_imm_32;          /* Number of 32 bit immediates in an insn.  */
  int num_imm_64;          /* Number of 64 bit immediates in an insn.  */
  int imm_size;            /* Total immediates in the window.  */
  int num_loads;           /* Total memory loads in the window.  */
  int num_stores;          /* Total memory stores in the window.  */
  int violation;           /* Violation exists in window.  */
  sched_insn_info *window; /* Pointer to the window.  */
  struct dispatch_windows_s *next;
  struct dispatch_windows_s *prev;
} dispatch_windows;
/* Immediate values used in an insn.  */
typedef struct imm_info_s
{
  int imm;
  int imm32;
  int imm64;
} imm_info;

static dispatch_windows *dispatch_window_list;
static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}
/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
          || type == TYPE_ICMP
          || type == TYPE_FCMP
          || GET_CODE (PATTERN (insn)) == COMPARE);
}
/* Return true if a dispatch violation encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}
/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}
/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}
/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();
  init_window (0);
  init_window (1);
}
/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}
/* This function is called when the end of a window processing is reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
                  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}
/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not dispatch
   windows in the sense that their sizes are DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
        init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch (GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
        (imm_values->imm32)++;
      else
        (imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
        {
          (imm_values->imm)++;
          (imm_values->imm32)++;
        }
      break;

    default:
      break;
    }

  return 0;
}
/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
                (rtx_function) find_constant_1, (void *) imm_values);
}
/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
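/* Illustrative example (not part of the original source): for an insn
   carrying one 32-bit and one 64-bit immediate,

     int imm, imm32, imm64;
     int size = get_num_immediates (insn, &imm, &imm32, &imm64);

   yields imm == 2, imm32 == 1, imm64 == 1 and size == 1*4 + 1*8 == 12.  */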
/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                               &num_imm64_operand);
  return false;
}
/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}
/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand,
                                     &num_imm32_operand, &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
          || num_imm_operand + window_list->num_imm > MAX_IMM
          || (num_imm32_operand > 0
              && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
                  || window_list->num_imm_64 * 2 + num_imm32_operand
                     > MAX_IMM_32))
          || (num_imm64_operand > 0
              && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
                  || window_list->num_imm_32 + num_imm64_operand * 2
                     > MAX_IMM_32))
          || (window_list->imm_size + imm_size == MAX_IMM_SIZE
              && num_imm64_operand > 0
              && ((window_list->num_imm_64 > 0
                   && window_list->num_insn >= 2)
                  || window_list->num_insn >= 3)))
        return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
           || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
           || group == disp_prefetch)
          && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
          && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
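/* Illustrative example (not part of the original source): with MAX_IMM_32
   defined as 4, a window that already holds three 32-bit immediates cannot
   accept an insn carrying two more, since 3 + 2 > 4; for such an insn
   count_num_restricted returns BIG, which always exceeds the corresponding
   num_allowable_groups entry.  */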
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
          || (min_insn_size (insn) + sum) >= 48)
        /* Window 1 is full.  Go for next window.  */
        return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
         uops.  */
      if (path == path_double
          && (window_list->num_uops + 2) > MAX_INSN)
        return false;
      else if (path != path_single)
        return false;
    }
  return true;
}
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
           || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
          && insn_group == disp_branch)
        {
          process_end_window ();
          return;
        }
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
          || (byte_len + sum) >= 48)
        {
          process_end_window ();
          window_list = dispatch_window_list;
        }

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block processing.  */
      process_end_window ();
      return;
    }
}
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
           list->num_insn, list->num_uops, list->window_size);
  fprintf (file,
           "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
           list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
        break;
      fprintf (file,
               " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
               i, group_name[list->window[i].group],
               i, (void *) list->window[i].insn,
               i, list->window[i].path,
               i, list->window[i].byte_len,
               i, list->window[i].imm_bytes);
    }
}
/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
                                 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
           group_name[group], path, byte_len);
  fprintf (file,
           "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
           num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to STDERR the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
        return false;

      case IS_DISPATCH_ON:
        return true;

      case IS_CMP:
        return is_cmp (insn);

      case DISPATCH_VIOLATION:
        return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
        return fits_dispatch_window (insn);
      }

  return false;
}
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.

   Currently parallel reassociation is enabled for Atom
   processors only and we set reassociation width to be 2
   because Atom may issue up to 2 instructions per cycle.

   Return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
                          enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}
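/* Illustrative note (not part of the original source): a reassociation
   width of 2 lets the reassoc pass rebalance a serial reduction such as

     ((a + b) + c) + d  ->  (a + b) + (c + d)

   shortening the critical path from three dependent additions to two,
   which matches a 2-issue machine like Atom.  */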
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
        return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
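/* Illustrative note (not part of the original source): the value returned
   above is a bitmask of candidate vector sizes in bytes, so 32 | 16 lets
   the vectorizer try both 32-byte (256-bit AVX) and 16-byte (128-bit)
   vectors, while 0 means only the preferred SIMD mode is tried.  */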
/* Return class of registers which could be used for pseudo of MODE
   and of class RCLASS for spilling instead of memory.  Return NO_REGS
   if it is not possible or non-profitable.  */

static reg_class_t
ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
{
  if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
      && hard_reg_set_subset_p (reg_class_contents[rclass],
                                reg_class_contents[GENERAL_REGS])
      && (mode == SImode || (TARGET_64BIT && mode == DImode)))
    return SSE_REGS;
  return NO_REGS;
}
/* Implement targetm.vectorize.init_cost.  */

static void *
ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
{
  unsigned *cost = XNEWVEC (unsigned, 3);
  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
  return cost;
}
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                    struct _stmt_vec_info *stmt_info, int misalign,
                    enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype,
                                                       misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
/* Implement targetm.vectorize.finish_cost.  */

static void
ix86_finish_cost (void *data, unsigned *prologue_cost,
                  unsigned *body_cost, unsigned *epilogue_cost)
{
  unsigned *cost = (unsigned *) data;
  *prologue_cost = cost[vect_prologue];
  *body_cost = cost[vect_body];
  *epilogue_cost = cost[vect_epilogue];
}
/* Implement targetm.vectorize.destroy_cost_data.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}
/* Validate target specific memory model bits in VAL.  */

static unsigned HOST_WIDE_INT
ix86_memmodel_check (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
  bool strong;

  if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
                                      |MEMMODEL_MASK)
      || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
    {
      warning (OPT_Winvalid_memory_model,
               "Unknown architecture specific memory model");
      return MEMMODEL_SEQ_CST;
    }
  strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
  if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
    {
      warning (OPT_Winvalid_memory_model,
               "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
    }
  if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
    {
      warning (OPT_Winvalid_memory_model,
               "HLE_RELEASE not used with RELEASE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
    }
  return val;
}
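/* Illustrative usage (not part of the original source): the HLE bits
   validated above are supplied by user code together with a C11-style
   memory model, e.g.

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     ...critical section...
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
*/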
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_OPTION_SUPPORTS_FUNCTION_VERSIONS
#define TARGET_OPTION_SUPPORTS_FUNCTION_VERSIONS \
  ix86_supports_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"