1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2019 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
56 UNSPEC_XOP_UNSIGNED_CMP
67 UNSPEC_AESKEYGENASSIST
88 ;; For AVX512F support
90 UNSPEC_UNSIGNED_FIX_NOTRUNC
105 UNSPEC_COMPRESS_STORE
115 ;; For embed. rounding feature
116 UNSPEC_EMBEDDED_ROUNDING
118 ;; For AVX512PF support
119 UNSPEC_GATHER_PREFETCH
120 UNSPEC_SCATTER_PREFETCH
122 ;; For AVX512ER support
136 ;; For AVX512BW support
144 ;; For AVX512DQ support
149 ;; For AVX512IFMA support
153 ;; For AVX512VBMI support
156 ;; For AVX5124FMAPS/AVX5124VNNIW support
163 UNSPEC_GF2P8AFFINEINV
167 ;; For AVX512VBMI2 support
173 ;; For AVX512VNNI support
174 UNSPEC_VPMADDUBSWACCD
175 UNSPEC_VPMADDUBSWACCSSD
177 UNSPEC_VPMADDWDACCSSD
185 ;; For VPCLMULQDQ support
188 ;; For AVX512BITALG support
191 ;; For AVX512BF16 support
192 UNSPEC_VCVTNE2PS2BF16
197 (define_c_enum "unspecv" [
207 ;; All vector modes including V?TImode, used in move patterns.
;; Entries are either a bare mode (always available) or a (MODE "COND")
;; pair, in which case MODE is only enabled when COND evaluates to true.
208 (define_mode_iterator VMOVE
209 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
210 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
211 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
212 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
213 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
214 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
215 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
217 ;; All AVX-512{F,VL} vector modes.  Assumes TARGET_AVX512F as the baseline.
218 (define_mode_iterator V48_AVX512VL
219 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
220 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
221 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
222 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
224 ;; 1,2 byte AVX-512{BW,VL} vector modes.  Assumes TARGET_AVX512BW as the baseline.
225 (define_mode_iterator VI12_AVX512VL
226 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
227 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
229 ;; Same iterator, but without assuming TARGET_AVX512BW as the baseline
230 (define_mode_iterator VI12_AVX512VLBW
231 [(V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
232 (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
233 (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
235 (define_mode_iterator VI1_AVX512VL
236 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
;; All vector modes usable in general arithmetic patterns (no V?TImode).
;; Unlike VMOVE, V2DF is gated on TARGET_SSE2 since plain SSE has no
;; double-precision vector support.
239 (define_mode_iterator V
240 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
241 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
242 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
243 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
244 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
245 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
247 ;; All 128bit vector modes
248 (define_mode_iterator V_128
249 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
251 ;; All 256bit vector modes
252 (define_mode_iterator V_256
253 [V32QI V16HI V8SI V4DI V8SF V4DF])
255 ;; All 128bit and 256bit vector modes
256 (define_mode_iterator V_128_256
257 [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
259 ;; All 512bit vector modes
260 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
262 ;; All 256bit and 512bit vector modes
263 (define_mode_iterator V_256_512
264 [V32QI V16HI V8SI V4DI V8SF V4DF
265 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
266 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
268 ;; All vector float modes
;; 512-bit modes need AVX512F, 256-bit need AVX; V2DF needs SSE2
;; (V4SF is the only float vector available in base SSE).
269 (define_mode_iterator VF
270 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
271 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
273 ;; 128- and 256-bit float vector modes
274 (define_mode_iterator VF_128_256
275 [(V8SF "TARGET_AVX") V4SF
276 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
278 ;; All SFmode vector float modes
279 (define_mode_iterator VF1
280 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
282 (define_mode_iterator VF1_AVX2
283 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX2") V4SF])
285 ;; 128- and 256-bit SF vector modes
286 (define_mode_iterator VF1_128_256
287 [(V8SF "TARGET_AVX") V4SF])
289 (define_mode_iterator VF1_128_256VL
290 [V8SF (V4SF "TARGET_AVX512VL")])
292 ;; All DFmode vector float modes
293 (define_mode_iterator VF2
294 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
296 ;; 128- and 256-bit DF vector modes
297 (define_mode_iterator VF2_128_256
298 [(V4DF "TARGET_AVX") V2DF])
300 (define_mode_iterator VF2_512_256
301 [(V8DF "TARGET_AVX512F") V4DF])
303 (define_mode_iterator VF2_512_256VL
304 [V8DF (V4DF "TARGET_AVX512VL")])
306 ;; All 128bit vector float modes
307 (define_mode_iterator VF_128
308 [V4SF (V2DF "TARGET_SSE2")])
310 ;; All 256bit vector float modes
311 (define_mode_iterator VF_256
314 ;; All 512bit vector float modes
315 (define_mode_iterator VF_512
318 (define_mode_iterator VI48_AVX512VL
319 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
320 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
322 (define_mode_iterator VF_AVX512VL
323 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
324 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
326 (define_mode_iterator VF2_AVX512VL
327 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
329 (define_mode_iterator VF1_AVX512VL
330 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
332 ;; All vector integer modes
;; 512-bit QI/HI element modes need AVX512BW (AVX512F alone only covers
;; 32/64-bit elements); 256-bit modes need AVX.
333 (define_mode_iterator VI
334 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
335 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
336 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
337 (V8SI "TARGET_AVX") V4SI
338 (V4DI "TARGET_AVX") V2DI])
;; Same, but 256-bit modes require AVX2 rather than AVX, for patterns
;; where the 256-bit integer instruction only exists in AVX2.
340 (define_mode_iterator VI_AVX2
341 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
342 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
343 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
344 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
346 ;; All QImode vector integer modes
347 (define_mode_iterator VI1
348 [(V32QI "TARGET_AVX") V16QI])
350 ;; All 128bit vector modes, plus their 256bit counterparts under TARGET_AVX
351 (define_mode_iterator V_AVX
352 [V16QI V8HI V4SI V2DI V4SF V2DF
353 (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
354 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
355 (V8SF "TARGET_AVX") (V4DF"TARGET_AVX")])
357 (define_mode_iterator VI48_AVX
359 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
;; All DImode vector integer modes
361 (define_mode_iterator VI8
362 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; DImode vectors with AVX512F baseline for 512-bit and AVX512VL for 128-bit
363 (define_mode_iterator VI8_FVL
365 [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])
;; DImode vectors; assumes TARGET_AVX512F, narrower modes need AVX512VL
367 (define_mode_iterator VI8_AVX512VL
368 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
370 (define_mode_iterator VI8_256_512
371 [V8DI (V4DI "TARGET_AVX512VL")])
373 (define_mode_iterator VI1_AVX2
374 [(V32QI "TARGET_AVX2") V16QI])
376 (define_mode_iterator VI1_AVX512
377 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
379 (define_mode_iterator VI1_AVX512F
380 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
382 (define_mode_iterator VI2_AVX2
383 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
385 (define_mode_iterator VI2_AVX512F
386 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
388 (define_mode_iterator VI4_AVX
389 [(V8SI "TARGET_AVX") V4SI])
391 (define_mode_iterator VI4_AVX2
392 [(V8SI "TARGET_AVX2") V4SI])
394 (define_mode_iterator VI4_AVX512F
395 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
397 (define_mode_iterator VI4_AVX512VL
398 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
400 (define_mode_iterator VI48_AVX512F_AVX512VL
401 [V4SI V8SI (V16SI "TARGET_AVX512F")
402 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
404 (define_mode_iterator VI2_AVX512VL
405 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
407 (define_mode_iterator VI1_AVX512VL_F
408 [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
410 (define_mode_iterator VI8_AVX2_AVX512BW
411 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
413 (define_mode_iterator VI8_AVX2
414 [(V4DI "TARGET_AVX2") V2DI])
416 (define_mode_iterator VI8_AVX2_AVX512F
417 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
419 (define_mode_iterator VI8_AVX_AVX512F
420 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
422 (define_mode_iterator VI4_128_8_256
426 (define_mode_iterator V8FI
430 (define_mode_iterator V16FI
433 ;; ??? We should probably use TImode instead.
434 (define_mode_iterator VIMAX_AVX2_AVX512BW
435 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
437 ;; Assumes TARGET_AVX512BW as the baseline
438 (define_mode_iterator VIMAX_AVX512VL
439 [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
441 (define_mode_iterator VIMAX_AVX2
442 [(V2TI "TARGET_AVX2") V1TI])
444 ;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
445 (define_mode_iterator SSESCALARMODE
446 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
448 (define_mode_iterator VI12_AVX2
449 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
450 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
452 (define_mode_iterator VI24_AVX2
453 [(V16HI "TARGET_AVX2") V8HI
454 (V8SI "TARGET_AVX2") V4SI])
456 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
457 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
458 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
459 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
461 (define_mode_iterator VI124_AVX2
462 [(V32QI "TARGET_AVX2") V16QI
463 (V16HI "TARGET_AVX2") V8HI
464 (V8SI "TARGET_AVX2") V4SI])
466 (define_mode_iterator VI2_AVX2_AVX512BW
467 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
469 (define_mode_iterator VI248_AVX512VL
471 (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
472 (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
473 (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
475 (define_mode_iterator VI48_AVX2
476 [(V8SI "TARGET_AVX2") V4SI
477 (V4DI "TARGET_AVX2") V2DI])
479 (define_mode_iterator VI248_AVX2
480 [(V16HI "TARGET_AVX2") V8HI
481 (V8SI "TARGET_AVX2") V4SI
482 (V4DI "TARGET_AVX2") V2DI])
484 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
485 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
486 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
487 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
489 (define_mode_iterator VI248_AVX512BW
490 [(V32HI "TARGET_AVX512BW") V16SI V8DI])
492 (define_mode_iterator VI248_AVX512BW_AVX512VL
493 [(V32HI "TARGET_AVX512BW")
494 (V4DI "TARGET_AVX512VL") V16SI V8DI])
496 ;; Assumes TARGET_AVX512VL as the baseline
497 (define_mode_iterator VI248_AVX512BW_1
498 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
502 (define_mode_iterator VI248_AVX512BW_2
503 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
507 (define_mode_iterator VI48_AVX512F
508 [(V16SI "TARGET_AVX512F") V8SI V4SI
509 (V8DI "TARGET_AVX512F") V4DI V2DI])
511 (define_mode_iterator VI48_AVX_AVX512F
512 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
513 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
515 (define_mode_iterator VI12_AVX_AVX512F
516 [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
517 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
519 (define_mode_iterator V48_AVX2
522 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
523 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
525 (define_mode_iterator VI1_AVX512VLBW
526 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL")
527 (V16QI "TARGET_AVX512VL")])
529 (define_mode_attr avx512
530 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
531 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
532 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
533 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
534 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
535 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
537 (define_mode_attr sse2_avx_avx512f
538 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
539 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
540 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
541 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
542 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
543 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
545 (define_mode_attr sse2_avx2
546 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
547 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
548 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
549 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
550 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
552 (define_mode_attr ssse3_avx2
553 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
554 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
555 (V4SI "ssse3") (V8SI "avx2")
556 (V2DI "ssse3") (V4DI "avx2")
557 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
559 (define_mode_attr sse4_1_avx2
560 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
561 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
562 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
563 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
565 (define_mode_attr avx_avx2
566 [(V4SF "avx") (V2DF "avx")
567 (V8SF "avx") (V4DF "avx")
568 (V4SI "avx2") (V2DI "avx2")
569 (V8SI "avx2") (V4DI "avx2")])
571 (define_mode_attr vec_avx2
572 [(V16QI "vec") (V32QI "avx2")
573 (V8HI "vec") (V16HI "avx2")
574 (V4SI "vec") (V8SI "avx2")
575 (V2DI "vec") (V4DI "avx2")])
577 (define_mode_attr avx2_avx512
578 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
579 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
580 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
581 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
582 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
584 (define_mode_attr shuffletype
585 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
586 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
587 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
588 (V32HI "i") (V16HI "i") (V8HI "i")
589 (V64QI "i") (V32QI "i") (V16QI "i")
590 (V4TI "i") (V2TI "i") (V1TI "i")])
592 (define_mode_attr ssequartermode
593 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
595 (define_mode_attr ssequarterinsnmode
596 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "TI") (V8DI "TI")])
598 (define_mode_attr ssedoublemodelower
599 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
600 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
601 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
603 (define_mode_attr ssedoublemode
604 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
605 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
606 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
607 (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
608 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
609 (V4DI "V8DI") (V8DI "V16DI")])
611 (define_mode_attr ssebytemode
612 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")
613 (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")])
615 ;; All 128bit vector integer modes
616 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
618 ;; All 256bit vector integer modes
619 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
621 ;; Various 128bit vector integer mode combinations
622 (define_mode_iterator VI12_128 [V16QI V8HI])
623 (define_mode_iterator VI14_128 [V16QI V4SI])
624 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
625 (define_mode_iterator VI24_128 [V8HI V4SI])
626 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
627 (define_mode_iterator VI48_128 [V4SI V2DI])
629 ;; Various 256bit and 512bit vector integer mode combinations
630 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
631 (define_mode_iterator VI124_256_AVX512F_AVX512BW
633 (V64QI "TARGET_AVX512BW")
634 (V32HI "TARGET_AVX512BW")
635 (V16SI "TARGET_AVX512F")])
636 (define_mode_iterator VI48_256 [V8SI V4DI])
637 (define_mode_iterator VI48_512 [V16SI V8DI])
638 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
639 (define_mode_iterator VI_AVX512BW
640 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
642 ;; Int-float size matches
643 (define_mode_iterator VI4F_128 [V4SI V4SF])
644 (define_mode_iterator VI8F_128 [V2DI V2DF])
645 (define_mode_iterator VI4F_256 [V8SI V8SF])
646 (define_mode_iterator VI8F_256 [V4DI V4DF])
647 (define_mode_iterator VI4F_256_512
649 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
650 (define_mode_iterator VI48F_256_512
652 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
653 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
654 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
655 (define_mode_iterator VF48_I1248
656 [V16SI V16SF V8DI V8DF V32HI V64QI])
657 (define_mode_iterator VI48F
658 [V16SI V16SF V8DI V8DF
659 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
660 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
661 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
662 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
663 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
665 (define_mode_iterator VF_AVX512
666 [(V4SF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
667 (V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
670 (define_mode_attr avx512bcst
671 [(V4SI "%{1to4%}") (V2DI "%{1to2%}")
672 (V8SI "%{1to8%}") (V4DI "%{1to4%}")
673 (V16SI "%{1to16%}") (V8DI "%{1to8%}")
674 (V4SF "%{1to4%}") (V2DF "%{1to2%}")
675 (V8SF "%{1to8%}") (V4DF "%{1to4%}")
676 (V16SF "%{1to16%}") (V8DF "%{1to8%}")])
678 ;; Mapping from float mode to required SSE level
679 (define_mode_attr sse
680 [(SF "sse") (DF "sse2")
681 (V4SF "sse") (V2DF "sse2")
682 (V16SF "avx512f") (V8SF "avx")
683 (V8DF "avx512f") (V4DF "avx")])
685 (define_mode_attr sse2
686 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
687 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
689 (define_mode_attr sse3
690 [(V16QI "sse3") (V32QI "avx")])
692 (define_mode_attr sse4_1
693 [(V4SF "sse4_1") (V2DF "sse4_1")
694 (V8SF "avx") (V4DF "avx")
696 (V4DI "avx") (V2DI "sse4_1")
697 (V8SI "avx") (V4SI "sse4_1")
698 (V16QI "sse4_1") (V32QI "avx")
699 (V8HI "sse4_1") (V16HI "avx")])
701 (define_mode_attr avxsizesuffix
702 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
703 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
704 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
705 (V16SF "512") (V8DF "512")
706 (V8SF "256") (V4DF "256")
707 (V4SF "") (V2DF "")])
709 ;; SSE instruction mode
710 (define_mode_attr sseinsnmode
711 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
712 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
713 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
714 (V16SF "V16SF") (V8DF "V8DF")
715 (V8SF "V8SF") (V4DF "V4DF")
716 (V4SF "V4SF") (V2DF "V2DF")
719 ;; Mapping of vector modes to corresponding mask size
;; The mask mode is an integer mode with one bit per vector element,
;; e.g. V16SI (16 elements) -> HImode (16-bit mask register).
720 (define_mode_attr avx512fmaskmode
721 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
722 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
723 (V16SI "HI") (V8SI "QI") (V4SI "QI")
724 (V8DI "QI") (V4DI "QI") (V2DI "QI")
725 (V16SF "HI") (V8SF "QI") (V4SF "QI")
726 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
728 ;; Same mapping, but in lower case, for use inside pattern names
729 (define_mode_attr avx512fmaskmodelower
730 [(V64QI "di") (V32QI "si") (V16QI "hi")
731 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
732 (V16SI "hi") (V8SI "qi") (V4SI "qi")
733 (V8DI "qi") (V4DI "qi") (V2DI "qi")
734 (V16SF "hi") (V8SF "qi") (V4SF "qi")
735 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
737 ;; Mapping of vector modes to corresponding mask half size
;; Used by unpack patterns that consume half of the elements; QImode is
;; the smallest mask mode, so narrow vectors saturate at QI.
738 (define_mode_attr avx512fmaskhalfmode
739 [(V64QI "SI") (V32QI "HI") (V16QI "QI")
740 (V32HI "HI") (V16HI "QI") (V8HI "QI") (V4HI "QI")
741 (V16SI "QI") (V8SI "QI") (V4SI "QI")
742 (V8DI "QI") (V4DI "QI") (V2DI "QI")
743 (V16SF "QI") (V8SF "QI") (V4SF "QI")
744 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
746 ;; Mapping of vector float modes to an integer mode of the same size
747 (define_mode_attr sseintvecmode
748 [(V16SF "V16SI") (V8DF "V8DI")
749 (V8SF "V8SI") (V4DF "V4DI")
750 (V4SF "V4SI") (V2DF "V2DI")
751 (V16SI "V16SI") (V8DI "V8DI")
752 (V8SI "V8SI") (V4DI "V4DI")
753 (V4SI "V4SI") (V2DI "V2DI")
754 (V16HI "V16HI") (V8HI "V8HI")
755 (V32HI "V32HI") (V64QI "V64QI")
756 (V32QI "V32QI") (V16QI "V16QI")])
758 (define_mode_attr sseintvecmode2
759 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
760 (V8SF "OI") (V4SF "TI")])
762 (define_mode_attr sseintvecmodelower
763 [(V16SF "v16si") (V8DF "v8di")
764 (V8SF "v8si") (V4DF "v4di")
765 (V4SF "v4si") (V2DF "v2di")
766 (V8SI "v8si") (V4DI "v4di")
767 (V4SI "v4si") (V2DI "v2di")
768 (V16HI "v16hi") (V8HI "v8hi")
769 (V32QI "v32qi") (V16QI "v16qi")])
771 ;; Mapping of vector modes to a vector mode of double size
772 (define_mode_attr ssedoublevecmode
773 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
774 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
775 (V8SF "V16SF") (V4DF "V8DF")
776 (V4SF "V8SF") (V2DF "V4DF")])
778 ;; Mapping of vector modes to a vector mode of half size
779 (define_mode_attr ssehalfvecmode
780 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
781 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
782 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
783 (V16SF "V8SF") (V8DF "V4DF")
784 (V8SF "V4SF") (V4DF "V2DF")
787 (define_mode_attr ssehalfvecmodelower
788 [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
789 (V32QI "v16qi") (V16HI "v8hi") (V8SI "v4si") (V4DI "v2di")
790 (V16QI "v8qi") (V8HI "v4hi") (V4SI "v2si")
791 (V16SF "v8sf") (V8DF "v4df")
792 (V8SF "v4sf") (V4DF "v2df")
795 ;; Mapping of vector modes to packed single mode of the same size
796 (define_mode_attr ssePSmode
797 [(V16SI "V16SF") (V8DF "V16SF")
798 (V16SF "V16SF") (V8DI "V16SF")
799 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
800 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
801 (V8SI "V8SF") (V4SI "V4SF")
802 (V4DI "V8SF") (V2DI "V4SF")
803 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
804 (V8SF "V8SF") (V4SF "V4SF")
805 (V4DF "V8SF") (V2DF "V4SF")])
807 (define_mode_attr ssePSmode2
808 [(V8DI "V8SF") (V4DI "V4SF")])
810 ;; Mapping of vector modes back to the scalar modes
811 (define_mode_attr ssescalarmode
812 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
813 (V32HI "HI") (V16HI "HI") (V8HI "HI")
814 (V16SI "SI") (V8SI "SI") (V4SI "SI")
815 (V8DI "DI") (V4DI "DI") (V2DI "DI")
816 (V16SF "SF") (V8SF "SF") (V4SF "SF")
817 (V8DF "DF") (V4DF "DF") (V2DF "DF")
818 (V4TI "TI") (V2TI "TI")])
820 ;; Same mapping, but in lower case, for use inside pattern names
821 (define_mode_attr ssescalarmodelower
822 [(V64QI "qi") (V32QI "qi") (V16QI "qi")
823 (V32HI "hi") (V16HI "hi") (V8HI "hi")
824 (V16SI "si") (V8SI "si") (V4SI "si")
825 (V8DI "di") (V4DI "di") (V2DI "di")
826 (V16SF "sf") (V8SF "sf") (V4SF "sf")
827 (V8DF "df") (V4DF "df") (V2DF "df")
828 (V4TI "ti") (V2TI "ti")])
830 ;; Mapping of vector modes to the 128bit modes
831 (define_mode_attr ssexmmmode
832 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
833 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
834 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
835 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
836 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
837 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
839 ;; Pointer size override for scalar modes (Intel asm dialect)
840 (define_mode_attr iptr
841 [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
842 (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
843 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
844 (V16SF "k") (V8DF "q")
845 (V8SF "k") (V4DF "q")
846 (V4SF "k") (V2DF "q")
849 ;; Number of scalar elements in each vector type
850 (define_mode_attr ssescalarnum
851 [(V64QI "64") (V16SI "16") (V8DI "8")
852 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
853 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
854 (V16SF "16") (V8DF "8")
855 (V8SF "8") (V4DF "4")
856 (V4SF "4") (V2DF "2")])
858 ;; Mask of scalar elements in each vector type
859 (define_mode_attr ssescalarnummask
860 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
861 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
862 (V8SF "7") (V4DF "3")
863 (V4SF "3") (V2DF "1")])
865 (define_mode_attr ssescalarsize
866 [(V4TI "64") (V2TI "64") (V1TI "64")
867 (V8DI "64") (V4DI "64") (V2DI "64")
868 (V64QI "8") (V32QI "8") (V16QI "8")
869 (V32HI "16") (V16HI "16") (V8HI "16")
870 (V16SI "32") (V8SI "32") (V4SI "32")
871 (V16SF "32") (V8SF "32") (V4SF "32")
872 (V8DF "64") (V4DF "64") (V2DF "64")])
874 ;; SSE prefix for integer vector modes
875 (define_mode_attr sseintprefix
876 [(V2DI "p") (V2DF "")
881 (V16SI "p") (V16SF "")
882 (V16QI "p") (V8HI "p")
883 (V32QI "p") (V16HI "p")
884 (V64QI "p") (V32HI "p")])
886 ;; SSE scalar suffix for vector modes
887 (define_mode_attr ssescalarmodesuffix
889 (V16SF "ss") (V8DF "sd")
890 (V8SF "ss") (V4DF "sd")
891 (V4SF "ss") (V2DF "sd")
892 (V16SI "d") (V8DI "q")
893 (V8SI "d") (V4DI "q")
894 (V4SI "d") (V2DI "q")])
896 ;; Pack/unpack vector modes
;; Unpack widens the element size (result holds half the elements of a
;; full vector); pack is the inverse narrowing mapping.
897 (define_mode_attr sseunpackmode
898 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
899 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
900 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
902 (define_mode_attr ssepackmode
903 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
904 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
905 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
907 ;; Mapping of the max integer size for xop rotate immediate constraint
908 (define_mode_attr sserotatemax
909 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
911 ;; Mapping of mode to cast intrinsic name
912 (define_mode_attr castmode
913 [(V8SI "si") (V8SF "ps") (V4DF "pd")
914 (V16SI "si") (V16SF "ps") (V8DF "pd")])
916 ;; Instruction suffix for sign and zero extensions.
917 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
919 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
920 ;; i64x4 or f64x4 for 512bit modes.
;; %~ expands to "i" or "f" at output time depending on TARGET_AVX2,
;; selecting e.g. vextracti128 vs. vextractf128.
921 (define_mode_attr i128
922 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
923 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
924 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
926 ;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
927 ;; i32x4, f32x4, i64x2 or f64x2 suffixes.
928 (define_mode_attr i128vldq
929 [(V8SF "f32x4") (V4DF "f64x2")
930 (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
933 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
934 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
936 ;; Mapping for dbpsadbw (VDBPSADBW) modes: HI-element result vector
;; to the QI-element source vector of the same bit width.
937 (define_mode_attr dbpsadbwmode
938 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
940 ;; Mapping suffixes for broadcast
941 (define_mode_attr bcstscalarsuff
942 [(V64QI "b") (V32QI "b") (V16QI "b")
943 (V32HI "w") (V16HI "w") (V8HI "w")
944 (V16SI "d") (V8SI "d") (V4SI "d")
945 (V8DI "q") (V4DI "q") (V2DI "q")
946 (V16SF "ss") (V8SF "ss") (V4SF "ss")
947 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
949 ;; Tie mode of assembler operand to mode iterator
;; "x" = 128-bit (xmm), "t" = 256-bit (ymm), "g" = 512-bit (zmm)
;; register-size letter used in operand modifiers.
950 (define_mode_attr xtg_mode
951 [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
952 (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
953 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
955 ;; Half mask mode for unpacks
956 (define_mode_attr HALFMASKMODE
957 [(DI "SI") (SI "HI")])
959 ;; Double mask mode for packs
960 (define_mode_attr DOUBLEMASKMODE
961 [(HI "SI") (SI "DI")])
964 ;; Include define_subst patterns for instructions with mask
967 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
969 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
973 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
975 ;; All of these patterns are enabled for SSE1 as well as SSE2.
976 ;; This is essential for maintaining stable calling conventions.
978 (define_expand "mov<mode>"
979 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
980 (match_operand:VMOVE 1 "nonimmediate_operand"))]
983 ix86_expand_vector_move (<MODE>mode, operands);
987 (define_insn "mov<mode>_internal"
988 [(set (match_operand:VMOVE 0 "nonimmediate_operand"
990 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
993 && (register_operand (operands[0], <MODE>mode)
994 || register_operand (operands[1], <MODE>mode))"
996 switch (get_attr_type (insn))
999 return standard_sse_constant_opcode (insn, operands);
1002 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
1003 in avx512f, so we need to use workarounds, to access sse registers
1004 16-31, which are evex-only. In avx512vl we don't need workarounds. */
1005 if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
1006 && (EXT_REX_SSE_REG_P (operands[0])
1007 || EXT_REX_SSE_REG_P (operands[1])))
1009 if (memory_operand (operands[0], <MODE>mode))
1011 if (<MODE_SIZE> == 32)
1012 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
1013 else if (<MODE_SIZE> == 16)
1014 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
1018 else if (memory_operand (operands[1], <MODE>mode))
1020 if (<MODE_SIZE> == 32)
1021 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
1022 else if (<MODE_SIZE> == 16)
1023 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
1028 /* Reg -> reg move is always aligned. Just use wider move. */
1029 switch (get_attr_mode (insn))
1033 return "vmovaps\t{%g1, %g0|%g0, %g1}";
1036 return "vmovapd\t{%g1, %g0|%g0, %g1}";
1039 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
1045 switch (get_attr_mode (insn))
1050 if (misaligned_operand (operands[0], <MODE>mode)
1051 || misaligned_operand (operands[1], <MODE>mode))
1052 return "%vmovups\t{%1, %0|%0, %1}";
1054 return "%vmovaps\t{%1, %0|%0, %1}";
1059 if (misaligned_operand (operands[0], <MODE>mode)
1060 || misaligned_operand (operands[1], <MODE>mode))
1061 return "%vmovupd\t{%1, %0|%0, %1}";
1063 return "%vmovapd\t{%1, %0|%0, %1}";
1067 if (misaligned_operand (operands[0], <MODE>mode)
1068 || misaligned_operand (operands[1], <MODE>mode))
1069 return TARGET_AVX512VL
1070 && (<MODE>mode == V4SImode
1071 || <MODE>mode == V2DImode
1072 || <MODE>mode == V8SImode
1073 || <MODE>mode == V4DImode
1075 ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1076 : "%vmovdqu\t{%1, %0|%0, %1}";
1078 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
1079 : "%vmovdqa\t{%1, %0|%0, %1}";
1081 if (misaligned_operand (operands[0], <MODE>mode)
1082 || misaligned_operand (operands[1], <MODE>mode))
1083 return (<MODE>mode == V16SImode
1084 || <MODE>mode == V8DImode
1086 ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1087 : "vmovdqu64\t{%1, %0|%0, %1}";
1089 return "vmovdqa64\t{%1, %0|%0, %1}";
1099 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
1100 (set_attr "prefix" "maybe_vex")
1102 (cond [(and (eq_attr "alternative" "1")
1103 (match_test "TARGET_AVX512VL"))
1104 (const_string "<sseinsnmode>")
1105 (and (match_test "<MODE_SIZE> == 16")
1106 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1107 (and (eq_attr "alternative" "3")
1108 (match_test "TARGET_SSE_TYPELESS_STORES"))))
1109 (const_string "<ssePSmode>")
1110 (match_test "TARGET_AVX")
1111 (const_string "<sseinsnmode>")
1112 (ior (not (match_test "TARGET_SSE2"))
1113 (match_test "optimize_function_for_size_p (cfun)"))
1114 (const_string "V4SF")
1115 (and (eq_attr "alternative" "0")
1116 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
1119 (const_string "<sseinsnmode>")))
1120 (set (attr "enabled")
1121 (cond [(and (match_test "<MODE_SIZE> == 16")
1122 (eq_attr "alternative" "1"))
1123 (symbol_ref "TARGET_SSE2")
1124 (and (match_test "<MODE_SIZE> == 32")
1125 (eq_attr "alternative" "1"))
1126 (symbol_ref "TARGET_AVX2")
1128 (symbol_ref "true")))])
;; Masked vector load/move: elements selected by mask operand 3 come from
;; operand 1, the rest from operand 2 (same register or zero).
1130 (define_insn "<avx512>_load<mode>_mask"
1131 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1132 (vec_merge:V48_AVX512VL
1133 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
1134 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C,0C")
1135 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
;; Float element types use vmovup*/vmovap*; integer ones use vmovdq*,
;; choosing the unaligned form when either operand is misaligned.
1138 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1140 if (misaligned_operand (operands[1], <MODE>mode))
1141 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1143 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1147 if (misaligned_operand (operands[1], <MODE>mode))
1148 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1150 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1153 [(set_attr "type" "ssemov")
1154 (set_attr "prefix" "evex")
1155 (set_attr "memory" "none,load")
1156 (set_attr "mode" "<sseinsnmode>")])
;; QI/HI element variant: always emits the unaligned vmovdqu8/vmovdqu16
;; forms, so no alignment check is needed here.
1158 (define_insn "<avx512>_load<mode>_mask"
1159 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1160 (vec_merge:VI12_AVX512VL
1161 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
1162 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C,0C")
1163 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1165 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1166 [(set_attr "type" "ssemov")
1167 (set_attr "prefix" "evex")
1168 (set_attr "memory" "none,load")
1169 (set_attr "mode" "<sseinsnmode>")])
;; Masked scalar move (vmovss/vmovsd): merge the low element of operand 2
;; into operand 1 under mask operand 4, with operand 3 as the fallback.
1171 (define_insn "avx512f_mov<ssescalarmodelower>_mask"
1172 [(set (match_operand:VF_128 0 "register_operand" "=v")
1175 (match_operand:VF_128 2 "register_operand" "v")
1176 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
1177 (match_operand:QI 4 "register_operand" "Yk"))
1178 (match_operand:VF_128 1 "register_operand" "v")
1181 "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
1182 [(set_attr "type" "ssemov")
1183 (set_attr "prefix" "evex")
1184 (set_attr "mode" "<ssescalarmode>")])
;; Expander for the masked scalar load: supplies the zero vector (operand 4)
;; for the outer vec_merge matched by *avx512f_load<mode>_mask below.
1186 (define_expand "avx512f_load<mode>_mask"
1187 [(set (match_operand:<ssevecmode> 0 "register_operand")
1188 (vec_merge:<ssevecmode>
1189 (vec_merge:<ssevecmode>
1190 (vec_duplicate:<ssevecmode>
1191 (match_operand:MODEF 1 "memory_operand"))
1192 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand")
1193 (match_operand:QI 3 "register_operand"))
1197 "operands[4] = CONST0_RTX (<ssevecmode>mode);")
;; Masked scalar load from memory: the loaded scalar is merged under mask
;; bit 0 of operand 3 with fallback operand 2; upper elements are zeroed
;; (operand 4 is the zero vector supplied by the expander above).
1199 (define_insn "*avx512f_load<mode>_mask"
1200 [(set (match_operand:<ssevecmode> 0 "register_operand" "=v")
1201 (vec_merge:<ssevecmode>
1202 (vec_merge:<ssevecmode>
1203 (vec_duplicate:<ssevecmode>
1204 (match_operand:MODEF 1 "memory_operand" "m"))
1205 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
1206 (match_operand:QI 3 "register_operand" "Yk"))
1207 (match_operand:<ssevecmode> 4 "const0_operand" "C")
;; Fixed: the Intel-syntax side previously printed "%{3%}" (a literal "3")
;; instead of the mask register operand "%{%3%}", diverging from the AT&T
;; side of the same template.
1210 "vmov<ssescalarmodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1211 [(set_attr "type" "ssemov")
1212 (set_attr "prefix" "evex")
1213 (set_attr "memory" "load")
1214 (set_attr "mode" "<MODE>")])
;; Masked scalar store: write element 0 of operand 1 to memory when bit 0
;; of mask operand 2 is set.
1216 (define_insn "avx512f_store<mode>_mask"
1217 [(set (match_operand:MODEF 0 "memory_operand" "=m")
1219 (and:QI (match_operand:QI 2 "register_operand" "Yk")
1222 (match_operand:<ssevecmode> 1 "register_operand" "v")
1223 (parallel [(const_int 0)]))
1226 "vmov<ssescalarmodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1227 [(set_attr "type" "ssemov")
1228 (set_attr "prefix" "evex")
1229 (set_attr "memory" "store")
1230 (set_attr "mode" "<MODE>")])
;; Mask blend, SI/DI/float elements: vblendm* / vpblendm* select each element
;; from operand 2 (mask bit set) or operand 1 (mask bit clear).
1232 (define_insn "<avx512>_blendm<mode>"
1233 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1234 (vec_merge:V48_AVX512VL
1235 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
1236 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1237 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1239 "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1240 [(set_attr "type" "ssemov")
1241 (set_attr "prefix" "evex")
1242 (set_attr "mode" "<sseinsnmode>")])
;; Mask blend, QI/HI elements (vpblendmb/vpblendmw).
1244 (define_insn "<avx512>_blendm<mode>"
1245 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1246 (vec_merge:VI12_AVX512VL
1247 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
1248 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1249 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1251 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1252 [(set_attr "type" "ssemov")
1253 (set_attr "prefix" "evex")
1254 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector store: write only the elements selected by mask operand 2.
1256 (define_insn "<avx512>_store<mode>_mask"
1257 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1258 (vec_merge:V48_AVX512VL
1259 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1261 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
;; Same mnemonic selection as the masked load: float vs. integer element
;; type, aligned vs. unaligned destination.
1264 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1266 if (misaligned_operand (operands[0], <MODE>mode))
1267 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1269 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1273 if (misaligned_operand (operands[0], <MODE>mode))
1274 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1276 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1279 [(set_attr "type" "ssemov")
1280 (set_attr "prefix" "evex")
1281 (set_attr "memory" "store")
1282 (set_attr "mode" "<sseinsnmode>")])
;; QI/HI element variant: always the unaligned vmovdqu8/vmovdqu16 form.
1284 (define_insn "<avx512>_store<mode>_mask"
1285 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1286 (vec_merge:VI12_AVX512VL
1287 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1289 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1291 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1292 [(set_attr "type" "ssemov")
1293 (set_attr "prefix" "evex")
1294 (set_attr "memory" "store")
1295 (set_attr "mode" "<sseinsnmode>")])
;; movq: copy the low 64 bits of a 128-bit vector, zeroing the high half.
1297 (define_insn "sse2_movq128"
1298 [(set (match_operand:V2DI 0 "register_operand" "=v")
1301 (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1302 (parallel [(const_int 0)]))
1305 "%vmovq\t{%1, %0|%0, %q1}"
1306 [(set_attr "type" "ssemov")
1307 (set_attr "prefix" "maybe_vex")
1308 (set_attr "mode" "TI")])
1310 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1311 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1312 ;; from memory, we'd prefer to load the memory directly into the %xmm
1313 ;; register. To facilitate this happy circumstance, this pattern won't
1314 ;; split until after register allocation. If the 64-bit value didn't
1315 ;; come from memory, this is the best we can do. This is much better
1316 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1319 (define_insn_and_split "movdi_to_sse"
1320 [(set (match_operand:V4SI 0 "register_operand" "=x,x,?x")
1321 (unspec:V4SI [(match_operand:DI 1 "nonimmediate_operand" "r,m,r")]
1322 UNSPEC_MOVDI_TO_SSE))
1323 (clobber (match_scratch:V4SI 2 "=X,X,&x"))]
1324 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1326 "&& reload_completed"
1329 enum rtx_code absneg_op;
1331 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1332 Assemble the 64-bit DImode value in an xmm register. */
1333 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1334 gen_lowpart (SImode, operands[1])));
;; With SSE4.1 the high word is inserted directly via pinsrd ...
1336 emit_insn (gen_sse4_1_pinsrd (operands[0], operands[0],
1337 gen_highpart (SImode, operands[1]),
;; ... otherwise it is built in the scratch register and interleaved in.
1341 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1342 gen_highpart (SImode, operands[1])));
1343 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
;; A memory source is loaded directly, concatenated with zero.
1347 else if (memory_operand (operands[1], DImode))
1348 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
1349 operands[1], const0_rtx))
1354 [(set_attr "isa" "sse4,*,*")])
;; Split a zero-extended scalar SF load into a vec_duplicate merged with
;; zero (runs after reload).
1357 [(set (match_operand:V4SF 0 "register_operand")
1358 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1359 "TARGET_SSE && reload_completed"
1362 (vec_duplicate:V4SF (match_dup 1))
1366 operands[1] = gen_lowpart (SFmode, operands[1]);
1367 operands[2] = CONST0_RTX (V4SFmode);
;; Same idea for DF: rewrite as a vec_concat with a zero upper half.
1371 [(set (match_operand:V2DF 0 "register_operand")
1372 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1373 "TARGET_SSE2 && reload_completed"
1374 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1376 operands[1] = gen_lowpart (DFmode, operands[1]);
1377 operands[2] = CONST0_RTX (DFmode);
;; Generic misaligned vector move; expansion is handled in i386.c.
1380 (define_expand "movmisalign<mode>"
1381 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1382 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1385 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1389 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
;; Peephole-style split: a low-half load plus a high-half load from adjacent
;; memory become a single unaligned V2DF load.
1391 [(set (match_operand:V2DF 0 "sse_reg_operand")
1392 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1393 (match_operand:DF 4 "const0_operand")))
1394 (set (match_operand:V2DF 2 "sse_reg_operand")
1395 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1396 (parallel [(const_int 0)]))
1397 (match_operand:DF 3 "memory_operand")))]
1398 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1399 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1400 [(set (match_dup 2) (match_dup 5))]
1401 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
;; Variant where the low half arrives via a scalar DF load into the same
;; register (checked by the REGNO test).
1404 [(set (match_operand:DF 0 "sse_reg_operand")
1405 (match_operand:DF 1 "memory_operand"))
1406 (set (match_operand:V2DF 2 "sse_reg_operand")
1407 (vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1408 (match_operand:DF 3 "memory_operand")))]
1409 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1410 && REGNO (operands[4]) == REGNO (operands[2])
1411 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1412 [(set (match_dup 2) (match_dup 5))]
1413 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1415 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
;; Store-side counterpart: two half stores become one unaligned V2DF store.
1417 [(set (match_operand:DF 0 "memory_operand")
1418 (vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1419 (parallel [(const_int 0)])))
1420 (set (match_operand:DF 2 "memory_operand")
1421 (vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1422 (parallel [(const_int 1)])))]
1423 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1424 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1425 [(set (match_dup 4) (match_dup 1))]
1426 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
;; SSE3 lddqu: unaligned integer load optimized for cache-line splits.
1428 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1429 [(set (match_operand:VI1 0 "register_operand" "=x")
1430 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1433 "%vlddqu\t{%1, %0|%0, %1}"
1434 [(set_attr "type" "ssemov")
1435 (set_attr "movu" "1")
;; Legacy prefix bytes only matter for the non-VEX encoding.
1436 (set (attr "prefix_data16")
1438 (match_test "TARGET_AVX")
1440 (const_string "0")))
1441 (set (attr "prefix_rep")
1443 (match_test "TARGET_AVX")
1445 (const_string "1")))
1446 (set_attr "prefix" "maybe_vex")
1447 (set_attr "mode" "<sseinsnmode>")])
;; Non-temporal scalar integer store (movnti): bypasses the cache.
1449 (define_insn "sse2_movnti<mode>"
1450 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1451 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1454 "movnti\t{%1, %0|%0, %1}"
1455 [(set_attr "type" "ssemov")
1456 (set_attr "prefix_data16" "0")
1457 (set_attr "mode" "<MODE>")])
;; Non-temporal float vector store (movntps/movntpd).
1459 (define_insn "<sse>_movnt<mode>"
1460 [(set (match_operand:VF 0 "memory_operand" "=m")
1462 [(match_operand:VF 1 "register_operand" "v")]
1465 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1466 [(set_attr "type" "ssemov")
1467 (set_attr "prefix" "maybe_vex")
1468 (set_attr "mode" "<MODE>")])
;; Non-temporal integer vector store (movntdq).
1470 (define_insn "<sse2>_movnt<mode>"
1471 [(set (match_operand:VI8 0 "memory_operand" "=m")
1472 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1475 "%vmovntdq\t{%1, %0|%0, %1}"
1476 [(set_attr "type" "ssecvt")
1477 (set (attr "prefix_data16")
1479 (match_test "TARGET_AVX")
1481 (const_string "1")))
1482 (set_attr "prefix" "maybe_vex")
1483 (set_attr "mode" "<sseinsnmode>")])
1485 ; Expand patterns for non-temporal stores. At the moment, only those
1486 ; that directly map to insns are defined; it would be possible to
1487 ; define patterns for other modes that would expand to several insns.
1489 ;; Modes handled by storent patterns.
1490 (define_mode_iterator STORENT_MODE
1491 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1492 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1493 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1494 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1495 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Named pattern used by the middle end for non-temporal stores.
1497 (define_expand "storent<mode>"
1498 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1499 (unspec:STORENT_MODE
1500 [(match_operand:STORENT_MODE 1 "register_operand")]
1504 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1508 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1510 ;; All integer modes with AVX512BW/DQ.
1511 (define_mode_iterator SWI1248_AVX512BWDQ
1512 [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1514 ;; All integer modes with AVX512BW, where HImode operation
1515 ;; can be used instead of QImode.
1516 (define_mode_iterator SWI1248_AVX512BW
1517 [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1519 ;; All integer modes with AVX512BW/DQ, even HImode requires DQ.
1520 (define_mode_iterator SWI1248_AVX512BWDQ2
1521 [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1522 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
;; Named kmov expander; disallows mem-to-mem moves.
1524 (define_expand "kmov<mskmodesuffix>"
1525 [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1526 (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1528 && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
;; Mask-register logic (kand/kior/kxor).  UNSPEC_MASKOP keeps these from
;; being combined with ordinary GPR logic patterns.
1530 (define_insn "k<code><mode>"
1531 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1532 (any_logic:SWI1248_AVX512BW
1533 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1534 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1535 (unspec [(const_int 0)] UNSPEC_MASKOP)]
;; Without AVX512DQ there is no byte form, so QImode falls back to the
;; word-sized k*w mnemonic (see the mode attr below).
1538 if (get_attr_mode (insn) == MODE_HI)
1539 return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1541 return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1543 [(set_attr "type" "msklog")
1544 (set_attr "prefix" "vex")
1546 (cond [(and (match_test "<MODE>mode == QImode")
1547 (not (match_test "TARGET_AVX512DQ")))
1550 (const_string "<MODE>")))])
;; kandn: mask and-not, same QImode-to-HImode fallback.
1552 (define_insn "kandn<mode>"
1553 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1554 (and:SWI1248_AVX512BW
1555 (not:SWI1248_AVX512BW
1556 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1557 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1558 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1561 if (get_attr_mode (insn) == MODE_HI)
1562 return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1564 return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1566 [(set_attr "type" "msklog")
1567 (set_attr "prefix" "vex")
1569 (cond [(and (match_test "<MODE>mode == QImode")
1570 (not (match_test "TARGET_AVX512DQ")))
1573 (const_string "<MODE>")))])
;; kxnor: mask exclusive-nor, same QImode-to-HImode fallback.
1575 (define_insn "kxnor<mode>"
1576 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1577 (not:SWI1248_AVX512BW
1578 (xor:SWI1248_AVX512BW
1579 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1580 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1581 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1584 if (get_attr_mode (insn) == MODE_HI)
1585 return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1587 return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1589 [(set_attr "type" "msklog")
1590 (set_attr "prefix" "vex")
1592 (cond [(and (match_test "<MODE>mode == QImode")
1593 (not (match_test "TARGET_AVX512DQ")))
1596 (const_string "<MODE>")))])
;; knot: mask complement, with the usual QImode-to-HImode fallback
;; when AVX512DQ is unavailable.
1598 (define_insn "knot<mode>"
1599 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1600 (not:SWI1248_AVX512BW
1601 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1602 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1605 if (get_attr_mode (insn) == MODE_HI)
1606 return "knotw\t{%1, %0|%0, %1}";
1608 return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1610 [(set_attr "type" "msklog")
1611 (set_attr "prefix" "vex")
1613 (cond [(and (match_test "<MODE>mode == QImode")
1614 (not (match_test "TARGET_AVX512DQ")))
1617 (const_string "<MODE>")))])
;; kadd: mask addition; QI/HI forms require AVX512DQ (iterator BWDQ2).
1619 (define_insn "kadd<mode>"
1620 [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
1621 (plus:SWI1248_AVX512BWDQ2
1622 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
1623 (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
1624 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1626 "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1627 [(set_attr "type" "msklog")
1628 (set_attr "prefix" "vex")
1629 (set_attr "mode" "<MODE>")])
1631 ;; Mask variant shift mnemonics
1632 (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
;; kshiftl/kshiftr by an immediate count.
1634 (define_insn "k<code><mode>"
1635 [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
1636 (any_lshift:SWI1248_AVX512BWDQ
1637 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
1638 (match_operand:QI 2 "immediate_operand" "n")))
1639 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1641 "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1642 [(set_attr "type" "msklog")
1643 (set_attr "prefix" "vex")
1644 (set_attr "mode" "<MODE>")])
;; ktest: set flags from mask-register test.
1646 (define_insn "ktest<mode>"
1647 [(set (reg:CC FLAGS_REG)
1649 [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
1650 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
1653 "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
1654 [(set_attr "mode" "<MODE>")
1655 (set_attr "type" "msklog")
1656 (set_attr "prefix" "vex")])
;; kortest: set flags from OR of two mask registers.
1658 (define_insn "kortest<mode>"
1659 [(set (reg:CC FLAGS_REG)
1661 [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
1662 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
1665 "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
1666 [(set_attr "mode" "<MODE>")
1667 (set_attr "type" "msklog")
1668 (set_attr "prefix" "vex")])
;; kunpckbw: concatenate two 8-bit masks into a 16-bit mask.
1670 (define_insn "kunpckhi"
1671 [(set (match_operand:HI 0 "register_operand" "=k")
1674 (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
1676 (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
1678 "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
1679 [(set_attr "mode" "HI")
1680 (set_attr "type" "msklog")
1681 (set_attr "prefix" "vex")])
;; kunpckwd: concatenate two 16-bit masks into a 32-bit mask.
1683 (define_insn "kunpcksi"
1684 [(set (match_operand:SI 0 "register_operand" "=k")
1687 (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
1689 (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
1691 "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
1692 [(set_attr "mode" "SI")])
;; kunpckdq: concatenate two 32-bit masks into a 64-bit mask.
1694 (define_insn "kunpckdi"
1695 [(set (match_operand:DI 0 "register_operand" "=k")
1698 (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
1700 (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
1702 "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
1703 [(set_attr "mode" "DI")])
1706 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1708 ;; Parallel floating point arithmetic
1710 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; abs/neg expander: lowered in i386.c to a sign-bit mask operation.
1712 (define_expand "<code><mode>2"
1713 [(set (match_operand:VF 0 "register_operand")
1715 (match_operand:VF 1 "register_operand")))]
1717 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; abs/neg implemented as AND (abs) or XOR (neg) with a mask held in
;; operand 2; split after reload into the plain logic insn.
1719 (define_insn_and_split "*absneg<mode>2"
1720 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1721 (match_operator:VF 3 "absneg_operator"
1722 [(match_operand:VF 1 "vector_operand" "0, xBm,v, m")]))
1723 (use (match_operand:VF 2 "vector_operand" "xBm,0, vm,v"))]
1726 "&& reload_completed"
1729 enum rtx_code absneg_op;
;; Put the memory operand second so the emitted logic insn is valid.
1735 if (MEM_P (operands[1]))
1736 op1 = operands[2], op2 = operands[1];
1738 op1 = operands[1], op2 = operands[2];
1743 if (rtx_equal_p (operands[0], operands[1]))
1749 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1750 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1751 t = gen_rtx_SET (operands[0], t);
1755 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Vector FP add/sub expander, with optional masking and rounding.
1757 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1758 [(set (match_operand:VF 0 "register_operand")
1760 (match_operand:VF 1 "<round_nimm_predicate>")
1761 (match_operand:VF 2 "<round_nimm_predicate>")))]
1762 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1763 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn: legacy two-operand SSE form and VEX/EVEX three-operand form.
1765 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1766 [(set (match_operand:VF 0 "register_operand" "=x,v")
1768 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1769 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1770 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1771 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1773 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1774 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1775 [(set_attr "isa" "noavx,avx")
1776 (set_attr "type" "sseadd")
1777 (set_attr "prefix" "<mask_prefix3>")
1778 (set_attr "mode" "<MODE>")])
;; Subtract with an embedded-broadcast memory operand (scalar broadcast
;; into each element via the EVEX {1toN} form).
1780 (define_insn "*sub<mode>3<mask_name>_bcst"
1781 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1783 (match_operand:VF_AVX512 1 "register_operand" "v")
1784 (vec_duplicate:VF_AVX512
1785 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
1787 && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
1788 && <mask_mode512bit_condition>"
1789 "vsub<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<avx512bcst>}"
1790 [(set_attr "prefix" "evex")
1791 (set_attr "type" "sseadd")
1792 (set_attr "mode" "<MODE>")])
;; Add with embedded broadcast; the broadcast operand is the first source
;; since PLUS is commutative.
1794 (define_insn "*add<mode>3<mask_name>_bcst"
1795 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1797 (vec_duplicate:VF_AVX512
1798 (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
1799 (match_operand:VF_AVX512 2 "register_operand" "v")))]
1801 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
1802 && <mask_mode512bit_condition>"
1803 "vadd<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
1804 [(set_attr "prefix" "evex")
1805 (set_attr "type" "sseadd")
1806 (set_attr "mode" "<MODE>")])
;; Scalar add/sub (addss/subsd etc.): operates on element 0, upper
;; elements pass through from operand 1.
1808 (define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name>"
1809 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1812 (match_operand:VF_128 1 "register_operand" "0,v")
1813 (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1818 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1819 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1820 [(set_attr "isa" "noavx,avx")
1821 (set_attr "type" "sseadd")
1822 (set_attr "prefix" "<round_scalar_prefix>")
1823 (set_attr "mode" "<ssescalarmode>")])
;; Vector FP multiply expander, with optional masking and rounding.
1825 (define_expand "mul<mode>3<mask_name><round_name>"
1826 [(set (match_operand:VF 0 "register_operand")
1828 (match_operand:VF 1 "<round_nimm_predicate>")
1829 (match_operand:VF 2 "<round_nimm_predicate>")))]
1830 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1831 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Matching insn; "%0" marks operand 1 commutative with operand 2.
1833 (define_insn "*mul<mode>3<mask_name><round_name>"
1834 [(set (match_operand:VF 0 "register_operand" "=x,v")
1836 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1837 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1839 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
1840 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1842 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1843 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1844 [(set_attr "isa" "noavx,avx")
1845 (set_attr "type" "ssemul")
1846 (set_attr "prefix" "<mask_prefix3>")
1847 (set_attr "btver2_decode" "direct,double")
1848 (set_attr "mode" "<MODE>")])
;; Multiply with an embedded-broadcast memory operand (EVEX {1toN}).
1850 (define_insn "*mul<mode>3<mask_name>_bcst"
1851 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1853 (vec_duplicate:VF_AVX512
1854 (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
1855 (match_operand:VF_AVX512 2 "register_operand" "v")))]
1856 "TARGET_AVX512F && <mask_mode512bit_condition>"
;; Fixed: the Intel-syntax side used doubled angle brackets
;; ("%1<<avx512bcst>>"), which would emit stray "<"/">" characters instead
;; of substituting the broadcast modifier; single brackets match the
;; sibling add/sub broadcast patterns.
1857 "vmul<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
1858 [(set_attr "prefix" "evex")
1859 (set_attr "type" "ssemul")
1860 (set_attr "mode" "<MODE>")])
;; Scalar multiply/divide (mulss/divsd etc.): element 0 only, upper
;; elements pass through from operand 1.
1862 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
1863 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1866 (match_operand:VF_128 1 "register_operand" "0,v")
1867 (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1872 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1873 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1874 [(set_attr "isa" "noavx,avx")
1875 (set_attr "type" "sse<multdiv_mnemonic>")
1876 (set_attr "prefix" "<round_scalar_prefix>")
1877 (set_attr "btver2_decode" "direct,double")
1878 (set_attr "mode" "<ssescalarmode>")])
;; DF vector divide expander: straight division, no reciprocal path.
1880 (define_expand "div<mode>3"
1881 [(set (match_operand:VF2 0 "register_operand")
1882 (div:VF2 (match_operand:VF2 1 "register_operand")
1883 (match_operand:VF2 2 "vector_operand")))]
1885 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; SF vector divide expander: may emit a reciprocal-based software
;; sequence when fast-math flags and TARGET_RECIP_VEC_DIV allow it.
1887 (define_expand "div<mode>3"
1888 [(set (match_operand:VF1 0 "register_operand")
1889 (div:VF1 (match_operand:VF1 1 "register_operand")
1890 (match_operand:VF1 2 "vector_operand")))]
1893 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1896 && TARGET_RECIP_VEC_DIV
1897 && !optimize_insn_for_size_p ()
1898 && flag_finite_math_only && !flag_trapping_math
1899 && flag_unsafe_math_optimizations)
1901 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Hardware divide insn, legacy and VEX/EVEX forms.
1906 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1907 [(set (match_operand:VF 0 "register_operand" "=x,v")
1909 (match_operand:VF 1 "register_operand" "0,v")
1910 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1911 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1913 div<ssemodesuffix>\t{%2, %0|%0, %2}
1914 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1915 [(set_attr "isa" "noavx,avx")
1916 (set_attr "type" "ssediv")
1917 (set_attr "prefix" "<mask_prefix3>")
1918 (set_attr "mode" "<MODE>")])
;; Divide with an embedded-broadcast divisor (EVEX {1toN}).
1920 (define_insn "*<avx512>_div<mode>3<mask_name>_bcst"
1921 [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1923 (match_operand:VF_AVX512 1 "register_operand" "v")
1924 (vec_duplicate:VF_AVX512
1925 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
1926 "TARGET_AVX512F && <mask_mode512bit_condition>"
;; Fixed: the Intel-syntax side used doubled angle brackets
;; ("%2<<avx512bcst>>"), which would emit stray "<"/">" characters instead
;; of substituting the broadcast modifier; single brackets match the
;; sibling sub broadcast pattern.
1927 "vdiv<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<avx512bcst>}"
1928 [(set_attr "prefix" "evex")
1929 (set_attr "type" "ssediv")
1930 (set_attr "mode" "<MODE>")])
;; rcpps: packed approximate reciprocal (SSE/AVX, 128/256-bit SF only).
1932 (define_insn "<sse>_rcp<mode>2"
1933 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1935 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
1937 "%vrcpps\t{%1, %0|%0, %1}"
1938 [(set_attr "type" "sse")
1939 (set_attr "atom_sse_attr" "rcp")
1940 (set_attr "btver2_sse_attr" "rcp")
1941 (set_attr "prefix" "maybe_vex")
1942 (set_attr "mode" "<MODE>")])
;; rcpss: scalar approximate reciprocal of element 0.
1944 (define_insn "sse_vmrcpv4sf2"
1945 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1947 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1949 (match_operand:V4SF 2 "register_operand" "0,x")
1953 rcpss\t{%1, %0|%0, %k1}
1954 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1955 [(set_attr "isa" "noavx,avx")
1956 (set_attr "type" "sse")
1957 (set_attr "atom_sse_attr" "rcp")
1958 (set_attr "btver2_sse_attr" "rcp")
1959 (set_attr "prefix" "orig,vex")
1960 (set_attr "mode" "SF")])
;; vrcp14: AVX-512 packed reciprocal approximation (2^-14 relative error).
1962 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1963 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1965 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1968 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1969 [(set_attr "type" "sse")
1970 (set_attr "prefix" "evex")
1971 (set_attr "mode" "<MODE>")])
;; Scalar vrcp14; upper elements pass through from operand 2.
1973 (define_insn "srcp14<mode>"
1974 [(set (match_operand:VF_128 0 "register_operand" "=v")
1977 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1979 (match_operand:VF_128 2 "register_operand" "v")
1982 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1983 [(set_attr "type" "sse")
1984 (set_attr "prefix" "evex")
1985 (set_attr "mode" "<MODE>")])
;; Masked scalar vrcp14: result element 0 merged under mask operand 4
;; with fallback operand 3.
1987 (define_insn "srcp14<mode>_mask"
1988 [(set (match_operand:VF_128 0 "register_operand" "=v")
1992 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1994 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
1995 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1996 (match_operand:VF_128 2 "register_operand" "v")
1999 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2000 [(set_attr "type" "sse")
2001 (set_attr "prefix" "evex")
2002 (set_attr "mode" "<MODE>")])
;; DF vector sqrt expander: plain hardware sqrt.
2004 (define_expand "sqrt<mode>2"
2005 [(set (match_operand:VF2 0 "register_operand")
2006 (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
;; SF vector sqrt expander: may emit a reciprocal-sqrt software sequence
;; under fast-math flags and TARGET_RECIP_VEC_SQRT.
2009 (define_expand "sqrt<mode>2"
2010 [(set (match_operand:VF1 0 "register_operand")
2011 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
2015 && TARGET_RECIP_VEC_SQRT
2016 && !optimize_insn_for_size_p ()
2017 && flag_finite_math_only && !flag_trapping_math
2018 && flag_unsafe_math_optimizations)
2020 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Hardware packed sqrt, legacy and VEX/EVEX forms.
2025 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
2026 [(set (match_operand:VF 0 "register_operand" "=x,v")
2027 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
2028 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2030 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
2031 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
2032 [(set_attr "isa" "noavx,avx")
2033 (set_attr "type" "sse")
2034 (set_attr "atom_sse_attr" "sqrt")
2035 (set_attr "btver2_sse_attr" "sqrt")
2036 (set_attr "prefix" "maybe_vex")
2037 (set_attr "mode" "<MODE>")])
;; Scalar sqrt (sqrtss/sqrtsd): element 0 only, upper elements pass
;; through from operand 2.
2039 (define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2040 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2043 (match_operand:VF_128 1 "vector_operand" "xBm,<round_scalar_constraint>"))
2044 (match_operand:VF_128 2 "register_operand" "0,v")
2048 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
2049 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_scalar_mask_op3>}"
2050 [(set_attr "isa" "noavx,avx")
2051 (set_attr "type" "sse")
2052 (set_attr "atom_sse_attr" "sqrt")
2053 (set_attr "prefix" "<round_scalar_prefix>")
2054 (set_attr "btver2_sse_attr" "sqrt")
2055 (set_attr "mode" "<ssescalarmode>")])
;; rsqrt expander: always lowered to a Newton-Raphson refinement sequence.
2057 (define_expand "rsqrt<mode>2"
2058 [(set (match_operand:VF1_128_256 0 "register_operand")
2060 [(match_operand:VF1_128_256 1 "vector_operand")] UNSPEC_RSQRT))]
2061 "TARGET_SSE && TARGET_SSE_MATH"
2063 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; 512-bit variant, available with AVX512ER.
2067 (define_expand "rsqrtv16sf2"
2068 [(set (match_operand:V16SF 0 "register_operand")
2070 [(match_operand:V16SF 1 "vector_operand")]
2072 "TARGET_AVX512ER && TARGET_SSE_MATH"
2074 ix86_emit_swsqrtsf (operands[0], operands[1], V16SFmode, true);
;; rsqrtps: packed approximate reciprocal square root.
2078 (define_insn "<sse>_rsqrt<mode>2"
2079 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2081 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
2083 "%vrsqrtps\t{%1, %0|%0, %1}"
2084 [(set_attr "type" "sse")
2085 (set_attr "prefix" "maybe_vex")
2086 (set_attr "mode" "<MODE>")])
;; vrsqrt14: AVX-512 packed approximation (2^-14 relative error).
2088 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
2089 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2091 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2094 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2095 [(set_attr "type" "sse")
2096 (set_attr "prefix" "evex")
2097 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt14; upper elements pass through from operand 2.
2099 (define_insn "rsqrt14<mode>"
2100 [(set (match_operand:VF_128 0 "register_operand" "=v")
2103 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2105 (match_operand:VF_128 2 "register_operand" "v")
2108 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2109 [(set_attr "type" "sse")
2110 (set_attr "prefix" "evex")
2111 (set_attr "mode" "<MODE>")])
;; Masked scalar vrsqrt14: element 0 merged under mask operand 4 with
;; fallback operand 3.
2113 (define_insn "rsqrt14_<mode>_mask"
2114 [(set (match_operand:VF_128 0 "register_operand" "=v")
2118 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2120 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2121 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2122 (match_operand:VF_128 2 "register_operand" "v")
2125 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2126 [(set_attr "type" "sse")
2127 (set_attr "prefix" "evex")
2128 (set_attr "mode" "<MODE>")])
;; rsqrtss: scalar approximate reciprocal square root of element 0.
2130 (define_insn "sse_vmrsqrtv4sf2"
2131 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2133 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2135 (match_operand:V4SF 2 "register_operand" "0,x")
2139 rsqrtss\t{%1, %0|%0, %k1}
2140 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
2141 [(set_attr "isa" "noavx,avx")
2142 (set_attr "type" "sse")
2143 (set_attr "prefix" "orig,vex")
2144 (set_attr "mode" "SF")])
;; smin/smax expander.  When -0.0 or NaN semantics matter (no
;; -ffinite-math-only, or signed zeros enabled) it routes through the
;; order-preserving ieee_* pattern below; otherwise it falls through to the
;; commutative pattern via ix86_fixup_binary_operands_no_copy.
2146 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
2147 [(set (match_operand:VF 0 "register_operand")
2149 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
2150 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
2151 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2153 if (!flag_finite_math_only || flag_signed_zeros)
2155 operands[1] = force_reg (<MODE>mode, operands[1]);
2156 emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
2157 (operands[0], operands[1], operands[2]
2158 <mask_operand_arg34>
2159 <round_saeonly_mask_arg3>));
2163 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);

2166 ;; These versions of the min/max patterns are intentionally ignorant of
2167 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
2168 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
2169 ;; are undefined in this condition, we're certain this is correct.
2171 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
2172 [(set (match_operand:VF 0 "register_operand" "=x,v")
2174 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
2175 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
2177 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
2178 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2180 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
2181 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2182 [(set_attr "isa" "noavx,avx")
2183 (set_attr "type" "sseadd")
2184 (set_attr "btver2_sse_attr" "maxmin")
2185 (set_attr "prefix" "<mask_prefix3>")
2186 (set_attr "mode" "<MODE>")])

2188 ;; These versions of the min/max patterns implement exactly the operations
2189 ;;   min = (op1 < op2 ? op1 : op2)
2190 ;;   max = (!(op1 < op2) ? op1 : op2)
2191 ;; Their operands are not commutative, and thus they may be used in the
2192 ;; presence of -0.0 and NaN.
2194 (define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
2195 [(set (match_operand:VF 0 "register_operand" "=x,v")
2197 [(match_operand:VF 1 "register_operand" "0,v")
2198 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
2201 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2203 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
2204 v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2205 [(set_attr "isa" "noavx,avx")
2206 (set_attr "type" "sseadd")
2207 (set_attr "btver2_sse_attr" "maxmin")
2208 (set_attr "prefix" "<mask_prefix3>")
2209 (set_attr "mode" "<MODE>")])

;; Scalar min/max (minss/maxss etc.): operand 2 is the second source,
;; operand 1 both the first source and (presumably) the upper-element
;; pass-through -- the vec_merge wrapper is not visible in this chunk.
2211 (define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
2212 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2215 (match_operand:VF_128 1 "register_operand" "0,v")
2216 (match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_scalar_constraint>"))
2221 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2222 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
2223 [(set_attr "isa" "noavx,avx")
2224 (set_attr "type" "sse")
2225 (set_attr "btver2_sse_attr" "maxmin")
2226 (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2227 (set_attr "mode" "<ssescalarmode>")])
;; vaddsubpd (256-bit): even lanes get a1-a2, odd lanes a1+a2, selected
;; via vec_merge of the minus and plus results.
2229 (define_insn "avx_addsubv4df3"
2230 [(set (match_operand:V4DF 0 "register_operand" "=x")
2233 (match_operand:V4DF 1 "register_operand" "x")
2234 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2235 (plus:V4DF (match_dup 1) (match_dup 2))
2238 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2239 [(set_attr "type" "sseadd")
2240 (set_attr "prefix" "vex")
2241 (set_attr "mode" "V4DF")])

;; addsubpd / vaddsubpd (128-bit double).
2243 (define_insn "sse3_addsubv2df3"
2244 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2247 (match_operand:V2DF 1 "register_operand" "0,x")
2248 (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
2249 (plus:V2DF (match_dup 1) (match_dup 2))
2253 addsubpd\t{%2, %0|%0, %2}
2254 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2255 [(set_attr "isa" "noavx,avx")
2256 (set_attr "type" "sseadd")
2257 (set_attr "atom_unit" "complex")
2258 (set_attr "prefix" "orig,vex")
2259 (set_attr "mode" "V2DF")])

;; vaddsubps (256-bit single).
2261 (define_insn "avx_addsubv8sf3"
2262 [(set (match_operand:V8SF 0 "register_operand" "=x")
2265 (match_operand:V8SF 1 "register_operand" "x")
2266 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2267 (plus:V8SF (match_dup 1) (match_dup 2))
2270 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2271 [(set_attr "type" "sseadd")
2272 (set_attr "prefix" "vex")
2273 (set_attr "mode" "V8SF")])

;; addsubps / vaddsubps (128-bit single).
2275 (define_insn "sse3_addsubv4sf3"
2276 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2279 (match_operand:V4SF 1 "register_operand" "0,x")
2280 (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
2281 (plus:V4SF (match_dup 1) (match_dup 2))
2285 addsubps\t{%2, %0|%0, %2}
2286 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2287 [(set_attr "isa" "noavx,avx")
2288 (set_attr "type" "sseadd")
2289 (set_attr "prefix" "orig,vex")
2290 (set_attr "prefix_rep" "1,*")
2291 (set_attr "mode" "V4SF")])

;; Recognition split: rewrite a vec_merge-style addsub_vm_operator match
;; (minus/plus over equal operand pairs) into the canonical
;; vec_merge (minus, plus) addsub form matched by the insns above.
2294 [(set (match_operand:VF_128_256 0 "register_operand")
2295 (match_operator:VF_128_256 6 "addsub_vm_operator"
2297 (match_operand:VF_128_256 1 "register_operand")
2298 (match_operand:VF_128_256 2 "vector_operand"))
2300 (match_operand:VF_128_256 3 "vector_operand")
2301 (match_operand:VF_128_256 4 "vector_operand"))
2302 (match_operand 5 "const_int_operand")]))]
2304 && can_create_pseudo_p ()
2305 && ((rtx_equal_p (operands[1], operands[3])
2306 && rtx_equal_p (operands[2], operands[4]))
2307 || (rtx_equal_p (operands[1], operands[4])
2308 && rtx_equal_p (operands[2], operands[3])))"
2310 (vec_merge:VF_128_256
2311 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2312 (plus:VF_128_256 (match_dup 1) (match_dup 2))
;; Same split with PLUS and MINUS swapped in the source; the merge-mask
;; immediate is inverted below to compensate.
2316 [(set (match_operand:VF_128_256 0 "register_operand")
2317 (match_operator:VF_128_256 6 "addsub_vm_operator"
2319 (match_operand:VF_128_256 1 "vector_operand")
2320 (match_operand:VF_128_256 2 "vector_operand"))
2322 (match_operand:VF_128_256 3 "register_operand")
2323 (match_operand:VF_128_256 4 "vector_operand"))
2324 (match_operand 5 "const_int_operand")]))]
2326 && can_create_pseudo_p ()
2327 && ((rtx_equal_p (operands[1], operands[3])
2328 && rtx_equal_p (operands[2], operands[4]))
2329 || (rtx_equal_p (operands[1], operands[4])
2330 && rtx_equal_p (operands[2], operands[3])))"
2332 (vec_merge:VF_128_256
2333 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2334 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2337 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes.  */
2339 = GEN_INT (~INTVAL (operands[5])
2340 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
;; vec_select-based addsub recognition: a shuffle over the concatenation of
;; the minus and plus results is converted to vec_merge form; the selection
;; parallel (operand 5) is folded into an integer merge mask below.
2344 [(set (match_operand:VF_128_256 0 "register_operand")
2345 (match_operator:VF_128_256 7 "addsub_vs_operator"
2346 [(vec_concat:<ssedoublemode>
2348 (match_operand:VF_128_256 1 "register_operand")
2349 (match_operand:VF_128_256 2 "vector_operand"))
2351 (match_operand:VF_128_256 3 "vector_operand")
2352 (match_operand:VF_128_256 4 "vector_operand")))
2353 (match_parallel 5 "addsub_vs_parallel"
2354 [(match_operand 6 "const_int_operand")])]))]
2356 && can_create_pseudo_p ()
2357 && ((rtx_equal_p (operands[1], operands[3])
2358 && rtx_equal_p (operands[2], operands[4]))
2359 || (rtx_equal_p (operands[1], operands[4])
2360 && rtx_equal_p (operands[2], operands[3])))"
2362 (vec_merge:VF_128_256
2363 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2364 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2367 int i, nelt = XVECLEN (operands[5], 0);
2368 HOST_WIDE_INT ival = 0;
2370 for (i = 0; i < nelt; i++)
2371 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2372 ival |= HOST_WIDE_INT_1 << i;
2374 operands[5] = GEN_INT (ival);
;; Same vec_select-based split with MINUS second in the concatenation;
;; selector indices >= nunits pick the minus half, hence the >= test below.
2378 [(set (match_operand:VF_128_256 0 "register_operand")
2379 (match_operator:VF_128_256 7 "addsub_vs_operator"
2380 [(vec_concat:<ssedoublemode>
2382 (match_operand:VF_128_256 1 "vector_operand")
2383 (match_operand:VF_128_256 2 "vector_operand"))
2385 (match_operand:VF_128_256 3 "register_operand")
2386 (match_operand:VF_128_256 4 "vector_operand")))
2387 (match_parallel 5 "addsub_vs_parallel"
2388 [(match_operand 6 "const_int_operand")])]))]
2390 && can_create_pseudo_p ()
2391 && ((rtx_equal_p (operands[1], operands[3])
2392 && rtx_equal_p (operands[2], operands[4]))
2393 || (rtx_equal_p (operands[1], operands[4])
2394 && rtx_equal_p (operands[2], operands[3])))"
2396 (vec_merge:VF_128_256
2397 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2398 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2401 int i, nelt = XVECLEN (operands[5], 0);
2402 HOST_WIDE_INT ival = 0;
2404 for (i = 0; i < nelt; i++)
2405 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2406 ival |= HOST_WIDE_INT_1 << i;
2408 operands[5] = GEN_INT (ival);
;; 256-bit horizontal add/sub of doubles (vhaddpd/vhsubpd).  Note the AVX
;; lane structure: results interleave per 128-bit lane, so elements 0/1 of
;; each source feed the low lane and elements 2/3 the high lane.
2411 (define_insn "avx_h<plusminus_insn>v4df3"
2412 [(set (match_operand:V4DF 0 "register_operand" "=x")
2417 (match_operand:V4DF 1 "register_operand" "x")
2418 (parallel [(const_int 0)]))
2419 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2422 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2423 (parallel [(const_int 0)]))
2424 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2427 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2428 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2430 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2431 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2433 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2434 [(set_attr "type" "sseadd")
2435 (set_attr "prefix" "vex")
2436 (set_attr "mode" "V4DF")])

;; haddpd expander: canonical element order (0+1 of each source).
2438 (define_expand "sse3_haddv2df3"
2439 [(set (match_operand:V2DF 0 "register_operand")
2443 (match_operand:V2DF 1 "register_operand")
2444 (parallel [(const_int 0)]))
2445 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2448 (match_operand:V2DF 2 "vector_operand")
2449 (parallel [(const_int 0)]))
2450 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]

;; haddpd matcher: because addition commutes, any 0/1 vs 1/0 index pairing
;; is accepted (the != conditions below only require the two indices of
;; each pair to differ).
2453 (define_insn "*sse3_haddv2df3"
2454 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2458 (match_operand:V2DF 1 "register_operand" "0,x")
2459 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2462 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2465 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2466 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2469 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2471 && INTVAL (operands[3]) != INTVAL (operands[4])
2472 && INTVAL (operands[5]) != INTVAL (operands[6])"
2474 haddpd\t{%2, %0|%0, %2}
2475 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2476 [(set_attr "isa" "noavx,avx")
2477 (set_attr "type" "sseadd")
2478 (set_attr "prefix" "orig,vex")
2479 (set_attr "mode" "V2DF")])

;; hsubpd: fixed index order (0 - 1), since subtraction does not commute.
2481 (define_insn "sse3_hsubv2df3"
2482 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2486 (match_operand:V2DF 1 "register_operand" "0,x")
2487 (parallel [(const_int 0)]))
2488 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2491 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2492 (parallel [(const_int 0)]))
2493 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2496 hsubpd\t{%2, %0|%0, %2}
2497 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2498 [(set_attr "isa" "noavx,avx")
2499 (set_attr "type" "sseadd")
2500 (set_attr "prefix" "orig,vex")
2501 (set_attr "mode" "V2DF")])

;; Scalar DF result from a within-vector add: implemented with haddpd of
;; the register against itself (note both asm operands are %0/%1).
2503 (define_insn "*sse3_haddv2df3_low"
2504 [(set (match_operand:DF 0 "register_operand" "=x,x")
2507 (match_operand:V2DF 1 "register_operand" "0,x")
2508 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2511 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2513 && INTVAL (operands[2]) != INTVAL (operands[3])"
2515 haddpd\t{%0, %0|%0, %0}
2516 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2517 [(set_attr "isa" "noavx,avx")
2518 (set_attr "type" "sseadd1")
2519 (set_attr "prefix" "orig,vex")
2520 (set_attr "mode" "V2DF")])

;; Scalar DF element0 - element1 via hsubpd of the register with itself.
2522 (define_insn "*sse3_hsubv2df3_low"
2523 [(set (match_operand:DF 0 "register_operand" "=x,x")
2526 (match_operand:V2DF 1 "register_operand" "0,x")
2527 (parallel [(const_int 0)]))
2530 (parallel [(const_int 1)]))))]
2533 hsubpd\t{%0, %0|%0, %0}
2534 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2535 [(set_attr "isa" "noavx,avx")
2536 (set_attr "type" "sseadd1")
2537 (set_attr "prefix" "orig,vex")
2538 (set_attr "mode" "V2DF")])

;; 256-bit horizontal add/sub of singles (vhaddps/vhsubps), again with the
;; per-128-bit-lane interleaving of AVX horizontal ops.
2540 (define_insn "avx_h<plusminus_insn>v8sf3"
2541 [(set (match_operand:V8SF 0 "register_operand" "=x")
2547 (match_operand:V8SF 1 "register_operand" "x")
2548 (parallel [(const_int 0)]))
2549 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2551 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2552 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2556 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2557 (parallel [(const_int 0)]))
2558 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2560 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2561 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2565 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2566 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2568 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2569 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2572 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2573 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2575 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2576 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2578 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2579 [(set_attr "type" "sseadd")
2580 (set_attr "prefix" "vex")
2581 (set_attr "mode" "V8SF")])

;; 128-bit horizontal add/sub of singles (haddps/hsubps).
2583 (define_insn "sse3_h<plusminus_insn>v4sf3"
2584 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2589 (match_operand:V4SF 1 "register_operand" "0,x")
2590 (parallel [(const_int 0)]))
2591 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2593 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2594 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2598 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
2599 (parallel [(const_int 0)]))
2600 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2602 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2603 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2606 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2607 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2608 [(set_attr "isa" "noavx,avx")
2609 (set_attr "type" "sseadd")
2610 (set_attr "atom_unit" "complex")
2611 (set_attr "prefix" "orig,vex")
2612 (set_attr "prefix_rep" "1,*")
2613 (set_attr "mode" "V4SF")])
;; Modes for the 128-bit scalar-sum reductions below.
2615 (define_mode_iterator REDUC_SSE_PLUS_MODE
2616 [(V2DF "TARGET_SSE") (V4SF "TARGET_SSE")])

;; Sum-reduce a 128-bit FP vector to a scalar: in-register reduction via
;; ix86_expand_reduc, then extract element 0.
2618 (define_expand "reduc_plus_scal_<mode>"
2619 [(plus:REDUC_SSE_PLUS_MODE
2620 (match_operand:<ssescalarmode> 0 "register_operand")
2621 (match_operand:REDUC_SSE_PLUS_MODE 1 "register_operand"))]
2624 rtx tmp = gen_reg_rtx (<MODE>mode);
2625 ix86_expand_reduc (gen_add<mode>3, tmp, operands[1]);
2626 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,

;; Modes for the wide (256/512-bit) sum reductions.
2631 (define_mode_iterator REDUC_PLUS_MODE
2632 [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
2633 (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])

;; Wide sum reduction: add the high half to the low half, then recurse
;; into the half-width reduc_plus_scal expander.
2635 (define_expand "reduc_plus_scal_<mode>"
2636 [(plus:REDUC_PLUS_MODE
2637 (match_operand:<ssescalarmode> 0 "register_operand")
2638 (match_operand:REDUC_PLUS_MODE 1 "register_operand"))]
2641 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2642 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2643 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2644 emit_insn (gen_add<ssehalfvecmodelower>3
2645 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2646 emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));

2650 ;; Modes handled by reduc_sm{in,ax}* patterns.
2651 (define_mode_iterator REDUC_SSE_SMINMAX_MODE
2652 [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE")
2653 (V2DI "TARGET_SSE") (V4SI "TARGET_SSE") (V8HI "TARGET_SSE")
2654 (V16QI "TARGET_SSE")])

;; 128-bit signed min/max reduction to a scalar.
2656 (define_expand "reduc_<code>_scal_<mode>"
2657 [(smaxmin:REDUC_SSE_SMINMAX_MODE
2658 (match_operand:<ssescalarmode> 0 "register_operand")
2659 (match_operand:REDUC_SSE_SMINMAX_MODE 1 "register_operand"))]
2662 rtx tmp = gen_reg_rtx (<MODE>mode);
2663 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2664 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,

2669 (define_mode_iterator REDUC_SMINMAX_MODE
2670 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2671 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2672 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2673 (V64QI "TARGET_AVX512BW")
2674 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2675 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2676 (V8DF "TARGET_AVX512F")])

;; Wide signed min/max reduction: fold halves, then recurse.
2678 (define_expand "reduc_<code>_scal_<mode>"
2679 [(smaxmin:REDUC_SMINMAX_MODE
2680 (match_operand:<ssescalarmode> 0 "register_operand")
2681 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2684 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2685 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2686 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2687 emit_insn (gen_<code><ssehalfvecmodelower>3
2688 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2689 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));

;; 512-bit unsigned min/max reduction: same halve-and-recurse scheme.
2693 (define_expand "reduc_<code>_scal_<mode>"
2694 [(umaxmin:VI_AVX512BW
2695 (match_operand:<ssescalarmode> 0 "register_operand")
2696 (match_operand:VI_AVX512BW 1 "register_operand"))]
2699 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2700 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]))
2701 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2702 emit_insn (gen_<code><ssehalfvecmodelower>3
2703 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2704 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));

;; 256-bit integer reduction: fold halves, finish the 128-bit part with
;; ix86_expand_reduc, then extract element 0.
2708 (define_expand "reduc_<code>_scal_<mode>"
2710 (match_operand:<ssescalarmode> 0 "register_operand")
2711 (match_operand:VI_256 1 "register_operand"))]
2714 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2715 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2716 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2717 emit_insn (gen_<code><ssehalfvecmodelower>3
2718 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2719 rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
2720 ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp3, tmp2);
2721 emit_insn (gen_vec_extract<ssehalfvecmodelower><ssescalarmodelower>
2722 (operands[0], tmp3, const0_rtx));

;; Unsigned-min reduction of V8HI to HI.
2726 (define_expand "reduc_umin_scal_v8hi"
2728 (match_operand:HI 0 "register_operand")
2729 (match_operand:V8HI 1 "register_operand"))]
2732 rtx tmp = gen_reg_rtx (V8HImode);
2733 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2734 emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
;; AVX512DQ vreduceps/pd: packed range-reduction controlled by the
;; 8-bit immediate in operand 2; supports masking.
2738 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2739 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2741 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2742 (match_operand:SI 2 "const_0_to_255_operand")]
2745 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2746 [(set_attr "type" "sse")
2747 (set_attr "prefix" "evex")
2748 (set_attr "mode" "<MODE>")])

;; Scalar vreducess/sd: operand 2 supplies the element to reduce, operand 1
;; the other source register; immediate in operand 3.
2750 (define_insn "reduces<mode><mask_scalar_name>"
2751 [(set (match_operand:VF_128 0 "register_operand" "=v")
2754 [(match_operand:VF_128 1 "register_operand" "v")
2755 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2756 (match_operand:SI 3 "const_0_to_255_operand")]
2761 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2, %3}"
2762 [(set_attr "type" "sse")
2763 (set_attr "prefix" "evex")
2764 (set_attr "mode" "<MODE>")])
2766 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2768 ;; Parallel floating point comparisons
2770 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX vcmpps/pd with a full 5-bit predicate immediate (32 comparison
;; predicates); result is an all-ones/all-zeros element mask in a vector reg.
2772 (define_insn "avx_cmp<mode>3"
2773 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2775 [(match_operand:VF_128_256 1 "register_operand" "x")
2776 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2777 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2780 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2781 [(set_attr "type" "ssecmp")
2782 (set_attr "length_immediate" "1")
2783 (set_attr "prefix" "vex")
2784 (set_attr "mode" "<MODE>")])

;; AVX scalar vcmpss/sd with predicate immediate.
2786 (define_insn "avx_vmcmp<mode>3"
2787 [(set (match_operand:VF_128 0 "register_operand" "=x")
2790 [(match_operand:VF_128 1 "register_operand" "x")
2791 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2792 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2797 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2798 [(set_attr "type" "ssecmp")
2799 (set_attr "length_immediate" "1")
2800 (set_attr "prefix" "vex")
2801 (set_attr "mode" "<ssescalarmode>")])

;; cmpps/cmppd via an RTL comparison operator (%D3 prints the predicate
;; suffix).  This variant only matches commutative comparisons, so
;; operand 1 carries the "%" commutative mark.
2803 (define_insn "*<sse>_maskcmp<mode>3_comm"
2804 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2805 (match_operator:VF_128_256 3 "sse_comparison_operator"
2806 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2807 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2809 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2811 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2812 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2813 [(set_attr "isa" "noavx,avx")
2814 (set_attr "type" "ssecmp")
2815 (set_attr "length_immediate" "1")
2816 (set_attr "prefix" "orig,vex")
2817 (set_attr "mode" "<MODE>")])

;; Non-commutative variant of the pattern above (no "%" on operand 1).
2819 (define_insn "<sse>_maskcmp<mode>3"
2820 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2821 (match_operator:VF_128_256 3 "sse_comparison_operator"
2822 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2823 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2826 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2827 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2828 [(set_attr "isa" "noavx,avx")
2829 (set_attr "type" "ssecmp")
2830 (set_attr "length_immediate" "1")
2831 (set_attr "prefix" "orig,vex")
2832 (set_attr "mode" "<MODE>")])

;; Scalar cmpss/cmpsd comparison with predicate suffix from %D3.
2834 (define_insn "<sse>_vmmaskcmp<mode>3"
2835 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2837 (match_operator:VF_128 3 "sse_comparison_operator"
2838 [(match_operand:VF_128 1 "register_operand" "0,x")
2839 (match_operand:VF_128 2 "vector_operand" "xBm,xm")])
2844 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2845 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2846 [(set_attr "isa" "noavx,avx")
2847 (set_attr "type" "ssecmp")
2848 (set_attr "length_immediate" "1,*")
2849 (set_attr "prefix" "orig,vex")
2850 (set_attr "mode" "<ssescalarmode>")])
;; Per-mode predicate for the AVX512 compare immediate: FP modes accept the
;; full 0..31 vcmp predicate range, integer modes only the 0..7 vpcmp range.
2852 (define_mode_attr cmp_imm_predicate
2853 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2854 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2855 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2856 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2857 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2858 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2859 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2860 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2861 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
;; AVX512 compares writing a k-mask register.  This one covers 32/64-bit
;; element vectors (vcmpps/pd, vpcmpd/q) with optional SAE for FP modes.
2863 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2864 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2865 (unspec:<avx512fmaskmode>
2866 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2867 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2868 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2870 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2871 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2872 [(set_attr "type" "ssecmp")
2873 (set_attr "length_immediate" "1")
2874 (set_attr "prefix" "evex")
2875 (set_attr "mode" "<sseinsnmode>")])

;; Signed vpcmpb/w for 8/16-bit element vectors.
2877 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2878 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2879 (unspec:<avx512fmaskmode>
2880 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2881 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2882 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2885 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2886 [(set_attr "type" "ssecmp")
2887 (set_attr "length_immediate" "1")
2888 (set_attr "prefix" "evex")
2889 (set_attr "mode" "<sseinsnmode>")])

;; Unsigned vpcmpub/uw for 8/16-bit element vectors.
2891 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2892 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2893 (unspec:<avx512fmaskmode>
2894 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2895 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2896 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2897 UNSPEC_UNSIGNED_PCMP))]
2899 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2900 [(set_attr "type" "ssecmp")
2901 (set_attr "length_immediate" "1")
2902 (set_attr "prefix" "evex")
2903 (set_attr "mode" "<sseinsnmode>")])

;; Unsigned vpcmpud/uq for 32/64-bit element vectors.
2905 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2906 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2907 (unspec:<avx512fmaskmode>
2908 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2909 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2910 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2911 UNSPEC_UNSIGNED_PCMP))]
2913 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2914 [(set_attr "type" "ssecmp")
2915 (set_attr "length_immediate" "1")
2916 (set_attr "prefix" "evex")
2917 (set_attr "mode" "<sseinsnmode>")])

;; Scalar vcmpss/sd into a k-mask, result ANDed down to the low mask bit.
2919 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2920 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2921 (and:<avx512fmaskmode>
2922 (unspec:<avx512fmaskmode>
2923 [(match_operand:VF_128 1 "register_operand" "v")
2924 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2925 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2929 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
2930 [(set_attr "type" "ssecmp")
2931 (set_attr "length_immediate" "1")
2932 (set_attr "prefix" "evex")
2933 (set_attr "mode" "<ssescalarmode>")])

;; Masked scalar vcmp: operand 4 is the write-mask k-register.
2935 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2936 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2937 (and:<avx512fmaskmode>
2938 (unspec:<avx512fmaskmode>
2939 [(match_operand:VF_128 1 "register_operand" "v")
2940 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2941 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2943 (and:<avx512fmaskmode>
2944 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2947 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}"
2948 [(set_attr "type" "ssecmp")
2949 (set_attr "length_immediate" "1")
2950 (set_attr "prefix" "evex")
2951 (set_attr "mode" "<ssescalarmode>")])

;; Packed FP compare via RTL comparison operator, writing a k-mask
;; (%D3 prints the predicate suffix).
2953 (define_insn "avx512f_maskcmp<mode>3"
2954 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2955 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2956 [(match_operand:VF 1 "register_operand" "v")
2957 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2959 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2960 [(set_attr "type" "ssecmp")
2961 (set_attr "length_immediate" "1")
2962 (set_attr "prefix" "evex")
2963 (set_attr "mode" "<sseinsnmode>")])
;; comiss/comisd and ucomiss/ucomisd: compare the low elements of two
;; vectors and set EFLAGS; maybe_vex gives the legacy or v-prefixed form.
2965 (define_insn "<sse>_<unord>comi<round_saeonly_name>"
2966 [(set (reg:CCFP FLAGS_REG)
2969 (match_operand:<ssevecmode> 0 "register_operand" "v")
2970 (parallel [(const_int 0)]))
2972 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
2973 (parallel [(const_int 0)]))))]
2974 "SSE_FLOAT_MODE_P (<MODE>mode)"
2975 "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2976 [(set_attr "type" "ssecomi")
2977 (set_attr "prefix" "maybe_vex")
2978 (set_attr "prefix_rep" "0")
2979 (set (attr "prefix_data16")
2980 (if_then_else (eq_attr "mode" "DF")
2982 (const_string "0")))
2983 (set_attr "mode" "<MODE>")])
;; vec_cmp expanders.  The AVX512 ones produce a k-mask result via
;; ix86_expand_mask_vec_cmp; the SSE/AVX ones produce an integer-vector
;; mask via ix86_expand_int_vec_cmp / ix86_expand_fp_vec_cmp.

;; Signed compare, 32/64-bit elements, k-mask result.
2985 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2986 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2987 (match_operator:<avx512fmaskmode> 1 ""
2988 [(match_operand:V48_AVX512VL 2 "register_operand")
2989 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
2992 bool ok = ix86_expand_mask_vec_cmp (operands);

;; Signed compare, 8/16-bit elements, k-mask result.
2997 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2998 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2999 (match_operator:<avx512fmaskmode> 1 ""
3000 [(match_operand:VI12_AVX512VL 2 "register_operand")
3001 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3004 bool ok = ix86_expand_mask_vec_cmp (operands);

;; Signed integer compare, 256-bit vectors.
3009 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3010 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3011 (match_operator:<sseintvecmode> 1 ""
3012 [(match_operand:VI_256 2 "register_operand")
3013 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3016 bool ok = ix86_expand_int_vec_cmp (operands);

;; Signed integer compare, 128-bit 8/16/32-bit element vectors.
3021 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3022 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3023 (match_operator:<sseintvecmode> 1 ""
3024 [(match_operand:VI124_128 2 "register_operand")
3025 (match_operand:VI124_128 3 "vector_operand")]))]
3028 bool ok = ix86_expand_int_vec_cmp (operands);

;; Signed integer compare, V2DI (needs its own expander).
3033 (define_expand "vec_cmpv2div2di"
3034 [(set (match_operand:V2DI 0 "register_operand")
3035 (match_operator:V2DI 1 ""
3036 [(match_operand:V2DI 2 "register_operand")
3037 (match_operand:V2DI 3 "vector_operand")]))]
3040 bool ok = ix86_expand_int_vec_cmp (operands);

;; FP compare, 256-bit vectors, integer-vector mask result.
3045 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3046 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3047 (match_operator:<sseintvecmode> 1 ""
3048 [(match_operand:VF_256 2 "register_operand")
3049 (match_operand:VF_256 3 "nonimmediate_operand")]))]
3052 bool ok = ix86_expand_fp_vec_cmp (operands);

;; FP compare, 128-bit vectors.
3057 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3058 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3059 (match_operator:<sseintvecmode> 1 ""
3060 [(match_operand:VF_128 2 "register_operand")
3061 (match_operand:VF_128 3 "vector_operand")]))]
3064 bool ok = ix86_expand_fp_vec_cmp (operands);

;; Unsigned compare, 32/64-bit elements, k-mask result.
3069 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3070 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3071 (match_operator:<avx512fmaskmode> 1 ""
3072 [(match_operand:VI48_AVX512VL 2 "register_operand")
3073 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
3076 bool ok = ix86_expand_mask_vec_cmp (operands);

;; Unsigned compare, 8/16-bit elements, k-mask result.
3081 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3082 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3083 (match_operator:<avx512fmaskmode> 1 ""
3084 [(match_operand:VI12_AVX512VL 2 "register_operand")
3085 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3088 bool ok = ix86_expand_mask_vec_cmp (operands);

;; Unsigned integer compare, 256-bit vectors.
3093 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3094 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3095 (match_operator:<sseintvecmode> 1 ""
3096 [(match_operand:VI_256 2 "register_operand")
3097 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3100 bool ok = ix86_expand_int_vec_cmp (operands);
3105 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3106 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3107 (match_operator:<sseintvecmode> 1 ""
3108 [(match_operand:VI124_128 2 "register_operand")
3109 (match_operand:VI124_128 3 "vector_operand")]))]
3112 bool ok = ix86_expand_int_vec_cmp (operands);
3117 (define_expand "vec_cmpuv2div2di"
3118 [(set (match_operand:V2DI 0 "register_operand")
3119 (match_operator:V2DI 1 ""
3120 [(match_operand:V2DI 2 "register_operand")
3121 (match_operand:V2DI 3 "vector_operand")]))]
3124 bool ok = ix86_expand_int_vec_cmp (operands);
3129 (define_expand "vec_cmpeqv2div2di"
3130 [(set (match_operand:V2DI 0 "register_operand")
3131 (match_operator:V2DI 1 ""
3132 [(match_operand:V2DI 2 "register_operand")
3133 (match_operand:V2DI 3 "vector_operand")]))]
3136 bool ok = ix86_expand_int_vec_cmp (operands);
3141 (define_expand "vcond<V_512:mode><VF_512:mode>"
3142 [(set (match_operand:V_512 0 "register_operand")
3144 (match_operator 3 ""
3145 [(match_operand:VF_512 4 "nonimmediate_operand")
3146 (match_operand:VF_512 5 "nonimmediate_operand")])
3147 (match_operand:V_512 1 "general_operand")
3148 (match_operand:V_512 2 "general_operand")))]
3150 && (GET_MODE_NUNITS (<V_512:MODE>mode)
3151 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
3153 bool ok = ix86_expand_fp_vcond (operands);
3158 (define_expand "vcond<V_256:mode><VF_256:mode>"
3159 [(set (match_operand:V_256 0 "register_operand")
3161 (match_operator 3 ""
3162 [(match_operand:VF_256 4 "nonimmediate_operand")
3163 (match_operand:VF_256 5 "nonimmediate_operand")])
3164 (match_operand:V_256 1 "general_operand")
3165 (match_operand:V_256 2 "general_operand")))]
3167 && (GET_MODE_NUNITS (<V_256:MODE>mode)
3168 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
3170 bool ok = ix86_expand_fp_vcond (operands);
3175 (define_expand "vcond<V_128:mode><VF_128:mode>"
3176 [(set (match_operand:V_128 0 "register_operand")
3178 (match_operator 3 ""
3179 [(match_operand:VF_128 4 "vector_operand")
3180 (match_operand:VF_128 5 "vector_operand")])
3181 (match_operand:V_128 1 "general_operand")
3182 (match_operand:V_128 2 "general_operand")))]
3184 && (GET_MODE_NUNITS (<V_128:MODE>mode)
3185 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
3187 bool ok = ix86_expand_fp_vcond (operands);
3192 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3193 [(set (match_operand:V48_AVX512VL 0 "register_operand")
3194 (vec_merge:V48_AVX512VL
3195 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3196 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
3197 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3200 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3201 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3202 (vec_merge:VI12_AVX512VL
3203 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3204 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
3205 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3208 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3209 [(set (match_operand:VI_256 0 "register_operand")
3211 (match_operand:VI_256 1 "nonimmediate_operand")
3212 (match_operand:VI_256 2 "nonimm_or_0_operand")
3213 (match_operand:<sseintvecmode> 3 "register_operand")))]
3216 ix86_expand_sse_movcc (operands[0], operands[3],
3217 operands[1], operands[2]);
3221 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3222 [(set (match_operand:VI124_128 0 "register_operand")
3223 (vec_merge:VI124_128
3224 (match_operand:VI124_128 1 "vector_operand")
3225 (match_operand:VI124_128 2 "nonimm_or_0_operand")
3226 (match_operand:<sseintvecmode> 3 "register_operand")))]
3229 ix86_expand_sse_movcc (operands[0], operands[3],
3230 operands[1], operands[2]);
3234 (define_expand "vcond_mask_v2div2di"
3235 [(set (match_operand:V2DI 0 "register_operand")
3237 (match_operand:V2DI 1 "vector_operand")
3238 (match_operand:V2DI 2 "nonimm_or_0_operand")
3239 (match_operand:V2DI 3 "register_operand")))]
3242 ix86_expand_sse_movcc (operands[0], operands[3],
3243 operands[1], operands[2]);
3247 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3248 [(set (match_operand:VF_256 0 "register_operand")
3250 (match_operand:VF_256 1 "nonimmediate_operand")
3251 (match_operand:VF_256 2 "nonimm_or_0_operand")
3252 (match_operand:<sseintvecmode> 3 "register_operand")))]
3255 ix86_expand_sse_movcc (operands[0], operands[3],
3256 operands[1], operands[2]);
3260 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3261 [(set (match_operand:VF_128 0 "register_operand")
3263 (match_operand:VF_128 1 "vector_operand")
3264 (match_operand:VF_128 2 "nonimm_or_0_operand")
3265 (match_operand:<sseintvecmode> 3 "register_operand")))]
3268 ix86_expand_sse_movcc (operands[0], operands[3],
3269 operands[1], operands[2]);
3273 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3275 ;; Parallel floating point logical operations
3277 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3279 (define_insn "<sse>_andnot<mode>3<mask_name>"
3280 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3283 (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
3284 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3285 "TARGET_SSE && <mask_avx512vl_condition>"
3291 switch (which_alternative)
3294 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3299 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3305 switch (get_attr_mode (insn))
3313 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3314 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3315 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3318 suffix = "<ssemodesuffix>";
3321 snprintf (buf, sizeof (buf), ops, suffix);
3322 output_asm_insn (buf, operands);
3325 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3326 (set_attr "type" "sselog")
3327 (set_attr "prefix" "orig,maybe_vex,evex,evex")
3329 (cond [(and (match_test "<mask_applied>")
3330 (and (eq_attr "alternative" "1")
3331 (match_test "!TARGET_AVX512DQ")))
3332 (const_string "<sseintvecmode2>")
3333 (eq_attr "alternative" "3")
3334 (const_string "<sseintvecmode2>")
3335 (and (match_test "<MODE_SIZE> == 16")
3336 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3337 (const_string "<ssePSmode>")
3338 (match_test "TARGET_AVX")
3339 (const_string "<MODE>")
3340 (match_test "optimize_function_for_size_p (cfun)")
3341 (const_string "V4SF")
3343 (const_string "<MODE>")))])
3346 (define_insn "<sse>_andnot<mode>3<mask_name>"
3347 [(set (match_operand:VF_512 0 "register_operand" "=v")
3350 (match_operand:VF_512 1 "register_operand" "v"))
3351 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3358 suffix = "<ssemodesuffix>";
3361 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3362 if (!TARGET_AVX512DQ)
3364 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3368 snprintf (buf, sizeof (buf),
3369 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3371 output_asm_insn (buf, operands);
3374 [(set_attr "type" "sselog")
3375 (set_attr "prefix" "evex")
3377 (if_then_else (match_test "TARGET_AVX512DQ")
3378 (const_string "<sseinsnmode>")
3379 (const_string "XI")))])
3381 (define_expand "<code><mode>3<mask_name>"
3382 [(set (match_operand:VF_128_256 0 "register_operand")
3383 (any_logic:VF_128_256
3384 (match_operand:VF_128_256 1 "vector_operand")
3385 (match_operand:VF_128_256 2 "vector_operand")))]
3386 "TARGET_SSE && <mask_avx512vl_condition>"
3387 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3389 (define_expand "<code><mode>3<mask_name>"
3390 [(set (match_operand:VF_512 0 "register_operand")
3392 (match_operand:VF_512 1 "nonimmediate_operand")
3393 (match_operand:VF_512 2 "nonimmediate_operand")))]
3395 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3397 (define_insn "*<code><mode>3<mask_name>"
3398 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3399 (any_logic:VF_128_256
3400 (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
3401 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3402 "TARGET_SSE && <mask_avx512vl_condition>
3403 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3409 switch (which_alternative)
3412 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3417 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3423 switch (get_attr_mode (insn))
3431 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
3432 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3433 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3436 suffix = "<ssemodesuffix>";
3439 snprintf (buf, sizeof (buf), ops, suffix);
3440 output_asm_insn (buf, operands);
3443 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3444 (set_attr "type" "sselog")
3445 (set_attr "prefix" "orig,maybe_evex,evex,evex")
3447 (cond [(and (match_test "<mask_applied>")
3448 (and (eq_attr "alternative" "1")
3449 (match_test "!TARGET_AVX512DQ")))
3450 (const_string "<sseintvecmode2>")
3451 (eq_attr "alternative" "3")
3452 (const_string "<sseintvecmode2>")
3453 (and (match_test "<MODE_SIZE> == 16")
3454 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3455 (const_string "<ssePSmode>")
3456 (match_test "TARGET_AVX")
3457 (const_string "<MODE>")
3458 (match_test "optimize_function_for_size_p (cfun)")
3459 (const_string "V4SF")
3461 (const_string "<MODE>")))])
3463 (define_insn "*<code><mode>3<mask_name>"
3464 [(set (match_operand:VF_512 0 "register_operand" "=v")
3466 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
3467 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3468 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3474 suffix = "<ssemodesuffix>";
3477 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
3478 if (!TARGET_AVX512DQ)
3480 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3484 snprintf (buf, sizeof (buf),
3485 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3487 output_asm_insn (buf, operands);
3490 [(set_attr "type" "sselog")
3491 (set_attr "prefix" "evex")
3493 (if_then_else (match_test "TARGET_AVX512DQ")
3494 (const_string "<sseinsnmode>")
3495 (const_string "XI")))])
3497 (define_expand "copysign<mode>3"
3500 (not:VF (match_dup 3))
3501 (match_operand:VF 1 "vector_operand")))
3503 (and:VF (match_dup 3)
3504 (match_operand:VF 2 "vector_operand")))
3505 (set (match_operand:VF 0 "register_operand")
3506 (ior:VF (match_dup 4) (match_dup 5)))]
3509 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3511 operands[4] = gen_reg_rtx (<MODE>mode);
3512 operands[5] = gen_reg_rtx (<MODE>mode);
3515 (define_expand "xorsign<mode>3"
3517 (and:VF (match_dup 3)
3518 (match_operand:VF 2 "vector_operand")))
3519 (set (match_operand:VF 0 "register_operand")
3520 (xor:VF (match_dup 4)
3521 (match_operand:VF 1 "vector_operand")))]
3524 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3526 operands[4] = gen_reg_rtx (<MODE>mode);
3529 (define_expand "signbit<mode>2"
3530 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3531 (lshiftrt:<sseintvecmode>
3532 (subreg:<sseintvecmode>
3533 (match_operand:VF1_AVX2 1 "register_operand") 0)
3536 "operands[2] = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)-1);")
3538 ;; Also define scalar versions. These are used for abs, neg, and
3539 ;; conditional move. Using subregs into vector modes causes register
3540 ;; allocation lossage. These patterns do not allow memory operands
3541 ;; because the native instructions read the full 128-bits.
3543 (define_insn "*andnot<mode>3"
3544 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3547 (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
3548 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3549 "SSE_FLOAT_MODE_P (<MODE>mode)"
3554 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3556 switch (which_alternative)
3559 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3562 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3565 if (TARGET_AVX512DQ)
3566 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3569 suffix = <MODE>mode == DFmode ? "q" : "d";
3570 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3574 if (TARGET_AVX512DQ)
3575 ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3578 suffix = <MODE>mode == DFmode ? "q" : "d";
3579 ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3586 snprintf (buf, sizeof (buf), ops, suffix);
3587 output_asm_insn (buf, operands);
3590 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3591 (set_attr "type" "sselog")
3592 (set_attr "prefix" "orig,vex,evex,evex")
3594 (cond [(eq_attr "alternative" "2")
3595 (if_then_else (match_test "TARGET_AVX512DQ")
3596 (const_string "<ssevecmode>")
3597 (const_string "TI"))
3598 (eq_attr "alternative" "3")
3599 (if_then_else (match_test "TARGET_AVX512DQ")
3600 (const_string "<avx512fvecmode>")
3601 (const_string "XI"))
3602 (and (match_test "<MODE_SIZE> == 16")
3603 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3604 (const_string "V4SF")
3605 (match_test "TARGET_AVX")
3606 (const_string "<ssevecmode>")
3607 (match_test "optimize_function_for_size_p (cfun)")
3608 (const_string "V4SF")
3610 (const_string "<ssevecmode>")))])
3612 (define_insn "*andnottf3"
3613 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3615 (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
3616 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3622 = (which_alternative >= 2 ? "pandnq"
3623 : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
3625 switch (which_alternative)
3628 ops = "%s\t{%%2, %%0|%%0, %%2}";
3632 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3635 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3641 snprintf (buf, sizeof (buf), ops, tmp);
3642 output_asm_insn (buf, operands);
3645 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3646 (set_attr "type" "sselog")
3647 (set (attr "prefix_data16")
3649 (and (eq_attr "alternative" "0")
3650 (eq_attr "mode" "TI"))
3652 (const_string "*")))
3653 (set_attr "prefix" "orig,vex,evex,evex")
3655 (cond [(eq_attr "alternative" "2")
3657 (eq_attr "alternative" "3")
3659 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3660 (const_string "V4SF")
3661 (match_test "TARGET_AVX")
3663 (ior (not (match_test "TARGET_SSE2"))
3664 (match_test "optimize_function_for_size_p (cfun)"))
3665 (const_string "V4SF")
3667 (const_string "TI")))])
3669 (define_insn "*<code><mode>3"
3670 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3672 (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
3673 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3674 "SSE_FLOAT_MODE_P (<MODE>mode)"
3679 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3681 switch (which_alternative)
3684 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3687 if (!TARGET_AVX512DQ)
3689 suffix = <MODE>mode == DFmode ? "q" : "d";
3690 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3695 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3698 if (TARGET_AVX512DQ)
3699 ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3702 suffix = <MODE>mode == DFmode ? "q" : "d";
3703 ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3710 snprintf (buf, sizeof (buf), ops, suffix);
3711 output_asm_insn (buf, operands);
3714 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3715 (set_attr "type" "sselog")
3716 (set_attr "prefix" "orig,vex,evex,evex")
3718 (cond [(eq_attr "alternative" "2")
3719 (if_then_else (match_test "TARGET_AVX512DQ")
3720 (const_string "<ssevecmode>")
3721 (const_string "TI"))
3722 (eq_attr "alternative" "3")
3723 (if_then_else (match_test "TARGET_AVX512DQ")
3724 (const_string "<avx512fvecmode>")
3725 (const_string "XI"))
3726 (and (match_test "<MODE_SIZE> == 16")
3727 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3728 (const_string "V4SF")
3729 (match_test "TARGET_AVX")
3730 (const_string "<ssevecmode>")
3731 (match_test "optimize_function_for_size_p (cfun)")
3732 (const_string "V4SF")
3734 (const_string "<ssevecmode>")))])
3736 (define_expand "<code>tf3"
3737 [(set (match_operand:TF 0 "register_operand")
3739 (match_operand:TF 1 "vector_operand")
3740 (match_operand:TF 2 "vector_operand")))]
3742 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3744 (define_insn "*<code>tf3"
3745 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3747 (match_operand:TF 1 "vector_operand" "%0,x,v,v")
3748 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3749 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3754 = (which_alternative >= 2 ? "p<logic>q"
3755 : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
3757 switch (which_alternative)
3760 ops = "%s\t{%%2, %%0|%%0, %%2}";
3764 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3767 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3773 snprintf (buf, sizeof (buf), ops, tmp);
3774 output_asm_insn (buf, operands);
3777 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3778 (set_attr "type" "sselog")
3779 (set (attr "prefix_data16")
3781 (and (eq_attr "alternative" "0")
3782 (eq_attr "mode" "TI"))
3784 (const_string "*")))
3785 (set_attr "prefix" "orig,vex,evex,evex")
3787 (cond [(eq_attr "alternative" "2")
3789 (eq_attr "alternative" "3")
3791 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3792 (const_string "V4SF")
3793 (match_test "TARGET_AVX")
3795 (ior (not (match_test "TARGET_SSE2"))
3796 (match_test "optimize_function_for_size_p (cfun)"))
3797 (const_string "V4SF")
3799 (const_string "TI")))])
3801 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3803 ;; FMA floating point multiply/accumulate instructions. These include
3804 ;; scalar versions of the instructions as well as vector versions.
3806 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3808 ;; The standard names for scalar FMA are only available with SSE math enabled.
3809 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
3810 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3811 ;; and TARGET_FMA4 are both false.
3812 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3813 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3814 ;; GAS to allow proper prefix selection. However, for the moment all hardware
3815 ;; that supports AVX512F also supports FMA so we can ignore this for now.
3816 (define_mode_iterator FMAMODEM
3817 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3818 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3819 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3820 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3821 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3822 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3823 (V16SF "TARGET_AVX512F")
3824 (V8DF "TARGET_AVX512F")])
3826 (define_expand "fma<mode>4"
3827 [(set (match_operand:FMAMODEM 0 "register_operand")
3829 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3830 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3831 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3833 (define_expand "fms<mode>4"
3834 [(set (match_operand:FMAMODEM 0 "register_operand")
3836 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3837 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3838 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3840 (define_expand "fnma<mode>4"
3841 [(set (match_operand:FMAMODEM 0 "register_operand")
3843 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3844 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3845 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3847 (define_expand "fnms<mode>4"
3848 [(set (match_operand:FMAMODEM 0 "register_operand")
3850 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3851 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3852 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3854 ;; The builtins for intrinsics are not constrained by SSE math enabled.
3855 (define_mode_iterator FMAMODE_AVX512
3856 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3857 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3858 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3859 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3860 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3861 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3862 (V16SF "TARGET_AVX512F")
3863 (V8DF "TARGET_AVX512F")])
3865 (define_mode_iterator FMAMODE
3866 [SF DF V4SF V2DF V8SF V4DF])
3868 (define_expand "fma4i_fmadd_<mode>"
3869 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3871 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3872 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3873 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3875 (define_expand "fma4i_fmsub_<mode>"
3876 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3878 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3879 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3881 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
3883 (define_expand "fma4i_fnmadd_<mode>"
3884 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3887 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
3888 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3889 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3891 (define_expand "fma4i_fnmsub_<mode>"
3892 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3895 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
3896 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3898 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
3900 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3901 [(match_operand:VF_AVX512VL 0 "register_operand")
3902 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3903 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3904 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3905 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3906 "TARGET_AVX512F && <round_mode512bit_condition>"
3908 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3909 operands[0], operands[1], operands[2], operands[3],
3910 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3914 (define_insn "*fma_fmadd_<mode>"
3915 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3917 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3918 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3919 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3920 "TARGET_FMA || TARGET_FMA4"
3922 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3923 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3924 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3925 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3926 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3927 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3928 (set_attr "type" "ssemuladd")
3929 (set_attr "mode" "<MODE>")])
3931 ;; Suppose AVX-512F as baseline
3932 (define_mode_iterator VF_SF_AVX512VL
3933 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3934 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
3936 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3937 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3939 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3940 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3941 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3942 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3944 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3945 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3946 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3947 [(set_attr "type" "ssemuladd")
3948 (set_attr "mode" "<MODE>")])
3950 (define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_1"
3951 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
3953 (match_operand:VF_AVX512 1 "register_operand" "0,v")
3954 (match_operand:VF_AVX512 2 "register_operand" "v,0")
3955 (vec_duplicate:VF_AVX512
3956 (match_operand:<ssescalarmode> 3 "memory_operand" "m,m"))))]
3957 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
3958 "vfmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
3959 [(set_attr "type" "ssemuladd")
3960 (set_attr "mode" "<MODE>")])
3962 (define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_2"
3963 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
3965 (vec_duplicate:VF_AVX512
3966 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))
3967 (match_operand:VF_AVX512 2 "register_operand" "0,v")
3968 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
3969 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
3971 vfmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
3972 vfmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
3973 [(set_attr "type" "ssemuladd")
3974 (set_attr "mode" "<MODE>")])
3976 (define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_3"
3977 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
3979 (match_operand:VF_AVX512 1 "register_operand" "0,v")
3980 (vec_duplicate:VF_AVX512
3981 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
3982 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
3983 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
3985 vfmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
3986 vfmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
3987 [(set_attr "type" "ssemuladd")
3988 (set_attr "mode" "<MODE>")])
3990 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3991 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3992 (vec_merge:VF_AVX512VL
3994 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3995 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
3996 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3998 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3999 "TARGET_AVX512F && <round_mode512bit_condition>"
4001 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4002 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4003 [(set_attr "type" "ssemuladd")
4004 (set_attr "mode" "<MODE>")])
4006 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
4007 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4008 (vec_merge:VF_AVX512VL
4010 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4011 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4012 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4014 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4016 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4017 [(set_attr "type" "ssemuladd")
4018 (set_attr "mode" "<MODE>")])
4020 (define_insn "*fma_fmsub_<mode>"
4021 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4023 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
4024 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4026 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4027 "TARGET_FMA || TARGET_FMA4"
4029 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4030 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4031 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4032 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4033 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4034 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4035 (set_attr "type" "ssemuladd")
4036 (set_attr "mode" "<MODE>")])
4038 (define_expand "<avx512>_fmsub_<mode>_maskz<round_expand_name>"
4039 [(match_operand:VF_AVX512VL 0 "register_operand")
4040 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4041 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4042 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4043 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4044 "TARGET_AVX512F && <round_mode512bit_condition>"
4046 emit_insn (gen_fma_fmsub_<mode>_maskz_1<round_expand_name> (
4047 operands[0], operands[1], operands[2], operands[3],
4048 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4052 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
4053 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4055 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4056 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4058 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
4059 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4061 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4062 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4063 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4064 [(set_attr "type" "ssemuladd")
4065 (set_attr "mode" "<MODE>")])
;; fmsub with one input broadcast from scalar memory (vec_duplicate of a
;; memory operand, printed with the <avx512bcst> {1toN} modifier).
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; bcst_1: operand 3 (the subtrahend) is the broadcast -> 213 form only.
4067 (define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_1"
4068 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4070 (match_operand:VF_AVX512 1 "register_operand" "0,v")
4071 (match_operand:VF_AVX512 2 "register_operand" "v,0")
4073 (vec_duplicate:VF_AVX512
4074 (match_operand:<ssescalarmode> 3 "memory_operand" "m,m")))))]
4075 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4076 "vfmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4077 [(set_attr "type" "ssemuladd")
4078 (set_attr "mode" "<MODE>")])
;; bcst_2: operand 1 (first multiplicand) is the broadcast -> 132/231 forms.
4080 (define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_2"
4081 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4083 (vec_duplicate:VF_AVX512
4084 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))
4085 (match_operand:VF_AVX512 2 "register_operand" "0,v")
4087 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4088 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4090 vfmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4091 vfmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4092 [(set_attr "type" "ssemuladd")
4093 (set_attr "mode" "<MODE>")])
;; bcst_3: operand 2 (second multiplicand) is the broadcast -> 132/231 forms.
;; NOTE(review): operand 3 here uses "nonimmediate_operand" while the sibling
;; bcst patterns use "register_operand" with the same "v,0" constraints —
;; verify against upstream sse.md.
4095 (define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_3"
4096 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4098 (match_operand:VF_AVX512 1 "register_operand" "0,v")
4099 (vec_duplicate:VF_AVX512
4100 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4102 (match_operand:VF_AVX512 3 "nonimmediate_operand" "v,0"))))]
4103 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4105 vfmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4106 vfmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4107 [(set_attr "type" "ssemuladd")
4108 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsub: result is vec_merge'd under mask operand 4.
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; _mask: output tied to operand 1 ("0,0"), so 132/213 forms are emitted.
4110 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
4111 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4112 (vec_merge:VF_AVX512VL
4114 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4115 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4117 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4119 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4122 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4123 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4124 [(set_attr "type" "ssemuladd")
4125 (set_attr "mode" "<MODE>")])
;; _mask3: output tied to operand 3 (the subtrahend, constraint "0"),
;; so the 231 form is emitted; operand 1 is commutative ("%v").
4127 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
4128 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4129 (vec_merge:VF_AVX512VL
4131 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4132 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4134 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4136 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4137 "TARGET_AVX512F && <round_mode512bit_condition>"
4138 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4139 [(set_attr "type" "ssemuladd")
4140 (set_attr "mode" "<MODE>")])
;; fnmadd: -(op1 * op2) + op3, for FMA3 (alternatives 0-2, 132/213/231
;; forms) and FMA4 (alternatives 3-4, four-operand form).
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
4142 (define_insn "*fma_fnmadd_<mode>"
4143 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4146 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4147 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4148 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
4149 "TARGET_FMA || TARGET_FMA4"
4151 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4152 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4153 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4154 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4155 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4156 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4157 (set_attr "type" "ssemuladd")
4158 (set_attr "mode" "<MODE>")])
;; fnmadd: -(op1 * op2) + op3.
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; Zero-masked expander: forwards to the _maskz_1 insn with a zero merge vector.
4160 (define_expand "<avx512>_fnmadd_<mode>_maskz<round_expand_name>"
4161 [(match_operand:VF_AVX512VL 0 "register_operand")
4162 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4163 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4164 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4165 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4166 "TARGET_AVX512F && <round_mode512bit_condition>"
4168 emit_insn (gen_fma_fnmadd_<mode>_maskz_1<round_expand_name> (
4169 operands[0], operands[1], operands[2], operands[3],
4170 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; fnmadd insn with sd-mask/rounding substs; alternatives pick 132/213/231
;; by which input shares the output register.
4174 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
4175 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4178 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
4179 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4180 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
4181 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4183 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4184 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4185 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4186 [(set_attr "type" "ssemuladd")
4187 (set_attr "mode" "<MODE>")])
;; fnmadd with one input broadcast from scalar memory ({1toN} embedded
;; broadcast, printed via <avx512bcst>).
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; bcst_1: operand 3 (the addend) is the broadcast -> 213 form only.
4189 (define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_1"
4190 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4193 (match_operand:VF_AVX512 1 "register_operand" "0,v"))
4194 (match_operand:VF_AVX512 2 "register_operand" "v,0")
4195 (vec_duplicate:VF_AVX512
4196 (match_operand:<ssescalarmode> 3 "memory_operand" "m,m"))))]
4197 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4198 "vfnmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4199 [(set_attr "type" "ssemuladd")
4200 (set_attr "mode" "<MODE>")])
;; bcst_2: operand 1 (first multiplicand) is the broadcast -> 132/231 forms.
4202 (define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_2"
4203 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4206 (vec_duplicate:VF_AVX512
4207 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))
4208 (match_operand:VF_AVX512 2 "register_operand" "0,v")
4209 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
4210 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4212 vfnmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4213 vfnmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4214 [(set_attr "type" "ssemuladd")
4215 (set_attr "mode" "<MODE>")])
;; bcst_3: operand 2 (second multiplicand) is the broadcast -> 132/231 forms.
4217 (define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_3"
4218 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4221 (match_operand:VF_AVX512 1 "register_operand" "0,v"))
4222 (vec_duplicate:VF_AVX512
4223 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4224 (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
4225 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4227 vfnmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4228 vfnmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4229 [(set_attr "type" "ssemuladd")
4230 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmadd under mask operand 4.
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; _mask: output tied to operand 1 -> 132/213 forms.
4232 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
4233 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4234 (vec_merge:VF_AVX512VL
4237 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4238 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4239 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4241 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4242 "TARGET_AVX512F && <round_mode512bit_condition>"
4244 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4245 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4246 [(set_attr "type" "ssemuladd")
4247 (set_attr "mode" "<MODE>")])
;; _mask3: output tied to operand 3 (the addend) -> 231 form; operand 1
;; is commutative ("%v").
4249 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
4250 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4251 (vec_merge:VF_AVX512VL
4254 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4255 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4256 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4258 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4259 "TARGET_AVX512F && <round_mode512bit_condition>"
4260 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4261 [(set_attr "type" "ssemuladd")
4262 (set_attr "mode" "<MODE>")])
;; fnmsub: -(op1 * op2) - op3, for FMA3 (alternatives 0-2) and FMA4
;; (alternatives 3-4).
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; FIX: the first three templates wrongly carried <round_sd_mask_op4> /
;; <sd_mask_op4> subst attributes.  This pattern's name has neither
;; <sd_maskz_name> nor <round_name> and its condition is plain
;; "TARGET_FMA || TARGET_FMA4", so those attributes do not apply here;
;; the sibling *fma_fnmadd_<mode> uses the plain templates now used below.
4264 (define_insn "*fma_fnmsub_<mode>"
4265 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4268 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4269 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4271 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4272 "TARGET_FMA || TARGET_FMA4"
4274 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4275 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4276 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4277 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4278 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4279 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4280 (set_attr "type" "ssemuladd")
4281 (set_attr "mode" "<MODE>")])
;; fnmsub: -(op1 * op2) - op3.
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; Zero-masked expander: forwards to the _maskz_1 insn with a zero merge vector.
4283 (define_expand "<avx512>_fnmsub_<mode>_maskz<round_expand_name>"
4284 [(match_operand:VF_AVX512VL 0 "register_operand")
4285 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4286 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4287 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4288 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4289 "TARGET_AVX512F && <round_mode512bit_condition>"
4291 emit_insn (gen_fma_fnmsub_<mode>_maskz_1<round_expand_name> (
4292 operands[0], operands[1], operands[2], operands[3],
4293 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; fnmsub insn with sd-mask/rounding substs; alternatives pick 132/213/231.
4297 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
4298 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4301 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
4302 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4304 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
4305 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4307 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4308 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4309 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4310 [(set_attr "type" "ssemuladd")
4311 (set_attr "mode" "<MODE>")])
;; fnmsub with one input broadcast from scalar memory ({1toN} embedded
;; broadcast, printed via <avx512bcst>).
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; bcst_1: operand 3 (the subtrahend) is the broadcast -> 213 form only.
4313 (define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_1"
4314 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4317 (match_operand:VF_AVX512 1 "register_operand" "0,v"))
4318 (match_operand:VF_AVX512 2 "register_operand" "v,0")
4320 (vec_duplicate:VF_AVX512
4321 (match_operand:<ssescalarmode> 3 "memory_operand" "m,m")))))]
4322 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4323 "vfnmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4324 [(set_attr "type" "ssemuladd")
4325 (set_attr "mode" "<MODE>")])
;; bcst_2: operand 1 (first multiplicand) is the broadcast -> 132/231 forms.
4327 (define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_2"
4328 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4331 (vec_duplicate:VF_AVX512
4332 (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))
4333 (match_operand:VF_AVX512 2 "register_operand" "0,v")
4335 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4336 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4338 vfnmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4339 vfnmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4340 [(set_attr "type" "ssemuladd")
4341 (set_attr "mode" "<MODE>")])
;; bcst_3: operand 2 (second multiplicand) is the broadcast -> 132/231 forms.
4343 (define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_3"
4344 [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4347 (match_operand:VF_AVX512 1 "register_operand" "0,v"))
4348 (vec_duplicate:VF_AVX512
4349 (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4351 (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4352 "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4354 vfnmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4355 vfnmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4356 [(set_attr "type" "ssemuladd")
4357 (set_attr "mode" "<MODE>")])
;; Merge-masked fnmsub under mask operand 4.
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; _mask: output tied to operand 1 -> 132/213 forms.
4359 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
4360 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4361 (vec_merge:VF_AVX512VL
4364 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4365 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4367 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4369 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4370 "TARGET_AVX512F && <round_mode512bit_condition>"
4372 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4373 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4374 [(set_attr "type" "ssemuladd")
4375 (set_attr "mode" "<MODE>")])
;; _mask3: output tied to operand 3 -> 231 form; operand 1 commutative ("%v").
4377 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
4378 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4379 (vec_merge:VF_AVX512VL
4382 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4383 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4385 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4387 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4389 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4390 [(set_attr "type" "ssemuladd")
4391 (set_attr "mode" "<MODE>")])
4393 ;; FMA parallel floating point multiply addsub and subadd operations.
4395 ;; It would be possible to represent these without the UNSPEC as
4398 ;; (fma op1 op2 op3)
4399 ;; (fma op1 op2 (neg op3))
4402 ;; But this doesn't seem useful in practice.
;; fmaddsub: alternating add/sub FMA, kept as an UNSPEC (see comment above).
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; Generic expander over all of FMA/FMA4/AVX512F.
4404 (define_expand "fmaddsub_<mode>"
4405 [(set (match_operand:VF 0 "register_operand")
4407 [(match_operand:VF 1 "nonimmediate_operand")
4408 (match_operand:VF 2 "nonimmediate_operand")
4409 (match_operand:VF 3 "nonimmediate_operand")]
4411 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; Zero-masked expander: forwards to the _maskz_1 insn with a zero merge vector.
4413 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
4414 [(match_operand:VF_AVX512VL 0 "register_operand")
4415 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4416 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4417 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4418 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4421 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
4422 operands[0], operands[1], operands[2], operands[3],
4423 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; fmaddsub insns (UNSPEC-based).
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; FMA3 (alternatives 0-2, 132/213/231) and FMA4 (alternatives 3-4) form.
4427 (define_insn "*fma_fmaddsub_<mode>"
4428 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4430 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4431 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4432 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
4434 "TARGET_FMA || TARGET_FMA4"
4436 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4437 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4438 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4439 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4440 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4441 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4442 (set_attr "type" "ssemuladd")
4443 (set_attr "mode" "<MODE>")])
;; AVX512 form with sd-mask/rounding substs; alternatives pick 132/213/231.
4445 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
4446 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4447 (unspec:VF_SF_AVX512VL
4448 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4449 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4450 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
4452 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4454 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4455 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4456 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4457 [(set_attr "type" "ssemuladd")
4458 (set_attr "mode" "<MODE>")])
;; Merge-masked fmaddsub under mask operand 4.
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; _mask: output tied to operand 1 -> 132/213 forms.
4460 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
4461 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4462 (vec_merge:VF_AVX512VL
4464 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4465 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4466 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")]
4469 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4472 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4473 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4474 [(set_attr "type" "ssemuladd")
4475 (set_attr "mode" "<MODE>")])
;; _mask3: output tied to operand 3 -> 231 form.
4477 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
4478 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4479 (vec_merge:VF_AVX512VL
4481 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4482 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4483 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
4486 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4488 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4489 [(set_attr "type" "ssemuladd")
4490 (set_attr "mode" "<MODE>")])
;; fmsubadd insns (UNSPEC-based, negated third input relative to fmaddsub).
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; FMA3 (alternatives 0-2, 132/213/231) and FMA4 (alternatives 3-4) form.
4492 (define_insn "*fma_fmsubadd_<mode>"
4493 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4495 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4496 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4498 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
4500 "TARGET_FMA || TARGET_FMA4"
4502 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4503 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4504 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4505 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4506 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4507 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4508 (set_attr "type" "ssemuladd")
4509 (set_attr "mode" "<MODE>")])
;; AVX512 form with sd-mask/rounding substs; alternatives pick 132/213/231.
4511 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4512 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4513 (unspec:VF_SF_AVX512VL
4514 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4515 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4517 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
4519 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4521 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4522 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4523 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4524 [(set_attr "type" "ssemuladd")
4525 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsubadd under mask operand 4.
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; _mask: output tied to operand 1 -> 132/213 forms.
4527 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
4528 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4529 (vec_merge:VF_AVX512VL
4531 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4532 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4534 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
4537 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4540 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4541 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4542 [(set_attr "type" "ssemuladd")
4543 (set_attr "mode" "<MODE>")])
;; _mask3: output tied to operand 3 -> 231 form.
4545 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
4546 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4547 (vec_merge:VF_AVX512VL
4549 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4550 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4552 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
4555 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4557 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4558 [(set_attr "type" "ssemuladd")
4559 (set_attr "mode" "<MODE>")])
4561 ;; FMA3 floating point scalar intrinsics. These merge result with
4562 ;; high-order elements from the destination register.
;; Scalar FMA3 intrinsic expanders (fmadd/fmsub/fnmadd/fnmsub on VF_128).
;; NOTE(review): partial listing — the trailing merge/condition lines of each
;; expander are elided in this chunk.
;; fmadd: op1 * op2 + op3, low element; upper elements come from op1.
4564 (define_expand "fmai_vmfmadd_<mode><round_name>"
4565 [(set (match_operand:VF_128 0 "register_operand")
4568 (match_operand:VF_128 1 "register_operand")
4569 (match_operand:VF_128 2 "<round_nimm_predicate>")
4570 (match_operand:VF_128 3 "<round_nimm_predicate>"))
;; fmsub: op1 * op2 - op3 (negated third input).
4575 (define_expand "fmai_vmfmsub_<mode><round_name>"
4576 [(set (match_operand:VF_128 0 "register_operand")
4579 (match_operand:VF_128 1 "register_operand")
4580 (match_operand:VF_128 2 "<round_nimm_predicate>")
4582 (match_operand:VF_128 3 "<round_nimm_predicate>")))
;; fnmadd: -(op1 * op2) + op3 (note operand 2 appears before operand 1).
4587 (define_expand "fmai_vmfnmadd_<mode><round_name>"
4588 [(set (match_operand:VF_128 0 "register_operand")
4592 (match_operand:VF_128 2 "<round_nimm_predicate>"))
4593 (match_operand:VF_128 1 "register_operand")
4594 (match_operand:VF_128 3 "<round_nimm_predicate>"))
;; fnmsub: -(op1 * op2) - op3.
4599 (define_expand "fmai_vmfnmsub_<mode><round_name>"
4600 [(set (match_operand:VF_128 0 "register_operand")
4604 (match_operand:VF_128 2 "<round_nimm_predicate>"))
4605 (match_operand:VF_128 1 "register_operand")
4607 (match_operand:VF_128 3 "<round_nimm_predicate>")))
;; Scalar FMA3 insns.  Two alternatives (memory operand in position 2 or 3)
;; select the 132/213 form; output is tied to operand 1, whose upper
;; elements are preserved.  <iptr> prints the scalar memory size in Intel
;; syntax.
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
4612 (define_insn "*fmai_fmadd_<mode>"
4613 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4616 (match_operand:VF_128 1 "register_operand" "0,0")
4617 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
4618 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4621 "TARGET_FMA || TARGET_AVX512F"
4623 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4624 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4625 [(set_attr "type" "ssemuladd")
4626 (set_attr "mode" "<MODE>")])
;; Scalar fmsub: third input negated.
4628 (define_insn "*fmai_fmsub_<mode>"
4629 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4632 (match_operand:VF_128 1 "register_operand" "0,0")
4633 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4635 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4638 "TARGET_FMA || TARGET_AVX512F"
4640 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4641 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4642 [(set_attr "type" "ssemuladd")
4643 (set_attr "mode" "<MODE>")])
;; Scalar fnmadd: product negated.
4645 (define_insn "*fmai_fnmadd_<mode><round_name>"
4646 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4650 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4651 (match_operand:VF_128 1 "register_operand" "0,0")
4652 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4655 "TARGET_FMA || TARGET_AVX512F"
4657 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4658 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4659 [(set_attr "type" "ssemuladd")
4660 (set_attr "mode" "<MODE>")])
;; Scalar fnmsub: product and third input both negated.
4662 (define_insn "*fmai_fnmsub_<mode><round_name>"
4663 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4667 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4668 (match_operand:VF_128 1 "register_operand" "0,0")
4670 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4673 "TARGET_FMA || TARGET_AVX512F"
4675 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4676 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4677 [(set_attr "type" "ssemuladd")
4678 (set_attr "mode" "<MODE>")])
;; Masked scalar fmadd (merge-masking on the low element, mask operand 4).
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; _mask: output tied to operand 1 -> 132/213 forms.
4680 (define_insn "avx512f_vmfmadd_<mode>_mask<round_name>"
4681 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4685 (match_operand:VF_128 1 "register_operand" "0,0")
4686 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4687 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4689 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4694 vfmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4695 vfmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4696 [(set_attr "type" "ssemuladd")
4697 (set_attr "mode" "<MODE>")])
;; _mask3: output tied to operand 3 -> 231 form.
;; NOTE(review): on the Intel-syntax side this prints %<iptr>3 (operand 3 is
;; tied to %0) where the 231 form's first source is operand 1 — the vector
;; mask3 patterns above print "%1, %2" in that position.  Verify against
;; upstream sse.md (expected "%<iptr>1, %<iptr>2").
4699 (define_insn "avx512f_vmfmadd_<mode>_mask3<round_name>"
4700 [(set (match_operand:VF_128 0 "register_operand" "=v")
4704 (match_operand:VF_128 1 "<round_nimm_predicate>" "%v")
4705 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")
4706 (match_operand:VF_128 3 "register_operand" "0"))
4708 (match_operand:QI 4 "register_operand" "Yk"))
4712 "vfmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
4713 [(set_attr "type" "ssemuladd")
4714 (set_attr "mode" "<MODE>")])
;; Zero-masked scalar fmadd.
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; Expander: forwards to the _maskz_1 insn with a zero merge vector.
4716 (define_expand "avx512f_vmfmadd_<mode>_maskz<round_expand_name>"
4717 [(match_operand:VF_128 0 "register_operand")
4718 (match_operand:VF_128 1 "<round_expand_nimm_predicate>")
4719 (match_operand:VF_128 2 "<round_expand_nimm_predicate>")
4720 (match_operand:VF_128 3 "<round_expand_nimm_predicate>")
4721 (match_operand:QI 4 "register_operand")]
4724 emit_insn (gen_avx512f_vmfmadd_<mode>_maskz_1<round_expand_name> (
4725 operands[0], operands[1], operands[2], operands[3],
4726 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; maskz_1 insn: operand 4 is the const0 merge vector, operand 5 the mask;
;; {z} zeroing form of the 132/213 encodings.
4730 (define_insn "avx512f_vmfmadd_<mode>_maskz_1<round_name>"
4731 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4735 (match_operand:VF_128 1 "register_operand" "0,0")
4736 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4737 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4738 (match_operand:VF_128 4 "const0_operand" "C,C")
4739 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4744 vfmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4745 vfmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4746 [(set_attr "type" "ssemuladd")
4747 (set_attr "mode" "<MODE>")])
;; Masked scalar fmsub variants.
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; _mask: merge-masked, output tied to operand 1 -> 132/213 forms.
4749 (define_insn "*avx512f_vmfmsub_<mode>_mask<round_name>"
4750 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4754 (match_operand:VF_128 1 "register_operand" "0,0")
4755 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4757 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4759 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4764 vfmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4765 vfmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4766 [(set_attr "type" "ssemuladd")
4767 (set_attr "mode" "<MODE>")])
;; _mask3: output tied to operand 3 -> 231 form.
;; NOTE(review): Intel-syntax side prints %<iptr>3 (tied to %0) where the
;; vector mask3 patterns print operand 1 — verify against upstream sse.md.
4769 (define_insn "avx512f_vmfmsub_<mode>_mask3<round_name>"
4770 [(set (match_operand:VF_128 0 "register_operand" "=v")
4774 (match_operand:VF_128 1 "<round_nimm_predicate>" "%v")
4775 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")
4777 (match_operand:VF_128 3 "register_operand" "0")))
4779 (match_operand:QI 4 "register_operand" "Yk"))
4783 "vfmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
4784 [(set_attr "type" "ssemuladd")
4785 (set_attr "mode" "<MODE>")])
;; maskz_1: zeroing form; operand 4 is const0 merge vector, operand 5 the mask.
4787 (define_insn "*avx512f_vmfmsub_<mode>_maskz_1<round_name>"
4788 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4792 (match_operand:VF_128 1 "register_operand" "0,0")
4793 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4795 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4796 (match_operand:VF_128 4 "const0_operand" "C,C")
4797 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4802 vfmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4803 vfmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4804 [(set_attr "type" "ssemuladd")
4805 (set_attr "mode" "<MODE>")])
;; Masked scalar fnmadd variants.
;; NOTE(review): partial listing — some interior RTL lines are elided in this chunk.
;; _mask: merge-masked, output tied to operand 1 -> 132/213 forms.
4807 (define_insn "*avx512f_vmfnmadd_<mode>_mask<round_name>"
4808 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4813 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4814 (match_operand:VF_128 1 "register_operand" "0,0")
4815 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4817 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4822 vfnmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4823 vfnmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4824 [(set_attr "type" "ssemuladd")
4825 (set_attr "mode" "<MODE>")])
;; _mask3: output tied to operand 3 -> 231 form.
;; NOTE(review): Intel-syntax side prints %<iptr>3 (tied to %0) where the
;; vector mask3 patterns print operand 1 — verify against upstream sse.md.
4827 (define_insn "*avx512f_vmfnmadd_<mode>_mask3<round_name>"
4828 [(set (match_operand:VF_128 0 "register_operand" "=v")
4833 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>"))
4834 (match_operand:VF_128 1 "<round_nimm_predicate>" "%v")
4835 (match_operand:VF_128 3 "register_operand" "0"))
4837 (match_operand:QI 4 "register_operand" "Yk"))
4841 "vfnmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
4842 [(set_attr "type" "ssemuladd")
4843 (set_attr "mode" "<MODE>")])
;; maskz_1: zeroing form; operand 4 is const0 merge vector, operand 5 the mask.
4845 (define_insn "*avx512f_vmfnmadd_<mode>_maskz_1<round_name>"
4846 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4851 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4852 (match_operand:VF_128 1 "register_operand" "0,0")
4853 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4854 (match_operand:VF_128 4 "const0_operand" "C,C")
4855 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4860 vfnmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4861 vfnmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4862 [(set_attr "type" "ssemuladd")
4863 (set_attr "mode" "<MODE>")])
4865 (define_insn "*avx512f_vmfnmsub_<mode>_mask<round_name>"
4866 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4871 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4872 (match_operand:VF_128 1 "register_operand" "0,0")
4874 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4876 (match_operand:QI 4 "register_operand" "Yk,Yk"))
4881 vfnmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
4882 vfnmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
4883 [(set_attr "type" "ssemuladd")
4884 (set_attr "mode" "<MODE>")])
4886 (define_insn "*avx512f_vmfnmsub_<mode>_mask3<round_name>"
4887 [(set (match_operand:VF_128 0 "register_operand" "=v")
4892 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>"))
4893 (match_operand:VF_128 1 "<round_nimm_predicate>" "%v")
4895 (match_operand:VF_128 3 "register_operand" "0")))
4897 (match_operand:QI 4 "register_operand" "Yk"))
4901 "vfnmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
4902 [(set_attr "type" "ssemuladd")
4903 (set_attr "mode" "<MODE>")])
4905 (define_insn "*avx512f_vmfnmsub_<mode>_maskz_1<round_name>"
4906 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4911 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4912 (match_operand:VF_128 1 "register_operand" "0,0")
4914 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4915 (match_operand:VF_128 4 "const0_operand" "C,C")
4916 (match_operand:QI 5 "register_operand" "Yk,Yk"))
4921 vfnmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
4922 vfnmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
4923 [(set_attr "type" "ssemuladd")
4924 (set_attr "mode" "<MODE>")])
4926 ;; FMA4 floating point scalar intrinsics. These write the
4927 ;; entire destination register, with the high-order elements zeroed.
;; NOTE(review): excerpt — interior RTL lines (fma/vec_merge wrappers
;; and insn conditions) are elided in this listing.
;; Expander: supplies the const0 high-element operand (operand 4) for
;; the *fma4i_vmfmadd_<mode> insn below.
4929 (define_expand "fma4i_vmfmadd_<mode>"
4930 [(set (match_operand:VF_128 0 "register_operand")
4933 (match_operand:VF_128 1 "nonimmediate_operand")
4934 (match_operand:VF_128 2 "nonimmediate_operand")
4935 (match_operand:VF_128 3 "nonimmediate_operand"))
4939 "operands[4] = CONST0_RTX (<MODE>mode);")
;; FMA4 vfmadd s[sd]: four-operand form, so no operand is tied to the
;; destination; two alternatives place the memory operand in %2 or %3.
4941 (define_insn "*fma4i_vmfmadd_<mode>"
4942 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4945 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4946 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4947 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4948 (match_operand:VF_128 4 "const0_operand")
4951 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4952 [(set_attr "type" "ssemuladd")
4953 (set_attr "mode" "<MODE>")])
;; vfmsub, vfnmadd, vfnmsub counterparts with identical operand layout.
4955 (define_insn "*fma4i_vmfmsub_<mode>"
4956 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4959 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4960 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4962 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4963 (match_operand:VF_128 4 "const0_operand")
4966 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4967 [(set_attr "type" "ssemuladd")
4968 (set_attr "mode" "<MODE>")])
4970 (define_insn "*fma4i_vmfnmadd_<mode>"
4971 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4975 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4976 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4977 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4978 (match_operand:VF_128 4 "const0_operand")
4981 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4982 [(set_attr "type" "ssemuladd")
4983 (set_attr "mode" "<MODE>")])
4985 (define_insn "*fma4i_vmfnmsub_<mode>"
4986 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4990 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4991 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4993 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4994 (match_operand:VF_128 4 "const0_operand")
4997 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4998 [(set_attr "type" "ssemuladd")
4999 (set_attr "mode" "<MODE>")])
5001 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5003 ;; Parallel single-precision floating point conversion operations
5005 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: convert two packed SI to SF in the low half of a V4SF.
;; Alternative 0 emits the native MMX cvtpi2ps; for TARGET_MMX_WITH_SSE
;; the insn is split after reload into SSE2 cvtdq2ps plus shuffles that
;; merge the converted pair into the low half of operand 0.
;; NOTE(review): excerpt — the vec_merge wrapper, the split template
;; and parts of the C body are elided in this listing.
5007 (define_insn_and_split "sse_cvtpi2ps"
5008 [(set (match_operand:V4SF 0 "register_operand" "=x,x,Yv")
5011 (float:V2SF (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")))
5012 (match_operand:V4SF 1 "register_operand" "0,0,Yv")
5014 (clobber (match_scratch:V4SF 3 "=X,x,Yv"))]
5015 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5017 cvtpi2ps\t{%2, %0|%0, %2}
5020 "TARGET_MMX_WITH_SSE && reload_completed"
5023 rtx op2 = lowpart_subreg (V4SImode, operands[2],
5024 GET_MODE (operands[2]));
5025 /* Generate SSE2 cvtdq2ps. */
5026 emit_insn (gen_floatv4siv4sf2 (operands[3], op2));
5028 /* Merge operands[3] with operands[0]. */
5032 mask = gen_rtx_PARALLEL (VOIDmode,
5033 gen_rtvec (4, GEN_INT (0), GEN_INT (1),
5034 GEN_INT (6), GEN_INT (7)));
5035 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
5036 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5037 emit_insn (gen_rtx_SET (operands[0], op2));
5041 /* NB: SSE can only concatenate OP0 and OP3 to OP0. */
5042 mask = gen_rtx_PARALLEL (VOIDmode,
5043 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5044 GEN_INT (4), GEN_INT (5)));
5045 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
5046 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5047 emit_insn (gen_rtx_SET (operands[0], op2));
5049 /* Swap bits 0:63 with bits 64:127. */
5050 mask = gen_rtx_PARALLEL (VOIDmode,
5051 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5052 GEN_INT (0), GEN_INT (1)));
5053 rtx dest = lowpart_subreg (V4SImode, operands[0],
5054 GET_MODE (operands[0]));
5055 op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
5056 emit_insn (gen_rtx_SET (dest, op1));
5060 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
5061 (set_attr "type" "ssecvt")
5062 (set_attr "mode" "V4SF")])
;; cvtps2pi / cvttps2pi: convert the two low SF elements of a V4SF to
;; packed SI.  Alternative 0 targets an MMX register (native insn);
;; alternative 1 uses the SSE form (%vcvt(t)ps2dq) on an SSE register.
;; NOTE(review): excerpt — the vec_select wrapper lines are elided.
5064 (define_insn "sse_cvtps2pi"
5065 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5067 (unspec:V4SI [(match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm")]
5069 (parallel [(const_int 0) (const_int 1)])))]
5070 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5072 cvtps2pi\t{%1, %0|%0, %q1}
5073 %vcvtps2dq\t{%1, %0|%0, %1}"
5074 [(set_attr "mmx_isa" "native,x64")
5075 (set_attr "type" "ssecvt")
5076 (set_attr "unit" "mmx,*")
5077 (set_attr "mode" "DI")])
;; Truncating variant: uses (fix:V4SI ...) instead of the
;; UNSPEC_FIX_NOTRUNC conversion above.
5079 (define_insn "sse_cvttps2pi"
5080 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5082 (fix:V4SI (match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm"))
5083 (parallel [(const_int 0) (const_int 1)])))]
5084 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5086 cvttps2pi\t{%1, %0|%0, %q1}
5087 %vcvttps2dq\t{%1, %0|%0, %1}"
5088 [(set_attr "mmx_isa" "native,x64")
5089 (set_attr "type" "ssecvt")
5090 (set_attr "unit" "mmx,*")
5091 (set_attr "prefix_rep" "0")
5092 (set_attr "mode" "SF")])
;; cvtsi2ss: convert a 32/64-bit integer (SWI48) to SF and insert it
;; into the low element of operand 1; alternatives 0/1 are the legacy
;; SSE two-operand form, alternative 2 the AVX/EVEX three-operand form
;; with optional embedded rounding (<round_op3>).
;; NOTE(review): excerpt — the vec_merge wrapper lines are elided.
5094 (define_insn "sse_cvtsi2ss<rex64namesuffix><round_name>"
5095 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5098 (float:SF (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
5099 (match_operand:V4SF 1 "register_operand" "0,0,v")
5103 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5104 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5105 vcvtsi2ss<rex64suffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5106 [(set_attr "isa" "noavx,noavx,avx")
5107 (set_attr "type" "sseicvt")
5108 (set_attr "athlon_decode" "vector,double,*")
5109 (set_attr "amdfam10_decode" "vector,double,*")
5110 (set_attr "bdver1_decode" "double,direct,*")
5111 (set_attr "btver2_decode" "double,double,double")
5112 (set_attr "znver1_decode" "double,double,double")
;; length_vex / prefix_rex are adjusted for the DImode (REX.W) variant.
5113 (set (attr "length_vex")
5115 (and (match_test "<MODE>mode == DImode")
5116 (eq_attr "alternative" "2"))
5118 (const_string "*")))
5119 (set (attr "prefix_rex")
5121 (and (match_test "<MODE>mode == DImode")
5122 (eq_attr "alternative" "0,1"))
5124 (const_string "*")))
5125 (set_attr "prefix" "orig,orig,maybe_evex")
5126 (set_attr "mode" "SF")])
;; cvtss2si: low SF element of a V4SF -> SWI48 integer, rounding per
;; MXCSR (UNSPEC_FIX_NOTRUNC), with optional embedded rounding.
;; NOTE(review): excerpt — vec_select wrapper and insn condition lines
;; are elided in this listing.
5128 (define_insn "sse_cvtss2si<rex64namesuffix><round_name>"
5129 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5132 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
5133 (parallel [(const_int 0)]))]
5134 UNSPEC_FIX_NOTRUNC))]
5136 "%vcvtss2si<rex64suffix>\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
5137 [(set_attr "type" "sseicvt")
5138 (set_attr "athlon_decode" "double,vector")
5139 (set_attr "bdver1_decode" "double,double")
5140 (set_attr "prefix_rep" "1")
5141 (set_attr "prefix" "maybe_vex")
5142 (set_attr "mode" "<MODE>")])
;; _2 variant: operates directly on a scalar SF operand (may be in
;; memory) rather than the low element of a vector.
5144 (define_insn "sse_cvtss2si<rex64namesuffix>_2"
5145 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5146 (unspec:SWI48 [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
5147 UNSPEC_FIX_NOTRUNC))]
5149 "%vcvtss2si<rex64suffix>\t{%1, %0|%0, %k1}"
5150 [(set_attr "type" "sseicvt")
5151 (set_attr "athlon_decode" "double,vector")
5152 (set_attr "amdfam10_decode" "double,double")
5153 (set_attr "bdver1_decode" "double,double")
5154 (set_attr "prefix_rep" "1")
5155 (set_attr "prefix" "maybe_vex")
5156 (set_attr "mode" "<MODE>")])
;; Truncating variant (fix:), with SAE-only embedded-rounding support.
5158 (define_insn "sse_cvttss2si<rex64namesuffix><round_saeonly_name>"
5159 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5162 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
5163 (parallel [(const_int 0)]))))]
5165 "%vcvttss2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
5166 [(set_attr "type" "sseicvt")
5167 (set_attr "athlon_decode" "double,vector")
5168 (set_attr "amdfam10_decode" "double,double")
5169 (set_attr "bdver1_decode" "double,double")
5170 (set_attr "prefix_rep" "1")
5171 (set_attr "prefix" "maybe_vex")
5172 (set_attr "mode" "<MODE>")])
;; AVX512F vcvtusi2ss/sd: unsigned 32-bit integer -> scalar float,
;; merged into the low element of operand 1 (EVEX only).
;; NOTE(review): excerpt — the vec_merge wrapper lines are elided.
5174 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
5175 [(set (match_operand:VF_128 0 "register_operand" "=v")
5177 (vec_duplicate:VF_128
5178 (unsigned_float:<ssescalarmode>
5179 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
5180 (match_operand:VF_128 1 "register_operand" "v")
5182 "TARGET_AVX512F && <round_modev4sf_condition>"
5183 "vcvtusi2<ssescalarmodesuffix>{l}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5184 [(set_attr "type" "sseicvt")
5185 (set_attr "prefix" "evex")
5186 (set_attr "mode" "<ssescalarmode>")])
;; 64-bit source variant ({q} suffix); requires TARGET_64BIT.
5188 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
5189 [(set (match_operand:VF_128 0 "register_operand" "=v")
5191 (vec_duplicate:VF_128
5192 (unsigned_float:<ssescalarmode>
5193 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
5194 (match_operand:VF_128 1 "register_operand" "v")
5196 "TARGET_AVX512F && TARGET_64BIT"
5197 "vcvtusi2<ssescalarmodesuffix>{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5198 [(set_attr "type" "sseicvt")
5199 (set_attr "prefix" "evex")
5200 (set_attr "mode" "<ssescalarmode>")])
;; Packed signed int -> float (cvtdq2ps), with optional masking and
;; embedded rounding for the AVX512 forms.
5202 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
5203 [(set (match_operand:VF1 0 "register_operand" "=x,v")
5205 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
5206 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
5208 cvtdq2ps\t{%1, %0|%0, %1}
5209 vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5210 [(set_attr "isa" "noavx,avx")
5211 (set_attr "type" "ssecvt")
5212 (set_attr "prefix" "maybe_vex")
5213 (set_attr "mode" "<sseinsnmode>")])
;; Packed unsigned int -> float (vcvtudq2ps, EVEX only).
5215 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
5216 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
5217 (unsigned_float:VF1_AVX512VL
5218 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5220 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5221 [(set_attr "type" "ssecvt")
5222 (set_attr "prefix" "evex")
5223 (set_attr "mode" "<MODE>")])
;; Expander for unsigned int -> float: uses the native vcvtudq2ps when
;; AVX512 is available, otherwise falls back to the generic lowering in
;; ix86_expand_vector_convert_uns_vsivsf.
;; NOTE(review): excerpt — some braces/DONE lines are elided here.
5225 (define_expand "floatuns<sseintvecmodelower><mode>2"
5226 [(match_operand:VF1 0 "register_operand")
5227 (match_operand:<sseintvecmode> 1 "register_operand")]
5228 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
5230 if (<MODE>mode == V16SFmode)
5231 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
5233 if (TARGET_AVX512VL)
5235 if (<MODE>mode == V4SFmode)
5236 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
5238 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
5241 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
5247 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
5248 (define_mode_attr sf2simodelower
5249 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
;; Packed SF -> SI rounding per MXCSR (cvtps2dq), non-truncating.
5251 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
5252 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
5254 [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
5255 UNSPEC_FIX_NOTRUNC))]
5256 "TARGET_SSE2 && <mask_mode512bit_condition>"
5257 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5258 [(set_attr "type" "ssecvt")
5259 (set (attr "prefix_data16")
5261 (match_test "TARGET_AVX")
5263 (const_string "1")))
5264 (set_attr "prefix" "maybe_vex")
5265 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit variant with embedded-rounding support.
5267 (define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
5268 [(set (match_operand:V16SI 0 "register_operand" "=v")
5270 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
5271 UNSPEC_FIX_NOTRUNC))]
5273 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5274 [(set_attr "type" "ssecvt")
5275 (set_attr "prefix" "evex")
5276 (set_attr "mode" "XI")])
;; Unsigned counterpart (vcvtps2udq, EVEX only).
5278 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
5279 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
5280 (unspec:VI4_AVX512VL
5281 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
5282 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5284 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5285 [(set_attr "type" "ssecvt")
5286 (set_attr "prefix" "evex")
5287 (set_attr "mode" "<sseinsnmode>")])
;; AVX512DQ SF -> 64-bit integer conversions (vcvtps2qq / vcvtps2uqq),
;; 256/512-bit forms plus the V2DI forms that read only the two low SF
;; elements of a V4SF.
5289 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
5290 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5291 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5292 UNSPEC_FIX_NOTRUNC))]
5293 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5294 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5295 [(set_attr "type" "ssecvt")
5296 (set_attr "prefix" "evex")
5297 (set_attr "mode" "<sseinsnmode>")])
5299 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
5300 [(set (match_operand:V2DI 0 "register_operand" "=v")
5303 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5304 (parallel [(const_int 0) (const_int 1)]))]
5305 UNSPEC_FIX_NOTRUNC))]
5306 "TARGET_AVX512DQ && TARGET_AVX512VL"
5307 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5308 [(set_attr "type" "ssecvt")
5309 (set_attr "prefix" "evex")
5310 (set_attr "mode" "TI")])
5312 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
5313 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5314 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5315 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5316 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5317 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5318 [(set_attr "type" "ssecvt")
5319 (set_attr "prefix" "evex")
5320 (set_attr "mode" "<sseinsnmode>")])
5322 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
5323 [(set (match_operand:V2DI 0 "register_operand" "=v")
5326 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5327 (parallel [(const_int 0) (const_int 1)]))]
5328 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5329 "TARGET_AVX512DQ && TARGET_AVX512VL"
5330 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5331 [(set_attr "type" "ssecvt")
5332 (set_attr "prefix" "evex")
5333 (set_attr "mode" "TI")])
;; Truncating packed SF -> (un)signed SI conversions (vcvttps2dq /
;; vcvttps2udq), 512/256/128-bit forms.
5335 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
5336 [(set (match_operand:V16SI 0 "register_operand" "=v")
5338 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5340 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5341 [(set_attr "type" "ssecvt")
5342 (set_attr "prefix" "evex")
5343 (set_attr "mode" "XI")])
5345 (define_insn "fix_truncv8sfv8si2<mask_name>"
5346 [(set (match_operand:V8SI 0 "register_operand" "=v")
5347 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
5348 "TARGET_AVX && <mask_avx512vl_condition>"
5349 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5350 [(set_attr "type" "ssecvt")
5351 (set_attr "prefix" "<mask_prefix>")
5352 (set_attr "mode" "OI")])
5354 (define_insn "fix_truncv4sfv4si2<mask_name>"
5355 [(set (match_operand:V4SI 0 "register_operand" "=v")
5356 (fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
5357 "TARGET_SSE2 && <mask_avx512vl_condition>"
5358 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5359 [(set_attr "type" "ssecvt")
5360 (set (attr "prefix_rep")
5362 (match_test "TARGET_AVX")
5364 (const_string "1")))
5365 (set (attr "prefix_data16")
5367 (match_test "TARGET_AVX")
5369 (const_string "0")))
;; NOTE(review): "prefix_data16" is set twice for this insn — once
;; conditionally just above and once unconditionally below.  Looks
;; redundant; confirm which one genattrtab honors before removing.
5370 (set_attr "prefix_data16" "0")
5371 (set_attr "prefix" "<mask_prefix2>")
5372 (set_attr "mode" "TI")])
;; Expander for unsigned truncating SF -> SI: native vcvttps2udq for
;; V16SF, otherwise an adjust+fix+xor sequence via
;; ix86_expand_adjust_ufix_to_sfix_si.
;; NOTE(review): excerpt — some brace/DONE lines are elided here.
5374 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
5375 [(match_operand:<sseintvecmode> 0 "register_operand")
5376 (match_operand:VF1 1 "register_operand")]
5379 if (<MODE>mode == V16SFmode)
5380 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
5385 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5386 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
5387 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
5388 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
5393 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5395 ;; Parallel double-precision floating point conversion operations
5397 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: two packed SI -> two packed DF.  Alternative 0 is the SSE2
;; form (%vcvtdq2pd), alternative 1 the native MMX cvtpi2pd.
5399 (define_insn "sse2_cvtpi2pd"
5400 [(set (match_operand:V2DF 0 "register_operand" "=v,x")
5401 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "vBm,?!y")))]
5404 %vcvtdq2pd\t{%1, %0|%0, %1}
5405 cvtpi2pd\t{%1, %0|%0, %1}"
5406 [(set_attr "mmx_isa" "*,native")
5407 (set_attr "type" "ssecvt")
5408 (set_attr "unit" "*,mmx")
5409 (set_attr "prefix_data16" "*,1")
5410 (set_attr "prefix" "maybe_vex,*")
5411 (set_attr "mode" "V2DF")])
;; cvtpd2pi (rounding per MXCSR); the SSE alternative needs the {x}
;; suffix under AVX to disambiguate the 128-bit memory form.
5413 (define_insn "sse2_cvtpd2pi"
5414 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5415 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vBm,xm")]
5416 UNSPEC_FIX_NOTRUNC))]
5419 * return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvtpd2dq\t{%1, %0|%0, %1}\";
5420 cvtpd2pi\t{%1, %0|%0, %1}"
5421 [(set_attr "mmx_isa" "*,native")
5422 (set_attr "type" "ssecvt")
5423 (set_attr "unit" "*,mmx")
5424 (set_attr "amdfam10_decode" "double")
5425 (set_attr "athlon_decode" "vector")
5426 (set_attr "bdver1_decode" "double")
5427 (set_attr "prefix_data16" "*,1")
5428 (set_attr "prefix" "maybe_vex,*")
5429 (set_attr "mode" "TI")])
;; Truncating counterpart (fix:V2SI -> cvttpd2dq/cvttpd2pi).
5431 (define_insn "sse2_cvttpd2pi"
5432 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5433 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vBm,xm")))]
5436 * return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvttpd2dq\t{%1, %0|%0, %1}\";
5437 cvttpd2pi\t{%1, %0|%0, %1}"
5438 [(set_attr "mmx_isa" "*,native")
5439 (set_attr "type" "ssecvt")
5440 (set_attr "unit" "*,mmx")
5441 (set_attr "amdfam10_decode" "double")
5442 (set_attr "athlon_decode" "vector")
5443 (set_attr "bdver1_decode" "double")
5444 (set_attr "prefix_data16" "*,1")
5445 (set_attr "prefix" "maybe_vex,*")
5446 (set_attr "mode" "TI")])
;; cvtsi2sd: SI -> DF merged into the low element of operand 1.
;; NOTE(review): excerpt — the vec_merge wrapper lines are elided.
5448 (define_insn "sse2_cvtsi2sd"
5449 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5452 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
5453 (match_operand:V2DF 1 "register_operand" "0,0,v")
5457 cvtsi2sd{l}\t{%2, %0|%0, %2}
5458 cvtsi2sd{l}\t{%2, %0|%0, %2}
5459 vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}"
5460 [(set_attr "isa" "noavx,noavx,avx")
5461 (set_attr "type" "sseicvt")
5462 (set_attr "athlon_decode" "double,direct,*")
5463 (set_attr "amdfam10_decode" "vector,double,*")
5464 (set_attr "bdver1_decode" "double,direct,*")
5465 (set_attr "btver2_decode" "double,double,double")
5466 (set_attr "znver1_decode" "double,double,double")
5467 (set_attr "prefix" "orig,orig,maybe_evex")
5468 (set_attr "mode" "DF")])
;; 64-bit source variant ({q} suffix, TARGET_64BIT only) with optional
;; embedded rounding on the AVX alternative.
5470 (define_insn "sse2_cvtsi2sdq<round_name>"
5471 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5474 (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
5475 (match_operand:V2DF 1 "register_operand" "0,0,v")
5477 "TARGET_SSE2 && TARGET_64BIT"
5479 cvtsi2sd{q}\t{%2, %0|%0, %2}
5480 cvtsi2sd{q}\t{%2, %0|%0, %2}
5481 vcvtsi2sd{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5482 [(set_attr "isa" "noavx,noavx,avx")
5483 (set_attr "type" "sseicvt")
5484 (set_attr "athlon_decode" "double,direct,*")
5485 (set_attr "amdfam10_decode" "vector,double,*")
5486 (set_attr "bdver1_decode" "double,direct,*")
5487 (set_attr "length_vex" "*,*,4")
5488 (set_attr "prefix_rex" "1,1,*")
5489 (set_attr "prefix" "orig,orig,maybe_evex")
5490 (set_attr "mode" "DF")])
;; AVX512F scalar float -> unsigned integer conversions.
;; vcvtss2usi/vcvtsd2usi round per MXCSR (UNSPEC_UNSIGNED_FIX_NOTRUNC);
;; vcvttss2usi/vcvttsd2usi truncate (SAE-only rounding operand).
;; NOTE(review): excerpt — vec_select wrappers / conditions elided.
5492 (define_insn "avx512f_vcvtss2usi<rex64namesuffix><round_name>"
5493 [(set (match_operand:SWI48 0 "register_operand" "=r")
5496 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
5497 (parallel [(const_int 0)]))]
5498 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5500 "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
5501 [(set_attr "type" "sseicvt")
5502 (set_attr "prefix" "evex")
5503 (set_attr "mode" "<MODE>")])
5505 (define_insn "avx512f_vcvttss2usi<rex64namesuffix><round_saeonly_name>"
5506 [(set (match_operand:SWI48 0 "register_operand" "=r")
5509 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
5510 (parallel [(const_int 0)]))))]
5512 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
5513 [(set_attr "type" "sseicvt")
5514 (set_attr "prefix" "evex")
5515 (set_attr "mode" "<MODE>")])
5517 (define_insn "avx512f_vcvtsd2usi<rex64namesuffix><round_name>"
5518 [(set (match_operand:SWI48 0 "register_operand" "=r")
5521 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
5522 (parallel [(const_int 0)]))]
5523 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5525 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
5526 [(set_attr "type" "sseicvt")
5527 (set_attr "prefix" "evex")
5528 (set_attr "mode" "<MODE>")])
5530 (define_insn "avx512f_vcvttsd2usi<rex64namesuffix><round_saeonly_name>"
5531 [(set (match_operand:SWI48 0 "register_operand" "=r")
5534 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
5535 (parallel [(const_int 0)]))))]
5537 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
5538 [(set_attr "type" "sseicvt")
5539 (set_attr "prefix" "evex")
5540 (set_attr "mode" "<MODE>")])
;; cvtsd2si: low DF element of a V2DF -> SWI48 integer (MXCSR
;; rounding), plus the scalar-DF "_2" form and the truncating
;; cvttsd2si form.  NOTE(review): excerpt — vec_select wrapper and
;; condition lines are elided.
5542 (define_insn "sse2_cvtsd2si<rex64namesuffix><round_name>"
5543 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5546 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
5547 (parallel [(const_int 0)]))]
5548 UNSPEC_FIX_NOTRUNC))]
5550 "%vcvtsd2si<rex64suffix>\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
5551 [(set_attr "type" "sseicvt")
5552 (set_attr "athlon_decode" "double,vector")
5553 (set_attr "bdver1_decode" "double,double")
5554 (set_attr "btver2_decode" "double,double")
5555 (set_attr "prefix_rep" "1")
5556 (set_attr "prefix" "maybe_vex")
5557 (set_attr "mode" "<MODE>")])
5559 (define_insn "sse2_cvtsd2si<rex64namesuffix>_2"
5560 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5561 (unspec:SWI48 [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
5562 UNSPEC_FIX_NOTRUNC))]
5564 "%vcvtsd2si<rex64suffix>\t{%1, %0|%0, %q1}"
5565 [(set_attr "type" "sseicvt")
5566 (set_attr "athlon_decode" "double,vector")
5567 (set_attr "amdfam10_decode" "double,double")
5568 (set_attr "bdver1_decode" "double,double")
5569 (set_attr "prefix_rep" "1")
5570 (set_attr "prefix" "maybe_vex")
5571 (set_attr "mode" "<MODE>")])
5573 (define_insn "sse2_cvttsd2si<rex64namesuffix><round_saeonly_name>"
5574 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5577 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
5578 (parallel [(const_int 0)]))))]
5580 "%vcvttsd2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
5581 [(set_attr "type" "sseicvt")
5582 (set_attr "athlon_decode" "double,vector")
5583 (set_attr "amdfam10_decode" "double,double")
5584 (set_attr "bdver1_decode" "double,double")
5585 (set_attr "btver2_decode" "double,double")
5586 (set_attr "prefix_rep" "1")
5587 (set_attr "prefix" "maybe_vex")
5588 (set_attr "mode" "<MODE>")])
5590 ;; For float<si2dfmode><mode>2 insn pattern
5591 (define_mode_attr si2dfmode
5592 [(V8DF "V8SI") (V4DF "V4SI")])
5593 (define_mode_attr si2dfmodelower
5594 [(V8DF "v8si") (V4DF "v4si")])
;; Packed SI -> DF widening conversion (vcvtdq2pd), 256/512-bit.
5596 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
5597 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5598 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5599 "TARGET_AVX && <mask_mode512bit_condition>"
5600 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5601 [(set_attr "type" "ssecvt")
5602 (set_attr "prefix" "maybe_vex")
5603 (set_attr "mode" "<MODE>")])
;; (Un)signed 64-bit integer -> DF (vcvtqq2pd / vcvtuqq2pd), selected
;; by the any_float iterator / <floatsuffix> attribute.
5605 (define_insn "float<floatunssuffix><sseintvecmodelower><mode>2<mask_name><round_name>"
5606 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
5607 (any_float:VF2_AVX512VL
5608 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5610 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5611 [(set_attr "type" "ssecvt")
5612 (set_attr "prefix" "evex")
5613 (set_attr "mode" "<MODE>")])
5615 ;; For float<floatunssuffix><sselondveclower><mode> insn patterns
5616 (define_mode_attr qq2pssuff
5617 [(V8SF "") (V4SF "{y}")])
5619 (define_mode_attr sselongvecmode
5620 [(V8SF "V8DI") (V4SF "V4DI")])
5622 (define_mode_attr sselongvecmodelower
5623 [(V8SF "v8di") (V4SF "v4di")])
5625 (define_mode_attr sseintvecmode3
5626 [(V8SF "XI") (V4SF "OI")
5627 (V8DF "OI") (V4DF "TI")])
;; (Un)signed 64-bit integer -> SF (vcvt(u)qq2ps); the {y} suffix from
;; qq2pssuff disambiguates the V4DI->V4SF narrowing form.
5629 (define_insn "float<floatunssuffix><sselongvecmodelower><mode>2<mask_name><round_name>"
5630 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
5631 (any_float:VF1_128_256VL
5632 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5633 "TARGET_AVX512DQ && <round_modev8sf_condition>"
5634 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5635 [(set_attr "type" "ssecvt")
5636 (set_attr "prefix" "evex")
5637 (set_attr "mode" "<MODE>")])
;; V2DI -> V2SF conversion, produced as a V4SF with zeroed high half;
;; the expander supplies the const0 operand for the insn below.
;; NOTE(review): excerpt — vec_concat wrapper lines are elided.
5639 (define_expand "float<floatunssuffix>v2div2sf2"
5640 [(set (match_operand:V4SF 0 "register_operand" "=v")
5642 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5644 "TARGET_AVX512DQ && TARGET_AVX512VL"
5645 "operands[2] = CONST0_RTX (V2SFmode);")
5647 (define_insn "*float<floatunssuffix>v2div2sf2"
5648 [(set (match_operand:V4SF 0 "register_operand" "=v")
5650 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5651 (match_operand:V2SF 2 "const0_operand" "C")))]
5652 "TARGET_AVX512DQ && TARGET_AVX512VL"
5653 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
5654 [(set_attr "type" "ssecvt")
5655 (set_attr "prefix" "evex")
5656 (set_attr "mode" "V4SF")])
;; Helper mode attributes for vec_pack(u)_float_<mode> below.
5658 (define_mode_attr vpckfloat_concat_mode
5659 [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")])
5660 (define_mode_attr vpckfloat_temp_mode
5661 [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")])
5662 (define_mode_attr vpckfloat_op_mode
5663 [(V8DI "v8sf") (V4DI "v4sf") (V2DI "v2sf")])
;; Pack two DI-vector -> SF-vector conversions into one PS result:
;; convert each half, then movlhps (V2DI case) or vec_concat them.
;; NOTE(review): excerpt — trailing brace/DONE lines are elided.
5665 (define_expand "vec_pack<floatprefix>_float_<mode>"
5666 [(match_operand:<ssePSmode> 0 "register_operand")
5667 (any_float:<ssePSmode>
5668 (match_operand:VI8_AVX512VL 1 "register_operand"))
5669 (match_operand:VI8_AVX512VL 2 "register_operand")]
5672 rtx r1 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5673 rtx r2 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5674 rtx (*gen) (rtx, rtx) = gen_float<floatunssuffix><mode><vpckfloat_op_mode>2;
5675 emit_insn (gen (r1, operands[1]));
5676 emit_insn (gen (r2, operands[2]));
5677 if (<MODE>mode == V2DImode)
5678 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
5680 emit_insn (gen_avx_vec_concat<vpckfloat_concat_mode> (operands[0],
;; Masked variants of the V2DI -> V2SF conversion: merge-masked (mask
;; in operand 3, merge source operand 2) and zero-masked (_mask_1).
;; NOTE(review): excerpt — vec_merge/vec_concat wrapper lines elided.
5685 (define_expand "float<floatunssuffix>v2div2sf2_mask"
5686 [(set (match_operand:V4SF 0 "register_operand" "=v")
5689 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5691 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
5692 (parallel [(const_int 0) (const_int 1)]))
5693 (match_operand:QI 3 "register_operand" "Yk"))
5695 "TARGET_AVX512DQ && TARGET_AVX512VL"
5696 "operands[4] = CONST0_RTX (V2SFmode);")
5698 (define_insn "*float<floatunssuffix>v2div2sf2_mask"
5699 [(set (match_operand:V4SF 0 "register_operand" "=v")
5702 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5704 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
5705 (parallel [(const_int 0) (const_int 1)]))
5706 (match_operand:QI 3 "register_operand" "Yk"))
5707 (match_operand:V2SF 4 "const0_operand" "C")))]
5708 "TARGET_AVX512DQ && TARGET_AVX512VL"
5709 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5710 [(set_attr "type" "ssecvt")
5711 (set_attr "prefix" "evex")
5712 (set_attr "mode" "V4SF")])
5714 (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
5715 [(set (match_operand:V4SF 0 "register_operand" "=v")
5718 (any_float:V2SF (match_operand:V2DI 1
5719 "nonimmediate_operand" "vm"))
5720 (match_operand:V2SF 3 "const0_operand" "C")
5721 (match_operand:QI 2 "register_operand" "Yk"))
5722 (match_operand:V2SF 4 "const0_operand" "C")))]
5723 "TARGET_AVX512DQ && TARGET_AVX512VL"
5724 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
5725 [(set_attr "type" "ssecvt")
5726 (set_attr "prefix" "evex")
5727 (set_attr "mode" "V4SF")])
;; Packed unsigned SI -> DF (vcvtudq2pd), 256/512-bit and the V2DF form
;; that reads the two low elements of a V4SI.
5729 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
5730 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
5731 (unsigned_float:VF2_512_256VL
5732 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5734 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5735 [(set_attr "type" "ssecvt")
5736 (set_attr "prefix" "evex")
5737 (set_attr "mode" "<MODE>")])
5739 (define_insn "ufloatv2siv2df2<mask_name>"
5740 [(set (match_operand:V2DF 0 "register_operand" "=v")
5741 (unsigned_float:V2DF
5743 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5744 (parallel [(const_int 0) (const_int 1)]))))]
5746 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5747 [(set_attr "type" "ssecvt")
5748 (set_attr "prefix" "evex")
5749 (set_attr "mode" "V2DF")])
;; Signed SI -> DF forms that convert the low half of a wider integer
;; vector (%t1 = low 256 bits, %x1 = low 128 bits), plus the SSE2
;; 128-bit form.
5751 (define_insn "avx512f_cvtdq2pd512_2"
5752 [(set (match_operand:V8DF 0 "register_operand" "=v")
5755 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
5756 (parallel [(const_int 0) (const_int 1)
5757 (const_int 2) (const_int 3)
5758 (const_int 4) (const_int 5)
5759 (const_int 6) (const_int 7)]))))]
5761 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
5762 [(set_attr "type" "ssecvt")
5763 (set_attr "prefix" "evex")
5764 (set_attr "mode" "V8DF")])
5766 (define_insn "avx_cvtdq2pd256_2"
5767 [(set (match_operand:V4DF 0 "register_operand" "=v")
5770 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
5771 (parallel [(const_int 0) (const_int 1)
5772 (const_int 2) (const_int 3)]))))]
5774 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
5775 [(set_attr "type" "ssecvt")
5776 (set_attr "prefix" "maybe_evex")
5777 (set_attr "mode" "V4DF")])
5779 (define_insn "sse2_cvtdq2pd<mask_name>"
5780 [(set (match_operand:V2DF 0 "register_operand" "=v")
5783 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5784 (parallel [(const_int 0) (const_int 1)]))))]
5785 "TARGET_SSE2 && <mask_avx512vl_condition>"
5786 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5787 [(set_attr "type" "ssecvt")
5788 (set_attr "prefix" "maybe_vex")
5789 (set_attr "mode" "V2DF")])
;; DF -> signed SI conversions with rounding (non-truncating),
;; expressed via UNSPEC_FIX_NOTRUNC since RTL "fix" always truncates.
;; 512-bit form; supports masking and embedded rounding (<round_name>).
5791 (define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
5792 [(set (match_operand:V8SI 0 "register_operand" "=v")
5794 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
5795 UNSPEC_FIX_NOTRUNC))]
5797 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5798 [(set_attr "type" "ssecvt")
5799 (set_attr "prefix" "evex")
5800 (set_attr "mode" "OI")])
;; 256-bit form ({y} disambiguates the operand size in AT&T syntax).
5802 (define_insn "avx_cvtpd2dq256<mask_name>"
5803 [(set (match_operand:V4SI 0 "register_operand" "=v")
5804 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5805 UNSPEC_FIX_NOTRUNC))]
5806 "TARGET_AVX && <mask_avx512vl_condition>"
5807 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5808 [(set_attr "type" "ssecvt")
5809 (set_attr "prefix" "<mask_prefix>")
5810 (set_attr "mode" "OI")])
;; Expander producing a V8SI whose high half is zero; the zero vector
;; is supplied as operand 2 for the *avx_cvtpd2dq256_2 insn below.
5812 (define_expand "avx_cvtpd2dq256_2"
5813 [(set (match_operand:V8SI 0 "register_operand")
5815 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
5819 "operands[2] = CONST0_RTX (V4SImode);")
;; Matching insn: V4DF -> V4SI result placed in the low half of %0,
;; upper half zeroed (%x0 prints the 128-bit view of the destination).
5821 (define_insn "*avx_cvtpd2dq256_2"
5822 [(set (match_operand:V8SI 0 "register_operand" "=v")
5824 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5826 (match_operand:V4SI 2 "const0_operand")))]
5828 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
5829 [(set_attr "type" "ssecvt")
5830 (set_attr "prefix" "vex")
5831 (set_attr "btver2_decode" "vector")
5832 (set_attr "mode" "OI")])
;; 128-bit form: V2DF -> 2 x SI in the low half of a V4SI, high half
;; zero.  Emits the VEX/EVEX {x} form when AVX is enabled, else the
;; legacy SSE2 encoding.
5834 (define_insn "sse2_cvtpd2dq<mask_name>"
5835 [(set (match_operand:V4SI 0 "register_operand" "=v")
5837 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
5839 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5840 "TARGET_SSE2 && <mask_avx512vl_condition>"
5843 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5845 return "cvtpd2dq\t{%1, %0|%0, %1}";
5847 [(set_attr "type" "ssecvt")
5848 (set_attr "prefix_rep" "1")
5849 (set_attr "prefix_data16" "0")
5850 (set_attr "prefix" "maybe_vex")
5851 (set_attr "mode" "TI")
5852 (set_attr "amdfam10_decode" "double")
5853 (set_attr "athlon_decode" "vector")
5854 (set_attr "bdver1_decode" "double")])
5856 ;; For ufix_notrunc* insn patterns
;; Mnemonic size suffix: 256-bit vcvtpd2udq needs an explicit {y}.
5857 (define_mode_attr pd2udqsuff
5858 [(V8DF "") (V4DF "{y}")])
;; DF -> unsigned SI, rounded (non-truncating), 512/256-bit widths;
;; AVX-512 only (vcvtpd2udq), with masking and embedded rounding.
5860 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
5861 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
5863 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
5864 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5866 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5867 [(set_attr "type" "ssecvt")
5868 (set_attr "prefix" "evex")
5869 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit form: V2DF -> low half of V4SI, high half zeroed.
5871 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
5872 [(set (match_operand:V4SI 0 "register_operand" "=v")
5875 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5876 UNSPEC_UNSIGNED_FIX_NOTRUNC)
5877 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5879 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5880 [(set_attr "type" "ssecvt")
5881 (set_attr "prefix" "evex")
5882 (set_attr "mode" "TI")])
;; Truncating DF -> SI conversions (vcvttpd2[u]dq).
;; 512-bit, signed or unsigned via any_fix/<fixunssuffix>; supports
;; masking and SAE-only rounding control.
5884 (define_insn "fix<fixunssuffix>_truncv8dfv8si2<mask_name><round_saeonly_name>"
5885 [(set (match_operand:V8SI 0 "register_operand" "=v")
5887 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5889 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5890 [(set_attr "type" "ssecvt")
5891 (set_attr "prefix" "evex")
5892 (set_attr "mode" "OI")])
;; 128-bit unsigned: V2DF -> low half of V4SI, high half zeroed.
5894 (define_insn "ufix_truncv2dfv2si2<mask_name>"
5895 [(set (match_operand:V4SI 0 "register_operand" "=v")
5897 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
5898 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5900 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5901 [(set_attr "type" "ssecvt")
5902 (set_attr "prefix" "evex")
5903 (set_attr "mode" "TI")])
;; 256-bit signed truncating conversion.
5905 (define_insn "fix_truncv4dfv4si2<mask_name>"
5906 [(set (match_operand:V4SI 0 "register_operand" "=v")
5907 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5908 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
5909 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5910 [(set_attr "type" "ssecvt")
5911 (set_attr "prefix" "maybe_evex")
5912 (set_attr "mode" "OI")])
;; 256-bit unsigned truncating conversion; AVX512VL only.
5914 (define_insn "ufix_truncv4dfv4si2<mask_name>"
5915 [(set (match_operand:V4SI 0 "register_operand" "=v")
5916 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5917 "TARGET_AVX512VL && TARGET_AVX512F"
5918 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5919 [(set_attr "type" "ssecvt")
5920 (set_attr "prefix" "maybe_evex")
5921 (set_attr "mode" "OI")])
;; AVX512DQ 64-bit-integer conversions.
;; Truncating DF -> DI/UDI (vcvttpd2[u]qq), all VL widths.
5923 (define_insn "fix<fixunssuffix>_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
5924 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5925 (any_fix:<sseintvecmode>
5926 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5927 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
5928 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5929 [(set_attr "type" "ssecvt")
5930 (set_attr "prefix" "evex")
5931 (set_attr "mode" "<sseintvecmode2>")])
;; Rounded (non-truncating) DF -> signed DI (vcvtpd2qq).
5933 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5934 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5935 (unspec:<sseintvecmode>
5936 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
5937 UNSPEC_FIX_NOTRUNC))]
5938 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5939 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5940 [(set_attr "type" "ssecvt")
5941 (set_attr "prefix" "evex")
5942 (set_attr "mode" "<sseintvecmode2>")])
;; Rounded (non-truncating) DF -> unsigned DI (vcvtpd2uqq).
5944 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5945 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5946 (unspec:<sseintvecmode>
5947 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
5948 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5949 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5950 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5951 [(set_attr "type" "ssecvt")
5952 (set_attr "prefix" "evex")
5953 (set_attr "mode" "<sseintvecmode2>")])
;; Truncating SF -> DI/UDI, widening (vcvttps2[u]qq), 128/256-bit
;; float sources producing double-width integer vectors.
5955 (define_insn "fix<fixunssuffix>_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
5956 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
5957 (any_fix:<sselongvecmode>
5958 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5959 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
5960 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5961 [(set_attr "type" "ssecvt")
5962 (set_attr "prefix" "evex")
5963 (set_attr "mode" "<sseintvecmode3>")])
;; Smallest form: low 2 SF elements of a V4SF -> V2DI (%q1 = 64-bit ref).
5965 (define_insn "fix<fixunssuffix>_truncv2sfv2di2<mask_name>"
5966 [(set (match_operand:V2DI 0 "register_operand" "=v")
5969 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5970 (parallel [(const_int 0) (const_int 1)]))))]
5971 "TARGET_AVX512DQ && TARGET_AVX512VL"
5972 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5973 [(set_attr "type" "ssecvt")
5974 (set_attr "prefix" "evex")
5975 (set_attr "mode" "TI")])
;; Helper mode attributes for the unpack-fix-truncate expanders below:
;; destination integer mode, its lowercase name, and the mode used for
;; the vec_extract of the source half.
5977 (define_mode_attr vunpckfixt_mode
5978 [(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")])
5979 (define_mode_attr vunpckfixt_model
5980 [(V16SF "v8di") (V8SF "v4di") (V4SF "v2di")])
5981 (define_mode_attr vunpckfixt_extract_mode
5982 [(V16SF "v16sf") (V8SF "v8sf") (V4SF "v8sf")])
;; Unpack the low half of a float vector and truncate-convert it to a
;; double-width integer vector: extract the low half (except for V4SF,
;; where the source is used directly), then emit the matching
;; fix[uns]_trunc insn.  NOTE(review): some lines are absent from this
;; extract.
5984 (define_expand "vec_unpack_<fixprefix>fix_trunc_lo_<mode>"
5985 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
5986 (any_fix:<vunpckfixt_mode>
5987 (match_operand:VF1_AVX512VL 1 "register_operand"))]
5990 rtx tem = operands[1];
5991 if (<MODE>mode != V4SFmode)
5993 tem = gen_reg_rtx (<ssehalfvecmode>mode);
5994 emit_insn (gen_vec_extract_lo_<vunpckfixt_extract_mode> (tem,
5997 rtx (*gen) (rtx, rtx)
5998 = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
5999 emit_insn (gen (operands[0], tem));
;; High-half variant; for V4SF the high half is brought low with a
;; vpermilps shuffle (immediate 0x4e) instead of a vec_extract.
6003 (define_expand "vec_unpack_<fixprefix>fix_trunc_hi_<mode>"
6004 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
6005 (any_fix:<vunpckfixt_mode>
6006 (match_operand:VF1_AVX512VL 1 "register_operand"))]
6010 if (<MODE>mode != V4SFmode)
6012 tem = gen_reg_rtx (<ssehalfvecmode>mode);
6013 emit_insn (gen_vec_extract_hi_<vunpckfixt_extract_mode> (tem,
6018 tem = gen_reg_rtx (V4SFmode);
6019 emit_insn (gen_avx_vpermilv4sf (tem, operands[1], GEN_INT (0x4e)));
6021 rtx (*gen) (rtx, rtx)
6022 = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6023 emit_insn (gen (operands[0], tem));
;; Truncating SF -> unsigned SI (vcvttps2udq), 128/256-bit widths.
6027 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
6028 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6029 (unsigned_fix:<sseintvecmode>
6030 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
6032 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6033 [(set_attr "type" "ssecvt")
6034 (set_attr "prefix" "evex")
6035 (set_attr "mode" "<sseintvecmode2>")])
;; Expander: truncate-convert V4DF to V4SI placed in the low half of a
;; V8SI, upper half zero (operand 2 supplies the zero vector).
6037 (define_expand "avx_cvttpd2dq256_2"
6038 [(set (match_operand:V8SI 0 "register_operand")
6040 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
6043 "operands[2] = CONST0_RTX (V4SImode);")
;; 128-bit truncating DF -> SI: result in the low half of V4SI, high
;; half zeroed.  VEX/EVEX {x} form under AVX, legacy encoding otherwise.
6045 (define_insn "sse2_cvttpd2dq<mask_name>"
6046 [(set (match_operand:V4SI 0 "register_operand" "=v")
6048 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
6049 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6050 "TARGET_SSE2 && <mask_avx512vl_condition>"
6053 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
6055 return "cvttpd2dq\t{%1, %0|%0, %1}";
6057 [(set_attr "type" "ssecvt")
6058 (set_attr "amdfam10_decode" "double")
6059 (set_attr "athlon_decode" "vector")
6060 (set_attr "bdver1_decode" "double")
6061 (set_attr "prefix" "maybe_vex")
6062 (set_attr "mode" "TI")])
;; Scalar DF <-> SF conversions operating on the low element, with the
;; remaining elements merged from operand 1.
;; cvtsd2ss: alternatives are SSE reg / SSE mem / AVX 3-operand form
;; (the AVX form supports embedded rounding via <round_name>).
6064 (define_insn "sse2_cvtsd2ss<round_name>"
6065 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6068 (float_truncate:V2SF
6069 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
6070 (match_operand:V4SF 1 "register_operand" "0,0,v")
6074 cvtsd2ss\t{%2, %0|%0, %2}
6075 cvtsd2ss\t{%2, %0|%0, %q2}
6076 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
6077 [(set_attr "isa" "noavx,noavx,avx")
6078 (set_attr "type" "ssecvt")
6079 (set_attr "athlon_decode" "vector,double,*")
6080 (set_attr "amdfam10_decode" "vector,double,*")
6081 (set_attr "bdver1_decode" "direct,direct,*")
6082 (set_attr "btver2_decode" "double,double,double")
6083 (set_attr "prefix" "orig,orig,<round_prefix>")
6084 (set_attr "mode" "SF")])
;; Variant taking a scalar DF operand directly (float_truncate:SF).
6086 (define_insn "*sse2_vd_cvtsd2ss"
6087 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6090 (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
6091 (match_operand:V4SF 1 "register_operand" "0,0,v")
6095 cvtsd2ss\t{%2, %0|%0, %2}
6096 cvtsd2ss\t{%2, %0|%0, %2}
6097 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
6098 [(set_attr "isa" "noavx,noavx,avx")
6099 (set_attr "type" "ssecvt")
6100 (set_attr "athlon_decode" "vector,double,*")
6101 (set_attr "amdfam10_decode" "vector,double,*")
6102 (set_attr "bdver1_decode" "direct,direct,*")
6103 (set_attr "btver2_decode" "double,double,double")
6104 (set_attr "prefix" "orig,orig,vex")
6105 (set_attr "mode" "SF")])
;; cvtss2sd: widen the low SF element of operand 2 to DF; the AVX form
;; supports SAE-only exception suppression (%k2 = 32-bit memory ref).
6107 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
6108 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6112 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
6113 (parallel [(const_int 0) (const_int 1)])))
6114 (match_operand:V2DF 1 "register_operand" "0,0,v")
6118 cvtss2sd\t{%2, %0|%0, %2}
6119 cvtss2sd\t{%2, %0|%0, %k2}
6120 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
6121 [(set_attr "isa" "noavx,noavx,avx")
6122 (set_attr "type" "ssecvt")
6123 (set_attr "amdfam10_decode" "vector,double,*")
6124 (set_attr "athlon_decode" "direct,direct,*")
6125 (set_attr "bdver1_decode" "direct,direct,*")
6126 (set_attr "btver2_decode" "double,double,double")
6127 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
6128 (set_attr "mode" "DF")])
;; Variant taking a scalar SF operand directly (float_extend:DF).
6130 (define_insn "*sse2_vd_cvtss2sd"
6131 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6134 (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
6135 (match_operand:V2DF 1 "register_operand" "0,0,v")
6139 cvtss2sd\t{%2, %0|%0, %2}
6140 cvtss2sd\t{%2, %0|%0, %2}
6141 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
6142 [(set_attr "isa" "noavx,noavx,avx")
6143 (set_attr "type" "ssecvt")
6144 (set_attr "amdfam10_decode" "vector,double,*")
6145 (set_attr "athlon_decode" "direct,direct,*")
6146 (set_attr "bdver1_decode" "direct,direct,*")
6147 (set_attr "btver2_decode" "double,double,double")
6148 (set_attr "prefix" "orig,orig,vex")
6149 (set_attr "mode" "DF")])
;; Vector DF -> SF narrowing conversions (vcvtpd2ps).
;; 512-bit form with masking and embedded rounding.
6151 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
6152 [(set (match_operand:V8SF 0 "register_operand" "=v")
6153 (float_truncate:V8SF
6154 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
6156 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6157 [(set_attr "type" "ssecvt")
6158 (set_attr "prefix" "evex")
6159 (set_attr "mode" "V8SF")])
;; 256-bit form ({y} size suffix).
6161 (define_insn "avx_cvtpd2ps256<mask_name>"
6162 [(set (match_operand:V4SF 0 "register_operand" "=v")
6163 (float_truncate:V4SF
6164 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6165 "TARGET_AVX && <mask_avx512vl_condition>"
6166 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6167 [(set_attr "type" "ssecvt")
6168 (set_attr "prefix" "maybe_evex")
6169 (set_attr "btver2_decode" "vector")
6170 (set_attr "mode" "V4SF")])
;; Expander for the 128-bit form: V2DF -> V2SF padded with a V2SF zero
;; (operand 2) into a V4SF result.
6172 (define_expand "sse2_cvtpd2ps"
6173 [(set (match_operand:V4SF 0 "register_operand")
6175 (float_truncate:V2SF
6176 (match_operand:V2DF 1 "vector_operand"))
6179 "operands[2] = CONST0_RTX (V2SFmode);")
;; Masked expander variant: merge with operand 2 under mask operand 3.
6181 (define_expand "sse2_cvtpd2ps_mask"
6182 [(set (match_operand:V4SF 0 "register_operand")
6185 (float_truncate:V2SF
6186 (match_operand:V2DF 1 "vector_operand"))
6188 (match_operand:V4SF 2 "register_operand")
6189 (match_operand:QI 3 "register_operand")))]
6191 "operands[4] = CONST0_RTX (V2SFmode);")
;; Matching insn: VEX/EVEX {x} form under AVX, legacy SSE2 otherwise.
6193 (define_insn "*sse2_cvtpd2ps<mask_name>"
6194 [(set (match_operand:V4SF 0 "register_operand" "=v")
6196 (float_truncate:V2SF
6197 (match_operand:V2DF 1 "vector_operand" "vBm"))
6198 (match_operand:V2SF 2 "const0_operand")))]
6199 "TARGET_SSE2 && <mask_avx512vl_condition>"
6202 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
6204 return "cvtpd2ps\t{%1, %0|%0, %1}";
6206 [(set_attr "type" "ssecvt")
6207 (set_attr "amdfam10_decode" "double")
6208 (set_attr "athlon_decode" "vector")
6209 (set_attr "bdver1_decode" "double")
6210 (set_attr "prefix_data16" "1")
6211 (set_attr "prefix" "maybe_vex")
6212 (set_attr "mode" "V4SF")])
6214 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Maps each DF destination mode to its SF source mode.
6215 (define_mode_attr sf2dfmode
6216 [(V8DF "V8SF") (V4DF "V4SF")])
;; SF -> DF widening conversion (vcvtps2pd), 256/512-bit destinations;
;; masking and SAE-only rounding control on the 512-bit form.
6218 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
6219 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
6220 (float_extend:VF2_512_256
6221 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6222 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
6223 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6224 [(set_attr "type" "ssecvt")
6225 (set_attr "prefix" "maybe_vex")
6226 (set_attr "mode" "<MODE>")])
;; Widen the low 4 SF elements of a V8SI source to V4DF (%x1 = 128-bit
;; reference).
6228 (define_insn "*avx_cvtps2pd256_2"
6229 [(set (match_operand:V4DF 0 "register_operand" "=v")
6232 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6233 (parallel [(const_int 0) (const_int 1)
6234 (const_int 2) (const_int 3)]))))]
6236 "vcvtps2pd\t{%x1, %0|%0, %x1}"
6237 [(set_attr "type" "ssecvt")
6238 (set_attr "prefix" "vex")
6239 (set_attr "mode" "V4DF")])
;; Widen the low 8 SF elements of a V16SF source to V8DF (%t1 = 256-bit
;; reference).
6241 (define_insn "vec_unpacks_lo_v16sf"
6242 [(set (match_operand:V8DF 0 "register_operand" "=v")
6245 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6246 (parallel [(const_int 0) (const_int 1)
6247 (const_int 2) (const_int 3)
6248 (const_int 4) (const_int 5)
6249 (const_int 6) (const_int 7)]))))]
6251 "vcvtps2pd\t{%t1, %0|%0, %t1}"
6252 [(set_attr "type" "ssecvt")
6253 (set_attr "prefix" "evex")
6254 (set_attr "mode" "V8DF")])
;; Vector <-> mask-register conversions.
;; vpmov[bw]2m: byte/word vector to mask (AVX512BW family of modes).
6256 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6257 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6258 (unspec:<avx512fmaskmode>
6259 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
6260 UNSPEC_CVTINT2MASK))]
6262 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6263 [(set_attr "prefix" "evex")
6264 (set_attr "mode" "<sseinsnmode>")])
;; vpmov[dq]2m: dword/qword vector to mask.
6266 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6267 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6268 (unspec:<avx512fmaskmode>
6269 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
6270 UNSPEC_CVTINT2MASK))]
6272 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6273 [(set_attr "prefix" "evex")
6274 (set_attr "mode" "<sseinsnmode>")])
;; Mask -> byte/word vector expander: materialize all-ones (operand 2)
;; and zero (operand 3) vectors for the vec_merge form below.
6276 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6277 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
6278 (vec_merge:VI12_AVX512VL
6281 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6284 operands[2] = CONSTM1_RTX (<MODE>mode);
6285 operands[3] = CONST0_RTX (<MODE>mode);
;; Matching insn: vpmovm2[bw] sets each element to all-ones or zero
;; according to the mask bit.
6288 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6289 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
6290 (vec_merge:VI12_AVX512VL
6291 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
6292 (match_operand:VI12_AVX512VL 3 "const0_operand")
6293 (match_operand:<avx512fmaskmode> 1 "register_operand" "k")))]
6295 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
6296 [(set_attr "prefix" "evex")
6297 (set_attr "mode" "<sseinsnmode>")])
;; Mask -> dword/qword vector expander (same scheme as above).
6299 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6300 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
6301 (vec_merge:VI48_AVX512VL
6304 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6307 operands[2] = CONSTM1_RTX (<MODE>mode);
6308 operands[3] = CONST0_RTX (<MODE>mode);
;; Matching insn: vpmovm2[dq].
6311 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6312 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
6313 (vec_merge:VI48_AVX512VL
6314 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
6315 (match_operand:VI48_AVX512VL 3 "const0_operand")
6316 (match_operand:<avx512fmaskmode> 1 "register_operand" "k")))]
6318 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
6319 [(set_attr "prefix" "evex")
6320 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit SF -> DF: widen the low two elements of a V4SF (%q1 = 64-bit
;; memory reference); optionally masked under AVX512VL.
6322 (define_insn "sse2_cvtps2pd<mask_name>"
6323 [(set (match_operand:V2DF 0 "register_operand" "=v")
6326 (match_operand:V4SF 1 "vector_operand" "vm")
6327 (parallel [(const_int 0) (const_int 1)]))))]
6328 "TARGET_SSE2 && <mask_avx512vl_condition>"
6329 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6330 [(set_attr "type" "ssecvt")
6331 (set_attr "amdfam10_decode" "direct")
6332 (set_attr "athlon_decode" "double")
6333 (set_attr "bdver1_decode" "double")
6334 (set_attr "prefix_data16" "0")
6335 (set_attr "prefix" "maybe_vex")
6336 (set_attr "mode" "V2DF")])
;; vec_unpacks_hi expanders: shuffle the high half of the SF source
;; into the low position (into scratch operand 2), then widen-convert.
6338 (define_expand "vec_unpacks_hi_v4sf"
6343 (match_operand:V4SF 1 "vector_operand"))
6344 (parallel [(const_int 6) (const_int 7)
6345 (const_int 2) (const_int 3)])))
6346 (set (match_operand:V2DF 0 "register_operand")
6350 (parallel [(const_int 0) (const_int 1)]))))]
6352 "operands[2] = gen_reg_rtx (V4SFmode);")
6354 (define_expand "vec_unpacks_hi_v8sf"
6357 (match_operand:V8SF 1 "register_operand")
6358 (parallel [(const_int 4) (const_int 5)
6359 (const_int 6) (const_int 7)])))
6360 (set (match_operand:V4DF 0 "register_operand")
6364 "operands[2] = gen_reg_rtx (V4SFmode);")
6366 (define_expand "vec_unpacks_hi_v16sf"
6369 (match_operand:V16SF 1 "register_operand")
6370 (parallel [(const_int 8) (const_int 9)
6371 (const_int 10) (const_int 11)
6372 (const_int 12) (const_int 13)
6373 (const_int 14) (const_int 15)])))
6374 (set (match_operand:V8DF 0 "register_operand")
6378 "operands[2] = gen_reg_rtx (V8SFmode);")
;; vec_unpacks_lo expanders: no shuffle needed — widen-convert the low
;; half directly via the vec_select patterns above.
6380 (define_expand "vec_unpacks_lo_v4sf"
6381 [(set (match_operand:V2DF 0 "register_operand")
6384 (match_operand:V4SF 1 "vector_operand")
6385 (parallel [(const_int 0) (const_int 1)]))))]
6388 (define_expand "vec_unpacks_lo_v8sf"
6389 [(set (match_operand:V4DF 0 "register_operand")
6392 (match_operand:V8SF 1 "nonimmediate_operand")
6393 (parallel [(const_int 0) (const_int 1)
6394 (const_int 2) (const_int 3)]))))]
;; Maps each integer source mode to the float mode its unpacked half
;; converts to.
6397 (define_mode_attr sseunpackfltmode
6398 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
6399 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Generic int-unpack-then-float expanders for HImode element vectors:
;; sign/zero unpack into a scratch of <sseunpackmode>, then FLOAT it.
6401 (define_expand "vec_unpacks_float_hi_<mode>"
6402 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6403 (match_operand:VI2_AVX512F 1 "register_operand")]
6406 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6408 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
6409 emit_insn (gen_rtx_SET (operands[0],
6410 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6414 (define_expand "vec_unpacks_float_lo_<mode>"
6415 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6416 (match_operand:VI2_AVX512F 1 "register_operand")]
6419 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6421 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
6422 emit_insn (gen_rtx_SET (operands[0],
6423 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned-unpack variants: same scheme with vec_unpacku_{hi,lo}; the
;; unpacked values are non-negative, so a signed FLOAT is still exact.
6427 (define_expand "vec_unpacku_float_hi_<mode>"
6428 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6429 (match_operand:VI2_AVX512F 1 "register_operand")]
6432 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6434 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
6435 emit_insn (gen_rtx_SET (operands[0],
6436 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6440 (define_expand "vec_unpacku_float_lo_<mode>"
6441 [(match_operand:<sseunpackfltmode> 0 "register_operand")
6442 (match_operand:VI2_AVX512F 1 "register_operand")]
6445 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6447 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
6448 emit_insn (gen_rtx_SET (operands[0],
6449 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed SImode variants: shuffle the high elements low (scratch
;; operand 2), then convert with cvtdq2pd-style patterns.
6453 (define_expand "vec_unpacks_float_hi_v4si"
6456 (match_operand:V4SI 1 "vector_operand")
6457 (parallel [(const_int 2) (const_int 3)
6458 (const_int 2) (const_int 3)])))
6459 (set (match_operand:V2DF 0 "register_operand")
6463 (parallel [(const_int 0) (const_int 1)]))))]
6465 "operands[2] = gen_reg_rtx (V4SImode);")
6467 (define_expand "vec_unpacks_float_lo_v4si"
6468 [(set (match_operand:V2DF 0 "register_operand")
6471 (match_operand:V4SI 1 "vector_operand")
6472 (parallel [(const_int 0) (const_int 1)]))))]
6475 (define_expand "vec_unpacks_float_hi_v8si"
6478 (match_operand:V8SI 1 "vector_operand")
6479 (parallel [(const_int 4) (const_int 5)
6480 (const_int 6) (const_int 7)])))
6481 (set (match_operand:V4DF 0 "register_operand")
6485 "operands[2] = gen_reg_rtx (V4SImode);")
6487 (define_expand "vec_unpacks_float_lo_v8si"
6488 [(set (match_operand:V4DF 0 "register_operand")
6491 (match_operand:V8SI 1 "nonimmediate_operand")
6492 (parallel [(const_int 0) (const_int 1)
6493 (const_int 2) (const_int 3)]))))]
6496 (define_expand "vec_unpacks_float_hi_v16si"
6499 (match_operand:V16SI 1 "nonimmediate_operand")
6500 (parallel [(const_int 8) (const_int 9)
6501 (const_int 10) (const_int 11)
6502 (const_int 12) (const_int 13)
6503 (const_int 14) (const_int 15)])))
6504 (set (match_operand:V8DF 0 "register_operand")
6508 "operands[2] = gen_reg_rtx (V8SImode);")
6510 (define_expand "vec_unpacks_float_lo_v16si"
6511 [(set (match_operand:V8DF 0 "register_operand")
6514 (match_operand:V16SI 1 "nonimmediate_operand")
6515 (parallel [(const_int 0) (const_int 1)
6516 (const_int 2) (const_int 3)
6517 (const_int 4) (const_int 5)
6518 (const_int 6) (const_int 7)]))))]
;; Unsigned SImode -> DF unpack-and-float.  There is no unsigned
;; 32-bit convert before AVX-512, so these use signed conversion plus
;; a correction: values whose sign bit was set convert to a negative
;; DF, which is fixed up by adding 2^32 (TWO32r below) selected via a
;; compare-with-zero / AND / ADD sequence.
6521 (define_expand "vec_unpacku_float_hi_v4si"
6524 (match_operand:V4SI 1 "vector_operand")
6525 (parallel [(const_int 2) (const_int 3)
6526 (const_int 2) (const_int 3)])))
6531 (parallel [(const_int 0) (const_int 1)]))))
6533 (lt:V2DF (match_dup 6) (match_dup 3)))
6535 (and:V2DF (match_dup 7) (match_dup 4)))
6536 (set (match_operand:V2DF 0 "register_operand")
6537 (plus:V2DF (match_dup 6) (match_dup 8)))]
6540 REAL_VALUE_TYPE TWO32r;
6544 real_ldexp (&TWO32r, &dconst1, 32);
6545 x = const_double_from_real_value (TWO32r, DFmode);
6547 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
6548 operands[4] = force_reg (V2DFmode,
6549 ix86_build_const_vector (V2DFmode, 1, x));
6551 operands[5] = gen_reg_rtx (V4SImode);
6553 for (i = 6; i < 9; i++)
6554 operands[i] = gen_reg_rtx (V2DFmode);
;; Low-half variant: no shuffle needed, one fewer scratch register.
6557 (define_expand "vec_unpacku_float_lo_v4si"
6561 (match_operand:V4SI 1 "vector_operand")
6562 (parallel [(const_int 0) (const_int 1)]))))
6564 (lt:V2DF (match_dup 5) (match_dup 3)))
6566 (and:V2DF (match_dup 6) (match_dup 4)))
6567 (set (match_operand:V2DF 0 "register_operand")
6568 (plus:V2DF (match_dup 5) (match_dup 7)))]
6571 REAL_VALUE_TYPE TWO32r;
6575 real_ldexp (&TWO32r, &dconst1, 32);
6576 x = const_double_from_real_value (TWO32r, DFmode);
6578 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
6579 operands[4] = force_reg (V2DFmode,
6580 ix86_build_const_vector (V2DFmode, 1, x));
6582 for (i = 5; i < 8; i++)
6583 operands[i] = gen_reg_rtx (V2DFmode);
;; 256-bit high-half variant, emitted entirely from C code: extract
;; high V4SI, float it, then the same lt/and/add 2^32 correction.
6586 (define_expand "vec_unpacku_float_hi_v8si"
6587 [(match_operand:V4DF 0 "register_operand")
6588 (match_operand:V8SI 1 "register_operand")]
6591 REAL_VALUE_TYPE TWO32r;
6595 real_ldexp (&TWO32r, &dconst1, 32);
6596 x = const_double_from_real_value (TWO32r, DFmode);
6598 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
6599 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
6600 tmp[5] = gen_reg_rtx (V4SImode);
6602 for (i = 2; i < 5; i++)
6603 tmp[i] = gen_reg_rtx (V4DFmode);
6604 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
6605 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
6606 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
6607 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
6608 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; 512-bit high-half variant: the correction uses a mask register (k)
;; and a masked add instead of the AND trick.
6612 (define_expand "vec_unpacku_float_hi_v16si"
6613 [(match_operand:V8DF 0 "register_operand")
6614 (match_operand:V16SI 1 "register_operand")]
6617 REAL_VALUE_TYPE TWO32r;
6620 real_ldexp (&TWO32r, &dconst1, 32);
6621 x = const_double_from_real_value (TWO32r, DFmode);
6623 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
6624 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
6625 tmp[2] = gen_reg_rtx (V8DFmode);
6626 tmp[3] = gen_reg_rtx (V8SImode);
6627 k = gen_reg_rtx (QImode);
6629 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
6630 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
6631 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
6632 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
6633 emit_move_insn (operands[0], tmp[2]);
;; 256-bit low-half variant: converts via avx_cvtdq2pd256_2 (low half
;; of the V8SI source), then the lt/and/add correction.
6637 (define_expand "vec_unpacku_float_lo_v8si"
6638 [(match_operand:V4DF 0 "register_operand")
6639 (match_operand:V8SI 1 "nonimmediate_operand")]
6642 REAL_VALUE_TYPE TWO32r;
6646 real_ldexp (&TWO32r, &dconst1, 32);
6647 x = const_double_from_real_value (TWO32r, DFmode);
6649 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
6650 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
6652 for (i = 2; i < 5; i++)
6653 tmp[i] = gen_reg_rtx (V4DFmode);
6654 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
6655 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
6656 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
6657 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; 512-bit low-half variant: avx512f_cvtdq2pd512_2 plus masked add.
6661 (define_expand "vec_unpacku_float_lo_v16si"
6662 [(match_operand:V8DF 0 "register_operand")
6663 (match_operand:V16SI 1 "nonimmediate_operand")]
6666 REAL_VALUE_TYPE TWO32r;
6669 real_ldexp (&TWO32r, &dconst1, 32);
6670 x = const_double_from_real_value (TWO32r, DFmode);
6672 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
6673 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
6674 tmp[2] = gen_reg_rtx (V8DFmode);
6675 k = gen_reg_rtx (QImode);
6677 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
6678 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
6679 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
6680 emit_move_insn (operands[0], tmp[2]);
6684 (define_expand "vec_pack_trunc_<mode>"
6686 (float_truncate:<sf2dfmode>
6687 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
6689 (float_truncate:<sf2dfmode>
6690 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
6691 (set (match_operand:<ssePSmode> 0 "register_operand")
6692 (vec_concat:<ssePSmode>
6697 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
6698 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
6701 (define_expand "vec_pack_trunc_v2df"
6702 [(match_operand:V4SF 0 "register_operand")
6703 (match_operand:V2DF 1 "vector_operand")
6704 (match_operand:V2DF 2 "vector_operand")]
6709 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6711 tmp0 = gen_reg_rtx (V4DFmode);
6712 tmp1 = force_reg (V2DFmode, operands[1]);
6714 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6715 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
6719 tmp0 = gen_reg_rtx (V4SFmode);
6720 tmp1 = gen_reg_rtx (V4SFmode);
6722 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
6723 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
6724 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Truncating double -> signed-int packs.  Each variant converts the two
;; double-precision inputs with a fix_trunc and then concatenates the
;; resulting integer halves into the wider destination.
6729 (define_expand "vec_pack_sfix_trunc_v8df"
6730   [(match_operand:V16SI 0 "register_operand")
6731    (match_operand:V8DF 1 "nonimmediate_operand")
6732    (match_operand:V8DF 2 "nonimmediate_operand")]
6737   r1 = gen_reg_rtx (V8SImode);
6738   r2 = gen_reg_rtx (V8SImode);
6740   emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
6741   emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
6742   emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; 256-bit variant: two V4DF->V4SI truncating converts plus a V8SI concat.
6746 (define_expand "vec_pack_sfix_trunc_v4df"
6747   [(match_operand:V8SI 0 "register_operand")
6748    (match_operand:V4DF 1 "nonimmediate_operand")
6749    (match_operand:V4DF 2 "nonimmediate_operand")]
6754   r1 = gen_reg_rtx (V4SImode);
6755   r2 = gen_reg_rtx (V4SImode);
6757   emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
6758   emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
6759   emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; 128-bit variant: with AVX do one 256-bit truncating convert on the
;; concatenated inputs; otherwise cvttpd2dq each half (results land in
;; the low 64 bits) and interleave the low quadwords to combine them.
6763 (define_expand "vec_pack_sfix_trunc_v2df"
6764   [(match_operand:V4SI 0 "register_operand")
6765    (match_operand:V2DF 1 "vector_operand")
6766    (match_operand:V2DF 2 "vector_operand")]
6769   rtx tmp0, tmp1, tmp2;
6771   if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6773       tmp0 = gen_reg_rtx (V4DFmode);
6774       tmp1 = force_reg (V2DFmode, operands[1]);
6776       emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6777       emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
6781       tmp0 = gen_reg_rtx (V4SImode);
6782       tmp1 = gen_reg_rtx (V4SImode);
6783       tmp2 = gen_reg_rtx (V2DImode);
6785       emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
6786       emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
6787       emit_insn (gen_vec_interleave_lowv2di (tmp2,
6788 					    gen_lowpart (V2DImode, tmp0),
6789 					    gen_lowpart (V2DImode, tmp1)));
6790       emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; Map a double-precision vector mode to the integer vector mode that
;; holds the packed conversion results of two such inputs.
6795 (define_mode_attr ssepackfltmode
6796   [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; Unsigned truncating pack.  V8DF has a direct unsigned convert; for the
;; narrower modes, bias each unsigned input into signed range with
;; ix86_expand_adjust_ufix_to_sfix_si, do the signed pack, then XOR in
;; the correction masks (extracted even/odd from the two adjustments).
6798 (define_expand "vec_pack_ufix_trunc_<mode>"
6799   [(match_operand:<ssepackfltmode> 0 "register_operand")
6800    (match_operand:VF2 1 "register_operand")
6801    (match_operand:VF2 2 "register_operand")]
6804   if (<MODE>mode == V8DFmode)
6808       r1 = gen_reg_rtx (V8SImode);
6809       r2 = gen_reg_rtx (V8SImode);
6811       emit_insn (gen_fixuns_truncv8dfv8si2 (r1, operands[1]));
6812       emit_insn (gen_fixuns_truncv8dfv8si2 (r2, operands[2]));
6813       emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6818       tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
6819       tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
6820       tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
6821       emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
6822       if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
6824 	  tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
6825 	  ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
6829 	  tmp[5] = gen_reg_rtx (V8SFmode);
6830 	  ix86_expand_vec_extract_even_odd (tmp[5],
6831 					    gen_lowpart (V8SFmode, tmp[2]),
6832 					    gen_lowpart (V8SFmode, tmp[3]), 0);
6833 	  tmp[5] = gen_lowpart (V8SImode, tmp[5]);
6835       tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
6836 				    operands[0], 0, OPTAB_DIRECT);
6837       if (tmp[6] != operands[0])
6838 	emit_move_insn (operands[0], tmp[6]);
;; Rounding double -> signed-int packs (cvtpd2dq, i.e. round to nearest
;; rather than truncate): convert each input, then concatenate.
6844 (define_expand "avx512f_vec_pack_sfix_v8df"
6845   [(match_operand:V16SI 0 "register_operand")
6846    (match_operand:V8DF 1 "nonimmediate_operand")
6847    (match_operand:V8DF 2 "nonimmediate_operand")]
6852   r1 = gen_reg_rtx (V8SImode);
6853   r2 = gen_reg_rtx (V8SImode);
6855   emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
6856   emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
6857   emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; 256-bit variant: two vcvtpd2dq converts plus a V8SI concat.
6861 (define_expand "vec_pack_sfix_v4df"
6862   [(match_operand:V8SI 0 "register_operand")
6863    (match_operand:V4DF 1 "nonimmediate_operand")
6864    (match_operand:V4DF 2 "nonimmediate_operand")]
6869   r1 = gen_reg_rtx (V4SImode);
6870   r2 = gen_reg_rtx (V4SImode);
6872   emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
6873   emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
6874   emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; 128-bit variant: with AVX use one 256-bit convert on the concatenated
;; inputs; otherwise cvtpd2dq each half and interleave the low quadwords.
6878 (define_expand "vec_pack_sfix_v2df"
6879   [(match_operand:V4SI 0 "register_operand")
6880    (match_operand:V2DF 1 "vector_operand")
6881    (match_operand:V2DF 2 "vector_operand")]
6884   rtx tmp0, tmp1, tmp2;
6886   if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6888       tmp0 = gen_reg_rtx (V4DFmode);
6889       tmp1 = force_reg (V2DFmode, operands[1]);
6891       emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6892       emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
6896       tmp0 = gen_reg_rtx (V4SImode);
6897       tmp1 = gen_reg_rtx (V4SImode);
6898       tmp2 = gen_reg_rtx (V2DImode);
6900       emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
6901       emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
6902       emit_insn (gen_vec_interleave_lowv2di (tmp2,
6903 					    gen_lowpart (V2DImode, tmp0),
6904 					    gen_lowpart (V2DImode, tmp1)));
6905       emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
6910 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6912 ;; Parallel single-precision floating point element swizzling
6914 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movhlps: copy the high pair of operand 2 into the low pair of the
;; result.  The _exp expander legitimizes the operands (forcing a
;; pseudo destination if needed) before emitting the insn.
6916 (define_expand "sse_movhlps_exp"
6917   [(set (match_operand:V4SF 0 "nonimmediate_operand")
6920 	    (match_operand:V4SF 1 "nonimmediate_operand")
6921 	    (match_operand:V4SF 2 "nonimmediate_operand"))
6922 	  (parallel [(const_int 6)
6928   rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6930   emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
6932   /* Fix up the destination if needed.  */
6933   if (dst != operands[0])
6934     emit_move_insn (operands[0], dst);
;; The insn itself; memory alternatives use movlps/movhps on the
;; appropriate half instead.  At most one operand may be a MEM.
6939 (define_insn "sse_movhlps"
6940   [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,v,x,v,m")
6943 	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6944 	    (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
6945 	  (parallel [(const_int 6)
6949   "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6951    movhlps\t{%2, %0|%0, %2}
6952    vmovhlps\t{%2, %1, %0|%0, %1, %2}
6953    movlps\t{%H2, %0|%0, %H2}
6954    vmovlps\t{%H2, %1, %0|%0, %1, %H2}
6955    %vmovhps\t{%2, %0|%q0, %2}"
6956   [(set_attr "isa" "noavx,avx,noavx,avx,*")
6957    (set_attr "type" "ssemov")
6958    (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6959    (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movlhps: copy the low pair of operand 2 into the high pair of the
;; result.  Expander mirrors sse_movhlps_exp: legitimize, emit, then
;; copy back if a temporary destination was used.
6961 (define_expand "sse_movlhps_exp"
6962   [(set (match_operand:V4SF 0 "nonimmediate_operand")
6965 	    (match_operand:V4SF 1 "nonimmediate_operand")
6966 	    (match_operand:V4SF 2 "nonimmediate_operand"))
6967 	  (parallel [(const_int 0)
6973   rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6975   emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
6977   /* Fix up the destination if needed.  */
6978   if (dst != operands[0])
6979     emit_move_insn (operands[0], dst);
;; Memory alternatives use movhps/movlps on the relevant half.
6984 (define_insn "sse_movlhps"
6985   [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,v,x,v,o")
6988 	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6989 	    (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
6990 	  (parallel [(const_int 0)
6994   "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
6996    movlhps\t{%2, %0|%0, %2}
6997    vmovlhps\t{%2, %1, %0|%0, %1, %2}
6998    movhps\t{%2, %0|%0, %q2}
6999    vmovhps\t{%2, %1, %0|%0, %1, %q2}
7000    %vmovlps\t{%2, %H0|%H0, %2}"
7001   [(set_attr "isa" "noavx,avx,noavx,avx,*")
7002    (set_attr "type" "ssemov")
7003    (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7004    (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; vunpckhps, 512-bit: interleave the high elements of each 128-bit lane
;; of operands 1 and 2 (selection indices shown in the parallel).
7006 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
7007   [(set (match_operand:V16SF 0 "register_operand" "=v")
7010 	    (match_operand:V16SF 1 "register_operand" "v")
7011 	    (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7012 	  (parallel [(const_int 2) (const_int 18)
7013 		     (const_int 3) (const_int 19)
7014 		     (const_int 6) (const_int 22)
7015 		     (const_int 7) (const_int 23)
7016 		     (const_int 10) (const_int 26)
7017 		     (const_int 11) (const_int 27)
7018 		     (const_int 14) (const_int 30)
7019 		     (const_int 15) (const_int 31)])))]
7021   "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7022   [(set_attr "type" "sselog")
7023    (set_attr "prefix" "evex")
7024    (set_attr "mode" "V16SF")])
7026 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7027 (define_insn "avx_unpckhps256<mask_name>"
7028   [(set (match_operand:V8SF 0 "register_operand" "=v")
7031 	    (match_operand:V8SF 1 "register_operand" "v")
7032 	    (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7033 	  (parallel [(const_int 2) (const_int 10)
7034 		     (const_int 3) (const_int 11)
7035 		     (const_int 6) (const_int 14)
7036 		     (const_int 7) (const_int 15)])))]
7037   "TARGET_AVX && <mask_avx512vl_condition>"
7038   "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7039   [(set_attr "type" "sselog")
7040    (set_attr "prefix" "vex")
7041    (set_attr "mode" "V8SF")])
;; True cross-lane high interleave for V8SF: built from an in-lane
;; unpcklps and unpckhps into temporaries (operands 3 and 4), then a
;; vperm2f128-style select of the upper halves.
7043 (define_expand "vec_interleave_highv8sf"
7047 	    (match_operand:V8SF 1 "register_operand")
7048 	    (match_operand:V8SF 2 "nonimmediate_operand"))
7049 	  (parallel [(const_int 0) (const_int 8)
7050 		     (const_int 1) (const_int 9)
7051 		     (const_int 4) (const_int 12)
7052 		     (const_int 5) (const_int 13)])))
7058 	  (parallel [(const_int 2) (const_int 10)
7059 		     (const_int 3) (const_int 11)
7060 		     (const_int 6) (const_int 14)
7061 		     (const_int 7) (const_int 15)])))
7062    (set (match_operand:V8SF 0 "register_operand")
7067 	  (parallel [(const_int 4) (const_int 5)
7068 		     (const_int 6) (const_int 7)
7069 		     (const_int 12) (const_int 13)
7070 		     (const_int 14) (const_int 15)])))]
7073   operands[3] = gen_reg_rtx (V8SFmode);
7074   operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit high interleave maps directly onto unpckhps.
7077 (define_insn "vec_interleave_highv4sf<mask_name>"
7078   [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7081 	    (match_operand:V4SF 1 "register_operand" "0,v")
7082 	    (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7083 	  (parallel [(const_int 2) (const_int 6)
7084 		     (const_int 3) (const_int 7)])))]
7085   "TARGET_SSE && <mask_avx512vl_condition>"
7087    unpckhps\t{%2, %0|%0, %2}
7088    vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7089   [(set_attr "isa" "noavx,avx")
7090    (set_attr "type" "sselog")
7091    (set_attr "prefix" "orig,vex")
7092    (set_attr "mode" "V4SF")])
;; vunpcklps, 512-bit: interleave the low elements of each 128-bit lane.
7094 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
7095   [(set (match_operand:V16SF 0 "register_operand" "=v")
7098 	    (match_operand:V16SF 1 "register_operand" "v")
7099 	    (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7100 	  (parallel [(const_int 0) (const_int 16)
7101 		     (const_int 1) (const_int 17)
7102 		     (const_int 4) (const_int 20)
7103 		     (const_int 5) (const_int 21)
7104 		     (const_int 8) (const_int 24)
7105 		     (const_int 9) (const_int 25)
7106 		     (const_int 12) (const_int 28)
7107 		     (const_int 13) (const_int 29)])))]
7109   "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7110   [(set_attr "type" "sselog")
7111    (set_attr "prefix" "evex")
7112    (set_attr "mode" "V16SF")])
7114 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7115 (define_insn "avx_unpcklps256<mask_name>"
7116   [(set (match_operand:V8SF 0 "register_operand" "=v")
7119 	    (match_operand:V8SF 1 "register_operand" "v")
7120 	    (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7121 	  (parallel [(const_int 0) (const_int 8)
7122 		     (const_int 1) (const_int 9)
7123 		     (const_int 4) (const_int 12)
7124 		     (const_int 5) (const_int 13)])))]
7125   "TARGET_AVX && <mask_avx512vl_condition>"
7126   "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7127   [(set_attr "type" "sselog")
7128    (set_attr "prefix" "vex")
7129    (set_attr "mode" "V8SF")])
;; Masked 128-bit unpcklps: operand 3 is the merge source (or zero),
;; operand 4 the mask register.
7131 (define_insn "unpcklps128_mask"
7132   [(set (match_operand:V4SF 0 "register_operand" "=v")
7136 	      (match_operand:V4SF 1 "register_operand" "v")
7137 	      (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7138 	    (parallel [(const_int 0) (const_int 4)
7139 		       (const_int 1) (const_int 5)]))
7140 	  (match_operand:V4SF 3 "nonimm_or_0_operand" "0C")
7141 	  (match_operand:QI 4 "register_operand" "Yk")))]
7143   "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7144   [(set_attr "type" "sselog")
7145    (set_attr "prefix" "evex")
7146    (set_attr "mode" "V4SF")])
;; True cross-lane low interleave for V8SF: same unpckl/unpckh pair into
;; fresh temporaries as the high variant, but the final select takes the
;; lower halves.
7148 (define_expand "vec_interleave_lowv8sf"
7152 	    (match_operand:V8SF 1 "register_operand")
7153 	    (match_operand:V8SF 2 "nonimmediate_operand"))
7154 	  (parallel [(const_int 0) (const_int 8)
7155 		     (const_int 1) (const_int 9)
7156 		     (const_int 4) (const_int 12)
7157 		     (const_int 5) (const_int 13)])))
7163 	  (parallel [(const_int 2) (const_int 10)
7164 		     (const_int 3) (const_int 11)
7165 		     (const_int 6) (const_int 14)
7166 		     (const_int 7) (const_int 15)])))
7167    (set (match_operand:V8SF 0 "register_operand")
7172 	  (parallel [(const_int 0) (const_int 1)
7173 		     (const_int 2) (const_int 3)
7174 		     (const_int 8) (const_int 9)
7175 		     (const_int 10) (const_int 11)])))]
7178   operands[3] = gen_reg_rtx (V8SFmode);
7179   operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit low interleave maps directly onto unpcklps.
7182 (define_insn "vec_interleave_lowv4sf"
7183   [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7186 	    (match_operand:V4SF 1 "register_operand" "0,v")
7187 	    (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7188 	  (parallel [(const_int 0) (const_int 4)
7189 		     (const_int 1) (const_int 5)])))]
7192    unpcklps\t{%2, %0|%0, %2}
7193    vunpcklps\t{%2, %1, %0|%0, %1, %2}"
7194   [(set_attr "isa" "noavx,avx")
7195    (set_attr "type" "sselog")
7196    (set_attr "prefix" "orig,maybe_evex")
7197    (set_attr "mode" "V4SF")])
7199 ;; These are modeled with the same vec_concat as the others so that we
7200 ;; capture users of shufps that can use the new instructions
;; movshdup: duplicate the odd-indexed elements of the source.
7201 (define_insn "avx_movshdup256<mask_name>"
7202   [(set (match_operand:V8SF 0 "register_operand" "=v")
7205 	    (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7207 	  (parallel [(const_int 1) (const_int 1)
7208 		     (const_int 3) (const_int 3)
7209 		     (const_int 5) (const_int 5)
7210 		     (const_int 7) (const_int 7)])))]
7211   "TARGET_AVX && <mask_avx512vl_condition>"
7212   "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7213   [(set_attr "type" "sse")
7214    (set_attr "prefix" "vex")
7215    (set_attr "mode" "V8SF")])
;; 128-bit SSE3 form.
7217 (define_insn "sse3_movshdup<mask_name>"
7218   [(set (match_operand:V4SF 0 "register_operand" "=v")
7221 	    (match_operand:V4SF 1 "vector_operand" "vBm")
7223 	  (parallel [(const_int 1)
7227   "TARGET_SSE3 && <mask_avx512vl_condition>"
7228   "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7229   [(set_attr "type" "sse")
7230    (set_attr "prefix_rep" "1")
7231    (set_attr "prefix" "maybe_vex")
7232    (set_attr "mode" "V4SF")])
;; 512-bit AVX512F form.
7234 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
7235   [(set (match_operand:V16SF 0 "register_operand" "=v")
7238 	    (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7240 	  (parallel [(const_int 1) (const_int 1)
7241 		     (const_int 3) (const_int 3)
7242 		     (const_int 5) (const_int 5)
7243 		     (const_int 7) (const_int 7)
7244 		     (const_int 9) (const_int 9)
7245 		     (const_int 11) (const_int 11)
7246 		     (const_int 13) (const_int 13)
7247 		     (const_int 15) (const_int 15)])))]
7249   "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7250   [(set_attr "type" "sse")
7251    (set_attr "prefix" "evex")
7252    (set_attr "mode" "V16SF")])
;; movsldup: duplicate the even-indexed elements of the source.
7254 (define_insn "avx_movsldup256<mask_name>"
7255   [(set (match_operand:V8SF 0 "register_operand" "=v")
7258 	    (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7260 	  (parallel [(const_int 0) (const_int 0)
7261 		     (const_int 2) (const_int 2)
7262 		     (const_int 4) (const_int 4)
7263 		     (const_int 6) (const_int 6)])))]
7264   "TARGET_AVX && <mask_avx512vl_condition>"
7265   "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7266   [(set_attr "type" "sse")
7267    (set_attr "prefix" "vex")
7268    (set_attr "mode" "V8SF")])
;; 128-bit SSE3 form.
7270 (define_insn "sse3_movsldup<mask_name>"
7271   [(set (match_operand:V4SF 0 "register_operand" "=v")
7274 	    (match_operand:V4SF 1 "vector_operand" "vBm")
7276 	  (parallel [(const_int 0)
7280   "TARGET_SSE3 && <mask_avx512vl_condition>"
7281   "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7282   [(set_attr "type" "sse")
7283    (set_attr "prefix_rep" "1")
7284    (set_attr "prefix" "maybe_vex")
7285    (set_attr "mode" "V4SF")])
;; 512-bit AVX512F form.
7287 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
7288   [(set (match_operand:V16SF 0 "register_operand" "=v")
7291 	    (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7293 	  (parallel [(const_int 0) (const_int 0)
7294 		     (const_int 2) (const_int 2)
7295 		     (const_int 4) (const_int 4)
7296 		     (const_int 6) (const_int 6)
7297 		     (const_int 8) (const_int 8)
7298 		     (const_int 10) (const_int 10)
7299 		     (const_int 12) (const_int 12)
7300 		     (const_int 14) (const_int 14)])))]
7302   "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7303   [(set_attr "type" "sse")
7304    (set_attr "prefix" "evex")
7305    (set_attr "mode" "V16SF")])
;; vshufps, 256-bit: decompose the 8-bit immediate into eight element
;; indices (each 2-bit field selects within a lane; +4/+8/+12 offsets
;; place the selections in the correct lane/source).
7307 (define_expand "avx_shufps256<mask_expand4_name>"
7308   [(match_operand:V8SF 0 "register_operand")
7309    (match_operand:V8SF 1 "register_operand")
7310    (match_operand:V8SF 2 "nonimmediate_operand")
7311    (match_operand:SI 3 "const_int_operand")]
7314   int mask = INTVAL (operands[3]);
7315   emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
7318 						     GEN_INT ((mask >> 0) & 3),
7319 						     GEN_INT ((mask >> 2) & 3),
7320 						     GEN_INT (((mask >> 4) & 3) + 8),
7321 						     GEN_INT (((mask >> 6) & 3) + 8),
7322 						     GEN_INT (((mask >> 0) & 3) + 4),
7323 						     GEN_INT (((mask >> 2) & 3) + 4),
7324 						     GEN_INT (((mask >> 4) & 3) + 12),
7325 						     GEN_INT (((mask >> 6) & 3) + 12)
7326 						     <mask_expand4_args>));
7330 ;; One bit in mask selects 2 elements.
;; The insn condition requires the two lanes to use the same selections
;; (high-lane index == low-lane index + 4); the output template
;; reassembles the immediate from the per-element indices.
7331 (define_insn "avx_shufps256_1<mask_name>"
7332   [(set (match_operand:V8SF 0 "register_operand" "=v")
7335 	    (match_operand:V8SF 1 "register_operand" "v")
7336 	    (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7337 	  (parallel [(match_operand 3  "const_0_to_3_operand"  )
7338 		     (match_operand 4  "const_0_to_3_operand"  )
7339 		     (match_operand 5  "const_8_to_11_operand" )
7340 		     (match_operand 6  "const_8_to_11_operand" )
7341 		     (match_operand 7  "const_4_to_7_operand"  )
7342 		     (match_operand 8  "const_4_to_7_operand"  )
7343 		     (match_operand 9  "const_12_to_15_operand")
7344 		     (match_operand 10 "const_12_to_15_operand")])))]
7346    && <mask_avx512vl_condition>
7347    && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7348        && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7349        && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7350        && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
7353   mask = INTVAL (operands[3]);
7354   mask |= INTVAL (operands[4]) << 2;
7355   mask |= (INTVAL (operands[5]) - 8) << 4;
7356   mask |= (INTVAL (operands[6]) - 8) << 6;
7357   operands[3] = GEN_INT (mask);
7359   return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
7361   [(set_attr "type" "sseshuf")
7362    (set_attr "length_immediate" "1")
7363    (set_attr "prefix" "<mask_prefix>")
7364    (set_attr "mode" "V8SF")])
;; shufps, 128-bit: split the 8-bit immediate into four 2-bit element
;; selectors (low two pick from operand 1, high two from operand 2,
;; hence the +4 offsets).
7366 (define_expand "sse_shufps<mask_expand4_name>"
7367   [(match_operand:V4SF 0 "register_operand")
7368    (match_operand:V4SF 1 "register_operand")
7369    (match_operand:V4SF 2 "vector_operand")
7370    (match_operand:SI 3 "const_int_operand")]
7373   int mask = INTVAL (operands[3]);
7374   emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
7377 						     GEN_INT ((mask >> 0) & 3),
7378 						     GEN_INT ((mask >> 2) & 3),
7379 						     GEN_INT (((mask >> 4) & 3) + 4),
7380 						     GEN_INT (((mask >> 6) & 3) + 4)
7381 						     <mask_expand4_args>));
;; Masked variant: operand 7 is the merge source (or zero), operand 8
;; the mask register; the template rebuilds the immediate.
7385 (define_insn "sse_shufps_v4sf_mask"
7386   [(set (match_operand:V4SF 0 "register_operand" "=v")
7390 	      (match_operand:V4SF 1 "register_operand" "v")
7391 	      (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7392 	    (parallel [(match_operand 3 "const_0_to_3_operand")
7393 		       (match_operand 4 "const_0_to_3_operand")
7394 		       (match_operand 5 "const_4_to_7_operand")
7395 		       (match_operand 6 "const_4_to_7_operand")]))
7396 	  (match_operand:V4SF 7 "nonimm_or_0_operand" "0C")
7397 	  (match_operand:QI 8 "register_operand" "Yk")))]
7401   mask |= INTVAL (operands[3]) << 0;
7402   mask |= INTVAL (operands[4]) << 2;
7403   mask |= (INTVAL (operands[5]) - 4) << 4;
7404   mask |= (INTVAL (operands[6]) - 4) << 6;
7405   operands[3] = GEN_INT (mask);
7407   return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
7409   [(set_attr "type" "sseshuf")
7410    (set_attr "length_immediate" "1")
7411    (set_attr "prefix" "evex")
7412    (set_attr "mode" "V4SF")])
;; Unmasked shufps for any 4-element 128-bit mode (V4SF/V4SI).
7414 (define_insn "sse_shufps_<mode>"
7415   [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
7416 	(vec_select:VI4F_128
7417 	  (vec_concat:<ssedoublevecmode>
7418 	    (match_operand:VI4F_128 1 "register_operand" "0,v")
7419 	    (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
7420 	  (parallel [(match_operand 3 "const_0_to_3_operand")
7421 		     (match_operand 4 "const_0_to_3_operand")
7422 		     (match_operand 5 "const_4_to_7_operand")
7423 		     (match_operand 6 "const_4_to_7_operand")])))]
7427   mask |= INTVAL (operands[3]) << 0;
7428   mask |= INTVAL (operands[4]) << 2;
7429   mask |= (INTVAL (operands[5]) - 4) << 4;
7430   mask |= (INTVAL (operands[6]) - 4) << 6;
7431   operands[3] = GEN_INT (mask);
7433   switch (which_alternative)
7436       return "shufps\t{%3, %2, %0|%0, %2, %3}";
7438       return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7443   [(set_attr "isa" "noavx,avx")
7444    (set_attr "type" "sseshuf")
7445    (set_attr "length_immediate" "1")
7446    (set_attr "prefix" "orig,maybe_evex")
7447    (set_attr "mode" "V4SF")])
;; Store the high pair of a V4SF (elements 2 and 3) to a V2SF
;; destination; register destinations use movhlps/movlps forms.
7449 (define_insn "sse_storehps"
7450   [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
7452 	  (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
7453 	  (parallel [(const_int 2) (const_int 3)])))]
7454   "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7456    %vmovhps\t{%1, %0|%q0, %1}
7457    %vmovhlps\t{%1, %d0|%d0, %1}
7458    %vmovlps\t{%H1, %d0|%d0, %H1}"
7459   [(set_attr "type" "ssemov")
7460    (set_attr "prefix" "maybe_vex")
7461    (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Load a V2SF into the high pair of a V4SF, keeping the low pair of
;; operand 1.  Expander legitimizes operands before emitting the insn.
7463 (define_expand "sse_loadhps_exp"
7464   [(set (match_operand:V4SF 0 "nonimmediate_operand")
7467 	    (match_operand:V4SF 1 "nonimmediate_operand")
7468 	    (parallel [(const_int 0) (const_int 1)]))
7469 	  (match_operand:V2SF 2 "nonimmediate_operand")))]
7472   rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7474   emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
7476   /* Fix up the destination if needed.  */
7477   if (dst != operands[0])
7478     emit_move_insn (operands[0], dst);
7483 (define_insn "sse_loadhps"
7484   [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,v,x,v,o")
7487 	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7488 	    (parallel [(const_int 0) (const_int 1)]))
7489 	  (match_operand:V2SF 2 "nonimmediate_operand"   " m,m,x,v,v")))]
7492    movhps\t{%2, %0|%0, %q2}
7493    vmovhps\t{%2, %1, %0|%0, %1, %q2}
7494    movlhps\t{%2, %0|%0, %2}
7495    vmovlhps\t{%2, %1, %0|%0, %1, %2}
7496    %vmovlps\t{%2, %H0|%H0, %2}"
7497   [(set_attr "isa" "noavx,avx,noavx,avx,*")
7498    (set_attr "type" "ssemov")
7499    (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7500    (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Store the low pair of a V4SF (elements 0 and 1) to a V2SF.
7502 (define_insn "sse_storelps"
7503   [(set (match_operand:V2SF 0 "nonimmediate_operand"   "=m,v,v")
7505 	  (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
7506 	  (parallel [(const_int 0) (const_int 1)])))]
7507   "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7509    %vmovlps\t{%1, %0|%q0, %1}
7510    %vmovaps\t{%1, %0|%0, %1}
7511    %vmovlps\t{%1, %d0|%d0, %q1}"
7512   [(set_attr "type" "ssemov")
7513    (set_attr "prefix" "maybe_vex")
7514    (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Load a V2SF into the low pair of a V4SF, keeping the high pair of
;; operand 1.  Expander legitimizes operands before emitting the insn.
7516 (define_expand "sse_loadlps_exp"
7517   [(set (match_operand:V4SF 0 "nonimmediate_operand")
7519 	  (match_operand:V2SF 2 "nonimmediate_operand")
7521 	    (match_operand:V4SF 1 "nonimmediate_operand")
7522 	    (parallel [(const_int 2) (const_int 3)]))))]
7525   rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7527   emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
7529   /* Fix up the destination if needed.  */
7530   if (dst != operands[0])
7531     emit_move_insn (operands[0], dst);
;; Register-to-register forms use shufps with the 0xe4 identity-order
;; immediate; memory forms use movlps.
7536 (define_insn "sse_loadlps"
7537   [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,v,x,v,m")
7539 	  (match_operand:V2SF 2 "nonimmediate_operand"   " 0,v,m,m,v")
7541 	    (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
7542 	    (parallel [(const_int 2) (const_int 3)]))))]
7545    shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
7546    vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
7547    movlps\t{%2, %0|%0, %q2}
7548    vmovlps\t{%2, %1, %0|%0, %1, %q2}
7549    %vmovlps\t{%2, %0|%q0, %2}"
7550   [(set_attr "isa" "noavx,avx,noavx,avx,*")
7551    (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
7552    (set (attr "length_immediate")
7553      (if_then_else (eq_attr "alternative" "0,1")
7555 		   (const_string "*")))
7556    (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7557    (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movss: take element 0 from operand 2 and the rest from operand 1.
7559 (define_insn "sse_movss"
7560   [(set (match_operand:V4SF 0 "register_operand"   "=x,v")
7562 	  (match_operand:V4SF 2 "register_operand" " x,v")
7563 	  (match_operand:V4SF 1 "register_operand" " 0,v")
7567    movss\t{%2, %0|%0, %2}
7568    vmovss\t{%2, %1, %0|%0, %1, %2}"
7569   [(set_attr "isa" "noavx,avx")
7570    (set_attr "type" "ssemov")
7571    (set_attr "prefix" "orig,maybe_evex")
7572    (set_attr "mode" "SF")])
;; vbroadcastss from element 0 of a register source.
7574 (define_insn "avx2_vec_dup<mode>"
7575   [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
7576 	(vec_duplicate:VF1_128_256
7578 	    (match_operand:V4SF 1 "register_operand" "v")
7579 	    (parallel [(const_int 0)]))))]
7581   "vbroadcastss\t{%1, %0|%0, %1}"
7582   [(set_attr "type" "sselog1")
7583    (set_attr "prefix" "maybe_evex")
7584    (set_attr "mode" "<MODE>")])
;; Same broadcast, but element 0 is selected from a V8SF source (%x1
;; prints the low 128-bit register).
7586 (define_insn "avx2_vec_dupv8sf_1"
7587   [(set (match_operand:V8SF 0 "register_operand" "=v")
7590 	    (match_operand:V8SF 1 "register_operand" "v")
7591 	    (parallel [(const_int 0)]))))]
7593   "vbroadcastss\t{%x1, %0|%0, %x1}"
7594   [(set_attr "type" "sselog1")
7595    (set_attr "prefix" "maybe_evex")
7596    (set_attr "mode" "V8SF")])
;; 512-bit broadcast of element 0; suffix picks ss/sd per scalar mode.
7598 (define_insn "avx512f_vec_dup<mode>_1"
7599   [(set (match_operand:VF_512 0 "register_operand" "=v")
7600 	(vec_duplicate:VF_512
7601 	  (vec_select:<ssescalarmode>
7602 	    (match_operand:VF_512 1 "register_operand" "v")
7603 	    (parallel [(const_int 0)]))))]
7605   "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
7606   [(set_attr "type" "sselog1")
7607    (set_attr "prefix" "evex")
7608    (set_attr "mode" "<MODE>")])
7610 ;; Although insertps takes register source, we prefer
7611 ;; unpcklps with register source since it is shorter.
;; Concatenate two SF scalars into a V2SF; alternatives cover SSE
;; unpcklps/insertps, a zero second element via movss/movd, and MMX
;; punpckldq forms.
7612 (define_insn "*vec_concatv2sf_sse4_1"
7613   [(set (match_operand:V2SF 0 "register_operand"
7614 	  "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
7616 	  (match_operand:SF 1 "nonimmediate_operand"
7617 	  "  0, 0,Yv, 0,0, v,m, 0 , m")
7618 	  (match_operand:SF 2 "nonimm_or_0_operand"
7619 	  " Yr,*x,Yv, m,m, m,C,*ym, C")))]
7620   "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7622    unpcklps\t{%2, %0|%0, %2}
7623    unpcklps\t{%2, %0|%0, %2}
7624    vunpcklps\t{%2, %1, %0|%0, %1, %2}
7625    insertps\t{$0x10, %2, %0|%0, %2, 0x10}
7626    insertps\t{$0x10, %2, %0|%0, %2, 0x10}
7627    vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
7628    %vmovss\t{%1, %0|%0, %1}
7629    punpckldq\t{%2, %0|%0, %2}
7630    movd\t{%1, %0|%0, %1}"
7632      (cond [(eq_attr "alternative" "0,1,3,4")
7633 	      (const_string "noavx")
7634 	    (eq_attr "alternative" "2,5")
7635 	      (const_string "avx")
7637 	   (const_string "*")))
7639      (cond [(eq_attr "alternative" "6")
7640 	      (const_string "ssemov")
7641 	    (eq_attr "alternative" "7")
7642 	      (const_string "mmxcvt")
7643 	    (eq_attr "alternative" "8")
7644 	      (const_string "mmxmov")
7646 	   (const_string "sselog")))
7647    (set (attr "mmx_isa")
7648      (if_then_else (eq_attr "alternative" "7,8")
7649 		   (const_string "native")
7650 		   (const_string "*")))
7651    (set (attr "prefix_data16")
7652      (if_then_else (eq_attr "alternative" "3,4")
7654 		   (const_string "*")))
7655    (set (attr "prefix_extra")
7656      (if_then_else (eq_attr "alternative" "3,4,5")
7658 		   (const_string "*")))
7659    (set (attr "length_immediate")
7660      (if_then_else (eq_attr "alternative" "3,4,5")
7662 		   (const_string "*")))
7663    (set (attr "prefix")
7664      (cond [(eq_attr "alternative" "2,5")
7665 	      (const_string "maybe_evex")
7666 	    (eq_attr "alternative" "6")
7667 	      (const_string "maybe_vex")
7669 	   (const_string "orig")))
7670    (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
7672 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7673 ;; vector_operand for operand 2 and *not* allowing memory for the SSE
7674 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Pre-SSE4.1 fallback: operand 2 must be a register or zero.
7675 (define_insn "*vec_concatv2sf_sse"
7676   [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
7678 	  (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
7679 	  (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
7682    unpcklps\t{%2, %0|%0, %2}
7683    movss\t{%1, %0|%0, %1}
7684    punpckldq\t{%2, %0|%0, %2}
7685    movd\t{%1, %0|%0, %1}"
7686   [(set_attr "mmx_isa" "*,*,native,native")
7687    (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7688    (set_attr "mode" "V4SF,SF,DI,DI")])
;; Concatenate two V2SF halves into a V4SF: movlhps for register
;; sources, movhps for a memory second half.
7690 (define_insn "*vec_concatv4sf"
7691   [(set (match_operand:V4SF 0 "register_operand"       "=x,v,x,v")
7693 	  (match_operand:V2SF 1 "register_operand"     " 0,v,0,v")
7694 	  (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
7697    movlhps\t{%2, %0|%0, %2}
7698    vmovlhps\t{%2, %1, %0|%0, %1, %2}
7699    movhps\t{%2, %0|%0, %q2}
7700    vmovhps\t{%2, %1, %0|%0, %1, %q2}"
7701   [(set_attr "isa" "noavx,avx,noavx,avx")
7702    (set_attr "type" "ssemov")
7703    (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
7704    (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Concat with a zero upper half: movq zero-extends the low 64 bits.
7706 (define_insn "*vec_concatv4sf_0"
7707   [(set (match_operand:V4SF 0 "register_operand"       "=v")
7709 	  (match_operand:V2SF 1 "nonimmediate_operand" "xm")
7710 	  (match_operand:V2SF 2 "const0_operand"       " C")))]
7712   "%vmovq\t{%1, %0|%0, %1}"
7713   [(set_attr "type" "ssemov")
7714    (set_attr "prefix" "maybe_vex")
7715    (set_attr "mode" "DF")])
7717 ;; Avoid combining registers from different units in a single alternative,
7718 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Set element 0 of a 128-bit vector from a scalar, merging with
;; operand 1 (which may be zero, giving a zero-extending load/move).
7719 (define_insn "vec_set<mode>_0"
7720   [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
7721 	  "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x  ,m ,m   ,m")
7723 	  (vec_duplicate:VI4F_128
7724 	    (match_operand:<ssescalarmode> 2 "general_operand"
7725 	  " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
7726 	  (match_operand:VI4F_128 1 "nonimm_or_0_operand"
7727 	  " C , C,C,C,C ,C,0,v,0  ,0  ,x  ,0 ,0   ,0")
7731    insertps\t{$0xe, %2, %0|%0, %2, 0xe}
7732    insertps\t{$0xe, %2, %0|%0, %2, 0xe}
7733    vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
7734    %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
7735    %vmovd\t{%2, %0|%0, %2}
7736    movss\t{%2, %0|%0, %2}
7737    movss\t{%2, %0|%0, %2}
7738    vmovss\t{%2, %1, %0|%0, %1, %2}
7739    pinsrd\t{$0, %2, %0|%0, %2, 0}
7740    pinsrd\t{$0, %2, %0|%0, %2, 0}
7741    vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
7746      (cond [(eq_attr "alternative" "0,1,8,9")
7747 	      (const_string "sse4_noavx")
7748 	    (eq_attr "alternative" "2,7,10")
7749 	      (const_string "avx")
7750 	    (eq_attr "alternative" "3,4")
7751 	      (const_string "sse2")
7752 	    (eq_attr "alternative" "5,6")
7753 	      (const_string "noavx")
7755 	   (const_string "*")))
7757      (cond [(eq_attr "alternative" "0,1,2,8,9,10")
7758 	      (const_string "sselog")
7759 	    (eq_attr "alternative" "12")
7760 	      (const_string "imov")
7761 	    (eq_attr "alternative" "13")
7762 	      (const_string "fmov")
7764 	   (const_string "ssemov")))
7765    (set (attr "prefix_extra")
7766      (if_then_else (eq_attr "alternative" "8,9,10")
7768 		   (const_string "*")))
7769    (set (attr "length_immediate")
7770      (if_then_else (eq_attr "alternative" "8,9,10")
7772 		   (const_string "*")))
7773    (set (attr "prefix")
7774      (cond [(eq_attr "alternative" "0,1,5,6,8,9")
7775 	      (const_string "orig")
7776 	    (eq_attr "alternative" "2")
7777 	      (const_string "maybe_evex")
7778 	    (eq_attr "alternative" "3,4")
7779 	      (const_string "maybe_vex")
7780 	    (eq_attr "alternative" "7,10")
7781 	      (const_string "vex")
7783 	   (const_string "*")))
7784    (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
7785    (set (attr "preferred_for_speed")
7786      (cond [(eq_attr "alternative" "4")
7787 	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
7789 	   (symbol_ref "true")))])
7791 ;; A subset is vec_setv4sf.
;; insertps into the element selected by operand 3 (a one-hot mask whose
;; log2 gives the element index, shifted into insertps's COUNT_D field).
7792 (define_insn "*vec_setv4sf_sse4_1"
7793   [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7796 	    (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
7797 	  (match_operand:V4SF 1 "register_operand" "0,0,v")
7798 	  (match_operand:SI 3 "const_int_operand")))]
7800    && ((unsigned) exact_log2 (INTVAL (operands[3]))
7801        < GET_MODE_NUNITS (V4SFmode))"
7803   operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
7804   switch (which_alternative)
7808       return "insertps\t{%3, %2, %0|%0, %2, %3}";
7810       return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7815   [(set_attr "isa" "noavx,noavx,avx")
7816    (set_attr "type" "sselog")
7817    (set_attr "prefix_data16" "1,1,*")
7818    (set_attr "prefix_extra" "1")
7819    (set_attr "length_immediate" "1")
7820    (set_attr "prefix" "orig,orig,maybe_evex")
7821    (set_attr "mode" "V4SF")])
7823 ;; All of vinsertps, vmovss, vmovd clear also the higher bits.
;; Set element 0 of a 256/512-bit vector with the rest forced to zero
;; (operand 1 must be const0); relies on the implicit upper-bit clearing
;; noted above, operating on the low 128-bit register (%x0).
7824 (define_insn "vec_set<mode>_0"
7825   [(set (match_operand:VI4F_256_512 0 "register_operand" "=v,v,v")
7826 	(vec_merge:VI4F_256_512
7827 	  (vec_duplicate:VI4F_256_512
7828 	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "v,m,r"))
7829 	  (match_operand:VI4F_256_512 1 "const0_operand" "C,C,C")
7833    vinsertps\t{$0xe, %2, %2, %x0|%x0, %2, %2, 0xe}
7834    vmov<ssescalarmodesuffix>\t{%x2, %x0|%x0, %2}
7835    vmovd\t{%2, %x0|%x0, %2}"
7837      (if_then_else (eq_attr "alternative" "0")
7838 		   (const_string "sselog")
7839 		   (const_string "ssemov")))
7840    (set_attr "prefix" "maybe_evex")
7841    (set_attr "mode" "SF,<ssescalarmode>,SI")
7842    (set (attr "preferred_for_speed")
7843      (cond [(eq_attr "alternative" "2")
7844 	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
7846 	   (symbol_ref "true")))])
;; The SSE4.1 insertps builtin with an arbitrary 8-bit immediate.
;; When the source is a memory operand, insertps loads a single SF element,
;; so bits 6-7 of the immediate (the source-element selector) are folded
;; into the memory address and cleared from the immediate.
7848 (define_insn "sse4_1_insertps"
7849 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7850 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
7851 (match_operand:V4SF 1 "register_operand" "0,0,v")
7852 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
7856 if (MEM_P (operands[2]))
;; count_s = source element index (immediate bits 6-7).
7858 unsigned count_s = INTVAL (operands[3]) >> 6;
7860 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
7861 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
7863 switch (which_alternative)
7867 return "insertps\t{%3, %2, %0|%0, %2, %3}";
7869 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7874 [(set_attr "isa" "noavx,noavx,avx")
7875 (set_attr "type" "sselog")
7876 (set_attr "prefix_data16" "1,1,*")
7877 (set_attr "prefix_extra" "1")
7878 (set_attr "length_immediate" "1")
7879 (set_attr "prefix" "orig,orig,maybe_evex")
7880 (set_attr "mode" "V4SF")])
;; After reload, a store to memory of a 128-bit vector built from a
;; duplicated scalar (merged at element 0, per the surrounding pattern) is
;; rewritten as a plain scalar store of the low element.
7883 [(set (match_operand:VI4F_128 0 "memory_operand")
7885 (vec_duplicate:VI4F_128
7886 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
7889 "TARGET_SSE && reload_completed"
7890 [(set (match_dup 0) (match_dup 1))]
7891 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
;; Generic vec_set entry point: defer to the C expander, which picks the
;; best insertion sequence for the target ISA and element index.
7893 (define_expand "vec_set<mode>"
7894 [(match_operand:V 0 "register_operand")
7895 (match_operand:<ssescalarmode> 1 "register_operand")
7896 (match_operand 2 "const_int_operand")]
7899 ix86_expand_vector_set (false, operands[0], operands[1],
7900 INTVAL (operands[2]));
;; Extract element 0 of a V4SF: after reload this is just a lowpart move
;; (SF occupies the low 32 bits of the vector register / memory).
7904 (define_insn_and_split "*vec_extractv4sf_0"
7905 [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
7907 (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
7908 (parallel [(const_int 0)])))]
7909 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7911 "&& reload_completed"
7912 [(set (match_dup 0) (match_dup 1))]
7913 "operands[1] = gen_lowpart (SFmode, operands[1]);")
;; SSE4.1 extractps: extract element 1..3 of a V4SF to a GPR or memory.
;; If the destination ended up in an SSE register, (v)extractps through a
;; GPR would be wasteful, so after reload the extraction is re-expressed
;; as an in-register shuffle (shufps / unpckhps) keyed on the index.
7915 (define_insn_and_split "*sse4_1_extractps"
7916 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
7918 (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
7919 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
7922 extractps\t{%2, %1, %0|%0, %1, %2}
7923 extractps\t{%2, %1, %0|%0, %1, %2}
7924 vextractps\t{%2, %1, %0|%0, %1, %2}
7927 "&& reload_completed && SSE_REG_P (operands[0])"
7930 rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
7931 switch (INTVAL (operands[2]))
;; Bring the selected element into lane 0 with a shufps of the source with
;; itself (indices + 4 select from the second copy).
7935 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
7936 operands[2], operands[2],
7937 GEN_INT (INTVAL (operands[2]) + 4),
7938 GEN_INT (INTVAL (operands[2]) + 4)));
7941 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
7944 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
7949 [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
7950 (set_attr "type" "sselog,sselog,sselog,*,*")
7951 (set_attr "prefix_data16" "1,1,1,*,*")
7952 (set_attr "prefix_extra" "1,1,1,*,*")
7953 (set_attr "length_immediate" "1,1,1,*,*")
7954 (set_attr "prefix" "orig,orig,maybe_evex,*,*")
7955 (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
;; Extract any element of a V4SF that lives in memory: fold the element
;; offset into the address and load a single SF.
7957 (define_insn_and_split "*vec_extractv4sf_mem"
7958 [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
7960 (match_operand:V4SF 1 "memory_operand" "o,o,o")
7961 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
7964 "&& reload_completed"
7965 [(set (match_dup 0) (match_dup 1))]
7967 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; ISA prefix for the quarter-width (128-bit) extract builtins.
7970 (define_mode_attr extract_type
7971 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
;; Element-count/size suffix of the vextract mnemonic.
7973 (define_mode_attr extract_suf
7974 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
7976 (define_mode_iterator AVX512_VEC
7977 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
;; Masked extraction of 128-bit quarter number <operand 2> of a 512-bit
;; vector.  The quarter index is expanded into explicit element indices
;; (mask*4 .. mask*4+3 for 4-byte elements, mask*2 .. mask*2+1 for 8-byte
;; elements) before emitting the underlying insn.  A memory destination
;; that is not also the merge source goes through a temporary register.
7979 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
7980 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
7981 (match_operand:AVX512_VEC 1 "register_operand")
7982 (match_operand:SI 2 "const_0_to_3_operand")
7983 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
7984 (match_operand:QI 4 "register_operand")]
7988 mask = INTVAL (operands[2]);
7989 rtx dest = operands[0];
7991 if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
7992 dest = gen_reg_rtx (<ssequartermode>mode);
7994 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
7995 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
7996 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
7997 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
8000 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
8001 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
8003 if (dest != operands[0])
8004 emit_move_insn (operands[0], dest);
;; Masked store of one aligned 128-bit slice (two 8-byte elements) of a
;; 512-bit vector.  The element indices must be consecutive and start on an
;; even boundary; the vextract immediate is the first index divided by 2.
;; The merge source (operand 4) must be the destination memory itself.
8008 (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
8009 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
8010 (vec_merge:<ssequartermode>
8011 (vec_select:<ssequartermode>
8012 (match_operand:V8FI 1 "register_operand" "v")
8013 (parallel [(match_operand 2 "const_0_to_7_operand")
8014 (match_operand 3 "const_0_to_7_operand")]))
8015 (match_operand:<ssequartermode> 4 "memory_operand" "0")
8016 (match_operand:QI 5 "register_operand" "Yk")))]
8018 && INTVAL (operands[2]) % 2 == 0
8019 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8020 && rtx_equal_p (operands[4], operands[0])"
8022 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
8023 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
8025 [(set_attr "type" "sselog")
8026 (set_attr "prefix_extra" "1")
8027 (set_attr "length_immediate" "1")
8028 (set_attr "memory" "store")
8029 (set_attr "prefix" "evex")
8030 (set_attr "mode" "<sseinsnmode>")])
;; Same as above for 4-byte elements: four consecutive indices starting on
;; a multiple of 4; the vextract immediate is the first index divided by 4.
8032 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
8033 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
8034 (vec_merge:<ssequartermode>
8035 (vec_select:<ssequartermode>
8036 (match_operand:V16FI 1 "register_operand" "v")
8037 (parallel [(match_operand 2 "const_0_to_15_operand")
8038 (match_operand 3 "const_0_to_15_operand")
8039 (match_operand 4 "const_0_to_15_operand")
8040 (match_operand 5 "const_0_to_15_operand")]))
8041 (match_operand:<ssequartermode> 6 "memory_operand" "0")
8042 (match_operand:QI 7 "register_operand" "Yk")))]
8044 && INTVAL (operands[2]) % 4 == 0
8045 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8046 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8047 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
8048 && rtx_equal_p (operands[6], operands[0])"
8050 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8051 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
8053 [(set_attr "type" "sselog")
8054 (set_attr "prefix_extra" "1")
8055 (set_attr "length_immediate" "1")
8056 (set_attr "memory" "store")
8057 (set_attr "prefix" "evex")
8058 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked/maskable extract of one aligned 128-bit slice (two 8-byte
;; elements) of a 512-bit vector; immediate = first element index / 2.
8060 (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
8061 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
8062 (vec_select:<ssequartermode>
8063 (match_operand:V8FI 1 "register_operand" "v")
8064 (parallel [(match_operand 2 "const_0_to_7_operand")
8065 (match_operand 3 "const_0_to_7_operand")])))]
8067 && INTVAL (operands[2]) % 2 == 0
8068 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
8070 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
8071 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
8073 [(set_attr "type" "sselog1")
8074 (set_attr "prefix_extra" "1")
8075 (set_attr "length_immediate" "1")
8076 (set_attr "prefix" "evex")
8077 (set_attr "mode" "<sseinsnmode>")])
;; Extracting elements {0,1} is a lowpart move.  Without AVX512VL an
;; extended SSE register (xmm16+) has no 128-bit moves, so instead the
;; destination is widened to the full mode and the source register reused.
8080 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8081 (vec_select:<ssequartermode>
8082 (match_operand:V8FI 1 "register_operand")
8083 (parallel [(const_int 0) (const_int 1)])))]
8087 || REG_P (operands[0])
8088 || !EXT_REX_SSE_REG_P (operands[1]))"
8089 [(set (match_dup 0) (match_dup 1))]
8091 if (!TARGET_AVX512VL
8092 && REG_P (operands[0])
8093 && EXT_REX_SSE_REG_P (operands[1]))
8095 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8097 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
;; Unmasked/maskable extract of one aligned 128-bit slice (four 4-byte
;; elements) of a 512-bit vector; immediate = first element index / 4.
8100 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
8101 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
8102 (vec_select:<ssequartermode>
8103 (match_operand:V16FI 1 "register_operand" "v")
8104 (parallel [(match_operand 2 "const_0_to_15_operand")
8105 (match_operand 3 "const_0_to_15_operand")
8106 (match_operand 4 "const_0_to_15_operand")
8107 (match_operand 5 "const_0_to_15_operand")])))]
8109 && INTVAL (operands[2]) % 4 == 0
8110 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8111 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8112 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
8114 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8115 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
8117 [(set_attr "type" "sselog1")
8118 (set_attr "prefix_extra" "1")
8119 (set_attr "length_immediate" "1")
8120 (set_attr "prefix" "evex")
8121 (set_attr "mode" "<sseinsnmode>")])
;; Elements {0..3} of a V16 vector form the lowpart: split to a plain move,
;; with the same EXT_REX/no-AVX512VL workaround as the 64x2 case above.
8124 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8125 (vec_select:<ssequartermode>
8126 (match_operand:V16FI 1 "register_operand")
8127 (parallel [(const_int 0) (const_int 1)
8128 (const_int 2) (const_int 3)])))]
8132 || REG_P (operands[0])
8133 || !EXT_REX_SSE_REG_P (operands[1]))"
8134 [(set (match_dup 0) (match_dup 1))]
8136 if (!TARGET_AVX512VL
8137 && REG_P (operands[0])
8138 && EXT_REX_SSE_REG_P (operands[1]))
8140 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8142 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
;; ISA prefix / mnemonic suffix for the half-width (256-bit) extracts.
8145 (define_mode_attr extract_type_2
8146 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
8148 (define_mode_attr extract_suf_2
8149 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
8151 (define_mode_iterator AVX512_VEC_2
8152 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
;; Masked extraction of the low (operand 2 == 0) or high (== 1) 256-bit
;; half of a 512-bit vector.  A memory destination that is not also the
;; merge source goes through a temporary register.
8154 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
8155 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8156 (match_operand:AVX512_VEC_2 1 "register_operand")
8157 (match_operand:SI 2 "const_0_to_1_operand")
8158 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
8159 (match_operand:QI 4 "register_operand")]
8162 rtx (*insn)(rtx, rtx, rtx, rtx);
8163 rtx dest = operands[0];
8165 if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
8166 dest = gen_reg_rtx (<ssehalfvecmode>mode);
8168 switch (INTVAL (operands[2]))
8171 insn = gen_vec_extract_lo_<mode>_mask;
8174 insn = gen_vec_extract_hi_<mode>_mask;
8180 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
8181 if (dest != operands[0])
8182 emit_move_insn (operands[0], dest);
;; Elements {0..3} of a V8 (8-byte-element) 512-bit vector: split into a
;; lowpart move once that is representable.
8187 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8188 (vec_select:<ssehalfvecmode>
8189 (match_operand:V8FI 1 "nonimmediate_operand")
8190 (parallel [(const_int 0) (const_int 1)
8191 (const_int 2) (const_int 3)])))]
8192 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8195 || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
8196 [(set (match_dup 0) (match_dup 1))]
8197 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Masked store of the low 256-bit half; merge source must equal the
;; destination memory.
8199 (define_insn "vec_extract_lo_<mode>_maskm"
8200 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8201 (vec_merge:<ssehalfvecmode>
8202 (vec_select:<ssehalfvecmode>
8203 (match_operand:V8FI 1 "register_operand" "v")
8204 (parallel [(const_int 0) (const_int 1)
8205 (const_int 2) (const_int 3)]))
8206 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8207 (match_operand:QI 3 "register_operand" "Yk")))]
8209 && rtx_equal_p (operands[2], operands[0])"
8210 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
8211 [(set_attr "type" "sselog1")
8212 (set_attr "prefix_extra" "1")
8213 (set_attr "length_immediate" "1")
8214 (set_attr "prefix" "evex")
8215 (set_attr "mode" "<sseinsnmode>")])
;; Low 256-bit half of an 8-byte-element vector.  Emits vextract...64x4 $0
;; when a mask is applied or when a plain move is unavailable (no AVX512VL
;; and a register source); otherwise falls through to a move.
8217 (define_insn "vec_extract_lo_<mode><mask_name>"
8218 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>,v")
8219 (vec_select:<ssehalfvecmode>
8220 (match_operand:V8FI 1 "<store_mask_predicate>" "v,v,<store_mask_constraint>")
8221 (parallel [(const_int 0) (const_int 1)
8222 (const_int 2) (const_int 3)])))]
8224 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
8226 if (<mask_applied> || (!TARGET_AVX512VL && !MEM_P (operands[1])))
8227 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
8231 [(set_attr "type" "sselog1")
8232 (set_attr "prefix_extra" "1")
8233 (set_attr "length_immediate" "1")
8234 (set_attr "memory" "none,store,load")
8235 (set_attr "prefix" "evex")
8236 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high 256-bit half (elements 4..7) of an
;; 8-byte-element 512-bit vector.
8238 (define_insn "vec_extract_hi_<mode>_maskm"
8239 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8240 (vec_merge:<ssehalfvecmode>
8241 (vec_select:<ssehalfvecmode>
8242 (match_operand:V8FI 1 "register_operand" "v")
8243 (parallel [(const_int 4) (const_int 5)
8244 (const_int 6) (const_int 7)]))
8245 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8246 (match_operand:QI 3 "register_operand" "Yk")))]
8248 && rtx_equal_p (operands[2], operands[0])"
8249 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
;; NOTE(review): sibling extract patterns use type "sselog1"; "sselog"
;; here looks inconsistent — confirm against the attribute definitions.
8250 [(set_attr "type" "sselog")
8251 (set_attr "prefix_extra" "1")
8252 (set_attr "length_immediate" "1")
8253 (set_attr "memory" "store")
8254 (set_attr "prefix" "evex")
8255 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked/maskable high-half extract of the same vectors.
8257 (define_insn "vec_extract_hi_<mode><mask_name>"
8258 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
8259 (vec_select:<ssehalfvecmode>
8260 (match_operand:V8FI 1 "register_operand" "v")
8261 (parallel [(const_int 4) (const_int 5)
8262 (const_int 6) (const_int 7)])))]
8264 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
8265 [(set_attr "type" "sselog1")
8266 (set_attr "prefix_extra" "1")
8267 (set_attr "length_immediate" "1")
8268 (set_attr "prefix" "evex")
8269 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high 256-bit half (elements 8..15) of a
;; 4-byte-element 512-bit vector; needs AVX512DQ's vextract{ps|d}32x8.
8271 (define_insn "vec_extract_hi_<mode>_maskm"
8272 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8273 (vec_merge:<ssehalfvecmode>
8274 (vec_select:<ssehalfvecmode>
8275 (match_operand:V16FI 1 "register_operand" "v")
8276 (parallel [(const_int 8) (const_int 9)
8277 (const_int 10) (const_int 11)
8278 (const_int 12) (const_int 13)
8279 (const_int 14) (const_int 15)]))
8280 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8281 (match_operand:QI 3 "register_operand" "Yk")))]
8283 && rtx_equal_p (operands[2], operands[0])"
8284 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
8285 [(set_attr "type" "sselog1")
8286 (set_attr "prefix_extra" "1")
8287 (set_attr "length_immediate" "1")
8288 (set_attr "prefix" "evex")
8289 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked/maskable high-half extract: 32x8 with AVX512DQ, otherwise the
;; unmasked AVX512F vextracti64x4 form (second alternative).
8291 (define_insn "vec_extract_hi_<mode><mask_name>"
8292 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
8293 (vec_select:<ssehalfvecmode>
8294 (match_operand:V16FI 1 "register_operand" "v,v")
8295 (parallel [(const_int 8) (const_int 9)
8296 (const_int 10) (const_int 11)
8297 (const_int 12) (const_int 13)
8298 (const_int 14) (const_int 15)])))]
8299 "TARGET_AVX512F && <mask_avx512dq_condition>"
8301 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
8302 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8303 [(set_attr "type" "sselog1")
8304 (set_attr "prefix_extra" "1")
8305 (set_attr "isa" "avx512dq,noavx512dq")
8306 (set_attr "length_immediate" "1")
8307 (set_attr "prefix" "evex")
8308 (set_attr "mode" "<sseinsnmode>")])
;; Masked 128-bit-half extraction from a 256-bit vector (AVX512VL+DQ).
;; A temporary register is used when the destination cannot be expressed
;; directly by the underlying lo/hi insns' constraints (the cases are
;; spelled out per element size below).
8310 (define_expand "avx512vl_vextractf128<mode>"
8311 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8312 (match_operand:VI48F_256 1 "register_operand")
8313 (match_operand:SI 2 "const_0_to_1_operand")
8314 (match_operand:<ssehalfvecmode> 3 "nonimm_or_0_operand")
8315 (match_operand:QI 4 "register_operand")]
8316 "TARGET_AVX512DQ && TARGET_AVX512VL"
8318 rtx (*insn)(rtx, rtx, rtx, rtx);
8319 rtx dest = operands[0];
8322 && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
8323 /* For V8S[IF]mode there are maskm insns with =m and 0
8325 ? !rtx_equal_p (dest, operands[3])
8326 /* For V4D[IF]mode, hi insns don't allow memory, and
8327 lo insns have =m and 0C constraints. */
8328 : (operands[2] != const0_rtx
8329 || (!rtx_equal_p (dest, operands[3])
8330 && GET_CODE (operands[3]) != CONST_VECTOR))))
8331 dest = gen_reg_rtx (<ssehalfvecmode>mode);
8332 switch (INTVAL (operands[2]))
8335 insn = gen_vec_extract_lo_<mode>_mask;
8338 insn = gen_vec_extract_hi_<mode>_mask;
8344 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
8345 if (dest != operands[0])
8346 emit_move_insn (operands[0], dest);
;; Plain AVX vextractf128/vextracti128: pick the lo or hi pattern.
8350 (define_expand "avx_vextractf128<mode>"
8351 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8352 (match_operand:V_256 1 "register_operand")
8353 (match_operand:SI 2 "const_0_to_1_operand")]
8356 rtx (*insn)(rtx, rtx);
8358 switch (INTVAL (operands[2]))
8361 insn = gen_vec_extract_lo_<mode>;
8364 insn = gen_vec_extract_hi_<mode>;
8370 emit_insn (insn (operands[0], operands[1]));
;; Low 256-bit half of a 4-byte-element 512-bit vector.  Emits
;; vextract...32x8 $0 when masked, or when no plain move can be used
;; (EXT_REX source register without AVX512VL); otherwise a move.
8374 (define_insn "vec_extract_lo_<mode><mask_name>"
8375 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
8376 (vec_select:<ssehalfvecmode>
8377 (match_operand:V16FI 1 "<store_mask_predicate>"
8378 "v,<store_mask_constraint>,v")
8379 (parallel [(const_int 0) (const_int 1)
8380 (const_int 2) (const_int 3)
8381 (const_int 4) (const_int 5)
8382 (const_int 6) (const_int 7)])))]
8384 && <mask_mode512bit_condition>
8385 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
8388 || (!TARGET_AVX512VL
8389 && !REG_P (operands[0])
8390 && EXT_REX_SSE_REG_P (operands[1])))
8391 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
8395 [(set_attr "type" "sselog1")
8396 (set_attr "prefix_extra" "1")
8397 (set_attr "length_immediate" "1")
8398 (set_attr "memory" "none,load,store")
8399 (set_attr "prefix" "evex")
8400 (set_attr "mode" "<sseinsnmode>")])
;; Post-reload split of the unmasked low-half extract into a lowpart move,
;; widening the destination when the source is an EXT_REX register and
;; AVX512VL is unavailable (no 256-bit access to xmm16+ without VL).
8403 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8404 (vec_select:<ssehalfvecmode>
8405 (match_operand:V16FI 1 "nonimmediate_operand")
8406 (parallel [(const_int 0) (const_int 1)
8407 (const_int 2) (const_int 3)
8408 (const_int 4) (const_int 5)
8409 (const_int 6) (const_int 7)])))]
8410 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8413 || REG_P (operands[0])
8414 || !EXT_REX_SSE_REG_P (operands[1]))"
8415 [(set (match_dup 0) (match_dup 1))]
8417 if (!TARGET_AVX512VL
8418 && REG_P (operands[0])
8419 && EXT_REX_SSE_REG_P (operands[1]))
8421 = lowpart_subreg (<MODE>mode, operands[0], <ssehalfvecmode>mode);
8423 operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
;; Low 128-bit half of a 256-bit vector of 8-byte elements.
8426 (define_insn "vec_extract_lo_<mode><mask_name>"
8427 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,v,m")
8428 (vec_select:<ssehalfvecmode>
8429 (match_operand:VI8F_256 1 "<store_mask_predicate>"
8430 "v,<store_mask_constraint>,v")
8431 (parallel [(const_int 0) (const_int 1)])))]
8433 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
8434 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
8437 return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
8441 [(set_attr "type" "sselog1")
8442 (set_attr "prefix_extra" "1")
8443 (set_attr "length_immediate" "1")
8444 (set_attr "memory" "none,load,store")
8445 (set_attr "prefix" "evex")
;; NOTE(review): mode "XI" (512-bit) on a 256-bit extract looks odd next to
;; the <sseinsnmode> used by the sibling patterns — confirm intent.
8446 (set_attr "mode" "XI")])
;; Unmasked low half is just a lowpart move after reload.
8449 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8450 (vec_select:<ssehalfvecmode>
8451 (match_operand:VI8F_256 1 "nonimmediate_operand")
8452 (parallel [(const_int 0) (const_int 1)])))]
8453 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8454 && reload_completed"
8455 [(set (match_dup 0) (match_dup 1))]
8456 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; High 128-bit half (elements 2,3): 64x2/32x4 EVEX forms with AVX512VL,
;; otherwise the classic AVX vextract{f|i}128.
8458 (define_insn "vec_extract_hi_<mode><mask_name>"
8459 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
8460 (vec_select:<ssehalfvecmode>
8461 (match_operand:VI8F_256 1 "register_operand" "v,v")
8462 (parallel [(const_int 2) (const_int 3)])))]
8463 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
8465 if (TARGET_AVX512VL)
8467 if (TARGET_AVX512DQ)
8468 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
8470 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
8473 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
8475 [(set_attr "type" "sselog1")
8476 (set_attr "prefix_extra" "1")
8477 (set_attr "length_immediate" "1")
8478 (set_attr "prefix" "vex")
8479 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked low 128-bit half of a 256-bit vector of 4-byte elements:
;; lowpart move after reload.
8482 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8483 (vec_select:<ssehalfvecmode>
8484 (match_operand:VI4F_256 1 "nonimmediate_operand")
8485 (parallel [(const_int 0) (const_int 1)
8486 (const_int 2) (const_int 3)])))]
8487 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8488 && reload_completed"
8489 [(set (match_dup 0) (match_dup 1))]
8490 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Maskable low-half extract; emits vextract...32x4 $0 when masked.
8492 (define_insn "vec_extract_lo_<mode><mask_name>"
8493 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
8494 "=<store_mask_constraint>,v")
8495 (vec_select:<ssehalfvecmode>
8496 (match_operand:VI4F_256 1 "<store_mask_predicate>"
8497 "v,<store_mask_constraint>")
8498 (parallel [(const_int 0) (const_int 1)
8499 (const_int 2) (const_int 3)])))]
8501 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
8502 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
8505 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
8509 [(set_attr "type" "sselog1")
8510 (set_attr "prefix_extra" "1")
8511 (set_attr "length_immediate" "1")
8512 (set_attr "prefix" "evex")
8513 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the low 128-bit half; merge source must equal the
;; destination memory.
8515 (define_insn "vec_extract_lo_<mode>_maskm"
8516 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8517 (vec_merge:<ssehalfvecmode>
8518 (vec_select:<ssehalfvecmode>
8519 (match_operand:VI4F_256 1 "register_operand" "v")
8520 (parallel [(const_int 0) (const_int 1)
8521 (const_int 2) (const_int 3)]))
8522 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8523 (match_operand:QI 3 "register_operand" "Yk")))]
8524 "TARGET_AVX512VL && TARGET_AVX512F
8525 && rtx_equal_p (operands[2], operands[0])"
8526 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
8527 [(set_attr "type" "sselog1")
8528 (set_attr "prefix_extra" "1")
8529 (set_attr "length_immediate" "1")
8530 (set_attr "prefix" "evex")
8531 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high 128-bit half (elements 4..7) of a 256-bit
;; vector of 4-byte elements (V8SF/V8SI), using vextract{ps|d}32x4 $0x1.
;; The merge source (operand 2) must be the destination memory itself.
8533 (define_insn "vec_extract_hi_<mode>_maskm"
8534 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8535 (vec_merge:<ssehalfvecmode>
8536 (vec_select:<ssehalfvecmode>
8537 (match_operand:VI4F_256 1 "register_operand" "v")
8538 (parallel [(const_int 4) (const_int 5)
8539 (const_int 6) (const_int 7)]))
8540 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
;; The write mask must have QImode, as in every other *_maskm / *_mask
;; pattern in this family; the previous <ssehalfvecmode> (a vector mode)
;; could never match the QImode mask register the expanders generate.
8541 (match_operand:QI 3 "register_operand" "Yk")))]
8542 "TARGET_AVX512F && TARGET_AVX512VL
8543 && rtx_equal_p (operands[2], operands[0])"
8544 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
8545 [(set_attr "type" "sselog1")
8546 (set_attr "length_immediate" "1")
8547 (set_attr "prefix" "evex")
8548 (set_attr "mode" "<sseinsnmode>")])
;; Masked register-destination extract of the high 128-bit half; the merge
;; source may be the old destination value ("0") or zero ("C").
8550 (define_insn "vec_extract_hi_<mode>_mask"
8551 [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v")
8552 (vec_merge:<ssehalfvecmode>
8553 (vec_select:<ssehalfvecmode>
8554 (match_operand:VI4F_256 1 "register_operand" "v")
8555 (parallel [(const_int 4) (const_int 5)
8556 (const_int 6) (const_int 7)]))
8557 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C")
8558 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
8560 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8561 [(set_attr "type" "sselog1")
8562 (set_attr "length_immediate" "1")
8563 (set_attr "prefix" "evex")
8564 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked high-half extract: classic vextract{f|i}128 on plain AVX,
;; EVEX vextract...32x4 when the operands require AVX512VL registers.
8566 (define_insn "vec_extract_hi_<mode>"
8567 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
8568 (vec_select:<ssehalfvecmode>
8569 (match_operand:VI4F_256 1 "register_operand" "x, v")
8570 (parallel [(const_int 4) (const_int 5)
8571 (const_int 6) (const_int 7)])))]
8574 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
8575 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8576 [(set_attr "isa" "*, avx512vl")
8577 (set_attr "prefix" "vex, evex")
8578 (set_attr "type" "sselog1")
8579 (set_attr "length_immediate" "1")
8580 (set_attr "mode" "<sseinsnmode>")])
;; Low 256 bits of a V32HI.  Normally a lowpart move (split below); when
;; the source is an EXT_REX register without AVX512VL (no 256-bit access
;; to xmm16+), vextracti64x4 $0 is emitted instead, and the split widens
;; a register destination to V32HImode so the move stays legal.
8582 (define_insn_and_split "vec_extract_lo_v32hi"
8583 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,v,m")
8585 (match_operand:V32HI 1 "nonimmediate_operand" "v,m,v")
8586 (parallel [(const_int 0) (const_int 1)
8587 (const_int 2) (const_int 3)
8588 (const_int 4) (const_int 5)
8589 (const_int 6) (const_int 7)
8590 (const_int 8) (const_int 9)
8591 (const_int 10) (const_int 11)
8592 (const_int 12) (const_int 13)
8593 (const_int 14) (const_int 15)])))]
8594 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8597 || REG_P (operands[0])
8598 || !EXT_REX_SSE_REG_P (operands[1]))
8601 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8603 "&& reload_completed
8605 || REG_P (operands[0])
8606 || !EXT_REX_SSE_REG_P (operands[1]))"
8607 [(set (match_dup 0) (match_dup 1))]
8609 if (!TARGET_AVX512VL
8610 && REG_P (operands[0])
8611 && EXT_REX_SSE_REG_P (operands[1]))
8612 operands[0] = lowpart_subreg (V32HImode, operands[0], V16HImode);
8614 operands[1] = gen_lowpart (V16HImode, operands[1]);
8616 [(set_attr "type" "sselog1")
8617 (set_attr "prefix_extra" "1")
8618 (set_attr "length_immediate" "1")
8619 (set_attr "memory" "none,load,store")
8620 (set_attr "prefix" "evex")
8621 (set_attr "mode" "XI")])
;; High 256 bits of a V32HI via vextracti64x4 $1.
8623 (define_insn "vec_extract_hi_v32hi"
8624 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
8626 (match_operand:V32HI 1 "register_operand" "v")
8627 (parallel [(const_int 16) (const_int 17)
8628 (const_int 18) (const_int 19)
8629 (const_int 20) (const_int 21)
8630 (const_int 22) (const_int 23)
8631 (const_int 24) (const_int 25)
8632 (const_int 26) (const_int 27)
8633 (const_int 28) (const_int 29)
8634 (const_int 30) (const_int 31)])))]
8636 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8637 [(set_attr "type" "sselog1")
8638 (set_attr "prefix_extra" "1")
8639 (set_attr "length_immediate" "1")
8640 (set_attr "prefix" "evex")
8641 (set_attr "mode" "XI")])
;; Low 128 bits of a V16HI: lowpart move after reload.
8643 (define_insn_and_split "vec_extract_lo_v16hi"
8644 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
8646 (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
8647 (parallel [(const_int 0) (const_int 1)
8648 (const_int 2) (const_int 3)
8649 (const_int 4) (const_int 5)
8650 (const_int 6) (const_int 7)])))]
8651 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8653 "&& reload_completed"
8654 [(set (match_dup 0) (match_dup 1))]
8655 "operands[1] = gen_lowpart (V8HImode, operands[1]);")
;; High 128 bits of a V16HI.  Third alternative handles an EXT_REX source
;; without AVX512VL by operating on the containing 512-bit register (%g1).
8657 (define_insn "vec_extract_hi_v16hi"
8658 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm,vm,vm")
8660 (match_operand:V16HI 1 "register_operand" "x,v,v")
8661 (parallel [(const_int 8) (const_int 9)
8662 (const_int 10) (const_int 11)
8663 (const_int 12) (const_int 13)
8664 (const_int 14) (const_int 15)])))]
8667 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
8668 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
8669 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
8670 [(set_attr "type" "sselog1")
8671 (set_attr "prefix_extra" "1")
8672 (set_attr "length_immediate" "1")
8673 (set_attr "isa" "*,avx512dq,avx512f")
8674 (set_attr "prefix" "vex,evex,evex")
8675 (set_attr "mode" "OI")])
;; Low 256 bits of a V64QI.  Same scheme as vec_extract_lo_v32hi: a plain
;; lowpart move when possible, vextracti64x4 $0 for an EXT_REX source
;; without AVX512VL, with the split widening a register destination.
8677 (define_insn_and_split "vec_extract_lo_v64qi"
8678 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,v,m")
8680 (match_operand:V64QI 1 "nonimmediate_operand" "v,m,v")
8681 (parallel [(const_int 0) (const_int 1)
8682 (const_int 2) (const_int 3)
8683 (const_int 4) (const_int 5)
8684 (const_int 6) (const_int 7)
8685 (const_int 8) (const_int 9)
8686 (const_int 10) (const_int 11)
8687 (const_int 12) (const_int 13)
8688 (const_int 14) (const_int 15)
8689 (const_int 16) (const_int 17)
8690 (const_int 18) (const_int 19)
8691 (const_int 20) (const_int 21)
8692 (const_int 22) (const_int 23)
8693 (const_int 24) (const_int 25)
8694 (const_int 26) (const_int 27)
8695 (const_int 28) (const_int 29)
8696 (const_int 30) (const_int 31)])))]
8697 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8700 || REG_P (operands[0])
8701 || !EXT_REX_SSE_REG_P (operands[1]))
8704 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8706 "&& reload_completed
8708 || REG_P (operands[0])
8709 || !EXT_REX_SSE_REG_P (operands[1]))"
8710 [(set (match_dup 0) (match_dup 1))]
8712 if (!TARGET_AVX512VL
8713 && REG_P (operands[0])
8714 && EXT_REX_SSE_REG_P (operands[1]))
8715 operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode);
8717 operands[1] = gen_lowpart (V32QImode, operands[1]);
8719 [(set_attr "type" "sselog1")
8720 (set_attr "prefix_extra" "1")
8721 (set_attr "length_immediate" "1")
8722 (set_attr "memory" "none,load,store")
8723 (set_attr "prefix" "evex")
8724 (set_attr "mode" "XI")])
;; High 256 bits of a V64QI via vextracti64x4 $1.
8726 (define_insn "vec_extract_hi_v64qi"
8727 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=vm")
8729 (match_operand:V64QI 1 "register_operand" "v")
8730 (parallel [(const_int 32) (const_int 33)
8731 (const_int 34) (const_int 35)
8732 (const_int 36) (const_int 37)
8733 (const_int 38) (const_int 39)
8734 (const_int 40) (const_int 41)
8735 (const_int 42) (const_int 43)
8736 (const_int 44) (const_int 45)
8737 (const_int 46) (const_int 47)
8738 (const_int 48) (const_int 49)
8739 (const_int 50) (const_int 51)
8740 (const_int 52) (const_int 53)
8741 (const_int 54) (const_int 55)
8742 (const_int 56) (const_int 57)
8743 (const_int 58) (const_int 59)
8744 (const_int 60) (const_int 61)
8745 (const_int 62) (const_int 63)])))]
8747 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8748 [(set_attr "type" "sselog1")
8749 (set_attr "prefix_extra" "1")
8750 (set_attr "length_immediate" "1")
8751 (set_attr "prefix" "evex")
8752 (set_attr "mode" "XI")])
;; Low 128 bits of a V32QI: lowpart move after reload.
8754 (define_insn_and_split "vec_extract_lo_v32qi"
8755 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
8757 (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
8758 (parallel [(const_int 0) (const_int 1)
8759 (const_int 2) (const_int 3)
8760 (const_int 4) (const_int 5)
8761 (const_int 6) (const_int 7)
8762 (const_int 8) (const_int 9)
8763 (const_int 10) (const_int 11)
8764 (const_int 12) (const_int 13)
8765 (const_int 14) (const_int 15)])))]
8766 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8768 "&& reload_completed"
8769 [(set (match_dup 0) (match_dup 1))]
8770 "operands[1] = gen_lowpart (V16QImode, operands[1]);")
;; High 128 bits of a V32QI.  Third alternative handles an EXT_REX source
;; without AVX512VL by operating on the containing 512-bit register (%g1).
8772 (define_insn "vec_extract_hi_v32qi"
8773 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=xm,vm,vm")
8775 (match_operand:V32QI 1 "register_operand" "x,v,v")
8776 (parallel [(const_int 16) (const_int 17)
8777 (const_int 18) (const_int 19)
8778 (const_int 20) (const_int 21)
8779 (const_int 22) (const_int 23)
8780 (const_int 24) (const_int 25)
8781 (const_int 26) (const_int 27)
8782 (const_int 28) (const_int 29)
8783 (const_int 30) (const_int 31)])))]
8786 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
8787 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
8788 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
8789 [(set_attr "type" "sselog1")
8790 (set_attr "prefix_extra" "1")
8791 (set_attr "length_immediate" "1")
8792 (set_attr "isa" "*,avx512dq,avx512f")
8793 (set_attr "prefix" "vex,evex,evex")
8794 (set_attr "mode" "OI")])
8796 ;; Modes handled by vec_extract patterns.
8797 (define_mode_iterator VEC_EXTRACT_MODE
8798 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
8799 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
8800 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
8801 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
8802 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
8803 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
8804 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
8806 (define_expand "vec_extract<mode><ssescalarmodelower>"
8807 [(match_operand:<ssescalarmode> 0 "register_operand")
8808 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
8809 (match_operand 2 "const_int_operand")]
8812 ix86_expand_vector_extract (false, operands[0], operands[1],
8813 INTVAL (operands[2]));
8817 (define_expand "vec_extract<mode><ssehalfvecmodelower>"
8818 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8819 (match_operand:V_256_512 1 "register_operand")
8820 (match_operand 2 "const_0_to_1_operand")]
8823 if (INTVAL (operands[2]))
8824 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
8826 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
8830 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8832 ;; Parallel double-precision floating point element swizzling
8834 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8836 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
8837 [(set (match_operand:V8DF 0 "register_operand" "=v")
8840 (match_operand:V8DF 1 "register_operand" "v")
8841 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
8842 (parallel [(const_int 1) (const_int 9)
8843 (const_int 3) (const_int 11)
8844 (const_int 5) (const_int 13)
8845 (const_int 7) (const_int 15)])))]
8847 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8848 [(set_attr "type" "sselog")
8849 (set_attr "prefix" "evex")
8850 (set_attr "mode" "V8DF")])
8852 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
8853 (define_insn "avx_unpckhpd256<mask_name>"
8854 [(set (match_operand:V4DF 0 "register_operand" "=v")
8857 (match_operand:V4DF 1 "register_operand" "v")
8858 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8859 (parallel [(const_int 1) (const_int 5)
8860 (const_int 3) (const_int 7)])))]
8861 "TARGET_AVX && <mask_avx512vl_condition>"
8862 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8863 [(set_attr "type" "sselog")
8864 (set_attr "prefix" "vex")
8865 (set_attr "mode" "V4DF")])
8867 (define_expand "vec_interleave_highv4df"
8871 (match_operand:V4DF 1 "register_operand")
8872 (match_operand:V4DF 2 "nonimmediate_operand"))
8873 (parallel [(const_int 0) (const_int 4)
8874 (const_int 2) (const_int 6)])))
8880 (parallel [(const_int 1) (const_int 5)
8881 (const_int 3) (const_int 7)])))
8882 (set (match_operand:V4DF 0 "register_operand")
8887 (parallel [(const_int 2) (const_int 3)
8888 (const_int 6) (const_int 7)])))]
8891 operands[3] = gen_reg_rtx (V4DFmode);
8892 operands[4] = gen_reg_rtx (V4DFmode);
8896 (define_insn "avx512vl_unpckhpd128_mask"
8897 [(set (match_operand:V2DF 0 "register_operand" "=v")
8901 (match_operand:V2DF 1 "register_operand" "v")
8902 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8903 (parallel [(const_int 1) (const_int 3)]))
8904 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
8905 (match_operand:QI 4 "register_operand" "Yk")))]
8907 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
8908 [(set_attr "type" "sselog")
8909 (set_attr "prefix" "evex")
8910 (set_attr "mode" "V2DF")])
8912 (define_expand "vec_interleave_highv2df"
8913 [(set (match_operand:V2DF 0 "register_operand")
8916 (match_operand:V2DF 1 "nonimmediate_operand")
8917 (match_operand:V2DF 2 "nonimmediate_operand"))
8918 (parallel [(const_int 1)
8922 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
8923 operands[2] = force_reg (V2DFmode, operands[2]);
8926 (define_insn "*vec_interleave_highv2df"
8927 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
8930 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
8931 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
8932 (parallel [(const_int 1)
8934 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
8936 unpckhpd\t{%2, %0|%0, %2}
8937 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
8938 %vmovddup\t{%H1, %0|%0, %H1}
8939 movlpd\t{%H1, %0|%0, %H1}
8940 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
8941 %vmovhpd\t{%1, %0|%q0, %1}"
8942 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
8943 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
8944 (set (attr "prefix_data16")
8945 (if_then_else (eq_attr "alternative" "3,5")
8947 (const_string "*")))
8948 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
8949 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
8951 (define_expand "avx512f_movddup512<mask_name>"
8952 [(set (match_operand:V8DF 0 "register_operand")
8955 (match_operand:V8DF 1 "nonimmediate_operand")
8957 (parallel [(const_int 0) (const_int 8)
8958 (const_int 2) (const_int 10)
8959 (const_int 4) (const_int 12)
8960 (const_int 6) (const_int 14)])))]
8963 (define_expand "avx512f_unpcklpd512<mask_name>"
8964 [(set (match_operand:V8DF 0 "register_operand")
8967 (match_operand:V8DF 1 "register_operand")
8968 (match_operand:V8DF 2 "nonimmediate_operand"))
8969 (parallel [(const_int 0) (const_int 8)
8970 (const_int 2) (const_int 10)
8971 (const_int 4) (const_int 12)
8972 (const_int 6) (const_int 14)])))]
8975 (define_insn "*avx512f_unpcklpd512<mask_name>"
8976 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
8979 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
8980 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
8981 (parallel [(const_int 0) (const_int 8)
8982 (const_int 2) (const_int 10)
8983 (const_int 4) (const_int 12)
8984 (const_int 6) (const_int 14)])))]
8987 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
8988 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8989 [(set_attr "type" "sselog")
8990 (set_attr "prefix" "evex")
8991 (set_attr "mode" "V8DF")])
8993 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
8994 (define_expand "avx_movddup256<mask_name>"
8995 [(set (match_operand:V4DF 0 "register_operand")
8998 (match_operand:V4DF 1 "nonimmediate_operand")
9000 (parallel [(const_int 0) (const_int 4)
9001 (const_int 2) (const_int 6)])))]
9002 "TARGET_AVX && <mask_avx512vl_condition>")
9004 (define_expand "avx_unpcklpd256<mask_name>"
9005 [(set (match_operand:V4DF 0 "register_operand")
9008 (match_operand:V4DF 1 "register_operand")
9009 (match_operand:V4DF 2 "nonimmediate_operand"))
9010 (parallel [(const_int 0) (const_int 4)
9011 (const_int 2) (const_int 6)])))]
9012 "TARGET_AVX && <mask_avx512vl_condition>")
9014 (define_insn "*avx_unpcklpd256<mask_name>"
9015 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
9018 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
9019 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
9020 (parallel [(const_int 0) (const_int 4)
9021 (const_int 2) (const_int 6)])))]
9022 "TARGET_AVX && <mask_avx512vl_condition>"
9024 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
9025 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
9026 [(set_attr "type" "sselog")
9027 (set_attr "prefix" "vex")
9028 (set_attr "mode" "V4DF")])
9030 (define_expand "vec_interleave_lowv4df"
9034 (match_operand:V4DF 1 "register_operand")
9035 (match_operand:V4DF 2 "nonimmediate_operand"))
9036 (parallel [(const_int 0) (const_int 4)
9037 (const_int 2) (const_int 6)])))
9043 (parallel [(const_int 1) (const_int 5)
9044 (const_int 3) (const_int 7)])))
9045 (set (match_operand:V4DF 0 "register_operand")
9050 (parallel [(const_int 0) (const_int 1)
9051 (const_int 4) (const_int 5)])))]
9054 operands[3] = gen_reg_rtx (V4DFmode);
9055 operands[4] = gen_reg_rtx (V4DFmode);
9058 (define_insn "avx512vl_unpcklpd128_mask"
9059 [(set (match_operand:V2DF 0 "register_operand" "=v")
9063 (match_operand:V2DF 1 "register_operand" "v")
9064 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9065 (parallel [(const_int 0) (const_int 2)]))
9066 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9067 (match_operand:QI 4 "register_operand" "Yk")))]
9069 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9070 [(set_attr "type" "sselog")
9071 (set_attr "prefix" "evex")
9072 (set_attr "mode" "V2DF")])
9074 (define_expand "vec_interleave_lowv2df"
9075 [(set (match_operand:V2DF 0 "register_operand")
9078 (match_operand:V2DF 1 "nonimmediate_operand")
9079 (match_operand:V2DF 2 "nonimmediate_operand"))
9080 (parallel [(const_int 0)
9084 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
9085 operands[1] = force_reg (V2DFmode, operands[1]);
9088 (define_insn "*vec_interleave_lowv2df"
9089 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
9092 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
9093 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
9094 (parallel [(const_int 0)
9096 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
9098 unpcklpd\t{%2, %0|%0, %2}
9099 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9100 %vmovddup\t{%1, %0|%0, %q1}
9101 movhpd\t{%2, %0|%0, %q2}
9102 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
9103 %vmovlpd\t{%2, %H0|%H0, %2}"
9104 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9105 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9106 (set (attr "prefix_data16")
9107 (if_then_else (eq_attr "alternative" "3,5")
9109 (const_string "*")))
9110 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9111 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
9114 [(set (match_operand:V2DF 0 "memory_operand")
9117 (match_operand:V2DF 1 "register_operand")
9119 (parallel [(const_int 0)
9121 "TARGET_SSE3 && reload_completed"
9124 rtx low = gen_lowpart (DFmode, operands[1]);
9126 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
9127 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
9132 [(set (match_operand:V2DF 0 "register_operand")
9135 (match_operand:V2DF 1 "memory_operand")
9137 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
9138 (match_operand:SI 3 "const_int_operand")])))]
9139 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
9140 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
9142 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
9145 (define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
9146 [(set (match_operand:VF_128 0 "register_operand" "=v")
9149 [(match_operand:VF_128 1 "register_operand" "v")
9150 (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
9155 "vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_scalar_mask_op3>}"
9156 [(set_attr "prefix" "evex")
9157 (set_attr "mode" "<ssescalarmode>")])
9159 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
9160 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9162 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
9163 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
9166 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
9167 [(set_attr "prefix" "evex")
9168 (set_attr "mode" "<MODE>")])
9170 (define_expand "<avx512>_vternlog<mode>_maskz"
9171 [(match_operand:VI48_AVX512VL 0 "register_operand")
9172 (match_operand:VI48_AVX512VL 1 "register_operand")
9173 (match_operand:VI48_AVX512VL 2 "register_operand")
9174 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
9175 (match_operand:SI 4 "const_0_to_255_operand")
9176 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9179 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
9180 operands[0], operands[1], operands[2], operands[3],
9181 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
9185 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
9186 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9187 (unspec:VI48_AVX512VL
9188 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9189 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9190 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9191 (match_operand:SI 4 "const_0_to_255_operand")]
9194 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
9195 [(set_attr "type" "sselog")
9196 (set_attr "prefix" "evex")
9197 (set_attr "mode" "<sseinsnmode>")])
9199 (define_insn "<avx512>_vternlog<mode>_mask"
9200 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9201 (vec_merge:VI48_AVX512VL
9202 (unspec:VI48_AVX512VL
9203 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9204 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9205 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9206 (match_operand:SI 4 "const_0_to_255_operand")]
9209 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9211 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
9212 [(set_attr "type" "sselog")
9213 (set_attr "prefix" "evex")
9214 (set_attr "mode" "<sseinsnmode>")])
9216 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
9217 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9218 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
9221 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
9222 [(set_attr "prefix" "evex")
9223 (set_attr "mode" "<MODE>")])
9225 (define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
9226 [(set (match_operand:VF_128 0 "register_operand" "=v")
9229 [(match_operand:VF_128 1 "register_operand" "v")
9230 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
9235 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}";
9236 [(set_attr "prefix" "evex")
9237 (set_attr "mode" "<ssescalarmode>")])
9239 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
9240 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9241 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
9242 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
9243 (match_operand:SI 3 "const_0_to_255_operand")]
9246 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
9247 [(set_attr "prefix" "evex")
9248 (set_attr "mode" "<sseinsnmode>")])
9250 (define_expand "avx512f_shufps512_mask"
9251 [(match_operand:V16SF 0 "register_operand")
9252 (match_operand:V16SF 1 "register_operand")
9253 (match_operand:V16SF 2 "nonimmediate_operand")
9254 (match_operand:SI 3 "const_0_to_255_operand")
9255 (match_operand:V16SF 4 "register_operand")
9256 (match_operand:HI 5 "register_operand")]
9259 int mask = INTVAL (operands[3]);
9260 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
9261 GEN_INT ((mask >> 0) & 3),
9262 GEN_INT ((mask >> 2) & 3),
9263 GEN_INT (((mask >> 4) & 3) + 16),
9264 GEN_INT (((mask >> 6) & 3) + 16),
9265 GEN_INT (((mask >> 0) & 3) + 4),
9266 GEN_INT (((mask >> 2) & 3) + 4),
9267 GEN_INT (((mask >> 4) & 3) + 20),
9268 GEN_INT (((mask >> 6) & 3) + 20),
9269 GEN_INT (((mask >> 0) & 3) + 8),
9270 GEN_INT (((mask >> 2) & 3) + 8),
9271 GEN_INT (((mask >> 4) & 3) + 24),
9272 GEN_INT (((mask >> 6) & 3) + 24),
9273 GEN_INT (((mask >> 0) & 3) + 12),
9274 GEN_INT (((mask >> 2) & 3) + 12),
9275 GEN_INT (((mask >> 4) & 3) + 28),
9276 GEN_INT (((mask >> 6) & 3) + 28),
9277 operands[4], operands[5]));
9282 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
9283 [(match_operand:VF_AVX512VL 0 "register_operand")
9284 (match_operand:VF_AVX512VL 1 "register_operand")
9285 (match_operand:VF_AVX512VL 2 "register_operand")
9286 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9287 (match_operand:SI 4 "const_0_to_255_operand")
9288 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9291 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9292 operands[0], operands[1], operands[2], operands[3],
9293 operands[4], CONST0_RTX (<MODE>mode), operands[5]
9294 <round_saeonly_expand_operand6>));
9298 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
9299 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9301 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9302 (match_operand:VF_AVX512VL 2 "register_operand" "v")
9303 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9304 (match_operand:SI 4 "const_0_to_255_operand")]
9307 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
9308 [(set_attr "prefix" "evex")
9309 (set_attr "mode" "<MODE>")])
9311 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
9312 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9313 (vec_merge:VF_AVX512VL
9315 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9316 (match_operand:VF_AVX512VL 2 "register_operand" "v")
9317 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9318 (match_operand:SI 4 "const_0_to_255_operand")]
9321 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9323 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
9324 [(set_attr "prefix" "evex")
9325 (set_attr "mode" "<MODE>")])
9327 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
9328 [(match_operand:VF_128 0 "register_operand")
9329 (match_operand:VF_128 1 "register_operand")
9330 (match_operand:VF_128 2 "register_operand")
9331 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9332 (match_operand:SI 4 "const_0_to_255_operand")
9333 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9336 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9337 operands[0], operands[1], operands[2], operands[3],
9338 operands[4], CONST0_RTX (<MODE>mode), operands[5]
9339 <round_saeonly_expand_operand6>));
9343 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
9344 [(set (match_operand:VF_128 0 "register_operand" "=v")
9347 [(match_operand:VF_128 1 "register_operand" "0")
9348 (match_operand:VF_128 2 "register_operand" "v")
9349 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
9350 (match_operand:SI 4 "const_0_to_255_operand")]
9355 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %<iptr>3<round_saeonly_sd_mask_op5>, %4}";
9356 [(set_attr "prefix" "evex")
9357 (set_attr "mode" "<ssescalarmode>")])
9359 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
9360 [(set (match_operand:VF_128 0 "register_operand" "=v")
9364 [(match_operand:VF_128 1 "register_operand" "0")
9365 (match_operand:VF_128 2 "register_operand" "v")
9366 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
9367 (match_operand:SI 4 "const_0_to_255_operand")]
9372 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9374 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %<iptr>3<round_saeonly_op6>, %4}";
9375 [(set_attr "prefix" "evex")
9376 (set_attr "mode" "<ssescalarmode>")])
9378 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
9379 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9381 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
9382 (match_operand:SI 2 "const_0_to_255_operand")]
9385 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
9386 [(set_attr "length_immediate" "1")
9387 (set_attr "prefix" "evex")
9388 (set_attr "mode" "<MODE>")])
9390 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
9391 [(set (match_operand:VF_128 0 "register_operand" "=v")
9394 [(match_operand:VF_128 1 "register_operand" "v")
9395 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
9396 (match_operand:SI 3 "const_0_to_255_operand")]
9401 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
9402 [(set_attr "length_immediate" "1")
9403 (set_attr "prefix" "evex")
9404 (set_attr "mode" "<MODE>")])
9406 ;; One bit in mask selects 2 elements.
9407 (define_insn "avx512f_shufps512_1<mask_name>"
9408 [(set (match_operand:V16SF 0 "register_operand" "=v")
9411 (match_operand:V16SF 1 "register_operand" "v")
9412 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
9413 (parallel [(match_operand 3 "const_0_to_3_operand")
9414 (match_operand 4 "const_0_to_3_operand")
9415 (match_operand 5 "const_16_to_19_operand")
9416 (match_operand 6 "const_16_to_19_operand")
9417 (match_operand 7 "const_4_to_7_operand")
9418 (match_operand 8 "const_4_to_7_operand")
9419 (match_operand 9 "const_20_to_23_operand")
9420 (match_operand 10 "const_20_to_23_operand")
9421 (match_operand 11 "const_8_to_11_operand")
9422 (match_operand 12 "const_8_to_11_operand")
9423 (match_operand 13 "const_24_to_27_operand")
9424 (match_operand 14 "const_24_to_27_operand")
9425 (match_operand 15 "const_12_to_15_operand")
9426 (match_operand 16 "const_12_to_15_operand")
9427 (match_operand 17 "const_28_to_31_operand")
9428 (match_operand 18 "const_28_to_31_operand")])))]
9430 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
9431 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
9432 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
9433 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
9434 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
9435 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
9436 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
9437 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
9438 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
9439 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
9440 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
9441 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
9444 mask = INTVAL (operands[3]);
9445 mask |= INTVAL (operands[4]) << 2;
9446 mask |= (INTVAL (operands[5]) - 16) << 4;
9447 mask |= (INTVAL (operands[6]) - 16) << 6;
9448 operands[3] = GEN_INT (mask);
9450 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
9452 [(set_attr "type" "sselog")
9453 (set_attr "length_immediate" "1")
9454 (set_attr "prefix" "evex")
9455 (set_attr "mode" "V16SF")])
9457 (define_expand "avx512f_shufpd512_mask"
9458 [(match_operand:V8DF 0 "register_operand")
9459 (match_operand:V8DF 1 "register_operand")
9460 (match_operand:V8DF 2 "nonimmediate_operand")
9461 (match_operand:SI 3 "const_0_to_255_operand")
9462 (match_operand:V8DF 4 "register_operand")
9463 (match_operand:QI 5 "register_operand")]
9466 int mask = INTVAL (operands[3]);
9467 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
9469 GEN_INT (mask & 2 ? 9 : 8),
9470 GEN_INT (mask & 4 ? 3 : 2),
9471 GEN_INT (mask & 8 ? 11 : 10),
9472 GEN_INT (mask & 16 ? 5 : 4),
9473 GEN_INT (mask & 32 ? 13 : 12),
9474 GEN_INT (mask & 64 ? 7 : 6),
9475 GEN_INT (mask & 128 ? 15 : 14),
9476 operands[4], operands[5]));
9480 (define_insn "avx512f_shufpd512_1<mask_name>"
9481 [(set (match_operand:V8DF 0 "register_operand" "=v")
9484 (match_operand:V8DF 1 "register_operand" "v")
9485 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
9486 (parallel [(match_operand 3 "const_0_to_1_operand")
9487 (match_operand 4 "const_8_to_9_operand")
9488 (match_operand 5 "const_2_to_3_operand")
9489 (match_operand 6 "const_10_to_11_operand")
9490 (match_operand 7 "const_4_to_5_operand")
9491 (match_operand 8 "const_12_to_13_operand")
9492 (match_operand 9 "const_6_to_7_operand")
9493 (match_operand 10 "const_14_to_15_operand")])))]
9497 mask = INTVAL (operands[3]);
9498 mask |= (INTVAL (operands[4]) - 8) << 1;
9499 mask |= (INTVAL (operands[5]) - 2) << 2;
9500 mask |= (INTVAL (operands[6]) - 10) << 3;
9501 mask |= (INTVAL (operands[7]) - 4) << 4;
9502 mask |= (INTVAL (operands[8]) - 12) << 5;
9503 mask |= (INTVAL (operands[9]) - 6) << 6;
9504 mask |= (INTVAL (operands[10]) - 14) << 7;
9505 operands[3] = GEN_INT (mask);
9507 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
9509 [(set_attr "type" "sselog")
9510 (set_attr "length_immediate" "1")
9511 (set_attr "prefix" "evex")
9512 (set_attr "mode" "V8DF")])
9514 (define_expand "avx_shufpd256<mask_expand4_name>"
9515 [(match_operand:V4DF 0 "register_operand")
9516 (match_operand:V4DF 1 "register_operand")
9517 (match_operand:V4DF 2 "nonimmediate_operand")
9518 (match_operand:SI 3 "const_int_operand")]
9521 int mask = INTVAL (operands[3]);
9522 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
9526 GEN_INT (mask & 2 ? 5 : 4),
9527 GEN_INT (mask & 4 ? 3 : 2),
9528 GEN_INT (mask & 8 ? 7 : 6)
9529 <mask_expand4_args>));
9533 (define_insn "avx_shufpd256_1<mask_name>"
9534 [(set (match_operand:V4DF 0 "register_operand" "=v")
9537 (match_operand:V4DF 1 "register_operand" "v")
9538 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
9539 (parallel [(match_operand 3 "const_0_to_1_operand")
9540 (match_operand 4 "const_4_to_5_operand")
9541 (match_operand 5 "const_2_to_3_operand")
9542 (match_operand 6 "const_6_to_7_operand")])))]
9543 "TARGET_AVX && <mask_avx512vl_condition>"
9546 mask = INTVAL (operands[3]);
9547 mask |= (INTVAL (operands[4]) - 4) << 1;
9548 mask |= (INTVAL (operands[5]) - 2) << 2;
9549 mask |= (INTVAL (operands[6]) - 6) << 3;
9550 operands[3] = GEN_INT (mask);
9552 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
9554 [(set_attr "type" "sseshuf")
9555 (set_attr "length_immediate" "1")
9556 (set_attr "prefix" "vex")
9557 (set_attr "mode" "V4DF")])
9559 (define_expand "sse2_shufpd<mask_expand4_name>"
9560 [(match_operand:V2DF 0 "register_operand")
9561 (match_operand:V2DF 1 "register_operand")
9562 (match_operand:V2DF 2 "vector_operand")
9563 (match_operand:SI 3 "const_int_operand")]
9566 int mask = INTVAL (operands[3]);
9567 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
9568 operands[2], GEN_INT (mask & 1),
9569 GEN_INT (mask & 2 ? 3 : 2)
9570 <mask_expand4_args>));
9574 (define_insn "sse2_shufpd_v2df_mask"
9575 [(set (match_operand:V2DF 0 "register_operand" "=v")
9579 (match_operand:V2DF 1 "register_operand" "v")
9580 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9581 (parallel [(match_operand 3 "const_0_to_1_operand")
9582 (match_operand 4 "const_2_to_3_operand")]))
9583 (match_operand:V2DF 5 "nonimm_or_0_operand" "0C")
9584 (match_operand:QI 6 "register_operand" "Yk")))]
9588 mask = INTVAL (operands[3]);
9589 mask |= (INTVAL (operands[4]) - 2) << 1;
9590 operands[3] = GEN_INT (mask);
9592 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
9594 [(set_attr "type" "sseshuf")
9595 (set_attr "length_immediate" "1")
9596 (set_attr "prefix" "evex")
9597 (set_attr "mode" "V2DF")])
9599 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
9600 (define_insn "avx2_interleave_highv4di<mask_name>"
9601 [(set (match_operand:V4DI 0 "register_operand" "=v")
9604 (match_operand:V4DI 1 "register_operand" "v")
9605 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
9606 (parallel [(const_int 1)
9610 "TARGET_AVX2 && <mask_avx512vl_condition>"
9611 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9612 [(set_attr "type" "sselog")
9613 (set_attr "prefix" "vex")
9614 (set_attr "mode" "OI")])
9616 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
9617 [(set (match_operand:V8DI 0 "register_operand" "=v")
9620 (match_operand:V8DI 1 "register_operand" "v")
9621 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
9622 (parallel [(const_int 1) (const_int 9)
9623 (const_int 3) (const_int 11)
9624 (const_int 5) (const_int 13)
9625 (const_int 7) (const_int 15)])))]
9627 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9628 [(set_attr "type" "sselog")
9629 (set_attr "prefix" "evex")
9630 (set_attr "mode" "XI")])
9632 (define_insn "vec_interleave_highv2di<mask_name>"
9633 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9636 (match_operand:V2DI 1 "register_operand" "0,v")
9637 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
9638 (parallel [(const_int 1)
9640 "TARGET_SSE2 && <mask_avx512vl_condition>"
9642 punpckhqdq\t{%2, %0|%0, %2}
9643 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9644 [(set_attr "isa" "noavx,avx")
9645 (set_attr "type" "sselog")
9646 (set_attr "prefix_data16" "1,*")
9647 (set_attr "prefix" "orig,<mask_prefix>")
9648 (set_attr "mode" "TI")])
9650 (define_insn "avx2_interleave_lowv4di<mask_name>"
9651 [(set (match_operand:V4DI 0 "register_operand" "=v")
9654 (match_operand:V4DI 1 "register_operand" "v")
9655 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
9656 (parallel [(const_int 0)
9660 "TARGET_AVX2 && <mask_avx512vl_condition>"
9661 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9662 [(set_attr "type" "sselog")
9663 (set_attr "prefix" "vex")
9664 (set_attr "mode" "OI")])
9666 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
9667 [(set (match_operand:V8DI 0 "register_operand" "=v")
9670 (match_operand:V8DI 1 "register_operand" "v")
9671 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
9672 (parallel [(const_int 0) (const_int 8)
9673 (const_int 2) (const_int 10)
9674 (const_int 4) (const_int 12)
9675 (const_int 6) (const_int 14)])))]
9677 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9678 [(set_attr "type" "sselog")
9679 (set_attr "prefix" "evex")
9680 (set_attr "mode" "XI")])
9682 (define_insn "vec_interleave_lowv2di<mask_name>"
9683 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9686 (match_operand:V2DI 1 "register_operand" "0,v")
9687 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
9688 (parallel [(const_int 0)
9690 "TARGET_SSE2 && <mask_avx512vl_condition>"
9692 punpcklqdq\t{%2, %0|%0, %2}
9693 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9694 [(set_attr "isa" "noavx,avx")
9695 (set_attr "type" "sselog")
9696 (set_attr "prefix_data16" "1,*")
9697 (set_attr "prefix" "orig,vex")
9698 (set_attr "mode" "TI")])
9700 (define_insn "sse2_shufpd_<mode>"
9701 [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
9702 (vec_select:VI8F_128
9703 (vec_concat:<ssedoublevecmode>
9704 (match_operand:VI8F_128 1 "register_operand" "0,v")
9705 (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
9706 (parallel [(match_operand 3 "const_0_to_1_operand")
9707 (match_operand 4 "const_2_to_3_operand")])))]
9711 mask = INTVAL (operands[3]);
9712 mask |= (INTVAL (operands[4]) - 2) << 1;
9713 operands[3] = GEN_INT (mask);
9715 switch (which_alternative)
9718 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
9720 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9725 [(set_attr "isa" "noavx,avx")
9726 (set_attr "type" "sseshuf")
9727 (set_attr "length_immediate" "1")
9728 (set_attr "prefix" "orig,maybe_evex")
9729 (set_attr "mode" "V2DF")])
9731 ;; Avoid combining registers from different units in a single alternative,
9732 ;; see comment above inline_secondary_memory_needed function in i386.c
9733 (define_insn "sse2_storehpd"
9734 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
9736 (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
9737 (parallel [(const_int 1)])))]
9738 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9740 %vmovhpd\t{%1, %0|%0, %1}
9742 vunpckhpd\t{%d1, %0|%0, %d1}
9746 [(set_attr "isa" "*,noavx,avx,*,*,*")
9747 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
9748 (set (attr "prefix_data16")
9750 (and (eq_attr "alternative" "0")
9751 (not (match_test "TARGET_AVX")))
9753 (const_string "*")))
9754 (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
9755 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
9758 [(set (match_operand:DF 0 "register_operand")
9760 (match_operand:V2DF 1 "memory_operand")
9761 (parallel [(const_int 1)])))]
9762 "TARGET_SSE2 && reload_completed"
9763 [(set (match_dup 0) (match_dup 1))]
9764 "operands[1] = adjust_address (operands[1], DFmode, 8);")
9766 (define_insn "*vec_extractv2df_1_sse"
9767 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
9769 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
9770 (parallel [(const_int 1)])))]
9771 "!TARGET_SSE2 && TARGET_SSE
9772 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9774 movhps\t{%1, %0|%q0, %1}
9775 movhlps\t{%1, %0|%0, %1}
9776 movlps\t{%H1, %0|%0, %H1}"
9777 [(set_attr "type" "ssemov")
9778 (set_attr "mode" "V2SF,V4SF,V2SF")])
9780 ;; Avoid combining registers from different units in a single alternative,
9781 ;; see comment above inline_secondary_memory_needed function in i386.c
9782 (define_insn "sse2_storelpd"
9783 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
9785 (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
9786 (parallel [(const_int 0)])))]
9787 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9789 %vmovlpd\t{%1, %0|%0, %1}
9794 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
9795 (set (attr "prefix_data16")
9796 (if_then_else (eq_attr "alternative" "0")
9798 (const_string "*")))
9799 (set_attr "prefix" "maybe_vex")
9800 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
9803 [(set (match_operand:DF 0 "register_operand")
9805 (match_operand:V2DF 1 "nonimmediate_operand")
9806 (parallel [(const_int 0)])))]
9807 "TARGET_SSE2 && reload_completed"
9808 [(set (match_dup 0) (match_dup 1))]
9809 "operands[1] = gen_lowpart (DFmode, operands[1]);")
9811 (define_insn "*vec_extractv2df_0_sse"
9812 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
9814 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
9815 (parallel [(const_int 0)])))]
9816 "!TARGET_SSE2 && TARGET_SSE
9817 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9819 movlps\t{%1, %0|%0, %1}
9820 movaps\t{%1, %0|%0, %1}
9821 movlps\t{%1, %0|%0, %q1}"
9822 [(set_attr "type" "ssemov")
9823 (set_attr "mode" "V2SF,V4SF,V2SF")])
9825 (define_expand "sse2_loadhpd_exp"
9826 [(set (match_operand:V2DF 0 "nonimmediate_operand")
9829 (match_operand:V2DF 1 "nonimmediate_operand")
9830 (parallel [(const_int 0)]))
9831 (match_operand:DF 2 "nonimmediate_operand")))]
9834 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
9836 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
9838 /* Fix up the destination if needed. */
9839 if (dst != operands[0])
9840 emit_move_insn (operands[0], dst);
9845 ;; Avoid combining registers from different units in a single alternative,
9846 ;; see comment above inline_secondary_memory_needed function in i386.c
9847 (define_insn "sse2_loadhpd"
9848 [(set (match_operand:V2DF 0 "nonimmediate_operand"
9852 (match_operand:V2DF 1 "nonimmediate_operand"
9854 (parallel [(const_int 0)]))
9855 (match_operand:DF 2 "nonimmediate_operand"
9856 " m,m,x,Yv,x,*f,r")))]
9857 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9859 movhpd\t{%2, %0|%0, %2}
9860 vmovhpd\t{%2, %1, %0|%0, %1, %2}
9861 unpcklpd\t{%2, %0|%0, %2}
9862 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9866 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
9867 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
9868 (set (attr "prefix_data16")
9869 (if_then_else (eq_attr "alternative" "0")
9871 (const_string "*")))
9872 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
9873 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
9876 [(set (match_operand:V2DF 0 "memory_operand")
9878 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
9879 (match_operand:DF 1 "register_operand")))]
9880 "TARGET_SSE2 && reload_completed"
9881 [(set (match_dup 0) (match_dup 1))]
9882 "operands[0] = adjust_address (operands[0], DFmode, 8);")
9884 (define_expand "sse2_loadlpd_exp"
9885 [(set (match_operand:V2DF 0 "nonimmediate_operand")
9887 (match_operand:DF 2 "nonimmediate_operand")
9889 (match_operand:V2DF 1 "nonimmediate_operand")
9890 (parallel [(const_int 1)]))))]
9893 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
9895 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
9897 /* Fix up the destination if needed. */
9898 if (dst != operands[0])
9899 emit_move_insn (operands[0], dst);
9904 ;; Avoid combining registers from different units in a single alternative,
9905 ;; see comment above inline_secondary_memory_needed function in i386.c
9906 (define_insn "sse2_loadlpd"
9907 [(set (match_operand:V2DF 0 "nonimmediate_operand"
9908 "=v,x,v,x,v,x,x,v,m,m ,m")
9910 (match_operand:DF 2 "nonimmediate_operand"
9911 "vm,m,m,x,v,0,0,v,x,*f,r")
9913 (match_operand:V2DF 1 "nonimm_or_0_operand"
9914 " C,0,v,0,v,x,o,o,0,0 ,0")
9915 (parallel [(const_int 1)]))))]
9916 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9918 %vmovq\t{%2, %0|%0, %2}
9919 movlpd\t{%2, %0|%0, %2}
9920 vmovlpd\t{%2, %1, %0|%0, %1, %2}
9921 movsd\t{%2, %0|%0, %2}
9922 vmovsd\t{%2, %1, %0|%0, %1, %2}
9923 shufpd\t{$2, %1, %0|%0, %1, 2}
9924 movhpd\t{%H1, %0|%0, %H1}
9925 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
9929 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
9931 (cond [(eq_attr "alternative" "5")
9932 (const_string "sselog")
9933 (eq_attr "alternative" "9")
9934 (const_string "fmov")
9935 (eq_attr "alternative" "10")
9936 (const_string "imov")
9938 (const_string "ssemov")))
9939 (set (attr "prefix_data16")
9940 (if_then_else (eq_attr "alternative" "1,6")
9942 (const_string "*")))
9943 (set (attr "length_immediate")
9944 (if_then_else (eq_attr "alternative" "5")
9946 (const_string "*")))
9947 (set (attr "prefix")
9948 (cond [(eq_attr "alternative" "0")
9949 (const_string "maybe_vex")
9950 (eq_attr "alternative" "1,3,5,6")
9951 (const_string "orig")
9952 (eq_attr "alternative" "2,4,7")
9953 (const_string "maybe_evex")
9955 (const_string "*")))
9956 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
9959 [(set (match_operand:V2DF 0 "memory_operand")
9961 (match_operand:DF 1 "register_operand")
9962 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
9963 "TARGET_SSE2 && reload_completed"
9964 [(set (match_dup 0) (match_dup 1))]
9965 "operands[0] = adjust_address (operands[0], DFmode, 0);")
9967 (define_insn "sse2_movsd"
9968 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
9970 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
9971 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
9975 movsd\t{%2, %0|%0, %2}
9976 vmovsd\t{%2, %1, %0|%0, %1, %2}
9977 movlpd\t{%2, %0|%0, %q2}
9978 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
9979 %vmovlpd\t{%2, %0|%q0, %2}
9980 shufpd\t{$2, %1, %0|%0, %1, 2}
9981 movhps\t{%H1, %0|%0, %H1}
9982 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
9983 %vmovhps\t{%1, %H0|%H0, %1}"
9984 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
9987 (eq_attr "alternative" "5")
9988 (const_string "sselog")
9989 (const_string "ssemov")))
9990 (set (attr "prefix_data16")
9992 (and (eq_attr "alternative" "2,4")
9993 (not (match_test "TARGET_AVX")))
9995 (const_string "*")))
9996 (set (attr "length_immediate")
9997 (if_then_else (eq_attr "alternative" "5")
9999 (const_string "*")))
10000 (set (attr "prefix")
10001 (cond [(eq_attr "alternative" "1,3,7")
10002 (const_string "maybe_evex")
10003 (eq_attr "alternative" "4,8")
10004 (const_string "maybe_vex")
10006 (const_string "orig")))
10007 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
10009 (define_insn "vec_dupv2df<mask_name>"
10010 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
10011 (vec_duplicate:V2DF
10012 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
10013 "TARGET_SSE2 && <mask_avx512vl_condition>"
10016 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
10017 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
10018 [(set_attr "isa" "noavx,sse3,avx512vl")
10019 (set_attr "type" "sselog1")
10020 (set_attr "prefix" "orig,maybe_vex,evex")
10021 (set_attr "mode" "V2DF,DF,DF")])
10023 (define_insn "vec_concatv2df"
10024 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x, v,x,x")
10026 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,vm,0,0")
10027 (match_operand:DF 2 "nonimm_or_0_operand" " x,x,v,1,1,m,m, C,x,m")))]
10029 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
10030 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
10032 unpcklpd\t{%2, %0|%0, %2}
10033 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10034 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10035 %vmovddup\t{%1, %0|%0, %1}
10036 vmovddup\t{%1, %0|%0, %1}
10037 movhpd\t{%2, %0|%0, %2}
10038 vmovhpd\t{%2, %1, %0|%0, %1, %2}
10039 %vmovq\t{%1, %0|%0, %1}
10040 movlhps\t{%2, %0|%0, %2}
10041 movhps\t{%2, %0|%0, %2}"
10043 (cond [(eq_attr "alternative" "0,5")
10044 (const_string "sse2_noavx")
10045 (eq_attr "alternative" "1,6")
10046 (const_string "avx")
10047 (eq_attr "alternative" "2,4")
10048 (const_string "avx512vl")
10049 (eq_attr "alternative" "3")
10050 (const_string "sse3")
10051 (eq_attr "alternative" "7")
10052 (const_string "sse2")
10054 (const_string "noavx")))
10057 (eq_attr "alternative" "0,1,2,3,4")
10058 (const_string "sselog")
10059 (const_string "ssemov")))
10060 (set (attr "prefix_data16")
10061 (if_then_else (eq_attr "alternative" "5")
10063 (const_string "*")))
10064 (set (attr "prefix")
10065 (cond [(eq_attr "alternative" "1,6")
10066 (const_string "vex")
10067 (eq_attr "alternative" "2,4")
10068 (const_string "evex")
10069 (eq_attr "alternative" "3,7")
10070 (const_string "maybe_vex")
10072 (const_string "orig")))
10073 (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
10075 ;; vmovq clears also the higher bits.
10076 (define_insn "vec_set<mode>_0"
10077 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
10078 (vec_merge:VF2_512_256
10079 (vec_duplicate:VF2_512_256
10080 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm"))
10081 (match_operand:VF2_512_256 1 "const0_operand" "C")
10084 "vmovq\t{%2, %x0|%x0, %2}"
10085 [(set_attr "type" "ssemov")
10086 (set_attr "prefix" "maybe_evex")
10087 (set_attr "mode" "DF")])
10089 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10091 ;; Parallel integer down-conversion operations
10093 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10095 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
10096 (define_mode_attr pmov_src_mode
10097 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
10098 (define_mode_attr pmov_src_lower
10099 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
10100 (define_mode_attr pmov_suff_1
10101 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
10103 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
10104 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10105 (any_truncate:PMOV_DST_MODE_1
10106 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
10108 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
10109 [(set_attr "type" "ssemov")
10110 (set_attr "memory" "none,store")
10111 (set_attr "prefix" "evex")
10112 (set_attr "mode" "<sseinsnmode>")])
10114 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
10115 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10116 (vec_merge:PMOV_DST_MODE_1
10117 (any_truncate:PMOV_DST_MODE_1
10118 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
10119 (match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0")
10120 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
10122 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10123 [(set_attr "type" "ssemov")
10124 (set_attr "memory" "none,store")
10125 (set_attr "prefix" "evex")
10126 (set_attr "mode" "<sseinsnmode>")])
10128 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
10129 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
10130 (vec_merge:PMOV_DST_MODE_1
10131 (any_truncate:PMOV_DST_MODE_1
10132 (match_operand:<pmov_src_mode> 1 "register_operand"))
10134 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
10137 (define_insn "avx512bw_<code>v32hiv32qi2"
10138 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
10139 (any_truncate:V32QI
10140 (match_operand:V32HI 1 "register_operand" "v,v")))]
10142 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
10143 [(set_attr "type" "ssemov")
10144 (set_attr "memory" "none,store")
10145 (set_attr "prefix" "evex")
10146 (set_attr "mode" "XI")])
10148 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
10149 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
10151 (any_truncate:V32QI
10152 (match_operand:V32HI 1 "register_operand" "v,v"))
10153 (match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0")
10154 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
10156 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10157 [(set_attr "type" "ssemov")
10158 (set_attr "memory" "none,store")
10159 (set_attr "prefix" "evex")
10160 (set_attr "mode" "XI")])
10162 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
10163 [(set (match_operand:V32QI 0 "nonimmediate_operand")
10165 (any_truncate:V32QI
10166 (match_operand:V32HI 1 "register_operand"))
10168 (match_operand:SI 2 "register_operand")))]
10171 (define_mode_iterator PMOV_DST_MODE_2
10172 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
10173 (define_mode_attr pmov_suff_2
10174 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
10176 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
10177 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
10178 (any_truncate:PMOV_DST_MODE_2
10179 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
10181 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
10182 [(set_attr "type" "ssemov")
10183 (set_attr "memory" "none,store")
10184 (set_attr "prefix" "evex")
10185 (set_attr "mode" "<sseinsnmode>")])
10187 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
10188 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
10189 (vec_merge:PMOV_DST_MODE_2
10190 (any_truncate:PMOV_DST_MODE_2
10191 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
10192 (match_operand:PMOV_DST_MODE_2 2 "nonimm_or_0_operand" "0C,0")
10193 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
10195 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10196 [(set_attr "type" "ssemov")
10197 (set_attr "memory" "none,store")
10198 (set_attr "prefix" "evex")
10199 (set_attr "mode" "<sseinsnmode>")])
10201 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
10202 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
10203 (vec_merge:PMOV_DST_MODE_2
10204 (any_truncate:PMOV_DST_MODE_2
10205 (match_operand:<ssedoublemode> 1 "register_operand"))
10207 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
10210 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
10211 (define_mode_attr pmov_dst_3
10212 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
10213 (define_mode_attr pmov_dst_zeroed_3
10214 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
10215 (define_mode_attr pmov_suff_3
10216 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
10218 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
10219 [(set (match_operand:V16QI 0 "register_operand" "=v")
10221 (any_truncate:<pmov_dst_3>
10222 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
10223 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
10225 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
10226 [(set_attr "type" "ssemov")
10227 (set_attr "prefix" "evex")
10228 (set_attr "mode" "TI")])
10230 (define_insn "*avx512vl_<code>v2div2qi2_store"
10231 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10234 (match_operand:V2DI 1 "register_operand" "v"))
10237 (parallel [(const_int 2) (const_int 3)
10238 (const_int 4) (const_int 5)
10239 (const_int 6) (const_int 7)
10240 (const_int 8) (const_int 9)
10241 (const_int 10) (const_int 11)
10242 (const_int 12) (const_int 13)
10243 (const_int 14) (const_int 15)]))))]
10245 "vpmov<trunsuffix>qb\t{%1, %0|%w0, %1}"
10246 [(set_attr "type" "ssemov")
10247 (set_attr "memory" "store")
10248 (set_attr "prefix" "evex")
10249 (set_attr "mode" "TI")])
10251 (define_insn "avx512vl_<code>v2div2qi2_mask"
10252 [(set (match_operand:V16QI 0 "register_operand" "=v")
10256 (match_operand:V2DI 1 "register_operand" "v"))
10258 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10259 (parallel [(const_int 0) (const_int 1)]))
10260 (match_operand:QI 3 "register_operand" "Yk"))
10261 (const_vector:V14QI [(const_int 0) (const_int 0)
10262 (const_int 0) (const_int 0)
10263 (const_int 0) (const_int 0)
10264 (const_int 0) (const_int 0)
10265 (const_int 0) (const_int 0)
10266 (const_int 0) (const_int 0)
10267 (const_int 0) (const_int 0)])))]
10269 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10270 [(set_attr "type" "ssemov")
10271 (set_attr "prefix" "evex")
10272 (set_attr "mode" "TI")])
10274 (define_insn "*avx512vl_<code>v2div2qi2_mask_1"
10275 [(set (match_operand:V16QI 0 "register_operand" "=v")
10279 (match_operand:V2DI 1 "register_operand" "v"))
10280 (const_vector:V2QI [(const_int 0) (const_int 0)])
10281 (match_operand:QI 2 "register_operand" "Yk"))
10282 (const_vector:V14QI [(const_int 0) (const_int 0)
10283 (const_int 0) (const_int 0)
10284 (const_int 0) (const_int 0)
10285 (const_int 0) (const_int 0)
10286 (const_int 0) (const_int 0)
10287 (const_int 0) (const_int 0)
10288 (const_int 0) (const_int 0)])))]
10290 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10291 [(set_attr "type" "ssemov")
10292 (set_attr "prefix" "evex")
10293 (set_attr "mode" "TI")])
10295 (define_insn "avx512vl_<code>v2div2qi2_mask_store"
10296 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10300 (match_operand:V2DI 1 "register_operand" "v"))
10303 (parallel [(const_int 0) (const_int 1)]))
10304 (match_operand:QI 2 "register_operand" "Yk"))
10307 (parallel [(const_int 2) (const_int 3)
10308 (const_int 4) (const_int 5)
10309 (const_int 6) (const_int 7)
10310 (const_int 8) (const_int 9)
10311 (const_int 10) (const_int 11)
10312 (const_int 12) (const_int 13)
10313 (const_int 14) (const_int 15)]))))]
10315 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%w0%{%2%}, %1}"
10316 [(set_attr "type" "ssemov")
10317 (set_attr "memory" "store")
10318 (set_attr "prefix" "evex")
10319 (set_attr "mode" "TI")])
10321 (define_insn "*avx512vl_<code><mode>v4qi2_store"
10322 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10325 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10328 (parallel [(const_int 4) (const_int 5)
10329 (const_int 6) (const_int 7)
10330 (const_int 8) (const_int 9)
10331 (const_int 10) (const_int 11)
10332 (const_int 12) (const_int 13)
10333 (const_int 14) (const_int 15)]))))]
10335 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%k0, %1}"
10336 [(set_attr "type" "ssemov")
10337 (set_attr "memory" "store")
10338 (set_attr "prefix" "evex")
10339 (set_attr "mode" "TI")])
10341 (define_insn "avx512vl_<code><mode>v4qi2_mask"
10342 [(set (match_operand:V16QI 0 "register_operand" "=v")
10346 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10348 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10349 (parallel [(const_int 0) (const_int 1)
10350 (const_int 2) (const_int 3)]))
10351 (match_operand:QI 3 "register_operand" "Yk"))
10352 (const_vector:V12QI [(const_int 0) (const_int 0)
10353 (const_int 0) (const_int 0)
10354 (const_int 0) (const_int 0)
10355 (const_int 0) (const_int 0)
10356 (const_int 0) (const_int 0)
10357 (const_int 0) (const_int 0)])))]
10359 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10360 [(set_attr "type" "ssemov")
10361 (set_attr "prefix" "evex")
10362 (set_attr "mode" "TI")])
10364 (define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
10365 [(set (match_operand:V16QI 0 "register_operand" "=v")
10369 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10370 (const_vector:V4QI [(const_int 0) (const_int 0)
10371 (const_int 0) (const_int 0)])
10372 (match_operand:QI 2 "register_operand" "Yk"))
10373 (const_vector:V12QI [(const_int 0) (const_int 0)
10374 (const_int 0) (const_int 0)
10375 (const_int 0) (const_int 0)
10376 (const_int 0) (const_int 0)
10377 (const_int 0) (const_int 0)
10378 (const_int 0) (const_int 0)])))]
10380 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10381 [(set_attr "type" "ssemov")
10382 (set_attr "prefix" "evex")
10383 (set_attr "mode" "TI")])
10385 (define_insn "avx512vl_<code><mode>v4qi2_mask_store"
10386 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10390 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10393 (parallel [(const_int 0) (const_int 1)
10394 (const_int 2) (const_int 3)]))
10395 (match_operand:QI 2 "register_operand" "Yk"))
10398 (parallel [(const_int 4) (const_int 5)
10399 (const_int 6) (const_int 7)
10400 (const_int 8) (const_int 9)
10401 (const_int 10) (const_int 11)
10402 (const_int 12) (const_int 13)
10403 (const_int 14) (const_int 15)]))))]
10405 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%k0%{%2%}, %1}"
10406 [(set_attr "type" "ssemov")
10407 (set_attr "memory" "store")
10408 (set_attr "prefix" "evex")
10409 (set_attr "mode" "TI")])
10411 (define_mode_iterator VI2_128_BW_4_256
10412 [(V8HI "TARGET_AVX512BW") V8SI])
10414 (define_insn "*avx512vl_<code><mode>v8qi2_store"
10415 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10418 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10421 (parallel [(const_int 8) (const_int 9)
10422 (const_int 10) (const_int 11)
10423 (const_int 12) (const_int 13)
10424 (const_int 14) (const_int 15)]))))]
10426 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%q0, %1}"
10427 [(set_attr "type" "ssemov")
10428 (set_attr "memory" "store")
10429 (set_attr "prefix" "evex")
10430 (set_attr "mode" "TI")])
10432 (define_insn "avx512vl_<code><mode>v8qi2_mask"
10433 [(set (match_operand:V16QI 0 "register_operand" "=v")
10437 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10439 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10440 (parallel [(const_int 0) (const_int 1)
10441 (const_int 2) (const_int 3)
10442 (const_int 4) (const_int 5)
10443 (const_int 6) (const_int 7)]))
10444 (match_operand:QI 3 "register_operand" "Yk"))
10445 (const_vector:V8QI [(const_int 0) (const_int 0)
10446 (const_int 0) (const_int 0)
10447 (const_int 0) (const_int 0)
10448 (const_int 0) (const_int 0)])))]
10450 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10451 [(set_attr "type" "ssemov")
10452 (set_attr "prefix" "evex")
10453 (set_attr "mode" "TI")])
10455 (define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
10456 [(set (match_operand:V16QI 0 "register_operand" "=v")
10460 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10461 (const_vector:V8QI [(const_int 0) (const_int 0)
10462 (const_int 0) (const_int 0)
10463 (const_int 0) (const_int 0)
10464 (const_int 0) (const_int 0)])
10465 (match_operand:QI 2 "register_operand" "Yk"))
10466 (const_vector:V8QI [(const_int 0) (const_int 0)
10467 (const_int 0) (const_int 0)
10468 (const_int 0) (const_int 0)
10469 (const_int 0) (const_int 0)])))]
10471 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10472 [(set_attr "type" "ssemov")
10473 (set_attr "prefix" "evex")
10474 (set_attr "mode" "TI")])
10476 (define_insn "avx512vl_<code><mode>v8qi2_mask_store"
10477 [(set (match_operand:V16QI 0 "memory_operand" "=m")
10481 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10484 (parallel [(const_int 0) (const_int 1)
10485 (const_int 2) (const_int 3)
10486 (const_int 4) (const_int 5)
10487 (const_int 6) (const_int 7)]))
10488 (match_operand:QI 2 "register_operand" "Yk"))
10491 (parallel [(const_int 8) (const_int 9)
10492 (const_int 10) (const_int 11)
10493 (const_int 12) (const_int 13)
10494 (const_int 14) (const_int 15)]))))]
10496 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
10497 [(set_attr "type" "ssemov")
10498 (set_attr "memory" "store")
10499 (set_attr "prefix" "evex")
10500 (set_attr "mode" "TI")])
10502 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
10503 (define_mode_attr pmov_dst_4
10504 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
10505 (define_mode_attr pmov_dst_zeroed_4
10506 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
10507 (define_mode_attr pmov_suff_4
10508 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
10510 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
10511 [(set (match_operand:V8HI 0 "register_operand" "=v")
10513 (any_truncate:<pmov_dst_4>
10514 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
10515 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
10517 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
10518 [(set_attr "type" "ssemov")
10519 (set_attr "prefix" "evex")
10520 (set_attr "mode" "TI")])
10522 (define_insn "*avx512vl_<code><mode>v4hi2_store"
10523 [(set (match_operand:V8HI 0 "memory_operand" "=m")
10526 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10529 (parallel [(const_int 4) (const_int 5)
10530 (const_int 6) (const_int 7)]))))]
10532 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
10533 [(set_attr "type" "ssemov")
10534 (set_attr "memory" "store")
10535 (set_attr "prefix" "evex")
10536 (set_attr "mode" "TI")])
10538 (define_insn "avx512vl_<code><mode>v4hi2_mask"
10539 [(set (match_operand:V8HI 0 "register_operand" "=v")
10543 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10545 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
10546 (parallel [(const_int 0) (const_int 1)
10547 (const_int 2) (const_int 3)]))
10548 (match_operand:QI 3 "register_operand" "Yk"))
10549 (const_vector:V4HI [(const_int 0) (const_int 0)
10550 (const_int 0) (const_int 0)])))]
10552 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10553 [(set_attr "type" "ssemov")
10554 (set_attr "prefix" "evex")
10555 (set_attr "mode" "TI")])
10557 (define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
10558 [(set (match_operand:V8HI 0 "register_operand" "=v")
10562 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10563 (const_vector:V4HI [(const_int 0) (const_int 0)
10564 (const_int 0) (const_int 0)])
10565 (match_operand:QI 2 "register_operand" "Yk"))
10566 (const_vector:V4HI [(const_int 0) (const_int 0)
10567 (const_int 0) (const_int 0)])))]
10569 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10570 [(set_attr "type" "ssemov")
10571 (set_attr "prefix" "evex")
10572 (set_attr "mode" "TI")])
10574 (define_insn "avx512vl_<code><mode>v4hi2_mask_store"
10575 [(set (match_operand:V8HI 0 "memory_operand" "=m")
10579 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10582 (parallel [(const_int 0) (const_int 1)
10583 (const_int 2) (const_int 3)]))
10584 (match_operand:QI 2 "register_operand" "Yk"))
10587 (parallel [(const_int 4) (const_int 5)
10588 (const_int 6) (const_int 7)]))))]
10591 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
10592 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
10593 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
10595 [(set_attr "type" "ssemov")
10596 (set_attr "memory" "store")
10597 (set_attr "prefix" "evex")
10598 (set_attr "mode" "TI")])
10600 (define_insn "*avx512vl_<code>v2div2hi2_store"
10601 [(set (match_operand:V8HI 0 "memory_operand" "=m")
10604 (match_operand:V2DI 1 "register_operand" "v"))
10607 (parallel [(const_int 2) (const_int 3)
10608 (const_int 4) (const_int 5)
10609 (const_int 6) (const_int 7)]))))]
10611 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
10612 [(set_attr "type" "ssemov")
10613 (set_attr "memory" "store")
10614 (set_attr "prefix" "evex")
10615 (set_attr "mode" "TI")])
10617 (define_insn "avx512vl_<code>v2div2hi2_mask"
10618 [(set (match_operand:V8HI 0 "register_operand" "=v")
10622 (match_operand:V2DI 1 "register_operand" "v"))
10624 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
10625 (parallel [(const_int 0) (const_int 1)]))
10626 (match_operand:QI 3 "register_operand" "Yk"))
10627 (const_vector:V6HI [(const_int 0) (const_int 0)
10628 (const_int 0) (const_int 0)
10629 (const_int 0) (const_int 0)])))]
10631 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10632 [(set_attr "type" "ssemov")
10633 (set_attr "prefix" "evex")
10634 (set_attr "mode" "TI")])
10636 (define_insn "*avx512vl_<code>v2div2hi2_mask_1"
10637 [(set (match_operand:V8HI 0 "register_operand" "=v")
10641 (match_operand:V2DI 1 "register_operand" "v"))
10642 (const_vector:V2HI [(const_int 0) (const_int 0)])
10643 (match_operand:QI 2 "register_operand" "Yk"))
10644 (const_vector:V6HI [(const_int 0) (const_int 0)
10645 (const_int 0) (const_int 0)
10646 (const_int 0) (const_int 0)])))]
10648 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10649 [(set_attr "type" "ssemov")
10650 (set_attr "prefix" "evex")
10651 (set_attr "mode" "TI")])
10653 (define_insn "avx512vl_<code>v2div2hi2_mask_store"
10654 [(set (match_operand:V8HI 0 "memory_operand" "=m")
10658 (match_operand:V2DI 1 "register_operand" "v"))
10661 (parallel [(const_int 0) (const_int 1)]))
10662 (match_operand:QI 2 "register_operand" "Yk"))
10665 (parallel [(const_int 2) (const_int 3)
10666 (const_int 4) (const_int 5)
10667 (const_int 6) (const_int 7)]))))]
10669 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
10670 [(set_attr "type" "ssemov")
10671 (set_attr "memory" "store")
10672 (set_attr "prefix" "evex")
10673 (set_attr "mode" "TI")])
10675 (define_insn "*avx512vl_<code>v2div2si2"
10676 [(set (match_operand:V4SI 0 "register_operand" "=v")
10679 (match_operand:V2DI 1 "register_operand" "v"))
10680 (match_operand:V2SI 2 "const0_operand")))]
10682 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
10683 [(set_attr "type" "ssemov")
10684 (set_attr "prefix" "evex")
10685 (set_attr "mode" "TI")])
10687 (define_insn "*avx512vl_<code>v2div2si2_store"
10688 [(set (match_operand:V4SI 0 "memory_operand" "=m")
10691 (match_operand:V2DI 1 "register_operand" "v"))
10694 (parallel [(const_int 2) (const_int 3)]))))]
10696 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
10697 [(set_attr "type" "ssemov")
10698 (set_attr "memory" "store")
10699 (set_attr "prefix" "evex")
10700 (set_attr "mode" "TI")])
;; Merge-masked register form: truncated result merged with operand 2 under
;; mask operand 3; %N2 prints %{z%} when operand 2 is the zero constant.
;; NOTE(review): some pattern lines are elided in this extraction.
10702 (define_insn "avx512vl_<code>v2div2si2_mask"
10703   [(set (match_operand:V4SI 0 "register_operand" "=v")
10707 	      (match_operand:V2DI 1 "register_operand" "v"))
10709 	      (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
10710 	      (parallel [(const_int 0) (const_int 1)]))
10711 	   (match_operand:QI 3 "register_operand" "Yk"))
10712 	 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
10714   "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10715   [(set_attr "type" "ssemov")
10716    (set_attr "prefix" "evex")
10717    (set_attr "mode" "TI")])
;; Zero-masked variant: masked-out lanes are explicitly zero, so the
;; template always emits the %{z%} zeroing modifier.
;; NOTE(review): some pattern lines are elided in this extraction.
10719 (define_insn "*avx512vl_<code>v2div2si2_mask_1"
10720   [(set (match_operand:V4SI 0 "register_operand" "=v")
10724 	      (match_operand:V2DI 1 "register_operand" "v"))
10725 	   (const_vector:V2SI [(const_int 0) (const_int 0)])
10726 	   (match_operand:QI 2 "register_operand" "Yk"))
10727 	 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
10729   "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10730   [(set_attr "type" "ssemov")
10731    (set_attr "prefix" "evex")
10732    (set_attr "mode" "TI")])
;; Masked truncating store to memory under mask operand 2; lanes 2..3 of
;; the V4SI destination keep the old memory contents (vec_select).
;; NOTE(review): some pattern lines are elided in this extraction.
10734 (define_insn "avx512vl_<code>v2div2si2_mask_store"
10735   [(set (match_operand:V4SI 0 "memory_operand" "=m")
10739 	      (match_operand:V2DI 1 "register_operand" "v"))
10742 	   (parallel [(const_int 0) (const_int 1)]))
10743 	(match_operand:QI 2 "register_operand" "Yk"))
10746 	 (parallel [(const_int 2) (const_int 3)]))))]
10748   "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %t1}"
10749   [(set_attr "type" "ssemov")
10750    (set_attr "memory" "store")
10751    (set_attr "prefix" "evex")
10752    (set_attr "mode" "TI")])
;; Truncate V8DI to eight QImode elements (vpmov{s,us,}qb), zero-filling
;; the upper eight lanes of the V16QI destination.
;; NOTE(review): some pattern lines are elided in this extraction.
10754 (define_insn "*avx512f_<code>v8div16qi2"
10755   [(set (match_operand:V16QI 0 "register_operand" "=v")
10758 	      (match_operand:V8DI 1 "register_operand" "v"))
10759 	 (const_vector:V8QI [(const_int 0) (const_int 0)
10760 			     (const_int 0) (const_int 0)
10761 			     (const_int 0) (const_int 0)
10762 			     (const_int 0) (const_int 0)])))]
10764   "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
10765   [(set_attr "type" "ssemov")
10766    (set_attr "prefix" "evex")
10767    (set_attr "mode" "TI")])
;; Unmasked store form: the eight truncated bytes are written to memory;
;; the vec_select keeps lanes 8..15 of the V16QI destination unchanged.
;; NOTE(review): some pattern lines are elided in this extraction.
10769 (define_insn "*avx512f_<code>v8div16qi2_store"
10770   [(set (match_operand:V16QI 0 "memory_operand" "=m")
10773 	      (match_operand:V8DI 1 "register_operand" "v"))
10776 	 (parallel [(const_int 8) (const_int 9)
10777 		    (const_int 10) (const_int 11)
10778 		    (const_int 12) (const_int 13)
10779 		    (const_int 14) (const_int 15)]))))]
10781   "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
10782   [(set_attr "type" "ssemov")
10783    (set_attr "memory" "store")
10784    (set_attr "prefix" "evex")
10785    (set_attr "mode" "TI")])
;; Merge-masked register form: truncated bytes merged with operand 2 under
;; mask operand 3; %N2 emits %{z%} when operand 2 is all-zeros ("0C").
;; NOTE(review): some pattern lines are elided in this extraction.
10787 (define_insn "avx512f_<code>v8div16qi2_mask"
10788   [(set (match_operand:V16QI 0 "register_operand" "=v")
10792 	      (match_operand:V8DI 1 "register_operand" "v"))
10794 	      (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10795 	      (parallel [(const_int 0) (const_int 1)
10796 			 (const_int 2) (const_int 3)
10797 			 (const_int 4) (const_int 5)
10798 			 (const_int 6) (const_int 7)]))
10799 	   (match_operand:QI 3 "register_operand" "Yk"))
10800 	 (const_vector:V8QI [(const_int 0) (const_int 0)
10801 			     (const_int 0) (const_int 0)
10802 			     (const_int 0) (const_int 0)
10803 			     (const_int 0) (const_int 0)])))]
10805   "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10806   [(set_attr "type" "ssemov")
10807    (set_attr "prefix" "evex")
10808    (set_attr "mode" "TI")])
;; Zero-masked variant of the pattern above: masked-out lanes are zero,
;; hence the unconditional %{z%} zeroing modifier in the template.
;; NOTE(review): some pattern lines are elided in this extraction.
10810 (define_insn "*avx512f_<code>v8div16qi2_mask_1"
10811   [(set (match_operand:V16QI 0 "register_operand" "=v")
10815 	      (match_operand:V8DI 1 "register_operand" "v"))
10816 	   (const_vector:V8QI [(const_int 0) (const_int 0)
10817 			       (const_int 0) (const_int 0)
10818 			       (const_int 0) (const_int 0)
10819 			       (const_int 0) (const_int 0)])
10820 	   (match_operand:QI 2 "register_operand" "Yk"))
10821 	 (const_vector:V8QI [(const_int 0) (const_int 0)
10822 			     (const_int 0) (const_int 0)
10823 			     (const_int 0) (const_int 0)
10824 			     (const_int 0) (const_int 0)])))]
10826   "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10827   [(set_attr "type" "ssemov")
10828    (set_attr "prefix" "evex")
10829    (set_attr "mode" "TI")])
;; Masked truncating store under mask operand 2; memory lanes 8..15 keep
;; their old contents.  In the Intel-syntax half, %q0 prints the
;; destination as a 64-bit (QWORD) memory operand.
;; NOTE(review): some pattern lines are elided in this extraction.
10831 (define_insn "avx512f_<code>v8div16qi2_mask_store"
10832   [(set (match_operand:V16QI 0 "memory_operand" "=m")
10836 	      (match_operand:V8DI 1 "register_operand" "v"))
10839 	   (parallel [(const_int 0) (const_int 1)
10840 		      (const_int 2) (const_int 3)
10841 		      (const_int 4) (const_int 5)
10842 		      (const_int 6) (const_int 7)]))
10843 	(match_operand:QI 2 "register_operand" "Yk"))
10846 	 (parallel [(const_int 8) (const_int 9)
10847 		    (const_int 10) (const_int 11)
10848 		    (const_int 12) (const_int 13)
10849 		    (const_int 14) (const_int 15)]))))]
10851   "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
10852   [(set_attr "type" "ssemov")
10853    (set_attr "memory" "store")
10854    (set_attr "prefix" "evex")
10855    (set_attr "mode" "TI")])
10857 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10859 ;; Parallel integral arithmetic
10861 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector negation, expanded as (0 - x): operand 2 is forced to a zero
;; register of the same mode in the preparation statement.
;; NOTE(review): the (minus ...) RTL line is elided in this extraction.
10863 (define_expand "neg<mode>2"
10864   [(set (match_operand:VI_AVX2 0 "register_operand")
10867 	  (match_operand:VI_AVX2 1 "vector_operand")))]
10869   "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add/sub expander; legitimizes operands so at most one
;; is a memory reference.
;; NOTE(review): the (plusminus ...) RTL line is elided in this extraction.
10871 (define_expand "<plusminus_insn><mode>3"
10872   [(set (match_operand:VI_AVX2 0 "register_operand")
10874 	  (match_operand:VI_AVX2 1 "vector_operand")
10875 	  (match_operand:VI_AVX2 2 "vector_operand")))]
10877   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Masked add/sub expander for 32/64-bit element vectors (AVX512VL):
;; result merged with operand 3 under mask operand 4.
10879 (define_expand "<plusminus_insn><mode>3_mask"
10880   [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10881 	  (vec_merge:VI48_AVX512VL
10882 	    (plusminus:VI48_AVX512VL
10883 	      (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10884 	      (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10885 	    (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
10886 	    (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10888   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Masked add/sub expander for 8/16-bit element vectors; parallel to the
;; VI48 expander above but for the AVX512BW element sizes.
10890 (define_expand "<plusminus_insn><mode>3_mask"
10891   [(set (match_operand:VI12_AVX512VL 0 "register_operand")
10892 	  (vec_merge:VI12_AVX512VL
10893 	    (plusminus:VI12_AVX512VL
10894 	      (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
10895 	      (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
10896 	    (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
10897 	    (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10899   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Integer vector add/sub: legacy two-operand SSE form (alternative 0)
;; and three-operand VEX form (alternative 1).  <comm> makes operand 1
;; commutative for PLUS only.
;; NOTE(review): the (plusminus ...) RTL line is elided in this extraction.
10901 (define_insn "*<plusminus_insn><mode>3"
10902   [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
10904 	  (match_operand:VI_AVX2 1 "vector_operand" "<comm>0,v")
10905 	  (match_operand:VI_AVX2 2 "vector_operand" "xBm,vm")))]
10906   "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10908    p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10909    vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10910   [(set_attr "isa" "noavx,avx")
10911    (set_attr "type" "sseiadd")
10912    (set_attr "prefix_data16" "1,*")
10913    (set_attr "prefix" "orig,vex")
10914    (set_attr "mode" "<sseinsnmode>")])
;; Subtract with an embedded-broadcast memory operand: operand 2 is a
;; scalar in memory, broadcast across the vector via <avx512bcst>.
10916 (define_insn "*sub<mode>3_bcst"
10917   [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10918 	(minus:VI48_AVX512VL
10919 	  (match_operand:VI48_AVX512VL 1 "register_operand" "v")
10920 	  (vec_duplicate:VI48_AVX512VL
10921 	    (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
10922   "TARGET_AVX512F && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
10923   "vpsub<ssemodesuffix>\t{%2<avx512bcst>, %1, %0|%0, %1, %2<avx512bcst>}"
10924   [(set_attr "type" "sseiadd")
10925    (set_attr "prefix" "evex")
10926    (set_attr "mode" "<sseinsnmode>")])
;; Add with an embedded-broadcast memory operand; broadcast operand comes
;; first since PLUS is commutative and canonicalization puts it there.
10928 (define_insn "*add<mode>3_bcst"
10929   [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10930 	(plus:VI48_AVX512VL
10931 	  (vec_duplicate:VI48_AVX512VL
10932 	    (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
10933 	  (match_operand:VI48_AVX512VL 2 "register_operand" "v")))]
10934   "TARGET_AVX512F && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
10935   "vpadd<ssemodesuffix>\t{%1<avx512bcst>, %2, %0|%0, %2, %1<avx512bcst>}"
10936   [(set_attr "type" "sseiadd")
10937    (set_attr "prefix" "evex")
10938    (set_attr "mode" "<sseinsnmode>")])
;; Masked add/sub insn for 32/64-bit elements; %N3 prints the %{z%}
;; zeroing modifier when operand 3 is the zero constant ("0C").
10940 (define_insn "*<plusminus_insn><mode>3_mask"
10941   [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10942 	(vec_merge:VI48_AVX512VL
10943 	  (plusminus:VI48_AVX512VL
10944 	    (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10945 	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
10946 	  (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
10947 	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10948   "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10949   "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10950   [(set_attr "type" "sseiadd")
10951    (set_attr "prefix" "evex")
10952    (set_attr "mode" "<sseinsnmode>")])
;; Masked add/sub insn for 8/16-bit elements; identical shape to the VI48
;; form but gated on TARGET_AVX512BW (byte/word masking needs BW).
10954 (define_insn "*<plusminus_insn><mode>3_mask"
10955   [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10956 	(vec_merge:VI12_AVX512VL
10957 	  (plusminus:VI12_AVX512VL
10958 	    (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10959 	    (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
10960 	  (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand" "0C")
10961 	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10962   "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10963   "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10964   [(set_attr "type" "sseiadd")
10965    (set_attr "prefix" "evex")
10966    (set_attr "mode" "<sseinsnmode>")])
;; Saturating add/sub (padds/paddus/psubs/psubus) expander for byte/word
;; vectors, with optional AVX512 masking via <mask_name>.
10968 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10969   [(set (match_operand:VI12_AVX2 0 "register_operand")
10970 	(sat_plusminus:VI12_AVX2
10971 	  (match_operand:VI12_AVX2 1 "vector_operand")
10972 	  (match_operand:VI12_AVX2 2 "vector_operand")))]
10973   "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10974   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Saturating add/sub insn: legacy SSE two-operand alternative and
;; VEX/EVEX three-operand alternative (masking appended via
;; <mask_operand3> in the second template).
10976 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10977   [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
10978 	(sat_plusminus:VI12_AVX2
10979 	  (match_operand:VI12_AVX2 1 "vector_operand" "<comm>0,v")
10980 	  (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))]
10981   "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
10982    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10984    p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10985    vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10986   [(set_attr "isa" "noavx,avx")
10987    (set_attr "type" "sseiadd")
10988    (set_attr "prefix_data16" "1,*")
10989    (set_attr "prefix" "orig,maybe_evex")
10990    (set_attr "mode" "TI")])
;; Byte-element vector multiply: no direct hardware insn exists, so it is
;; lowered through HImode operations by ix86_expand_vecop_qihi.
;; NOTE(review): the closing of the C body is elided in this extraction.
10992 (define_expand "mul<mode>3<mask_name>"
10993   [(set (match_operand:VI1_AVX512 0 "register_operand")
10994 	(mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
10995 		       (match_operand:VI1_AVX512 2 "register_operand")))]
10996   "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10998   ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Word-element vector multiply (pmullw) expander; legitimizes operand
;; placement so at most one operand is in memory.
11002 (define_expand "mul<mode>3<mask_name>"
11003   [(set (match_operand:VI2_AVX2 0 "register_operand")
11004 	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
11005 		       (match_operand:VI2_AVX2 2 "vector_operand")))]
11006   "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11007   "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; pmullw/vpmullw: low 16 bits of each word product.  "%0" commutes
;; operand 1; masking appended via <mask_operand3> on the VEX form.
11009 (define_insn "*mul<mode>3<mask_name>"
11010   [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11011 	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
11012 		       (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
11013   "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11014    && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11016    pmullw\t{%2, %0|%0, %2}
11017    vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11018   [(set_attr "isa" "noavx,avx")
11019    (set_attr "type" "sseimul")
11020    (set_attr "prefix_data16" "1,*")
11021    (set_attr "prefix" "orig,vex")
11022    (set_attr "mode" "<sseinsnmode>")])
;; High-part word multiply (pmulhw/pmulhuw): widen, multiply, then shift
;; right and truncate back.  any_extend selects signed vs unsigned <s>.
;; NOTE(review): truncate/shift-count lines are elided in this extraction.
11024 (define_expand "<s>mul<mode>3_highpart<mask_name>"
11025   [(set (match_operand:VI2_AVX2 0 "register_operand")
11027 	  (lshiftrt:<ssedoublemode>
11028 	    (mult:<ssedoublemode>
11029 	      (any_extend:<ssedoublemode>
11030 		(match_operand:VI2_AVX2 1 "vector_operand"))
11031 	      (any_extend:<ssedoublemode>
11032 		(match_operand:VI2_AVX2 2 "vector_operand")))
11035    && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11036   "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; pmulh<u>w insn matching the expander above; <u> is empty for signed
;; and "u" for unsigned high-part multiply.
;; NOTE(review): truncate/shift-count lines are elided in this extraction.
11038 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
11039   [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11041 	  (lshiftrt:<ssedoublemode>
11042 	    (mult:<ssedoublemode>
11043 	      (any_extend:<ssedoublemode>
11044 		(match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
11045 	      (any_extend:<ssedoublemode>
11046 		(match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
11048   "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11049    && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11051    pmulh<u>w\t{%2, %0|%0, %2}
11052    vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11053   [(set_attr "isa" "noavx,avx")
11054    (set_attr "type" "sseimul")
11055    (set_attr "prefix_data16" "1,*")
11056    (set_attr "prefix" "orig,vex")
11057    (set_attr "mode" "<sseinsnmode>")])
;; vpmuludq (512-bit): unsigned widening multiply of the even SI elements
;; (indices 0,2,...,14) of both inputs, producing V8DI.
;; NOTE(review): mult/zero_extend wrapper lines are elided here.
11059 (define_expand "vec_widen_umult_even_v16si<mask_name>"
11060   [(set (match_operand:V8DI 0 "register_operand")
11064 	      (match_operand:V16SI 1 "nonimmediate_operand")
11065 	      (parallel [(const_int 0) (const_int 2)
11066 			 (const_int 4) (const_int 6)
11067 			 (const_int 8) (const_int 10)
11068 			 (const_int 12) (const_int 14)])))
11071 	      (match_operand:V16SI 2 "nonimmediate_operand")
11072 	      (parallel [(const_int 0) (const_int 2)
11073 			 (const_int 4) (const_int 6)
11074 			 (const_int 8) (const_int 10)
11075 			 (const_int 12) (const_int 14)])))))]
11077   "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; vpmuludq insn for the 512-bit expander above; "%v" commutes operand 1.
;; NOTE(review): mult/zero_extend wrapper lines are elided here.
11079 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
11080   [(set (match_operand:V8DI 0 "register_operand" "=v")
11084 	      (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11085 	      (parallel [(const_int 0) (const_int 2)
11086 			 (const_int 4) (const_int 6)
11087 			 (const_int 8) (const_int 10)
11088 			 (const_int 12) (const_int 14)])))
11091 	      (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11092 	      (parallel [(const_int 0) (const_int 2)
11093 			 (const_int 4) (const_int 6)
11094 			 (const_int 8) (const_int 10)
11095 			 (const_int 12) (const_int 14)])))))]
11096   "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11097   "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11098   [(set_attr "type" "sseimul")
11099    (set_attr "prefix_extra" "1")
11100    (set_attr "prefix" "evex")
11101    (set_attr "mode" "XI")])
;; vpmuludq (256-bit): unsigned widening multiply of even SI elements of
;; V8SI inputs into V4DI.  NOTE(review): wrapper lines elided here.
11103 (define_expand "vec_widen_umult_even_v8si<mask_name>"
11104   [(set (match_operand:V4DI 0 "register_operand")
11108 	      (match_operand:V8SI 1 "nonimmediate_operand")
11109 	      (parallel [(const_int 0) (const_int 2)
11110 			 (const_int 4) (const_int 6)])))
11113 	      (match_operand:V8SI 2 "nonimmediate_operand")
11114 	      (parallel [(const_int 0) (const_int 2)
11115 			 (const_int 4) (const_int 6)])))))]
11116   "TARGET_AVX2 && <mask_avx512vl_condition>"
11117   "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; 256-bit vpmuludq insn; maybe_evex prefix covers both plain AVX2 and
;; the masked AVX512VL encoding.  NOTE(review): wrapper lines elided.
11119 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
11120   [(set (match_operand:V4DI 0 "register_operand" "=v")
11124 	      (match_operand:V8SI 1 "nonimmediate_operand" "%v")
11125 	      (parallel [(const_int 0) (const_int 2)
11126 			 (const_int 4) (const_int 6)])))
11129 	      (match_operand:V8SI 2 "nonimmediate_operand" "vm")
11130 	      (parallel [(const_int 0) (const_int 2)
11131 			 (const_int 4) (const_int 6)])))))]
11132   "TARGET_AVX2 && <mask_avx512vl_condition>
11133    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11134   "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11135   [(set_attr "type" "sseimul")
11136    (set_attr "prefix" "maybe_evex")
11137    (set_attr "mode" "OI")])
;; pmuludq (128-bit): unsigned widening multiply of even SI elements of
;; V4SI inputs into V2DI.  NOTE(review): wrapper lines elided here.
11139 (define_expand "vec_widen_umult_even_v4si<mask_name>"
11140   [(set (match_operand:V2DI 0 "register_operand")
11144 	      (match_operand:V4SI 1 "vector_operand")
11145 	      (parallel [(const_int 0) (const_int 2)])))
11148 	      (match_operand:V4SI 2 "vector_operand")
11149 	      (parallel [(const_int 0) (const_int 2)])))))]
11150   "TARGET_SSE2 && <mask_avx512vl_condition>"
11151   "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; 128-bit pmuludq/vpmuludq insn: legacy two-operand SSE2 alternative and
;; VEX/EVEX three-operand alternative.  NOTE(review): wrapper lines elided.
11153 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
11154   [(set (match_operand:V2DI 0 "register_operand" "=x,v")
11158 	      (match_operand:V4SI 1 "vector_operand" "%0,v")
11159 	      (parallel [(const_int 0) (const_int 2)])))
11162 	      (match_operand:V4SI 2 "vector_operand" "xBm,vm")
11163 	      (parallel [(const_int 0) (const_int 2)])))))]
11164   "TARGET_SSE2 && <mask_avx512vl_condition>
11165    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11167    pmuludq\t{%2, %0|%0, %2}
11168    vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11169   [(set_attr "isa" "noavx,avx")
11170    (set_attr "type" "sseimul")
11171    (set_attr "prefix_data16" "1,*")
11172    (set_attr "prefix" "orig,maybe_evex")
11173    (set_attr "mode" "TI")])
;; vpmuldq (512-bit): signed widening multiply of the even SI elements of
;; V16SI inputs into V8DI.  NOTE(review): wrapper lines elided here.
11175 (define_expand "vec_widen_smult_even_v16si<mask_name>"
11176   [(set (match_operand:V8DI 0 "register_operand")
11180 	      (match_operand:V16SI 1 "nonimmediate_operand")
11181 	      (parallel [(const_int 0) (const_int 2)
11182 			 (const_int 4) (const_int 6)
11183 			 (const_int 8) (const_int 10)
11184 			 (const_int 12) (const_int 14)])))
11187 	      (match_operand:V16SI 2 "nonimmediate_operand")
11188 	      (parallel [(const_int 0) (const_int 2)
11189 			 (const_int 4) (const_int 6)
11190 			 (const_int 8) (const_int 10)
11191 			 (const_int 12) (const_int 14)])))))]
11193   "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; 512-bit vpmuldq insn for the signed expander above.
;; NOTE(review): mult/sign_extend wrapper lines are elided here.
11195 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
11196   [(set (match_operand:V8DI 0 "register_operand" "=v")
11200 	      (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11201 	      (parallel [(const_int 0) (const_int 2)
11202 			 (const_int 4) (const_int 6)
11203 			 (const_int 8) (const_int 10)
11204 			 (const_int 12) (const_int 14)])))
11207 	      (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11208 	      (parallel [(const_int 0) (const_int 2)
11209 			 (const_int 4) (const_int 6)
11210 			 (const_int 8) (const_int 10)
11211 			 (const_int 12) (const_int 14)])))))]
11212   "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11213   "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11214   [(set_attr "type" "sseimul")
11215    (set_attr "prefix_extra" "1")
11216    (set_attr "prefix" "evex")
11217    (set_attr "mode" "XI")])
;; vpmuldq (256-bit): signed widening multiply of even SI elements of
;; V8SI inputs into V4DI.  NOTE(review): wrapper lines elided here.
11219 (define_expand "vec_widen_smult_even_v8si<mask_name>"
11220   [(set (match_operand:V4DI 0 "register_operand")
11224 	      (match_operand:V8SI 1 "nonimmediate_operand")
11225 	      (parallel [(const_int 0) (const_int 2)
11226 			 (const_int 4) (const_int 6)])))
11229 	      (match_operand:V8SI 2 "nonimmediate_operand")
11230 	      (parallel [(const_int 0) (const_int 2)
11231 			 (const_int 4) (const_int 6)])))))]
11232   "TARGET_AVX2 && <mask_avx512vl_condition>"
11233   "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; 256-bit vpmuldq insn.  NOTE(review): unlike the unsigned v8si variant,
;; the condition here omits <mask_avx512vl_condition> and the prefix is
;; "vex" rather than "maybe_evex" — confirm the masked (EVEX) form is
;; still gated correctly.  Wrapper RTL lines are elided in this extraction.
11235 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
11236   [(set (match_operand:V4DI 0 "register_operand" "=v")
11240 	      (match_operand:V8SI 1 "nonimmediate_operand" "%v")
11241 	      (parallel [(const_int 0) (const_int 2)
11242 			 (const_int 4) (const_int 6)])))
11245 	      (match_operand:V8SI 2 "nonimmediate_operand" "vm")
11246 	      (parallel [(const_int 0) (const_int 2)
11247 			 (const_int 4) (const_int 6)])))))]
11248   "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11249   "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11250   [(set_attr "type" "sseimul")
11251    (set_attr "prefix_extra" "1")
11252    (set_attr "prefix" "vex")
11253    (set_attr "mode" "OI")])
;; SSE4.1 pmuldq: signed widening multiply of the even SI elements of two
;; V4SI operands into V2DI.  NOTE(review): wrapper lines elided here.
11255 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
11256   [(set (match_operand:V2DI 0 "register_operand")
11260 	      (match_operand:V4SI 1 "vector_operand")
11261 	      (parallel [(const_int 0) (const_int 2)])))
11264 	      (match_operand:V4SI 2 "vector_operand")
11265 	      (parallel [(const_int 0) (const_int 2)])))))]
11266   "TARGET_SSE4_1 && <mask_avx512vl_condition>"
11267   "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuldq/vpmuldq insn: two noavx alternatives (Yr preferred to avoid the
;; longer encoding) plus the VEX three-operand form.
;; NOTE(review): wrapper RTL lines are elided in this extraction.
11269 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
11270   [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
11274 	      (match_operand:V4SI 1 "vector_operand" "%0,0,v")
11275 	      (parallel [(const_int 0) (const_int 2)])))
11278 	      (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
11279 	      (parallel [(const_int 0) (const_int 2)])))))]
11280   "TARGET_SSE4_1 && <mask_avx512vl_condition>
11281    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11283    pmuldq\t{%2, %0|%0, %2}
11284    pmuldq\t{%2, %0|%0, %2}
11285    vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11286   [(set_attr "isa" "noavx,noavx,avx")
11287    (set_attr "type" "sseimul")
11288    (set_attr "prefix_data16" "1,1,*")
11289    (set_attr "prefix_extra" "1")
11290    (set_attr "prefix" "orig,orig,vex")
11291    (set_attr "mode" "TI")])
;; 512-bit vpmaddwd: multiply adjacent signed words and add the pairs,
;; producing a vector of DWORD sums (<sseunpackmode> of VI2_AVX2).
;; Expressed as an unspec because the pairwise multiply-add has no
;; canonical RTL form.  Masking is appended via <mask_operand3>.
;; Fix: dropped the stray ';' that followed the output-template string
;; (harmless only because ';' starts an MD comment, but it was a typo).
11293 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
11294   [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
11295           (unspec:<sseunpackmode>
11296             [(match_operand:VI2_AVX2 1 "register_operand" "v")
11297              (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
11298              UNSPEC_PMADDWD512))]
11299    "TARGET_AVX512BW && <mask_mode512bit_condition>"
11300    "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11301   [(set_attr "type" "sseiadd")
11302    (set_attr "prefix" "evex")
11303    (set_attr "mode" "XI")])
;; 256-bit pmaddwd expander: RTL spells out the semantics — sign-extend
;; even-indexed words of each input, multiply and add pairwise, likewise
;; for odd-indexed words, then add the two partial vectors.
;; NOTE(review): plus/mult/sign_extend wrapper lines are elided here.
11305 (define_expand "avx2_pmaddwd"
11306   [(set (match_operand:V8SI 0 "register_operand")
11311 		  (match_operand:V16HI 1 "nonimmediate_operand")
11312 		  (parallel [(const_int 0) (const_int 2)
11313 			     (const_int 4) (const_int 6)
11314 			     (const_int 8) (const_int 10)
11315 			     (const_int 12) (const_int 14)])))
11318 		  (match_operand:V16HI 2 "nonimmediate_operand")
11319 		  (parallel [(const_int 0) (const_int 2)
11320 			     (const_int 4) (const_int 6)
11321 			     (const_int 8) (const_int 10)
11322 			     (const_int 12) (const_int 14)]))))
11325 		(vec_select:V8HI (match_dup 1)
11326 		  (parallel [(const_int 1) (const_int 3)
11327 			     (const_int 5) (const_int 7)
11328 			     (const_int 9) (const_int 11)
11329 			     (const_int 13) (const_int 15)])))
11331 		(vec_select:V8HI (match_dup 2)
11332 		  (parallel [(const_int 1) (const_int 3)
11333 			     (const_int 5) (const_int 7)
11334 			     (const_int 9) (const_int 11)
11335 			     (const_int 13) (const_int 15)]))))))]
11337   "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; vpmaddwd insn matching the avx2_pmaddwd expander; second alternative
;; allows the EVEX register file when AVX512BW is available.
;; NOTE(review): plus/mult/sign_extend wrapper lines are elided here.
11339 (define_insn "*avx2_pmaddwd"
11340   [(set (match_operand:V8SI 0 "register_operand" "=x,v")
11345 		  (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
11346 		  (parallel [(const_int 0) (const_int 2)
11347 			     (const_int 4) (const_int 6)
11348 			     (const_int 8) (const_int 10)
11349 			     (const_int 12) (const_int 14)])))
11352 		  (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
11353 		  (parallel [(const_int 0) (const_int 2)
11354 			     (const_int 4) (const_int 6)
11355 			     (const_int 8) (const_int 10)
11356 			     (const_int 12) (const_int 14)]))))
11359 		(vec_select:V8HI (match_dup 1)
11360 		  (parallel [(const_int 1) (const_int 3)
11361 			     (const_int 5) (const_int 7)
11362 			     (const_int 9) (const_int 11)
11363 			     (const_int 13) (const_int 15)])))
11365 		(vec_select:V8HI (match_dup 2)
11366 		  (parallel [(const_int 1) (const_int 3)
11367 			     (const_int 5) (const_int 7)
11368 			     (const_int 9) (const_int 11)
11369 			     (const_int 13) (const_int 15)]))))))]
11370   "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11371   "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
11372   [(set_attr "type" "sseiadd")
11373    (set_attr "isa" "*,avx512bw")
11374    (set_attr "prefix" "vex,evex")
11375    (set_attr "mode" "OI")])
;; 128-bit pmaddwd expander: same even/odd widening multiply-add
;; structure as avx2_pmaddwd, on V8HI inputs producing V4SI.
;; NOTE(review): plus/mult/sign_extend wrapper lines are elided here.
11377 (define_expand "sse2_pmaddwd"
11378   [(set (match_operand:V4SI 0 "register_operand")
11383 		  (match_operand:V8HI 1 "vector_operand")
11384 		  (parallel [(const_int 0) (const_int 2)
11385 			     (const_int 4) (const_int 6)])))
11388 		  (match_operand:V8HI 2 "vector_operand")
11389 		  (parallel [(const_int 0) (const_int 2)
11390 			     (const_int 4) (const_int 6)]))))
11393 		(vec_select:V4HI (match_dup 1)
11394 		  (parallel [(const_int 1) (const_int 3)
11395 			     (const_int 5) (const_int 7)])))
11397 		(vec_select:V4HI (match_dup 2)
11398 		  (parallel [(const_int 1) (const_int 3)
11399 			     (const_int 5) (const_int 7)]))))))]
11401   "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmaddwd/vpmaddwd insn: noavx, avx and avx512bw register-file
;; alternatives.  NOTE(review): wrapper RTL lines are elided here.
11403 (define_insn "*sse2_pmaddwd"
11404   [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
11409 		  (match_operand:V8HI 1 "vector_operand" "%0,x,v")
11410 		  (parallel [(const_int 0) (const_int 2)
11411 			     (const_int 4) (const_int 6)])))
11414 		  (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
11415 		  (parallel [(const_int 0) (const_int 2)
11416 			     (const_int 4) (const_int 6)]))))
11419 		(vec_select:V4HI (match_dup 1)
11420 		  (parallel [(const_int 1) (const_int 3)
11421 			     (const_int 5) (const_int 7)])))
11423 		(vec_select:V4HI (match_dup 2)
11424 		  (parallel [(const_int 1) (const_int 3)
11425 			     (const_int 5) (const_int 7)]))))))]
11426   "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11428    pmaddwd\t{%2, %0|%0, %2}
11429    vpmaddwd\t{%2, %1, %0|%0, %1, %2}
11430    vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
11431   [(set_attr "isa" "noavx,avx,avx512bw")
11432    (set_attr "type" "sseiadd")
11433    (set_attr "atom_unit" "simul")
11434    (set_attr "prefix_data16" "1,*,*")
11435    (set_attr "prefix" "orig,vex,evex")
11436    (set_attr "mode" "TI")])
;; vpmullq (AVX512DQ): low 64 bits of each QWORD product.
;; NOTE(review): the (mult ...) RTL line is elided in this extraction.
11438 (define_insn "avx512dq_mul<mode>3<mask_name>"
11439   [(set (match_operand:VI8 0 "register_operand" "=v")
11441 	  (match_operand:VI8 1 "register_operand" "v")
11442 	  (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
11443   "TARGET_AVX512DQ && <mask_mode512bit_condition>"
11444   "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11445   [(set_attr "type" "sseimul")
11446    (set_attr "prefix" "evex")
11447    (set_attr "mode" "<sseinsnmode>")])
;; DWORD-element vector multiply: operands that aren't vector_operands are
;; forced to registers, then either a direct pmulld-style pattern is used
;; or (plain SSE2) the multiply is synthesized by
;; ix86_expand_sse2_mulv4si3.  NOTE(review): the branching C statements
;; around these calls are elided in this extraction.
11449 (define_expand "mul<mode>3<mask_name>"
11450   [(set (match_operand:VI4_AVX512F 0 "register_operand")
11452 	  (match_operand:VI4_AVX512F 1 "general_vector_operand")
11453 	  (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
11454   "TARGET_SSE2 && <mask_mode512bit_condition>"
11458   if (!vector_operand (operands[1], <MODE>mode))
11459     operands[1] = force_reg (<MODE>mode, operands[1]);
11460   if (!vector_operand (operands[2], <MODE>mode))
11461     operands[2] = force_reg (<MODE>mode, operands[2]);
11462   ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
11466   ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; pmulld/vpmulld: low 32 bits of each DWORD product; decoded as a vector
;; (multi-uop) op on btver2, hence the btver2_decode attribute.
;; NOTE(review): the (mult ...) RTL line is elided in this extraction.
11471 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
11472   [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
11474 	  (match_operand:VI4_AVX512F 1 "vector_operand" "%0,0,v")
11475 	  (match_operand:VI4_AVX512F 2 "vector_operand" "YrBm,*xBm,vm")))]
11476   "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11477    && <mask_mode512bit_condition>"
11479    pmulld\t{%2, %0|%0, %2}
11480    pmulld\t{%2, %0|%0, %2}
11481    vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11482   [(set_attr "isa" "noavx,noavx,avx")
11483    (set_attr "type" "sseimul")
11484    (set_attr "prefix_extra" "1")
11485    (set_attr "prefix" "<mask_prefix4>")
11486    (set_attr "btver2_decode" "vector,vector,vector")
11487    (set_attr "mode" "<sseinsnmode>")])
;; QWORD-element vector multiply: no low-part insn before AVX512DQ, so it
;; is synthesized by ix86_expand_sse2_mulvxdi3.
;; NOTE(review): the condition and closing of the body are elided here.
11489 (define_expand "mul<mode>3"
11490   [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
11491 	(mult:VI8_AVX2_AVX512F
11492 	  (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
11493 	  (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
11496   ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Widening multiply of the high half of the input vectors, expanded
;; entirely in C by ix86_expand_mul_widen_hilo.
;; NOTE(review): trailing arguments/DONE are elided in this extraction.
11500 (define_expand "vec_widen_<s>mult_hi_<mode>"
11501   [(match_operand:<sseunpackmode> 0 "register_operand")
11502    (any_extend:<sseunpackmode>
11503      (match_operand:VI124_AVX2 1 "register_operand"))
11504    (match_operand:VI124_AVX2 2 "register_operand")]
11507   ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Widening multiply of the low half; mirror of the _hi_ expander above.
;; NOTE(review): trailing arguments/DONE are elided in this extraction.
11512 (define_expand "vec_widen_<s>mult_lo_<mode>"
11513   [(match_operand:<sseunpackmode> 0 "register_operand")
11514    (any_extend:<sseunpackmode>
11515      (match_operand:VI124_AVX2 1 "register_operand"))
11516    (match_operand:VI124_AVX2 2 "register_operand")]
11519   ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
11524 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
11525 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; Signed even-element widening multiply for plain SSE2 (no pmuldq),
;; expanded in C by ix86_expand_mul_widen_evenodd.
;; NOTE(review): trailing arguments/DONE are elided in this extraction.
11526 (define_expand "vec_widen_smult_even_v4si"
11527   [(match_operand:V2DI 0 "register_operand")
11528    (match_operand:V4SI 1 "vector_operand")
11529    (match_operand:V4SI 2 "vector_operand")]
11532   ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Widening multiply of the odd-indexed elements, expanded in C.
;; NOTE(review): trailing arguments/DONE are elided in this extraction.
11537 (define_expand "vec_widen_<s>mult_odd_<mode>"
11538   [(match_operand:<sseunpackmode> 0 "register_operand")
11539    (any_extend:<sseunpackmode>
11540      (match_operand:VI4_AVX512F 1 "general_vector_operand"))
11541    (match_operand:VI4_AVX512F 2 "general_vector_operand")]
11544   ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Suffix used to pick the pmaddwd generator in sdot_prod<mode>: the
;; 512-bit V32HI mode needs the explicit "512v32hi" pattern name.
11549 (define_mode_attr SDOT_PMADD_SUF
11550   [(V32HI "512v32hi") (V16HI "") (V8HI "")])
;; Signed dot-product: pmaddwd into a temporary, then add accumulator
;; operand 3 into operand 0.
;; NOTE(review): the tail of the gen_rtx_PLUS call and DONE are elided.
11552 (define_expand "sdot_prod<mode>"
11553   [(match_operand:<sseunpackmode> 0 "register_operand")
11554    (match_operand:VI2_AVX2 1 "register_operand")
11555    (match_operand:VI2_AVX2 2 "register_operand")
11556    (match_operand:<sseunpackmode> 3 "register_operand")]
11559   rtx t = gen_reg_rtx (<sseunpackmode>mode);
11560   emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
11561   emit_insn (gen_rtx_SET (operands[0],
11562 			  gen_rtx_PLUS (<sseunpackmode>mode,
11567 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
11568 ;; back together when madd is available.
;; XOP V4SI dot-product: pmacsdqh accumulates the high even/odd products
;; into t, then pmacsdql adds the low products plus t into operand 0.
;; NOTE(review): the condition line and DONE are elided in this extraction.
11569 (define_expand "sdot_prodv4si"
11570   [(match_operand:V2DI 0 "register_operand")
11571    (match_operand:V4SI 1 "register_operand")
11572    (match_operand:V4SI 2 "register_operand")
11573    (match_operand:V2DI 3 "register_operand")]
11576   rtx t = gen_reg_rtx (V2DImode);
11577   emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
11578   emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Unsigned rounding average (pavgb/pavgw): (a + b + 1) >> 1 in the
;; widened mode, truncated back; operand 3 is set to the all-ones-bias
;; constant CONST1_RTX.  NOTE(review): the '+1', shift count and
;; condition lines are elided in this extraction.
11582 (define_expand "uavg<mode>3_ceil"
11583   [(set (match_operand:VI12_AVX2 0 "register_operand")
11584 	(truncate:VI12_AVX2
11585 	  (lshiftrt:<ssedoublemode>
11586 	    (plus:<ssedoublemode>
11587 	      (plus:<ssedoublemode>
11588 		(zero_extend:<ssedoublemode>
11589 		  (match_operand:VI12_AVX2 1 "vector_operand"))
11590 		(zero_extend:<ssedoublemode>
11591 		  (match_operand:VI12_AVX2 2 "vector_operand")))
11596   operands[3] = CONST1_RTX(<MODE>mode);
11597   ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Unsigned sum-of-absolute-differences reduction, 128-bit: psadbw gives
;; per-QWORD sums; convert_move reshapes V2DI -> V4SI, then the
;; accumulator operand 3 is added in.
;; NOTE(review): the condition line and DONE are elided in this extraction.
11600 (define_expand "usadv16qi"
11601   [(match_operand:V4SI 0 "register_operand")
11602    (match_operand:V16QI 1 "register_operand")
11603    (match_operand:V16QI 2 "vector_operand")
11604    (match_operand:V4SI 3 "vector_operand")]
11607   rtx t1 = gen_reg_rtx (V2DImode);
11608   rtx t2 = gen_reg_rtx (V4SImode);
11609   emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
11610   convert_move (t2, t1, 0);
11611   emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
;; 256-bit SAD reduction; same shape as usadv16qi using vpsadbw.
;; NOTE(review): the condition line and DONE are elided in this extraction.
11615 (define_expand "usadv32qi"
11616   [(match_operand:V8SI 0 "register_operand")
11617    (match_operand:V32QI 1 "register_operand")
11618    (match_operand:V32QI 2 "nonimmediate_operand")
11619    (match_operand:V8SI 3 "nonimmediate_operand")]
11622   rtx t1 = gen_reg_rtx (V4DImode);
11623   rtx t2 = gen_reg_rtx (V8SImode);
11624   emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
11625   convert_move (t2, t1, 0);
11626   emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
;; 512-bit SAD reduction; same shape as usadv16qi using AVX512 vpsadbw.
;; NOTE(review): the condition line and DONE are elided in this extraction.
11630 (define_expand "usadv64qi"
11631   [(match_operand:V16SI 0 "register_operand")
11632    (match_operand:V64QI 1 "register_operand")
11633    (match_operand:V64QI 2 "nonimmediate_operand")
11634    (match_operand:V16SI 3 "nonimmediate_operand")]
11637   rtx t1 = gen_reg_rtx (V8DImode);
11638   rtx t2 = gen_reg_rtx (V16SImode);
11639   emit_insn (gen_avx512f_psadbw (t1, operands[1], operands[2]));
11640   convert_move (t2, t1, 0);
11641   emit_insn (gen_addv16si3 (operands[0], t2, operands[3]));
;; Arithmetic right shift (vpsra{w,d,q}) with optional masking; shift
;; count is either an XMM register or an immediate (constraint "v,N"),
;; and length_immediate reflects which alternative was chosen.
;; NOTE(review): the condition line is elided in this extraction.
11645 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
11646   [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
11647 	(ashiftrt:VI248_AVX512BW_1
11648 	  (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
11649 	  (match_operand:DI 2 "nonmemory_operand" "v,N")))]
11651   "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11652   [(set_attr "type" "sseishft")
11653    (set (attr "length_immediate")
11654      (if_then_else (match_operand 2 "const_int_operand")
11656        (const_string "0")))
11657    (set_attr "mode" "<sseinsnmode>")])
11659 (define_insn "ashr<mode>3"
11660 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
11661 (ashiftrt:VI24_AVX2
11662 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
11663 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
11666 psra<ssemodesuffix>\t{%2, %0|%0, %2}
11667 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11668 [(set_attr "isa" "noavx,avx")
11669 (set_attr "type" "sseishft")
11670 (set (attr "length_immediate")
11671 (if_then_else (match_operand 2 "const_int_operand")
11673 (const_string "0")))
11674 (set_attr "prefix_data16" "1,*")
11675 (set_attr "prefix" "orig,vex")
11676 (set_attr "mode" "<sseinsnmode>")])
11678 (define_insn "ashr<mode>3<mask_name>"
11679 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
11680 (ashiftrt:VI248_AVX512BW_AVX512VL
11681 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
11682 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
11684 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11685 [(set_attr "type" "sseishft")
11686 (set (attr "length_immediate")
11687 (if_then_else (match_operand 2 "const_int_operand")
11689 (const_string "0")))
11690 (set_attr "mode" "<sseinsnmode>")])
;; Logical shifts (left and right via the any_lshift iterator), EVEX
;; form with optional masking; count is a vector register or immediate.
11692 (define_insn "<mask_codefor><shift_insn><mode>3<mask_name>"
11693 [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
11694 (any_lshift:VI248_AVX512BW_2
11695 (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
11696 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
11698 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11699 [(set_attr "type" "sseishft")
11700 (set (attr "length_immediate")
11701 (if_then_else (match_operand 2 "const_int_operand")
11703 (const_string "0")))
11704 (set_attr "mode" "<sseinsnmode>")])
;; Logical shifts, legacy SSE2 (destructive) and VEX (three-operand)
;; encodings, mirroring the ashr<mode>3 pattern above.
11706 (define_insn "<shift_insn><mode>3"
11707 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
11708 (any_lshift:VI248_AVX2
11709 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
11710 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
11713 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
11714 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11715 [(set_attr "isa" "noavx,avx")
11716 (set_attr "type" "sseishft")
11717 (set (attr "length_immediate")
11718 (if_then_else (match_operand 2 "const_int_operand")
11720 (const_string "0")))
11721 (set_attr "prefix_data16" "1,*")
11722 (set_attr "prefix" "orig,vex")
11723 (set_attr "mode" "<sseinsnmode>")])
;; Logical shifts for the full AVX512BW mode set, masked variant.  Note
;; the memory alternative pairs a memory source with an immediate count.
11725 (define_insn "<shift_insn><mode>3<mask_name>"
11726 [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
11727 (any_lshift:VI248_AVX512BW
11728 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
11729 (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
11731 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11732 [(set_attr "type" "sseishft")
11733 (set (attr "length_immediate")
11734 (if_then_else (match_operand 2 "const_int_operand")
11736 (const_string "0")))
11737 (set_attr "mode" "<sseinsnmode>")])
;; vec_shr_<mode>: whole-vector shift right by a multiple of 8 bits.
;; Implemented by viewing the 128-bit vector as V1TI, shifting in a V1TI
;; temp (operand 3), and reinterpreting the result back (operand 4).
11740 (define_expand "vec_shr_<mode>"
11741 [(set (match_dup 3)
11743 (match_operand:VI_128 1 "register_operand")
11744 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
11745 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
11748 operands[1] = gen_lowpart (V1TImode, operands[1]);
11749 operands[3] = gen_reg_rtx (V1TImode);
11750 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Whole-register byte shift (pslldq/psrldq family), EVEX encoding.
;; The bit count (a multiple of 8) is converted to a byte count before
;; printing the instruction.
11753 (define_insn "avx512bw_<shift_insn><mode>3"
11754 [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
11755 (any_lshift:VIMAX_AVX512VL
11756 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
11757 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
11760 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
11761 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
11763 [(set_attr "type" "sseishft")
11764 (set_attr "length_immediate" "1")
11765 (set_attr "prefix" "maybe_evex")
11766 (set_attr "mode" "<sseinsnmode>")])
;; Whole-register byte shift, SSE2/AVX encodings; alternative 0 is the
;; destructive legacy form, alternative 1 the three-operand VEX form.
11768 (define_insn "<sse2_avx2>_<shift_insn><mode>3"
11769 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
11770 (any_lshift:VIMAX_AVX2
11771 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
11772 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
11775 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
11777 switch (which_alternative)
11780 return "p<vshift>dq\t{%2, %0|%0, %2}";
11782 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
11784 gcc_unreachable ();
11787 [(set_attr "isa" "noavx,avx")
11788 (set_attr "type" "sseishft")
11789 (set_attr "length_immediate" "1")
11790 (set_attr "atom_unit" "sishuf")
11791 (set_attr "prefix_data16" "1,*")
11792 (set_attr "prefix" "orig,vex")
11793 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 variable rotate (vprolv/vprorv): per-element rotate counts
;; come from vector operand 2; optional merge-masking via <mask_name>.
11795 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
11796 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11797 (any_rotate:VI48_AVX512VL
11798 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
11799 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
11801 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11802 [(set_attr "prefix" "evex")
11803 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 immediate rotate (vprol/vpror): single rotate count as an
;; 8-bit immediate applied to every element.
11805 (define_insn "<avx512>_<rotate><mode><mask_name>"
11806 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11807 (any_rotate:VI48_AVX512VL
11808 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
11809 (match_operand:SI 2 "const_0_to_255_operand")))]
11811 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11812 [(set_attr "prefix" "evex")
11813 (set_attr "mode" "<sseinsnmode>")])
;; Integer min/max expander for the 256/512-bit mode set; just
;; canonicalizes operands (at most one memory operand).
11815 (define_expand "<code><mode>3"
11816 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
11817 (maxmin:VI124_256_AVX512F_AVX512BW
11818 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
11819 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
11821 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX2 256-bit vpmax*/vpmin* insn; commutative ("%" on operand 1).
11823 (define_insn "*avx2_<code><mode>3"
11824 [(set (match_operand:VI124_256 0 "register_operand" "=v")
11826 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
11827 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
11828 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11829 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11830 [(set_attr "type" "sseiadd")
11831 (set_attr "prefix_extra" "1")
11832 (set_attr "prefix" "vex")
11833 (set_attr "mode" "OI")])
;; Masked min/max expander: vec_merge of the maxmin result with
;; operand 3 under mask operand 4 (standard AVX512 merge-masking shape).
11835 (define_expand "<code><mode>3_mask"
11836 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11837 (vec_merge:VI48_AVX512VL
11838 (maxmin:VI48_AVX512VL
11839 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11840 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11841 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
11842 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11844 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; AVX512F min/max insn for 32/64-bit elements, optional masking.
11846 (define_insn "*avx512f_<code><mode>3<mask_name>"
11847 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11848 (maxmin:VI48_AVX512VL
11849 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
11850 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
11851 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11852 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11853 [(set_attr "type" "sseiadd")
11854 (set_attr "prefix_extra" "1")
11855 (set_attr "prefix" "maybe_evex")
11856 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 min/max insn for 8/16-bit elements (requires AVX512BW-class
;; encodings; condition line not visible in this excerpt).
11858 (define_insn "<mask_codefor><code><mode>3<mask_name>"
11859 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
11860 (maxmin:VI12_AVX512VL
11861 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
11862 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
11864 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11865 [(set_attr "type" "sseiadd")
11866 (set_attr "prefix" "evex")
11867 (set_attr "mode" "<sseinsnmode>")])
;; Min/max for 64-bit elements.  When a native instruction exists
;; (V8DI, or AVX512VL for narrower widths) just canonicalize operands;
;; otherwise synthesize via a vector conditional move: pick GT/GTU by
;; signedness, swap the data operands for the MIN forms, and hand the
;; six-operand vcond array to ix86_expand_int_vcond.
11869 (define_expand "<code><mode>3"
11870 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
11871 (maxmin:VI8_AVX2_AVX512F
11872 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
11873 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
11877 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
11878 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11881 enum rtx_code code;
11886 xops[0] = operands[0];
11888 if (<CODE> == SMAX || <CODE> == UMAX)
11890 xops[1] = operands[1];
11891 xops[2] = operands[2];
11895 xops[1] = operands[2];
11896 xops[2] = operands[1];
11899 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
11901 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
11902 xops[4] = operands[1];
11903 xops[5] = operands[2];
11905 ok = ix86_expand_int_vcond (xops);
;; Signed min/max for 128-bit V16QI/V8HI/V4SI.  SSE4.1 (or the native
;; V8HI pmaxsw/pminsw case) canonicalizes directly; otherwise the
;; operation is synthesized through ix86_expand_int_vcond using a
;; signed GT comparison, swapping data operands for the MIN form.
11911 (define_expand "<code><mode>3"
11912 [(set (match_operand:VI124_128 0 "register_operand")
11914 (match_operand:VI124_128 1 "vector_operand")
11915 (match_operand:VI124_128 2 "vector_operand")))]
11918 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
11919 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11925 xops[0] = operands[0];
11926 operands[1] = force_reg (<MODE>mode, operands[1]);
11927 operands[2] = force_reg (<MODE>mode, operands[2]);
11929 if (<CODE> == SMAX)
11931 xops[1] = operands[1];
11932 xops[2] = operands[2];
11936 xops[1] = operands[2];
11937 xops[2] = operands[1];
11940 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
11941 xops[4] = operands[1];
11942 xops[5] = operands[2];
11944 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 min/max insn for byte/dword elements; three alternatives:
;; two destructive legacy forms (Yr preferred for encoding size) and
;; the non-destructive AVX form with optional masking.
11950 (define_insn "*sse4_1_<code><mode>3<mask_name>"
11951 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
11953 (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
11954 (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
11956 && <mask_mode512bit_condition>
11957 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11959 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11960 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11961 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11962 [(set_attr "isa" "noavx,noavx,avx")
11963 (set_attr "type" "sseiadd")
11964 (set_attr "prefix_extra" "1,1,*")
11965 (set_attr "prefix" "orig,orig,vex")
11966 (set_attr "mode" "TI")])
;; Native V8HI min/max (pmaxsw/pminsw exist since SSE2); legacy, VEX
;; and EVEX alternatives.
11968 (define_insn "*<code>v8hi3"
11969 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
11971 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
11972 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
11973 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11975 p<maxmin_int>w\t{%2, %0|%0, %2}
11976 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
11977 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
11978 [(set_attr "isa" "noavx,avx,avx512bw")
11979 (set_attr "type" "sseiadd")
11980 (set_attr "prefix_data16" "1,*,*")
11981 (set_attr "prefix_extra" "*,1,1")
11982 (set_attr "prefix" "orig,vex,evex")
11983 (set_attr "mode" "TI")])
;; Unsigned min/max for 128-bit vectors.  SSE4.1 (or native V16QI
;; pmaxub/pminub) canonicalizes directly.  UMAX of V8HI without SSE4.1
;; uses the identity umax(a,b) = ussub(a,b) + b (unsigned saturating
;; subtract then add).  Everything else falls back to an unsigned-GT
;; vector conditional move via ix86_expand_int_vcond.
11985 (define_expand "<code><mode>3"
11986 [(set (match_operand:VI124_128 0 "register_operand")
11988 (match_operand:VI124_128 1 "vector_operand")
11989 (match_operand:VI124_128 2 "vector_operand")))]
11992 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
11993 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11994 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
11996 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
11997 operands[1] = force_reg (<MODE>mode, operands[1]);
11998 if (rtx_equal_p (op3, op2))
11999 op3 = gen_reg_rtx (V8HImode);
12000 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
12001 emit_insn (gen_addv8hi3 (op0, op3, op2));
12009 operands[1] = force_reg (<MODE>mode, operands[1]);
12010 operands[2] = force_reg (<MODE>mode, operands[2]);
12012 xops[0] = operands[0];
12014 if (<CODE> == UMAX)
12016 xops[1] = operands[1];
12017 xops[2] = operands[2];
12021 xops[1] = operands[2];
12022 xops[2] = operands[1];
12025 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
12026 xops[4] = operands[1];
12027 xops[5] = operands[2];
12029 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 unsigned min/max insn for word/dword elements; same
;; alternative layout as the VI14_128 signed pattern above.
12035 (define_insn "*sse4_1_<code><mode>3<mask_name>"
12036 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
12038 (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
12039 (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
12041 && <mask_mode512bit_condition>
12042 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12044 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12045 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12046 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12047 [(set_attr "isa" "noavx,noavx,avx")
12048 (set_attr "type" "sseiadd")
12049 (set_attr "prefix_extra" "1,1,*")
12050 (set_attr "prefix" "orig,orig,vex")
12051 (set_attr "mode" "TI")])
;; Native V16QI unsigned min/max (pmaxub/pminub since SSE2); legacy,
;; VEX and EVEX alternatives.
12053 (define_insn "*<code>v16qi3"
12054 [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
12056 (match_operand:V16QI 1 "vector_operand" "%0,x,v")
12057 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
12058 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12060 p<maxmin_int>b\t{%2, %0|%0, %2}
12061 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
12062 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
12063 [(set_attr "isa" "noavx,avx,avx512bw")
12064 (set_attr "type" "sseiadd")
12065 (set_attr "prefix_data16" "1,*,*")
12066 (set_attr "prefix_extra" "*,1,1")
12067 (set_attr "prefix" "orig,vex,evex")
12068 (set_attr "mode" "TI")])
12070 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12072 ;; Parallel integral comparisons
12074 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX2 256-bit equality expander: canonicalize operands for the
;; commutative compare.
12076 (define_expand "avx2_eq<mode>3"
12077 [(set (match_operand:VI_256 0 "register_operand")
12079 (match_operand:VI_256 1 "nonimmediate_operand")
12080 (match_operand:VI_256 2 "nonimmediate_operand")))]
12082 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; AVX2 vpcmpeq* producing an all-ones/all-zeros vector result.
12084 (define_insn "*avx2_eq<mode>3"
12085 [(set (match_operand:VI_256 0 "register_operand" "=x")
12087 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
12088 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
12089 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12090 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12091 [(set_attr "type" "ssecmp")
12092 (set_attr "prefix_extra" "1")
12093 (set_attr "prefix" "vex")
12094 (set_attr "mode" "OI")])
;; AVX512 equality expanders producing a mask register result
;; (UNSPEC_MASKED_EQ); byte/word element version.
12096 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
12097 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
12098 (unspec:<avx512fmaskmode>
12099 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
12100 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
12101 UNSPEC_MASKED_EQ))]
12103 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Dword/qword element version of the expander above.
12105 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
12106 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
12107 (unspec:<avx512fmaskmode>
12108 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
12109 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
12110 UNSPEC_MASKED_EQ))]
12112 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Masked equality insn, byte/word elements (AVX512BW).  Comparing
;; against a zero vector (constraint "C") is emitted as vptestnm
;; instead of vpcmpeq.
12114 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
12115 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
12116 (unspec:<avx512fmaskmode>
12117 [(match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
12118 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
12119 UNSPEC_MASKED_EQ))]
12120 "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12122 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
12123 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
12124 [(set_attr "type" "ssecmp")
12125 (set_attr "prefix_extra" "1")
12126 (set_attr "prefix" "evex")
12127 (set_attr "mode" "<sseinsnmode>")])
;; Masked equality insn, dword/qword elements (AVX512F); same
;; zero-operand vptestnm optimization as above.
12129 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
12130 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
12131 (unspec:<avx512fmaskmode>
12132 [(match_operand:VI48_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
12133 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
12134 UNSPEC_MASKED_EQ))]
12135 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12137 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
12138 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
12139 [(set_attr "type" "ssecmp")
12140 (set_attr "prefix_extra" "1")
12141 (set_attr "prefix" "evex")
12142 (set_attr "mode" "<sseinsnmode>")])
;; SSE4.1 pcmpeqq for V2DI (64-bit element equality predates nothing
;; narrower than SSE4.1); legacy and VEX alternatives.
12144 (define_insn "*sse4_1_eqv2di3"
12145 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
12147 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
12148 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
12149 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12151 pcmpeqq\t{%2, %0|%0, %2}
12152 pcmpeqq\t{%2, %0|%0, %2}
12153 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
12154 [(set_attr "isa" "noavx,noavx,avx")
12155 (set_attr "type" "ssecmp")
12156 (set_attr "prefix_extra" "1")
12157 (set_attr "prefix" "orig,orig,vex")
12158 (set_attr "mode" "TI")])
;; SSE2 pcmpeqb/w/d; disabled under TARGET_XOP (XOP has its own
;; comparison patterns).
12160 (define_insn "*sse2_eq<mode>3"
12161 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
12163 (match_operand:VI124_128 1 "vector_operand" "%0,x")
12164 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
12165 "TARGET_SSE2 && !TARGET_XOP
12166 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12168 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
12169 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12170 [(set_attr "isa" "noavx,avx")
12171 (set_attr "type" "ssecmp")
12172 (set_attr "prefix_data16" "1,*")
12173 (set_attr "prefix" "orig,vex")
12174 (set_attr "mode" "TI")])
;; Named expanders for the two insns above; both just canonicalize the
;; commutative operands.
12176 (define_expand "sse2_eq<mode>3"
12177 [(set (match_operand:VI124_128 0 "register_operand")
12179 (match_operand:VI124_128 1 "vector_operand")
12180 (match_operand:VI124_128 2 "vector_operand")))]
12181 "TARGET_SSE2 && !TARGET_XOP "
12182 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12184 (define_expand "sse4_1_eqv2di3"
12185 [(set (match_operand:V2DI 0 "register_operand")
12187 (match_operand:V2DI 1 "vector_operand")
12188 (match_operand:V2DI 2 "vector_operand")))]
12190 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; SSE4.2 pcmpgtq: signed greater-than for V2DI; legacy and VEX forms.
;; Not commutative, so operand 1 must be a register.
12192 (define_insn "sse4_2_gtv2di3"
12193 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
12195 (match_operand:V2DI 1 "register_operand" "0,0,x")
12196 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
12199 pcmpgtq\t{%2, %0|%0, %2}
12200 pcmpgtq\t{%2, %0|%0, %2}
12201 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
12202 [(set_attr "isa" "noavx,noavx,avx")
12203 (set_attr "type" "ssecmp")
12204 (set_attr "prefix_extra" "1")
12205 (set_attr "prefix" "orig,orig,vex")
12206 (set_attr "mode" "TI")])
;; AVX2 256-bit signed greater-than producing a vector mask.
12208 (define_insn "avx2_gt<mode>3"
12209 [(set (match_operand:VI_256 0 "register_operand" "=x")
12211 (match_operand:VI_256 1 "register_operand" "x")
12212 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
12214 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12215 [(set_attr "type" "ssecmp")
12216 (set_attr "prefix_extra" "1")
12217 (set_attr "prefix" "vex")
12218 (set_attr "mode" "OI")])
;; AVX512 signed greater-than writing a mask register
;; (UNSPEC_MASKED_GT); dword/qword element version.
12220 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
12221 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12222 (unspec:<avx512fmaskmode>
12223 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
12224 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
12226 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12227 [(set_attr "type" "ssecmp")
12228 (set_attr "prefix_extra" "1")
12229 (set_attr "prefix" "evex")
12230 (set_attr "mode" "<sseinsnmode>")])
;; Byte/word element version of the masked greater-than above.
12232 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
12233 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12234 (unspec:<avx512fmaskmode>
12235 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
12236 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
12238 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12239 [(set_attr "type" "ssecmp")
12240 (set_attr "prefix_extra" "1")
12241 (set_attr "prefix" "evex")
12242 (set_attr "mode" "<sseinsnmode>")])
;; SSE2 pcmpgtb/w/d; disabled under TARGET_XOP.
12244 (define_insn "sse2_gt<mode>3"
12245 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
12247 (match_operand:VI124_128 1 "register_operand" "0,x")
12248 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
12249 "TARGET_SSE2 && !TARGET_XOP"
12251 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
12252 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12253 [(set_attr "isa" "noavx,avx")
12254 (set_attr "type" "ssecmp")
12255 (set_attr "prefix_data16" "1,*")
12256 (set_attr "prefix" "orig,vex")
12257 (set_attr "mode" "TI")])
;; vcond/vcondu expanders: vector conditional select
;;   op0 = (op4 <op3> op5) ? op1 : op2
;; All of them delegate to ix86_expand_int_vcond; the condition checks
;; that the comparison-operand vector has the same element count as the
;; data vectors.  vcond* = signed comparisons, vcondu* = unsigned.
;;
;; 512-bit data, integer comparison (signed).
12259 (define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
12260 [(set (match_operand:V_512 0 "register_operand")
12261 (if_then_else:V_512
12262 (match_operator 3 ""
12263 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
12264 (match_operand:VI_AVX512BW 5 "general_operand")])
12265 (match_operand:V_512 1)
12266 (match_operand:V_512 2)))]
12268 && (GET_MODE_NUNITS (<V_512:MODE>mode)
12269 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
12271 bool ok = ix86_expand_int_vcond (operands);
;; 256-bit data, signed comparison.
12276 (define_expand "vcond<V_256:mode><VI_256:mode>"
12277 [(set (match_operand:V_256 0 "register_operand")
12278 (if_then_else:V_256
12279 (match_operator 3 ""
12280 [(match_operand:VI_256 4 "nonimmediate_operand")
12281 (match_operand:VI_256 5 "general_operand")])
12282 (match_operand:V_256 1)
12283 (match_operand:V_256 2)))]
12285 && (GET_MODE_NUNITS (<V_256:MODE>mode)
12286 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
12288 bool ok = ix86_expand_int_vcond (operands);
;; 128-bit data, signed comparison on 8/16/32-bit elements.
12293 (define_expand "vcond<V_128:mode><VI124_128:mode>"
12294 [(set (match_operand:V_128 0 "register_operand")
12295 (if_then_else:V_128
12296 (match_operator 3 ""
12297 [(match_operand:VI124_128 4 "vector_operand")
12298 (match_operand:VI124_128 5 "general_operand")])
12299 (match_operand:V_128 1)
12300 (match_operand:V_128 2)))]
12302 && (GET_MODE_NUNITS (<V_128:MODE>mode)
12303 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
12305 bool ok = ix86_expand_int_vcond (operands);
;; 128-bit data with a V2DI (64-bit element) signed comparison.
12310 (define_expand "vcond<VI8F_128:mode>v2di"
12311 [(set (match_operand:VI8F_128 0 "register_operand")
12312 (if_then_else:VI8F_128
12313 (match_operator 3 ""
12314 [(match_operand:V2DI 4 "vector_operand")
12315 (match_operand:V2DI 5 "general_operand")])
12316 (match_operand:VI8F_128 1)
12317 (match_operand:VI8F_128 2)))]
12320 bool ok = ix86_expand_int_vcond (operands);
;; Unsigned variants (vcondu) of the expanders above.
12325 (define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
12326 [(set (match_operand:V_512 0 "register_operand")
12327 (if_then_else:V_512
12328 (match_operator 3 ""
12329 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
12330 (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
12331 (match_operand:V_512 1 "general_operand")
12332 (match_operand:V_512 2 "general_operand")))]
12334 && (GET_MODE_NUNITS (<V_512:MODE>mode)
12335 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
12337 bool ok = ix86_expand_int_vcond (operands);
12342 (define_expand "vcondu<V_256:mode><VI_256:mode>"
12343 [(set (match_operand:V_256 0 "register_operand")
12344 (if_then_else:V_256
12345 (match_operator 3 ""
12346 [(match_operand:VI_256 4 "nonimmediate_operand")
12347 (match_operand:VI_256 5 "nonimmediate_operand")])
12348 (match_operand:V_256 1 "general_operand")
12349 (match_operand:V_256 2 "general_operand")))]
12351 && (GET_MODE_NUNITS (<V_256:MODE>mode)
12352 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
12354 bool ok = ix86_expand_int_vcond (operands);
12359 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
12360 [(set (match_operand:V_128 0 "register_operand")
12361 (if_then_else:V_128
12362 (match_operator 3 ""
12363 [(match_operand:VI124_128 4 "vector_operand")
12364 (match_operand:VI124_128 5 "vector_operand")])
12365 (match_operand:V_128 1 "general_operand")
12366 (match_operand:V_128 2 "general_operand")))]
12368 && (GET_MODE_NUNITS (<V_128:MODE>mode)
12369 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
12371 bool ok = ix86_expand_int_vcond (operands);
12376 (define_expand "vcondu<VI8F_128:mode>v2di"
12377 [(set (match_operand:VI8F_128 0 "register_operand")
12378 (if_then_else:VI8F_128
12379 (match_operator 3 ""
12380 [(match_operand:V2DI 4 "vector_operand")
12381 (match_operand:V2DI 5 "vector_operand")])
12382 (match_operand:VI8F_128 1 "general_operand")
12383 (match_operand:VI8F_128 2 "general_operand")))]
12386 bool ok = ix86_expand_int_vcond (operands);
;; Equality-only V2DI conditional select.
12391 (define_expand "vcondeq<VI8F_128:mode>v2di"
12392 [(set (match_operand:VI8F_128 0 "register_operand")
12393 (if_then_else:VI8F_128
12394 (match_operator 3 ""
12395 [(match_operand:V2DI 4 "vector_operand")
12396 (match_operand:V2DI 5 "general_operand")])
12397 (match_operand:VI8F_128 1)
12398 (match_operand:VI8F_128 2)))]
12401 bool ok = ix86_expand_int_vcond (operands);
;; Modes for which a general two-input vector permute can be expanded:
;; 128-bit always; 256-bit with AVX2; 512-bit with AVX512F, except
;; V32HI needs AVX512BW and V64QI needs AVX512VBMI (byte permutes).
12406 (define_mode_iterator VEC_PERM_AVX2
12407 [V16QI V8HI V4SI V2DI V4SF V2DF
12408 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
12409 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
12410 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
12411 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
12412 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
12413 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
;; vec_perm: variable permute of operands 1/2 by integer selector
;; operand 3; all the work is done in ix86_expand_vec_perm.
12415 (define_expand "vec_perm<mode>"
12416 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
12417 (match_operand:VEC_PERM_AVX2 1 "register_operand")
12418 (match_operand:VEC_PERM_AVX2 2 "register_operand")
12419 (match_operand:<sseintvecmode> 3 "register_operand")]
12420 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
12422 ix86_expand_vec_perm (operands);
12426 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12428 ;; Parallel bitwise logical operations
12430 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; one_cmpl: bitwise NOT expressed as XOR with an all-ones constant
;; (operand 2, forced into a register).
12432 (define_expand "one_cmpl<mode>2"
12433 [(set (match_operand:VI 0 "register_operand")
12434 (xor:VI (match_operand:VI 1 "vector_operand")
12438 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
;; andnot expander: op0 = ~op1 & op2 (matches the pandn operand order).
12441 (define_expand "<sse2_avx2>_andnot<mode>3"
12442 [(set (match_operand:VI_AVX2 0 "register_operand")
12444 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
12445 (match_operand:VI_AVX2 2 "vector_operand")))]
;; Masked andnot expanders (merge-masking vec_merge shape), dword/qword
;; then byte/word element variants.
12448 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
12449 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
12450 (vec_merge:VI48_AVX512VL
12453 (match_operand:VI48_AVX512VL 1 "register_operand"))
12454 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
12455 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
12456 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
12459 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
12460 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
12461 (vec_merge:VI12_AVX512VL
12464 (match_operand:VI12_AVX512VL 1 "register_operand"))
12465 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
12466 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
12467 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
;; *andnot<mode>3: op0 = ~op1 & op2 for all integer vector modes.
;; The C output block picks the mnemonic by the insn's "mode" attr:
;; integer pandn (with an element-size suffix where EVEX requires one;
;; there is no vpandnb/vpandnw, so byte/word modes borrow vpandnq) or
;; the float-domain andnps/andnpd forms when the mode attr says so.
;; The mode attr below prefers PS forms on TARGET_SSE_PACKED_SINGLE_
;; INSN_OPTIMAL targets and V8SF for pre-AVX2 256-bit integer ops.
12470 (define_insn "*andnot<mode>3"
12471 [(set (match_operand:VI 0 "register_operand" "=x,x,v")
12473 (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
12474 (match_operand:VI 2 "vector_operand" "xBm,xm,vm")))]
12480 const char *ssesuffix;
12482 switch (get_attr_mode (insn))
12485 gcc_assert (TARGET_AVX512F);
12488 gcc_assert (TARGET_AVX2);
12491 gcc_assert (TARGET_SSE2);
12493 switch (<MODE>mode)
12497 /* There is no vpandnb or vpandnw instruction, nor vpandn for
12498 512-bit vectors. Use vpandnq instead. */
12503 ssesuffix = "<ssemodesuffix>";
12509 ssesuffix = (TARGET_AVX512VL && which_alternative == 2
12510 ? "<ssemodesuffix>" : "");
12513 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
12518 gcc_assert (TARGET_AVX512F);
12521 gcc_assert (TARGET_AVX);
12524 gcc_assert (TARGET_SSE);
12530 gcc_unreachable ();
12533 switch (which_alternative)
12536 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
12540 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
12543 gcc_unreachable ();
12546 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
12547 output_asm_insn (buf, operands);
12550 [(set_attr "isa" "noavx,avx,avx")
12551 (set_attr "type" "sselog")
12552 (set (attr "prefix_data16")
12554 (and (eq_attr "alternative" "0")
12555 (eq_attr "mode" "TI"))
12557 (const_string "*")))
12558 (set_attr "prefix" "orig,vex,evex")
12560 (cond [(and (match_test "<MODE_SIZE> == 16")
12561 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
12562 (const_string "<ssePSmode>")
12563 (match_test "TARGET_AVX2")
12564 (const_string "<sseinsnmode>")
12565 (match_test "TARGET_AVX")
12567 (match_test "<MODE_SIZE> > 16")
12568 (const_string "V8SF")
12569 (const_string "<sseinsnmode>"))
12570 (ior (not (match_test "TARGET_SSE2"))
12571 (match_test "optimize_function_for_size_p (cfun)"))
12572 (const_string "V4SF")
12574 (const_string "<sseinsnmode>")))])
;; andnot with an embedded-broadcast memory operand: the scalar at
;; operand 2 is broadcast across the vector (vpandn with <avx512bcst>).
12576 (define_insn "*andnot<mode>3_bcst"
12577 [(set (match_operand:VI 0 "register_operand" "=v")
12580 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
12581 (vec_duplicate:VI48_AVX512VL
12582 (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
12584 "vpandn<ssemodesuffix>\t{%2<avx512bcst>, %1, %0|%0, %1, %2<avx512bcst>}"
12585 [(set_attr "type" "sselog")
12586 (set_attr "prefix" "evex")
12587 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked andnot insn; operand 3 ("0C") is the merge source
;; (either the destination itself or zero), operand 4 the k-mask.
12589 (define_insn "*andnot<mode>3_mask"
12590 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12591 (vec_merge:VI48_AVX512VL
12594 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
12595 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
12596 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
12597 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
12599 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
12600 [(set_attr "type" "sselog")
12601 (set_attr "prefix" "evex")
12602 (set_attr "mode" "<sseinsnmode>")])
;; Generic and/ior/xor expander: accepts constant vector operands and
;; defers legitimization to ix86_expand_vector_logical_operator.
12604 (define_expand "<code><mode>3"
12605 [(set (match_operand:VI 0 "register_operand")
12607 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
12608 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
12611 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; and/ior/xor insn for 32/64-bit element modes with optional masking.
;; Like *andnot<mode>3, the C block selects integer pand/por/pxor
;; (suffixed when EVEX/masking requires) or the float-domain forms,
;; driven by the same "mode" attribute cond at the bottom.  When a mask
;; is applied with the destructive alternative, the template uses %0 as
;; both source and destination.
12615 (define_insn "<mask_codefor><code><mode>3<mask_name>"
12616 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
12617 (any_logic:VI48_AVX_AVX512F
12618 (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,x,v")
12619 (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
12620 "TARGET_SSE && <mask_mode512bit_condition>
12621 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12626 const char *ssesuffix;
12628 switch (get_attr_mode (insn))
12631 gcc_assert (TARGET_AVX512F);
12634 gcc_assert (TARGET_AVX2);
12637 gcc_assert (TARGET_SSE2);
12639 switch (<MODE>mode)
12643 ssesuffix = "<ssemodesuffix>";
12649 ssesuffix = (TARGET_AVX512VL
12650 && (<mask_applied> || which_alternative == 2)
12651 ? "<ssemodesuffix>" : "");
12654 gcc_unreachable ();
12659 gcc_assert (TARGET_AVX);
12662 gcc_assert (TARGET_SSE);
12668 gcc_unreachable ();
12671 switch (which_alternative)
12674 if (<mask_applied>)
12675 ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
12677 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
12681 ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
12684 gcc_unreachable ();
12687 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
12688 output_asm_insn (buf, operands);
12691 [(set_attr "isa" "noavx,avx,avx")
12692 (set_attr "type" "sselog")
12693 (set (attr "prefix_data16")
12695 (and (eq_attr "alternative" "0")
12696 (eq_attr "mode" "TI"))
12698 (const_string "*")))
12699 (set_attr "prefix" "<mask_prefix3>,evex")
12701 (cond [(and (match_test "<MODE_SIZE> == 16")
12702 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
12703 (const_string "<ssePSmode>")
12704 (match_test "TARGET_AVX2")
12705 (const_string "<sseinsnmode>")
12706 (match_test "TARGET_AVX")
12708 (match_test "<MODE_SIZE> > 16")
12709 (const_string "V8SF")
12710 (const_string "<sseinsnmode>"))
12711 (ior (not (match_test "TARGET_SSE2"))
12712 (match_test "optimize_function_for_size_p (cfun)"))
12713 (const_string "V4SF")
12715 (const_string "<sseinsnmode>")))])
;; Vector logic (and/ior/xor) for 1- and 2-byte element modes.  Same
;; mnemonic-building scheme as the VI48 pattern above (snprintf of a base
;; name plus an optional AVX-512VL "q" suffix for the EVEX alternative),
;; but without masking: byte/word logic has no dedicated masked form.
12717 (define_insn "*<code><mode>3"
12718 [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
12719 (any_logic:VI12_AVX_AVX512F
12720 (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
12721 (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
12722 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12727 const char *ssesuffix;
12729 switch (get_attr_mode (insn))
12732 gcc_assert (TARGET_AVX512F);
12735 gcc_assert (TARGET_AVX2);
12738 gcc_assert (TARGET_SSE2);
12740 switch (<MODE>mode)
12750 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
12753 gcc_unreachable ();
12758 gcc_assert (TARGET_AVX);
12761 gcc_assert (TARGET_SSE);
12767 gcc_unreachable ();
12770 switch (which_alternative)
12773 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
12777 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
12780 gcc_unreachable ();
12783 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
12784 output_asm_insn (buf, operands);
12787 [(set_attr "isa" "noavx,avx,avx")
12788 (set_attr "type" "sselog")
12789 (set (attr "prefix_data16")
12791 (and (eq_attr "alternative" "0")
12792 (eq_attr "mode" "TI"))
12794 (const_string "*")))
12795 (set_attr "prefix" "orig,vex,evex")
12797 (cond [(and (match_test "<MODE_SIZE> == 16")
12798 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
12799 (const_string "<ssePSmode>")
12800 (match_test "TARGET_AVX2")
12801 (const_string "<sseinsnmode>")
12802 (match_test "TARGET_AVX")
12804 (match_test "<MODE_SIZE> > 16")
12805 (const_string "V8SF")
12806 (const_string "<sseinsnmode>"))
12807 (ior (not (match_test "TARGET_SSE2"))
12808 (match_test "optimize_function_for_size_p (cfun)"))
12809 (const_string "V4SF")
12811 (const_string "<sseinsnmode>")))])
;; Vector logic with an embedded broadcast: operand 1 is a scalar in
;; memory duplicated across all lanes ({1toN} EVEX broadcast form).
;; NOTE(review): type "sseiadd" differs from the "sselog" used by the
;; neighbouring logic patterns -- verify this is intentional upstream.
12813 (define_insn "*<code><mode>3_bcst"
12814 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12815 (any_logic:VI48_AVX512VL
12816 (vec_duplicate:VI48_AVX512VL
12817 (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
12818 (match_operand:VI48_AVX512VL 2 "register_operand" "v")))]
12819 "TARGET_AVX512F && <mask_avx512vl_condition>"
12820 "vp<logic><ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
12821 [(set_attr "type" "sseiadd")
12822 (set_attr "prefix" "evex")
12823 (set_attr "mode" "<sseinsnmode>")])
;; All integer vector modes usable by VPTESTM/VPTESTNM: byte/word modes
;; need AVX512BW (plus AVX512VL for the 128/256-bit widths), dword/qword
;; modes need only AVX512F (plus AVX512VL for the narrow widths).
12825 (define_mode_iterator VI1248_AVX512VLBW
12826 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
12827 (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
12828 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
12829 (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
12830 V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
12831 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; Scalar integer modes a narrower mask result may be zero-extended into;
;; DI/SI masks require AVX512BW, HI is always available.
12833 (define_mode_iterator AVX512ZEXTMASK
12834 [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
;; VPTESTM: set mask bit k0[i] per-element (unspec semantics; the unspec
;; tag line is elided in this excerpt), with optional mask-merge operand 3.
12836 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
12837 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12838 (unspec:<avx512fmaskmode>
12839 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12840 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12843 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12844 [(set_attr "prefix" "evex")
12845 (set_attr "mode" "<sseinsnmode>")])
;; VPTESTNM: the negated-test counterpart of the pattern above, emitting
;; vptestnm with the same operand arrangement.
12847 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
12848 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12849 (unspec:<avx512fmaskmode>
12850 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12851 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12854 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12855 [(set_attr "prefix" "evex")
12856 (set_attr "mode" "<sseinsnmode>")])
;; VPTESTM whose mask result is consumed zero-extended into a wider
;; scalar mask mode; only valid when the target mode is strictly wider
;; than the natural mask mode (the size comparison in the condition).
12858 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext"
12859 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
12860 (zero_extend:AVX512ZEXTMASK
12861 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
12862 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12863 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12866 && (<AVX512ZEXTMASK:MODE_SIZE>
12867 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
12868 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12869 [(set_attr "prefix" "evex")
12870 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
;; As above, but the test result is additionally ANDed with mask
;; operand 3 before the zero-extension; emitted as a masked vptestm.
12872 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask"
12873 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
12874 (zero_extend:AVX512ZEXTMASK
12875 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
12876 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
12877 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12878 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12880 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
12882 && (<AVX512ZEXTMASK:MODE_SIZE>
12883 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
12884 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
12885 [(set_attr "prefix" "evex")
12886 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
;; Zero-extended VPTESTNM (negated-test variant of *_testm3_zext).
12888 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext"
12889 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
12890 (zero_extend:AVX512ZEXTMASK
12891 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
12892 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12893 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12896 && (<AVX512ZEXTMASK:MODE_SIZE>
12897 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
12898 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12899 [(set_attr "prefix" "evex")
12900 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
;; Zero-extended, mask-ANDed VPTESTNM (negated-test variant of
;; *_testm3_zext_mask).
12902 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask"
12903 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
12904 (zero_extend:AVX512ZEXTMASK
12905 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
12906 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
12907 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
12908 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
12910 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
12912 && (<AVX512ZEXTMASK:MODE_SIZE>
12913 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
12914 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
12915 [(set_attr "prefix" "evex")
12916 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
12918 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12920 ;; Parallel integral element swizzling
12922 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pack-with-truncation on integer vectors: reinterpret both inputs in
;; the packed (half-width element) mode and pick the even elements of
;; their concatenation via ix86_expand_vec_extract_even_odd.
12924 (define_expand "vec_pack_trunc_<mode>"
12925 [(match_operand:<ssepackmode> 0 "register_operand")
12926 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
12927 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
12930 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
12931 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
12932 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two QImode mask values into one HImode mask: operand 2 goes in
;; the high half (shift amount elided in this excerpt), operand 1 low.
12936 (define_expand "vec_pack_trunc_qi"
12937 [(set (match_operand:HI 0 "register_operand")
12938 (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand"))
12940 (zero_extend:HI (match_operand:QI 1 "register_operand"))))]
;; Pack two HI/SI mask values into the double-width mask mode; the shift
;; amount (operand 3) is computed as the bit width of the source mode.
12943 (define_expand "vec_pack_trunc_<mode>"
12944 [(set (match_operand:<DOUBLEMASKMODE> 0 "register_operand")
12945 (ior:<DOUBLEMASKMODE>
12946 (ashift:<DOUBLEMASKMODE>
12947 (zero_extend:<DOUBLEMASKMODE>
12948 (match_operand:SWI24 2 "register_operand"))
12950 (zero_extend:<DOUBLEMASKMODE>
12951 (match_operand:SWI24 1 "register_operand"))))]
12954 operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
;; Pack two QImode sub-byte boolean masks of nunits/2 live bits each
;; (operand 3 = nunits, must be 8 or 4) into one QImode mask:
;;   result = (op1 & ((1 << nunits/2) - 1)) | (op2 << nunits/2)
;; using mask-register kand/kshift/kior insns.  Without AVX512DQ there is
;; no QImode kshift, so the shift is done in HImode via subregs.
12957 (define_expand "vec_pack_sbool_trunc_qi"
12958 [(match_operand:QI 0 "register_operand")
12959 (match_operand:QI 1 "register_operand")
12960 (match_operand:QI 2 "register_operand")
12961 (match_operand:QI 3 "const_int_operand")]
12964 HOST_WIDE_INT nunits = INTVAL (operands[3]);
12965 rtx mask, tem1, tem2;
12966 if (nunits != 8 && nunits != 4)
12968 mask = gen_reg_rtx (QImode);
12969 emit_move_insn (mask, GEN_INT ((1 << (nunits / 2)) - 1));
12970 tem1 = gen_reg_rtx (QImode);
12971 emit_insn (gen_kandqi (tem1, operands[1], mask));
12972 if (TARGET_AVX512DQ)
12974 tem2 = gen_reg_rtx (QImode);
12975 emit_insn (gen_kashiftqi (tem2, operands[2],
12976 GEN_INT (nunits / 2)));
12980 tem2 = gen_reg_rtx (HImode);
12981 emit_insn (gen_kashifthi (tem2, lowpart_subreg (HImode, operands[2],
12983 GEN_INT (nunits / 2)));
12984 tem2 = lowpart_subreg (QImode, tem2, HImode);
12986 emit_insn (gen_kiorqi (operands[0], tem1, tem2));
;; PACKSSWB: signed-saturating truncate two word vectors and concatenate
;; into one byte vector.  Alternatives: legacy SSE2, AVX VEX, AVX512BW EVEX.
12990 (define_insn "<sse2_avx2>_packsswb<mask_name>"
12991 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
12992 (vec_concat:VI1_AVX512
12993 (ss_truncate:<ssehalfvecmode>
12994 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
12995 (ss_truncate:<ssehalfvecmode>
12996 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
12997 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12999 packsswb\t{%2, %0|%0, %2}
13000 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13001 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13002 [(set_attr "isa" "noavx,avx,avx512bw")
13003 (set_attr "type" "sselog")
13004 (set_attr "prefix_data16" "1,*,*")
13005 (set_attr "prefix" "orig,<mask_prefix>,evex")
13006 (set_attr "mode" "<sseinsnmode>")])
;; PACKSSDW: signed-saturating truncate two dword vectors and concatenate
;; into one word vector (same alternative layout as packsswb above).
13008 (define_insn "<sse2_avx2>_packssdw<mask_name>"
13009 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
13010 (vec_concat:VI2_AVX2
13011 (ss_truncate:<ssehalfvecmode>
13012 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13013 (ss_truncate:<ssehalfvecmode>
13014 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13015 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13017 packssdw\t{%2, %0|%0, %2}
13018 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13019 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13020 [(set_attr "isa" "noavx,avx,avx512bw")
13021 (set_attr "type" "sselog")
13022 (set_attr "prefix_data16" "1,*,*")
13023 (set_attr "prefix" "orig,<mask_prefix>,evex")
13024 (set_attr "mode" "<sseinsnmode>")])
;; PACKUSWB: unsigned-saturating truncate two word vectors and
;; concatenate into one byte vector.
13026 (define_insn "<sse2_avx2>_packuswb<mask_name>"
13027 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
13028 (vec_concat:VI1_AVX512
13029 (us_truncate:<ssehalfvecmode>
13030 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13031 (us_truncate:<ssehalfvecmode>
13032 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13033 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13035 packuswb\t{%2, %0|%0, %2}
13036 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13037 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13038 [(set_attr "isa" "noavx,avx,avx512bw")
13039 (set_attr "type" "sselog")
13040 (set_attr "prefix_data16" "1,*,*")
13041 (set_attr "prefix" "orig,<mask_prefix>,evex")
13042 (set_attr "mode" "<sseinsnmode>")])
;; VPUNPCKHBW on 512-bit vectors: interleave the high bytes of each of
;; the four 128-bit lanes of op1 and op2 (the selection vector below
;; spells out the per-lane high-half indices).
13044 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
13045 [(set (match_operand:V64QI 0 "register_operand" "=v")
13048 (match_operand:V64QI 1 "register_operand" "v")
13049 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
13050 (parallel [(const_int 8) (const_int 72)
13051 (const_int 9) (const_int 73)
13052 (const_int 10) (const_int 74)
13053 (const_int 11) (const_int 75)
13054 (const_int 12) (const_int 76)
13055 (const_int 13) (const_int 77)
13056 (const_int 14) (const_int 78)
13057 (const_int 15) (const_int 79)
13058 (const_int 24) (const_int 88)
13059 (const_int 25) (const_int 89)
13060 (const_int 26) (const_int 90)
13061 (const_int 27) (const_int 91)
13062 (const_int 28) (const_int 92)
13063 (const_int 29) (const_int 93)
13064 (const_int 30) (const_int 94)
13065 (const_int 31) (const_int 95)
13066 (const_int 40) (const_int 104)
13067 (const_int 41) (const_int 105)
13068 (const_int 42) (const_int 106)
13069 (const_int 43) (const_int 107)
13070 (const_int 44) (const_int 108)
13071 (const_int 45) (const_int 109)
13072 (const_int 46) (const_int 110)
13073 (const_int 47) (const_int 111)
13074 (const_int 56) (const_int 120)
13075 (const_int 57) (const_int 121)
13076 (const_int 58) (const_int 122)
13077 (const_int 59) (const_int 123)
13078 (const_int 60) (const_int 124)
13079 (const_int 61) (const_int 125)
13080 (const_int 62) (const_int 126)
13081 (const_int 63) (const_int 127)])))]
13083 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13084 [(set_attr "type" "sselog")
13085 (set_attr "prefix" "evex")
13086 (set_attr "mode" "XI")])
;; VPUNPCKHBW on 256-bit vectors: per-128-bit-lane high-byte interleave.
13088 (define_insn "avx2_interleave_highv32qi<mask_name>"
13089 [(set (match_operand:V32QI 0 "register_operand" "=v")
13092 (match_operand:V32QI 1 "register_operand" "v")
13093 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
13094 (parallel [(const_int 8) (const_int 40)
13095 (const_int 9) (const_int 41)
13096 (const_int 10) (const_int 42)
13097 (const_int 11) (const_int 43)
13098 (const_int 12) (const_int 44)
13099 (const_int 13) (const_int 45)
13100 (const_int 14) (const_int 46)
13101 (const_int 15) (const_int 47)
13102 (const_int 24) (const_int 56)
13103 (const_int 25) (const_int 57)
13104 (const_int 26) (const_int 58)
13105 (const_int 27) (const_int 59)
13106 (const_int 28) (const_int 60)
13107 (const_int 29) (const_int 61)
13108 (const_int 30) (const_int 62)
13109 (const_int 31) (const_int 63)])))]
13110 "TARGET_AVX2 && <mask_avx512vl_condition>"
13111 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13112 [(set_attr "type" "sselog")
13113 (set_attr "prefix" "<mask_prefix>")
13114 (set_attr "mode" "OI")])
;; PUNPCKHBW/VPUNPCKHBW on 128-bit vectors: interleave high bytes of the
;; two inputs.  Alternative 0 is the two-operand SSE2 form.
13116 (define_insn "vec_interleave_highv16qi<mask_name>"
13117 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
13120 (match_operand:V16QI 1 "register_operand" "0,v")
13121 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
13122 (parallel [(const_int 8) (const_int 24)
13123 (const_int 9) (const_int 25)
13124 (const_int 10) (const_int 26)
13125 (const_int 11) (const_int 27)
13126 (const_int 12) (const_int 28)
13127 (const_int 13) (const_int 29)
13128 (const_int 14) (const_int 30)
13129 (const_int 15) (const_int 31)])))]
13130 "TARGET_SSE2 && <mask_avx512vl_condition>"
13132 punpckhbw\t{%2, %0|%0, %2}
13133 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13134 [(set_attr "isa" "noavx,avx")
13135 (set_attr "type" "sselog")
13136 (set_attr "prefix_data16" "1,*")
13137 (set_attr "prefix" "orig,<mask_prefix>")
13138 (set_attr "mode" "TI")])
;; VPUNPCKLBW on 512-bit vectors: per-128-bit-lane low-byte interleave.
13140 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
13141 [(set (match_operand:V64QI 0 "register_operand" "=v")
13144 (match_operand:V64QI 1 "register_operand" "v")
13145 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
13146 (parallel [(const_int 0) (const_int 64)
13147 (const_int 1) (const_int 65)
13148 (const_int 2) (const_int 66)
13149 (const_int 3) (const_int 67)
13150 (const_int 4) (const_int 68)
13151 (const_int 5) (const_int 69)
13152 (const_int 6) (const_int 70)
13153 (const_int 7) (const_int 71)
13154 (const_int 16) (const_int 80)
13155 (const_int 17) (const_int 81)
13156 (const_int 18) (const_int 82)
13157 (const_int 19) (const_int 83)
13158 (const_int 20) (const_int 84)
13159 (const_int 21) (const_int 85)
13160 (const_int 22) (const_int 86)
13161 (const_int 23) (const_int 87)
13162 (const_int 32) (const_int 96)
13163 (const_int 33) (const_int 97)
13164 (const_int 34) (const_int 98)
13165 (const_int 35) (const_int 99)
13166 (const_int 36) (const_int 100)
13167 (const_int 37) (const_int 101)
13168 (const_int 38) (const_int 102)
13169 (const_int 39) (const_int 103)
13170 (const_int 48) (const_int 112)
13171 (const_int 49) (const_int 113)
13172 (const_int 50) (const_int 114)
13173 (const_int 51) (const_int 115)
13174 (const_int 52) (const_int 116)
13175 (const_int 53) (const_int 117)
13176 (const_int 54) (const_int 118)
13177 (const_int 55) (const_int 119)])))]
13179 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13180 [(set_attr "type" "sselog")
13181 (set_attr "prefix" "evex")
13182 (set_attr "mode" "XI")])
;; VPUNPCKLBW on 256-bit vectors: per-128-bit-lane low-byte interleave.
13184 (define_insn "avx2_interleave_lowv32qi<mask_name>"
13185 [(set (match_operand:V32QI 0 "register_operand" "=v")
13188 (match_operand:V32QI 1 "register_operand" "v")
13189 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
13190 (parallel [(const_int 0) (const_int 32)
13191 (const_int 1) (const_int 33)
13192 (const_int 2) (const_int 34)
13193 (const_int 3) (const_int 35)
13194 (const_int 4) (const_int 36)
13195 (const_int 5) (const_int 37)
13196 (const_int 6) (const_int 38)
13197 (const_int 7) (const_int 39)
13198 (const_int 16) (const_int 48)
13199 (const_int 17) (const_int 49)
13200 (const_int 18) (const_int 50)
13201 (const_int 19) (const_int 51)
13202 (const_int 20) (const_int 52)
13203 (const_int 21) (const_int 53)
13204 (const_int 22) (const_int 54)
13205 (const_int 23) (const_int 55)])))]
13206 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13207 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13208 [(set_attr "type" "sselog")
13209 (set_attr "prefix" "maybe_vex")
13210 (set_attr "mode" "OI")])
;; PUNPCKLBW/VPUNPCKLBW on 128-bit vectors: interleave low bytes.
13212 (define_insn "vec_interleave_lowv16qi<mask_name>"
13213 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
13216 (match_operand:V16QI 1 "register_operand" "0,v")
13217 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
13218 (parallel [(const_int 0) (const_int 16)
13219 (const_int 1) (const_int 17)
13220 (const_int 2) (const_int 18)
13221 (const_int 3) (const_int 19)
13222 (const_int 4) (const_int 20)
13223 (const_int 5) (const_int 21)
13224 (const_int 6) (const_int 22)
13225 (const_int 7) (const_int 23)])))]
13226 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13228 punpcklbw\t{%2, %0|%0, %2}
13229 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13230 [(set_attr "isa" "noavx,avx")
13231 (set_attr "type" "sselog")
13232 (set_attr "prefix_data16" "1,*")
13233 (set_attr "prefix" "orig,vex")
13234 (set_attr "mode" "TI")])
;; VPUNPCKHWD on 512-bit vectors: per-128-bit-lane high-word interleave.
13236 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
13237 [(set (match_operand:V32HI 0 "register_operand" "=v")
13240 (match_operand:V32HI 1 "register_operand" "v")
13241 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
13242 (parallel [(const_int 4) (const_int 36)
13243 (const_int 5) (const_int 37)
13244 (const_int 6) (const_int 38)
13245 (const_int 7) (const_int 39)
13246 (const_int 12) (const_int 44)
13247 (const_int 13) (const_int 45)
13248 (const_int 14) (const_int 46)
13249 (const_int 15) (const_int 47)
13250 (const_int 20) (const_int 52)
13251 (const_int 21) (const_int 53)
13252 (const_int 22) (const_int 54)
13253 (const_int 23) (const_int 55)
13254 (const_int 28) (const_int 60)
13255 (const_int 29) (const_int 61)
13256 (const_int 30) (const_int 62)
13257 (const_int 31) (const_int 63)])))]
13259 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13260 [(set_attr "type" "sselog")
13261 (set_attr "prefix" "evex")
13262 (set_attr "mode" "XI")])
;; VPUNPCKHWD on 256-bit vectors: per-128-bit-lane high-word interleave.
13264 (define_insn "avx2_interleave_highv16hi<mask_name>"
13265 [(set (match_operand:V16HI 0 "register_operand" "=v")
13268 (match_operand:V16HI 1 "register_operand" "v")
13269 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
13270 (parallel [(const_int 4) (const_int 20)
13271 (const_int 5) (const_int 21)
13272 (const_int 6) (const_int 22)
13273 (const_int 7) (const_int 23)
13274 (const_int 12) (const_int 28)
13275 (const_int 13) (const_int 29)
13276 (const_int 14) (const_int 30)
13277 (const_int 15) (const_int 31)])))]
13278 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13279 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13280 [(set_attr "type" "sselog")
13281 (set_attr "prefix" "maybe_evex")
13282 (set_attr "mode" "OI")])
;; PUNPCKHWD/VPUNPCKHWD on 128-bit vectors: interleave high words.
13284 (define_insn "vec_interleave_highv8hi<mask_name>"
13285 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
13288 (match_operand:V8HI 1 "register_operand" "0,v")
13289 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
13290 (parallel [(const_int 4) (const_int 12)
13291 (const_int 5) (const_int 13)
13292 (const_int 6) (const_int 14)
13293 (const_int 7) (const_int 15)])))]
13294 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13296 punpckhwd\t{%2, %0|%0, %2}
13297 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13298 [(set_attr "isa" "noavx,avx")
13299 (set_attr "type" "sselog")
13300 (set_attr "prefix_data16" "1,*")
13301 (set_attr "prefix" "orig,maybe_vex")
13302 (set_attr "mode" "TI")])
;; VPUNPCKLWD on 512-bit vectors: per-128-bit-lane low-word interleave.
13304 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
13305 [(set (match_operand:V32HI 0 "register_operand" "=v")
13308 (match_operand:V32HI 1 "register_operand" "v")
13309 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
13310 (parallel [(const_int 0) (const_int 32)
13311 (const_int 1) (const_int 33)
13312 (const_int 2) (const_int 34)
13313 (const_int 3) (const_int 35)
13314 (const_int 8) (const_int 40)
13315 (const_int 9) (const_int 41)
13316 (const_int 10) (const_int 42)
13317 (const_int 11) (const_int 43)
13318 (const_int 16) (const_int 48)
13319 (const_int 17) (const_int 49)
13320 (const_int 18) (const_int 50)
13321 (const_int 19) (const_int 51)
13322 (const_int 24) (const_int 56)
13323 (const_int 25) (const_int 57)
13324 (const_int 26) (const_int 58)
13325 (const_int 27) (const_int 59)])))]
13327 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13328 [(set_attr "type" "sselog")
13329 (set_attr "prefix" "evex")
13330 (set_attr "mode" "XI")])
;; VPUNPCKLWD on 256-bit vectors: per-128-bit-lane low-word interleave.
13332 (define_insn "avx2_interleave_lowv16hi<mask_name>"
13333 [(set (match_operand:V16HI 0 "register_operand" "=v")
13336 (match_operand:V16HI 1 "register_operand" "v")
13337 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
13338 (parallel [(const_int 0) (const_int 16)
13339 (const_int 1) (const_int 17)
13340 (const_int 2) (const_int 18)
13341 (const_int 3) (const_int 19)
13342 (const_int 8) (const_int 24)
13343 (const_int 9) (const_int 25)
13344 (const_int 10) (const_int 26)
13345 (const_int 11) (const_int 27)])))]
13346 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13347 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13348 [(set_attr "type" "sselog")
13349 (set_attr "prefix" "maybe_evex")
13350 (set_attr "mode" "OI")])
;; PUNPCKLWD/VPUNPCKLWD on 128-bit vectors: interleave low words.
13352 (define_insn "vec_interleave_lowv8hi<mask_name>"
13353 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
13356 (match_operand:V8HI 1 "register_operand" "0,v")
13357 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
13358 (parallel [(const_int 0) (const_int 8)
13359 (const_int 1) (const_int 9)
13360 (const_int 2) (const_int 10)
13361 (const_int 3) (const_int 11)])))]
13362 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13364 punpcklwd\t{%2, %0|%0, %2}
13365 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13366 [(set_attr "isa" "noavx,avx")
13367 (set_attr "type" "sselog")
13368 (set_attr "prefix_data16" "1,*")
13369 (set_attr "prefix" "orig,maybe_evex")
13370 (set_attr "mode" "TI")])
;; VPUNPCKHDQ on 256-bit vectors: per-128-bit-lane high-dword interleave.
13372 (define_insn "avx2_interleave_highv8si<mask_name>"
13373 [(set (match_operand:V8SI 0 "register_operand" "=v")
13376 (match_operand:V8SI 1 "register_operand" "v")
13377 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
13378 (parallel [(const_int 2) (const_int 10)
13379 (const_int 3) (const_int 11)
13380 (const_int 6) (const_int 14)
13381 (const_int 7) (const_int 15)])))]
13382 "TARGET_AVX2 && <mask_avx512vl_condition>"
13383 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13384 [(set_attr "type" "sselog")
13385 (set_attr "prefix" "maybe_evex")
13386 (set_attr "mode" "OI")])
;; VPUNPCKHDQ on 512-bit vectors: per-128-bit-lane high-dword interleave.
13388 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
13389 [(set (match_operand:V16SI 0 "register_operand" "=v")
13392 (match_operand:V16SI 1 "register_operand" "v")
13393 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
13394 (parallel [(const_int 2) (const_int 18)
13395 (const_int 3) (const_int 19)
13396 (const_int 6) (const_int 22)
13397 (const_int 7) (const_int 23)
13398 (const_int 10) (const_int 26)
13399 (const_int 11) (const_int 27)
13400 (const_int 14) (const_int 30)
13401 (const_int 15) (const_int 31)])))]
13403 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13404 [(set_attr "type" "sselog")
13405 (set_attr "prefix" "evex")
13406 (set_attr "mode" "XI")])
;; PUNPCKHDQ/VPUNPCKHDQ on 128-bit vectors: interleave high dwords.
13409 (define_insn "vec_interleave_highv4si<mask_name>"
13410 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
13413 (match_operand:V4SI 1 "register_operand" "0,v")
13414 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
13415 (parallel [(const_int 2) (const_int 6)
13416 (const_int 3) (const_int 7)])))]
13417 "TARGET_SSE2 && <mask_avx512vl_condition>"
13419 punpckhdq\t{%2, %0|%0, %2}
13420 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13421 [(set_attr "isa" "noavx,avx")
13422 (set_attr "type" "sselog")
13423 (set_attr "prefix_data16" "1,*")
13424 (set_attr "prefix" "orig,maybe_vex")
13425 (set_attr "mode" "TI")])
;; VPUNPCKLDQ on 256-bit vectors: per-128-bit-lane low-dword interleave.
13427 (define_insn "avx2_interleave_lowv8si<mask_name>"
13428 [(set (match_operand:V8SI 0 "register_operand" "=v")
13431 (match_operand:V8SI 1 "register_operand" "v")
13432 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
13433 (parallel [(const_int 0) (const_int 8)
13434 (const_int 1) (const_int 9)
13435 (const_int 4) (const_int 12)
13436 (const_int 5) (const_int 13)])))]
13437 "TARGET_AVX2 && <mask_avx512vl_condition>"
13438 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13439 [(set_attr "type" "sselog")
13440 (set_attr "prefix" "maybe_evex")
13441 (set_attr "mode" "OI")])
;; VPUNPCKLDQ on 512-bit vectors: per-128-bit-lane low-dword interleave.
13443 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
13444 [(set (match_operand:V16SI 0 "register_operand" "=v")
13447 (match_operand:V16SI 1 "register_operand" "v")
13448 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
13449 (parallel [(const_int 0) (const_int 16)
13450 (const_int 1) (const_int 17)
13451 (const_int 4) (const_int 20)
13452 (const_int 5) (const_int 21)
13453 (const_int 8) (const_int 24)
13454 (const_int 9) (const_int 25)
13455 (const_int 12) (const_int 28)
13456 (const_int 13) (const_int 29)])))]
13458 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13459 [(set_attr "type" "sselog")
13460 (set_attr "prefix" "evex")
13461 (set_attr "mode" "XI")])
;; PUNPCKLDQ/VPUNPCKLDQ on 128-bit vectors: interleave low dwords.
13463 (define_insn "vec_interleave_lowv4si<mask_name>"
13464 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
13467 (match_operand:V4SI 1 "register_operand" "0,v")
13468 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
13469 (parallel [(const_int 0) (const_int 4)
13470 (const_int 1) (const_int 5)])))]
13471 "TARGET_SSE2 && <mask_avx512vl_condition>"
13473 punpckldq\t{%2, %0|%0, %2}
13474 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13475 [(set_attr "isa" "noavx,avx")
13476 (set_attr "type" "sselog")
13477 (set_attr "prefix_data16" "1,*")
13478 (set_attr "prefix" "orig,vex")
13479 (set_attr "mode" "TI")])
;; True (cross-lane) 256-bit high interleave: the AVX2 punpck insns work
;; per 128-bit lane, so combine their low and high results and pick the
;; upper lanes of each with vperm2i128 (selector 1 | 3<<4).
13481 (define_expand "vec_interleave_high<mode>"
13482 [(match_operand:VI_256 0 "register_operand")
13483 (match_operand:VI_256 1 "register_operand")
13484 (match_operand:VI_256 2 "nonimmediate_operand")]
13487 rtx t1 = gen_reg_rtx (<MODE>mode);
13488 rtx t2 = gen_reg_rtx (<MODE>mode);
13489 rtx t3 = gen_reg_rtx (V4DImode);
13490 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
13491 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
13492 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
13493 gen_lowpart (V4DImode, t2),
13494 GEN_INT (1 + (3 << 4))));
13495 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; True (cross-lane) 256-bit low interleave: like vec_interleave_high
;; above, but vperm2i128 selects the lower lanes (selector 0 | 2<<4).
13499 (define_expand "vec_interleave_low<mode>"
13500 [(match_operand:VI_256 0 "register_operand")
13501 (match_operand:VI_256 1 "register_operand")
13502 (match_operand:VI_256 2 "nonimmediate_operand")]
13505 rtx t1 = gen_reg_rtx (<MODE>mode);
13506 rtx t2 = gen_reg_rtx (<MODE>mode);
13507 rtx t3 = gen_reg_rtx (V4DImode);
13508 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
13509 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
13510 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
13511 gen_lowpart (V4DImode, t2),
13512 GEN_INT (0 + (2 << 4))));
13513 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
13517 ;; Modes handled by pinsr patterns.
;; Element-vector modes supported by pinsr; V8HI is baseline SSE2,
;; byte/dword need SSE4.1 and qword additionally needs 64-bit mode.
13518 (define_mode_iterator PINSR_MODE
13519 [(V16QI "TARGET_SSE4_1") V8HI
13520 (V4SI "TARGET_SSE4_1")
13521 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Pattern-name prefix per mode: pinsrw is SSE2, the rest are SSE4.1.
13523 (define_mode_attr sse2p4_1
13524 [(V16QI "sse4_1") (V8HI "sse2")
13525 (V4SI "sse4_1") (V2DI "sse4_1")])
;; ISA gating the EVEX-encoded pinsr alternatives: byte/word forms are
;; AVX512BW, dword/qword forms are AVX512DQ.
13527 (define_mode_attr pinsr_evex_isa
13528 [(V16QI "avx512bw") (V8HI "avx512bw")
13529 (V4SI "avx512dq") (V2DI "avx512dq")])
;; PINSR: insert scalar operand 2 into vector operand 1 at the lane
;; selected by the single set bit of operand 3 (converted to a lane
;; index via exact_log2 in the output body).  For sub-SImode elements a
;; register source is printed as %k2 (32-bit name).  Alternatives pair
;; reg/mem sources for noavx, avx, and EVEX encodings.
13531 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
13532 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
13533 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
13534 (vec_merge:PINSR_MODE
13535 (vec_duplicate:PINSR_MODE
13536 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
13537 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
13538 (match_operand:SI 3 "const_int_operand")))]
13540 && ((unsigned) exact_log2 (INTVAL (operands[3]))
13541 < GET_MODE_NUNITS (<MODE>mode))"
13543 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
13545 switch (which_alternative)
13548 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
13549 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
13552 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
13555 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
13556 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
13560 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
13562 gcc_unreachable ();
13565 [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
13566 (set_attr "type" "sselog")
13567 (set (attr "prefix_rex")
13569 (and (not (match_test "TARGET_AVX"))
13570 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
13572 (const_string "*")))
13573 (set (attr "prefix_data16")
13575 (and (not (match_test "TARGET_AVX"))
13576 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13578 (const_string "*")))
13579 (set (attr "prefix_extra")
13581 (and (not (match_test "TARGET_AVX"))
13582 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13584 (const_string "1")))
13585 (set_attr "length_immediate" "1")
13586 (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
13587 (set_attr "mode" "TI")])
;; Masked insertion of a 128-bit quarter into a 512-bit vector.
;; The chunk index (operand 3, 0..3) is translated into a vec_merge
;; selector whose bits for the target chunk are cleared (4 element
;; bits per chunk for 32-bit units, 2 for 64-bit units), then the
;; _1_mask insn below is emitted.
13589 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
13590 [(match_operand:AVX512_VEC 0 "register_operand")
13591 (match_operand:AVX512_VEC 1 "register_operand")
13592 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
13593 (match_operand:SI 3 "const_0_to_3_operand")
13594 (match_operand:AVX512_VEC 4 "register_operand")
13595 (match_operand:<avx512fmaskmode> 5 "register_operand")]
13598 int mask, selector;
13599 mask = INTVAL (operands[3]);
13600 selector = (GET_MODE_UNIT_SIZE (<MODE>mode) == 4
13601 ? 0xFFFF ^ (0x000F << mask * 4)
13602 : 0xFF ^ (0x03 << mask * 2));
13603 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
13604 (operands[0], operands[1], operands[2], GEN_INT (selector),
13605 operands[4], operands[5]));
;; Special case: insertion into the lowest quarter (selector 0xFFF0 for
;; 32-bit units, 0xFC for 64-bit units).  When operand 1 is zero
;; (alternatives 1-2, constraint "C") the whole operation degenerates
;; into a plain 128-bit move of operand 2 into the low part of the
;; destination.
13609 (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
13610 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v,x,Yv")
13611 (vec_merge:AVX512_VEC
13612 (match_operand:AVX512_VEC 1 "reg_or_0_operand" "v,C,C")
13613 (vec_duplicate:AVX512_VEC
13614 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm,xm,vm"))
13615 (match_operand:SI 3 "const_int_operand" "n,n,n")))]
13617 && (INTVAL (operands[3])
13618 == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))"
13620 if (which_alternative == 0)
13621 return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
13622 switch (<MODE>mode)
13625 return "vmovapd\t{%2, %x0|%x0, %2}";
13627 return "vmovaps\t{%2, %x0|%x0, %2}";
13629 return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
13630 : "vmovdqa\t{%2, %x0|%x0, %2}";
13632 return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
13633 : "vmovdqa\t{%2, %x0|%x0, %2}";
13635 gcc_unreachable ();
13638 [(set_attr "type" "sselog,ssemov,ssemov")
13639 (set_attr "length_immediate" "1,0,0")
13640 (set_attr "prefix" "evex,vex,evex")
13641 (set_attr "mode" "<sseinsnmode>,<ssequarterinsnmode>,<ssequarterinsnmode>")])
;; General quarter insertion: decode the vec_merge selector back into
;; the 0..3 chunk-index immediate expected by
;; vinsert<shuffletype><extract_suf> (inverse of the _mask expander
;; above).
13643 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
13644 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
13645 (vec_merge:AVX512_VEC
13646 (match_operand:AVX512_VEC 1 "register_operand" "v")
13647 (vec_duplicate:AVX512_VEC
13648 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
13649 (match_operand:SI 3 "const_int_operand" "n")))]
13653 int selector = INTVAL (operands[3]);
13655 if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))
13657 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFF0F : 0xF3))
13659 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xF0FF : 0xCF))
13661 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0x0FFF : 0x3F))
13664 gcc_unreachable ();
13666 operands[3] = GEN_INT (mask);
13668 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
13670 [(set_attr "type" "sselog")
13671 (set_attr "length_immediate" "1")
13672 (set_attr "prefix" "evex")
13673 (set_attr "mode" "<sseinsnmode>")])
;; Masked insertion of a 256-bit half into a 512-bit vector: dispatch
;; on the 0/1 half index (operand 3) to the vec_set_lo / vec_set_hi
;; patterns below.
13675 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
13676 [(match_operand:AVX512_VEC_2 0 "register_operand")
13677 (match_operand:AVX512_VEC_2 1 "register_operand")
13678 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
13679 (match_operand:SI 3 "const_0_to_1_operand")
13680 (match_operand:AVX512_VEC_2 4 "register_operand")
13681 (match_operand:<avx512fmaskmode> 5 "register_operand")]
13684 int mask = INTVAL (operands[3]);
13686 emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
13687 operands[2], operands[4],
13690 emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
13691 operands[2], operands[4],
;; Replace the low eight 32-bit elements of a 512-bit vector with
;; operand 2; elements 8-15 are kept from operand 1.  Emitted as
;; vinsert<shuffletype>32x8 with immediate 0.
13696 (define_insn "vec_set_lo_<mode><mask_name>"
13697 [(set (match_operand:V16FI 0 "register_operand" "=v")
13699 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
13700 (vec_select:<ssehalfvecmode>
13701 (match_operand:V16FI 1 "register_operand" "v")
13702 (parallel [(const_int 8) (const_int 9)
13703 (const_int 10) (const_int 11)
13704 (const_int 12) (const_int 13)
13705 (const_int 14) (const_int 15)]))))]
13707 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
13708 [(set_attr "type" "sselog")
13709 (set_attr "length_immediate" "1")
13710 (set_attr "prefix" "evex")
13711 (set_attr "mode" "<sseinsnmode>")])
;; High-half counterpart: elements 0-7 kept from operand 1, elements
;; 8-15 replaced by operand 2 (vinsert<shuffletype>32x8, immediate 1).
13713 (define_insn "vec_set_hi_<mode><mask_name>"
13714 [(set (match_operand:V16FI 0 "register_operand" "=v")
13716 (vec_select:<ssehalfvecmode>
13717 (match_operand:V16FI 1 "register_operand" "v")
13718 (parallel [(const_int 0) (const_int 1)
13719 (const_int 2) (const_int 3)
13720 (const_int 4) (const_int 5)
13721 (const_int 6) (const_int 7)]))
13722 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
13724 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
13725 [(set_attr "type" "sselog")
13726 (set_attr "length_immediate" "1")
13727 (set_attr "prefix" "evex")
13728 (set_attr "mode" "<sseinsnmode>")])
;; Same as vec_set_lo above but for 64-bit element modes: replace the
;; low four elements, keep elements 4-7 (vinsert<shuffletype>64x4,
;; immediate 0).
13730 (define_insn "vec_set_lo_<mode><mask_name>"
13731 [(set (match_operand:V8FI 0 "register_operand" "=v")
13733 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
13734 (vec_select:<ssehalfvecmode>
13735 (match_operand:V8FI 1 "register_operand" "v")
13736 (parallel [(const_int 4) (const_int 5)
13737 (const_int 6) (const_int 7)]))))]
13739 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
13740 [(set_attr "type" "sselog")
13741 (set_attr "length_immediate" "1")
13742 (set_attr "prefix" "evex")
13743 (set_attr "mode" "XI")])
;; High-half counterpart for 64-bit element modes: keep elements 0-3,
;; replace elements 4-7 (vinsert<shuffletype>64x4, immediate 1).
13745 (define_insn "vec_set_hi_<mode><mask_name>"
13746 [(set (match_operand:V8FI 0 "register_operand" "=v")
13748 (vec_select:<ssehalfvecmode>
13749 (match_operand:V8FI 1 "register_operand" "v")
13750 (parallel [(const_int 0) (const_int 1)
13751 (const_int 2) (const_int 3)]))
13752 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
13754 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
13755 [(set_attr "type" "sselog")
13756 (set_attr "length_immediate" "1")
13757 (set_attr "prefix" "evex")
13758 (set_attr "mode" "XI")])
;; Masked 256-bit vshuf{f,i}64x2: expand the 2-bit immediate into
;; explicit element selectors for the _1 pattern.  Bit 0 picks which
;; 128-bit lane of operand 1 supplies elements 0-1, bit 1 which lane
;; of operand 2 (indices 4-7 of the concatenation) supplies elements
;; 2-3.
13760 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
13761 [(match_operand:VI8F_256 0 "register_operand")
13762 (match_operand:VI8F_256 1 "register_operand")
13763 (match_operand:VI8F_256 2 "nonimmediate_operand")
13764 (match_operand:SI 3 "const_0_to_3_operand")
13765 (match_operand:VI8F_256 4 "register_operand")
13766 (match_operand:QI 5 "register_operand")]
13769 int mask = INTVAL (operands[3]);
13770 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
13771 (operands[0], operands[1], operands[2],
13772 GEN_INT (((mask >> 0) & 1) * 2 + 0),
13773 GEN_INT (((mask >> 0) & 1) * 2 + 1),
13774 GEN_INT (((mask >> 1) & 1) * 2 + 4),
13775 GEN_INT (((mask >> 1) & 1) * 2 + 5),
13776 operands[4], operands[5]));
;; Match the vec_select-of-vec_concat form and re-encode the 2-bit
;; vshuf<shuffletype>64x2 immediate from the selectors.  The insn
;; condition requires each selector pair to address one whole,
;; even-aligned 128-bit lane.
13780 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
13781 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
13782 (vec_select:VI8F_256
13783 (vec_concat:<ssedoublemode>
13784 (match_operand:VI8F_256 1 "register_operand" "v")
13785 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
13786 (parallel [(match_operand 3 "const_0_to_3_operand")
13787 (match_operand 4 "const_0_to_3_operand")
13788 (match_operand 5 "const_4_to_7_operand")
13789 (match_operand 6 "const_4_to_7_operand")])))]
13791 && (INTVAL (operands[3]) & 1) == 0
13792 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
13793 && (INTVAL (operands[5]) & 1) == 0
13794 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1"
13797 mask = INTVAL (operands[3]) / 2;
13798 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
13799 operands[3] = GEN_INT (mask);
13800 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
13802 [(set_attr "type" "sselog")
13803 (set_attr "length_immediate" "1")
13804 (set_attr "prefix" "evex")
13805 (set_attr "mode" "XI")])
;; Masked 512-bit vshuf{f,i}64x2: expand the 8-bit immediate (two bits
;; per 128-bit chunk) into sixteen-element selectors.  Chunks 0-1 come
;; from operand 1 (indices 0-7), chunks 2-3 from operand 2 (indices
;; 8-15 of the concatenation).
13807 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
13808 [(match_operand:V8FI 0 "register_operand")
13809 (match_operand:V8FI 1 "register_operand")
13810 (match_operand:V8FI 2 "nonimmediate_operand")
13811 (match_operand:SI 3 "const_0_to_255_operand")
13812 (match_operand:V8FI 4 "register_operand")
13813 (match_operand:QI 5 "register_operand")]
13816 int mask = INTVAL (operands[3]);
13817 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
13818 (operands[0], operands[1], operands[2],
13819 GEN_INT (((mask >> 0) & 3) * 2),
13820 GEN_INT (((mask >> 0) & 3) * 2 + 1),
13821 GEN_INT (((mask >> 2) & 3) * 2),
13822 GEN_INT (((mask >> 2) & 3) * 2 + 1),
13823 GEN_INT (((mask >> 4) & 3) * 2 + 8),
13824 GEN_INT (((mask >> 4) & 3) * 2 + 9),
13825 GEN_INT (((mask >> 6) & 3) * 2 + 8),
13826 GEN_INT (((mask >> 6) & 3) * 2 + 9),
13827 operands[4], operands[5]));
;; Re-encode the 8-bit vshuf<shuffletype>64x2 immediate from the
;; element selectors.  The insn condition requires each selector pair
;; to address one whole, even-aligned 128-bit lane of the proper
;; source operand.
13831 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
13832 [(set (match_operand:V8FI 0 "register_operand" "=v")
13834 (vec_concat:<ssedoublemode>
13835 (match_operand:V8FI 1 "register_operand" "v")
13836 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
13837 (parallel [(match_operand 3 "const_0_to_7_operand")
13838 (match_operand 4 "const_0_to_7_operand")
13839 (match_operand 5 "const_0_to_7_operand")
13840 (match_operand 6 "const_0_to_7_operand")
13841 (match_operand 7 "const_8_to_15_operand")
13842 (match_operand 8 "const_8_to_15_operand")
13843 (match_operand 9 "const_8_to_15_operand")
13844 (match_operand 10 "const_8_to_15_operand")])))]
13846 && (INTVAL (operands[3]) & 1) == 0
13847 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
13848 && (INTVAL (operands[5]) & 1) == 0
13849 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1
13850 && (INTVAL (operands[7]) & 1) == 0
13851 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
13852 && (INTVAL (operands[9]) & 1) == 0
13853 && INTVAL (operands[9]) == INTVAL (operands[10]) - 1"
13856 mask = INTVAL (operands[3]) / 2;
13857 mask |= INTVAL (operands[5]) / 2 << 2;
13858 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
13859 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
13860 operands[3] = GEN_INT (mask);
13862 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
13864 [(set_attr "type" "sselog")
13865 (set_attr "length_immediate" "1")
13866 (set_attr "prefix" "evex")
13867 (set_attr "mode" "<sseinsnmode>")])
;; Masked 256-bit vshuf{f,i}32x4: one immediate bit per 128-bit chunk
;; (four 32-bit elements each).  Bit 0 selects the source lane of
;; operand 1 for elements 0-3; bit 1 the lane of operand 2 (indices
;; 8-15 of the concatenation) for elements 4-7.
13869 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
13870 [(match_operand:VI4F_256 0 "register_operand")
13871 (match_operand:VI4F_256 1 "register_operand")
13872 (match_operand:VI4F_256 2 "nonimmediate_operand")
13873 (match_operand:SI 3 "const_0_to_3_operand")
13874 (match_operand:VI4F_256 4 "register_operand")
13875 (match_operand:QI 5 "register_operand")]
13878 int mask = INTVAL (operands[3]);
13879 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
13880 (operands[0], operands[1], operands[2],
13881 GEN_INT (((mask >> 0) & 1) * 4 + 0),
13882 GEN_INT (((mask >> 0) & 1) * 4 + 1),
13883 GEN_INT (((mask >> 0) & 1) * 4 + 2),
13884 GEN_INT (((mask >> 0) & 1) * 4 + 3),
13885 GEN_INT (((mask >> 1) & 1) * 4 + 8),
13886 GEN_INT (((mask >> 1) & 1) * 4 + 9),
13887 GEN_INT (((mask >> 1) & 1) * 4 + 10),
13888 GEN_INT (((mask >> 1) & 1) * 4 + 11),
13889 operands[4], operands[5]));
;; Re-encode the 2-bit vshuf<shuffletype>32x4 immediate from the
;; selectors; each group of four must address one whole, 4-aligned
;; 128-bit lane (enforced by the insn condition).
13893 (define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
13894 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
13895 (vec_select:VI4F_256
13896 (vec_concat:<ssedoublemode>
13897 (match_operand:VI4F_256 1 "register_operand" "v")
13898 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
13899 (parallel [(match_operand 3 "const_0_to_7_operand")
13900 (match_operand 4 "const_0_to_7_operand")
13901 (match_operand 5 "const_0_to_7_operand")
13902 (match_operand 6 "const_0_to_7_operand")
13903 (match_operand 7 "const_8_to_15_operand")
13904 (match_operand 8 "const_8_to_15_operand")
13905 (match_operand 9 "const_8_to_15_operand")
13906 (match_operand 10 "const_8_to_15_operand")])))]
13908 && (INTVAL (operands[3]) & 3) == 0
13909 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
13910 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
13911 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
13912 && (INTVAL (operands[7]) & 3) == 0
13913 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
13914 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
13915 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3"
13918 mask = INTVAL (operands[3]) / 4;
13919 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
13920 operands[3] = GEN_INT (mask);
13922 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
13924 [(set_attr "type" "sselog")
13925 (set_attr "length_immediate" "1")
13926 (set_attr "prefix" "evex")
13927 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit vshuf{f,i}32x4: two immediate bits per 128-bit chunk
;; (four 32-bit elements each).  Chunks 0-1 select lanes of operand 1
;; (indices 0-15), chunks 2-3 select lanes of operand 2 (indices 16-31
;; of the concatenation).
13929 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
13930 [(match_operand:V16FI 0 "register_operand")
13931 (match_operand:V16FI 1 "register_operand")
13932 (match_operand:V16FI 2 "nonimmediate_operand")
13933 (match_operand:SI 3 "const_0_to_255_operand")
13934 (match_operand:V16FI 4 "register_operand")
13935 (match_operand:HI 5 "register_operand")]
13938 int mask = INTVAL (operands[3]);
13939 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
13940 (operands[0], operands[1], operands[2],
13941 GEN_INT (((mask >> 0) & 3) * 4),
13942 GEN_INT (((mask >> 0) & 3) * 4 + 1),
13943 GEN_INT (((mask >> 0) & 3) * 4 + 2),
13944 GEN_INT (((mask >> 0) & 3) * 4 + 3),
13945 GEN_INT (((mask >> 2) & 3) * 4),
13946 GEN_INT (((mask >> 2) & 3) * 4 + 1),
13947 GEN_INT (((mask >> 2) & 3) * 4 + 2),
13948 GEN_INT (((mask >> 2) & 3) * 4 + 3),
13949 GEN_INT (((mask >> 4) & 3) * 4 + 16),
13950 GEN_INT (((mask >> 4) & 3) * 4 + 17),
13951 GEN_INT (((mask >> 4) & 3) * 4 + 18),
13952 GEN_INT (((mask >> 4) & 3) * 4 + 19),
13953 GEN_INT (((mask >> 6) & 3) * 4 + 16),
13954 GEN_INT (((mask >> 6) & 3) * 4 + 17),
13955 GEN_INT (((mask >> 6) & 3) * 4 + 18),
13956 GEN_INT (((mask >> 6) & 3) * 4 + 19),
13957 operands[4], operands[5]));
;; Re-encode the 8-bit vshuf<shuffletype>32x4 immediate from the
;; sixteen selectors; each group of four must address one whole,
;; 4-aligned 128-bit lane of the proper source operand (enforced by
;; the insn condition).
13961 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
13962 [(set (match_operand:V16FI 0 "register_operand" "=v")
13964 (vec_concat:<ssedoublemode>
13965 (match_operand:V16FI 1 "register_operand" "v")
13966 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
13967 (parallel [(match_operand 3 "const_0_to_15_operand")
13968 (match_operand 4 "const_0_to_15_operand")
13969 (match_operand 5 "const_0_to_15_operand")
13970 (match_operand 6 "const_0_to_15_operand")
13971 (match_operand 7 "const_0_to_15_operand")
13972 (match_operand 8 "const_0_to_15_operand")
13973 (match_operand 9 "const_0_to_15_operand")
13974 (match_operand 10 "const_0_to_15_operand")
13975 (match_operand 11 "const_16_to_31_operand")
13976 (match_operand 12 "const_16_to_31_operand")
13977 (match_operand 13 "const_16_to_31_operand")
13978 (match_operand 14 "const_16_to_31_operand")
13979 (match_operand 15 "const_16_to_31_operand")
13980 (match_operand 16 "const_16_to_31_operand")
13981 (match_operand 17 "const_16_to_31_operand")
13982 (match_operand 18 "const_16_to_31_operand")])))]
13984 && (INTVAL (operands[3]) & 3) == 0
13985 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
13986 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
13987 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
13988 && (INTVAL (operands[7]) & 3) == 0
13989 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
13990 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
13991 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3
13992 && (INTVAL (operands[11]) & 3) == 0
13993 && INTVAL (operands[11]) == INTVAL (operands[12]) - 1
13994 && INTVAL (operands[11]) == INTVAL (operands[13]) - 2
13995 && INTVAL (operands[11]) == INTVAL (operands[14]) - 3
13996 && (INTVAL (operands[15]) & 3) == 0
13997 && INTVAL (operands[15]) == INTVAL (operands[16]) - 1
13998 && INTVAL (operands[15]) == INTVAL (operands[17]) - 2
13999 && INTVAL (operands[15]) == INTVAL (operands[18]) - 3"
14002 mask = INTVAL (operands[3]) / 4;
14003 mask |= INTVAL (operands[7]) / 4 << 2;
14004 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
14005 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
14006 operands[3] = GEN_INT (mask);
14008 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
14010 [(set_attr "type" "sselog")
14011 (set_attr "length_immediate" "1")
14012 (set_attr "prefix" "evex")
14013 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit vpshufd: replicate the four 2-bit element selectors
;; of the 8-bit immediate across all four 128-bit lanes (offsets +4,
;; +8, +12) and emit the _1 pattern below.
14015 (define_expand "avx512f_pshufdv3_mask"
14016 [(match_operand:V16SI 0 "register_operand")
14017 (match_operand:V16SI 1 "nonimmediate_operand")
14018 (match_operand:SI 2 "const_0_to_255_operand")
14019 (match_operand:V16SI 3 "register_operand")
14020 (match_operand:HI 4 "register_operand")]
14023 int mask = INTVAL (operands[2]);
14024 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
14025 GEN_INT ((mask >> 0) & 3),
14026 GEN_INT ((mask >> 2) & 3),
14027 GEN_INT ((mask >> 4) & 3),
14028 GEN_INT ((mask >> 6) & 3),
14029 GEN_INT (((mask >> 0) & 3) + 4),
14030 GEN_INT (((mask >> 2) & 3) + 4),
14031 GEN_INT (((mask >> 4) & 3) + 4),
14032 GEN_INT (((mask >> 6) & 3) + 4),
14033 GEN_INT (((mask >> 0) & 3) + 8),
14034 GEN_INT (((mask >> 2) & 3) + 8),
14035 GEN_INT (((mask >> 4) & 3) + 8),
14036 GEN_INT (((mask >> 6) & 3) + 8),
14037 GEN_INT (((mask >> 0) & 3) + 12),
14038 GEN_INT (((mask >> 2) & 3) + 12),
14039 GEN_INT (((mask >> 4) & 3) + 12),
14040 GEN_INT (((mask >> 6) & 3) + 12),
14041 operands[3], operands[4]));
;; V16SI vpshufd as an explicit vec_select; the insn condition
;; requires every 128-bit lane to use the same shuffle pattern, which
;; the output code re-encodes into the single 8-bit immediate.
14045 (define_insn "avx512f_pshufd_1<mask_name>"
14046 [(set (match_operand:V16SI 0 "register_operand" "=v")
14048 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
14049 (parallel [(match_operand 2 "const_0_to_3_operand")
14050 (match_operand 3 "const_0_to_3_operand")
14051 (match_operand 4 "const_0_to_3_operand")
14052 (match_operand 5 "const_0_to_3_operand")
14053 (match_operand 6 "const_4_to_7_operand")
14054 (match_operand 7 "const_4_to_7_operand")
14055 (match_operand 8 "const_4_to_7_operand")
14056 (match_operand 9 "const_4_to_7_operand")
14057 (match_operand 10 "const_8_to_11_operand")
14058 (match_operand 11 "const_8_to_11_operand")
14059 (match_operand 12 "const_8_to_11_operand")
14060 (match_operand 13 "const_8_to_11_operand")
14061 (match_operand 14 "const_12_to_15_operand")
14062 (match_operand 15 "const_12_to_15_operand")
14063 (match_operand 16 "const_12_to_15_operand")
14064 (match_operand 17 "const_12_to_15_operand")])))]
14066 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
14067 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
14068 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
14069 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
14070 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
14071 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
14072 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
14073 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
14074 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
14075 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
14076 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
14077 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
14080 mask |= INTVAL (operands[2]) << 0;
14081 mask |= INTVAL (operands[3]) << 2;
14082 mask |= INTVAL (operands[4]) << 4;
14083 mask |= INTVAL (operands[5]) << 6;
14084 operands[2] = GEN_INT (mask);
14086 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
14088 [(set_attr "type" "sselog1")
14089 (set_attr "prefix" "evex")
14090 (set_attr "length_immediate" "1")
14091 (set_attr "mode" "XI")])
;; Masked 256-bit vpshufd: replicate the four 2-bit selectors across
;; both 128-bit lanes (+4 offsets) and emit avx2_pshufd_1 with mask
;; operands appended.
14093 (define_expand "avx512vl_pshufdv3_mask"
14094 [(match_operand:V8SI 0 "register_operand")
14095 (match_operand:V8SI 1 "nonimmediate_operand")
14096 (match_operand:SI 2 "const_0_to_255_operand")
14097 (match_operand:V8SI 3 "register_operand")
14098 (match_operand:QI 4 "register_operand")]
14101 int mask = INTVAL (operands[2]);
14102 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
14103 GEN_INT ((mask >> 0) & 3),
14104 GEN_INT ((mask >> 2) & 3),
14105 GEN_INT ((mask >> 4) & 3),
14106 GEN_INT ((mask >> 6) & 3),
14107 GEN_INT (((mask >> 0) & 3) + 4),
14108 GEN_INT (((mask >> 2) & 3) + 4),
14109 GEN_INT (((mask >> 4) & 3) + 4),
14110 GEN_INT (((mask >> 6) & 3) + 4),
14111 operands[3], operands[4]));
;; Unmasked 256-bit vpshufd: same immediate decomposition, no mask
;; operands.
14115 (define_expand "avx2_pshufdv3"
14116 [(match_operand:V8SI 0 "register_operand")
14117 (match_operand:V8SI 1 "nonimmediate_operand")
14118 (match_operand:SI 2 "const_0_to_255_operand")]
14121 int mask = INTVAL (operands[2]);
14122 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
14123 GEN_INT ((mask >> 0) & 3),
14124 GEN_INT ((mask >> 2) & 3),
14125 GEN_INT ((mask >> 4) & 3),
14126 GEN_INT ((mask >> 6) & 3),
14127 GEN_INT (((mask >> 0) & 3) + 4),
14128 GEN_INT (((mask >> 2) & 3) + 4),
14129 GEN_INT (((mask >> 4) & 3) + 4),
14130 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8SI vpshufd as an explicit vec_select; both 128-bit lanes must use
;; the same pattern (insn condition), re-encoded into the 8-bit
;; immediate by the output code.
14134 (define_insn "avx2_pshufd_1<mask_name>"
14135 [(set (match_operand:V8SI 0 "register_operand" "=v")
14137 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
14138 (parallel [(match_operand 2 "const_0_to_3_operand")
14139 (match_operand 3 "const_0_to_3_operand")
14140 (match_operand 4 "const_0_to_3_operand")
14141 (match_operand 5 "const_0_to_3_operand")
14142 (match_operand 6 "const_4_to_7_operand")
14143 (match_operand 7 "const_4_to_7_operand")
14144 (match_operand 8 "const_4_to_7_operand")
14145 (match_operand 9 "const_4_to_7_operand")])))]
14147 && <mask_avx512vl_condition>
14148 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
14149 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
14150 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
14151 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
14154 mask |= INTVAL (operands[2]) << 0;
14155 mask |= INTVAL (operands[3]) << 2;
14156 mask |= INTVAL (operands[4]) << 4;
14157 mask |= INTVAL (operands[5]) << 6;
14158 operands[2] = GEN_INT (mask);
14160 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
14162 [(set_attr "type" "sselog1")
14163 (set_attr "prefix" "maybe_evex")
14164 (set_attr "length_immediate" "1")
14165 (set_attr "mode" "OI")])
;; Masked 128-bit pshufd: decompose the immediate into four 2-bit
;; selectors and emit sse2_pshufd_1 with mask operands appended.
14167 (define_expand "avx512vl_pshufd_mask"
14168 [(match_operand:V4SI 0 "register_operand")
14169 (match_operand:V4SI 1 "nonimmediate_operand")
14170 (match_operand:SI 2 "const_0_to_255_operand")
14171 (match_operand:V4SI 3 "register_operand")
14172 (match_operand:QI 4 "register_operand")]
14175 int mask = INTVAL (operands[2]);
14176 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
14177 GEN_INT ((mask >> 0) & 3),
14178 GEN_INT ((mask >> 2) & 3),
14179 GEN_INT ((mask >> 4) & 3),
14180 GEN_INT ((mask >> 6) & 3),
14181 operands[3], operands[4]));
;; Unmasked 128-bit pshufd expander: same immediate decomposition.
14185 (define_expand "sse2_pshufd"
14186 [(match_operand:V4SI 0 "register_operand")
14187 (match_operand:V4SI 1 "vector_operand")
14188 (match_operand:SI 2 "const_int_operand")]
14191 int mask = INTVAL (operands[2]);
14192 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
14193 GEN_INT ((mask >> 0) & 3),
14194 GEN_INT ((mask >> 2) & 3),
14195 GEN_INT ((mask >> 4) & 3),
14196 GEN_INT ((mask >> 6) & 3)));
;; V4SI pshufd as an explicit vec_select; the output code re-packs the
;; four selectors into the 8-bit immediate.  %v prints the v-prefix
;; when the VEX/EVEX encoding is used.
14200 (define_insn "sse2_pshufd_1<mask_name>"
14201 [(set (match_operand:V4SI 0 "register_operand" "=v")
14203 (match_operand:V4SI 1 "vector_operand" "vBm")
14204 (parallel [(match_operand 2 "const_0_to_3_operand")
14205 (match_operand 3 "const_0_to_3_operand")
14206 (match_operand 4 "const_0_to_3_operand")
14207 (match_operand 5 "const_0_to_3_operand")])))]
14208 "TARGET_SSE2 && <mask_avx512vl_condition>"
14211 mask |= INTVAL (operands[2]) << 0;
14212 mask |= INTVAL (operands[3]) << 2;
14213 mask |= INTVAL (operands[4]) << 4;
14214 mask |= INTVAL (operands[5]) << 6;
14215 operands[2] = GEN_INT (mask);
14217 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14219 [(set_attr "type" "sselog1")
14220 (set_attr "prefix_data16" "1")
14221 (set_attr "prefix" "<mask_prefix2>")
14222 (set_attr "length_immediate" "1")
14223 (set_attr "mode" "TI")])
;; 512-bit vpshuflw, kept as an unspec: the 8-bit immediate is passed
;; straight through to the instruction.
14225 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
14226 [(set (match_operand:V32HI 0 "register_operand" "=v")
14228 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
14229 (match_operand:SI 2 "const_0_to_255_operand" "n")]
14232 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14233 [(set_attr "type" "sselog")
14234 (set_attr "prefix" "evex")
14235 (set_attr "mode" "XI")])
;; Masked 256-bit vpshuflw: four 2-bit selectors for the low words of
;; each 128-bit lane; the second lane's selectors are the same values
;; offset by 8.
14237 (define_expand "avx512vl_pshuflwv3_mask"
14238 [(match_operand:V16HI 0 "register_operand")
14239 (match_operand:V16HI 1 "nonimmediate_operand")
14240 (match_operand:SI 2 "const_0_to_255_operand")
14241 (match_operand:V16HI 3 "register_operand")
14242 (match_operand:HI 4 "register_operand")]
14243 "TARGET_AVX512VL && TARGET_AVX512BW"
14245 int mask = INTVAL (operands[2]);
14246 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
14247 GEN_INT ((mask >> 0) & 3),
14248 GEN_INT ((mask >> 2) & 3),
14249 GEN_INT ((mask >> 4) & 3),
14250 GEN_INT ((mask >> 6) & 3),
14251 GEN_INT (((mask >> 0) & 3) + 8),
14252 GEN_INT (((mask >> 2) & 3) + 8),
14253 GEN_INT (((mask >> 4) & 3) + 8),
14254 GEN_INT (((mask >> 6) & 3) + 8),
14255 operands[3], operands[4]));
;; Unmasked 256-bit vpshuflw expander: same decomposition, no mask.
14259 (define_expand "avx2_pshuflwv3"
14260 [(match_operand:V16HI 0 "register_operand")
14261 (match_operand:V16HI 1 "nonimmediate_operand")
14262 (match_operand:SI 2 "const_0_to_255_operand")]
14265 int mask = INTVAL (operands[2]);
14266 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
14267 GEN_INT ((mask >> 0) & 3),
14268 GEN_INT ((mask >> 2) & 3),
14269 GEN_INT ((mask >> 4) & 3),
14270 GEN_INT ((mask >> 6) & 3),
14271 GEN_INT (((mask >> 0) & 3) + 8),
14272 GEN_INT (((mask >> 2) & 3) + 8),
14273 GEN_INT (((mask >> 4) & 3) + 8),
14274 GEN_INT (((mask >> 6) & 3) + 8)));
;; V16HI vpshuflw as an explicit vec_select; both lanes must use the
;; same pattern (selectors 6-9 equal selectors 2-5 plus 8), re-encoded
;; into the 8-bit immediate by the output code.
14278 (define_insn "avx2_pshuflw_1<mask_name>"
14279 [(set (match_operand:V16HI 0 "register_operand" "=v")
14281 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
14282 (parallel [(match_operand 2 "const_0_to_3_operand")
14283 (match_operand 3 "const_0_to_3_operand")
14284 (match_operand 4 "const_0_to_3_operand")
14285 (match_operand 5 "const_0_to_3_operand")
14290 (match_operand 6 "const_8_to_11_operand")
14291 (match_operand 7 "const_8_to_11_operand")
14292 (match_operand 8 "const_8_to_11_operand")
14293 (match_operand 9 "const_8_to_11_operand")
14297 (const_int 15)])))]
14299 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
14300 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
14301 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
14302 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
14303 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
14306 mask |= INTVAL (operands[2]) << 0;
14307 mask |= INTVAL (operands[3]) << 2;
14308 mask |= INTVAL (operands[4]) << 4;
14309 mask |= INTVAL (operands[5]) << 6;
14310 operands[2] = GEN_INT (mask);
14312 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
14314 [(set_attr "type" "sselog")
14315 (set_attr "prefix" "maybe_evex")
14316 (set_attr "length_immediate" "1")
14317 (set_attr "mode" "OI")])
;; Masked 128-bit pshuflw: four 2-bit selectors for words 0-3; words
;; 4-7 are unchanged by the instruction.
14319 (define_expand "avx512vl_pshuflw_mask"
14320 [(match_operand:V8HI 0 "register_operand")
14321 (match_operand:V8HI 1 "nonimmediate_operand")
14322 (match_operand:SI 2 "const_0_to_255_operand")
14323 (match_operand:V8HI 3 "register_operand")
14324 (match_operand:QI 4 "register_operand")]
14325 "TARGET_AVX512VL && TARGET_AVX512BW"
14327 int mask = INTVAL (operands[2]);
14328 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
14329 GEN_INT ((mask >> 0) & 3),
14330 GEN_INT ((mask >> 2) & 3),
14331 GEN_INT ((mask >> 4) & 3),
14332 GEN_INT ((mask >> 6) & 3),
14333 operands[3], operands[4]));
;; Unmasked 128-bit pshuflw expander: same immediate decomposition.
14337 (define_expand "sse2_pshuflw"
14338 [(match_operand:V8HI 0 "register_operand")
14339 (match_operand:V8HI 1 "vector_operand")
14340 (match_operand:SI 2 "const_int_operand")]
14343 int mask = INTVAL (operands[2]);
14344 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
14345 GEN_INT ((mask >> 0) & 3),
14346 GEN_INT ((mask >> 2) & 3),
14347 GEN_INT ((mask >> 4) & 3),
14348 GEN_INT ((mask >> 6) & 3)));
;; V8HI pshuflw as an explicit vec_select; the output code re-packs
;; the four selectors into the 8-bit immediate.  %v prints the
;; v-prefix when the VEX/EVEX encoding is used.
14352 (define_insn "sse2_pshuflw_1<mask_name>"
14353 [(set (match_operand:V8HI 0 "register_operand" "=v")
14355 (match_operand:V8HI 1 "vector_operand" "vBm")
14356 (parallel [(match_operand 2 "const_0_to_3_operand")
14357 (match_operand 3 "const_0_to_3_operand")
14358 (match_operand 4 "const_0_to_3_operand")
14359 (match_operand 5 "const_0_to_3_operand")
14364 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14367 mask |= INTVAL (operands[2]) << 0;
14368 mask |= INTVAL (operands[3]) << 2;
14369 mask |= INTVAL (operands[4]) << 4;
14370 mask |= INTVAL (operands[5]) << 6;
14371 operands[2] = GEN_INT (mask);
14373 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14375 [(set_attr "type" "sselog")
14376 (set_attr "prefix_data16" "0")
14377 (set_attr "prefix_rep" "1")
14378 (set_attr "prefix" "maybe_vex")
14379 (set_attr "length_immediate" "1")
14380 (set_attr "mode" "TI")])
;; Unmasked 256-bit vpshufhw: selectors target the high words of each
;; 128-bit lane, hence the +4 (first lane) and +12 (second lane)
;; offsets.
14382 (define_expand "avx2_pshufhwv3"
14383 [(match_operand:V16HI 0 "register_operand")
14384 (match_operand:V16HI 1 "nonimmediate_operand")
14385 (match_operand:SI 2 "const_0_to_255_operand")]
14388 int mask = INTVAL (operands[2]);
14389 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
14390 GEN_INT (((mask >> 0) & 3) + 4),
14391 GEN_INT (((mask >> 2) & 3) + 4),
14392 GEN_INT (((mask >> 4) & 3) + 4),
14393 GEN_INT (((mask >> 6) & 3) + 4),
14394 GEN_INT (((mask >> 0) & 3) + 12),
14395 GEN_INT (((mask >> 2) & 3) + 12),
14396 GEN_INT (((mask >> 4) & 3) + 12),
14397 GEN_INT (((mask >> 6) & 3) + 12)));
;; 512-bit vpshufhw, kept as an unspec: the 8-bit immediate is passed
;; straight through to the instruction.
14401 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
14402 [(set (match_operand:V32HI 0 "register_operand" "=v")
14404 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
14405 (match_operand:SI 2 "const_0_to_255_operand" "n")]
14408 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14409 [(set_attr "type" "sselog")
14410 (set_attr "prefix" "evex")
14411 (set_attr "mode" "XI")])
;; Masked 256-bit vpshufhw: same +4/+12 decomposition, with mask
;; operands appended.
14413 (define_expand "avx512vl_pshufhwv3_mask"
14414 [(match_operand:V16HI 0 "register_operand")
14415 (match_operand:V16HI 1 "nonimmediate_operand")
14416 (match_operand:SI 2 "const_0_to_255_operand")
14417 (match_operand:V16HI 3 "register_operand")
14418 (match_operand:HI 4 "register_operand")]
14419 "TARGET_AVX512VL && TARGET_AVX512BW"
14421 int mask = INTVAL (operands[2]);
14422 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
14423 GEN_INT (((mask >> 0) & 3) + 4),
14424 GEN_INT (((mask >> 2) & 3) + 4),
14425 GEN_INT (((mask >> 4) & 3) + 4),
14426 GEN_INT (((mask >> 6) & 3) + 4),
14427 GEN_INT (((mask >> 0) & 3) + 12),
14428 GEN_INT (((mask >> 2) & 3) + 12),
14429 GEN_INT (((mask >> 4) & 3) + 12),
14430 GEN_INT (((mask >> 6) & 3) + 12),
14431 operands[3], operands[4]));
;; V16HI vpshufhw as an explicit vec_select; both lanes must use the
;; same pattern (insn condition); the output code subtracts the +4
;; bias before re-packing the 8-bit immediate.
14435 (define_insn "avx2_pshufhw_1<mask_name>"
14436 [(set (match_operand:V16HI 0 "register_operand" "=v")
14438 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
14439 (parallel [(const_int 0)
14443 (match_operand 2 "const_4_to_7_operand")
14444 (match_operand 3 "const_4_to_7_operand")
14445 (match_operand 4 "const_4_to_7_operand")
14446 (match_operand 5 "const_4_to_7_operand")
14451 (match_operand 6 "const_12_to_15_operand")
14452 (match_operand 7 "const_12_to_15_operand")
14453 (match_operand 8 "const_12_to_15_operand")
14454 (match_operand 9 "const_12_to_15_operand")])))]
14456 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
14457 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
14458 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
14459 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
14460 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
14463 mask |= (INTVAL (operands[2]) - 4) << 0;
14464 mask |= (INTVAL (operands[3]) - 4) << 2;
14465 mask |= (INTVAL (operands[4]) - 4) << 4;
14466 mask |= (INTVAL (operands[5]) - 4) << 6;
14467 operands[2] = GEN_INT (mask);
14469 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
14471 [(set_attr "type" "sselog")
14472 (set_attr "prefix" "maybe_evex")
14473 (set_attr "length_immediate" "1")
14474 (set_attr "mode" "OI")])
14476 (define_expand "avx512vl_pshufhw_mask"
14477 [(match_operand:V8HI 0 "register_operand")
14478 (match_operand:V8HI 1 "nonimmediate_operand")
14479 (match_operand:SI 2 "const_0_to_255_operand")
14480 (match_operand:V8HI 3 "register_operand")
14481 (match_operand:QI 4 "register_operand")]
14482 "TARGET_AVX512VL && TARGET_AVX512BW"
14484 int mask = INTVAL (operands[2]);
14485 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
14486 GEN_INT (((mask >> 0) & 3) + 4),
14487 GEN_INT (((mask >> 2) & 3) + 4),
14488 GEN_INT (((mask >> 4) & 3) + 4),
14489 GEN_INT (((mask >> 6) & 3) + 4),
14490 operands[3], operands[4]));
14494 (define_expand "sse2_pshufhw"
14495 [(match_operand:V8HI 0 "register_operand")
14496 (match_operand:V8HI 1 "vector_operand")
14497 (match_operand:SI 2 "const_int_operand")]
14500 int mask = INTVAL (operands[2]);
14501 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
14502 GEN_INT (((mask >> 0) & 3) + 4),
14503 GEN_INT (((mask >> 2) & 3) + 4),
14504 GEN_INT (((mask >> 4) & 3) + 4),
14505 GEN_INT (((mask >> 6) & 3) + 4)));
14509 (define_insn "sse2_pshufhw_1<mask_name>"
14510 [(set (match_operand:V8HI 0 "register_operand" "=v")
14512 (match_operand:V8HI 1 "vector_operand" "vBm")
14513 (parallel [(const_int 0)
14517 (match_operand 2 "const_4_to_7_operand")
14518 (match_operand 3 "const_4_to_7_operand")
14519 (match_operand 4 "const_4_to_7_operand")
14520 (match_operand 5 "const_4_to_7_operand")])))]
14521 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14524 mask |= (INTVAL (operands[2]) - 4) << 0;
14525 mask |= (INTVAL (operands[3]) - 4) << 2;
14526 mask |= (INTVAL (operands[4]) - 4) << 4;
14527 mask |= (INTVAL (operands[5]) - 4) << 6;
14528 operands[2] = GEN_INT (mask);
14530 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14532 [(set_attr "type" "sselog")
14533 (set_attr "prefix_rep" "1")
14534 (set_attr "prefix_data16" "0")
14535 (set_attr "prefix" "maybe_vex")
14536 (set_attr "length_immediate" "1")
14537 (set_attr "mode" "TI")])
;; Load a 32-bit scalar into element 0 of a V4SI, zeroing the other
;; elements (operand 2 is forced to the zero vector).
14539 (define_expand "sse2_loadd"
14540 [(set (match_operand:V4SI 0 "register_operand")
14542 (vec_duplicate:V4SI
14543 (match_operand:SI 1 "nonimmediate_operand"))
14547 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn form of the scalar load-low: movd from memory/GPR, movss
;; between vector registers; the AVX alternative is non-destructive.
14549 (define_insn "sse2_loadld"
14550 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,x,v")
14552 (vec_duplicate:V4SI
14553 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
14554 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,v")
14558 %vmovd\t{%2, %0|%0, %2}
14559 %vmovd\t{%2, %0|%0, %2}
14560 movss\t{%2, %0|%0, %2}
14561 movss\t{%2, %0|%0, %2}
14562 vmovss\t{%2, %1, %0|%0, %1, %2}"
14563 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
14564 (set_attr "type" "ssemov")
14565 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
14566 (set_attr "mode" "TI,TI,V4SF,SF,SF")
14567 (set (attr "preferred_for_speed")
14568 (cond [(eq_attr "alternative" "1")
14569 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
14571 (symbol_ref "true")))])
14573 ;; QI and HI modes handled by pextr patterns.
;; V16QI only with SSE4.1 (pextrb); V8HI works with plain SSE2 pextrw.
14574 (define_mode_iterator PEXTR_MODE12
14575 [(V16QI "TARGET_SSE4_1") V8HI])
;; Extract one byte/word element to a GPR or memory via pextrb/pextrw.
;; Alternatives 2/3 are the EVEX-encoded AVX512BW forms.
14577 (define_insn "*vec_extract<mode>"
14578 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
14579 (vec_select:<ssescalarmode>
14580 (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
14582 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
14585 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
14586 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
14587 vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
14588 vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14589 [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
14590 (set_attr "type" "sselog1")
14591 (set_attr "prefix_data16" "1")
14592 (set (attr "prefix_extra")
14594 (and (eq_attr "alternative" "0,2")
14595 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
14597 (const_string "1")))
14598 (set_attr "length_immediate" "1")
14599 (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
14600 (set_attr "mode" "TI")])
;; Same extraction combined with zero-extension into a 32/64-bit GPR;
;; pextrb/pextrw already zero the upper bits of the destination.
14602 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
14603 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
14605 (vec_select:<PEXTR_MODE12:ssescalarmode>
14606 (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
14608 [(match_operand:SI 2
14609 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
14612 %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
14613 vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
14614 [(set_attr "isa" "*,avx512bw")
14615 (set_attr "type" "sselog1")
14616 (set_attr "prefix_data16" "1")
14617 (set (attr "prefix_extra")
14619 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
14621 (const_string "1")))
14622 (set_attr "length_immediate" "1")
14623 (set_attr "prefix" "maybe_vex")
14624 (set_attr "mode" "TI")])
;; Byte/word element extraction from a vector in memory: becomes a
;; plain scalar load (split elsewhere into a mem access at the offset).
14626 (define_insn "*vec_extract<mode>_mem"
14627 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
14628 (vec_select:<ssescalarmode>
14629 (match_operand:VI12_128 1 "memory_operand" "o")
14631 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extract element 0 of an SI/DI vector: just a scalar mov (low part
;; of the vector register or a load from the vector's first slot).
14635 (define_insn "*vec_extract<ssevecmodelower>_0"
14636 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,v ,m")
14638 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "m ,v,vm,v")
14639 (parallel [(const_int 0)])))]
14640 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
14642 [(set_attr "isa" "*,sse2,*,*")
14643 (set (attr "preferred_for_speed")
14644 (cond [(eq_attr "alternative" "1")
14645 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
14647 (symbol_ref "true")))])
;; 32-bit-target variant for extracting the low DI element of V2DI.
14649 (define_insn "*vec_extractv2di_0_sse"
14650 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,x ,m")
14652 (match_operand:V2DI 1 "nonimmediate_operand" " x,xm,x")
14653 (parallel [(const_int 0)])))]
14654 "TARGET_SSE && !TARGET_64BIT
14655 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
14657 [(set_attr "isa" "sse4,*,*")
14658 (set (attr "preferred_for_speed")
14659 (cond [(eq_attr "alternative" "0")
14660 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
14662 (symbol_ref "true")))])
;; Split: on !TARGET_64BIT, a DI extract into a GPR pair becomes two
;; SI moves (low SI subreg plus vec_select of element 1 of the V4SI
;; view).  NOTE(review): the define_split header line itself appears
;; to be missing from this extraction.
14665 [(set (match_operand:DI 0 "general_reg_operand")
14667 (match_operand:V2DI 1 "register_operand")
14668 (parallel [(const_int 0)])))]
14669 "TARGET_SSE4_1 && !TARGET_64BIT
14670 && reload_completed"
14671 [(set (match_dup 2) (match_dup 4))
14675 (parallel [(const_int 1)])))]
14677 operands[4] = gen_lowpart (SImode, operands[1]);
14678 operands[5] = gen_lowpart (V4SImode, operands[1]);
14679 split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
;; Split: element-0 extraction is just a lowpart move after reload.
14683 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
14685 (match_operand:<ssevecmode> 1 "register_operand")
14686 (parallel [(const_int 0)])))]
14687 "TARGET_SSE && reload_completed"
14688 [(set (match_dup 0) (match_dup 1))]
14689 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
;; Zero-extended element-0 extraction of V4SI into DI.
14691 (define_insn "*vec_extractv4si_0_zext_sse4"
14692 [(set (match_operand:DI 0 "register_operand" "=r,x,v")
14695 (match_operand:V4SI 1 "register_operand" "v,x,v")
14696 (parallel [(const_int 0)]))))]
14699 [(set_attr "isa" "x64,*,avx512f")
14700 (set (attr "preferred_for_speed")
14701 (cond [(eq_attr "alternative" "0")
14702 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
14704 (symbol_ref "true")))])
;; Same, restricted to vec->GPR when inter-unit moves are preferred.
14706 (define_insn "*vec_extractv4si_0_zext"
14707 [(set (match_operand:DI 0 "register_operand" "=r")
14710 (match_operand:V4SI 1 "register_operand" "x")
14711 (parallel [(const_int 0)]))))]
14712 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
;; Split the zext form to a plain SI->DI zero_extend after reload.
14716 [(set (match_operand:DI 0 "register_operand")
14719 (match_operand:V4SI 1 "register_operand")
14720 (parallel [(const_int 0)]))))]
14721 "TARGET_SSE2 && reload_completed"
14722 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
14723 "operands[1] = gen_lowpart (SImode, operands[1]);")
;; General V4SI element extraction: pextrd where available, otherwise
;; shift the wanted element down with psrldq (immediate = index * 4).
14725 (define_insn "*vec_extractv4si"
14726 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
14728 (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
14729 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
14732 switch (which_alternative)
14736 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
14740 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
14741 return "psrldq\t{%2, %0|%0, %2}";
14745 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
14746 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
14749 gcc_unreachable ();
14752 [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
14753 (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
14754 (set (attr "prefix_extra")
14755 (if_then_else (eq_attr "alternative" "0,1")
14757 (const_string "*")))
14758 (set_attr "length_immediate" "1")
14759 (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
14760 (set_attr "mode" "TI")])
;; pextrd with implicit zero-extension into a 64-bit GPR.
14762 (define_insn "*vec_extractv4si_zext"
14763 [(set (match_operand:DI 0 "register_operand" "=r,r")
14766 (match_operand:V4SI 1 "register_operand" "x,v")
14767 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
14768 "TARGET_64BIT && TARGET_SSE4_1"
14769 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
14770 [(set_attr "isa" "*,avx512dq")
14771 (set_attr "type" "sselog1")
14772 (set_attr "prefix_extra" "1")
14773 (set_attr "length_immediate" "1")
14774 (set_attr "prefix" "maybe_vex")
14775 (set_attr "mode" "TI")])
;; Extraction from a V4SI in memory: a scalar SImode load.
14777 (define_insn "*vec_extractv4si_mem"
14778 [(set (match_operand:SI 0 "register_operand" "=x,r")
14780 (match_operand:V4SI 1 "memory_operand" "o,o")
14781 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; Zero-extending memory extraction: split to a zero_extend of the
;; SImode memory slot at offset index * 4 after reload.
14785 (define_insn_and_split "*vec_extractv4si_zext_mem"
14786 [(set (match_operand:DI 0 "register_operand" "=x,r")
14789 (match_operand:V4SI 1 "memory_operand" "o,o")
14790 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
14791 "TARGET_64BIT && TARGET_SSE"
14793 "&& reload_completed"
14794 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
14796 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extract the high DI element of V2DI: pextrq, movhps store, psrldq
;; shift, movhlps, or (alternative 8) a plain GPR load from memory.
14799 (define_insn "*vec_extractv2di_1"
14800 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm,m,x,x,Yv,x,v,r")
14802 (match_operand:V2DI 1 "nonimmediate_operand" "x ,v ,v,0,x, v,x,o,o")
14803 (parallel [(const_int 1)])))]
14804 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
14806 %vpextrq\t{$1, %1, %0|%0, %1, 1}
14807 vpextrq\t{$1, %1, %0|%0, %1, 1}
14808 %vmovhps\t{%1, %0|%0, %1}
14809 psrldq\t{$8, %0|%0, 8}
14810 vpsrldq\t{$8, %1, %0|%0, %1, 8}
14811 vpsrldq\t{$8, %1, %0|%0, %1, 8}
14812 movhlps\t{%1, %0|%0, %1}
14816 (cond [(eq_attr "alternative" "0")
14817 (const_string "x64_sse4")
14818 (eq_attr "alternative" "1")
14819 (const_string "x64_avx512dq")
14820 (eq_attr "alternative" "3")
14821 (const_string "sse2_noavx")
14822 (eq_attr "alternative" "4")
14823 (const_string "avx")
14824 (eq_attr "alternative" "5")
14825 (const_string "avx512bw")
14826 (eq_attr "alternative" "6")
14827 (const_string "noavx")
14828 (eq_attr "alternative" "8")
14829 (const_string "x64")
14831 (const_string "*")))
14833 (cond [(eq_attr "alternative" "2,6,7")
14834 (const_string "ssemov")
14835 (eq_attr "alternative" "3,4,5")
14836 (const_string "sseishft1")
14837 (eq_attr "alternative" "8")
14838 (const_string "imov")
14840 (const_string "sselog1")))
14841 (set (attr "length_immediate")
14842 (if_then_else (eq_attr "alternative" "0,1,3,4,5")
14844 (const_string "*")))
14845 (set (attr "prefix_rex")
14846 (if_then_else (eq_attr "alternative" "0,1")
14848 (const_string "*")))
14849 (set (attr "prefix_extra")
14850 (if_then_else (eq_attr "alternative" "0,1")
14852 (const_string "*")))
14853 (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
14854 (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
;; Split: extracting any element from a vector in memory becomes a
;; scalar load from the element's byte offset.  NOTE(review): the
;; define_split header line appears to be missing from this extraction.
14857 [(set (match_operand:<ssescalarmode> 0 "register_operand")
14858 (vec_select:<ssescalarmode>
14859 (match_operand:VI_128 1 "memory_operand")
14861 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
14862 "TARGET_SSE && reload_completed"
14863 [(set (match_dup 0) (match_dup 1))]
14865 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
14867 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; Extract a 128-bit lane from V2TI (vextracti128 / vextracti32x4).
14870 (define_insn "*vec_extractv2ti"
14871 [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
14873 (match_operand:V2TI 1 "register_operand" "x,v")
14875 [(match_operand:SI 2 "const_0_to_1_operand")])))]
14878 vextract%~128\t{%2, %1, %0|%0, %1, %2}
14879 vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
14880 [(set_attr "type" "sselog")
14881 (set_attr "prefix_extra" "1")
14882 (set_attr "length_immediate" "1")
14883 (set_attr "prefix" "vex,evex")
14884 (set_attr "mode" "OI")])
;; Extract a 128-bit lane from V4TI (AVX512F vextracti32x4).
14886 (define_insn "*vec_extractv4ti"
14887 [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
14889 (match_operand:V4TI 1 "register_operand" "v")
14891 [(match_operand:SI 2 "const_0_to_3_operand")])))]
14893 "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
14894 [(set_attr "type" "sselog")
14895 (set_attr "prefix_extra" "1")
14896 (set_attr "length_immediate" "1")
14897 (set_attr "prefix" "evex")
14898 (set_attr "mode" "XI")])
;; Lane-0 extraction of V2TI/V4TI is just a TImode lowpart move,
;; provided the source register is reachable (AVX512VL or non-EXT reg).
14900 (define_mode_iterator VEXTRACTI128_MODE
14901 [(V4TI "TARGET_AVX512F") V2TI])
14904 [(set (match_operand:TI 0 "nonimmediate_operand")
14906 (match_operand:VEXTRACTI128_MODE 1 "register_operand")
14907 (parallel [(const_int 0)])))]
14909 && reload_completed
14910 && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
14911 [(set (match_dup 0) (match_dup 1))]
14912 "operands[1] = gen_lowpart (TImode, operands[1]);")
14914 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
14915 ;; vector modes into vec_extract*.
;; For 256/512-bit sources, first narrow to a 128-bit vector with
;; vec_extract_lo, then take element 0 of that.
;; NOTE(review): the define_split header line appears to be missing
;; from this extraction.
14917 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
14918 (subreg:SWI48x (match_operand 1 "register_operand") 0))]
14919 "can_create_pseudo_p ()
14920 && REG_P (operands[1])
14921 && VECTOR_MODE_P (GET_MODE (operands[1]))
14922 && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
14923 || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
14924 || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
14925 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
14926 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
14927 (parallel [(const_int 0)])))]
14931 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
14934 if (<MODE>mode == SImode)
14936 tmp = gen_reg_rtx (V8SImode);
14937 emit_insn (gen_vec_extract_lo_v16si (tmp,
14938 gen_lowpart (V16SImode,
14943 tmp = gen_reg_rtx (V4DImode);
14944 emit_insn (gen_vec_extract_lo_v8di (tmp,
14945 gen_lowpart (V8DImode,
14951 tmp = gen_reg_rtx (<ssevecmode>mode);
14952 if (<MODE>mode == SImode)
14953 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
14956 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
14961 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
;; Concatenate two SImode values into V2SI with SSE4.1 available:
;; pinsrd, punpckldq, a zero-filling movd, or the MMX forms.
14966 (define_insn "*vec_concatv2si_sse4_1"
14967 [(set (match_operand:V2SI 0 "register_operand"
14968 "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
14970 (match_operand:SI 1 "nonimmediate_operand"
14971 " 0, 0, x,Yv, 0, 0,Yv,rm, 0,rm")
14972 (match_operand:SI 2 "nonimm_or_0_operand"
14973 " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
14974 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14976 pinsrd\t{$1, %2, %0|%0, %2, 1}
14977 pinsrd\t{$1, %2, %0|%0, %2, 1}
14978 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
14979 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
14980 punpckldq\t{%2, %0|%0, %2}
14981 punpckldq\t{%2, %0|%0, %2}
14982 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
14983 %vmovd\t{%1, %0|%0, %1}
14984 punpckldq\t{%2, %0|%0, %2}
14985 movd\t{%1, %0|%0, %1}"
14986 [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
14987 (set (attr "mmx_isa")
14988 (if_then_else (eq_attr "alternative" "8,9")
14989 (const_string "native")
14990 (const_string "*")))
14992 (cond [(eq_attr "alternative" "7")
14993 (const_string "ssemov")
14994 (eq_attr "alternative" "8")
14995 (const_string "mmxcvt")
14996 (eq_attr "alternative" "9")
14997 (const_string "mmxmov")
14999 (const_string "sselog")))
15000 (set (attr "prefix_extra")
15001 (if_then_else (eq_attr "alternative" "0,1,2,3")
15003 (const_string "*")))
15004 (set (attr "length_immediate")
15005 (if_then_else (eq_attr "alternative" "0,1,2,3")
15007 (const_string "*")))
15008 (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
15009 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
15011 ;; ??? In theory we can match memory for the MMX alternative, but allowing
15012 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
15013 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Pre-SSE4.1 V2SI concatenation: punpckldq/unpcklps, or a plain
;; movd/movss when the second element is zero.
15014 (define_insn "*vec_concatv2si"
15015 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,x,x,*y,*y")
15017 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,0,m, 0,rm")
15018 (match_operand:SI 2 "reg_or_0_operand" " x,C ,x,C,*y,C")))]
15019 "TARGET_SSE && !TARGET_SSE4_1"
15021 punpckldq\t{%2, %0|%0, %2}
15022 movd\t{%1, %0|%0, %1}
15023 unpcklps\t{%2, %0|%0, %2}
15024 movss\t{%1, %0|%0, %1}
15025 punpckldq\t{%2, %0|%0, %2}
15026 movd\t{%1, %0|%0, %1}"
15027 [(set_attr "isa" "sse2,sse2,*,*,*,*")
15028 (set_attr "mmx_isa" "*,*,*,*,native,native")
15029 (set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov")
15030 (set_attr "mode" "TI,TI,V4SF,SF,DI,DI")])
;; Concatenate two V2SI halves into V4SI: punpcklqdq/movlhps for
;; register sources, movhps to merge from memory.
15032 (define_insn "*vec_concatv4si"
15033 [(set (match_operand:V4SI 0 "register_operand" "=x,v,x,x,v")
15035 (match_operand:V2SI 1 "register_operand" " 0,v,0,0,v")
15036 (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
15039 punpcklqdq\t{%2, %0|%0, %2}
15040 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
15041 movlhps\t{%2, %0|%0, %2}
15042 movhps\t{%2, %0|%0, %q2}
15043 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
15044 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
15045 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
15046 (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
15047 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
;; V2SI value with a zero upper half: movq zero-extends, movq2dq
;; moves from an MMX register.
15049 (define_insn "*vec_concatv4si_0"
15050 [(set (match_operand:V4SI 0 "register_operand" "=v,x")
15052 (match_operand:V2SI 1 "nonimmediate_operand" "vm,?!*y")
15053 (match_operand:V2SI 2 "const0_operand" " C,C")))]
15056 %vmovq\t{%1, %0|%0, %1}
15057 movq2dq\t{%1, %0|%0, %1}"
15058 [(set_attr "mmx_isa" "*,native")
15059 (set_attr "type" "ssemov")
15060 (set_attr "prefix" "maybe_vex,orig")
15061 (set_attr "mode" "TI")]) 
;; Concatenate two DImode values into V2DI: pinsrq from a GPR or
;; memory, punpcklqdq/movlhps between registers, movhps from memory.
15063 (define_insn "vec_concatv2di"
15064 [(set (match_operand:V2DI 0 "register_operand"
15065 "=Yr,*x,x ,v ,x,v ,x,x,v")
15067 (match_operand:DI 1 "register_operand"
15068 " 0, 0,x ,Yv,0,Yv,0,0,v")
15069 (match_operand:DI 2 "nonimmediate_operand"
15070 " rm,rm,rm,rm,x,Yv,x,m,m")))]
15073 pinsrq\t{$1, %2, %0|%0, %2, 1}
15074 pinsrq\t{$1, %2, %0|%0, %2, 1}
15075 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
15076 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
15077 punpcklqdq\t{%2, %0|%0, %2}
15078 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
15079 movlhps\t{%2, %0|%0, %2}
15080 movhps\t{%2, %0|%0, %2}
15081 vmovhps\t{%2, %1, %0|%0, %1, %2}"
15083 (cond [(eq_attr "alternative" "0,1")
15084 (const_string "x64_sse4_noavx")
15085 (eq_attr "alternative" "2")
15086 (const_string "x64_avx")
15087 (eq_attr "alternative" "3")
15088 (const_string "x64_avx512dq")
15089 (eq_attr "alternative" "4")
15090 (const_string "sse2_noavx")
15091 (eq_attr "alternative" "5,8")
15092 (const_string "avx")
15094 (const_string "noavx")))
15097 (eq_attr "alternative" "0,1,2,3,4,5")
15098 (const_string "sselog")
15099 (const_string "ssemov")))
15100 (set (attr "prefix_rex")
15101 (if_then_else (eq_attr "alternative" "0,1,2,3")
15103 (const_string "*")))
15104 (set (attr "prefix_extra")
15105 (if_then_else (eq_attr "alternative" "0,1,2,3")
15107 (const_string "*")))
15108 (set (attr "length_immediate")
15109 (if_then_else (eq_attr "alternative" "0,1,2,3")
15111 (const_string "*")))
15112 (set (attr "prefix")
15113 (cond [(eq_attr "alternative" "2")
15114 (const_string "vex")
15115 (eq_attr "alternative" "3")
15116 (const_string "evex")
15117 (eq_attr "alternative" "5,8")
15118 (const_string "maybe_evex")
15120 (const_string "orig")))
15121 (set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; DImode value with zero upper half.  The first alternative guards
;; against assemblers lacking inter-unit movq support.
15123 (define_insn "*vec_concatv2di_0"
15124 [(set (match_operand:V2DI 0 "register_operand" "=v,v ,x")
15126 (match_operand:DI 1 "nonimmediate_operand" " r,vm,?!*y")
15127 (match_operand:DI 2 "const0_operand" " C,C ,C")))]
15130 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
15131 %vmovq\t{%1, %0|%0, %1}
15132 movq2dq\t{%1, %0|%0, %1}"
15133 [(set_attr "isa" "x64,*,*")
15134 (set_attr "mmx_isa" "*,*,native")
15135 (set_attr "type" "ssemov")
15136 (set_attr "prefix_rex" "1,*,*")
15137 (set_attr "prefix" "maybe_vex,maybe_vex,orig")
15138 (set_attr "mode" "TI")
15139 (set (attr "preferred_for_speed")
15140 (cond [(eq_attr "alternative" "0")
15141 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
15143 (symbol_ref "true")))])
15145 ;; vmovq clears also the higher bits.
;; Set element 0 of a 64-bit-element vector, zeroing the rest (vmovq).
15146 (define_insn "vec_set<mode>_0"
15147 [(set (match_operand:VI8_AVX_AVX512F 0 "register_operand" "=v,v")
15148 (vec_merge:VI8_AVX_AVX512F
15149 (vec_duplicate:VI8_AVX_AVX512F
15150 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,vm"))
15151 (match_operand:VI8_AVX_AVX512F 1 "const0_operand" "C,C")
15154 "vmovq\t{%2, %x0|%x0, %2}"
15155 [(set_attr "isa" "x64,*")
15156 (set_attr "type" "ssemov")
15157 (set_attr "prefix_rex" "1,*")
15158 (set_attr "prefix" "maybe_evex")
15159 (set_attr "mode" "TI")
15160 (set (attr "preferred_for_speed")
15161 (cond [(eq_attr "alternative" "0")
15162 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
15164 (symbol_ref "true")))])
;; Widening unpack expanders; all delegate to ix86_expand_sse_unpack
;; (args: unsigned_p, high_p).
15166 (define_expand "vec_unpacks_lo_<mode>"
15167 [(match_operand:<sseunpackmode> 0 "register_operand")
15168 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15170 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
15172 (define_expand "vec_unpacks_hi_<mode>"
15173 [(match_operand:<sseunpackmode> 0 "register_operand")
15174 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15176 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
15178 (define_expand "vec_unpacku_lo_<mode>"
15179 [(match_operand:<sseunpackmode> 0 "register_operand")
15180 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15182 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Mask-register (sbool) unpack: low half is just a copy of the mask.
15184 (define_expand "vec_unpacks_sbool_lo_qi"
15185 [(match_operand:QI 0 "register_operand")
15186 (match_operand:QI 1 "register_operand")
15187 (match_operand:QI 2 "const_int_operand")]
15190 if (INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 4)
15192 emit_move_insn (operands[0], operands[1]);
15196 (define_expand "vec_unpacks_lo_hi"
15197 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
15198 (match_operand:HI 1 "register_operand"))]
15201 (define_expand "vec_unpacks_lo_si"
15202 [(set (match_operand:HI 0 "register_operand")
15203 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
15206 (define_expand "vec_unpacks_lo_di"
15207 [(set (match_operand:SI 0 "register_operand")
15208 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
15211 (define_expand "vec_unpacku_hi_<mode>"
15212 [(match_operand:<sseunpackmode> 0 "register_operand")
15213 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15215 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
;; Mask-register (sbool) high unpack: shift the mask right by half its
;; width; uses kshiftr directly (DQ) or via an HImode temporary.
15217 (define_expand "vec_unpacks_sbool_hi_qi"
15218 [(match_operand:QI 0 "register_operand")
15219 (match_operand:QI 1 "register_operand")
15220 (match_operand:QI 2 "const_int_operand")]
15223 HOST_WIDE_INT nunits = INTVAL (operands[2]);
15224 if (nunits != 8 && nunits != 4)
15226 if (TARGET_AVX512DQ)
15227 emit_insn (gen_klshiftrtqi (operands[0], operands[1],
15228 GEN_INT (nunits / 2)));
15231 rtx tem = gen_reg_rtx (HImode);
15232 emit_insn (gen_klshiftrthi (tem, lowpart_subreg (HImode, operands[1],
15234 GEN_INT (nunits / 2)));
15235 emit_move_insn (operands[0], lowpart_subreg (QImode, tem, HImode));
;; High-half unpacks of mask registers via a mask-register shift
;; (the UNSPEC_MASKOP clause keeps these in the k-register file).
15240 (define_expand "vec_unpacks_hi_hi"
15242 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
15243 (lshiftrt:HI (match_operand:HI 1 "register_operand")
15245 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
15248 (define_expand "vec_unpacks_hi_<mode>"
15250 [(set (subreg:SWI48x
15251 (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
15252 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
15254 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
15256 "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
15258 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15262 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unsigned average: (a + b + 1) >> 1 in double-width arithmetic,
;; matching the pavgb/pavgw rounding behavior.
15264 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
15265 [(set (match_operand:VI12_AVX2 0 "register_operand")
15266 (truncate:VI12_AVX2
15267 (lshiftrt:<ssedoublemode>
15268 (plus:<ssedoublemode>
15269 (plus:<ssedoublemode>
15270 (zero_extend:<ssedoublemode>
15271 (match_operand:VI12_AVX2 1 "vector_operand"))
15272 (zero_extend:<ssedoublemode>
15273 (match_operand:VI12_AVX2 2 "vector_operand")))
15274 (match_dup <mask_expand_op3>))
15276 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
15278 operands[<mask_expand_op3>] = CONST1_RTX(<MODE>mode);
15279 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Matching insn: pavgb/pavgw (v-prefixed, optionally masked, for AVX).
15282 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
15283 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
15284 (truncate:VI12_AVX2
15285 (lshiftrt:<ssedoublemode>
15286 (plus:<ssedoublemode>
15287 (plus:<ssedoublemode>
15288 (zero_extend:<ssedoublemode>
15289 (match_operand:VI12_AVX2 1 "vector_operand" "%0,v"))
15290 (zero_extend:<ssedoublemode>
15291 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))
15292 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
15294 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
15295 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15297 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
15298 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15299 [(set_attr "isa" "noavx,avx")
15300 (set_attr "type" "sseiadd")
15301 (set_attr "prefix_data16" "1,*")
15302 (set_attr "prefix" "orig,<mask_prefix>")
15303 (set_attr "mode" "<sseinsnmode>")])
15305 ;; The correct representation for this is absolutely enormous, and
15306 ;; surely not generally useful.
;; Sum of absolute differences of bytes, hence modeled as an unspec.
15307 (define_insn "<sse2_avx2>_psadbw"
15308 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
15309 (unspec:VI8_AVX2_AVX512BW
15310 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
15311 (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
15315 psadbw\t{%2, %0|%0, %2}
15316 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
15317 [(set_attr "isa" "noavx,avx")
15318 (set_attr "type" "sseiadd")
15319 (set_attr "atom_unit" "simul")
15320 (set_attr "prefix_data16" "1,*")
15321 (set_attr "prefix" "orig,maybe_evex")
15322 (set_attr "mode" "<sseinsnmode>")])
;; movmskps/movmskpd: collect the sign bits of each FP element into
;; the low bits of a GPR.
15324 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
15325 [(set (match_operand:SI 0 "register_operand" "=r")
15327 [(match_operand:VF_128_256 1 "register_operand" "x")]
15330 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
15331 [(set_attr "type" "ssemov")
15332 (set_attr "prefix" "maybe_vex")
15333 (set_attr "mode" "<MODE>")])
;; movmsk whose SI result is zero-extended to DI (%k0 forces 32-bit
;; destination encoding; upper bits are implicitly cleared).
15335 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
15336 [(set (match_operand:DI 0 "register_operand" "=r")
15339 [(match_operand:VF_128_256 1 "register_operand" "x")]
15341 "TARGET_64BIT && TARGET_SSE"
15342 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
15343 [(set_attr "type" "ssemov")
15344 (set_attr "prefix" "maybe_vex")
15345 (set_attr "mode" "<MODE>")])
;; Combine pattern: a (x < 0) comparison result fed to movmsk is the
;; same as movmsk of x itself; split back to the plain form.
15347 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt"
15348 [(set (match_operand:SI 0 "register_operand" "=r")
15351 (match_operand:<sseintvecmode> 1 "register_operand" "x")
15352 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
15356 "&& reload_completed"
15357 [(set (match_dup 0)
15358 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
15359 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15360 [(set_attr "type" "ssemov")
15361 (set_attr "prefix" "maybe_vex")
15362 (set_attr "mode" "<MODE>")])
;; Zero-extended variant of the (x < 0) movmsk combine pattern.
15364 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt"
15365 [(set (match_operand:DI 0 "register_operand" "=r")
15369 (match_operand:<sseintvecmode> 1 "register_operand" "x")
15370 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
15372 "TARGET_64BIT && TARGET_SSE"
15374 "&& reload_completed"
15375 [(set (match_dup 0)
15376 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
15377 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15378 [(set_attr "type" "ssemov")
15379 (set_attr "prefix" "maybe_vex")
15380 (set_attr "mode" "<MODE>")])
;; Likewise for movmsk of an arithmetic-shift-right result (sign bits
;; are unchanged by the shift, so the shift can be dropped).
15382 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
15383 [(set (match_operand:SI 0 "register_operand" "=r")
15385 [(subreg:VF_128_256
15386 (ashiftrt:<sseintvecmode>
15387 (match_operand:<sseintvecmode> 1 "register_operand" "x")
15388 (match_operand:QI 2 "const_int_operand" "n")) 0)]
15392 "&& reload_completed"
15393 [(set (match_dup 0)
15394 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
15395 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15396 [(set_attr "type" "ssemov")
15397 (set_attr "prefix" "maybe_vex")
15398 (set_attr "mode" "<MODE>")])
;; Zero-extended variant of the shift-based movmsk combine pattern.
15400 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift"
15401 [(set (match_operand:DI 0 "register_operand" "=r")
15404 [(subreg:VF_128_256
15405 (ashiftrt:<sseintvecmode>
15406 (match_operand:<sseintvecmode> 1 "register_operand" "x")
15407 (match_operand:QI 2 "const_int_operand" "n")) 0)]
15409 "TARGET_64BIT && TARGET_SSE"
15411 "&& reload_completed"
15412 [(set (match_dup 0)
15413 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
15414 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15415 [(set_attr "type" "ssemov")
15416 (set_attr "prefix" "maybe_vex")
15417 (set_attr "mode" "<MODE>")])
;; pmovmskb: collect the sign bit of every byte element into a GPR.
15419 (define_insn "<sse2_avx2>_pmovmskb"
15420 [(set (match_operand:SI 0 "register_operand" "=r")
15422 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
15425 "%vpmovmskb\t{%1, %0|%0, %1}"
15426 [(set_attr "type" "ssemov")
15427 (set (attr "prefix_data16")
15429 (match_test "TARGET_AVX")
15431 (const_string "1")))
15432 (set_attr "prefix" "maybe_vex")
15433 (set_attr "mode" "SI")])
;; pmovmskb with the SI result zero-extended to DI.
15435 (define_insn "*<sse2_avx2>_pmovmskb_zext"
15436 [(set (match_operand:DI 0 "register_operand" "=r")
15439 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
15441 "TARGET_64BIT && TARGET_SSE2"
15442 "%vpmovmskb\t{%1, %k0|%k0, %1}"
15443 [(set_attr "type" "ssemov")
15444 (set (attr "prefix_data16")
15446 (match_test "TARGET_AVX")
15448 (const_string "1")))
15449 (set_attr "prefix" "maybe_vex")
15450 (set_attr "mode" "SI")])
;; Combine pattern: pmovmskb of (x < 0) equals pmovmskb of x.
15452 (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
15453 [(set (match_operand:SI 0 "register_operand" "=r")
15455 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
15456 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
15461 [(set (match_dup 0)
15462 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
15464 [(set_attr "type" "ssemov")
15465 (set (attr "prefix_data16")
15467 (match_test "TARGET_AVX")
15469 (const_string "1")))
15470 (set_attr "prefix" "maybe_vex")
15471 (set_attr "mode" "SI")])
;; Zero-extended variant of the (x < 0) pmovmskb combine pattern.
15473 (define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
15474 [(set (match_operand:DI 0 "register_operand" "=r")
15477 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
15478 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
15480 "TARGET_64BIT && TARGET_SSE2"
15483 [(set (match_dup 0)
15484 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
15486 [(set_attr "type" "ssemov")
15487 (set (attr "prefix_data16")
15489 (match_test "TARGET_AVX")
15491 (const_string "1")))
15492 (set_attr "prefix" "maybe_vex")
15493 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: byte-granular masked store of operand 1 into
;; memory, selecting bytes by the sign bits of mask operand 2
;; (per the SSE2 MASKMOVDQU instruction definition).
15495 (define_expand "sse2_maskmovdqu"
15496 [(set (match_operand:V16QI 0 "memory_operand")
15497 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
15498 (match_operand:V16QI 2 "register_operand")
;; maskmovdqu insn: the destination address is implicit in %rdi/%edi
;; (constraint "D"), and the old memory contents appear as an input
;; because unselected bytes are preserved.
15503 (define_insn "*sse2_maskmovdqu"
15504 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
15505 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
15506 (match_operand:V16QI 2 "register_operand" "x")
15507 (mem:V16QI (match_dup 0))]
15511 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
15512 that requires %v to be at the beginning of the opcode name. */
15513 if (Pmode != word_mode)
15514 fputs ("\taddr32", asm_out_file);
15515 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
15517 [(set_attr "type" "ssemov")
15518 (set_attr "prefix_data16" "1")
;; addr32 prefix adds one byte when pointers are narrower than word_mode
;; (the x32 ABI case handled in the C template above).
15519 (set (attr "length_address")
15520 (symbol_ref ("Pmode != word_mode")))
15521 ;; The implicit %rdi operand confuses default length_vex computation.
15522 (set (attr "length_vex")
15523 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
15524 (set_attr "prefix" "maybe_vex")
15525 (set_attr "znver1_decode" "vector")
15526 (set_attr "mode" "TI")])
;; ldmxcsr: load the SSE control/status register from memory.
;; Volatile: it changes rounding/exception state for all later SSE code.
15528 (define_insn "sse_ldmxcsr"
15529 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
15533 [(set_attr "type" "sse")
15534 (set_attr "atom_sse_attr" "mxcsr")
15535 (set_attr "prefix" "maybe_vex")
15536 (set_attr "memory" "load")])
;; stmxcsr: store the SSE control/status register to memory.
15538 (define_insn "sse_stmxcsr"
15539 [(set (match_operand:SI 0 "memory_operand" "=m")
15540 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
15543 [(set_attr "type" "sse")
15544 (set_attr "atom_sse_attr" "mxcsr")
15545 (set_attr "prefix" "maybe_vex")
15546 (set_attr "memory" "store")])
;; clflush: flush the cache line containing the given address.
;; Operand is a bare address, not a memory reference (predicate "p").
15548 (define_insn "sse2_clflush"
15549 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
15553 [(set_attr "type" "sse")
15554 (set_attr "atom_sse_attr" "fence")
15555 (set_attr "memory" "unknown")])
15557 ;; As per AMD and Intel ISA manuals, the first operand is extensions
15558 ;; and it goes to %ecx. The second operand is hints and it goes
;; mwait: wait for a monitored store; extensions in %ecx, hints in %eax
;; (hard-register constraints "c" and "a" below).
15560 (define_insn "sse3_mwait"
15561 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
15562 (match_operand:SI 1 "register_operand" "a")]
15565 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
15566 ;; Since 32bit register operands are implicitly zero extended to 64bit,
15567 ;; we only need to set up 32bit registers.
15569 [(set_attr "length" "3")])
;; monitor: arm address monitoring on the address in %rax/%eax, with
;; extensions in %ecx and hints in %edx.  Length grows by one byte for
;; the addr32 prefix when Pmode != word_mode (x32).
15571 (define_insn "@sse3_monitor_<mode>"
15572 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
15573 (match_operand:SI 1 "register_operand" "c")
15574 (match_operand:SI 2 "register_operand" "d")]
15577 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
15578 ;; RCX and RDX are used. Since 32bit register operands are implicitly
15579 ;; zero extended to 64bit, we only need to set up 32bit registers.
15581 [(set (attr "length")
15582 (symbol_ref ("(Pmode != word_mode) + 3")))])
15584 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15586 ;; SSSE3 instructions
15588 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator covering the four horizontal SSSE3 operations:
;; add, saturating add, subtract, saturating subtract.
15590 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; AVX2 horizontal word add/sub (vphaddw/vphsubw and saturating forms):
;; each result element is op(adjacent pair), pairs drawn first from
;; operand 1 (elements 0..15) and then from operand 2 (elements 0..15),
;; spelled out exhaustively in RTL below.
15592 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
15593 [(set (match_operand:V16HI 0 "register_operand" "=x")
15598 (ssse3_plusminus:HI
15600 (match_operand:V16HI 1 "register_operand" "x")
15601 (parallel [(const_int 0)]))
15602 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
15603 (ssse3_plusminus:HI
15604 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
15605 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
15607 (ssse3_plusminus:HI
15608 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
15609 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
15610 (ssse3_plusminus:HI
15611 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
15612 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
15615 (ssse3_plusminus:HI
15616 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
15617 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
15618 (ssse3_plusminus:HI
15619 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
15620 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
15622 (ssse3_plusminus:HI
15623 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
15624 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
15625 (ssse3_plusminus:HI
15626 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
15627 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
;; Second source operand may be memory.
15631 (ssse3_plusminus:HI
15633 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
15634 (parallel [(const_int 0)]))
15635 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
15636 (ssse3_plusminus:HI
15637 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
15638 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
15640 (ssse3_plusminus:HI
15641 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
15642 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
15643 (ssse3_plusminus:HI
15644 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
15645 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
15648 (ssse3_plusminus:HI
15649 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
15650 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
15651 (ssse3_plusminus:HI
15652 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
15653 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
15655 (ssse3_plusminus:HI
15656 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
15657 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
15658 (ssse3_plusminus:HI
15659 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
15660 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
15662 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
15663 [(set_attr "type" "sseiadd")
15664 (set_attr "prefix_extra" "1")
15665 (set_attr "prefix" "vex")
15666 (set_attr "mode" "OI")])
;; 128-bit horizontal word add/sub (phaddw/phsubw): low half of the result
;; comes from adjacent pairs of operand 1, high half from operand 2.
;; Alternative 0 is legacy SSE (dest = src1), alternative 1 is the
;; three-operand VEX form.
15668 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
15669 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
15673 (ssse3_plusminus:HI
15675 (match_operand:V8HI 1 "register_operand" "0,x")
15676 (parallel [(const_int 0)]))
15677 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
15678 (ssse3_plusminus:HI
15679 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
15680 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
15682 (ssse3_plusminus:HI
15683 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
15684 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
15685 (ssse3_plusminus:HI
15686 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
15687 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
15690 (ssse3_plusminus:HI
15692 (match_operand:V8HI 2 "vector_operand" "xBm,xm")
15693 (parallel [(const_int 0)]))
15694 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
15695 (ssse3_plusminus:HI
15696 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
15697 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
15699 (ssse3_plusminus:HI
15700 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
15701 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
15702 (ssse3_plusminus:HI
15703 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
15704 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
15707 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
15708 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
15709 [(set_attr "isa" "noavx,avx")
15710 (set_attr "type" "sseiadd")
15711 (set_attr "atom_unit" "complex")
15712 (set_attr "prefix_data16" "1,*")
15713 (set_attr "prefix_extra" "1")
15714 (set_attr "prefix" "orig,vex")
15715 (set_attr "mode" "TI")])
;; 64-bit (MMX) horizontal word add/sub.  Alternative 0 is native MMX;
;; alternatives 1/2 run the operation in SSE registers and, after reload,
;; split to the V8HI pattern followed by a fixup that moves the relevant
;; high part back down (TARGET_MMX_WITH_SSE emulation path).
15717 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>wv4hi3"
15718 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
15721 (ssse3_plusminus:HI
15723 (match_operand:V4HI 1 "register_operand" "0,0,Yv")
15724 (parallel [(const_int 0)]))
15725 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
15726 (ssse3_plusminus:HI
15727 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
15728 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
15730 (ssse3_plusminus:HI
15732 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")
15733 (parallel [(const_int 0)]))
15734 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
15735 (ssse3_plusminus:HI
15736 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
15737 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
15738 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
15740 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
15743 "TARGET_MMX_WITH_SSE && reload_completed"
15746 /* Generate SSE version of the operation. */
15747 rtx op0 = lowpart_subreg (V8HImode, operands[0],
15748 GET_MODE (operands[0]));
15749 rtx op1 = lowpart_subreg (V8HImode, operands[1],
15750 GET_MODE (operands[1]));
15751 rtx op2 = lowpart_subreg (V8HImode, operands[2],
15752 GET_MODE (operands[2]));
15753 emit_insn (gen_ssse3_ph<plusminus_mnemonic>wv8hi3 (op0, op1, op2));
15754 ix86_move_vector_high_sse_to_mmx (op0);
15757 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
15758 (set_attr "type" "sseiadd")
15759 (set_attr "atom_unit" "complex")
15760 (set_attr "prefix_extra" "1")
15761 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
15762 (set_attr "mode" "DI,TI,TI")])
;; AVX2 horizontal dword add/sub (vphaddd/vphsubd): adjacent-pair
;; operation, pairs from operand 1 then operand 2, written out in RTL.
15764 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
15765 [(set (match_operand:V8SI 0 "register_operand" "=x")
15771 (match_operand:V8SI 1 "register_operand" "x")
15772 (parallel [(const_int 0)]))
15773 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
15775 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
15776 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
15779 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
15780 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
15782 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
15783 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
15788 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
15789 (parallel [(const_int 0)]))
15790 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
15792 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
15793 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
15796 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
15797 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
15799 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
15800 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
15802 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
15803 [(set_attr "type" "sseiadd")
15804 (set_attr "prefix_extra" "1")
15805 (set_attr "prefix" "vex")
15806 (set_attr "mode" "OI")])
;; 128-bit horizontal dword add/sub (phaddd/phsubd), SSE and VEX
;; alternatives mirroring the word version above.
15808 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
15809 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
15814 (match_operand:V4SI 1 "register_operand" "0,x")
15815 (parallel [(const_int 0)]))
15816 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
15818 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
15819 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
15823 (match_operand:V4SI 2 "vector_operand" "xBm,xm")
15824 (parallel [(const_int 0)]))
15825 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
15827 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
15828 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
15831 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
15832 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
15833 [(set_attr "isa" "noavx,avx")
15834 (set_attr "type" "sseiadd")
15835 (set_attr "atom_unit" "complex")
15836 (set_attr "prefix_data16" "1,*")
15837 (set_attr "prefix_extra" "1")
15838 (set_attr "prefix" "orig,vex")
15839 (set_attr "mode" "TI")])
;; 64-bit (MMX) horizontal dword add/sub; like the V4HI pattern this is
;; split, when emulating MMX with SSE, into the V4SI pattern plus a
;; high-to-low fixup after reload.
15841 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>dv2si3"
15842 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
15846 (match_operand:V2SI 1 "register_operand" "0,0,Yv")
15847 (parallel [(const_int 0)]))
15848 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
15851 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")
15852 (parallel [(const_int 0)]))
15853 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
15854 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
15856 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
15859 "TARGET_MMX_WITH_SSE && reload_completed"
15862 /* Generate SSE version of the operation. */
15863 rtx op0 = lowpart_subreg (V4SImode, operands[0],
15864 GET_MODE (operands[0]));
15865 rtx op1 = lowpart_subreg (V4SImode, operands[1],
15866 GET_MODE (operands[1]));
15867 rtx op2 = lowpart_subreg (V4SImode, operands[2],
15868 GET_MODE (operands[2]));
15869 emit_insn (gen_ssse3_ph<plusminus_mnemonic>dv4si3 (op0, op1, op2));
15870 ix86_move_vector_high_sse_to_mmx (op0);
15873 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
15874 (set_attr "type" "sseiadd")
15875 (set_attr "atom_unit" "complex")
15876 (set_attr "prefix_extra" "1")
15877 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
15878 (set_attr "mode" "DI,TI,TI")])
;; vpmaddubsw (256-bit): multiply even-indexed and odd-indexed byte pairs
;; of operands 1 and 2, then saturating-add the even and odd products into
;; V16HI results; expressed via even/odd vec_selects below.
15880 (define_insn "avx2_pmaddubsw256"
15881 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
15886 (match_operand:V32QI 1 "register_operand" "x,v")
15887 (parallel [(const_int 0) (const_int 2)
15888 (const_int 4) (const_int 6)
15889 (const_int 8) (const_int 10)
15890 (const_int 12) (const_int 14)
15891 (const_int 16) (const_int 18)
15892 (const_int 20) (const_int 22)
15893 (const_int 24) (const_int 26)
15894 (const_int 28) (const_int 30)])))
15897 (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
15898 (parallel [(const_int 0) (const_int 2)
15899 (const_int 4) (const_int 6)
15900 (const_int 8) (const_int 10)
15901 (const_int 12) (const_int 14)
15902 (const_int 16) (const_int 18)
15903 (const_int 20) (const_int 22)
15904 (const_int 24) (const_int 26)
15905 (const_int 28) (const_int 30)]))))
;; Odd-indexed byte lanes of both inputs.
15908 (vec_select:V16QI (match_dup 1)
15909 (parallel [(const_int 1) (const_int 3)
15910 (const_int 5) (const_int 7)
15911 (const_int 9) (const_int 11)
15912 (const_int 13) (const_int 15)
15913 (const_int 17) (const_int 19)
15914 (const_int 21) (const_int 23)
15915 (const_int 25) (const_int 27)
15916 (const_int 29) (const_int 31)])))
15918 (vec_select:V16QI (match_dup 2)
15919 (parallel [(const_int 1) (const_int 3)
15920 (const_int 5) (const_int 7)
15921 (const_int 9) (const_int 11)
15922 (const_int 13) (const_int 15)
15923 (const_int 17) (const_int 19)
15924 (const_int 21) (const_int 23)
15925 (const_int 25) (const_int 27)
15926 (const_int 29) (const_int 31)]))))))]
15928 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
15929 [(set_attr "isa" "*,avx512bw")
15930 (set_attr "type" "sseiadd")
15931 (set_attr "prefix_extra" "1")
15932 (set_attr "prefix" "vex,evex")
15933 (set_attr "mode" "OI")])
15935 ;; The correct representation for this is absolutely enormous, and
15936 ;; surely not generally useful.
;; 512-bit vpmaddubsw kept as an opaque UNSPEC (see comment above: the
;; explicit RTL form would be enormous).  NOTE(review): the stray ";"
;; after the output template starts an md comment, so it is harmless —
;; left as-is to keep the pattern byte-identical.
15937 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
15938 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
15939 (unspec:VI2_AVX512VL
15940 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
15941 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
15942 UNSPEC_PMADDUBSW512))]
15944 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
15945 [(set_attr "type" "sseiadd")
15946 (set_attr "prefix" "evex")
15947 (set_attr "mode" "XI")])
;; vpmulhrsw (512-bit): rounded high half of the widened product —
;; ((a * b >> 14) + 1) >> 1 per element; the all-ones V32HI constant
;; below is the rounding addend.  "%v" commutative on operand 1.
15949 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
15950 [(set (match_operand:V32HI 0 "register_operand" "=v")
15957 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
15959 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
15961 (const_vector:V32HI [(const_int 1) (const_int 1)
15962 (const_int 1) (const_int 1)
15963 (const_int 1) (const_int 1)
15964 (const_int 1) (const_int 1)
15965 (const_int 1) (const_int 1)
15966 (const_int 1) (const_int 1)
15967 (const_int 1) (const_int 1)
15968 (const_int 1) (const_int 1)
15969 (const_int 1) (const_int 1)
15970 (const_int 1) (const_int 1)
15971 (const_int 1) (const_int 1)
15972 (const_int 1) (const_int 1)
15973 (const_int 1) (const_int 1)
15974 (const_int 1) (const_int 1)
15975 (const_int 1) (const_int 1)
15976 (const_int 1) (const_int 1)]))
15979 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15980 [(set_attr "type" "sseimul")
15981 (set_attr "prefix" "evex")
15982 (set_attr "mode" "XI")])
;; pmaddubsw (128-bit): even/odd byte-pair products, saturating-added
;; into V8HI.  Alternatives: legacy SSE, VEX, EVEX (avx512bw).
15984 (define_insn "ssse3_pmaddubsw128"
15985 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
15990 (match_operand:V16QI 1 "register_operand" "0,x,v")
15991 (parallel [(const_int 0) (const_int 2)
15992 (const_int 4) (const_int 6)
15993 (const_int 8) (const_int 10)
15994 (const_int 12) (const_int 14)])))
15997 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
15998 (parallel [(const_int 0) (const_int 2)
15999 (const_int 4) (const_int 6)
16000 (const_int 8) (const_int 10)
16001 (const_int 12) (const_int 14)]))))
16004 (vec_select:V8QI (match_dup 1)
16005 (parallel [(const_int 1) (const_int 3)
16006 (const_int 5) (const_int 7)
16007 (const_int 9) (const_int 11)
16008 (const_int 13) (const_int 15)])))
16010 (vec_select:V8QI (match_dup 2)
16011 (parallel [(const_int 1) (const_int 3)
16012 (const_int 5) (const_int 7)
16013 (const_int 9) (const_int 11)
16014 (const_int 13) (const_int 15)]))))))]
16017 pmaddubsw\t{%2, %0|%0, %2}
16018 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
16019 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16020 [(set_attr "isa" "noavx,avx,avx512bw")
16021 (set_attr "type" "sseiadd")
16022 (set_attr "atom_unit" "simul")
16023 (set_attr "prefix_data16" "1,*,*")
16024 (set_attr "prefix_extra" "1")
16025 (set_attr "prefix" "orig,vex,evex")
16026 (set_attr "mode" "TI")])
;; pmaddubsw, 64-bit MMX form: same even/odd pair structure on V8QI
;; inputs; alternatives cover native MMX and SSE-register emulation.
16028 (define_insn "ssse3_pmaddubsw"
16029 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16034 (match_operand:V8QI 1 "register_operand" "0,0,Yv")
16035 (parallel [(const_int 0) (const_int 2)
16036 (const_int 4) (const_int 6)])))
16039 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
16040 (parallel [(const_int 0) (const_int 2)
16041 (const_int 4) (const_int 6)]))))
16044 (vec_select:V4QI (match_dup 1)
16045 (parallel [(const_int 1) (const_int 3)
16046 (const_int 5) (const_int 7)])))
16048 (vec_select:V4QI (match_dup 2)
16049 (parallel [(const_int 1) (const_int 3)
16050 (const_int 5) (const_int 7)]))))))]
16051 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16053 pmaddubsw\t{%2, %0|%0, %2}
16054 pmaddubsw\t{%2, %0|%0, %2}
16055 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16056 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
16057 (set_attr "type" "sseiadd")
16058 (set_attr "atom_unit" "simul")
16059 (set_attr "prefix_extra" "1")
16060 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16061 (set_attr "mode" "DI,TI,TI")])
;; Modes that have a pmulhrsw pattern: V8HI always, V16HI with AVX2.
16063 (define_mode_iterator PMULHRSW
16064 [V8HI (V16HI "TARGET_AVX2")])
;; Masked pmulhrsw expander (AVX512BW+VL): rounded high multiply merged
;; into operand 3 under mask operand 4.  operands[5] supplies the
;; all-ones rounding constant consumed by the matched insn pattern.
16066 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
16067 [(set (match_operand:PMULHRSW 0 "register_operand")
16068 (vec_merge:PMULHRSW
16070 (lshiftrt:<ssedoublemode>
16071 (plus:<ssedoublemode>
16072 (lshiftrt:<ssedoublemode>
16073 (mult:<ssedoublemode>
16074 (sign_extend:<ssedoublemode>
16075 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
16076 (sign_extend:<ssedoublemode>
16077 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
16081 (match_operand:PMULHRSW 3 "register_operand")
16082 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
16083 "TARGET_AVX512BW && TARGET_AVX512VL"
16085 operands[5] = CONST1_RTX(<MODE>mode);
16086 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Unmasked pmulhrsw expander; operands[3] supplies the all-ones
;; rounding constant for the matched insn pattern.
16089 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
16090 [(set (match_operand:PMULHRSW 0 "register_operand")
16092 (lshiftrt:<ssedoublemode>
16093 (plus:<ssedoublemode>
16094 (lshiftrt:<ssedoublemode>
16095 (mult:<ssedoublemode>
16096 (sign_extend:<ssedoublemode>
16097 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
16098 (sign_extend:<ssedoublemode>
16099 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
16105 operands[3] = CONST1_RTX(<MODE>mode);
16106 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; pmulhrsw insn: truncated ((sx(a) * sx(b) >> 14) + 1) >> 1.
;; Operand 1 is commutative ("%0"); operand 3 must be the const-1 vector
;; the expanders above generate.  At most one memory input is allowed.
16109 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
16110 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
16112 (lshiftrt:<ssedoublemode>
16113 (plus:<ssedoublemode>
16114 (lshiftrt:<ssedoublemode>
16115 (mult:<ssedoublemode>
16116 (sign_extend:<ssedoublemode>
16117 (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
16118 (sign_extend:<ssedoublemode>
16119 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
16121 (match_operand:VI2_AVX2 3 "const1_operand"))
16123 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
16124 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16126 pmulhrsw\t{%2, %0|%0, %2}
16127 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
16128 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
16129 [(set_attr "isa" "noavx,avx,avx512bw")
16130 (set_attr "type" "sseimul")
16131 (set_attr "prefix_data16" "1,*,*")
16132 (set_attr "prefix_extra" "1")
16133 (set_attr "prefix" "orig,maybe_evex,evex")
16134 (set_attr "mode" "<sseinsnmode>")])
;; MMX pmulhrsw expander; supplies the V4HI const-1 rounding vector.
16136 (define_expand "ssse3_pmulhrswv4hi3"
16137 [(set (match_operand:V4HI 0 "register_operand")
16144 (match_operand:V4HI 1 "register_mmxmem_operand"))
16146 (match_operand:V4HI 2 "register_mmxmem_operand")))
16150 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16152 operands[3] = CONST1_RTX(V4HImode);
16153 ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
;; MMX pmulhrsw insn: native MMX alternative plus SSE-register
;; emulation alternatives; at most one memory input.
16156 (define_insn "*ssse3_pmulhrswv4hi3"
16157 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16164 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
16166 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
16168 (match_operand:V4HI 3 "const1_operand"))
16170 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
16172 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16174 pmulhrsw\t{%2, %0|%0, %2}
16175 pmulhrsw\t{%2, %0|%0, %2}
16176 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
16177 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
16178 (set_attr "type" "sseimul")
16179 (set_attr "prefix_extra" "1")
16180 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16181 (set_attr "mode" "DI,TI,TI")])
;; pshufb: byte shuffle of operand 1 controlled by operand 2, kept as an
;; UNSPEC.  SSE / VEX / EVEX alternatives; masked forms via <mask_name>.
16183 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
16184 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
16186 [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
16187 (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
16189 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
16191 pshufb\t{%2, %0|%0, %2}
16192 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
16193 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16194 [(set_attr "isa" "noavx,avx,avx512bw")
16195 (set_attr "type" "sselog1")
16196 (set_attr "prefix_data16" "1,*,*")
16197 (set_attr "prefix_extra" "1")
16198 (set_attr "prefix" "orig,maybe_evex,evex")
16199 (set_attr "btver2_decode" "vector")
16200 (set_attr "mode" "<sseinsnmode>")])
;; MMX pshufb.  When emulated in SSE registers it splits after reload:
;; the control bytes are ANDed with 0xf7 (clearing bit 3) so that the
;; 128-bit pshufb indexes only the low 8 bytes, matching MMX semantics;
;; the mask constant is materialized through scratch operand 3.
16202 (define_insn_and_split "ssse3_pshufbv8qi3"
16203 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
16204 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
16205 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
16207 (clobber (match_scratch:V4SI 3 "=X,x,Yv"))]
16208 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16210 pshufb\t{%2, %0|%0, %2}
16213 "TARGET_MMX_WITH_SSE && reload_completed"
16214 [(set (match_dup 3) (match_dup 5))
16216 (and:V4SI (match_dup 3) (match_dup 2)))
16218 (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
16220 /* Emulate MMX version of pshufb with SSE version by masking out the
16221 bit 3 of the shuffle control byte. */
16222 operands[0] = lowpart_subreg (V16QImode, operands[0],
16223 GET_MODE (operands[0]));
16224 operands[1] = lowpart_subreg (V16QImode, operands[1],
16225 GET_MODE (operands[1]));
16226 operands[2] = lowpart_subreg (V4SImode, operands[2],
16227 GET_MODE (operands[2]));
16228 operands[4] = lowpart_subreg (V16QImode, operands[3],
16229 GET_MODE (operands[3]));
16230 rtvec par = gen_rtvec (4, GEN_INT (0xf7f7f7f7),
16231 GEN_INT (0xf7f7f7f7),
16232 GEN_INT (0xf7f7f7f7),
16233 GEN_INT (0xf7f7f7f7));
16234 rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par);
16235 operands[5] = force_const_mem (V4SImode, vec_const);
16237 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
16238 (set_attr "prefix_extra" "1")
16239 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16240 (set_attr "mode" "DI,TI,TI")])
;; psignb/w/d: negate, keep, or zero each element of operand 1 according
;; to the sign of the corresponding element of operand 2 (UNSPEC form).
16242 (define_insn "<ssse3_avx2>_psign<mode>3"
16243 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
16245 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
16246 (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
16250 psign<ssemodesuffix>\t{%2, %0|%0, %2}
16251 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16252 [(set_attr "isa" "noavx,avx")
16253 (set_attr "type" "sselog1")
16254 (set_attr "prefix_data16" "1,*")
16255 (set_attr "prefix_extra" "1")
16256 (set_attr "prefix" "orig,vex")
16257 (set_attr "mode" "<sseinsnmode>")])
;; MMX psign: native MMX plus SSE-register emulation alternatives.
16259 (define_insn "ssse3_psign<mode>3"
16260 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
16262 [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
16263 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")]
16265 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16267 psign<mmxvecsize>\t{%2, %0|%0, %2}
16268 psign<mmxvecsize>\t{%2, %0|%0, %2}
16269 vpsign<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
16270 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
16271 (set_attr "type" "sselog1")
16272 (set_attr "prefix_extra" "1")
16273 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16274 (set_attr "mode" "DI,TI,TI")])
;; Masked vpalignr (AVX512BW).  The shift operand is stored in bits
;; (multiple of 8) and converted to the byte count the instruction
;; encodes in the C output template.
16276 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
16277 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
16278 (vec_merge:VI1_AVX512
16280 [(match_operand:VI1_AVX512 1 "register_operand" "v")
16281 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
16282 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
16284 (match_operand:VI1_AVX512 4 "nonimm_or_0_operand" "0C")
16285 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
16286 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
16288 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
16289 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
16291 [(set_attr "type" "sseishft")
16292 (set_attr "atom_unit" "sishuf")
16293 (set_attr "prefix_extra" "1")
16294 (set_attr "length_immediate" "1")
16295 (set_attr "prefix" "evex")
16296 (set_attr "mode" "<sseinsnmode>")])
;; palignr: concatenate-and-byte-shift.  Immediate is in bits (multiple
;; of 8) and divided by 8 before printing; legacy / VEX / EVEX forms.
16298 (define_insn "<ssse3_avx2>_palignr<mode>"
16299 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
16300 (unspec:SSESCALARMODE
16301 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
16302 (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
16303 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
16307 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
16309 switch (which_alternative)
16312 return "palignr\t{%3, %2, %0|%0, %2, %3}";
16315 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
16317 gcc_unreachable ();
16320 [(set_attr "isa" "noavx,avx,avx512bw")
16321 (set_attr "type" "sseishft")
16322 (set_attr "atom_unit" "sishuf")
16323 (set_attr "prefix_data16" "1,*,*")
16324 (set_attr "prefix_extra" "1")
16325 (set_attr "length_immediate" "1")
16326 (set_attr "prefix" "orig,vex,evex")
16327 (set_attr "mode" "<sseinsnmode>")])
;; MMX palignr on DImode.  SSE emulation splits after reload into:
;; vec_concat of the two 64-bit halves into a V2DI register, then a
;; V1TI logical right shift (psrldq) by the byte offset.  Because SSE
;; vec_concat can only target op0, a high/low swap via a V4SI shuffle
;; is emitted on one path (see C code below).
16329 (define_insn_and_split "ssse3_palignrdi"
16330 [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
16331 (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
16332 (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv")
16333 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
16335 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16337 switch (which_alternative)
16340 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
16341 return "palignr\t{%3, %2, %0|%0, %2, %3}";
16346 gcc_unreachable ();
16349 "TARGET_MMX_WITH_SSE && reload_completed"
16350 [(set (match_dup 0)
16351 (lshiftrt:V1TI (match_dup 0) (match_dup 3)))]
16353 /* Emulate MMX palignrdi with SSE psrldq. */
16354 rtx op0 = lowpart_subreg (V2DImode, operands[0],
16355 GET_MODE (operands[0]));
16357 emit_insn (gen_vec_concatv2di (op0, operands[2], operands[1]));
16360 /* NB: SSE can only concatenate OP0 and OP1 to OP0. */
16361 emit_insn (gen_vec_concatv2di (op0, operands[1], operands[2]));
16362 /* Swap bits 0:63 with bits 64:127. */
16363 rtx mask = gen_rtx_PARALLEL (VOIDmode,
16364 gen_rtvec (4, GEN_INT (2),
16368 rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0));
16369 rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
16370 emit_insn (gen_rtx_SET (op1, op2));
16372 operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0));
16374 [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
16375 (set_attr "type" "sseishft")
16376 (set_attr "atom_unit" "sishuf")
16377 (set_attr "prefix_extra" "1")
16378 (set_attr "length_immediate" "1")
16379 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16380 (set_attr "mode" "DI,TI,TI")])
16382 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
16383 ;; modes for abs instruction on pre AVX-512 targets.
;; Integer vector modes that have a native pabs instruction, with the
;; target condition each element size/width requires (see comment above).
16384 (define_mode_iterator VI1248_AVX512VL_AVX512BW
16385 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
16386 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
16387 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
16388 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; pabs: per-element absolute value for all modes the iterator enables.
16390 (define_insn "*abs<mode>2"
16391 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
16392 (abs:VI1248_AVX512VL_AVX512BW
16393 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
16395 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
16396 [(set_attr "type" "sselog1")
16397 (set_attr "prefix_data16" "1")
16398 (set_attr "prefix_extra" "1")
16399 (set_attr "prefix" "maybe_vex")
16400 (set_attr "mode" "<sseinsnmode>")])
;; Masked pabs for 32/64-bit element modes (AVX512F/VL); result merged
;; into operand 2 (or zeroed, "0C") under mask operand 3.
16402 (define_insn "abs<mode>2_mask"
16403 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
16404 (vec_merge:VI48_AVX512VL
16406 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
16407 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "0C")
16408 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
16410 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
16411 [(set_attr "type" "sselog1")
16412 (set_attr "prefix" "evex")
16413 (set_attr "mode" "<sseinsnmode>")])
;; Masked pabs for 8/16-bit element modes — the AVX512BW counterpart of
;; the VI48 pattern above (same name, disjoint mode iterators).
16415 (define_insn "abs<mode>2_mask"
16416 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
16417 (vec_merge:VI12_AVX512VL
16419 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
16420 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
16421 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
16423 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
16424 [(set_attr "type" "sselog1")
16425 (set_attr "prefix" "evex")
16426 (set_attr "mode" "<sseinsnmode>")])
;; abs expander: falls back to an open-coded SSE2 sequence when the
;; target lacks a native pabs for the mode (e.g. V2DI/V4DI without
;; AVX512VL).
16428 (define_expand "abs<mode>2"
16429 [(set (match_operand:VI_AVX2 0 "register_operand")
16431 (match_operand:VI_AVX2 1 "vector_operand")))]
16435 || ((<MODE>mode == V2DImode || <MODE>mode == V4DImode)
16436 && !TARGET_AVX512VL))
16438 ix86_expand_sse2_abs (operands[0], operands[1]);
16443 (define_insn "abs<mode>2"
16444 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
16446 (match_operand:MMXMODEI 1 "register_mmxmem_operand" "ym,Yv")))]
16447 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16449 pabs<mmxvecsize>\t{%1, %0|%0, %1}
16450 %vpabs<mmxvecsize>\t{%1, %0|%0, %1}"
16451 [(set_attr "mmx_isa" "native,x64")
16452 (set_attr "type" "sselog1")
16453 (set_attr "prefix_rep" "0")
16454 (set_attr "prefix_extra" "1")
16455 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16456 (set_attr "mode" "DI,TI")])
;; AMD SSE4A: non-temporal scalar FP stores (movntss/movntsd) and the
;; extrq/insertq 64-bit bit-field extract/insert instructions on V2DI.
;; NOTE(review): unspec operator lines and insn conditions (presumably
;; "TARGET_SSE4A") appear dropped by the extraction in several patterns here.
16458 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16460 ;; AMD SSE4A instructions
16462 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Non-temporal store of a scalar SF/DF register to memory.
16464 (define_insn "sse4a_movnt<mode>"
16465 [(set (match_operand:MODEF 0 "memory_operand" "=m")
16467 [(match_operand:MODEF 1 "register_operand" "x")]
16470 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
16471 [(set_attr "type" "ssemov")
16472 (set_attr "mode" "<MODE>")])
;; Non-temporal store of element 0 of a 128-bit FP vector.
16474 (define_insn "sse4a_vmmovnt<mode>"
16475 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
16476 (unspec:<ssescalarmode>
16477 [(vec_select:<ssescalarmode>
16478 (match_operand:VF_128 1 "register_operand" "x")
16479 (parallel [(const_int 0)]))]
16482 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
16483 [(set_attr "type" "ssemov")
16484 (set_attr "mode" "<ssescalarmode>")])
;; extrq with immediate length (operand 2) and index (operand 3).
16486 (define_insn "sse4a_extrqi"
16487 [(set (match_operand:V2DI 0 "register_operand" "=x")
16488 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16489 (match_operand 2 "const_0_to_255_operand")
16490 (match_operand 3 "const_0_to_255_operand")]
16493 "extrq\t{%3, %2, %0|%0, %2, %3}"
16494 [(set_attr "type" "sse")
16495 (set_attr "prefix_data16" "1")
16496 (set_attr "length_immediate" "2")
16497 (set_attr "mode" "TI")])
;; extrq with length/index taken from an XMM register (operand 2).
16499 (define_insn "sse4a_extrq"
16500 [(set (match_operand:V2DI 0 "register_operand" "=x")
16501 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16502 (match_operand:V16QI 2 "register_operand" "x")]
16505 "extrq\t{%2, %0|%0, %2}"
16506 [(set_attr "type" "sse")
16507 (set_attr "prefix_data16" "1")
16508 (set_attr "mode" "TI")])
;; insertq with immediate length (operand 3) and index (operand 4).
16510 (define_insn "sse4a_insertqi"
16511 [(set (match_operand:V2DI 0 "register_operand" "=x")
16512 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16513 (match_operand:V2DI 2 "register_operand" "x")
16514 (match_operand 3 "const_0_to_255_operand")
16515 (match_operand 4 "const_0_to_255_operand")]
16518 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
16519 [(set_attr "type" "sseins")
16520 (set_attr "prefix_data16" "0")
16521 (set_attr "prefix_rep" "1")
16522 (set_attr "length_immediate" "2")
16523 (set_attr "mode" "TI")])
;; insertq with length/index encoded in operand 2's upper quadword.
16525 (define_insn "sse4a_insertq"
16526 [(set (match_operand:V2DI 0 "register_operand" "=x")
16527 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16528 (match_operand:V2DI 2 "register_operand" "x")]
16531 "insertq\t{%2, %0|%0, %2}"
16532 [(set_attr "type" "sseins")
16533 (set_attr "prefix_data16" "0")
16534 (set_attr "prefix_rep" "1")
16535 (set_attr "mode" "TI")])
;; Intel SSE4.1 blend family: immediate blends (blendps/pd), variable
;; blends (blendvps/pd), scalar blendv used for conditional moves, and
;; splitters that fold a "< 0" comparison into the blendv mask operand.
;; NOTE(review): insn conditions (presumably "TARGET_SSE4_1"), unspec
;; operator lines (UNSPEC_BLENDV) and some else-arms appear dropped by the
;; extraction throughout this section.
16537 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16539 ;; Intel SSE4.1 instructions
16541 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16543 ;; Mapping of immediate bits for blend instructions
16544 (define_mode_attr blendbits
16545 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
;; Immediate blend: per-element select between operands 1 and 2 under the
;; immediate bit mask in operand 3.
16547 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
16548 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
16549 (vec_merge:VF_128_256
16550 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
16551 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
16552 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
16555 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16556 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16557 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16558 [(set_attr "isa" "noavx,noavx,avx")
16559 (set_attr "type" "ssemov")
16560 (set_attr "length_immediate" "1")
16561 (set_attr "prefix_data16" "1,1,*")
16562 (set_attr "prefix_extra" "1")
16563 (set_attr "prefix" "orig,orig,vex")
16564 (set_attr "mode" "<MODE>")])
;; Variable blend: mask comes from the sign bits of register operand 3
;; (xmm0-tied "Yz" in the non-AVX alternatives).
16566 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
16567 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
16569 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
16570 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
16571 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
16575 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16576 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16577 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16578 [(set_attr "isa" "noavx,noavx,avx")
16579 (set_attr "type" "ssemov")
16580 (set_attr "length_immediate" "1")
16581 (set_attr "prefix_data16" "1,1,*")
16582 (set_attr "prefix_extra" "1")
16583 (set_attr "prefix" "orig,orig,vex")
16584 (set_attr "btver2_decode" "vector,vector,vector")
16585 (set_attr "mode" "<MODE>")])
16587 ;; Also define scalar versions. These are used for conditional move.
16588 ;; Using subregs into vector modes causes register allocation lossage.
16589 ;; These patterns do not allow memory operands because the native
16590 ;; instructions read the full 128-bits.
16592 (define_insn "sse4_1_blendv<ssemodesuffix>"
16593 [(set (match_operand:MODEF 0 "register_operand" "=Yr,*x,x")
16595 [(match_operand:MODEF 1 "register_operand" "0,0,x")
16596 (match_operand:MODEF 2 "register_operand" "Yr,*x,x")
16597 (match_operand:MODEF 3 "register_operand" "Yz,Yz,x")]
;; Output chosen at runtime: prefer the ps form when the mode attribute
;; resolved to V4SF (shorter encoding, same semantics for this use).
16601 if (get_attr_mode (insn) == MODE_V4SF)
16602 return (which_alternative == 2
16603 ? "vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16604 : "blendvps\t{%3, %2, %0|%0, %2, %3}");
16606 return (which_alternative == 2
16607 ? "vblendv<ssevecmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16608 : "blendv<ssevecmodesuffix>\t{%3, %2, %0|%0, %2, %3}");
16610 [(set_attr "isa" "noavx,noavx,avx")
16611 (set_attr "type" "ssemov")
16612 (set_attr "length_immediate" "1")
16613 (set_attr "prefix_data16" "1,1,*")
16614 (set_attr "prefix_extra" "1")
16615 (set_attr "prefix" "orig,orig,vex")
16616 (set_attr "btver2_decode" "vector,vector,vector")
;; Mode attribute computed per-target: V4SF when packed-single is optimal
;; or when optimizing for size without AVX.
16618 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
16619 (const_string "V4SF")
16620 (match_test "TARGET_AVX")
16621 (const_string "<ssevecmode>")
16622 (match_test "optimize_function_for_size_p (cfun)")
16623 (const_string "V4SF")
16625 (const_string "<ssevecmode>")))])
;; Splitter: blendv whose mask is (x < 0) -- the comparison is redundant
;; because blendv only reads sign bits, so drop it after reload.
16627 (define_insn_and_split "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
16628 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
16630 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
16631 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
16633 (lt:<sseintvecmode>
16634 (match_operand:<sseintvecmode> 3 "register_operand" "Yz,Yz,x")
16635 (match_operand:<sseintvecmode> 4 "const0_operand" "C,C,C")) 0)]
16639 "&& reload_completed"
16640 [(set (match_dup 0)
16642 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
16643 "operands[3] = gen_lowpart (<MODE>mode, operands[3]);"
16644 [(set_attr "isa" "noavx,noavx,avx")
16645 (set_attr "type" "ssemov")
16646 (set_attr "length_immediate" "1")
16647 (set_attr "prefix_data16" "1,1,*")
16648 (set_attr "prefix_extra" "1")
16649 (set_attr "prefix" "orig,orig,vex")
16650 (set_attr "btver2_decode" "vector,vector,vector")
16651 (set_attr "mode" "<MODE>")])
;; Map integer vector modes to the FP blendv suffix/mode of matching width.
16653 (define_mode_attr ssefltmodesuffix
16654 [(V2DI "pd") (V4DI "pd") (V4SI "ps") (V8SI "ps")])
16656 (define_mode_attr ssefltvecmode
16657 [(V2DI "V2DF") (V4DI "V4DF") (V4SI "V4SF") (V8SI "V8SF")])
;; Same "< 0" folding for integer-masked blendv; lowparts everything to the
;; FP vector mode so blendvps/blendvpd can be used.
16659 (define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint"
16660 [(set (match_operand:<ssebytemode> 0 "register_operand" "=Yr,*x,x")
16661 (unspec:<ssebytemode>
16662 [(match_operand:<ssebytemode> 1 "register_operand" "0,0,x")
16663 (match_operand:<ssebytemode> 2 "vector_operand" "YrBm,*xBm,xm")
16664 (subreg:<ssebytemode>
16666 (match_operand:VI48_AVX 3 "register_operand" "Yz,Yz,x")
16667 (match_operand:VI48_AVX 4 "const0_operand" "C,C,C")) 0)]
16671 "&& reload_completed"
16672 [(set (match_dup 0)
16673 (unspec:<ssefltvecmode>
16674 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
16676 operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
16677 operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
16678 operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]);
16679 operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
16681 [(set_attr "isa" "noavx,noavx,avx")
16682 (set_attr "type" "ssemov")
16683 (set_attr "length_immediate" "1")
16684 (set_attr "prefix_data16" "1,1,*")
16685 (set_attr "prefix_extra" "1")
16686 (set_attr "prefix" "orig,orig,vex")
16687 (set_attr "btver2_decode" "vector,vector,vector")
16688 (set_attr "mode" "<ssefltvecmode>")])
;; SSE4.1 dot product (dpps/dppd) and the non-temporal aligned load
;; (movntdqa).  NOTE(review): insn condition lines and the unspec operator
;; lines appear dropped by the extraction here.
16690 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
16691 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
16693 [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
16694 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
16695 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
16699 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16700 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
16701 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16702 [(set_attr "isa" "noavx,noavx,avx")
16703 (set_attr "type" "ssemul")
16704 (set_attr "length_immediate" "1")
16705 (set_attr "prefix_data16" "1,1,*")
16706 (set_attr "prefix_extra" "1")
16707 (set_attr "prefix" "orig,orig,vex")
16708 (set_attr "btver2_decode" "vector,vector,vector")
16709 (set_attr "znver1_decode" "vector,vector,vector")
16710 (set_attr "mode" "<MODE>")])
16712 ;; Mode attribute used by `vmovntdqa' pattern
16713 (define_mode_attr vi8_sse4_1_avx2_avx512
16714 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
;; Non-temporal (streaming) aligned load from WC memory.
16716 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
16717 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
16718 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
16721 "%vmovntdqa\t{%1, %0|%0, %1}"
16722 [(set_attr "isa" "noavx,noavx,avx")
16723 (set_attr "type" "ssemov")
16724 (set_attr "prefix_extra" "1,1,*")
16725 (set_attr "prefix" "orig,orig,maybe_evex")
16726 (set_attr "mode" "<sseinsnmode>")])
;; SSE4.1/AVX2 byte-sum-of-absolute-differences (mpsadbw), unsigned
;; dword->word pack (packusdw), variable byte blend (pblendvb) and the
;; immediate word blend (pblendw).  NOTE(review): insn condition lines and
;; unspec operator lines appear dropped by the extraction in this section.
16728 (define_insn "<sse4_1_avx2>_mpsadbw"
16729 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
16731 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
16732 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
16733 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
16737 mpsadbw\t{%3, %2, %0|%0, %2, %3}
16738 mpsadbw\t{%3, %2, %0|%0, %2, %3}
16739 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16740 [(set_attr "isa" "noavx,noavx,avx")
16741 (set_attr "type" "sselog1")
16742 (set_attr "length_immediate" "1")
16743 (set_attr "prefix_extra" "1")
16744 (set_attr "prefix" "orig,orig,vex")
16745 (set_attr "btver2_decode" "vector,vector,vector")
16746 (set_attr "znver1_decode" "vector,vector,vector")
16747 (set_attr "mode" "<sseinsnmode>")])
;; packusdw: concatenation of two us_truncated halves; also has a masked
;; AVX512BW alternative (4th constraint column).
16749 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
16750 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
16751 (vec_concat:VI2_AVX2
16752 (us_truncate:<ssehalfvecmode>
16753 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
16754 (us_truncate:<ssehalfvecmode>
16755 (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
16756 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
16758 packusdw\t{%2, %0|%0, %2}
16759 packusdw\t{%2, %0|%0, %2}
16760 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
16761 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16762 [(set_attr "isa" "noavx,noavx,avx,avx512bw")
16763 (set_attr "type" "sselog")
16764 (set_attr "prefix_extra" "1")
16765 (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
16766 (set_attr "mode" "<sseinsnmode>")])
;; Variable byte blend: mask is the sign bit of each byte of operand 3.
16768 (define_insn "<sse4_1_avx2>_pblendvb"
16769 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
16771 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
16772 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
16773 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
16777 pblendvb\t{%3, %2, %0|%0, %2, %3}
16778 pblendvb\t{%3, %2, %0|%0, %2, %3}
16779 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16780 [(set_attr "isa" "noavx,noavx,avx")
16781 (set_attr "type" "ssemov")
16782 (set_attr "prefix_extra" "1")
16783 (set_attr "length_immediate" "*,*,1")
16784 (set_attr "prefix" "orig,orig,vex")
16785 (set_attr "btver2_decode" "vector,vector,vector")
16786 (set_attr "mode" "<sseinsnmode>")])
;; Splitter: pblendvb with an explicit (x < 0) mask -- the compare is
;; redundant since pblendvb reads only sign bits.
16788 (define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt"
16789 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
16791 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
16792 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
16793 (lt:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")
16794 (match_operand:VI1_AVX2 4 "const0_operand" "C,C,C"))]
16799 [(set (match_dup 0)
16801 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
16803 [(set_attr "isa" "noavx,noavx,avx")
16804 (set_attr "type" "ssemov")
16805 (set_attr "prefix_extra" "1")
16806 (set_attr "length_immediate" "*,*,1")
16807 (set_attr "prefix" "orig,orig,vex")
16808 (set_attr "btver2_decode" "vector,vector,vector")
16809 (set_attr "mode" "<sseinsnmode>")])
;; Immediate word blend on V8HI (one mask bit per 16-bit element).
16811 (define_insn "sse4_1_pblendw"
16812 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
16814 (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
16815 (match_operand:V8HI 1 "register_operand" "0,0,x")
16816 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
16819 pblendw\t{%3, %2, %0|%0, %2, %3}
16820 pblendw\t{%3, %2, %0|%0, %2, %3}
16821 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16822 [(set_attr "isa" "noavx,noavx,avx")
16823 (set_attr "type" "ssemov")
16824 (set_attr "prefix_extra" "1")
16825 (set_attr "length_immediate" "1")
16826 (set_attr "prefix" "orig,orig,vex")
16827 (set_attr "mode" "TI")])
;; AVX2 word/dword immediate blends and SSE4.1 phminposuw.
;; NOTE(review): vec_merge operator lines and some insn conditions appear
;; dropped by the extraction in this section.
16829 ;; The builtin uses an 8-bit immediate.  Expand that.
;; V16HI pblendw only has 8 mask bits for 16 elements; the hardware applies
;; the byte to both lanes, so duplicate it into a 16-bit mask up front.
16830 (define_expand "avx2_pblendw"
16831 [(set (match_operand:V16HI 0 "register_operand")
16833 (match_operand:V16HI 2 "nonimmediate_operand")
16834 (match_operand:V16HI 1 "register_operand")
16835 (match_operand:SI 3 "const_0_to_255_operand")))]
16838 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
16839 operands[3] = GEN_INT (val << 8 | val);
;; Matching insn: mask already lane-duplicated (avx2_pblendw_operand);
;; re-truncate to 8 bits for the emitted immediate.
16842 (define_insn "*avx2_pblendw"
16843 [(set (match_operand:V16HI 0 "register_operand" "=x")
16845 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
16846 (match_operand:V16HI 1 "register_operand" "x")
16847 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
16850 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
16851 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
16853 [(set_attr "type" "ssemov")
16854 (set_attr "prefix_extra" "1")
16855 (set_attr "length_immediate" "1")
16856 (set_attr "prefix" "vex")
16857 (set_attr "mode" "OI")])
;; Dword immediate blend (vpblendd), V4SI/V8SI.
16859 (define_insn "avx2_pblendd<mode>"
16860 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
16861 (vec_merge:VI4_AVX2
16862 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
16863 (match_operand:VI4_AVX2 1 "register_operand" "x")
16864 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
16866 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16867 [(set_attr "type" "ssemov")
16868 (set_attr "prefix_extra" "1")
16869 (set_attr "length_immediate" "1")
16870 (set_attr "prefix" "vex")
16871 (set_attr "mode" "<sseinsnmode>")])
;; Horizontal minimum of unsigned words, result+index in element 0/1.
16873 (define_insn "sse4_1_phminposuw"
16874 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
16875 (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
16876 UNSPEC_PHMINPOSUW))]
16878 "%vphminposuw\t{%1, %0|%0, %1}"
16879 [(set_attr "isa" "noavx,noavx,avx")
16880 (set_attr "type" "sselog1")
16881 (set_attr "prefix_extra" "1")
16882 (set_attr "prefix" "orig,orig,vex")
16883 (set_attr "mode" "TI")])
;; Sign/zero extension byte -> word (vpmovsxbw / vpmovzxbw); <code> iterates
;; over any_extend.  NOTE(review): the any_extend operator lines appear
;; dropped by the extraction in most patterns below.
16885 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
16886 [(set (match_operand:V16HI 0 "register_operand" "=v")
16888 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
16889 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
16890 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16891 [(set_attr "type" "ssemov")
16892 (set_attr "prefix_extra" "1")
16893 (set_attr "prefix" "maybe_evex")
16894 (set_attr "mode" "OI")])
16896 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
16897 [(set (match_operand:V32HI 0 "register_operand" "=v")
16899 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
16901 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16902 [(set_attr "type" "ssemov")
16903 (set_attr "prefix_extra" "1")
16904 (set_attr "prefix" "evex")
16905 (set_attr "mode" "XI")])
;; 128-bit form: extends the low 8 bytes of a register source.
16907 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
16908 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
16911 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
16912 (parallel [(const_int 0) (const_int 1)
16913 (const_int 2) (const_int 3)
16914 (const_int 4) (const_int 5)
16915 (const_int 6) (const_int 7)]))))]
16916 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
16917 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16918 [(set_attr "isa" "noavx,noavx,avx")
16919 (set_attr "type" "ssemov")
16920 (set_attr "prefix_extra" "1")
16921 (set_attr "prefix" "orig,orig,maybe_evex")
16922 (set_attr "mode" "TI")])
;; Memory-source variant: loads 8 bytes directly.
16924 (define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1"
16925 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
16927 (match_operand:V8QI 1 "memory_operand" "m,m,m")))]
16928 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
16929 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16930 [(set_attr "isa" "noavx,noavx,avx")
16931 (set_attr "type" "ssemov")
16932 (set_attr "prefix_extra" "1")
16933 (set_attr "prefix" "orig,orig,maybe_evex")
16934 (set_attr "mode" "TI")])
;; Split: a DI memory load fed into the extend is rewritten as a direct
;; V8QI memory extend before register allocation.
16936 (define_insn_and_split "*sse4_1_<code>v8qiv8hi2<mask_name>_2"
16937 [(set (match_operand:V8HI 0 "register_operand")
16942 (match_operand:DI 1 "memory_operand")
16944 (parallel [(const_int 0) (const_int 1)
16945 (const_int 2) (const_int 3)
16946 (const_int 4) (const_int 5)
16947 (const_int 6) (const_int 7)]))))]
16948 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
16949 && can_create_pseudo_p ()"
16952 [(set (match_dup 0)
16953 (any_extend:V8HI (match_dup 1)))]
16954 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
;; Sign/zero extension byte -> dword (vpmov{sx,zx}bd).
;; NOTE(review): any_extend operator lines and some insn conditions appear
;; dropped by the extraction in the patterns below.
16956 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
16957 [(set (match_operand:V16SI 0 "register_operand" "=v")
16959 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
16961 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
16962 [(set_attr "type" "ssemov")
16963 (set_attr "prefix" "evex")
16964 (set_attr "mode" "XI")])
;; 256-bit form: extends the low 8 bytes of a register source.
16966 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
16967 [(set (match_operand:V8SI 0 "register_operand" "=v")
16970 (match_operand:V16QI 1 "register_operand" "v")
16971 (parallel [(const_int 0) (const_int 1)
16972 (const_int 2) (const_int 3)
16973 (const_int 4) (const_int 5)
16974 (const_int 6) (const_int 7)]))))]
16975 "TARGET_AVX2 && <mask_avx512vl_condition>"
16976 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16977 [(set_attr "type" "ssemov")
16978 (set_attr "prefix_extra" "1")
16979 (set_attr "prefix" "maybe_evex")
16980 (set_attr "mode" "OI")])
16982 (define_insn "*avx2_<code>v8qiv8si2<mask_name>_1"
16983 [(set (match_operand:V8SI 0 "register_operand" "=v")
16985 (match_operand:V8QI 1 "memory_operand" "m")))]
16986 "TARGET_AVX2 && <mask_avx512vl_condition>"
16987 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16988 [(set_attr "type" "ssemov")
16989 (set_attr "prefix_extra" "1")
16990 (set_attr "prefix" "maybe_evex")
16991 (set_attr "mode" "OI")])
;; Split DI-memory-fed extend into a direct V8QI memory extend.
16993 (define_insn_and_split "*avx2_<code>v8qiv8si2<mask_name>_2"
16994 [(set (match_operand:V8SI 0 "register_operand")
16999 (match_operand:DI 1 "memory_operand")
17001 (parallel [(const_int 0) (const_int 1)
17002 (const_int 2) (const_int 3)
17003 (const_int 4) (const_int 5)
17004 (const_int 6) (const_int 7)]))))]
17005 "TARGET_AVX2 && <mask_avx512vl_condition>
17006 && can_create_pseudo_p ()"
17009 [(set (match_dup 0)
17010 (any_extend:V8SI (match_dup 1)))]
17011 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
;; 128-bit form: extends the low 4 bytes of a register source.
17013 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
17014 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17017 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17018 (parallel [(const_int 0) (const_int 1)
17019 (const_int 2) (const_int 3)]))))]
17020 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17021 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17022 [(set_attr "isa" "noavx,noavx,avx")
17023 (set_attr "type" "ssemov")
17024 (set_attr "prefix_extra" "1")
17025 (set_attr "prefix" "orig,orig,maybe_evex")
17026 (set_attr "mode" "TI")])
17028 (define_insn "*sse4_1_<code>v4qiv4si2<mask_name>_1"
17029 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17031 (match_operand:V4QI 1 "memory_operand" "m,m,m")))]
17032 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17033 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17034 [(set_attr "isa" "noavx,noavx,avx")
17035 (set_attr "type" "ssemov")
17036 (set_attr "prefix_extra" "1")
17037 (set_attr "prefix" "orig,orig,maybe_evex")
17038 (set_attr "mode" "TI")])
;; Split: an SI load broadcast then selected is just a V4QI memory extend.
17040 (define_insn_and_split "*sse4_1_<code>v4qiv4si2<mask_name>_2"
17041 [(set (match_operand:V4SI 0 "register_operand")
17046 (vec_duplicate:V4SI
17047 (match_operand:SI 1 "memory_operand"))
17049 [(const_int 0) (const_int 0)
17050 (const_int 0) (const_int 0)])
17052 (parallel [(const_int 0) (const_int 1)
17053 (const_int 2) (const_int 3)]))))]
17054 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17055 && can_create_pseudo_p ()"
17058 [(set (match_dup 0)
17059 (any_extend:V4SI (match_dup 1)))]
17060 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
;; Sign/zero extension word -> dword (vpmov{sx,zx}wd).
;; NOTE(review): any_extend operator lines and some insn conditions appear
;; dropped by the extraction in the patterns below.
17062 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
17063 [(set (match_operand:V16SI 0 "register_operand" "=v")
17065 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
17067 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17068 [(set_attr "type" "ssemov")
17069 (set_attr "prefix" "evex")
17070 (set_attr "mode" "XI")])
17072 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
17073 [(set (match_operand:V8SI 0 "register_operand" "=v")
17075 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
17076 "TARGET_AVX2 && <mask_avx512vl_condition>"
17077 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17078 [(set_attr "type" "ssemov")
17079 (set_attr "prefix_extra" "1")
17080 (set_attr "prefix" "maybe_evex")
17081 (set_attr "mode" "OI")])
;; 128-bit form: extends the low 4 words of a register source.
17083 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
17084 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17087 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
17088 (parallel [(const_int 0) (const_int 1)
17089 (const_int 2) (const_int 3)]))))]
17090 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17091 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17092 [(set_attr "isa" "noavx,noavx,avx")
17093 (set_attr "type" "ssemov")
17094 (set_attr "prefix_extra" "1")
17095 (set_attr "prefix" "orig,orig,maybe_evex")
17096 (set_attr "mode" "TI")])
17098 (define_insn "*sse4_1_<code>v4hiv4si2<mask_name>_1"
17099 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17101 (match_operand:V4HI 1 "memory_operand" "m,m,m")))]
17102 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17103 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17104 [(set_attr "isa" "noavx,noavx,avx")
17105 (set_attr "type" "ssemov")
17106 (set_attr "prefix_extra" "1")
17107 (set_attr "prefix" "orig,orig,maybe_evex")
17108 (set_attr "mode" "TI")])
;; Split DI-memory-fed extend into a direct V4HI memory extend.
17110 (define_insn_and_split "*sse4_1_<code>v4hiv4si2<mask_name>_2"
17111 [(set (match_operand:V4SI 0 "register_operand")
17116 (match_operand:DI 1 "memory_operand")
17118 (parallel [(const_int 0) (const_int 1)
17119 (const_int 2) (const_int 3)]))))]
17120 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17121 && can_create_pseudo_p ()"
17124 [(set (match_dup 0)
17125 (any_extend:V4SI (match_dup 1)))]
17126 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
;; Sign/zero extension byte -> qword (vpmov{sx,zx}bq).
;; NOTE(review): any_extend operator lines and some insn conditions appear
;; dropped by the extraction in the patterns below.
17128 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
17129 [(set (match_operand:V8DI 0 "register_operand" "=v")
17132 (match_operand:V16QI 1 "register_operand" "v")
17133 (parallel [(const_int 0) (const_int 1)
17134 (const_int 2) (const_int 3)
17135 (const_int 4) (const_int 5)
17136 (const_int 6) (const_int 7)]))))]
17138 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17139 [(set_attr "type" "ssemov")
17140 (set_attr "prefix" "evex")
17141 (set_attr "mode" "XI")])
17143 (define_insn "*avx512f_<code>v8qiv8di2<mask_name>_1"
17144 [(set (match_operand:V8DI 0 "register_operand" "=v")
17146 (match_operand:V8QI 1 "memory_operand" "m")))]
17148 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17149 [(set_attr "type" "ssemov")
17150 (set_attr "prefix" "evex")
17151 (set_attr "mode" "XI")])
;; Split DI-memory-fed extend into a direct V8QI memory extend.
17153 (define_insn_and_split "*avx512f_<code>v8qiv8di2<mask_name>_2"
17154 [(set (match_operand:V8DI 0 "register_operand")
17159 (match_operand:DI 1 "memory_operand")
17161 (parallel [(const_int 0) (const_int 1)
17162 (const_int 2) (const_int 3)
17163 (const_int 4) (const_int 5)
17164 (const_int 6) (const_int 7)]))))]
17165 "TARGET_AVX512F && can_create_pseudo_p ()"
17168 [(set (match_dup 0)
17169 (any_extend:V8DI (match_dup 1)))]
17170 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
;; 256-bit form: extends the low 4 bytes of a register source.
17172 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
17173 [(set (match_operand:V4DI 0 "register_operand" "=v")
17176 (match_operand:V16QI 1 "register_operand" "v")
17177 (parallel [(const_int 0) (const_int 1)
17178 (const_int 2) (const_int 3)]))))]
17179 "TARGET_AVX2 && <mask_avx512vl_condition>"
17180 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17181 [(set_attr "type" "ssemov")
17182 (set_attr "prefix_extra" "1")
17183 (set_attr "prefix" "maybe_evex")
17184 (set_attr "mode" "OI")])
17186 (define_insn "*avx2_<code>v4qiv4di2<mask_name>_1"
17187 [(set (match_operand:V4DI 0 "register_operand" "=v")
17189 (match_operand:V4QI 1 "memory_operand" "m")))]
17190 "TARGET_AVX2 && <mask_avx512vl_condition>"
17191 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17192 [(set_attr "type" "ssemov")
17193 (set_attr "prefix_extra" "1")
17194 (set_attr "prefix" "maybe_evex")
17195 (set_attr "mode" "OI")])
;; Split: SI load broadcast then selected is just a V4QI memory extend.
17197 (define_insn_and_split "*avx2_<code>v4qiv4di2<mask_name>_2"
17198 [(set (match_operand:V4DI 0 "register_operand")
17203 (vec_duplicate:V4SI
17204 (match_operand:SI 1 "memory_operand"))
17206 [(const_int 0) (const_int 0)
17207 (const_int 0) (const_int 0)])
17209 (parallel [(const_int 0) (const_int 1)
17210 (const_int 2) (const_int 3)]))))]
17211 "TARGET_AVX2 && <mask_avx512vl_condition>
17212 && can_create_pseudo_p ()"
17215 [(set (match_dup 0)
17216 (any_extend:V4DI (match_dup 1)))]
17217 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
;; 128-bit form: extends the low 2 bytes of a register source.
17219 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
17220 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17223 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17224 (parallel [(const_int 0) (const_int 1)]))))]
17225 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17226 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17227 [(set_attr "isa" "noavx,noavx,avx")
17228 (set_attr "type" "ssemov")
17229 (set_attr "prefix_extra" "1")
17230 (set_attr "prefix" "orig,orig,maybe_evex")
17231 (set_attr "mode" "TI")])
;; Sign/zero extension word -> qword (vpmov{sx,zx}wq).
;; NOTE(review): any_extend operator lines and some insn conditions appear
;; dropped by the extraction in the patterns below.
17233 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
17234 [(set (match_operand:V8DI 0 "register_operand" "=v")
17236 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
17238 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
17239 [(set_attr "type" "ssemov")
17240 (set_attr "prefix" "evex")
17241 (set_attr "mode" "XI")])
;; 256-bit form: extends the low 4 words of a register source.
17243 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
17244 [(set (match_operand:V4DI 0 "register_operand" "=v")
17247 (match_operand:V8HI 1 "register_operand" "v")
17248 (parallel [(const_int 0) (const_int 1)
17249 (const_int 2) (const_int 3)]))))]
17250 "TARGET_AVX2 && <mask_avx512vl_condition>"
17251 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17252 [(set_attr "type" "ssemov")
17253 (set_attr "prefix_extra" "1")
17254 (set_attr "prefix" "maybe_evex")
17255 (set_attr "mode" "OI")])
17257 (define_insn "*avx2_<code>v4hiv4di2<mask_name>_1"
17258 [(set (match_operand:V4DI 0 "register_operand" "=v")
17260 (match_operand:V4HI 1 "memory_operand" "m")))]
17261 "TARGET_AVX2 && <mask_avx512vl_condition>"
17262 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17263 [(set_attr "type" "ssemov")
17264 (set_attr "prefix_extra" "1")
17265 (set_attr "prefix" "maybe_evex")
17266 (set_attr "mode" "OI")])
;; Split DI-memory-fed extend into a direct V4HI memory extend.
17268 (define_insn_and_split "*avx2_<code>v4hiv4di2<mask_name>_2"
17269 [(set (match_operand:V4DI 0 "register_operand")
17274 (match_operand:DI 1 "memory_operand")
17276 (parallel [(const_int 0) (const_int 1)
17277 (const_int 2) (const_int 3)]))))]
17278 "TARGET_AVX2 && <mask_avx512vl_condition>
17279 && can_create_pseudo_p ()"
17282 [(set (match_dup 0)
17283 (any_extend:V4DI (match_dup 1)))]
17284 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
;; 128-bit form: extends the low 2 words of a register source.
17286 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
17287 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17290 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
17291 (parallel [(const_int 0) (const_int 1)]))))]
17292 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17293 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17294 [(set_attr "isa" "noavx,noavx,avx")
17295 (set_attr "type" "ssemov")
17296 (set_attr "prefix_extra" "1")
17297 (set_attr "prefix" "orig,orig,maybe_evex")
17298 (set_attr "mode" "TI")])
17300 (define_insn "*sse4_1_<code>v2hiv2di2<mask_name>_1"
17301 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17303 (match_operand:V2HI 1 "memory_operand" "m,m,m")))]
17304 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17305 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17306 [(set_attr "isa" "noavx,noavx,avx")
17307 (set_attr "type" "ssemov")
17308 (set_attr "prefix_extra" "1")
17309 (set_attr "prefix" "orig,orig,maybe_evex")
17310 (set_attr "mode" "TI")])
;; Split: SI load broadcast then selected is just a V2HI memory extend.
17312 (define_insn_and_split "*sse4_1_<code>v2hiv2di2<mask_name>_2"
17313 [(set (match_operand:V2DI 0 "register_operand")
17318 (vec_duplicate:V4SI
17319 (match_operand:SI 1 "memory_operand"))
17321 [(const_int 0) (const_int 0)
17322 (const_int 0) (const_int 0)])
17324 (parallel [(const_int 0) (const_int 1)]))))]
17325 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17326 && can_create_pseudo_p ()"
17329 [(set (match_dup 0)
17330 (any_extend:V2DI (match_dup 1)))]
17331 "operands[1] = adjust_address_nv (operands[1], V2HImode, 0);")
;; Sign/zero extension dword -> qword (vpmov{sx,zx}dq).
;; NOTE(review): any_extend operator lines and some insn conditions appear
;; dropped by the extraction in the patterns below.
17333 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
17334 [(set (match_operand:V8DI 0 "register_operand" "=v")
17336 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
17338 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17339 [(set_attr "type" "ssemov")
17340 (set_attr "prefix" "evex")
17341 (set_attr "mode" "XI")])
17343 (define_insn "avx2_<code>v4siv4di2<mask_name>"
17344 [(set (match_operand:V4DI 0 "register_operand" "=v")
17346 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
17347 "TARGET_AVX2 && <mask_avx512vl_condition>"
17348 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17349 [(set_attr "type" "ssemov")
17350 (set_attr "prefix" "maybe_evex")
17351 (set_attr "prefix_extra" "1")
17352 (set_attr "mode" "OI")])
;; 128-bit form: extends the low 2 dwords of a register source.
17354 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
17355 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17358 (match_operand:V4SI 1 "register_operand" "Yr,*x,v")
17359 (parallel [(const_int 0) (const_int 1)]))))]
17360 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17361 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17362 [(set_attr "isa" "noavx,noavx,avx")
17363 (set_attr "type" "ssemov")
17364 (set_attr "prefix_extra" "1")
17365 (set_attr "prefix" "orig,orig,maybe_evex")
17366 (set_attr "mode" "TI")])
17368 (define_insn "*sse4_1_<code>v2siv2di2<mask_name>_1"
17369 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17371 (match_operand:V2SI 1 "memory_operand" "m,m,m")))]
17372 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17373 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17374 [(set_attr "isa" "noavx,noavx,avx")
17375 (set_attr "type" "ssemov")
17376 (set_attr "prefix_extra" "1")
17377 (set_attr "prefix" "orig,orig,maybe_evex")
17378 (set_attr "mode" "TI")])
;; Split DI-memory-fed extend into a direct V2SI memory extend.
17380 (define_insn_and_split "*sse4_1_<code>v2siv2di2<mask_name>_2"
17381 [(set (match_operand:V2DI 0 "register_operand")
17386 (match_operand:DI 1 "memory_operand")
17388 (parallel [(const_int 0) (const_int 1)]))))]
17389 "TARGET_SSE4_1 && <mask_avx512vl_condition>
17390 && can_create_pseudo_p ()"
17393 [(set (match_dup 0)
17394 (any_extend:V2DI (match_dup 1)))]
17395 "operands[1] = adjust_address_nv (operands[1], V2SImode, 0);")
17397 ;; ptestps/ptestpd are very similar to comiss and ucomiss when
17398 ;; setting FLAGS_REG. But they are not really compare instructions.
;; AVX vtestps/vtestpd: sets FLAGS_REG from an unspec over two FP vectors.
17399 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
17400 [(set (reg:CC FLAGS_REG)
17401 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
17402 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
17405 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
17406 [(set_attr "type" "ssecomi")
17407 (set_attr "prefix_extra" "1")
17408 (set_attr "prefix" "vex")
17409 (set_attr "mode" "<MODE>")])
17411 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
17412 ;; But it is not really a compare instruction.
;; Integer-vector ptest over all V_AVX modes; FLAGS_REG result only.
17413 (define_insn "<sse4_1>_ptest<mode>"
17414 [(set (reg:CC FLAGS_REG)
17415 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
17416 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
17419 "%vptest\t{%1, %0|%0, %1}"
17420 [(set_attr "isa" "noavx,noavx,avx")
17421 (set_attr "type" "ssecomi")
17422 (set_attr "prefix_extra" "1")
17423 (set_attr "prefix" "orig,orig,vex")
17424 (set_attr "btver2_decode")
17426 (match_test "<sseinsnmode>mode==OImode")
17427 (const_string "vector")
17428 (const_string "*")))
17429 (set_attr "mode" "<sseinsnmode>")])

;; TFmode (128-bit scalar) variant of ptest.
17431 (define_insn "ptesttf2"
17432 [(set (reg:CC FLAGS_REG)
17433 (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
17434 (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
17437 "%vptest\t{%1, %0|%0, %1}"
17438 [(set_attr "isa" "noavx,noavx,avx")
17439 (set_attr "type" "ssecomi")
17440 (set_attr "prefix_extra" "1")
17441 (set_attr "prefix" "orig,orig,vex")
17442 (set_attr "mode" "TI")])
;; SSE4.1 packed rounding (roundps/roundpd) with an immediate rounding mode
;; in operand 2 (0..15).
17444 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
17445 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17447 [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
17448 (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
17451 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17452 [(set_attr "isa" "noavx,noavx,avx")
17453 (set_attr "type" "ssecvt")
17454 (set_attr "prefix_data16" "1,1,*")
17455 (set_attr "prefix_extra" "1")
17456 (set_attr "length_immediate" "1")
17457 (set_attr "prefix" "orig,orig,vex")
17458 (set_attr "mode" "<MODE>")])

;; Round then truncate-convert to the matching signed integer vector.
17460 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
17461 [(match_operand:<sseintvecmode> 0 "register_operand")
17462 (match_operand:VF1_128_256 1 "vector_operand")
17463 (match_operand:SI 2 "const_0_to_15_operand")]
17466 rtx tmp = gen_reg_rtx (<MODE>mode);
17469 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
17472 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));

;; 512-bit round is implemented via vrndscale.
17476 (define_expand "avx512f_round<castmode>512"
17477 [(match_operand:VF_512 0 "register_operand")
17478 (match_operand:VF_512 1 "nonimmediate_operand")
17479 (match_operand:SI 2 "const_0_to_15_operand")]
17482 emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));

;; 512-bit round-then-fix for V16SF -> V16SI.
17486 (define_expand "avx512f_roundps512_sfix"
17487 [(match_operand:V16SI 0 "register_operand")
17488 (match_operand:V16SF 1 "nonimmediate_operand")
17489 (match_operand:SI 2 "const_0_to_15_operand")]
17492 rtx tmp = gen_reg_rtx (V16SFmode);
17493 emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
17494 emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));

;; Round two DF vectors and pack the fixed results.  For V2DF on AVX (when
;; 256-bit is not discouraged) the two halves are concatenated into V4DF,
;; rounded once at 256 bits, then fix-truncated in one go.
17498 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
17499 [(match_operand:<ssepackfltmode> 0 "register_operand")
17500 (match_operand:VF2 1 "vector_operand")
17501 (match_operand:VF2 2 "vector_operand")
17502 (match_operand:SI 3 "const_0_to_15_operand")]
17507 if (<MODE>mode == V2DFmode
17508 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
17510 rtx tmp2 = gen_reg_rtx (V4DFmode);
17512 tmp0 = gen_reg_rtx (V4DFmode);
17513 tmp1 = force_reg (V2DFmode, operands[1]);
17515 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
17516 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
17517 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
17521 tmp0 = gen_reg_rtx (<MODE>mode);
17522 tmp1 = gen_reg_rtx (<MODE>mode);
17525 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
17528 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
17531 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));

;; Scalar round (roundss/roundsd); the AVX512F alternative uses vrndscale.
17536 (define_insn "sse4_1_round<ssescalarmodesuffix>"
17537 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
17540 [(match_operand:VF_128 2 "register_operand" "Yr,*x,x,v")
17541 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
17543 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
17547 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
17548 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
17549 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
17550 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17551 [(set_attr "isa" "noavx,noavx,avx,avx512f")
17552 (set_attr "type" "ssecvt")
17553 (set_attr "length_immediate" "1")
17554 (set_attr "prefix_data16" "1,1,*,*")
17555 (set_attr "prefix_extra" "1")
17556 (set_attr "prefix" "orig,orig,vex,evex")
17557 (set_attr "mode" "<MODE>")])
;; round-half-away-from-zero without trapping math: add copysign of
;; nextafter(0.5, 0.0) to the input, then truncate (ROUND_TRUNC).  Using
;; the predecessor of 0.5 avoids rounding values exactly halfway between
;; representable numbers the wrong way.
17559 (define_expand "round<mode>2"
17560 [(set (match_dup 3)
17562 (match_operand:VF 1 "register_operand")
17564 (set (match_operand:VF 0 "register_operand")
17566 [(match_dup 3) (match_dup 4)]
17568 "TARGET_SSE4_1 && !flag_trapping_math"
17570 machine_mode scalar_mode;
17571 const struct real_format *fmt;
17572 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
17573 rtx half, vec_half;
17575 scalar_mode = GET_MODE_INNER (<MODE>mode);
17577 /* load nextafter (0.5, 0.0) */
17578 fmt = REAL_MODE_FORMAT (scalar_mode);
17579 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
17580 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
17581 half = const_double_from_real_value (pred_half, scalar_mode);
17583 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
17584 vec_half = force_reg (<MODE>mode, vec_half);
17586 operands[2] = gen_reg_rtx (<MODE>mode);
17587 emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]));
17589 operands[3] = gen_reg_rtx (<MODE>mode);
17590 operands[4] = GEN_INT (ROUND_TRUNC);

;; round then fix-truncate to the signed integer vector mode.
17593 (define_expand "round<mode>2_sfix"
17594 [(match_operand:<sseintvecmode> 0 "register_operand")
17595 (match_operand:VF1 1 "register_operand")]
17596 "TARGET_SSE4_1 && !flag_trapping_math"
17598 rtx tmp = gen_reg_rtx (<MODE>mode);
17600 emit_insn (gen_round<mode>2 (tmp, operands[1]));
17603 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));

;; Round two DF vectors and pack the fixed results; same V2DF-on-AVX
;; 256-bit shortcut as the <sse4_1>_round..._vec_pack_sfix expander.
17607 (define_expand "round<mode>2_vec_pack_sfix"
17608 [(match_operand:<ssepackfltmode> 0 "register_operand")
17609 (match_operand:VF2 1 "register_operand")
17610 (match_operand:VF2 2 "register_operand")]
17611 "TARGET_SSE4_1 && !flag_trapping_math"
17615 if (<MODE>mode == V2DFmode
17616 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
17618 rtx tmp2 = gen_reg_rtx (V4DFmode);
17620 tmp0 = gen_reg_rtx (V4DFmode);
17621 tmp1 = force_reg (V2DFmode, operands[1]);
17623 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
17624 emit_insn (gen_roundv4df2 (tmp2, tmp0));
17625 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
17629 tmp0 = gen_reg_rtx (<MODE>mode);
17630 tmp1 = gen_reg_rtx (<MODE>mode);
17632 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
17633 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
17636 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
17641 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17643 ;; Intel SSE4.2 string/text processing instructions
17645 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern producing ECX index, XMM0 mask and FLAGS.
;; The split keeps only the variants whose results are actually used
;; (REG_UNUSED notes), falling back to the flags-only form, or deleting
;; the insn entirely when nothing is live.
17647 (define_insn_and_split "sse4_2_pcmpestr"
17648 [(set (match_operand:SI 0 "register_operand" "=c,c")
17650 [(match_operand:V16QI 2 "register_operand" "x,x")
17651 (match_operand:SI 3 "register_operand" "a,a")
17652 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
17653 (match_operand:SI 5 "register_operand" "d,d")
17654 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
17656 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
17664 (set (reg:CC FLAGS_REG)
17673 && can_create_pseudo_p ()"
17678 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
17679 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
17680 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
17683 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
17684 operands[3], operands[4],
17685 operands[5], operands[6]));
17687 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
17688 operands[3], operands[4],
17689 operands[5], operands[6]));
17690 if (flags && !(ecx || xmm0))
17691 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
17692 operands[2], operands[3],
17693 operands[4], operands[5],
17695 if (!(flags || ecx || xmm0))
17696 emit_note (NOTE_INSN_DELETED);
17700 [(set_attr "type" "sselog")
17701 (set_attr "prefix_data16" "1")
17702 (set_attr "prefix_extra" "1")
17703 (set_attr "length_immediate" "1")
17704 (set_attr "memory" "none,load")
17705 (set_attr "mode" "TI")])
;; pcmpestri: explicit-length string compare, index result in ECX ("=c")
;; plus FLAGS.  Lengths come in EAX/EDX ("a"/"d"), control byte in op 5.
17707 (define_insn "sse4_2_pcmpestri"
17708 [(set (match_operand:SI 0 "register_operand" "=c,c")
17710 [(match_operand:V16QI 1 "register_operand" "x,x")
17711 (match_operand:SI 2 "register_operand" "a,a")
17712 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
17713 (match_operand:SI 4 "register_operand" "d,d")
17714 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
17716 (set (reg:CC FLAGS_REG)
17725 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
17726 [(set_attr "type" "sselog")
17727 (set_attr "prefix_data16" "1")
17728 (set_attr "prefix_extra" "1")
17729 (set_attr "prefix" "maybe_vex")
17730 (set_attr "length_immediate" "1")
17731 (set_attr "btver2_decode" "vector")
17732 (set_attr "memory" "none,load")
17733 (set_attr "mode" "TI")])

;; pcmpestrm: mask result forced into XMM0 (constraint "Yz") plus FLAGS.
17735 (define_insn "sse4_2_pcmpestrm"
17736 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
17738 [(match_operand:V16QI 1 "register_operand" "x,x")
17739 (match_operand:SI 2 "register_operand" "a,a")
17740 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
17741 (match_operand:SI 4 "register_operand" "d,d")
17742 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
17744 (set (reg:CC FLAGS_REG)
17753 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
17754 [(set_attr "type" "sselog")
17755 (set_attr "prefix_data16" "1")
17756 (set_attr "prefix_extra" "1")
17757 (set_attr "length_immediate" "1")
17758 (set_attr "prefix" "maybe_vex")
17759 (set_attr "btver2_decode" "vector")
17760 (set_attr "memory" "none,load")
17761 (set_attr "mode" "TI")])

;; Flags-only form: only FLAGS_REG is live; the index/mask results are
;; clobbered scratches, so either the strm or stri encoding can be used.
17763 (define_insn "sse4_2_pcmpestr_cconly"
17764 [(set (reg:CC FLAGS_REG)
17766 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
17767 (match_operand:SI 3 "register_operand" "a,a,a,a")
17768 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
17769 (match_operand:SI 5 "register_operand" "d,d,d,d")
17770 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
17772 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
17773 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
17776 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
17777 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
17778 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
17779 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
17780 [(set_attr "type" "sselog")
17781 (set_attr "prefix_data16" "1")
17782 (set_attr "prefix_extra" "1")
17783 (set_attr "length_immediate" "1")
17784 (set_attr "memory" "none,load,none,load")
17785 (set_attr "btver2_decode" "vector,vector,vector,vector")
17786 (set_attr "prefix" "maybe_vex")
17787 (set_attr "mode" "TI")])
;; Combined pcmpistr (implicit-length) pattern; split mirrors pcmpestr:
;; keep only the result variants that are live per REG_UNUSED notes.
17789 (define_insn_and_split "sse4_2_pcmpistr"
17790 [(set (match_operand:SI 0 "register_operand" "=c,c")
17792 [(match_operand:V16QI 2 "register_operand" "x,x")
17793 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
17794 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
17796 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
17802 (set (reg:CC FLAGS_REG)
17809 && can_create_pseudo_p ()"
17814 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
17815 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
17816 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
17819 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
17820 operands[3], operands[4]));
17822 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
17823 operands[3], operands[4]));
17824 if (flags && !(ecx || xmm0))
17825 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
17826 operands[2], operands[3],
17828 if (!(flags || ecx || xmm0))
17829 emit_note (NOTE_INSN_DELETED);
17833 [(set_attr "type" "sselog")
17834 (set_attr "prefix_data16" "1")
17835 (set_attr "prefix_extra" "1")
17836 (set_attr "length_immediate" "1")
17837 (set_attr "memory" "none,load")
17838 (set_attr "mode" "TI")])
;; pcmpistri: implicit-length compare, index in ECX plus FLAGS.
17840 (define_insn "sse4_2_pcmpistri"
17841 [(set (match_operand:SI 0 "register_operand" "=c,c")
17843 [(match_operand:V16QI 1 "register_operand" "x,x")
17844 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
17845 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
17847 (set (reg:CC FLAGS_REG)
17854 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
17855 [(set_attr "type" "sselog")
17856 (set_attr "prefix_data16" "1")
17857 (set_attr "prefix_extra" "1")
17858 (set_attr "length_immediate" "1")
17859 (set_attr "prefix" "maybe_vex")
17860 (set_attr "memory" "none,load")
17861 (set_attr "btver2_decode" "vector")
17862 (set_attr "mode" "TI")])

;; pcmpistrm: mask in XMM0 ("Yz") plus FLAGS.
17864 (define_insn "sse4_2_pcmpistrm"
17865 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
17867 [(match_operand:V16QI 1 "register_operand" "x,x")
17868 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
17869 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
17871 (set (reg:CC FLAGS_REG)
17878 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
17879 [(set_attr "type" "sselog")
17880 (set_attr "prefix_data16" "1")
17881 (set_attr "prefix_extra" "1")
17882 (set_attr "length_immediate" "1")
17883 (set_attr "prefix" "maybe_vex")
17884 (set_attr "memory" "none,load")
17885 (set_attr "btver2_decode" "vector")
17886 (set_attr "mode" "TI")])

;; Flags-only pcmpistr; index/mask are clobbered scratches.
17888 (define_insn "sse4_2_pcmpistr_cconly"
17889 [(set (reg:CC FLAGS_REG)
17891 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
17892 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
17893 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
17895 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
17896 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
17899 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
17900 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
17901 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
17902 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
17903 [(set_attr "type" "sselog")
17904 (set_attr "prefix_data16" "1")
17905 (set_attr "prefix_extra" "1")
17906 (set_attr "length_immediate" "1")
17907 (set_attr "memory" "none,load,none,load")
17908 (set_attr "prefix" "maybe_vex")
17909 (set_attr "btver2_decode" "vector,vector,vector,vector")
17910 (set_attr "mode" "TI")])
17912 ;; Packed float variants
;; Maps the index-vector mode to the SF memory mode it prefetches.
17913 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
17914 [(V8DI "V8SF") (V16SI "V16SF")])

;; AVX512PF gather prefetch, SF memory: wrap the VSIB address components
;; into an UNSPEC_VSIBADDR so the insn pattern can match them.
17916 (define_expand "avx512pf_gatherpf<mode>sf"
17918 [(match_operand:<avx512fmaskmode> 0 "register_operand")
17919 (mem:<GATHER_SCATTER_SF_MEM_MODE>
17921 [(match_operand 2 "vsib_address_operand")
17922 (match_operand:VI48_512 1 "register_operand")
17923 (match_operand:SI 3 "const1248_operand")]))
17924 (match_operand:SI 4 "const_2_to_3_operand")]
17925 UNSPEC_GATHER_PREFETCH)]
17929 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
17930 operands[3]), UNSPEC_VSIBADDR);

;; Masked gather-prefetch insn; operand 4 (hint 2 or 3) selects
;; vgatherpf0ps vs vgatherpf1ps.
17933 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
17935 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
17936 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
17938 [(match_operand:P 2 "vsib_address_operand" "Tv")
17939 (match_operand:VI48_512 1 "register_operand" "v")
17940 (match_operand:SI 3 "const1248_operand" "n")]
17942 (match_operand:SI 4 "const_2_to_3_operand" "n")]
17943 UNSPEC_GATHER_PREFETCH)]
17946 switch (INTVAL (operands[4]))
17949 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
17950 gas changed what it requires incompatibly. */
17951 return "%M2vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
17953 return "%M2vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
17955 gcc_unreachable ();
17958 [(set_attr "type" "sse")
17959 (set_attr "prefix" "evex")
17960 (set_attr "mode" "XI")])

17962 ;; Packed double variants
17963 (define_expand "avx512pf_gatherpf<mode>df"
17965 [(match_operand:<avx512fmaskmode> 0 "register_operand")
17968 [(match_operand 2 "vsib_address_operand")
17969 (match_operand:VI4_256_8_512 1 "register_operand")
17970 (match_operand:SI 3 "const1248_operand")]))
17971 (match_operand:SI 4 "const_2_to_3_operand")]
17972 UNSPEC_GATHER_PREFETCH)]
17976 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
17977 operands[3]), UNSPEC_VSIBADDR);

;; DF counterpart: vgatherpf0pd / vgatherpf1pd on a V8DF memory operand.
17980 (define_insn "*avx512pf_gatherpf<mode>df_mask"
17982 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
17983 (match_operator:V8DF 5 "vsib_mem_operator"
17985 [(match_operand:P 2 "vsib_address_operand" "Tv")
17986 (match_operand:VI4_256_8_512 1 "register_operand" "v")
17987 (match_operand:SI 3 "const1248_operand" "n")]
17989 (match_operand:SI 4 "const_2_to_3_operand" "n")]
17990 UNSPEC_GATHER_PREFETCH)]
17993 switch (INTVAL (operands[4]))
17996 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
17997 gas changed what it requires incompatibly. */
17998 return "%M2vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
18000 return "%M2vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
18002 gcc_unreachable ();
18005 [(set_attr "type" "sse")
18006 (set_attr "prefix" "evex")
18007 (set_attr "mode" "XI")])
18009 ;; Packed float variants
;; AVX512PF scatter prefetch, SF memory; structure mirrors the gather
;; prefetch expanders above (VSIB address wrapped in UNSPEC_VSIBADDR).
18010 (define_expand "avx512pf_scatterpf<mode>sf"
18012 [(match_operand:<avx512fmaskmode> 0 "register_operand")
18013 (mem:<GATHER_SCATTER_SF_MEM_MODE>
18015 [(match_operand 2 "vsib_address_operand")
18016 (match_operand:VI48_512 1 "register_operand")
18017 (match_operand:SI 3 "const1248_operand")]))
18018 (match_operand:SI 4 "const2367_operand")]
18019 UNSPEC_SCATTER_PREFETCH)]
18023 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
18024 operands[3]), UNSPEC_VSIBADDR);

;; Masked scatter-prefetch insn; operand 4 hint selects
;; vscatterpf0ps vs vscatterpf1ps.
18027 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
18029 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
18030 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
18032 [(match_operand:P 2 "vsib_address_operand" "Tv")
18033 (match_operand:VI48_512 1 "register_operand" "v")
18034 (match_operand:SI 3 "const1248_operand" "n")]
18036 (match_operand:SI 4 "const2367_operand" "n")]
18037 UNSPEC_SCATTER_PREFETCH)]
18040 switch (INTVAL (operands[4]))
18044 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
18045 gas changed what it requires incompatibly. */
18046 return "%M2vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
18049 return "%M2vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
18051 gcc_unreachable ();
18054 [(set_attr "type" "sse")
18055 (set_attr "prefix" "evex")
18056 (set_attr "mode" "XI")])

18058 ;; Packed double variants
18059 (define_expand "avx512pf_scatterpf<mode>df"
18061 [(match_operand:<avx512fmaskmode> 0 "register_operand")
18064 [(match_operand 2 "vsib_address_operand")
18065 (match_operand:VI4_256_8_512 1 "register_operand")
18066 (match_operand:SI 3 "const1248_operand")]))
18067 (match_operand:SI 4 "const2367_operand")]
18068 UNSPEC_SCATTER_PREFETCH)]
18072 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
18073 operands[3]), UNSPEC_VSIBADDR);

;; DF counterpart: vscatterpf0pd / vscatterpf1pd on V8DF memory.
18076 (define_insn "*avx512pf_scatterpf<mode>df_mask"
18078 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
18079 (match_operator:V8DF 5 "vsib_mem_operator"
18081 [(match_operand:P 2 "vsib_address_operand" "Tv")
18082 (match_operand:VI4_256_8_512 1 "register_operand" "v")
18083 (match_operand:SI 3 "const1248_operand" "n")]
18085 (match_operand:SI 4 "const2367_operand" "n")]
18086 UNSPEC_SCATTER_PREFETCH)]
18089 switch (INTVAL (operands[4]))
18093 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
18094 gas changed what it requires incompatibly. */
18095 return "%M2vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
18098 return "%M2vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
18100 gcc_unreachable ();
18103 [(set_attr "type" "sse")
18104 (set_attr "prefix" "evex")
18105 (set_attr "mode" "XI")])
;; AVX512ER vexp2ps/pd: 2^x approximation, 512-bit, EVEX-encoded,
;; with optional masking and SAE.
18107 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
18108 [(set (match_operand:VF_512 0 "register_operand" "=v")
18110 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18113 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18114 [(set_attr "prefix" "evex")
18115 (set_attr "type" "sse")
18116 (set_attr "mode" "<MODE>")])

;; vrcp28ps/pd: packed reciprocal approximation (2^-28 relative error).
18118 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
18119 [(set (match_operand:VF_512 0 "register_operand" "=v")
18121 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18124 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18125 [(set_attr "prefix" "evex")
18126 (set_attr "type" "sse")
18127 (set_attr "mode" "<MODE>")])

;; vrcp28ss/sd: scalar form merging with operand 2's upper elements.
18129 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
18130 [(set (match_operand:VF_128 0 "register_operand" "=v")
18133 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18135 (match_operand:VF_128 2 "register_operand" "v")
18138 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
18139 [(set_attr "length_immediate" "1")
18140 (set_attr "prefix" "evex")
18141 (set_attr "type" "sse")
18142 (set_attr "mode" "<MODE>")])

;; vrsqrt28ps/pd: packed reciprocal square-root approximation.
18144 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
18145 [(set (match_operand:VF_512 0 "register_operand" "=v")
18147 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18150 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18151 [(set_attr "prefix" "evex")
18152 (set_attr "type" "sse")
18153 (set_attr "mode" "<MODE>")])

;; vrsqrt28ss/sd: scalar form.
18155 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
18156 [(set (match_operand:VF_128 0 "register_operand" "=v")
18159 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18161 (match_operand:VF_128 2 "register_operand" "v")
18164 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
18165 [(set_attr "length_immediate" "1")
18166 (set_attr "type" "sse")
18167 (set_attr "prefix" "evex")
18168 (set_attr "mode" "<MODE>")])
18170 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18172 ;; XOP instructions
18174 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Iterate over wrapping (plus) and signed-saturating (ss_plus) adds;
;; the attrs map each to the matching XOP mnemonic fragment.
18176 (define_code_iterator xop_plus [plus ss_plus])

18178 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
18179 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])

18181 ;; XOP parallel integer multiply/add instructions.

;; vpmacs[s]{ww,dd}: multiply ops 1*2, add accumulator op 3.
;; Operand 1 is commutative ("%x"); only one memory operand is allowed.
18183 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
18184 [(set (match_operand:VI24_128 0 "register_operand" "=x")
18187 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
18188 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
18189 (match_operand:VI24_128 3 "register_operand" "x")))]
18191 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18192 [(set_attr "type" "ssemuladd")
18193 (set_attr "mode" "TI")])
;; vpmacs[s]dql: multiply-add on the even (low) dword lanes 0,2 -> V2DI.
18195 (define_insn "xop_p<macs>dql"
18196 [(set (match_operand:V2DI 0 "register_operand" "=x")
18201 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
18202 (parallel [(const_int 0) (const_int 2)])))
18205 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18206 (parallel [(const_int 0) (const_int 2)]))))
18207 (match_operand:V2DI 3 "register_operand" "x")))]
18209 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18210 [(set_attr "type" "ssemuladd")
18211 (set_attr "mode" "TI")])

;; vpmacs[s]dqh: same, on the odd (high) dword lanes 1,3.
18213 (define_insn "xop_p<macs>dqh"
18214 [(set (match_operand:V2DI 0 "register_operand" "=x")
18219 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
18220 (parallel [(const_int 1) (const_int 3)])))
18223 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18224 (parallel [(const_int 1) (const_int 3)]))))
18225 (match_operand:V2DI 3 "register_operand" "x")))]
18227 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18228 [(set_attr "type" "ssemuladd")
18229 (set_attr "mode" "TI")])

18231 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vpmacs[s]wd: multiply the odd word lanes 1,3,5,7, add op 3 -> V4SI.
18232 (define_insn "xop_p<macs>wd"
18233 [(set (match_operand:V4SI 0 "register_operand" "=x")
18238 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
18239 (parallel [(const_int 1) (const_int 3)
18240 (const_int 5) (const_int 7)])))
18243 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
18244 (parallel [(const_int 1) (const_int 3)
18245 (const_int 5) (const_int 7)]))))
18246 (match_operand:V4SI 3 "register_operand" "x")))]
18248 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18249 [(set_attr "type" "ssemuladd")
18250 (set_attr "mode" "TI")])

;; vpmadcs[s]wd: sum products of the even lanes and of the odd lanes,
;; then add accumulator op 3.
18252 (define_insn "xop_p<madcs>wd"
18253 [(set (match_operand:V4SI 0 "register_operand" "=x")
18259 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
18260 (parallel [(const_int 0) (const_int 2)
18261 (const_int 4) (const_int 6)])))
18264 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
18265 (parallel [(const_int 0) (const_int 2)
18266 (const_int 4) (const_int 6)]))))
18271 (parallel [(const_int 1) (const_int 3)
18272 (const_int 5) (const_int 7)])))
18276 (parallel [(const_int 1) (const_int 3)
18277 (const_int 5) (const_int 7)])))))
18278 (match_operand:V4SI 3 "register_operand" "x")))]
18280 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18281 [(set_attr "type" "ssemuladd")
18282 (set_attr "mode" "TI")])
18284 ;; XOP parallel XMM conditional moves
;; vpcmov: bitwise select — operand 3 is the condition/selector,
;; operands 1/2 the true/false sources; exactly one may be memory.
18285 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
18286 [(set (match_operand:V_128_256 0 "register_operand" "=x,x")
18287 (if_then_else:V_128_256
18288 (match_operand:V_128_256 3 "nonimmediate_operand" "x,m")
18289 (match_operand:V_128_256 1 "register_operand" "x,x")
18290 (match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))]
18292 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18293 [(set_attr "type" "sse4arg")])
18295 ;; XOP horizontal add/subtract instructions
;; vphadd[u]bw: add adjacent byte pairs (even + odd lanes) -> V8HI.
18296 (define_insn "xop_phadd<u>bw"
18297 [(set (match_operand:V8HI 0 "register_operand" "=x")
18301 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18302 (parallel [(const_int 0) (const_int 2)
18303 (const_int 4) (const_int 6)
18304 (const_int 8) (const_int 10)
18305 (const_int 12) (const_int 14)])))
18309 (parallel [(const_int 1) (const_int 3)
18310 (const_int 5) (const_int 7)
18311 (const_int 9) (const_int 11)
18312 (const_int 13) (const_int 15)])))))]
18314 "vphadd<u>bw\t{%1, %0|%0, %1}"
18315 [(set_attr "type" "sseiadd1")])

;; vphadd[u]bd: add groups of four bytes -> V4SI.
18317 (define_insn "xop_phadd<u>bd"
18318 [(set (match_operand:V4SI 0 "register_operand" "=x")
18323 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18324 (parallel [(const_int 0) (const_int 4)
18325 (const_int 8) (const_int 12)])))
18329 (parallel [(const_int 1) (const_int 5)
18330 (const_int 9) (const_int 13)]))))
18335 (parallel [(const_int 2) (const_int 6)
18336 (const_int 10) (const_int 14)])))
18340 (parallel [(const_int 3) (const_int 7)
18341 (const_int 11) (const_int 15)]))))))]
18343 "vphadd<u>bd\t{%1, %0|%0, %1}"
18344 [(set_attr "type" "sseiadd1")])

;; vphadd[u]bq: add groups of eight bytes -> V2DI.
18346 (define_insn "xop_phadd<u>bq"
18347 [(set (match_operand:V2DI 0 "register_operand" "=x")
18353 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18354 (parallel [(const_int 0) (const_int 8)])))
18358 (parallel [(const_int 1) (const_int 9)]))))
18363 (parallel [(const_int 2) (const_int 10)])))
18367 (parallel [(const_int 3) (const_int 11)])))))
18373 (parallel [(const_int 4) (const_int 12)])))
18377 (parallel [(const_int 5) (const_int 13)]))))
18382 (parallel [(const_int 6) (const_int 14)])))
18386 (parallel [(const_int 7) (const_int 15)])))))))]
18388 "vphadd<u>bq\t{%1, %0|%0, %1}"
18389 [(set_attr "type" "sseiadd1")])

;; vphadd[u]wd: add adjacent word pairs -> V4SI.
18391 (define_insn "xop_phadd<u>wd"
18392 [(set (match_operand:V4SI 0 "register_operand" "=x")
18396 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
18397 (parallel [(const_int 0) (const_int 2)
18398 (const_int 4) (const_int 6)])))
18402 (parallel [(const_int 1) (const_int 3)
18403 (const_int 5) (const_int 7)])))))]
18405 "vphadd<u>wd\t{%1, %0|%0, %1}"
18406 [(set_attr "type" "sseiadd1")])

;; vphadd[u]wq: add groups of four words -> V2DI.
18408 (define_insn "xop_phadd<u>wq"
18409 [(set (match_operand:V2DI 0 "register_operand" "=x")
18414 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
18415 (parallel [(const_int 0) (const_int 4)])))
18419 (parallel [(const_int 1) (const_int 5)]))))
18424 (parallel [(const_int 2) (const_int 6)])))
18428 (parallel [(const_int 3) (const_int 7)]))))))]
18430 "vphadd<u>wq\t{%1, %0|%0, %1}"
18431 [(set_attr "type" "sseiadd1")])

;; vphadd[u]dq: add adjacent dword pairs -> V2DI.
18433 (define_insn "xop_phadd<u>dq"
18434 [(set (match_operand:V2DI 0 "register_operand" "=x")
18438 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
18439 (parallel [(const_int 0) (const_int 2)])))
18443 (parallel [(const_int 1) (const_int 3)])))))]
18445 "vphadd<u>dq\t{%1, %0|%0, %1}"
18446 [(set_attr "type" "sseiadd1")])
18448 (define_insn "xop_phsubbw"
18449 [(set (match_operand:V8HI 0 "register_operand" "=x")
18453 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18454 (parallel [(const_int 0) (const_int 2)
18455 (const_int 4) (const_int 6)
18456 (const_int 8) (const_int 10)
18457 (const_int 12) (const_int 14)])))
18461 (parallel [(const_int 1) (const_int 3)
18462 (const_int 5) (const_int 7)
18463 (const_int 9) (const_int 11)
18464 (const_int 13) (const_int 15)])))))]
18466 "vphsubbw\t{%1, %0|%0, %1}"
18467 [(set_attr "type" "sseiadd1")])
;; XOP horizontal subtract: adjacent V8HI word pairs (even minus odd)
;; widened into V4SI doublewords (vphsubwd).
18469 (define_insn "xop_phsubwd"
18470 [(set (match_operand:V4SI 0 "register_operand" "=x")
18474 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
18475 (parallel [(const_int 0) (const_int 2)
18476 (const_int 4) (const_int 6)])))
18480 (parallel [(const_int 1) (const_int 3)
18481 (const_int 5) (const_int 7)])))))]
18483 "vphsubwd\t{%1, %0|%0, %1}"
18484 [(set_attr "type" "sseiadd1")])
;; XOP horizontal subtract: adjacent V4SI doubleword pairs widened
;; into V2DI quadwords (vphsubdq).
18486 (define_insn "xop_phsubdq"
18487 [(set (match_operand:V2DI 0 "register_operand" "=x")
18491 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
18492 (parallel [(const_int 0) (const_int 2)])))
18496 (parallel [(const_int 1) (const_int 3)])))))]
18498 "vphsubdq\t{%1, %0|%0, %1}"
18499 [(set_attr "type" "sseiadd1")])
18501 ;; XOP permute instructions
;; vpperm: byte-wise permute/select over the 32-byte concatenation of
;; operands 1 and 2, controlled per destination byte by selector
;; operand 3.  Two alternatives allow the memory operand to be either
;; the selector or the second source, but never both (see condition).
18502 (define_insn "xop_pperm"
18503 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
18505 [(match_operand:V16QI 1 "register_operand" "x,x")
18506 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
18507 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
18508 UNSPEC_XOP_PERMUTE))]
18509 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
18510 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18511 [(set_attr "type" "sse4arg")
18512 (set_attr "mode" "TI")])
18514 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Truncate two V2DI sources into the halves of one V4SI result using
;; vpperm; operand 3 is the byte selector, present only as a (use)
;; since the truncation itself is expressed by the RTL.
18515 (define_insn "xop_pperm_pack_v2di_v4si"
18516 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
18519 (match_operand:V2DI 1 "register_operand" "x,x"))
18521 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
18522 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
18523 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
18524 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18525 [(set_attr "type" "sse4arg")
18526 (set_attr "mode" "TI")])
;; As above, but packs two V4SI sources into one V8HI result via
;; vpperm with selector operand 3.
18528 (define_insn "xop_pperm_pack_v4si_v8hi"
18529 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
18532 (match_operand:V4SI 1 "register_operand" "x,x"))
18534 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
18535 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
18536 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
18537 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18538 [(set_attr "type" "sse4arg")
18539 (set_attr "mode" "TI")])
;; As above, but packs two V8HI sources into one V16QI result via
;; vpperm with selector operand 3.
18541 (define_insn "xop_pperm_pack_v8hi_v16qi"
18542 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
18545 (match_operand:V8HI 1 "register_operand" "x,x"))
18547 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
18548 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
18549 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
18550 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18551 [(set_attr "type" "sse4arg")
18552 (set_attr "mode" "TI")])
18554 ;; XOP packed rotate instructions
;; Expander for vector rotate-left with a scalar count.  If the count
;; is not a valid immediate, broadcast it into a vector register
;; (converting to the element mode first if needed) and emit the XOP
;; variable-rotate pattern instead.
18555 (define_expand "rotl<mode>3"
18556 [(set (match_operand:VI_128 0 "register_operand")
18558 (match_operand:VI_128 1 "nonimmediate_operand")
18559 (match_operand:SI 2 "general_operand")))]
18562 /* If we were given a scalar, convert it to parallel */
18563 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
18565 rtvec vs = rtvec_alloc (<ssescalarnum>);
18566 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
18567 rtx reg = gen_reg_rtx (<MODE>mode);
18568 rtx op2 = operands[2];
;; Widen/narrow the scalar count to the vector element mode.
18571 if (GET_MODE (op2) != <ssescalarmode>mode)
18573 op2 = gen_reg_rtx (<ssescalarmode>mode);
18574 convert_move (op2, operands[2], false);
;; Replicate the count into every element, then use variable rotate.
18577 for (i = 0; i < <ssescalarnum>; i++)
18578 RTVEC_ELT (vs, i) = op2;
18580 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
18581 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Expander for vector rotate-right with a scalar count.  Like
;; rotl<mode>3, but the broadcast count vector is negated so the
;; operation can be emitted as an XOP variable left-rotate.
18586 (define_expand "rotr<mode>3"
18587 [(set (match_operand:VI_128 0 "register_operand")
18589 (match_operand:VI_128 1 "nonimmediate_operand")
18590 (match_operand:SI 2 "general_operand")))]
18593 /* If we were given a scalar, convert it to parallel */
18594 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
18596 rtvec vs = rtvec_alloc (<ssescalarnum>);
18597 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
18598 rtx neg = gen_reg_rtx (<MODE>mode);
18599 rtx reg = gen_reg_rtx (<MODE>mode);
18600 rtx op2 = operands[2];
;; Widen/narrow the scalar count to the vector element mode.
18603 if (GET_MODE (op2) != <ssescalarmode>mode)
18605 op2 = gen_reg_rtx (<ssescalarmode>mode);
18606 convert_move (op2, operands[2], false);
18609 for (i = 0; i < <ssescalarnum>; i++)
18610 RTVEC_ELT (vs, i) = op2;
18612 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par))
;; Negate the counts: rotr x,n == rotl x,-n for XOP vprot.
18613 emit_insn (gen_neg<mode>2 (neg, reg));
18614 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; XOP rotate-left of each element by an immediate count
;; (vprotb/w/d/q with an imm8 operand).
18619 (define_insn "xop_rotl<mode>3"
18620 [(set (match_operand:VI_128 0 "register_operand" "=x")
18622 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
18623 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
18625 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18626 [(set_attr "type" "sseishft")
18627 (set_attr "length_immediate" "1")
18628 (set_attr "mode" "TI")])
;; XOP rotate-right by immediate: emitted as vprot left-rotate with
;; the complementary count (element-bits - count), computed into
;; operands[3] by the output C fragment.
18630 (define_insn "xop_rotr<mode>3"
18631 [(set (match_operand:VI_128 0 "register_operand" "=x")
18633 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
18634 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
18638 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
18639 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
18641 [(set_attr "type" "sseishft")
18642 (set_attr "length_immediate" "1")
18643 (set_attr "mode" "TI")])
;; Variable vector rotate-right: negate the per-element counts and
;; emit the XOP variable left-rotate.
18645 (define_expand "vrotr<mode>3"
18646 [(match_operand:VI_128 0 "register_operand")
18647 (match_operand:VI_128 1 "register_operand")
18648 (match_operand:VI_128 2 "register_operand")]
18651 rtx reg = gen_reg_rtx (<MODE>mode);
18652 emit_insn (gen_neg<mode>2 (reg, operands[2]));
18653 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Variable vector rotate-left: maps directly onto the XOP
;; variable-rotate pattern.
18657 (define_expand "vrotl<mode>3"
18658 [(match_operand:VI_128 0 "register_operand")
18659 (match_operand:VI_128 1 "register_operand")
18660 (match_operand:VI_128 2 "register_operand")]
18663 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; XOP per-element variable rotate (vprotb/w/d/q with a vector count):
;; modeled as an if_then_else on the sign of each count element —
;; non-negative counts rotate left, negative counts rotate right by
;; the negated count.  At most one of the two sources may be memory.
18667 (define_insn "xop_vrotl<mode>3"
18668 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
18669 (if_then_else:VI_128
18671 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
18674 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
18678 (neg:VI_128 (match_dup 2)))))]
18679 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18680 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18681 [(set_attr "type" "sseishft")
18682 (set_attr "prefix_data16" "0")
18683 (set_attr "prefix_extra" "2")
18684 (set_attr "mode" "TI")])
18686 ;; XOP packed shift instructions.
;; Variable logical right shift for 8/16-bit element vectors: XOP has
;; no right-shift form, so negate the counts and use vpshl (negative
;; count shifts right).
18687 (define_expand "vlshr<mode>3"
18688 [(set (match_operand:VI12_128 0 "register_operand")
18690 (match_operand:VI12_128 1 "register_operand")
18691 (match_operand:VI12_128 2 "nonimmediate_operand")))]
18694 rtx neg = gen_reg_rtx (<MODE>mode);
18695 emit_insn (gen_neg<mode>2 (neg, operands[2]));
18696 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Variable logical right shift for 32/64-bit element 128-bit vectors.
;; With AVX2 the standard pattern applies directly; the XOP fallback
;; negates the counts and uses vpshl (negative count shifts right).
18700 (define_expand "vlshr<mode>3"
18701 [(set (match_operand:VI48_128 0 "register_operand")
18703 (match_operand:VI48_128 1 "register_operand")
18704 (match_operand:VI48_128 2 "nonimmediate_operand")))]
18705 "TARGET_AVX2 || TARGET_XOP"
18709 rtx neg = gen_reg_rtx (<MODE>mode);
18710 emit_insn (gen_neg<mode>2 (neg, operands[2]));
18711 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Variable logical right shift, 512-bit 32/64-bit element vectors
;; (matches the AVX-512 insn pattern directly; no C expansion visible).
18716 (define_expand "vlshr<mode>3"
18717 [(set (match_operand:VI48_512 0 "register_operand")
18719 (match_operand:VI48_512 1 "register_operand")
18720 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; Variable logical right shift, 256-bit 32/64-bit element vectors.
18723 (define_expand "vlshr<mode>3"
18724 [(set (match_operand:VI48_256 0 "register_operand")
18726 (match_operand:VI48_256 1 "register_operand")
18727 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Variable arithmetic right shift of V8HI.  AVX512BW+VL handles it
;; natively; the XOP path negates the counts and uses vpshaw
;; (negative count shifts right arithmetically).
18730 (define_expand "vashrv8hi3<mask_name>"
18731 [(set (match_operand:V8HI 0 "register_operand")
18733 (match_operand:V8HI 1 "register_operand")
18734 (match_operand:V8HI 2 "nonimmediate_operand")))]
18735 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
18739 rtx neg = gen_reg_rtx (V8HImode);
18740 emit_insn (gen_negv8hi2 (neg, operands[2]));
18741 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
;; Variable arithmetic right shift of V16QI via XOP vpshab with
;; negated counts.
18746 (define_expand "vashrv16qi3"
18747 [(set (match_operand:V16QI 0 "register_operand")
18749 (match_operand:V16QI 1 "register_operand")
18750 (match_operand:V16QI 2 "nonimmediate_operand")))]
18753 rtx neg = gen_reg_rtx (V16QImode);
18754 emit_insn (gen_negv16qi2 (neg, operands[2]));
18755 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
;; Variable arithmetic right shift of V2DI.  AVX512VL has vpsravq;
;; the XOP fallback negates the counts and uses vpshaq.
18759 (define_expand "vashrv2di3<mask_name>"
18760 [(set (match_operand:V2DI 0 "register_operand")
18762 (match_operand:V2DI 1 "register_operand")
18763 (match_operand:V2DI 2 "nonimmediate_operand")))]
18764 "TARGET_XOP || TARGET_AVX512VL"
18768 rtx neg = gen_reg_rtx (V2DImode);
18769 emit_insn (gen_negv2di2 (neg, operands[2]));
18770 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
;; Variable arithmetic right shift of V4SI: native with AVX2
;; (vpsravd); XOP fallback negates counts and uses vpshad.
18775 (define_expand "vashrv4si3"
18776 [(set (match_operand:V4SI 0 "register_operand")
18777 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
18778 (match_operand:V4SI 2 "nonimmediate_operand")))]
18779 "TARGET_AVX2 || TARGET_XOP"
18783 rtx neg = gen_reg_rtx (V4SImode);
18784 emit_insn (gen_negv4si2 (neg, operands[2]));
18785 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; Variable arithmetic right shift of V16SI (AVX-512).
18790 (define_expand "vashrv16si3"
18791 [(set (match_operand:V16SI 0 "register_operand")
18792 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
18793 (match_operand:V16SI 2 "nonimmediate_operand")))]
;; Variable arithmetic right shift of V8SI.
18796 (define_expand "vashrv8si3"
18797 [(set (match_operand:V8SI 0 "register_operand")
18798 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
18799 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; Variable left shift for 8/16-bit element vectors via XOP vpsha
;; (positive counts shift left).
18802 (define_expand "vashl<mode>3"
18803 [(set (match_operand:VI12_128 0 "register_operand")
18805 (match_operand:VI12_128 1 "register_operand")
18806 (match_operand:VI12_128 2 "nonimmediate_operand")))]
18809 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Variable left shift for 32/64-bit element 128-bit vectors: native
;; with AVX2 (vpsllv*); XOP path forces the count into a register and
;; uses vpsha.
18813 (define_expand "vashl<mode>3"
18814 [(set (match_operand:VI48_128 0 "register_operand")
18816 (match_operand:VI48_128 1 "register_operand")
18817 (match_operand:VI48_128 2 "nonimmediate_operand")))]
18818 "TARGET_AVX2 || TARGET_XOP"
18822 operands[2] = force_reg (<MODE>mode, operands[2]);
18823 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Variable left shift, 512-bit 32/64-bit element vectors (AVX-512).
18828 (define_expand "vashl<mode>3"
18829 [(set (match_operand:VI48_512 0 "register_operand")
18831 (match_operand:VI48_512 1 "register_operand")
18832 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; Variable left shift, 256-bit 32/64-bit element vectors.
18835 (define_expand "vashl<mode>3"
18836 [(set (match_operand:VI48_256 0 "register_operand")
18838 (match_operand:VI48_256 1 "register_operand")
18839 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; XOP per-element variable arithmetic shift (vpsha*): if_then_else
;; on the sign of each count — non-negative counts shift left,
;; negative counts shift right by the negated count.  At most one of
;; the two sources may be memory.
18842 (define_insn "xop_sha<mode>3"
18843 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
18844 (if_then_else:VI_128
18846 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
18849 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
18853 (neg:VI_128 (match_dup 2)))))]
18854 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18855 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18856 [(set_attr "type" "sseishft")
18857 (set_attr "prefix_data16" "0")
18858 (set_attr "prefix_extra" "2")
18859 (set_attr "mode" "TI")])
;; XOP per-element variable logical shift (vpshl*): same sign-of-count
;; selection as xop_sha<mode>3 but with logical (zero-filling) right
;; shift for negative counts.
18861 (define_insn "xop_shl<mode>3"
18862 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
18863 (if_then_else:VI_128
18865 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
18868 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
18872 (neg:VI_128 (match_dup 2)))))]
18873 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
18874 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18875 [(set_attr "type" "sseishft")
18876 (set_attr "prefix_data16" "0")
18877 (set_attr "prefix_extra" "2")
18878 (set_attr "mode" "TI")])
;; Byte-element shifts (V16QI and wider byte vectors) by a scalar
;; count.  x86 has no native byte shifts: on XOP, broadcast the count
;; (negated for right shifts) and use vpshl/vpsha; otherwise fall back
;; to ix86_expand_vecop_qihi, which widens to HImode.
18880 (define_expand "<shift_insn><mode>3"
18881 [(set (match_operand:VI1_AVX512 0 "register_operand")
18882 (any_shift:VI1_AVX512
18883 (match_operand:VI1_AVX512 1 "register_operand")
18884 (match_operand:SI 2 "nonmemory_operand")))]
18887 if (TARGET_XOP && <MODE>mode == V16QImode)
18889 bool negate = false;
18890 rtx (*gen) (rtx, rtx, rtx);
;; Right shifts become left shifts by a negated count on XOP.
18894 if (<CODE> != ASHIFT)
18896 if (CONST_INT_P (operands[2]))
18897 operands[2] = GEN_INT (-INTVAL (operands[2]));
;; Broadcast the (possibly negated) scalar count to all 16 bytes.
18901 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
18902 for (i = 0; i < 16; i++)
18903 XVECEXP (par, 0, i) = operands[2];
18905 tmp = gen_reg_rtx (V16QImode);
18906 emit_insn (gen_vec_initv16qiqi (tmp, par));
18909 emit_insn (gen_negv16qi2 (tmp, tmp));
;; vpshl for logical, vpsha for arithmetic shifts.
18911 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
18912 emit_insn (gen (operands[0], operands[1], tmp));
18915 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; Arithmetic right shift of V2DI by a scalar count.  AVX512VL has a
;; native pattern; the XOP path broadcasts the negated count into a
;; V2DI register and uses vpshaq (negative count shifts right).
18919 (define_expand "ashrv2di3"
18920 [(set (match_operand:V2DI 0 "register_operand")
18922 (match_operand:V2DI 1 "register_operand")
18923 (match_operand:DI 2 "nonmemory_operand")))]
18924 "TARGET_XOP || TARGET_AVX512VL"
18926 if (!TARGET_AVX512VL)
18928 rtx reg = gen_reg_rtx (V2DImode);
18930 bool negate = false;
18933 if (CONST_INT_P (operands[2]))
18934 operands[2] = GEN_INT (-INTVAL (operands[2]));
;; Duplicate the count into both quadword lanes.
18938 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
18939 for (i = 0; i < 2; i++)
18940 XVECEXP (par, 0, i) = operands[2];
18942 emit_insn (gen_vec_initv2didi (reg, par));
18945 emit_insn (gen_negv2di2 (reg, reg));
18947 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
18952 ;; XOP FRCZ support
;; vfrczps/pd/ss/sd: extract the fractional part of each FP element.
18953 (define_insn "xop_frcz<mode>2"
18954 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
18956 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
18959 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
18960 [(set_attr "type" "ssecvt1")
18961 (set_attr "mode" "<MODE>")])
;; Scalar (vector-merge) FRCZ expander: supplies a zero vector as the
;; merge source (operands[2]) for the *xop_vmfrcz insn below.
18963 (define_expand "xop_vmfrcz<mode>2"
18964 [(set (match_operand:VF_128 0 "register_operand")
18967 [(match_operand:VF_128 1 "nonimmediate_operand")]
18972 "operands[2] = CONST0_RTX (<MODE>mode);")
;; vfrczss/sd: fractional part of the low element, upper elements
;; merged from the zero vector in operand 2.
18974 (define_insn "*xop_vmfrcz<mode>2"
18975 [(set (match_operand:VF_128 0 "register_operand" "=x")
18978 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
18980 (match_operand:VF_128 2 "const0_operand")
18983 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
18984 [(set_attr "type" "ssecvt1")
18985 (set_attr "mode" "<MODE>")])
;; XOP signed integer vector compare producing an all-ones/all-zeros
;; mask; %Y1 prints the comparison code as the vpcom condition suffix.
18987 (define_insn "xop_maskcmp<mode>3"
18988 [(set (match_operand:VI_128 0 "register_operand" "=x")
18989 (match_operator:VI_128 1 "ix86_comparison_int_operator"
18990 [(match_operand:VI_128 2 "register_operand" "x")
18991 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
18993 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
18994 [(set_attr "type" "sse4arg")
18995 (set_attr "prefix_data16" "0")
18996 (set_attr "prefix_rep" "0")
18997 (set_attr "prefix_extra" "2")
18998 (set_attr "length_immediate" "1")
18999 (set_attr "mode" "TI")])
;; XOP unsigned integer vector compare (vpcom*u*), mask result.
19001 (define_insn "xop_maskcmp_uns<mode>3"
19002 [(set (match_operand:VI_128 0 "register_operand" "=x")
19003 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
19004 [(match_operand:VI_128 2 "register_operand" "x")
19005 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
19007 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
19008 [(set_attr "type" "ssecmp")
19009 (set_attr "prefix_data16" "0")
19010 (set_attr "prefix_rep" "0")
19011 (set_attr "prefix_extra" "2")
19012 (set_attr "length_immediate" "1")
19013 (set_attr "mode" "TI")])
19015 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
19016 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
19017 ;; the exact instruction generated for the intrinsic.
;; The comparison is wrapped in an unspec so the optimizers cannot
;; canonicalize eq/ne to the signed forms.
19018 (define_insn "xop_maskcmp_uns2<mode>3"
19019 [(set (match_operand:VI_128 0 "register_operand" "=x")
19021 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
19022 [(match_operand:VI_128 2 "register_operand" "x")
19023 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
19024 UNSPEC_XOP_UNSIGNED_CMP))]
19026 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
19027 [(set_attr "type" "ssecmp")
19028 (set_attr "prefix_data16" "0")
19029 (set_attr "prefix_extra" "2")
19030 (set_attr "length_immediate" "1")
19031 (set_attr "mode" "TI")])
19033 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
19034 ;; being added here to be complete.
;; Operand 3 selects the variant: nonzero emits vpcomtrue (all-ones
;; result), zero emits vpcomfalse (all-zeros result).
19035 (define_insn "xop_pcom_tf<mode>3"
19036 [(set (match_operand:VI_128 0 "register_operand" "=x")
19038 [(match_operand:VI_128 1 "register_operand" "x")
19039 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
19040 (match_operand:SI 3 "const_int_operand" "n")]
19041 UNSPEC_XOP_TRUEFALSE))]
19044 return ((INTVAL (operands[3]) != 0)
19045 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19046 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
19048 [(set_attr "type" "ssecmp")
19049 (set_attr "prefix_data16" "0")
19050 (set_attr "prefix_extra" "2")
19051 (set_attr "length_immediate" "1")
19052 (set_attr "mode" "TI")])
;; XOP two-source FP permute (vpermil2ps/pd): selects elements from
;; operands 1 and 2 under the integer selector vector 3, with imm8
;; operand 4 controlling the zero-match behavior.
19054 (define_insn "xop_vpermil2<mode>3"
19055 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
19057 [(match_operand:VF_128_256 1 "register_operand" "x,x")
19058 (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
19059 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
19060 (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
19063 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
19064 [(set_attr "type" "sse4arg")
19065 (set_attr "length_immediate" "1")
19066 (set_attr "mode" "<MODE>")])
19068 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AES-NI: one round of AES encryption (state in op 1, round key in
;; op 2).  Alternatives: legacy 2-operand SSE form and VEX 3-operand.
19070 (define_insn "aesenc"
19071 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19072 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19073 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19077 aesenc\t{%2, %0|%0, %2}
19078 vaesenc\t{%2, %1, %0|%0, %1, %2}"
19079 [(set_attr "isa" "noavx,avx")
19080 (set_attr "type" "sselog1")
19081 (set_attr "prefix_extra" "1")
19082 (set_attr "prefix" "orig,vex")
19083 (set_attr "btver2_decode" "double,double")
19084 (set_attr "mode" "TI")])
;; AES-NI: final round of AES encryption.
19086 (define_insn "aesenclast"
19087 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19088 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19089 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19090 UNSPEC_AESENCLAST))]
19093 aesenclast\t{%2, %0|%0, %2}
19094 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
19095 [(set_attr "isa" "noavx,avx")
19096 (set_attr "type" "sselog1")
19097 (set_attr "prefix_extra" "1")
19098 (set_attr "prefix" "orig,vex")
19099 (set_attr "btver2_decode" "double,double")
19100 (set_attr "mode" "TI")])
;; AES-NI: one round of AES decryption.
19102 (define_insn "aesdec"
19103 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19104 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19105 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19109 aesdec\t{%2, %0|%0, %2}
19110 vaesdec\t{%2, %1, %0|%0, %1, %2}"
19111 [(set_attr "isa" "noavx,avx")
19112 (set_attr "type" "sselog1")
19113 (set_attr "prefix_extra" "1")
19114 (set_attr "prefix" "orig,vex")
19115 (set_attr "btver2_decode" "double,double")
19116 (set_attr "mode" "TI")])
;; AES-NI: final round of AES decryption.
19118 (define_insn "aesdeclast"
19119 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19120 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19121 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19122 UNSPEC_AESDECLAST))]
19125 aesdeclast\t{%2, %0|%0, %2}
19126 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
19127 [(set_attr "isa" "noavx,avx")
19128 (set_attr "type" "sselog1")
19129 (set_attr "prefix_extra" "1")
19130 (set_attr "prefix" "orig,vex")
19131 (set_attr "btver2_decode" "double,double")
19132 (set_attr "mode" "TI")])
;; AES-NI: InvMixColumns transform of a round key (%v emits the VEX
;; "v" prefix when AVX is enabled).
19134 (define_insn "aesimc"
19135 [(set (match_operand:V2DI 0 "register_operand" "=x")
19136 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
19139 "%vaesimc\t{%1, %0|%0, %1}"
19140 [(set_attr "type" "sselog1")
19141 (set_attr "prefix_extra" "1")
19142 (set_attr "prefix" "maybe_vex")
19143 (set_attr "mode" "TI")])
;; AES-NI: round-key generation assist; operand 2 is the imm8 round
;; constant.
19145 (define_insn "aeskeygenassist"
19146 [(set (match_operand:V2DI 0 "register_operand" "=x")
19147 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
19148 (match_operand:SI 2 "const_0_to_255_operand" "n")]
19149 UNSPEC_AESKEYGENASSIST))]
19151 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
19152 [(set_attr "type" "sselog1")
19153 (set_attr "prefix_extra" "1")
19154 (set_attr "length_immediate" "1")
19155 (set_attr "prefix" "maybe_vex")
19156 (set_attr "mode" "TI")])
;; Carry-less (polynomial) 64x64->128 multiply; imm8 operand 3
;; selects which quadword of each source participates.
19158 (define_insn "pclmulqdq"
19159 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19160 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19161 (match_operand:V2DI 2 "vector_operand" "xBm,xm")
19162 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
19166 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
19167 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19168 [(set_attr "isa" "noavx,avx")
19169 (set_attr "type" "sselog1")
19170 (set_attr "prefix_extra" "1")
19171 (set_attr "length_immediate" "1")
19172 (set_attr "prefix" "orig,vex")
19173 (set_attr "mode" "TI")])
;; Build the parallel for vzeroall: one unspec_volatile marker plus a
;; (set reg 0) for each SSE register (16 in 64-bit mode, 8 otherwise),
;; so dataflow knows every vector register is clobbered.
19175 (define_expand "avx_vzeroall"
19176 [(match_par_dup 0 [(const_int 0)])]
19179 int nregs = TARGET_64BIT ? 16 : 8;
19182 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
19184 XVECEXP (operands[0], 0, 0)
19185 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
19188 for (regno = 0; regno < nregs; regno++)
19189 XVECEXP (operands[0], 0, regno + 1)
19190 = gen_rtx_SET (gen_rtx_REG (V8SImode, GET_SSE_REGNO (regno)),
19191 CONST0_RTX (V8SImode));
;; The vzeroall instruction itself, matched against the parallel
;; built by the expander above.
19194 (define_insn "*avx_vzeroall"
19195 [(match_parallel 0 "vzeroall_operation"
19196 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
19199 [(set_attr "type" "sse")
19200 (set_attr "modrm" "0")
19201 (set_attr "memory" "none")
19202 (set_attr "prefix" "vex")
19203 (set_attr "btver2_decode" "vector")
19204 (set_attr "mode" "OI")])
19206 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
19207 ;; if the upper 128bits are unused.
19208 (define_insn "avx_vzeroupper"
19209 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
19212 [(set_attr "type" "sse")
19213 (set_attr "modrm" "0")
19214 (set_attr "memory" "none")
19215 (set_attr "prefix" "vex")
19216 (set_attr "btver2_decode" "vector")
19217 (set_attr "mode" "OI")])
;; Maps a vector mode to the ISA attribute required for the EVEX
;; (AVX-512) encoding of vpbroadcast: byte/word elements need
;; AVX512BW, dword/qword elements only AVX512F.
19219 (define_mode_attr pbroadcast_evex_isa
19220 [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
19221 (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
19222 (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
19223 (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
;; vpbroadcast: replicate element 0 of an XMM-sized source across the
;; whole destination vector; second alternative is the EVEX encoding
;; gated by <pbroadcast_evex_isa>.
19225 (define_insn "avx2_pbroadcast<mode>"
19226 [(set (match_operand:VI 0 "register_operand" "=x,v")
19228 (vec_select:<ssescalarmode>
19229 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
19230 (parallel [(const_int 0)]))))]
19232 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
19233 [(set_attr "isa" "*,<pbroadcast_evex_isa>")
19234 (set_attr "type" "ssemov")
19235 (set_attr "prefix_extra" "1")
19236 (set_attr "prefix" "vex,evex")
19237 (set_attr "mode" "<sseinsnmode>")])
;; Variant broadcasting element 0 of a 256-bit source; register
;; alternatives use the %x (low XMM) form of the source.
19239 (define_insn "avx2_pbroadcast<mode>_1"
19240 [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
19241 (vec_duplicate:VI_256
19242 (vec_select:<ssescalarmode>
19243 (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
19244 (parallel [(const_int 0)]))))]
19247 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
19248 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
19249 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
19250 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
19251 [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
19252 (set_attr "type" "ssemov")
19253 (set_attr "prefix_extra" "1")
19254 (set_attr "prefix" "vex")
19255 (set_attr "mode" "<sseinsnmode>")])
;; Full-vector variable permutation (vpermd/q/ps/pd) of 256/512-bit
;; vectors with 32/64-bit elements; operand 2 holds the indices.
;; Supports AVX-512 masking via <mask_name>.
19257 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
19258 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
19259 (unspec:VI48F_256_512
19260 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
19261 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
19263 "TARGET_AVX2 && <mask_mode512bit_condition>"
19264 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
19265 [(set_attr "type" "sselog")
19266 (set_attr "prefix" "<mask_prefix2>")
19267 (set_attr "mode" "<sseinsnmode>")])
;; Byte-element variable permutation (vpermb), AVX512VBMI.
19269 (define_insn "<avx512>_permvar<mode><mask_name>"
19270 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
19271 (unspec:VI1_AVX512VL
19272 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
19273 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
19275 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
19276 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
19277 [(set_attr "type" "sselog")
19278 (set_attr "prefix" "<mask_prefix2>")
19279 (set_attr "mode" "<sseinsnmode>")])
;; Word-element variable permutation (vpermw), AVX512BW.
19281 (define_insn "<avx512>_permvar<mode><mask_name>"
19282 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
19283 (unspec:VI2_AVX512VL
19284 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
19285 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
19287 "TARGET_AVX512BW && <mask_mode512bit_condition>"
19288 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
19289 [(set_attr "type" "sselog")
19290 (set_attr "prefix" "<mask_prefix2>")
19291 (set_attr "mode" "<sseinsnmode>")])
;; Expander for immediate-controlled permutation of 256-bit 64-bit
;; element vectors (vpermq/vpermpd): splits the imm8 into four 2-bit
;; lane selectors for the _1 insn pattern.
19293 (define_expand "avx2_perm<mode>"
19294 [(match_operand:VI8F_256 0 "register_operand")
19295 (match_operand:VI8F_256 1 "nonimmediate_operand")
19296 (match_operand:SI 2 "const_0_to_255_operand")]
19299 int mask = INTVAL (operands[2]);
19300 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
19301 GEN_INT ((mask >> 0) & 3),
19302 GEN_INT ((mask >> 2) & 3),
19303 GEN_INT ((mask >> 4) & 3),
19304 GEN_INT ((mask >> 6) & 3)));
;; Masked variant of the 256-bit imm8 permutation: same imm8 split,
;; passing the merge source (op 3) and mask register (op 4) through.
19308 (define_expand "avx512vl_perm<mode>_mask"
19309 [(match_operand:VI8F_256 0 "register_operand")
19310 (match_operand:VI8F_256 1 "nonimmediate_operand")
19311 (match_operand:SI 2 "const_0_to_255_operand")
19312 (match_operand:VI8F_256 3 "nonimm_or_0_operand")
19313 (match_operand:<avx512fmaskmode> 4 "register_operand")]
19316 int mask = INTVAL (operands[2]);
19317 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
19318 GEN_INT ((mask >> 0) & 3),
19319 GEN_INT ((mask >> 2) & 3),
19320 GEN_INT ((mask >> 4) & 3),
19321 GEN_INT ((mask >> 6) & 3),
19322 operands[3], operands[4]));
;; The imm8 permutation insn: four 2-bit selector operands are
;; reassembled into a single imm8 in the output fragment (reusing
;; operands[2] to print it).
19326 (define_insn "avx2_perm<mode>_1<mask_name>"
19327 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
19328 (vec_select:VI8F_256
19329 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
19330 (parallel [(match_operand 2 "const_0_to_3_operand")
19331 (match_operand 3 "const_0_to_3_operand")
19332 (match_operand 4 "const_0_to_3_operand")
19333 (match_operand 5 "const_0_to_3_operand")])))]
19334 "TARGET_AVX2 && <mask_mode512bit_condition>"
19337 mask |= INTVAL (operands[2]) << 0;
19338 mask |= INTVAL (operands[3]) << 2;
19339 mask |= INTVAL (operands[4]) << 4;
19340 mask |= INTVAL (operands[5]) << 6;
19341 operands[2] = GEN_INT (mask);
19342 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
19344 [(set_attr "type" "sselog")
19345 (set_attr "prefix" "<mask_prefix2>")
19346 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit imm8 permutation expander: the same 2-bit selector pattern
;; is applied to both 256-bit halves (low lanes 0-3, high lanes 4-7,
;; hence the "+ 4" offsets).
19348 (define_expand "avx512f_perm<mode>"
19349 [(match_operand:V8FI 0 "register_operand")
19350 (match_operand:V8FI 1 "nonimmediate_operand")
19351 (match_operand:SI 2 "const_0_to_255_operand")]
19354 int mask = INTVAL (operands[2]);
19355 emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
19356 GEN_INT ((mask >> 0) & 3),
19357 GEN_INT ((mask >> 2) & 3),
19358 GEN_INT ((mask >> 4) & 3),
19359 GEN_INT ((mask >> 6) & 3),
19360 GEN_INT (((mask >> 0) & 3) + 4),
19361 GEN_INT (((mask >> 2) & 3) + 4),
19362 GEN_INT (((mask >> 4) & 3) + 4),
19363 GEN_INT (((mask >> 6) & 3) + 4)));
;; Masked 512-bit imm8 permutation: as above, with merge source
;; (op 3) and mask register (op 4) appended.
19367 (define_expand "avx512f_perm<mode>_mask"
19368 [(match_operand:V8FI 0 "register_operand")
19369 (match_operand:V8FI 1 "nonimmediate_operand")
19370 (match_operand:SI 2 "const_0_to_255_operand")
19371 (match_operand:V8FI 3 "nonimm_or_0_operand")
19372 (match_operand:<avx512fmaskmode> 4 "register_operand")]
19375 int mask = INTVAL (operands[2]);
19376 emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
19377 GEN_INT ((mask >> 0) & 3),
19378 GEN_INT ((mask >> 2) & 3),
19379 GEN_INT ((mask >> 4) & 3),
19380 GEN_INT ((mask >> 6) & 3),
19381 GEN_INT (((mask >> 0) & 3) + 4),
19382 GEN_INT (((mask >> 2) & 3) + 4),
19383 GEN_INT (((mask >> 4) & 3) + 4),
19384 GEN_INT (((mask >> 6) & 3) + 4),
19385 operands[3], operands[4]));
;; 512-bit imm8 permutation insn: eight selector operands, where the
;; high-half selectors (6-9) must equal the low-half ones (2-5) plus 4
;; (enforced by the condition), so a single imm8 describes both halves.
19389 (define_insn "avx512f_perm<mode>_1<mask_name>"
19390 [(set (match_operand:V8FI 0 "register_operand" "=v")
19392 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
19393 (parallel [(match_operand 2 "const_0_to_3_operand")
19394 (match_operand 3 "const_0_to_3_operand")
19395 (match_operand 4 "const_0_to_3_operand")
19396 (match_operand 5 "const_0_to_3_operand")
19397 (match_operand 6 "const_4_to_7_operand")
19398 (match_operand 7 "const_4_to_7_operand")
19399 (match_operand 8 "const_4_to_7_operand")
19400 (match_operand 9 "const_4_to_7_operand")])))]
19401 "TARGET_AVX512F && <mask_mode512bit_condition>
19402 && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
19403 && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
19404 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
19405 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
19408 mask |= INTVAL (operands[2]) << 0;
19409 mask |= INTVAL (operands[3]) << 2;
19410 mask |= INTVAL (operands[4]) << 4;
19411 mask |= INTVAL (operands[5]) << 6;
19412 operands[2] = GEN_INT (mask);
19413 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
19415 [(set_attr "type" "sselog")
19416 (set_attr "prefix" "<mask_prefix2>")
19417 (set_attr "mode" "<sseinsnmode>")])
;; vperm2i128: select/combine 128-bit lanes of two 256-bit sources
;; under imm8 operand 3.
19419 (define_insn "avx2_permv2ti"
19420 [(set (match_operand:V4DI 0 "register_operand" "=x")
19422 [(match_operand:V4DI 1 "register_operand" "x")
19423 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
19424 (match_operand:SI 3 "const_0_to_255_operand" "n")]
19427 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19428 [(set_attr "type" "sselog")
19429 (set_attr "prefix" "vex")
19430 (set_attr "mode" "OI")])
;; vbroadcastsd: replicate the low double of an XMM register across
;; all four V4DF elements.
19432 (define_insn "avx2_vec_dupv4df"
19433 [(set (match_operand:V4DF 0 "register_operand" "=v")
19434 (vec_duplicate:V4DF
19436 (match_operand:V2DF 1 "register_operand" "v")
19437 (parallel [(const_int 0)]))))]
19439 "vbroadcastsd\t{%1, %0|%0, %1}"
19440 [(set_attr "type" "sselog1")
19441 (set_attr "prefix" "maybe_evex")
19442 (set_attr "mode" "V4DF")])
;; EVEX vpbroadcast of element 0 of a same-width source (register
;; alternative uses the low-XMM %x form of the operand).
19444 (define_insn "<avx512>_vec_dup<mode>_1"
19445 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
19446 (vec_duplicate:VI_AVX512BW
19447 (vec_select:<ssescalarmode>
19448 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
19449 (parallel [(const_int 0)]))))]
19452 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
19453 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
19454 [(set_attr "type" "ssemov")
19455 (set_attr "prefix" "evex")
19456 (set_attr "mode" "<sseinsnmode>")])
;; Maskable EVEX broadcast of element 0 for 32/64-bit element vector
;; modes; V2DF has no DF broadcast to a 128-bit register, so the
;; integer vpbroadcastq form is used instead.
19458 (define_insn "<avx512>_vec_dup<mode><mask_name>"
19459 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
19460 (vec_duplicate:V48_AVX512VL
19461 (vec_select:<ssescalarmode>
19462 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
19463 (parallel [(const_int 0)]))))]
19466 /* There is no DF broadcast (in AVX-512*) to 128b register.
19467 Mimic it with integer variant. */
19468 if (<MODE>mode == V2DFmode)
19469 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
19471 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}";
19473 [(set_attr "type" "ssemov")
19474 (set_attr "prefix" "evex")
19475 (set_attr "mode" "<sseinsnmode>")])
;; Maskable EVEX vpbroadcast of element 0 for 8/16-bit element
;; vector modes.
19477 (define_insn "<avx512>_vec_dup<mode><mask_name>"
19478 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
19479 (vec_duplicate:VI12_AVX512VL
19480 (vec_select:<ssescalarmode>
19481 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
19482 (parallel [(const_int 0)]))))]
19484 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}"
19485 [(set_attr "type" "ssemov")
19486 (set_attr "prefix" "evex")
19487 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a whole 128-bit lane into a 512-bit vector of 32-bit elements
;; (V16FI).  Register source has no 128->512 broadcast insn, so a
;; self-shuffle of the zero-extended %g1 form is used; memory source uses
;; vbroadcast[if]32x4.
19489 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
19490 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
19491 (vec_duplicate:V16FI
19492 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
19495 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
19496 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19497 [(set_attr "type" "ssemov")
19498 (set_attr "prefix" "evex")
19499 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a 256-bit half into a 512-bit vector of 64-bit elements (V8FI):
;; vshuf[if]64x2 with imm 0x44 for a register source, vbroadcast[if]64x4
;; for memory.
19501 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
19502 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
19503 (vec_duplicate:V8FI
19504 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
19507 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
19508 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19509 [(set_attr "type" "ssemov")
19510 (set_attr "prefix" "evex")
19511 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a scalar (xmm/mem, or a general-purpose register via the %k1
;; 32-bit view) into a byte/word-element AVX-512 vector.
19513 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
19514 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
19515 (vec_duplicate:VI12_AVX512VL
19516 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
19519 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
19520 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
19521 [(set_attr "type" "ssemov")
19522 (set_attr "prefix" "evex")
19523 (set_attr "mode" "<sseinsnmode>")])
;; Same broadcast for 4/8-byte elements; the GPR alternative (1) is only
;; enabled for integer element modes, and DImode additionally requires
;; TARGET_64BIT (no 64-bit GPR source on 32-bit targets).
;; NOTE(review): the closing lines of the "enabled" attribute (19538-19539)
;; are absent from this listing.
19525 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
19526 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
19527 (vec_duplicate:V48_AVX512VL
19528 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
19530 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19531 [(set_attr "type" "ssemov")
19532 (set_attr "prefix" "evex")
19533 (set_attr "mode" "<sseinsnmode>")
19534 (set (attr "enabled")
19535 (if_then_else (eq_attr "alternative" "1")
19536 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
19537 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
;; 128-bit scalar broadcasts.  Each has an AVX shuffle form, an AVX
;; broadcast-from-memory form, and a legacy (noavx) in-place shuffle form.
;; vec_dupv4sf: SF scalar -> all four V4SF lanes.
19540 (define_insn "vec_dupv4sf"
19541 [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
19542 (vec_duplicate:V4SF
19543 (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
19546 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
19547 vbroadcastss\t{%1, %0|%0, %1}
19548 shufps\t{$0, %0, %0|%0, %0, 0}"
19549 [(set_attr "isa" "avx,avx,noavx")
19550 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
19551 (set_attr "length_immediate" "1,0,1")
19552 (set_attr "prefix_extra" "0,1,*")
19553 (set_attr "prefix" "maybe_evex,maybe_evex,orig")
19554 (set_attr "mode" "V4SF")])
;; *vec_dupv4si: SI scalar -> V4SI; pshufd (sse2), vbroadcastss from memory
;; (the float form is encoding-equivalent for a pure move), or shufps.
19556 (define_insn "*vec_dupv4si"
19557 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
19558 (vec_duplicate:V4SI
19559 (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
19562 %vpshufd\t{$0, %1, %0|%0, %1, 0}
19563 vbroadcastss\t{%1, %0|%0, %1}
19564 shufps\t{$0, %0, %0|%0, %0, 0}"
19565 [(set_attr "isa" "sse2,avx,noavx")
19566 (set_attr "type" "sselog1,ssemov,sselog1")
19567 (set_attr "length_immediate" "1,0,1")
19568 (set_attr "prefix_extra" "0,1,*")
19569 (set_attr "prefix" "maybe_vex,maybe_evex,orig")
19570 (set_attr "mode" "TI,V4SF,V4SF")])
;; *vec_dupv2di: DI scalar -> V2DI via punpcklqdq or movddup.
;; NOTE(review): the legacy-SSE alternatives' templates (19576-19578, 19581)
;; are absent from this listing.
19572 (define_insn "*vec_dupv2di"
19573 [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
19574 (vec_duplicate:V2DI
19575 (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,vm,0")))]
19579 vpunpcklqdq\t{%d1, %0|%0, %d1}
19580 %vmovddup\t{%1, %0|%0, %1}
19582 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
19583 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
19584 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
19585 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Broadcast a 128-bit memory operand into both lanes of a 256-bit integer
;; vector; AVX2 vbroadcasti128, or the AVX512DQ/AVX512VL element-typed
;; variants when available.
19587 (define_insn "avx2_vbroadcasti128_<mode>"
19588 [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
19590 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
19594 vbroadcasti128\t{%1, %0|%0, %1}
19595 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
19596 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
19597 [(set_attr "isa" "*,avx512dq,avx512vl")
19598 (set_attr "type" "ssemov")
19599 (set_attr "prefix_extra" "1")
19600 (set_attr "prefix" "vex,evex,evex")
19601 (set_attr "mode" "OI")])
;; Iterators/attrs driving the generic 256-bit vec_dup patterns below:
;; AVX handles 32/64-bit elements, AVX2 adds 8/16-bit (and 128-bit V4SI).
19603 ;; Modes handled by AVX vec_dup patterns.
19604 (define_mode_iterator AVX_VEC_DUP_MODE
19605 [V8SI V8SF V4DI V4DF])
;; Scalar FP suffix used when falling back to vbroadcastss/sd for the
;; integer modes (same bit pattern for a pure broadcast).
19606 (define_mode_attr vecdupssescalarmodesuffix
19607 [(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")])
19608 ;; Modes handled by AVX2 vec_dup patterns.
19609 (define_mode_iterator AVX2_VEC_DUP_MODE
19610 [V32QI V16QI V16HI V8HI V8SI V4SI])
;; AVX2 scalar broadcast from mem, xmm, or (alternative 2) a GPR; the GPR
;; alternative is split later (see the following define_split) unless
;; AVX512VL can broadcast from GPRs directly.  Moving from a GPR is gated
;; on TARGET_INTER_UNIT_MOVES_TO_VEC for speed.
;; NOTE(review): the template for alternative 2 (19620) is absent from this
;; listing.
19612 (define_insn "*vec_dup<mode>"
19613 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,v")
19614 (vec_duplicate:AVX2_VEC_DUP_MODE
19615 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
19618 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
19619 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
19621 [(set_attr "isa" "*,*,noavx512vl")
19622 (set_attr "type" "ssemov")
19623 (set_attr "prefix_extra" "1")
19624 (set_attr "prefix" "maybe_evex")
19625 (set_attr "mode" "<sseinsnmode>")
19626 (set (attr "preferred_for_speed")
19627 (cond [(eq_attr "alternative" "2")
19628 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
19630 (symbol_ref "true")))])
;; AVX 256-bit broadcast of a 32/64-bit scalar: AVX2 broadcast from memory
;; or register, noavx2 fallback via vbroadcastss/sd, AVX512F form writing
;; the %g0 zmm view.  Alternative templates at 19642 are absent here.
19632 (define_insn "vec_dup<mode>"
19633 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
19634 (vec_duplicate:AVX_VEC_DUP_MODE
19635 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
19638 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
19639 vbroadcast<vecdupssescalarmodesuffix>\t{%1, %0|%0, %1}
19640 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
19641 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
19643 [(set_attr "type" "ssemov")
19644 (set_attr "prefix_extra" "1")
19645 (set_attr "prefix" "maybe_evex")
19646 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
19647 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
;; Split for broadcasting from a GPR without AVX512VL: first move the GPR
;; into element 0 of an xmm (vec_setv4si_0), then vpbroadcast from the xmm.
;; NOTE(review): the define_split header (19649) and trailing lines
;; (19668-19671) are absent from this listing.
19650 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
19651 (vec_duplicate:AVX2_VEC_DUP_MODE
19652 (match_operand:<ssescalarmode> 1 "register_operand")))]
19654 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
19655 available, because then we can broadcast from GPRs directly.
19656 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
19657 for V*SI mode it requires just -mavx512vl. */
19658 && !(TARGET_AVX512VL
19659 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
19660 && reload_completed && GENERAL_REG_P (operands[1])"
19663 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
19664 CONST0_RTX (V4SImode),
19665 gen_lowpart (SImode, operands[1])));
19666 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
19667 gen_lowpart (<ssexmmmode>mode,
;; AVX-without-AVX2 split: broadcast into the low 128-bit half, then
;; vec_concat the half with itself to fill the 256-bit destination.
19673 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
19674 (vec_duplicate:AVX_VEC_DUP_MODE
19675 (match_operand:<ssescalarmode> 1 "register_operand")))]
19676 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
19677 [(set (match_dup 2)
19678 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
19680 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
19681 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
;; Duplicate a 128-bit operand into both halves of a 256-bit vector.  Seven
;; alternatives: plain AVX vbroadcast/vinsert/vperm2 plus AVX512DQ and
;; AVX512VL element-typed broadcast/insert forms.
19683 (define_insn "avx_vbroadcastf128_<mode>"
19684 [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
19686 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
19690 vbroadcast<i128>\t{%1, %0|%0, %1}
19691 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
19692 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
19693 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
19694 vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
19695 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
19696 vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
19697 [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
19698 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
19699 (set_attr "prefix_extra" "1")
19700 (set_attr "length_immediate" "0,1,1,0,1,0,1")
19701 (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
19702 (set_attr "mode" "<sseinsnmode>")])
;; Iterator and helper mode attrs for the broadcast[if]32x2 / 64x2 insns:
;; map each vector mode to the 2-element source mode being broadcast.
19704 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
19705 (define_mode_iterator VI4F_BRCST32x2
19706 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
19707 V16SF (V8SF "TARGET_AVX512VL")])
19709 (define_mode_attr 64x2mode
19710 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
19712 (define_mode_attr 32x2mode
19713 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
19714 (V8SF "V2SF") (V4SI "V2SI")])
;; vbroadcast[if]32x2: duplicate the low two 32-bit elements of an xmm/mem
;; operand across the destination (hence the %q1 64-bit memory view).
19716 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
19717 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
19718 (vec_duplicate:VI4F_BRCST32x2
19719 (vec_select:<32x2mode>
19720 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
19721 (parallel [(const_int 0) (const_int 1)]))))]
19723 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
19724 [(set_attr "type" "ssemov")
19725 (set_attr "prefix_extra" "1")
19726 (set_attr "prefix" "evex")
19727 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit lane broadcast into a 256-bit vector: self-shuffle (%t1 view)
;; for a register source, vbroadcast[if]32x4 for memory.
19729 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
19730 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
19731 (vec_duplicate:VI4F_256
19732 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
19735 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
19736 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19737 [(set_attr "type" "ssemov")
19738 (set_attr "prefix_extra" "1")
19739 (set_attr "prefix" "evex")
19740 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit half broadcast into a 512-bit 32-bit-element vector:
;; vshuf 0x44 self-shuffle for registers, vbroadcast[if]32x8 for memory.
19742 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
19743 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
19744 (vec_duplicate:V16FI
19745 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
19748 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
19749 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19750 [(set_attr "type" "ssemov")
19751 (set_attr "prefix_extra" "1")
19752 (set_attr "prefix" "evex")
19753 (set_attr "mode" "<sseinsnmode>")])
;; vbroadcast[if]64x2: duplicate a 128-bit pair of 64-bit elements across a
;; 256/512-bit destination; register source uses a vshuf self-shuffle on the
;; <xtg_mode>-extended view.
19755 ;; For broadcast[i|f]64x2
19756 (define_mode_iterator VI8F_BRCST64x2
19757 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
19759 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
19760 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
19761 (vec_duplicate:VI8F_BRCST64x2
19762 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
19765 vshuf<shuffletype>64x2\t{$0x0, %<xtg_mode>1, %<xtg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<xtg_mode>1, %<xtg_mode>1, 0x0}
19766 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19767 [(set_attr "type" "ssemov")
19768 (set_attr "prefix_extra" "1")
19769 (set_attr "prefix" "evex")
19770 (set_attr "mode" "<sseinsnmode>")])
;; AVX512CD: broadcast a mask register into every element of an integer
;; vector (vpbroadcastmb2q for QI masks -> 64-bit elts, vpbroadcastmw2d for
;; HI masks -> 32-bit elts).
19772 (define_insn "avx512cd_maskb_vec_dup<mode>"
19773 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
19774 (vec_duplicate:VI8_AVX512VL
19776 (match_operand:QI 1 "register_operand" "k"))))]
19778 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
19779 [(set_attr "type" "mskmov")
19780 (set_attr "prefix" "evex")
19781 (set_attr "mode" "XI")])
19783 (define_insn "avx512cd_maskw_vec_dup<mode>"
19784 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
19785 (vec_duplicate:VI4_AVX512VL
19787 (match_operand:HI 1 "register_operand" "k"))))]
19789 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
19790 [(set_attr "type" "mskmov")
19791 (set_attr "prefix" "evex")
19792 (set_attr "mode" "XI")])
;; Broadcast-of-element-<elt> recognized from a vec_select permutation:
;; from memory, fold the element offset into the address and vbroadcastss;
;; from a register, vpermilps with imm elt*0x55.
;; NOTE(review): some case labels of the switch (19807-19809, 19812, 19815)
;; are absent from this listing.
19794 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
19795 ;; If it so happens that the input is in memory, use vbroadcast.
19796 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
19797 (define_insn "*avx_vperm_broadcast_v4sf"
19798 [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
19800 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
19801 (match_parallel 2 "avx_vbroadcast_operand"
19802 [(match_operand 3 "const_int_operand" "C,n,n")])))]
19805 int elt = INTVAL (operands[3]);
19806 switch (which_alternative)
19810 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
19811 return "vbroadcastss\t{%1, %0|%0, %k1}";
19813 operands[2] = GEN_INT (elt * 0x55);
19814 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
19816 gcc_unreachable ();
19819 [(set_attr "type" "ssemov,ssemov,sselog1")
19820 (set_attr "prefix_extra" "1")
19821 (set_attr "length_immediate" "0,0,1")
19822 (set_attr "prefix" "maybe_evex")
19823 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit FP broadcast recognized from a permutation; split after reload.
;; Memory sources become a plain vec_duplicate (address adjusted to the
;; selected element); register sources become vpermilps/pd within lanes
;; followed by a cross-lane vperm2f128 (or, for EVEX registers where
;; vperm2f128 does not exist, a dup/shuf_f32x4 sequence).
19825 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
19826 [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
19828 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
19829 (match_parallel 2 "avx_vbroadcast_operand"
19830 [(match_operand 3 "const_int_operand" "C,n,n")])))]
19833 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
19834 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
19836 rtx op0 = operands[0], op1 = operands[1];
19837 int elt = INTVAL (operands[3]);
;; With AVX2 and elt == 0 a direct register broadcast is available.
19843 if (TARGET_AVX2 && elt == 0)
19845 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
19850 /* Shuffle element we care about into all elements of the 128-bit lane.
19851 The other lane gets shuffled too, but we don't care. */
19852 if (<MODE>mode == V4DFmode)
19853 mask = (elt & 1 ? 15 : 0);
19855 mask = (elt & 3) * 0x55;
19856 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
19858 /* Shuffle the lane we care about into both lanes of the dest. */
19859 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
19860 if (EXT_REX_SSE_REG_P (op0))
19862 /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
19864 gcc_assert (<MODE>mode == V8SFmode);
;; Low lane wanted in both halves: dup the low 128 bits; otherwise move
;; the high lane into both halves with shuf_f32x4.
19865 if ((mask & 1) == 0)
19866 emit_insn (gen_avx2_vec_dupv8sf (op0,
19867 gen_lowpart (V4SFmode, op0)));
19869 emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
19870 GEN_INT (4), GEN_INT (5),
19871 GEN_INT (6), GEN_INT (7),
19872 GEN_INT (12), GEN_INT (13),
19873 GEN_INT (14), GEN_INT (15)));
19877 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
;; Memory case: point operands[1] at the selected scalar element.
19881 operands[1] = adjust_address (op1, <ssescalarmode>mode,
19882 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Immediate-form vpermilpd expander: each imm bit selects one of the two
;; elements within its 128-bit lane, so build the equivalent vec_select
;; PARALLEL from the mask, one bit per element.
19885 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
19886 [(set (match_operand:VF2 0 "register_operand")
19888 (match_operand:VF2 1 "nonimmediate_operand")
19889 (match_operand:SI 2 "const_0_to_255_operand")))]
19890 "TARGET_AVX && <mask_mode512bit_condition>"
19892 int mask = INTVAL (operands[2]);
19893 rtx perm[<ssescalarnum>];
19896 for (i = 0; i < <ssescalarnum>; i = i + 2)
19898 perm[i] = GEN_INT (((mask >> i) & 1) + i);
19899 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
19903 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Immediate-form vpermilps expander: two imm bits per element select one
;; of the four elements within each 128-bit lane; the same 8-bit pattern is
;; reused for every lane.
19906 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
19907 [(set (match_operand:VF1 0 "register_operand")
19909 (match_operand:VF1 1 "nonimmediate_operand")
19910 (match_operand:SI 2 "const_0_to_255_operand")))]
19911 "TARGET_AVX && <mask_mode512bit_condition>"
19913 int mask = INTVAL (operands[2]);
19914 rtx perm[<ssescalarnum>];
19917 for (i = 0; i < <ssescalarnum>; i = i + 4)
19919 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
19920 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
19921 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
19922 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
19926 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Match a vec_select PARALLEL that avx_vpermilp_parallel can encode as a
;; single in-lane immediate shuffle; the helper returns mask+1, hence -1.
19929 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
19930 [(set (match_operand:VF 0 "register_operand" "=v")
19932 (match_operand:VF 1 "nonimmediate_operand" "vm")
19933 (match_parallel 2 ""
19934 [(match_operand 3 "const_int_operand")])))]
19935 "TARGET_AVX && <mask_mode512bit_condition>
19936 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
19938 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
19939 operands[2] = GEN_INT (mask);
19940 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
19942 [(set_attr "type" "sselog")
19943 (set_attr "prefix_extra" "1")
19944 (set_attr "length_immediate" "1")
19945 (set_attr "prefix" "<mask_prefix>")
19946 (set_attr "mode" "<sseinsnmode>")])
;; Variable-control in-lane permute: selector comes from an integer vector
;; operand instead of an immediate.
19948 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
19949 [(set (match_operand:VF 0 "register_operand" "=v")
19951 [(match_operand:VF 1 "register_operand" "v")
19952 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
19954 "TARGET_AVX && <mask_mode512bit_condition>"
19955 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19956 [(set_attr "type" "sselog")
19957 (set_attr "prefix_extra" "1")
19958 (set_attr "btver2_decode" "vector")
19959 (set_attr "prefix" "<mask_prefix>")
19960 (set_attr "mode" "<sseinsnmode>")])
;; Modes supported by vpermi2/vpermt2: 32/64-bit elements with AVX512F
;; (VL for the narrow widths), 16-bit with AVX512BW, 8-bit with AVX512VBMI.
;; VPERMI2I is the integer-only subset.
19962 (define_mode_iterator VPERMI2
19963 [V16SI V16SF V8DI V8DF
19964 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
19965 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
19966 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
19967 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
19968 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
19969 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
19970 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
19971 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
19973 (define_mode_iterator VPERMI2I
19975 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
19976 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
19977 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
19978 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
19979 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
19980 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
;; Masked vpermi2var expander: forces the index operand into a register and
;; creates operand 5 as its <MODE>-typed view for the merge-with-index
;; semantics of vpermi2 (masked-off elements keep the index register value).
19982 (define_expand "<avx512>_vpermi2var<mode>3_mask"
19983 [(set (match_operand:VPERMI2 0 "register_operand")
19986 [(match_operand:<sseintvecmode> 2 "register_operand")
19987 (match_operand:VPERMI2 1 "register_operand")
19988 (match_operand:VPERMI2 3 "nonimmediate_operand")]
19991 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
19994 operands[2] = force_reg (<sseintvecmode>mode, operands[2]);
19995 operands[5] = gen_lowpart (<MODE>mode, operands[2]);
;; Masked vpermi2: the index register (operand 2) is both the permute
;; control and the merge fallback, matching hardware semantics; integer
;; variant ties it to operand 0 directly.
19998 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
19999 [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
20000 (vec_merge:VPERMI2I
20002 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
20003 (match_operand:VPERMI2I 1 "register_operand" "v")
20004 (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
20007 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
20009 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
20010 [(set_attr "type" "sselog")
20011 (set_attr "prefix" "evex")
20012 (set_attr "mode" "<sseinsnmode>")])
;; FP variant: the merge fallback is the index register reinterpreted in
;; the FP vector mode via a paradoxical subreg of operand 2.
20014 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
20015 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
20016 (vec_merge:VF_AVX512VL
20017 (unspec:VF_AVX512VL
20018 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
20019 (match_operand:VF_AVX512VL 1 "register_operand" "v")
20020 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
20022 (subreg:VF_AVX512VL (match_dup 2) 0)
20023 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
20025 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
20026 [(set_attr "type" "sselog")
20027 (set_attr "prefix" "evex")
20028 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masked vpermt2: delegate to the _maskz_1 pattern with an explicit
;; zero vector as the merge fallback.
20030 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
20031 [(match_operand:VPERMI2 0 "register_operand")
20032 (match_operand:<sseintvecmode> 1 "register_operand")
20033 (match_operand:VPERMI2 2 "register_operand")
20034 (match_operand:VPERMI2 3 "nonimmediate_operand")
20035 (match_operand:<avx512fmaskmode> 4 "register_operand")]
20038 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
20039 operands[0], operands[1], operands[2], operands[3],
20040 CONST0_RTX (<MODE>mode), operands[4]));
;; Unmasked/zero-masked two-table permute; the two alternatives let either
;; the index (vpermt2, op2 tied to dest) or the first table (vpermi2, op1
;; tied to dest) occupy the destination register.
20044 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
20045 [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
20047 [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
20048 (match_operand:VPERMI2 2 "register_operand" "0,v")
20049 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
20053 vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
20054 vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
20055 [(set_attr "type" "sselog")
20056 (set_attr "prefix" "evex")
20057 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermt2: the first table operand (tied to dest) is the
;; merge fallback for masked-off elements.
20059 (define_insn "<avx512>_vpermt2var<mode>3_mask"
20060 [(set (match_operand:VPERMI2 0 "register_operand" "=v")
20063 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
20064 (match_operand:VPERMI2 2 "register_operand" "0")
20065 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
20068 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
20070 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
20071 [(set_attr "type" "sselog")
20072 (set_attr "prefix" "evex")
20073 (set_attr "mode" "<sseinsnmode>")])
;; vperm2f128 expander.  When no lane is zeroed (imm bits 3 and 7 clear)
;; the operation is re-expressed as a vec_select over the concat of the two
;; inputs, so later passes can optimize it; otherwise the unspec insn
;; (*_full below) is used.  NOTE(review): lines 20082-20083 and the tail of
;; the body (20103-20107) are absent from this listing.
20075 (define_expand "avx_vperm2f128<mode>3"
20076 [(set (match_operand:AVX256MODE2P 0 "register_operand")
20077 (unspec:AVX256MODE2P
20078 [(match_operand:AVX256MODE2P 1 "register_operand")
20079 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
20080 (match_operand:SI 3 "const_0_to_255_operand")]
20081 UNSPEC_VPERMIL2F128))]
20084 int mask = INTVAL (operands[3]);
20085 if ((mask & 0x88) == 0)
20087 rtx perm[<ssescalarnum>], t1, t2;
20088 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
;; Low half of the result: lane selected by imm bits 0-1.
20090 base = (mask & 3) * nelt2;
20091 for (i = 0; i < nelt2; ++i)
20092 perm[i] = GEN_INT (base + i);
;; High half: lane selected by imm bits 4-5.
20094 base = ((mask >> 4) & 3) * nelt2;
20095 for (i = 0; i < nelt2; ++i)
20096 perm[i + nelt2] = GEN_INT (base + i);
20098 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
20099 operands[1], operands[2]);
20100 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
20101 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
20102 t2 = gen_rtx_SET (operands[0], t2);
20108 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
20109 ;; means that in order to represent this properly in rtl we'd have to
20110 ;; nest *another* vec_concat with a zero operand and do the select from
20111 ;; a 4x wide vector. That doesn't seem very nice.
;; Opaque (unspec) form covering the full imm8 space, including the
;; lane-zeroing encodings that have no clean RTL representation.
20112 (define_insn "*avx_vperm2f128<mode>_full"
20113 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
20114 (unspec:AVX256MODE2P
20115 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
20116 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
20117 (match_operand:SI 3 "const_0_to_255_operand" "n")]
20118 UNSPEC_VPERMIL2F128))]
20120 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20121 [(set_attr "type" "sselog")
20122 (set_attr "prefix_extra" "1")
20123 (set_attr "length_immediate" "1")
20124 (set_attr "prefix" "vex")
20125 (set_attr "mode" "<sseinsnmode>")])
;; Explicit vec_select form for permutes with no zeroed lane; emits a
;; cheaper vinsert[if]128 when only a 128-bit insert is needed.
;; NOTE(review): the condition guards around the vinsert returns
;; (20135, 20137, 20139, 20141, 20145) are absent from this listing.
20127 (define_insn "*avx_vperm2f128<mode>_nozero"
20128 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
20129 (vec_select:AVX256MODE2P
20130 (vec_concat:<ssedoublevecmode>
20131 (match_operand:AVX256MODE2P 1 "register_operand" "x")
20132 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
20133 (match_parallel 3 ""
20134 [(match_operand 4 "const_int_operand")])))]
20136 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
20138 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
20140 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
20142 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
20143 operands[3] = GEN_INT (mask);
20144 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
20146 [(set_attr "type" "sselog")
20147 (set_attr "prefix_extra" "1")
20148 (set_attr "length_immediate" "1")
20149 (set_attr "prefix" "vex")
20150 (set_attr "mode" "<sseinsnmode>")])
;; Recognize a rotate-style vec_select as palignr; the byte-shift immediate
;; is the first selected element index times the unit size.
;; NOTE(review): case labels of the switch (20164-20165, 20167-20168,
;; 20170, 20172-20173) are absent from this listing.
20152 (define_insn "*ssse3_palignr<mode>_perm"
20153 [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
20155 (match_operand:V_128 1 "register_operand" "0,x,v")
20156 (match_parallel 2 "palignr_operand"
20157 [(match_operand 3 "const_int_operand" "n,n,n")])))]
20160 operands[2] = (GEN_INT (INTVAL (operands[3])
20161 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
20163 switch (which_alternative)
20166 return "palignr\t{%2, %1, %0|%0, %1, %2}";
20169 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
20171 gcc_unreachable ();
20174 [(set_attr "isa" "noavx,avx,avx512bw")
20175 (set_attr "type" "sseishft")
20176 (set_attr "atom_unit" "sishuf")
20177 (set_attr "prefix_data16" "1,*,*")
20178 (set_attr "prefix_extra" "1")
20179 (set_attr "length_immediate" "1")
20180 (set_attr "prefix" "orig,vex,evex")])
;; Masked 128-bit insert into a 256-bit vector: dispatch on the immediate
;; (0 = low half, 1 = high half) to the corresponding vec_set_*_mask insn.
;; NOTE(review): case labels of the switches (e.g. 20194-20195, 20197-20198,
;; 20200-20201) are absent from this listing.
20182 (define_expand "avx512vl_vinsert<mode>"
20183 [(match_operand:VI48F_256 0 "register_operand")
20184 (match_operand:VI48F_256 1 "register_operand")
20185 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
20186 (match_operand:SI 3 "const_0_to_1_operand")
20187 (match_operand:VI48F_256 4 "register_operand")
20188 (match_operand:<avx512fmaskmode> 5 "register_operand")]
20191 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
20193 switch (INTVAL (operands[3]))
20196 insn = gen_vec_set_lo_<mode>_mask;
20199 insn = gen_vec_set_hi_<mode>_mask;
20202 gcc_unreachable ();
20205 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
;; Unmasked AVX vinsertf128: same low/high dispatch without mask operands.
20210 (define_expand "avx_vinsertf128<mode>"
20211 [(match_operand:V_256 0 "register_operand")
20212 (match_operand:V_256 1 "register_operand")
20213 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
20214 (match_operand:SI 3 "const_0_to_1_operand")]
20217 rtx (*insn)(rtx, rtx, rtx);
20219 switch (INTVAL (operands[3]))
20222 insn = gen_vec_set_lo_<mode>;
20225 insn = gen_vec_set_hi_<mode>;
20228 gcc_unreachable ();
20231 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replace the low 128 bits of a 256-bit 64-bit-element vector, keeping the
;; high half (elements 2,3 of operand 1).  Prefers the EVEX-maskable
;; vinsert[if]64x2 / 32x4 forms when AVX512DQ/VL are available.
20235 (define_insn "vec_set_lo_<mode><mask_name>"
20236 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
20237 (vec_concat:VI8F_256
20238 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
20239 (vec_select:<ssehalfvecmode>
20240 (match_operand:VI8F_256 1 "register_operand" "v")
20241 (parallel [(const_int 2) (const_int 3)]))))]
20242 "TARGET_AVX && <mask_avx512dq_condition>"
20244 if (TARGET_AVX512DQ)
20245 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
20246 else if (TARGET_AVX512VL)
20247 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
20249 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
20251 [(set_attr "type" "sselog")
20252 (set_attr "prefix_extra" "1")
20253 (set_attr "length_immediate" "1")
20254 (set_attr "prefix" "vex")
20255 (set_attr "mode" "<sseinsnmode>")])
;; Mirror image: replace the high 128 bits, keeping elements 0,1.
20257 (define_insn "vec_set_hi_<mode><mask_name>"
20258 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
20259 (vec_concat:VI8F_256
20260 (vec_select:<ssehalfvecmode>
20261 (match_operand:VI8F_256 1 "register_operand" "v")
20262 (parallel [(const_int 0) (const_int 1)]))
20263 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
20264 "TARGET_AVX && <mask_avx512dq_condition>"
20266 if (TARGET_AVX512DQ)
20267 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
20268 else if (TARGET_AVX512VL)
20269 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
20271 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
20273 [(set_attr "type" "sselog")
20274 (set_attr "prefix_extra" "1")
20275 (set_attr "length_immediate" "1")
20276 (set_attr "prefix" "vex")
20277 (set_attr "mode" "<sseinsnmode>")])
;; Same low/high 128-bit insert for 32-bit-element 256-bit vectors
;; (elements 4-7 form the high half); vinsert[if]32x4 when AVX512VL.
20279 (define_insn "vec_set_lo_<mode><mask_name>"
20280 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
20281 (vec_concat:VI4F_256
20282 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
20283 (vec_select:<ssehalfvecmode>
20284 (match_operand:VI4F_256 1 "register_operand" "v")
20285 (parallel [(const_int 4) (const_int 5)
20286 (const_int 6) (const_int 7)]))))]
20289 if (TARGET_AVX512VL)
20290 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
20292 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
20294 [(set_attr "type" "sselog")
20295 (set_attr "prefix_extra" "1")
20296 (set_attr "length_immediate" "1")
20297 (set_attr "prefix" "vex")
20298 (set_attr "mode" "<sseinsnmode>")])
20300 (define_insn "vec_set_hi_<mode><mask_name>"
20301 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
20302 (vec_concat:VI4F_256
20303 (vec_select:<ssehalfvecmode>
20304 (match_operand:VI4F_256 1 "register_operand" "v")
20305 (parallel [(const_int 0) (const_int 1)
20306 (const_int 2) (const_int 3)]))
20307 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
20310 if (TARGET_AVX512VL)
20311 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
20313 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
20315 [(set_attr "type" "sselog")
20316 (set_attr "prefix_extra" "1")
20317 (set_attr "length_immediate" "1")
20318 (set_attr "prefix" "vex")
20319 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit inserts for V16HI: AVX2 vinserti128 or the EVEX-encodable
;; vinserti32x4 alternative.
20321 (define_insn "vec_set_lo_v16hi"
20322 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
20324 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
20326 (match_operand:V16HI 1 "register_operand" "x,v")
20327 (parallel [(const_int 8) (const_int 9)
20328 (const_int 10) (const_int 11)
20329 (const_int 12) (const_int 13)
20330 (const_int 14) (const_int 15)]))))]
20333 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
20334 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
20335 [(set_attr "type" "sselog")
20336 (set_attr "prefix_extra" "1")
20337 (set_attr "length_immediate" "1")
20338 (set_attr "prefix" "vex,evex")
20339 (set_attr "mode" "OI")])
20341 (define_insn "vec_set_hi_v16hi"
20342 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
20345 (match_operand:V16HI 1 "register_operand" "x,v")
20346 (parallel [(const_int 0) (const_int 1)
20347 (const_int 2) (const_int 3)
20348 (const_int 4) (const_int 5)
20349 (const_int 6) (const_int 7)]))
20350 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
20353 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
20354 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
20355 [(set_attr "type" "sselog")
20356 (set_attr "prefix_extra" "1")
20357 (set_attr "length_immediate" "1")
20358 (set_attr "prefix" "vex,evex")
20359 (set_attr "mode" "OI")])
;; 128-bit inserts for V32QI, mirroring the V16HI pair above.
20361 (define_insn "vec_set_lo_v32qi"
20362 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
20364 (match_operand:V16QI 2 "nonimmediate_operand" "xm,v")
20366 (match_operand:V32QI 1 "register_operand" "x,v")
20367 (parallel [(const_int 16) (const_int 17)
20368 (const_int 18) (const_int 19)
20369 (const_int 20) (const_int 21)
20370 (const_int 22) (const_int 23)
20371 (const_int 24) (const_int 25)
20372 (const_int 26) (const_int 27)
20373 (const_int 28) (const_int 29)
20374 (const_int 30) (const_int 31)]))))]
20377 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
20378 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
20379 [(set_attr "type" "sselog")
20380 (set_attr "prefix_extra" "1")
20381 (set_attr "length_immediate" "1")
20382 (set_attr "prefix" "vex,evex")
20383 (set_attr "mode" "OI")])
20385 (define_insn "vec_set_hi_v32qi"
20386 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
20389 (match_operand:V32QI 1 "register_operand" "x,v")
20390 (parallel [(const_int 0) (const_int 1)
20391 (const_int 2) (const_int 3)
20392 (const_int 4) (const_int 5)
20393 (const_int 6) (const_int 7)
20394 (const_int 8) (const_int 9)
20395 (const_int 10) (const_int 11)
20396 (const_int 12) (const_int 13)
20397 (const_int 14) (const_int 15)]))
20398 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
20401 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
20402 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
20403 [(set_attr "type" "sselog")
20404 (set_attr "prefix_extra" "1")
20405 (set_attr "length_immediate" "1")
20406 (set_attr "prefix" "vex,evex")
20407 (set_attr "mode" "OI")])
;; AVX/AVX2 conditional (masked) vector load: vmaskmovps/pd or
;; vpmaskmovd/q, chosen by <sseintprefix>/<ssemodesuffix>.  Operand 2 is
;; the per-element mask (an integer vector of the same width), operand 1
;; the memory source.  VEX-only; no EVEX form here.
20409 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
20410   [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
20412 	  [(match_operand:<sseintvecmode> 2 "register_operand" "x")
20413 	   (match_operand:V48_AVX2 1 "memory_operand" "m")]
20416   "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
20417   [(set_attr "type" "sselog1")
20418    (set_attr "prefix_extra" "1")
20419    (set_attr "prefix" "vex")
20420    (set_attr "btver2_decode" "vector")
20421    (set_attr "mode" "<sseinsnmode>")])
;; AVX/AVX2 conditional (masked) vector store, the store-direction twin
;; of the pattern above: operand 1 is the mask, operand 2 the register
;; source, operand 0 the memory destination ("+m": partially written).
20423 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
20424   [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
20426 	  [(match_operand:<sseintvecmode> 1 "register_operand" "x")
20427 	   (match_operand:V48_AVX2 2 "register_operand" "x")
20431   "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20432   [(set_attr "type" "sselog1")
20433    (set_attr "prefix_extra" "1")
20434    (set_attr "prefix" "vex")
20435    (set_attr "btver2_decode" "vector")
20436    (set_attr "mode" "<sseinsnmode>")])
;; Optab expanders for the generic maskload/maskstore names used by the
;; vectorizer.  The AVX/AVX2 variants map onto the vmaskmov unspec
;; patterns above; the AVX512 variants express the masked access as a
;; vec_merge with a k-register mask (operand 2).
;; maskload, AVX/AVX2, vector-of-int mask.
20438 (define_expand "maskload<mode><sseintvecmodelower>"
20439   [(set (match_operand:V48_AVX2 0 "register_operand")
20441 	  [(match_operand:<sseintvecmode> 2 "register_operand")
20442 	   (match_operand:V48_AVX2 1 "memory_operand")]
;; maskload, AVX512VL, 32/64-bit element modes, k-register mask.
20446 (define_expand "maskload<mode><avx512fmaskmodelower>"
20447   [(set (match_operand:V48_AVX512VL 0 "register_operand")
20448 	(vec_merge:V48_AVX512VL
20449 	  (match_operand:V48_AVX512VL 1 "memory_operand")
20451 	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; maskload, AVX512VL+BW, 8/16-bit element modes.
20454 (define_expand "maskload<mode><avx512fmaskmodelower>"
20455   [(set (match_operand:VI12_AVX512VL 0 "register_operand")
20456 	(vec_merge:VI12_AVX512VL
20457 	  (match_operand:VI12_AVX512VL 1 "memory_operand")
20459 	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; maskstore, AVX/AVX2, vector-of-int mask.
20462 (define_expand "maskstore<mode><sseintvecmodelower>"
20463   [(set (match_operand:V48_AVX2 0 "memory_operand")
20465 	  [(match_operand:<sseintvecmode> 2 "register_operand")
20466 	   (match_operand:V48_AVX2 1 "register_operand")
;; maskstore, AVX512VL, 32/64-bit element modes.
20471 (define_expand "maskstore<mode><avx512fmaskmodelower>"
20472   [(set (match_operand:V48_AVX512VL 0 "memory_operand")
20473 	(vec_merge:V48_AVX512VL
20474 	  (match_operand:V48_AVX512VL 1 "register_operand")
20476 	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; maskstore, AVX512VL+BW, 8/16-bit element modes.
20479 (define_expand "maskstore<mode><avx512fmaskmodelower>"
20480   [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
20481 	(vec_merge:VI12_AVX512VL
20482 	  (match_operand:VI12_AVX512VL 1 "register_operand")
20484 	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Compare-and-branch on whole integer vectors (used e.g. for vector
;; equality tests): compare operands 1/2 into FLAGS_REG, then branch to
;; label operand 3 under bt_comparison_operator operand 0.  The actual
;; sequence is produced by ix86_expand_branch.
20487 (define_expand "cbranch<mode>4"
20488   [(set (reg:CC FLAGS_REG)
20489 	(compare:CC (match_operand:VI48_AVX 1 "register_operand")
20490 		    (match_operand:VI48_AVX 2 "nonimmediate_operand")))
20491    (set (pc) (if_then_else
20492 	       (match_operator 0 "bt_comparison_operator"
20493 		[(reg:CC FLAGS_REG) (const_int 0)])
20494 	       (label_ref (match_operand 3))
20498   ix86_expand_branch (GET_CODE (operands[0]),
20499 		      operands[1], operands[2], operands[3]);
;; Cast between a 256-bit vector mode and its half-size mode (the
;; intrinsics' no-op casts).  Kept as an unspec until after reload, then
;; split into a plain move of the low half: for a register destination
;; we narrow operand 0, otherwise we widen operand 1 via lowpart_subreg.
;; One of the two operands must be a register (no mem-to-mem).
20504 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
20505   [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
20506 	(unspec:AVX256MODE2P
20507 	  [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
20509   "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
20511   "&& reload_completed"
20512   [(set (match_dup 0) (match_dup 1))]
20514   if (REG_P (operands[0]))
20515     operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
20517     operands[1] = lowpart_subreg (<MODE>mode, operands[1],
20518 				  <ssehalfvecmode>mode);
20521 ;; Modes handled by vec_init expanders.
20522 (define_mode_iterator VEC_INIT_MODE
20523   [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
20524    (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
20525    (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
20526    (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
20527    (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
20528    (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
20529    (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
20531 ;; Likewise, but for initialization from half sized vectors.
20532 ;; Thus, these are all VEC_INIT_MODE modes except V2??.
20533 (define_mode_iterator VEC_INIT_HALF_MODE
20534   [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
20535    (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
20536    (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
20537    (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
20538    (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
20539    (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
20540    (V4TI "TARGET_AVX512F")])
;; Build a vector from individual scalar elements; all the work is done
;; in ix86_expand_vector_init.
20542 (define_expand "vec_init<mode><ssescalarmodelower>"
20543   [(match_operand:VEC_INIT_MODE 0 "register_operand")
20547   ix86_expand_vector_init (false, operands[0], operands[1]);
;; Build a vector from two half-width vectors; also handled entirely by
;; ix86_expand_vector_init.
20551 (define_expand "vec_init<mode><ssehalfvecmodelower>"
20552   [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
20556   ix86_expand_vector_init (false, operands[0], operands[1]);
;; Per-element variable-count shifts: each element of operand 1 is
;; shifted by the corresponding element of operand 2.
;; Arithmetic right shift for 32/64-bit elements (vpsravd/vpsravq);
;; AVX2 for the VEX forms, AVX512 for masking/512-bit.
20560 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
20561   [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
20562 	(ashiftrt:VI48_AVX512F_AVX512VL
20563 	  (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
20564 	  (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
20565   "TARGET_AVX2 && <mask_mode512bit_condition>"
20566   "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20567   [(set_attr "type" "sseishft")
20568    (set_attr "prefix" "maybe_evex")
20569    (set_attr "mode" "<sseinsnmode>")])
;; Arithmetic right shift for 16-bit elements (vpsravw, AVX512BW+VL).
20571 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
20572   [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
20573 	(ashiftrt:VI2_AVX512VL
20574 	  (match_operand:VI2_AVX512VL 1 "register_operand" "v")
20575 	  (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
20577   "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20578   [(set_attr "type" "sseishft")
20579    (set_attr "prefix" "maybe_evex")
20580    (set_attr "mode" "<sseinsnmode>")])
;; Logical left/right shift (any_lshift -> vpsllv/vpsrlv) for 32/64-bit
;; elements.
20582 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
20583   [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
20584 	(any_lshift:VI48_AVX512F
20585 	  (match_operand:VI48_AVX512F 1 "register_operand" "v")
20586 	  (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
20587   "TARGET_AVX2 && <mask_mode512bit_condition>"
20588   "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20589   [(set_attr "type" "sseishft")
20590    (set_attr "prefix" "maybe_evex")
20591    (set_attr "mode" "<sseinsnmode>")])
;; Logical left/right shift for 16-bit elements (vpsllvw/vpsrlvw).
20593 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
20594   [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
20595 	(any_lshift:VI2_AVX512VL
20596 	  (match_operand:VI2_AVX512VL 1 "register_operand" "v")
20597 	  (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
20599   "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20600   [(set_attr "type" "sseishft")
20601    (set_attr "prefix" "maybe_evex")
20602    (set_attr "mode" "<sseinsnmode>")])
;; Concatenate two half-width vectors into a 256/512-bit result.
;; Alternatives 0/1: operand 2 is a real value -> vinsert{i,f}128 /
;; vinsert{32x4,64x2,32x8,64x4} of operand 2 into the high lane, with
;; the AVX512DQ element-size-specific forms preferred when available.
;; Alternatives 2/3: operand 2 is zero (constraint "C") -> the concat is
;; just a move of operand 1 into the low half, implicit zeroing of the
;; high half; the mov variant is picked from get_attr_mode.
20604 (define_insn "avx_vec_concat<mode>"
20605   [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
20606 	(vec_concat:V_256_512
20607 	  (match_operand:<ssehalfvecmode> 1 "register_operand" "x,v,x,v")
20608 	  (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xm,vm,C,C")))]
20611   switch (which_alternative)
20614       return "vinsert<i128>\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
20616       if (<MODE_SIZE> == 64)
20618 	  if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
20619 	    return "vinsert<shuffletype>32x8\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
20621 	    return "vinsert<shuffletype>64x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
20625 	  if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
20626 	    return "vinsert<shuffletype>64x2\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
20628 	    return "vinsert<shuffletype>32x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
20632       switch (get_attr_mode (insn))
20635 	  return "vmovaps\t{%1, %t0|%t0, %1}";
20637 	  return "vmovapd\t{%1, %t0|%t0, %1}";
20639 	  return "vmovaps\t{%1, %x0|%x0, %1}";
20641 	  return "vmovapd\t{%1, %x0|%x0, %1}";
20643 	  if (which_alternative == 2)
20644 	    return "vmovdqa\t{%1, %t0|%t0, %1}";
20645 	  else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
20646 	    return "vmovdqa64\t{%1, %t0|%t0, %1}";
20648 	    return "vmovdqa32\t{%1, %t0|%t0, %1}";
20650 	  if (which_alternative == 2)
20651 	    return "vmovdqa\t{%1, %x0|%x0, %1}";
20652 	  else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
20653 	    return "vmovdqa64\t{%1, %x0|%x0, %1}";
20655 	    return "vmovdqa32\t{%1, %x0|%x0, %1}";
20657 	  gcc_unreachable ();
20660       gcc_unreachable ();
20663   [(set_attr "type" "sselog,sselog,ssemov,ssemov")
20664    (set_attr "prefix_extra" "1,1,*,*")
20665    (set_attr "length_immediate" "1,1,*,*")
20666    (set_attr "prefix" "maybe_evex")
20667    (set_attr "mode" "<sseinsnmode>")])
;; Half-precision (FP16) to single-precision conversions, vcvtph2ps.
;; 128-bit form: converts the low 4 HImode halves of a V8HI register to
;; V4SF (vec_select of the V8SF unspec result).  F16C or AVX512VL.
20669 (define_insn "vcvtph2ps<mask_name>"
20670   [(set (match_operand:V4SF 0 "register_operand" "=v")
20672 	  (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
20674 	  (parallel [(const_int 0) (const_int 1)
20675 		     (const_int 2) (const_int 3)])))]
20676   "TARGET_F16C || TARGET_AVX512VL"
20677   "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20678   [(set_attr "type" "ssecvt")
20679    (set_attr "prefix" "maybe_evex")
20680    (set_attr "mode" "V4SF")])
;; Same conversion with a V4HI memory source.
20682 (define_insn "*vcvtph2ps_load<mask_name>"
20683   [(set (match_operand:V4SF 0 "register_operand" "=v")
20684 	(unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
20685 		     UNSPEC_VCVTPH2PS))]
20686   "TARGET_F16C || TARGET_AVX512VL"
20687   "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20688   [(set_attr "type" "ssecvt")
20689    (set_attr "prefix" "vex")
20690    (set_attr "mode" "V8SF")])
;; 256-bit form: V8HI -> V8SF.
20692 (define_insn "vcvtph2ps256<mask_name>"
20693   [(set (match_operand:V8SF 0 "register_operand" "=v")
20694 	(unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
20695 		     UNSPEC_VCVTPH2PS))]
20696   "TARGET_F16C || TARGET_AVX512VL"
20697   "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20698   [(set_attr "type" "ssecvt")
20699    (set_attr "prefix" "vex")
20700    (set_attr "btver2_decode" "double")
20701    (set_attr "mode" "V8SF")])
;; 512-bit AVX512F form: V16HI -> V16SF, with optional masking and SAE
;; (suppress-all-exceptions) via the round_saeonly subst attributes.
20703 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
20704   [(set (match_operand:V16SF 0 "register_operand" "=v")
20706 	  [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
20707 	  UNSPEC_VCVTPH2PS))]
20709   "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
20710   [(set_attr "type" "ssecvt")
20711    (set_attr "prefix" "evex")
20712    (set_attr "mode" "V16SF")])
;; Single-precision to half-precision conversions, vcvtps2ph.
;; Operand 2 is the 8-bit immediate rounding-control field throughout.
;; Masked builtin expander: V4SF -> low V4HI of a V8HI result, merged
;; under QImode mask operand 4 with fallthrough value operand 3; the
;; upper half is zeroed (operand 5 set to const0 below).
20714 (define_expand "vcvtps2ph_mask"
20715   [(set (match_operand:V8HI 0 "register_operand")
20718 	    (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
20719 			  (match_operand:SI 2 "const_0_to_255_operand")]
20722 	  (match_operand:V8HI 3 "nonimm_or_0_operand")
20723 	  (match_operand:QI 4 "register_operand")))]
20725   "operands[5] = CONST0_RTX (V4HImode);")
;; Unmasked builtin expander; zero-fills the high V4HI half.
20727 (define_expand "vcvtps2ph"
20728   [(set (match_operand:V8HI 0 "register_operand")
20730 	    (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
20731 			  (match_operand:SI 2 "const_0_to_255_operand")]
20735   "operands[3] = CONST0_RTX (V4HImode);")
;; Matching insn for the expanders above (operand 3 must be zero).
20737 (define_insn "*vcvtps2ph<mask_name>"
20738   [(set (match_operand:V8HI 0 "register_operand" "=v")
20740 	    (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
20741 			  (match_operand:SI 2 "const_0_to_255_operand" "N")]
20743 	  (match_operand:V4HI 3 "const0_operand")))]
20744   "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
20745   "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
20746   [(set_attr "type" "ssecvt")
20747    (set_attr "prefix" "maybe_evex")
20748    (set_attr "mode" "V4SF")])
;; Store form: converted V4HI goes directly to memory.
20750 (define_insn "*vcvtps2ph_store<mask_name>"
20751   [(set (match_operand:V4HI 0 "memory_operand" "=m")
20752 	(unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
20753 		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
20754 		     UNSPEC_VCVTPS2PH))]
20755   "TARGET_F16C || TARGET_AVX512VL"
20756   "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20757   [(set_attr "type" "ssecvt")
20758    (set_attr "prefix" "maybe_evex")
20759    (set_attr "mode" "V4SF")])
;; 256-bit form: V8SF -> V8HI (register or memory destination).
20761 (define_insn "vcvtps2ph256<mask_name>"
20762   [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm")
20763 	(unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
20764 		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
20765 		     UNSPEC_VCVTPS2PH))]
20766   "TARGET_F16C || TARGET_AVX512VL"
20767   "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20768   [(set_attr "type" "ssecvt")
20769    (set_attr "prefix" "maybe_evex")
20770    (set_attr "btver2_decode" "vector")
20771    (set_attr "mode" "V8SF")])
;; 512-bit AVX512F form: V16SF -> V16HI.
20773 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
20774   [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
20776 	  [(match_operand:V16SF 1 "register_operand" "v")
20777 	   (match_operand:SI 2 "const_0_to_255_operand" "N")]
20778 	  UNSPEC_VCVTPS2PH))]
20780   "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20781   [(set_attr "type" "ssecvt")
20782    (set_attr "prefix" "evex")
20783    (set_attr "mode" "V16SF")])
20785 ;; For gather* insn patterns
20786 (define_mode_iterator VEC_GATHER_MODE
20787 		      [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Index-vector mode when the gather/scatter index elements are 32-bit.
20788 (define_mode_attr VEC_GATHER_IDXSI
20789 		      [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
20790 		       (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
20791 		       (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
20792 		       (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; Index-vector mode when the index elements are 64-bit.
20794 (define_mode_attr VEC_GATHER_IDXDI
20795 		      [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
20796 		       (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
20797 		       (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
20798 		       (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; Source/destination mode for DImode-index gathers: 32-bit-element
;; destinations shrink to half width since only half the lanes are
;; produced.
20800 (define_mode_attr VEC_GATHER_SRCDI
20801 		      [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
20802 		       (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
20803 		       (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
20804 		       (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; AVX2 gather with SImode indices (vgatherdps/vgatherdpd/vpgatherdd/
;; vpgatherdq).  The expander wraps base (2), index (3) and scale (5) in
;; an UNSPEC_VSIBADDR address so the insn can print a VSIB operand.
;; Operand 4 is the merge mask; operand 1 the pass-through source.
20806 (define_expand "avx2_gathersi<mode>"
20807   [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
20808 		   (unspec:VEC_GATHER_MODE
20809 		     [(match_operand:VEC_GATHER_MODE 1 "register_operand")
20810 		      (mem:<ssescalarmode>
20812 			  [(match_operand 2 "vsib_address_operand")
20813 			   (match_operand:<VEC_GATHER_IDXSI>
20814 			      3 "register_operand")
20815 			   (match_operand:SI 5 "const1248_operand ")]))
20816 		      (mem:BLK (scratch))
20817 		      (match_operand:VEC_GATHER_MODE 4 "register_operand")]
20819 	      (clobber (match_scratch:VEC_GATHER_MODE 7))])]
20823     = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
20824 					operands[5]), UNSPEC_VSIBADDR);
;; Insn matched from the expander above.  The mask register (operand 5,
;; tied to 1) is destroyed by the instruction, hence the clobbered
;; scratch; destination is early-clobbered as it may not overlap index
;; or mask.
20827 (define_insn "*avx2_gathersi<mode>"
20828   [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
20829 	(unspec:VEC_GATHER_MODE
20830 	  [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
20831 	   (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
20833 	       [(match_operand:P 3 "vsib_address_operand" "Tv")
20834 		(match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
20835 		(match_operand:SI 6 "const1248_operand" "n")]
20837 	   (mem:BLK (scratch))
20838 	   (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
20840    (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
20842   "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
20843   [(set_attr "type" "ssemov")
20844    (set_attr "prefix" "vex")
20845    (set_attr "mode" "<sseinsnmode>")])
;; Variant without a pass-through source operand (destination-only
;; merge); otherwise identical to the insn above.
20847 (define_insn "*avx2_gathersi<mode>_2"
20848   [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
20849 	(unspec:VEC_GATHER_MODE
20851 	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
20853 	       [(match_operand:P 2 "vsib_address_operand" "Tv")
20854 		(match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
20855 		(match_operand:SI 5 "const1248_operand" "n")]
20857 	   (mem:BLK (scratch))
20858 	   (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
20860    (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
20862   "%M2v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
20863   [(set_attr "type" "ssemov")
20864    (set_attr "prefix" "vex")
20865    (set_attr "mode" "<sseinsnmode>")])
;; AVX2 gather with DImode indices (vgatherqps/vgatherqpd/vpgatherqd/
;; vpgatherqq).  Source/mask use <VEC_GATHER_SRCDI>, which is half-width
;; for 32-bit-element destinations since a QI-index gather fills only
;; half the lanes.  Same UNSPEC_VSIBADDR address rewriting as gathersi.
20867 (define_expand "avx2_gatherdi<mode>"
20868   [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
20869 		   (unspec:VEC_GATHER_MODE
20870 		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
20871 		      (mem:<ssescalarmode>
20873 			  [(match_operand 2 "vsib_address_operand")
20874 			   (match_operand:<VEC_GATHER_IDXDI>
20875 			      3 "register_operand")
20876 			   (match_operand:SI 5 "const1248_operand ")]))
20877 		      (mem:BLK (scratch))
20878 		      (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
20880 	      (clobber (match_scratch:VEC_GATHER_MODE 7))])]
20884     = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
20885 					operands[5]), UNSPEC_VSIBADDR);
;; Insn form with explicit pass-through source (operand 2, tied to 0).
20888 (define_insn "*avx2_gatherdi<mode>"
20889   [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
20890 	(unspec:VEC_GATHER_MODE
20891 	  [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
20892 	   (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
20894 	       [(match_operand:P 3 "vsib_address_operand" "Tv")
20895 		(match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
20896 		(match_operand:SI 6 "const1248_operand" "n")]
20898 	   (mem:BLK (scratch))
20899 	   (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
20901    (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
20903   "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
20904   [(set_attr "type" "ssemov")
20905    (set_attr "prefix" "vex")
20906    (set_attr "mode" "<sseinsnmode>")])
;; Variant without the pass-through operand; prints %x0 (the XMM low
;; part) when the destination mode is wider than the produced value.
20908 (define_insn "*avx2_gatherdi<mode>_2"
20909   [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
20910 	(unspec:VEC_GATHER_MODE
20912 	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
20914 	       [(match_operand:P 2 "vsib_address_operand" "Tv")
20915 		(match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
20916 		(match_operand:SI 5 "const1248_operand" "n")]
20918 	   (mem:BLK (scratch))
20919 	   (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
20921    (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
20924   if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
20925     return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
20926   return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
20928   [(set_attr "type" "ssemov")
20929    (set_attr "prefix" "vex")
20930    (set_attr "mode" "<sseinsnmode>")])
;; As *avx2_gatherdi<mode> but the destination is only the low half
;; (vec_select of elements 0-3) of the wide gather result.
20932 (define_insn "*avx2_gatherdi<mode>_3"
20933   [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
20934 	(vec_select:<VEC_GATHER_SRCDI>
20936 	    [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
20937 	     (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
20939 		 [(match_operand:P 3 "vsib_address_operand" "Tv")
20940 		  (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
20941 		  (match_operand:SI 6 "const1248_operand" "n")]
20943 	     (mem:BLK (scratch))
20944 	     (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
20946 	  (parallel [(const_int 0) (const_int 1)
20947 		     (const_int 2) (const_int 3)])))
20948    (clobber (match_scratch:VI4F_256 1 "=&x"))]
20950   "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
20951   [(set_attr "type" "ssemov")
20952    (set_attr "prefix" "vex")
20953    (set_attr "mode" "<sseinsnmode>")])
;; Low-half vec_select variant without the pass-through operand.
20955 (define_insn "*avx2_gatherdi<mode>_4"
20956   [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
20957 	(vec_select:<VEC_GATHER_SRCDI>
20960 	     (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
20962 		 [(match_operand:P 2 "vsib_address_operand" "Tv")
20963 		  (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
20964 		  (match_operand:SI 5 "const1248_operand" "n")]
20966 	     (mem:BLK (scratch))
20967 	     (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
20969 	  (parallel [(const_int 0) (const_int 1)
20970 		     (const_int 2) (const_int 3)])))
20971    (clobber (match_scratch:VI4F_256 1 "=&x"))]
20973   "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
20974   [(set_attr "type" "ssemov")
20975    (set_attr "prefix" "vex")
20976    (set_attr "mode" "<sseinsnmode>")])
;; AVX512 gather with SImode indices: like the AVX2 version but the
;; merge mask (operand 4) is a k-register, which the hardware zeroes as
;; elements complete (hence the mask scratch clobber in the insns).
20978 (define_expand "<avx512>_gathersi<mode>"
20979   [(parallel [(set (match_operand:VI48F 0 "register_operand")
20981 		     [(match_operand:VI48F 1 "register_operand")
20982 		      (match_operand:<avx512fmaskmode> 4 "register_operand")
20983 		      (mem:<ssescalarmode>
20985 			  [(match_operand 2 "vsib_address_operand")
20986 			   (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
20987 			   (match_operand:SI 5 "const1248_operand")]))]
20989 	      (clobber (match_scratch:<avx512fmaskmode> 7))])]
20993     = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
20994 					operands[5]), UNSPEC_VSIBADDR);
;; Masked gather insn; destination early-clobbered, mask consumed.
20997 (define_insn "*avx512f_gathersi<mode>"
20998   [(set (match_operand:VI48F 0 "register_operand" "=&v")
21000 	  [(match_operand:VI48F 1 "register_operand" "0")
21001 	   (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
21002 	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
21004 	       [(match_operand:P 4 "vsib_address_operand" "Tv")
21005 		(match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
21006 		(match_operand:SI 5 "const1248_operand" "n")]
21007 	       UNSPEC_VSIBADDR)])]
21009    (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
21011 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
21012 ;; gas changed what it requires incompatibly.
21013   "%M4v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
21014   [(set_attr "type" "ssemov")
21015    (set_attr "prefix" "evex")
21016    (set_attr "mode" "<sseinsnmode>")])
;; Variant without the tied pass-through source operand.
21018 (define_insn "*avx512f_gathersi<mode>_2"
21019   [(set (match_operand:VI48F 0 "register_operand" "=&v")
21022 	   (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
21023 	   (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
21025 	       [(match_operand:P 3 "vsib_address_operand" "Tv")
21026 		(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
21027 		(match_operand:SI 4 "const1248_operand" "n")]
21028 	       UNSPEC_VSIBADDR)])]
21030    (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
21032 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21033 ;; gas changed what it requires incompatibly.
21034   "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
21035   [(set_attr "type" "ssemov")
21036    (set_attr "prefix" "evex")
21037    (set_attr "mode" "<sseinsnmode>")])
;; AVX512 gather with DImode indices; QImode k-register mask (operand
;; 4), half-width source for 32-bit-element modes via VEC_GATHER_SRCDI.
21040 (define_expand "<avx512>_gatherdi<mode>"
21041   [(parallel [(set (match_operand:VI48F 0 "register_operand")
21043 		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
21044 		      (match_operand:QI 4 "register_operand")
21045 		      (mem:<ssescalarmode>
21047 			  [(match_operand 2 "vsib_address_operand")
21048 			   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
21049 			   (match_operand:SI 5 "const1248_operand")]))]
21051 	      (clobber (match_scratch:QI 7))])]
21055     = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
21056 					operands[5]), UNSPEC_VSIBADDR);
;; Masked QI-index gather insn with tied pass-through (operand 1).
21059 (define_insn "*avx512f_gatherdi<mode>"
21060   [(set (match_operand:VI48F 0 "register_operand" "=&v")
21062 	  [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
21063 	   (match_operand:QI 7 "register_operand" "2")
21064 	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
21066 	       [(match_operand:P 4 "vsib_address_operand" "Tv")
21067 		(match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
21068 		(match_operand:SI 5 "const1248_operand" "n")]
21069 	       UNSPEC_VSIBADDR)])]
21071    (clobber (match_scratch:QI 2 "=&Yk"))]
21073 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
21074 ;; gas changed what it requires incompatibly.
21075   "%M4v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
21076   [(set_attr "type" "ssemov")
21077    (set_attr "prefix" "evex")
21078    (set_attr "mode" "<sseinsnmode>")])
;; Variant without pass-through; when the destination mode is wider
;; than the produced value, print the narrower %x0 (128-bit) or %t0
;; (256-bit) register name instead of the full-width one.
21080 (define_insn "*avx512f_gatherdi<mode>_2"
21081   [(set (match_operand:VI48F 0 "register_operand" "=&v")
21084 	   (match_operand:QI 6 "register_operand" "1")
21085 	   (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
21087 	       [(match_operand:P 3 "vsib_address_operand" "Tv")
21088 		(match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
21089 		(match_operand:SI 4 "const1248_operand" "n")]
21090 	       UNSPEC_VSIBADDR)])]
21092    (clobber (match_scratch:QI 1 "=&Yk"))]
21095   /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21096      gas changed what it requires incompatibly.  */
21097   if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
21099       if (<MODE_SIZE> != 64)
21100 	return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
21102 	return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
21104   return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
21106   [(set_attr "type" "ssemov")
21107    (set_attr "prefix" "evex")
21108    (set_attr "mode" "<sseinsnmode>")])
;; AVX512 scatter with SImode indices (vscatterdps etc.): masked store
;; of vector operand 3 through a VSIB address built from base (0),
;; index (2) and scale (4); operand 1 is the k-register write mask.
21110 (define_expand "<avx512>_scattersi<mode>"
21111   [(parallel [(set (mem:VI48F
21113 		       [(match_operand 0 "vsib_address_operand")
21114 			(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
21115 			(match_operand:SI 4 "const1248_operand")]))
21117 		     [(match_operand:<avx512fmaskmode> 1 "register_operand")
21118 		      (match_operand:VI48F 3 "register_operand")]
21120 	      (clobber (match_scratch:<avx512fmaskmode> 6))])]
21124     = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
21125 					operands[4]), UNSPEC_VSIBADDR);
;; Scatter insn; the hardware consumes the mask, hence the clobber.
21128 (define_insn "*avx512f_scattersi<mode>"
21129   [(set (match_operator:VI48F 5 "vsib_mem_operator"
21131 	    [(match_operand:P 0 "vsib_address_operand" "Tv")
21132 	     (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
21133 	     (match_operand:SI 4 "const1248_operand" "n")]
21136 	  [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
21137 	   (match_operand:VI48F 3 "register_operand" "v")]
21139    (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
21141 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21142 ;; gas changed what it requires incompatibly.
21143   "%M0v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
21144   [(set_attr "type" "ssemov")
21145    (set_attr "prefix" "evex")
21146    (set_attr "mode" "<sseinsnmode>")])
;; AVX512 scatter with DImode indices (vscatterqps etc.); QImode mask,
;; half-width source for 32-bit-element modes.
21148 (define_expand "<avx512>_scatterdi<mode>"
21149   [(parallel [(set (mem:VI48F
21151 		       [(match_operand 0 "vsib_address_operand")
21152 			(match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
21153 			(match_operand:SI 4 "const1248_operand")]))
21155 		     [(match_operand:QI 1 "register_operand")
21156 		      (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
21158 	      (clobber (match_scratch:QI 6))])]
21162     = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
21163 					operands[4]), UNSPEC_VSIBADDR);
;; QI-index scatter insn.
21166 (define_insn "*avx512f_scatterdi<mode>"
21167   [(set (match_operator:VI48F 5 "vsib_mem_operator"
21169 	    [(match_operand:P 0 "vsib_address_operand" "Tv")
21170 	     (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
21171 	     (match_operand:SI 4 "const1248_operand" "n")]
21174 	  [(match_operand:QI 6 "register_operand" "1")
21175 	   (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
21177    (clobber (match_scratch:QI 1 "=&Yk"))]
21179 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21180 ;; gas changed what it requires incompatibly.
21181   "%M0v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
21182   [(set_attr "type" "ssemov")
21183    (set_attr "prefix" "evex")
21184    (set_attr "mode" "<sseinsnmode>")])
;; Masked compress: pack the mask-selected elements of operand 1
;; contiguously into the low part of the destination, merging with
;; operand 2 (tied to 0, or zero under "C").
;; 32/64-bit elements, vcompressps/pd / vpcompressd/q (AVX512F).
21186 (define_insn "<avx512>_compress<mode>_mask"
21187   [(set (match_operand:VI48F 0 "register_operand" "=v")
21189 	  [(match_operand:VI48F 1 "register_operand" "v")
21190 	   (match_operand:VI48F 2 "nonimm_or_0_operand" "0C")
21191 	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
21194   "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21195   [(set_attr "type" "ssemov")
21196    (set_attr "prefix" "evex")
21197    (set_attr "mode" "<sseinsnmode>")])
;; 8/16-bit element compress, vpcompressb/w (AVX512VBMI2).
21199 (define_insn "compress<mode>_mask"
21200   [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v")
21201 	(unspec:VI12_AVX512VLBW
21202 	  [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v")
21203 	   (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C")
21204 	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
21206   "TARGET_AVX512VBMI2"
21207   "vpcompress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21208   [(set_attr "type" "ssemov")
21209    (set_attr "prefix" "evex")
21210    (set_attr "mode" "<sseinsnmode>")])
;; Compress directly to memory: only the selected elements are stored,
;; contiguously at the destination address.
21212 (define_insn "<avx512>_compressstore<mode>_mask"
21213   [(set (match_operand:VI48F 0 "memory_operand" "=m")
21215 	  [(match_operand:VI48F 1 "register_operand" "x")
21217 	   (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
21218 	  UNSPEC_COMPRESS_STORE))]
21220   "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
21221   [(set_attr "type" "ssemov")
21222    (set_attr "prefix" "evex")
21223    (set_attr "memory" "store")
21224    (set_attr "mode" "<sseinsnmode>")])
;; 8/16-bit element compress-store (AVX512VBMI2).
21226 (define_insn "compressstore<mode>_mask"
21227   [(set (match_operand:VI12_AVX512VLBW 0 "memory_operand" "=m")
21228 	(unspec:VI12_AVX512VLBW
21229 	  [(match_operand:VI12_AVX512VLBW 1 "register_operand" "x")
21231 	   (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
21232 	  UNSPEC_COMPRESS_STORE))]
21233   "TARGET_AVX512VBMI2"
21234   "vpcompress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
21235   [(set_attr "type" "ssemov")
21236    (set_attr "prefix" "evex")
21237    (set_attr "memory" "store")
21238    (set_attr "mode" "<sseinsnmode>")])
;; Masked expand: the inverse of compress — scatter the low contiguous
;; elements of operand 1 to the mask-selected positions of the result,
;; merging with operand 2.
;; Zero-masking expander for 32/64-bit elements: forces the merge
;; operand to zero before matching the _mask insn below.
21240 (define_expand "<avx512>_expand<mode>_maskz"
21241   [(set (match_operand:VI48F 0 "register_operand")
21243 	  [(match_operand:VI48F 1 "nonimmediate_operand")
21244 	   (match_operand:VI48F 2 "nonimm_or_0_operand")
21245 	   (match_operand:<avx512fmaskmode> 3 "register_operand")]
21248   "operands[2] = CONST0_RTX (<MODE>mode);")
;; Expand insn for 32/64-bit elements (vexpandps/pd / vpexpandd/q);
;; alternative 1 loads the source from memory.
21250 (define_insn "<avx512>_expand<mode>_mask"
21251   [(set (match_operand:VI48F 0 "register_operand" "=v,v")
21253 	  [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
21254 	   (match_operand:VI48F 2 "nonimm_or_0_operand" "0C,0C")
21255 	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
21258   "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21259   [(set_attr "type" "ssemov")
21260    (set_attr "prefix" "evex")
21261    (set_attr "memory" "none,load")
21262    (set_attr "mode" "<sseinsnmode>")])
;; 8/16-bit element expand, vpexpandb/w (AVX512VBMI2).
21264 (define_insn "expand<mode>_mask"
21265   [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v,v")
21266 	(unspec:VI12_AVX512VLBW
21267 	  [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand" "v,m")
21268 	   (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C,0C")
21269 	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
21271   "TARGET_AVX512VBMI2"
21272   "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21273   [(set_attr "type" "ssemov")
21274    (set_attr "prefix" "evex")
21275    (set_attr "memory" "none,load")
21276    (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking expander for 8/16-bit elements.
21278 (define_expand "expand<mode>_maskz"
21279   [(set (match_operand:VI12_AVX512VLBW 0 "register_operand")
21280 	(unspec:VI12_AVX512VLBW
21281 	  [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand")
21282 	   (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand")
21283 	   (match_operand:<avx512fmaskmode> 3 "register_operand")]
21285   "TARGET_AVX512VBMI2"
21286   "operands[2] = CONST0_RTX (<MODE>mode);")
;; AVX512DQ VRANGEPS/PD: per-element min/max/abs-min/abs-max with sign
;; selection controlled by immediate operand 3 (hence const_0_to_15).
;; Supports masking and SAE via the <mask_name>/<round_saeonly_name> subst
;; machinery.
21288 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
21289 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
21290 (unspec:VF_AVX512VL
21291 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
21292 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
21293 (match_operand:SI 3 "const_0_to_15_operand")]
21295 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
21296 "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
21297 [(set_attr "type" "sse")
21298 (set_attr "prefix" "evex")
21299 (set_attr "mode" "<MODE>")])

;; Scalar VRANGESS/SD counterpart (low element only, upper bits from op 1).
;; NOTE(review): interior lines look dropped by extraction here (inner
;; numbering skips 21303-21304, 21308-21311).
21301 (define_insn "avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>"
21302 [(set (match_operand:VF_128 0 "register_operand" "=v")
21305 [(match_operand:VF_128 1 "register_operand" "v")
21306 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
21307 (match_operand:SI 3 "const_0_to_15_operand")]
21312 "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
21313 [(set_attr "type" "sse")
21314 (set_attr "prefix" "evex")
21315 (set_attr "mode" "<MODE>")])
;; AVX512DQ VFPCLASSPS/PD: classify each float element against the
;; categories selected by immediate operand 2, producing a mask register.
;; Fix(review): dropped the stray `;' that trailed the output-template
;; strings below — in a machine description `;' opens a comment, so the
;; semicolons were dead text left over from an editing slip.
21317 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
21318 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
21319 (unspec:<avx512fmaskmode>
21320 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
21321 (match_operand:QI 2 "const_0_to_255_operand" "n")]
21324 "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
21325 [(set_attr "type" "sse")
21326 (set_attr "length_immediate" "1")
21327 (set_attr "prefix" "evex")
21328 (set_attr "mode" "<MODE>")])

;; Scalar VFPCLASSSS/SD: classify only the low element; the result mask is
;; and-ed down to a single meaningful bit.
21330 (define_insn "avx512dq_vmfpclass<mode>"
21331 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
21332 (and:<avx512fmaskmode>
21333 (unspec:<avx512fmaskmode>
21334 [(match_operand:VF_128 1 "register_operand" "v")
21335 (match_operand:QI 2 "const_0_to_255_operand" "n")]
21339 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
21340 [(set_attr "type" "sse")
21341 (set_attr "length_immediate" "1")
21342 (set_attr "prefix" "evex")
21343 (set_attr "mode" "<MODE>")])
;; VGETMANTPS/PD: extract the normalized mantissa of each element; the
;; immediate (operand 2, 0..15) selects normalization interval and sign
;; handling.  Masking and SAE come from the subst attributes in the name.
;; Fix(review): dropped the stray trailing `;' after the two output
;; templates — `;' starts a comment in .md files, so it was dead text.
21345 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
21346 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
21347 (unspec:VF_AVX512VL
21348 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
21349 (match_operand:SI 2 "const_0_to_15_operand")]
21352 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
21353 [(set_attr "prefix" "evex")
21354 (set_attr "mode" "<MODE>")])

;; Scalar VGETMANTSS/SD: low element from operand 2, upper bits merged from
;; operand 1.  NOTE(review): interior lines (21358-21359, 21363-21366) look
;; dropped by extraction.
21356 (define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
21357 [(set (match_operand:VF_128 0 "register_operand" "=v")
21360 [(match_operand:VF_128 1 "register_operand" "v")
21361 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
21362 (match_operand:SI 3 "const_0_to_15_operand")]
21367 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
21368 [(set_attr "prefix" "evex")
21369 (set_attr "mode" "<ssescalarmode>")])
21371 ;; The correct representation for this is absolutely enormous, and
21372 ;; surely not generally useful.
;; VDBPSADBW: double-block packed sum of absolute differences on unsigned
;; bytes; immediate operand 3 shuffles the second source before the SADs.
;; Modeled as an opaque unspec because the exact RTL would be huge (above).
21373 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
21374 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
21375 (unspec:VI2_AVX512VL
21376 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
21377 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
21378 (match_operand:SI 3 "const_0_to_255_operand")]
21381 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
21382 [(set_attr "type" "sselog1")
21383 (set_attr "length_immediate" "1")
21384 (set_attr "prefix" "evex")
21385 (set_attr "mode" "<sseinsnmode>")])
;; AVX512CD VPLZCNTD/Q: per-element count of leading zero bits, expressed
;; with the generic clz rtx so it also serves the clz<mode>2 optab.
21387 (define_insn "clz<mode>2<mask_name>"
21388 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
21390 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
21392 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21393 [(set_attr "type" "sse")
21394 (set_attr "prefix" "evex")
21395 (set_attr "mode" "<sseinsnmode>")])

;; AVX512CD VPCONFLICTD/Q: for each element, a bitmap of earlier elements
;; holding the same value (conflict detection for vectorized scatter).
21397 (define_insn "<mask_codefor>conflict<mode><mask_name>"
21398 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
21399 (unspec:VI48_AVX512VL
21400 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
21403 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21404 [(set_attr "type" "sse")
21405 (set_attr "prefix" "evex")
21406 (set_attr "mode" "<sseinsnmode>")])
;; SHA-NI instructions.  All operate on V4SI state words, destructively on
;; operand 1 (constraint "0"), legacy-SSE encoded ("x" registers, mode TI).

;; SHA1MSG1: first intermediate message-scheduling step for SHA-1.
21408 (define_insn "sha1msg1"
21409 [(set (match_operand:V4SI 0 "register_operand" "=x")
21411 [(match_operand:V4SI 1 "register_operand" "0")
21412 (match_operand:V4SI 2 "vector_operand" "xBm")]
21415 "sha1msg1\t{%2, %0|%0, %2}"
21416 [(set_attr "type" "sselog1")
21417 (set_attr "mode" "TI")])

;; SHA1MSG2: final message-scheduling step for SHA-1.
21419 (define_insn "sha1msg2"
21420 [(set (match_operand:V4SI 0 "register_operand" "=x")
21422 [(match_operand:V4SI 1 "register_operand" "0")
21423 (match_operand:V4SI 2 "vector_operand" "xBm")]
21426 "sha1msg2\t{%2, %0|%0, %2}"
21427 [(set_attr "type" "sselog1")
21428 (set_attr "mode" "TI")])

;; SHA1NEXTE: compute the SHA-1 state variable E after four rounds.
21430 (define_insn "sha1nexte"
21431 [(set (match_operand:V4SI 0 "register_operand" "=x")
21433 [(match_operand:V4SI 1 "register_operand" "0")
21434 (match_operand:V4SI 2 "vector_operand" "xBm")]
21435 UNSPEC_SHA1NEXTE))]
21437 "sha1nexte\t{%2, %0|%0, %2}"
21438 [(set_attr "type" "sselog1")
21439 (set_attr "mode" "TI")])

;; SHA1RNDS4: four SHA-1 rounds; the 2-bit immediate selects the round
;; function/constant group (hence const_0_to_3).
21441 (define_insn "sha1rnds4"
21442 [(set (match_operand:V4SI 0 "register_operand" "=x")
21444 [(match_operand:V4SI 1 "register_operand" "0")
21445 (match_operand:V4SI 2 "vector_operand" "xBm")
21446 (match_operand:SI 3 "const_0_to_3_operand" "n")]
21447 UNSPEC_SHA1RNDS4))]
21449 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
21450 [(set_attr "type" "sselog1")
21451 (set_attr "length_immediate" "1")
21452 (set_attr "mode" "TI")])

;; SHA256MSG1: first intermediate message-scheduling step for SHA-256.
21454 (define_insn "sha256msg1"
21455 [(set (match_operand:V4SI 0 "register_operand" "=x")
21457 [(match_operand:V4SI 1 "register_operand" "0")
21458 (match_operand:V4SI 2 "vector_operand" "xBm")]
21459 UNSPEC_SHA256MSG1))]
21461 "sha256msg1\t{%2, %0|%0, %2}"
21462 [(set_attr "type" "sselog1")
21463 (set_attr "mode" "TI")])

;; SHA256MSG2: final message-scheduling step for SHA-256.
21465 (define_insn "sha256msg2"
21466 [(set (match_operand:V4SI 0 "register_operand" "=x")
21468 [(match_operand:V4SI 1 "register_operand" "0")
21469 (match_operand:V4SI 2 "vector_operand" "xBm")]
21470 UNSPEC_SHA256MSG2))]
21472 "sha256msg2\t{%2, %0|%0, %2}"
21473 [(set_attr "type" "sselog1")
21474 (set_attr "mode" "TI")])

;; SHA256RNDS2: two SHA-256 rounds; operand 3 is the implicit xmm0 key
;; input, enforced by the "Yz" constraint.
21476 (define_insn "sha256rnds2"
21477 [(set (match_operand:V4SI 0 "register_operand" "=x")
21479 [(match_operand:V4SI 1 "register_operand" "0")
21480 (match_operand:V4SI 2 "vector_operand" "xBm")
21481 (match_operand:V4SI 3 "register_operand" "Yz")]
21482 UNSPEC_SHA256RNDS2))]
21484 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
21485 [(set_attr "type" "sselog1")
21486 (set_attr "length_immediate" "1")
21487 (set_attr "mode" "TI")])
;; "Cast" a 128-bit vector to a 512-bit one (upper bits undefined): kept as
;; an unspec until after reload, then split to a plain lowpart move.
21489 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
21490 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
21491 (unspec:AVX512MODE2P
21492 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
21494 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
21496 "&& reload_completed"
21497 [(set (match_dup 0) (match_dup 1))]
;; After reload: rewrite one side so both operands share a mode, turning
;; the cast into a simple same-mode move (dest narrowed to the quarter
;; mode if it is a register, else source widened via a lowpart subreg).
21499 if (REG_P (operands[0]))
21500 operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
21502 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
21503 <ssequartermode>mode);

;; Same trick for casting a 256-bit vector to 512 bits.
21506 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
21507 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
21508 (unspec:AVX512MODE2P
21509 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
21511 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
21513 "&& reload_completed"
21514 [(set (match_dup 0) (match_dup 1))]
21516 if (REG_P (operands[0]))
21517 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
21519 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
21520 <ssehalfvecmode>mode);
;; AVX512IFMA VPMADD52{L,H}UQ: 52-bit integer fused multiply-add.
;; NOTE(review): the pattern names below spell "vpamdd52" (transposed from
;; "vpmadd52").  The typo is load-bearing — builtin expanders call
;; gen_vpamdd52* — so the names must not be "fixed" in isolation.
21523 (define_int_iterator VPMADD52
21524 [UNSPEC_VPMADD52LUQ
21525 UNSPEC_VPMADD52HUQ])

;; Maps the unspec to the "luq"/"huq" mnemonic suffix.
21527 (define_int_attr vpmadd52type
21528 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])

;; Zero-masking expander for the high-half form: forwards to the _maskz_1
;; insn with an explicit zero-vector merge operand.
21530 (define_expand "vpamdd52huq<mode>_maskz"
21531 [(match_operand:VI8_AVX512VL 0 "register_operand")
21532 (match_operand:VI8_AVX512VL 1 "register_operand")
21533 (match_operand:VI8_AVX512VL 2 "register_operand")
21534 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
21535 (match_operand:<avx512fmaskmode> 4 "register_operand")]
21536 "TARGET_AVX512IFMA"
21538 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
21539 operands[0], operands[1], operands[2], operands[3],
21540 CONST0_RTX (<MODE>mode), operands[4]));

;; Zero-masking expander for the low-half form, identical shape.
21544 (define_expand "vpamdd52luq<mode>_maskz"
21545 [(match_operand:VI8_AVX512VL 0 "register_operand")
21546 (match_operand:VI8_AVX512VL 1 "register_operand")
21547 (match_operand:VI8_AVX512VL 2 "register_operand")
21548 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
21549 (match_operand:<avx512fmaskmode> 4 "register_operand")]
21550 "TARGET_AVX512IFMA"
21552 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
21553 operands[0], operands[1], operands[2], operands[3],
21554 CONST0_RTX (<MODE>mode), operands[4]))

;; The insn itself: accumulator in operand 1 (tied to the destination via
;; constraint "0"), multiplicands in operands 2/3.
21558 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
21559 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
21560 (unspec:VI8_AVX512VL
21561 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
21562 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
21563 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
21565 "TARGET_AVX512IFMA"
21566 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
21567 [(set_attr "type" "ssemuladd")
21568 (set_attr "prefix" "evex")
21569 (set_attr "mode" "<sseinsnmode>")])

;; Merge-masking variant: lanes cleared in mask operand 4 keep the old
;; accumulator value (vec_merge with the tied operand 1).
21571 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
21572 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
21573 (vec_merge:VI8_AVX512VL
21574 (unspec:VI8_AVX512VL
21575 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
21576 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
21577 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
21580 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
21581 "TARGET_AVX512IFMA"
21582 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
21583 [(set_attr "type" "ssemuladd")
21584 (set_attr "prefix" "evex")
21585 (set_attr "mode" "<sseinsnmode>")])
;; AVX512VBMI VPMULTISHIFTQB: for each byte of the destination, select an
;; unaligned 8-bit field from the corresponding qword of operand 2, with
;; the bit offset taken from operand 1.
21587 (define_insn "vpmultishiftqb<mode><mask_name>"
21588 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
21589 (unspec:VI1_AVX512VL
21590 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
21591 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
21592 UNSPEC_VPMULTISHIFT))]
21593 "TARGET_AVX512VBMI"
21594 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21595 [(set_attr "type" "sselog")
21596 (set_attr "prefix" "evex")
21597 (set_attr "mode" "<sseinsnmode>")])
;; 2048-bit pseudo-vector modes used only by the AVX5124FMAPS/4VNNIW
;; multi-register instructions (four consecutive 512-bit registers).
21599 (define_mode_iterator IMOD4
21600 [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])

;; The 512-bit mode each quarter of an IMOD4 value really lives in.
21602 (define_mode_attr imod4_narrow
21603 [(V64SF "V16SF") (V64SI "V16SI")])

;; Standard mov optab for IMOD4; defers to the generic vector move helper.
21605 (define_expand "mov<mode>"
21606 [(set (match_operand:IMOD4 0 "nonimmediate_operand")
21607 (match_operand:IMOD4 1 "nonimm_or_0_operand"))]
21610 ix86_expand_vector_move (<MODE>mode, operands);

;; IMOD4 moves are not real instructions: after reload the move is split
;; into four 512-bit moves, one per 64-byte quarter (i * 64 byte offset).
21614 (define_insn_and_split "*mov<mode>_internal"
21615 [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
21616 (match_operand:IMOD4 1 "nonimm_or_0_operand" " C,vm,v"))]
21618 && (register_operand (operands[0], <MODE>mode)
21619 || register_operand (operands[1], <MODE>mode))"
21621 "&& reload_completed"
21627 for (i = 0; i < 4; i++)
21629 op0 = simplify_subreg
21630 (<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
21631 op1 = simplify_subreg
21632 (<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
21633 emit_move_insn (op0, op1);
;; AVX5124FMAPS (Knights Mill): v4fmadd/v4fnmadd take FOUR consecutive
;; zmm registers of multiplicands (modeled as one V64SF operand, printed
;; with %g as its first 512-bit register) plus a 128-bit memory operand
;; holding four scalars.  Each family has plain / merge-mask / zero-mask
;; forms.  NOTE(review): the set_attr lines here use the nonstandard
;; (set_attr ("x") ("y")) parenthesized spelling; read-rtl accepts it, but
;; it differs from the rest of the file.

;; Packed 4-iteration FMA: acc(op1) += op2[0..3] * mem.
21638 (define_insn "avx5124fmaddps_4fmaddps"
21639 [(set (match_operand:V16SF 0 "register_operand" "=v")
21641 [(match_operand:V16SF 1 "register_operand" "0")
21642 (match_operand:V64SF 2 "register_operand" "v")
21643 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
21644 "TARGET_AVX5124FMAPS"
21645 "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
21646 [(set_attr ("type") ("ssemuladd"))
21647 (set_attr ("prefix") ("evex"))
21648 (set_attr ("mode") ("V16SF"))])

;; Merge-masked form: masked-off lanes keep the accumulator (operand 3,
;; tied to the destination).
21650 (define_insn "avx5124fmaddps_4fmaddps_mask"
21651 [(set (match_operand:V16SF 0 "register_operand" "=v")
21654 [(match_operand:V64SF 1 "register_operand" "v")
21655 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
21656 (match_operand:V16SF 3 "register_operand" "0")
21657 (match_operand:HI 4 "register_operand" "Yk")))]
21658 "TARGET_AVX5124FMAPS"
21659 "v4fmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
21660 [(set_attr ("type") ("ssemuladd"))
21661 (set_attr ("prefix") ("evex"))
21662 (set_attr ("mode") ("V16SF"))])

;; Zero-masked form: masked-off lanes become zero (const0 merge operand).
21664 (define_insn "avx5124fmaddps_4fmaddps_maskz"
21665 [(set (match_operand:V16SF 0 "register_operand" "=v")
21668 [(match_operand:V16SF 1 "register_operand" "0")
21669 (match_operand:V64SF 2 "register_operand" "v")
21670 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
21671 (match_operand:V16SF 4 "const0_operand" "C")
21672 (match_operand:HI 5 "register_operand" "Yk")))]
21673 "TARGET_AVX5124FMAPS"
21674 "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
21675 [(set_attr ("type") ("ssemuladd"))
21676 (set_attr ("prefix") ("evex"))
21677 (set_attr ("mode") ("V16SF"))])

;; Scalar 4-iteration FMA on the low element (%x prints the xmm view).
21679 (define_insn "avx5124fmaddps_4fmaddss"
21680 [(set (match_operand:V4SF 0 "register_operand" "=v")
21682 [(match_operand:V4SF 1 "register_operand" "0")
21683 (match_operand:V64SF 2 "register_operand" "v")
21684 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
21685 "TARGET_AVX5124FMAPS"
21686 "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
21687 [(set_attr ("type") ("ssemuladd"))
21688 (set_attr ("prefix") ("evex"))
21689 (set_attr ("mode") ("SF"))])

;; Scalar merge-masked form (QI mask: only bit 0 is meaningful).
21691 (define_insn "avx5124fmaddps_4fmaddss_mask"
21692 [(set (match_operand:V4SF 0 "register_operand" "=v")
21695 [(match_operand:V64SF 1 "register_operand" "v")
21696 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
21697 (match_operand:V4SF 3 "register_operand" "0")
21698 (match_operand:QI 4 "register_operand" "Yk")))]
21699 "TARGET_AVX5124FMAPS"
21700 "v4fmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
21701 [(set_attr ("type") ("ssemuladd"))
21702 (set_attr ("prefix") ("evex"))
21703 (set_attr ("mode") ("SF"))])

;; Scalar zero-masked form.
21705 (define_insn "avx5124fmaddps_4fmaddss_maskz"
21706 [(set (match_operand:V4SF 0 "register_operand" "=v")
21709 [(match_operand:V4SF 1 "register_operand" "0")
21710 (match_operand:V64SF 2 "register_operand" "v")
21711 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
21712 (match_operand:V4SF 4 "const0_operand" "C")
21713 (match_operand:QI 5 "register_operand" "Yk")))]
21714 "TARGET_AVX5124FMAPS"
21715 "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
21716 [(set_attr ("type") ("ssemuladd"))
21717 (set_attr ("prefix") ("evex"))
21718 (set_attr ("mode") ("SF"))])

;; Negated-multiply (fnmadd) packed variant: acc -= product.
21720 (define_insn "avx5124fmaddps_4fnmaddps"
21721 [(set (match_operand:V16SF 0 "register_operand" "=v")
21723 [(match_operand:V16SF 1 "register_operand" "0")
21724 (match_operand:V64SF 2 "register_operand" "v")
21725 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
21726 "TARGET_AVX5124FMAPS"
21727 "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
21728 [(set_attr ("type") ("ssemuladd"))
21729 (set_attr ("prefix") ("evex"))
21730 (set_attr ("mode") ("V16SF"))])

;; fnmadd packed, merge-masked.
21732 (define_insn "avx5124fmaddps_4fnmaddps_mask"
21733 [(set (match_operand:V16SF 0 "register_operand" "=v")
21736 [(match_operand:V64SF 1 "register_operand" "v")
21737 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
21738 (match_operand:V16SF 3 "register_operand" "0")
21739 (match_operand:HI 4 "register_operand" "Yk")))]
21740 "TARGET_AVX5124FMAPS"
21741 "v4fnmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
21742 [(set_attr ("type") ("ssemuladd"))
21743 (set_attr ("prefix") ("evex"))
21744 (set_attr ("mode") ("V16SF"))])

;; fnmadd packed, zero-masked.
21746 (define_insn "avx5124fmaddps_4fnmaddps_maskz"
21747 [(set (match_operand:V16SF 0 "register_operand" "=v")
21750 [(match_operand:V16SF 1 "register_operand" "0")
21751 (match_operand:V64SF 2 "register_operand" "v")
21752 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
21753 (match_operand:V16SF 4 "const0_operand" "C")
21754 (match_operand:HI 5 "register_operand" "Yk")))]
21755 "TARGET_AVX5124FMAPS"
21756 "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
21757 [(set_attr ("type") ("ssemuladd"))
21758 (set_attr ("prefix") ("evex"))
21759 (set_attr ("mode") ("V16SF"))])

;; fnmadd scalar variant.
21761 (define_insn "avx5124fmaddps_4fnmaddss"
21762 [(set (match_operand:V4SF 0 "register_operand" "=v")
21764 [(match_operand:V4SF 1 "register_operand" "0")
21765 (match_operand:V64SF 2 "register_operand" "v")
21766 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
21767 "TARGET_AVX5124FMAPS"
21768 "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
21769 [(set_attr ("type") ("ssemuladd"))
21770 (set_attr ("prefix") ("evex"))
21771 (set_attr ("mode") ("SF"))])

;; fnmadd scalar, merge-masked.
21773 (define_insn "avx5124fmaddps_4fnmaddss_mask"
21774 [(set (match_operand:V4SF 0 "register_operand" "=v")
21777 [(match_operand:V64SF 1 "register_operand" "v")
21778 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
21779 (match_operand:V4SF 3 "register_operand" "0")
21780 (match_operand:QI 4 "register_operand" "Yk")))]
21781 "TARGET_AVX5124FMAPS"
21782 "v4fnmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
21783 [(set_attr ("type") ("ssemuladd"))
21784 (set_attr ("prefix") ("evex"))
21785 (set_attr ("mode") ("SF"))])

;; fnmadd scalar, zero-masked.
21787 (define_insn "avx5124fmaddps_4fnmaddss_maskz"
21788 [(set (match_operand:V4SF 0 "register_operand" "=v")
21791 [(match_operand:V4SF 1 "register_operand" "0")
21792 (match_operand:V64SF 2 "register_operand" "v")
21793 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
21794 (match_operand:V4SF 4 "const0_operand" "C")
21795 (match_operand:QI 5 "register_operand" "Yk")))]
21796 "TARGET_AVX5124FMAPS"
21797 "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
21798 [(set_attr ("type") ("ssemuladd"))
21799 (set_attr ("prefix") ("evex"))
21800 (set_attr ("mode") ("SF"))])
;; AVX5124VNNIW (Knights Mill): vp4dpwssd[s] — four-iteration dot product
;; of signed words with dword accumulation; the V64SI operand names four
;; consecutive zmm registers (%g prints the first), the V4SI memory
;; operand supplies the four second sources.  The "s" forms saturate.

;; vp4dpwssd: non-saturating, unmasked.
21802 (define_insn "avx5124vnniw_vp4dpwssd"
21803 [(set (match_operand:V16SI 0 "register_operand" "=v")
21805 [(match_operand:V16SI 1 "register_operand" "0")
21806 (match_operand:V64SI 2 "register_operand" "v")
21807 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
21808 "TARGET_AVX5124VNNIW"
21809 "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
21810 [(set_attr ("type") ("ssemuladd"))
21811 (set_attr ("prefix") ("evex"))
21812 (set_attr ("mode") ("TI"))])

;; vp4dpwssd, merge-masked (masked lanes keep the tied accumulator).
21814 (define_insn "avx5124vnniw_vp4dpwssd_mask"
21815 [(set (match_operand:V16SI 0 "register_operand" "=v")
21818 [(match_operand:V64SI 1 "register_operand" "v")
21819 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
21820 (match_operand:V16SI 3 "register_operand" "0")
21821 (match_operand:HI 4 "register_operand" "Yk")))]
21822 "TARGET_AVX5124VNNIW"
21823 "vp4dpwssd\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
21824 [(set_attr ("type") ("ssemuladd"))
21825 (set_attr ("prefix") ("evex"))
21826 (set_attr ("mode") ("TI"))])

;; vp4dpwssd, zero-masked.
21828 (define_insn "avx5124vnniw_vp4dpwssd_maskz"
21829 [(set (match_operand:V16SI 0 "register_operand" "=v")
21832 [(match_operand:V16SI 1 "register_operand" "0")
21833 (match_operand:V64SI 2 "register_operand" "v")
21834 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
21835 (match_operand:V16SI 4 "const0_operand" "C")
21836 (match_operand:HI 5 "register_operand" "Yk")))]
21837 "TARGET_AVX5124VNNIW"
21838 "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
21839 [(set_attr ("type") ("ssemuladd"))
21840 (set_attr ("prefix") ("evex"))
21841 (set_attr ("mode") ("TI"))])

;; vp4dpwssds: saturating accumulate, unmasked.
21843 (define_insn "avx5124vnniw_vp4dpwssds"
21844 [(set (match_operand:V16SI 0 "register_operand" "=v")
21846 [(match_operand:V16SI 1 "register_operand" "0")
21847 (match_operand:V64SI 2 "register_operand" "v")
21848 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
21849 "TARGET_AVX5124VNNIW"
21850 "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
21851 [(set_attr ("type") ("ssemuladd"))
21852 (set_attr ("prefix") ("evex"))
21853 (set_attr ("mode") ("TI"))])

;; vp4dpwssds, merge-masked.
21855 (define_insn "avx5124vnniw_vp4dpwssds_mask"
21856 [(set (match_operand:V16SI 0 "register_operand" "=v")
21859 [(match_operand:V64SI 1 "register_operand" "v")
21860 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
21861 (match_operand:V16SI 3 "register_operand" "0")
21862 (match_operand:HI 4 "register_operand" "Yk")))]
21863 "TARGET_AVX5124VNNIW"
21864 "vp4dpwssds\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
21865 [(set_attr ("type") ("ssemuladd"))
21866 (set_attr ("prefix") ("evex"))
21867 (set_attr ("mode") ("TI"))])

;; vp4dpwssds, zero-masked.
21869 (define_insn "avx5124vnniw_vp4dpwssds_maskz"
21870 [(set (match_operand:V16SI 0 "register_operand" "=v")
21873 [(match_operand:V16SI 1 "register_operand" "0")
21874 (match_operand:V64SI 2 "register_operand" "v")
21875 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
21876 (match_operand:V16SI 4 "const0_operand" "C")
21877 (match_operand:HI 5 "register_operand" "Yk")))]
21878 "TARGET_AVX5124VNNIW"
21879 "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
21880 [(set_attr ("type") ("ssemuladd"))
21881 (set_attr ("prefix") ("evex"))
21882 (set_attr ("mode") ("TI"))])
;; AVX512VPOPCNTDQ VPOPCNTD/Q: per-element population count, modeled with
;; the generic popcount rtx so the vectorizer can pick it up.
21884 (define_insn "vpopcount<mode><mask_name>"
21885 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
21886 (popcount:VI48_AVX512VL
21887 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
21888 "TARGET_AVX512VPOPCNTDQ"
21889 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
;; Out-of-line SSE register save/restore stubs (ms-ABI style): operand 1
;; is the symbol of the library routine; the match_parallel predicate
;; validates the register set.  NOTE(review): the output templates for
;; these patterns appear to have been dropped by extraction (inner
;; numbering skips 21896-21897, 21903-21904, ...).
21891 ;; Save multiple registers out-of-line.
21892 (define_insn "save_multiple<mode>"
21893 [(match_parallel 0 "save_multiple"
21894 [(use (match_operand:P 1 "symbol_operand"))])]
21895 "TARGET_SSE && TARGET_64BIT"
21898 ;; Restore multiple registers out-of-line.
21899 (define_insn "restore_multiple<mode>"
21900 [(match_parallel 0 "restore_multiple"
21901 [(use (match_operand:P 1 "symbol_operand"))])]
21902 "TARGET_SSE && TARGET_64BIT"
21905 ;; Restore multiple registers out-of-line and return.
;; The stub also restores the stack pointer from r10 before returning.
21906 (define_insn "restore_multiple_and_return<mode>"
21907 [(match_parallel 0 "restore_multiple"
21909 (use (match_operand:P 1 "symbol_operand"))
21910 (set (reg:DI SP_REG) (reg:DI R10_REG))
21912 "TARGET_SSE && TARGET_64BIT"
21915 ;; Restore multiple registers out-of-line when hard frame pointer is used,
21916 ;; perform the leave operation prior to returning (from the function).
;; Models the leave: sp = bp + 8, bp reloaded from the old frame slot;
;; the BLK clobber keeps memory accesses from moving across the epilogue.
21917 (define_insn "restore_multiple_leave_return<mode>"
21918 [(match_parallel 0 "restore_multiple"
21920 (use (match_operand:P 1 "symbol_operand"))
21921 (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
21922 (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
21923 (clobber (mem:BLK (scratch)))
21925 "TARGET_SSE && TARGET_64BIT"
;; AVX512BITALG VPOPCNTB/W: byte/word element population count (same shape
;; as the VPOPCNTDQ pattern above, different element sizes and ISA gate).
21928 (define_insn "vpopcount<mode><mask_name>"
21929 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
21930 (popcount:VI12_AVX512VL
21931 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
21932 "TARGET_AVX512BITALG"
21933 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
;; GFNI instructions: GF(2^8) affine transforms and multiply.  Three
;; alternatives per pattern: legacy SSE (destructive, "0"), VEX (avx) and
;; EVEX (avx512f, maskable) — see the "isa" attr.

;; GF2P8AFFINEINVQB: affine transform of the multiplicative inverse;
;; operand 3 is the 8x8 bit-matrix immediate's addend byte.
21935 (define_insn "vgf2p8affineinvqb_<mode><mask_name>"
21936 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
21937 (unspec:VI1_AVX512F
21938 [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
21939 (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
21940 (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
21941 UNSPEC_GF2P8AFFINEINV))]
21944 gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3}
21945 vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}
21946 vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
21947 [(set_attr "isa" "noavx,avx,avx512f")
21948 (set_attr "prefix_data16" "1,*,*")
21949 (set_attr "prefix_extra" "1")
21950 (set_attr "prefix" "orig,maybe_evex,evex")
21951 (set_attr "mode" "<sseinsnmode>")])

;; GF2P8AFFINEQB: plain affine transform (no inverse), same structure.
21953 (define_insn "vgf2p8affineqb_<mode><mask_name>"
21954 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
21955 (unspec:VI1_AVX512F
21956 [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
21957 (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")
21958 (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")]
21959 UNSPEC_GF2P8AFFINE))]
21962 gf2p8affineqb\t{%3, %2, %0| %0, %2, %3}
21963 vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}
21964 vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
21965 [(set_attr "isa" "noavx,avx,avx512f")
21966 (set_attr "prefix_data16" "1,*,*")
21967 (set_attr "prefix_extra" "1")
21968 (set_attr "prefix" "orig,maybe_evex,evex")
21969 (set_attr "mode" "<sseinsnmode>")])

;; GF2P8MULB: byte-wise multiply in GF(2^8), polynomial x^8+x^4+x^3+x+1
;; per the ISA definition; no immediate operand.
21971 (define_insn "vgf2p8mulb_<mode><mask_name>"
21972 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
21973 (unspec:VI1_AVX512F
21974 [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
21975 (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")]
21979 gf2p8mulb\t{%2, %0| %0, %2}
21980 vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}
21981 vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}"
21982 [(set_attr "isa" "noavx,avx,avx512f")
21983 (set_attr "prefix_data16" "1,*,*")
21984 (set_attr "prefix_extra" "1")
21985 (set_attr "prefix" "orig,maybe_evex,evex")
21986 (set_attr "mode" "<sseinsnmode>")])
;; AVX512VBMI2 concatenate-and-shift by immediate: VPSHRD (right) and
;; VPSHLD (left) shift the double-width concatenation of operands 1 and 2
;; by immediate operand 3.

;; VPSHRDW/D/Q (shift right).
21988 (define_insn "vpshrd_<mode><mask_name>"
21989 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
21990 (unspec:VI248_AVX512VL
21991 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
21992 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
21993 (match_operand:SI 3 "const_0_to_255_operand" "n")]
21995 "TARGET_AVX512VBMI2"
21996 "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
21997 [(set_attr ("prefix") ("evex"))])

;; VPSHLDW/D/Q (shift left), same shape.
21999 (define_insn "vpshld_<mode><mask_name>"
22000 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22001 (unspec:VI248_AVX512VL
22002 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
22003 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
22004 (match_operand:SI 3 "const_0_to_255_operand" "n")]
22006 "TARGET_AVX512VBMI2"
22007 "vpshld<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
22008 [(set_attr ("prefix") ("evex"))])
;; AVX512VBMI2 VPSHRDV: concatenate-and-shift-right with per-element
;; variable shift counts from operand 3; operand 1 is tied to the
;; destination.  Plain / merge-mask / zero-mask forms follow.
22010 (define_insn "vpshrdv_<mode>"
22011 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22012 (unspec:VI248_AVX512VL
22013 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22014 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22015 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22017 "TARGET_AVX512VBMI2"
22018 "vpshrdv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
22019 [(set_attr ("prefix") ("evex"))
22020 (set_attr "mode" "<sseinsnmode>")])

;; Merge-masked: masked-off elements keep the old destination (operand 1).
22022 (define_insn "vpshrdv_<mode>_mask"
22023 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22024 (vec_merge:VI248_AVX512VL
22025 (unspec:VI248_AVX512VL
22026 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22027 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22028 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22031 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22032 "TARGET_AVX512VBMI2"
22033 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
22034 [(set_attr ("prefix") ("evex"))
22035 (set_attr "mode" "<sseinsnmode>")])

;; Zero-masking expander: forwards to _maskz_1 with a zero merge vector.
22037 (define_expand "vpshrdv_<mode>_maskz"
22038 [(match_operand:VI248_AVX512VL 0 "register_operand")
22039 (match_operand:VI248_AVX512VL 1 "register_operand")
22040 (match_operand:VI248_AVX512VL 2 "register_operand")
22041 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
22042 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22043 "TARGET_AVX512VBMI2"
22045 emit_insn (gen_vpshrdv_<mode>_maskz_1 (operands[0], operands[1],
22046 operands[2], operands[3],
22047 CONST0_RTX (<MODE>mode),
;; The zero-masked insn itself ({z} suffix in the template).
22052 (define_insn "vpshrdv_<mode>_maskz_1"
22053 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22054 (vec_merge:VI248_AVX512VL
22055 (unspec:VI248_AVX512VL
22056 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22057 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22058 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22060 (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
22061 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
22062 "TARGET_AVX512VBMI2"
22063 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
22064 [(set_attr ("prefix") ("evex"))
22065 (set_attr "mode" "<sseinsnmode>")])
;; AVX512VBMI2 VPSHLDV: variable-count concatenate-and-shift-left; mirror
;; image of the VPSHRDV group above (plain / mask / maskz expander /
;; maskz insn).
22067 (define_insn "vpshldv_<mode>"
22068 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22069 (unspec:VI248_AVX512VL
22070 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22071 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22072 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22074 "TARGET_AVX512VBMI2"
22075 "vpshldv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
22076 [(set_attr ("prefix") ("evex"))
22077 (set_attr "mode" "<sseinsnmode>")])

;; Merge-masked form.
22079 (define_insn "vpshldv_<mode>_mask"
22080 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22081 (vec_merge:VI248_AVX512VL
22082 (unspec:VI248_AVX512VL
22083 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22084 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22085 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22088 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22089 "TARGET_AVX512VBMI2"
22090 "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
22091 [(set_attr ("prefix") ("evex"))
22092 (set_attr "mode" "<sseinsnmode>")])

;; Zero-masking expander, forwards to _maskz_1 with a zero merge vector.
22094 (define_expand "vpshldv_<mode>_maskz"
22095 [(match_operand:VI248_AVX512VL 0 "register_operand")
22096 (match_operand:VI248_AVX512VL 1 "register_operand")
22097 (match_operand:VI248_AVX512VL 2 "register_operand")
22098 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
22099 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22100 "TARGET_AVX512VBMI2"
22102 emit_insn (gen_vpshldv_<mode>_maskz_1 (operands[0], operands[1],
22103 operands[2], operands[3],
22104 CONST0_RTX (<MODE>mode),
;; The zero-masked insn itself.
22109 (define_insn "vpshldv_<mode>_maskz_1"
22110 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22111 (vec_merge:VI248_AVX512VL
22112 (unspec:VI248_AVX512VL
22113 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22114 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22115 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22117 (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
22118 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
22119 "TARGET_AVX512VBMI2"
22120 "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
22121 [(set_attr ("prefix") ("evex"))
22122 (set_attr "mode" "<sseinsnmode>")])
;; AVX512VNNI vpdpbusd: multiply groups of four unsigned bytes of operand 2
;; with the corresponding signed bytes of operand 3 and accumulate the dword
;; sums into operand 1 (tied to the destination).
(define_insn "vpdpbusd_<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(unspec:VI4_AVX512VL
	  [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	   (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDUBSWACCD))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr ("prefix") ("evex"))])
;; Merge-masked vpdpbusd: unselected elements keep the accumulator value
;; from operand 1 (tied to the destination).
(define_insn "vpdpbusd_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCD)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr ("prefix") ("evex"))])
;; Zero-masking expander for vpdpbusd: delegates to the *_maskz_1 insn with a
;; zero vector as the vec_merge fallback source.
(define_expand "vpdpbusd_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpbusd_<mode>_maskz_1 (operands[0], operands[1],
					  operands[2], operands[3],
					  CONST0_RTX (<MODE>mode),
					  operands[4]));
  DONE;
})
;; Zero-masked vpdpbusd: unselected elements are zeroed (operand 4 is the
;; constant-zero merge source, operand 5 the mask register).
(define_insn "vpdpbusd_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr ("prefix") ("evex"))])
;; AVX512VNNI vpdpbusds: as vpdpbusd, but the dword accumulation saturates
;; (signed) instead of wrapping.
(define_insn "vpdpbusds_<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(unspec:VI4_AVX512VL
	  [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	   (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDUBSWACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr ("prefix") ("evex"))])
;; Merge-masked vpdpbusds: unselected elements keep the accumulator value
;; from operand 1 (tied to the destination).
(define_insn "vpdpbusds_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCSSD)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr ("prefix") ("evex"))])
;; Zero-masking expander for vpdpbusds: delegates to the *_maskz_1 insn with
;; a zero vector as the vec_merge fallback source.
(define_expand "vpdpbusds_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpbusds_<mode>_maskz_1 (operands[0], operands[1],
					   operands[2], operands[3],
					   CONST0_RTX (<MODE>mode),
					   operands[4]));
  DONE;
})
;; Zero-masked vpdpbusds: unselected elements are zeroed (operand 4 is the
;; constant-zero merge source, operand 5 the mask register).
(define_insn "vpdpbusds_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCSSD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr ("prefix") ("evex"))])
;; AVX512VNNI vpdpwssd: multiply pairs of signed words of operand 2 with the
;; corresponding signed words of operand 3 and accumulate the dword sums into
;; operand 1 (tied to the destination).
(define_insn "vpdpwssd_<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(unspec:VI4_AVX512VL
	  [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	   (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDWDACCD))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr ("prefix") ("evex"))])
;; Merge-masked vpdpwssd: unselected elements keep the accumulator value
;; from operand 1 (tied to the destination).
(define_insn "vpdpwssd_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCD)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr ("prefix") ("evex"))])
;; Zero-masking expander for vpdpwssd: delegates to the *_maskz_1 insn with a
;; zero vector as the vec_merge fallback source.
(define_expand "vpdpwssd_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpwssd_<mode>_maskz_1 (operands[0], operands[1],
					  operands[2], operands[3],
					  CONST0_RTX (<MODE>mode),
					  operands[4]));
  DONE;
})
;; Zero-masked vpdpwssd: unselected elements are zeroed (operand 4 is the
;; constant-zero merge source, operand 5 the mask register).
(define_insn "vpdpwssd_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr ("prefix") ("evex"))])
;; AVX512VNNI vpdpwssds: as vpdpwssd, but the dword accumulation saturates
;; (signed) instead of wrapping.
(define_insn "vpdpwssds_<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(unspec:VI4_AVX512VL
	  [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	   (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDWDACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0|%0, %2, %3 }"
  [(set_attr ("prefix") ("evex"))])
;; Merge-masked vpdpwssds: unselected elements keep the accumulator value
;; from operand 1 (tied to the destination).
(define_insn "vpdpwssds_<mode>_mask"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCSSD)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
  [(set_attr ("prefix") ("evex"))])
;; Zero-masking expander for vpdpwssds: delegates to the *_maskz_1 insn with
;; a zero vector as the vec_merge fallback source.
(define_expand "vpdpwssds_<mode>_maskz"
  [(match_operand:VI4_AVX512VL 0 "register_operand")
   (match_operand:VI4_AVX512VL 1 "register_operand")
   (match_operand:VI4_AVX512VL 2 "register_operand")
   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VNNI"
{
  emit_insn (gen_vpdpwssds_<mode>_maskz_1 (operands[0], operands[1],
					   operands[2], operands[3],
					   CONST0_RTX (<MODE>mode),
					   operands[4]));
  DONE;
})
;; Zero-masked vpdpwssds: unselected elements are zeroed (operand 4 is the
;; constant-zero merge source, operand 5 the mask register).
(define_insn "vpdpwssds_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCSSD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
  [(set_attr ("prefix") ("evex"))])
;; VAES vaesdec: one round of AES decryption on each 128-bit lane of
;; operand 1 using the round key in operand 2.
(define_insn "vaesdec_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESDEC))]
  "TARGET_VAES"
  "vaesdec\t{%2, %1, %0|%0, %1, %2}"
)
;; VAES vaesdeclast: final round of AES decryption on each 128-bit lane of
;; operand 1 using the round key in operand 2.
(define_insn "vaesdeclast_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESDECLAST))]
  "TARGET_VAES"
  "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
)
;; VAES vaesenc: one round of AES encryption on each 128-bit lane of
;; operand 1 using the round key in operand 2.
(define_insn "vaesenc_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESENC))]
  "TARGET_VAES"
  "vaesenc\t{%2, %1, %0|%0, %1, %2}"
)
;; VAES vaesenclast: final round of AES encryption on each 128-bit lane of
;; operand 1 using the round key in operand 2.
(define_insn "vaesenclast_<mode>"
  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL_F
	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
	  UNSPEC_VAESENCLAST))]
  "TARGET_VAES"
  "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
)
;; VPCLMULQDQ: per-128-bit-lane carry-less multiplication of one quadword of
;; operand 1 with one quadword of operand 2; immediate operand 3 selects
;; which quadword of each source participates.
(define_insn "vpclmulqdq_<mode>"
  [(set (match_operand:VI8_FVL 0 "register_operand" "=v")
	(unspec:VI8_FVL [(match_operand:VI8_FVL 1 "register_operand" "v")
			 (match_operand:VI8_FVL 2 "vector_operand" "vm")
			 (match_operand:SI 3 "const_0_to_255_operand" "n")]
			UNSPEC_VPCLMULQDQ))]
  "TARGET_VPCLMULQDQ"
  "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "DI")])
;; AVX512BITALG vpshufbitqmb: gather the bits of operand 1 selected by the
;; byte indices in operand 2 into a mask register (optionally merge-masked
;; via the mask_scalar_merge subst).
(define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
	(unspec:<avx512fmaskmode>
	  [(match_operand:VI1_AVX512VLBW 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VLBW 2 "nonimmediate_operand" "vm")]
	  UNSPEC_VPSHUFBIT))]
  "TARGET_AVX512BITALG"
  "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
;; Vector modes holding bfloat16 elements (represented as HImode lanes).
(define_mode_iterator BF16 [V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; Converting from BF to SF
(define_mode_attr bf16_cvt_2sf
  [(V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")])
;; Converting from SF to BF
(define_mode_attr sf_cvt_bf16
  [(V4SF "V8HI") (V8SF "V8HI") (V16SF "V16HI")])
;; Mapping from BF to SF
(define_mode_attr sf_bf16
  [(V4SF "V8HI") (V8SF "V16HI") (V16SF "V32HI")])
;; Zero-masking expander for vcvtne2ps2bf16: reuses the *_mask insn with a
;; zero vector as the merge source.
(define_expand "avx512f_cvtne2ps2bf16_<mode>_maskz"
  [(match_operand:BF16 0 "register_operand")
   (match_operand:<bf16_cvt_2sf> 1 "register_operand")
   (match_operand:<bf16_cvt_2sf> 2 "register_operand")
   (match_operand:<avx512fmaskmode> 3 "register_operand")]
  "TARGET_AVX512BF16"
{
  emit_insn (gen_avx512f_cvtne2ps2bf16_<mode>_mask(operands[0], operands[1],
    operands[2], CONST0_RTX(<MODE>mode), operands[3]));
  DONE;
})
;; AVX512BF16 vcvtne2ps2bf16: convert the two SF source vectors (operands 1
;; and 2) to bfloat16 and concatenate them into the destination.
(define_insn "avx512f_cvtne2ps2bf16_<mode><mask_name>"
  [(set (match_operand:BF16 0 "register_operand" "=v")
	(unspec:BF16
	  [(match_operand:<bf16_cvt_2sf> 1 "register_operand" "v")
	   (match_operand:<bf16_cvt_2sf> 2 "register_operand" "v")]
	  UNSPEC_VCVTNE2PS2BF16))]
  "TARGET_AVX512BF16"
  "vcvtne2ps2bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}")
;; Zero-masking expander for vcvtneps2bf16: reuses the *_mask insn with a
;; zero vector as the merge source.
(define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
  [(match_operand:<sf_cvt_bf16> 0 "register_operand")
   (match_operand:VF1_AVX512VL 1 "register_operand")
   (match_operand:<avx512fmaskmode> 2 "register_operand")]
  "TARGET_AVX512BF16"
{
  emit_insn (gen_avx512f_cvtneps2bf16_<mode>_mask(operands[0], operands[1],
    CONST0_RTX(<sf_cvt_bf16>mode), operands[2]));
  DONE;
})
;; AVX512BF16 vcvtneps2bf16: narrow the SF source vector (operand 1) to
;; bfloat16 elements in the destination.
(define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
  [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
	(unspec:<sf_cvt_bf16>
	  [(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
	  UNSPEC_VCVTNEPS2BF16))]
  "TARGET_AVX512BF16"
  "vcvtneps2bf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
;; Zero-masking expander for vdpbf16ps: delegates to the *_maskz_1 insn with
;; a zero vector as the merge source.
(define_expand "avx512f_dpbf16ps_<mode>_maskz"
  [(match_operand:VF1_AVX512VL 0 "register_operand")
   (match_operand:VF1_AVX512VL 1 "register_operand")
   (match_operand:<sf_bf16> 2 "register_operand")
   (match_operand:<sf_bf16> 3 "register_operand")
   (match_operand:<avx512fmaskhalfmode> 4 "register_operand")]
  "TARGET_AVX512BF16"
{
  emit_insn (gen_avx512f_dpbf16ps_<mode>_maskz_1(operands[0], operands[1],
    operands[2], operands[3], CONST0_RTX(<MODE>mode), operands[4]));
  DONE;
})
;; AVX512BF16 vdpbf16ps: dot product of bfloat16 pairs from operands 2 and 3,
;; accumulated into the SF accumulator operand 1 (tied to the destination);
;; the maskz_half subst provides the zero-masked variant.
(define_insn "avx512f_dpbf16ps_<mode><maskz_half_name>"
  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
	(unspec:VF1_AVX512VL
	  [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
	   (match_operand:<sf_bf16> 2 "register_operand" "v")
	   (match_operand:<sf_bf16> 3 "register_operand" "v")]
	  UNSPEC_VDPBF16PS))]
  "TARGET_AVX512BF16"
  "vdpbf16ps\t{%3, %2, %0<maskz_half_operand4>|%0<maskz_half_operand4>, %2, %3}")
;; Merge-masked vdpbf16ps: unselected elements keep the accumulator value
;; from operand 1 (tied to the destination).
(define_insn "avx512f_dpbf16ps_<mode>_mask"
  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VF1_AVX512VL
	  (unspec:VF1_AVX512VL
	    [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
	     (match_operand:<sf_bf16> 2 "register_operand" "v")
	     (match_operand:<sf_bf16> 3 "register_operand" "v")]
	    UNSPEC_VDPBF16PS)
	  (match_dup 1)
	  (match_operand:<avx512fmaskhalfmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512BF16"
  "vdpbf16ps\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}")