1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2021 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
54 UNSPEC_XOP_UNSIGNED_CMP
64 UNSPEC_AESKEYGENASSIST
85 ;; For AVX512F support
87 UNSPEC_UNSIGNED_FIX_NOTRUNC
102 UNSPEC_COMPRESS_STORE
114 ;; For embed. rounding feature
115 UNSPEC_EMBEDDED_ROUNDING
117 ;; For AVX512PF support
118 UNSPEC_GATHER_PREFETCH
119 UNSPEC_SCATTER_PREFETCH
121 ;; For AVX512ER support
135 ;; For AVX512BW support
143 ;; For AVX512DQ support
148 ;; For AVX512IFMA support
152 ;; For AVX512VBMI support
155 ;; For AVX5124FMAPS/AVX5124VNNIW support
162 UNSPEC_GF2P8AFFINEINV
166 ;; For AVX512VBMI2 support
172 ;; For AVX512VNNI support
173 UNSPEC_VPMADDUBSWACCD
174 UNSPEC_VPMADDUBSWACCSSD
176 UNSPEC_VPMADDWDACCSSD
184 ;; For VPCLMULQDQ support
187 ;; For AVX512BITALG support
190 ;; For VP2INTERSECT support
193 ;; For AVX512BF16 support
194 UNSPEC_VCVTNE2PS2BF16
199 (define_c_enum "unspecv" [
209 UNSPECV_AESDEC128KLU8
210 UNSPECV_AESENC128KLU8
211 UNSPECV_AESDEC256KLU8
212 UNSPECV_AESENC256KLU8
213 UNSPECV_AESDECWIDE128KLU8
214 UNSPECV_AESENCWIDE128KLU8
215 UNSPECV_AESDECWIDE256KLU8
216 UNSPECV_AESENCWIDE256KLU8
217 UNSPECV_ENCODEKEY128U32
218 UNSPECV_ENCODEKEY256U32
221 ;; All vector modes including V?TImode, used in move patterns.
;; Each row lists the 512-bit, 256-bit and 128-bit variants of one element
;; type; the string following a mode is the condition enabling that mode.
222 (define_mode_iterator VMOVE
223 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
224 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
225 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
226 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
227 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
228 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
229 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
231 ;; All AVX-512{F,VL} vector modes.  Assumes TARGET_AVX512F as baseline;
;; the 256/128-bit variants additionally require TARGET_AVX512VL.
232 (define_mode_iterator V48_AVX512VL
233 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
234 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
235 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
236 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
238 ;; 1,2 byte AVX-512{BW,VL} vector modes.  Assumes TARGET_AVX512BW baseline.
239 (define_mode_iterator VI12_AVX512VL
240 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
241 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
243 ;; Same iterator, but without assuming TARGET_AVX512BW as baseline:
;; each mode spells out its full AVX512BW/AVX512VL requirements.
244 (define_mode_iterator VI12_AVX512VLBW
245 [(V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
246 (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
247 (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; QImode-element subset, TARGET_AVX512BW assumed as baseline.
249 (define_mode_iterator VI1_AVX512VL
250 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
;; All integer and float vector modes (like VMOVE above but without the
;; V?TImode rows, and with V2DF gated on TARGET_SSE2).
253 (define_mode_iterator V
254 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
255 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
256 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
257 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
258 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
259 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
261 ;; All 128bit vector modes
262 (define_mode_iterator V_128
263 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
265 ;; All 256bit vector modes
266 (define_mode_iterator V_256
267 [V32QI V16HI V8SI V4DI V8SF V4DF])
269 ;; All 128bit and 256bit vector modes
270 (define_mode_iterator V_128_256
271 [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
273 ;; All 512bit vector modes
274 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
276 ;; All 256bit and 512bit vector modes; the 512-bit rows require AVX512F.
277 (define_mode_iterator V_256_512
278 [V32QI V16HI V8SI V4DI V8SF V4DF
279 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
280 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
282 ;; All vector float modes
283 (define_mode_iterator VF
284 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
285 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
287 ;; 128- and 256-bit float vector modes
288 (define_mode_iterator VF_128_256
289 [(V8SF "TARGET_AVX") V4SF
290 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
292 ;; All SFmode vector float modes
293 (define_mode_iterator VF1
294 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
;; As VF1, but the 256-bit variant requires AVX2 rather than AVX.
296 (define_mode_iterator VF1_AVX2
297 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX2") V4SF])
299 ;; 128- and 256-bit SF vector modes
300 (define_mode_iterator VF1_128_256
301 [(V8SF "TARGET_AVX") V4SF])
;; 256/128-bit SF modes; assumes AVX512 context, V4SF needs AVX512VL.
303 (define_mode_iterator VF1_128_256VL
304 [V8SF (V4SF "TARGET_AVX512VL")])
306 ;; All DFmode vector float modes
307 (define_mode_iterator VF2
308 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
310 ;; 128- and 256-bit DF vector modes
311 (define_mode_iterator VF2_128_256
312 [(V4DF "TARGET_AVX") V2DF])
;; 512- and 256-bit DF vector modes.
314 (define_mode_iterator VF2_512_256
315 [(V8DF "TARGET_AVX512F") V4DF])
;; 512/256-bit DF modes; assumes AVX512 context, V4DF needs AVX512VL.
317 (define_mode_iterator VF2_512_256VL
318 [V8DF (V4DF "TARGET_AVX512VL")])
320 ;; All 128bit vector float modes
321 (define_mode_iterator VF_128
322 [V4SF (V2DF "TARGET_SSE2")])
324 ;; All 256bit vector float modes
325 (define_mode_iterator VF_256
328 ;; All 512bit vector float modes
329 (define_mode_iterator VF_512
;; 4/8-byte integer vector modes; AVX512F baseline, smaller widths need VL.
332 (define_mode_iterator VI48_AVX512VL
333 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
334 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; All integer element sizes; QI/HI rows spell out their BW/VL requirements,
;; SI/DI rows assume AVX512F baseline with VL for the narrower widths.
336 (define_mode_iterator VI1248_AVX512VLBW
337 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
338 (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
339 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
340 (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
341 V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
342 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; Float vector modes; AVX512F baseline, smaller widths need AVX512VL.
344 (define_mode_iterator VF_AVX512VL
345 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
346 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
348 ;; AVX512ER SF plus 128- and 256-bit SF vector modes
349 (define_mode_iterator VF1_AVX512ER_128_256
350 [(V16SF "TARGET_AVX512ER") (V8SF "TARGET_AVX") V4SF])
;; DF vector modes; AVX512F baseline, smaller widths need AVX512VL.
352 (define_mode_iterator VF2_AVX512VL
353 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; SF vector modes; AVX512F baseline, smaller widths need AVX512VL.
355 (define_mode_iterator VF1_AVX512VL
356 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
358 ;; All vector integer modes
359 (define_mode_iterator VI
360 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
361 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
362 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
363 (V8SI "TARGET_AVX") V4SI
364 (V4DI "TARGET_AVX") V2DI])
;; As VI, but the 256-bit variants require AVX2 rather than AVX.
366 (define_mode_iterator VI_AVX2
367 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
368 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
369 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
370 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
372 ;; All QImode vector integer modes
373 (define_mode_iterator VI1
374 [(V32QI "TARGET_AVX") V16QI])
376 ;; All 128-bit modes plus the 256-bit ones gated on TARGET_AVX
377 (define_mode_iterator V_AVX
378 [V16QI V8HI V4SI V2DI V4SF V2DF
379 (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
380 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
381 (V8SF "TARGET_AVX") (V4DF"TARGET_AVX")])
383 (define_mode_iterator VI48_AVX
385 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
;; All DImode vector integer modes
387 (define_mode_iterator VI8
388 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; DImode vectors with AVX512F baseline for 512-bit and VL for 128-bit.
390 (define_mode_iterator VI8_FVL
391 [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])
;; DImode vectors; AVX512F baseline, smaller widths need AVX512VL.
393 (define_mode_iterator VI8_AVX512VL
394 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; 512- and 256-bit DImode vectors under AVX512.
396 (define_mode_iterator VI8_256_512
397 [V8DI (V4DI "TARGET_AVX512VL")])
399 (define_mode_iterator VI1_AVX2
400 [(V32QI "TARGET_AVX2") V16QI])
402 (define_mode_iterator VI1_AVX512
403 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
405 (define_mode_iterator VI1_AVX512F
406 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
;; 512/256-bit QI/HI vectors; 256-bit variants need AVX512VL.
408 (define_mode_iterator VI12_256_512_AVX512VL
409 [V64QI (V32QI "TARGET_AVX512VL")
410 V32HI (V16HI "TARGET_AVX512VL")])
412 (define_mode_iterator VI2_AVX2
413 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
414 
415 (define_mode_iterator VI2_AVX512F
416 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
418 (define_mode_iterator VI4_AVX
419 [(V8SI "TARGET_AVX") V4SI])
421 (define_mode_iterator VI4_AVX2
422 [(V8SI "TARGET_AVX2") V4SI])
424 (define_mode_iterator VI4_AVX512F
425 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
427 (define_mode_iterator VI4_AVX512VL
428 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
430 (define_mode_iterator VI48_AVX512F_AVX512VL
431 [V4SI V8SI (V16SI "TARGET_AVX512F")
432 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
434 (define_mode_iterator VI2_AVX512VL
435 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
437 (define_mode_iterator VI1_AVX512VL_F
438 [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
440 (define_mode_iterator VI8_AVX2_AVX512BW
441 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
443 (define_mode_iterator VI8_AVX2
444 [(V4DI "TARGET_AVX2") V2DI])
446 (define_mode_iterator VI8_AVX2_AVX512F
447 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
449 (define_mode_iterator VI8_AVX_AVX512F
450 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
452 (define_mode_iterator VI4_128_8_256
456 (define_mode_iterator V8FI
460 (define_mode_iterator V16FI
463 ;; ??? We should probably use TImode instead.
;; Widest-element ("max") integer vector modes used for whole-register shifts.
464 (define_mode_iterator VIMAX_AVX2_AVX512BW
465 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
467 ;; Assumes TARGET_AVX512BW as baseline
468 (define_mode_iterator VIMAX_AVX512VL
469 [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
471 (define_mode_iterator VIMAX_AVX2
472 [(V2TI "TARGET_AVX2") V1TI])
474 ;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
475 (define_mode_iterator SSESCALARMODE
476 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
478 (define_mode_iterator VI12_AVX2
479 [(V32QI "TARGET_AVX2") V16QI
480 (V16HI "TARGET_AVX2") V8HI])
482 (define_mode_iterator VI12_AVX2_AVX512BW
483 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
484 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
486 (define_mode_iterator VI24_AVX2
487 [(V16HI "TARGET_AVX2") V8HI
488 (V8SI "TARGET_AVX2") V4SI])
;; QI/HI/SI vectors; 512-bit QI needs BW, 512-bit HI/SI need AVX512F,
;; as the iterator name encodes.
490 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
491 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
492 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
493 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
495 (define_mode_iterator VI124_AVX2
496 [(V32QI "TARGET_AVX2") V16QI
497 (V16HI "TARGET_AVX2") V8HI
498 (V8SI "TARGET_AVX2") V4SI])
500 (define_mode_iterator VI2_AVX2_AVX512BW
501 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
503 (define_mode_iterator VI248_AVX512VL
505 (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
506 (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
507 (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; HI rows spell out their BW/VL requirements; SI/DI rows assume AVX512F
;; baseline with VL for the narrower widths.
509 (define_mode_iterator VI248_AVX512VLBW
510 [(V32HI "TARGET_AVX512BW")
511 (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
512 (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
513 V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
514 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
516 (define_mode_iterator VI48_AVX2
517 [(V8SI "TARGET_AVX2") V4SI
518 (V4DI "TARGET_AVX2") V2DI])
520 (define_mode_iterator VI248_AVX2
521 [(V16HI "TARGET_AVX2") V8HI
522 (V8SI "TARGET_AVX2") V4SI
523 (V4DI "TARGET_AVX2") V2DI])
;; Per the name: 512-bit DI needs AVX512F, 512-bit HI/SI need AVX512BW.
525 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
526 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
527 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
528 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
530 (define_mode_iterator VI248_AVX512BW
531 [(V32HI "TARGET_AVX512BW") V16SI V8DI])
533 (define_mode_iterator VI248_AVX512BW_AVX512VL
534 [(V32HI "TARGET_AVX512BW")
535 (V4DI "TARGET_AVX512VL") V16SI V8DI])
537 ;; Suppose TARGET_AVX512VL as baseline
538 (define_mode_iterator VI248_AVX512BW_1
539 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
543 (define_mode_iterator VI248_AVX512BW_2
544 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
;; 4/8-byte integer vectors; only the 512-bit variants are conditional.
548 (define_mode_iterator VI48_AVX512F
549 [(V16SI "TARGET_AVX512F") V8SI V4SI
550 (V8DI "TARGET_AVX512F") V4DI V2DI])
552 (define_mode_iterator VI48_AVX_AVX512F
553 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
554 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
556 (define_mode_iterator VI12_AVX_AVX512F
557 [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
558 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
560 (define_mode_iterator V48_AVX2
563 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
564 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; QImode vectors: 512-bit needs AVX512BW, 256/128-bit need AVX512VL.
566 (define_mode_iterator VI1_AVX512VLBW
567 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL")
568 (V16QI "TARGET_AVX512VL")])
;; ISA name used in pattern names for the masked form of each mode.
570 (define_mode_attr avx512
571 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
572 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
573 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
574 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
575 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
576 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
;; Register constraint per mode: "Yw" where AVX512BW+VL may be required
;; for the 128/256-bit QI/HI (and TI) forms, plain "v" elsewhere.
578 (define_mode_attr v_Yw
579 [(V16QI "Yw") (V32QI "Yw") (V64QI "v")
580 (V8HI "Yw") (V16HI "Yw") (V32HI "v")
581 (V4SI "v") (V8SI "v") (V16SI "v")
582 (V2DI "v") (V4DI "v") (V8DI "v")
583 (V4SF "v") (V8SF "v") (V16SF "v")
584 (V2DF "v") (V4DF "v") (V8DF "v")
585 (TI "Yw") (V1TI "Yw") (V2TI "Yw") (V4TI "v")])
;; ISA prefix attributes: map each vector mode to the instruction-set name
;; used when constructing pattern names for that mode.
587 (define_mode_attr sse2_avx_avx512f
588 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
589 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
590 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
591 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
592 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
593 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
595 (define_mode_attr sse2_avx2
596 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
597 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
598 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
599 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
600 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
602 (define_mode_attr ssse3_avx2
603 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
604 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
605 (V4SI "ssse3") (V8SI "avx2")
606 (V2DI "ssse3") (V4DI "avx2")
607 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
609 (define_mode_attr sse4_1_avx2
610 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
611 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
612 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
613 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
615 (define_mode_attr avx_avx2
616 [(V4SF "avx") (V2DF "avx")
617 (V8SF "avx") (V4DF "avx")
618 (V4SI "avx2") (V2DI "avx2")
619 (V8SI "avx2") (V4DI "avx2")])
621 (define_mode_attr vec_avx2
622 [(V16QI "vec") (V32QI "avx2")
623 (V8HI "vec") (V16HI "avx2")
624 (V4SI "vec") (V8SI "avx2")
625 (V2DI "vec") (V4DI "avx2")])
627 (define_mode_attr avx2_avx512
628 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
629 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
630 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
631 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
632 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
;; "f" for float shuffles, "i" for integer shuffles (mnemonic selection).
634 (define_mode_attr shuffletype
635 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
636 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
637 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
638 (V32HI "i") (V16HI "i") (V8HI "i")
639 (V64QI "i") (V32QI "i") (V16QI "i")
640 (V4TI "i") (V2TI "i") (V1TI "i")])
;; Quarter-width vector mode of a 512-bit mode.
642 (define_mode_attr ssequartermode
643 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
;; Insn mode used for the quarter-width part (integer quarters use TI).
645 (define_mode_attr ssequarterinsnmode
646 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "TI") (V8DI "TI")])
;; {z}/{y}/{x} memory-size suffix by vector width (AVX512 asm syntax).
648 (define_mode_attr vecmemsuffix
649 [(V16SF "{z}") (V8SF "{y}") (V4SF "{x}")
650 (V8DF "{z}") (V4DF "{y}") (V2DF "{x}")])
;; Lower-case mode name with twice-as-wide elements (same element count).
652 (define_mode_attr ssedoublemodelower
653 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
654 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
655 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
657 (define_mode_attr ssedoublemode
658 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
659 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
660 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
661 (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
662 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
663 (V4DI "V8DI") (V8DI "V16DI")])
;; QImode vector of the same total size.
665 (define_mode_attr ssebytemode
666 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")
667 (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")])
669 ;; All 128bit vector integer modes
670 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
672 ;; All 256bit vector integer modes
673 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
675 ;; All 128 and 256bit vector integer modes
676 (define_mode_iterator VI_128_256 [V16QI V8HI V4SI V2DI V32QI V16HI V8SI V4DI])
678 ;; Various 128bit vector integer mode combinations
679 (define_mode_iterator VI12_128 [V16QI V8HI])
680 (define_mode_iterator VI14_128 [V16QI V4SI])
681 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
682 (define_mode_iterator VI24_128 [V8HI V4SI])
683 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
684 (define_mode_iterator VI248_256 [V16HI V8SI V4DI])
685 (define_mode_iterator VI248_512 [V32HI V16SI V8DI])
686 (define_mode_iterator VI48_128 [V4SI V2DI])
687 (define_mode_iterator VI148_512 [V64QI V16SI V8DI])
688 (define_mode_iterator VI148_256 [V32QI V8SI V4DI])
689 (define_mode_iterator VI148_128 [V16QI V4SI V2DI])
691 ;; Various 256bit and 512bit vector integer mode combinations
692 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
693 (define_mode_iterator VI124_256_AVX512F_AVX512BW
695 (V64QI "TARGET_AVX512BW")
696 (V32HI "TARGET_AVX512BW")
697 (V16SI "TARGET_AVX512F")])
698 (define_mode_iterator VI48_256 [V8SI V4DI])
699 (define_mode_iterator VI48_512 [V16SI V8DI])
700 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
;; 512-bit integer vectors; QI/HI element variants require AVX512BW.
701 (define_mode_iterator VI_AVX512BW
702 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
704 ;; Int-float size matches: pairs of integer and float modes with the
;; same element width and count.
705 (define_mode_iterator VI4F_128 [V4SI V4SF])
706 (define_mode_iterator VI8F_128 [V2DI V2DF])
707 (define_mode_iterator VI4F_256 [V8SI V8SF])
708 (define_mode_iterator VI8F_256 [V4DI V4DF])
709 (define_mode_iterator VI4F_256_512
711 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
712 (define_mode_iterator VI48F_256_512
714 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
715 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
716 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
;; All 512-bit modes regardless of element type.
717 (define_mode_iterator VF48_I1248
718 [V16SI V16SF V8DI V8DF V32HI V64QI])
;; 4/8-byte int and float vectors; 512-bit unconditional, smaller need VL.
719 (define_mode_iterator VI48F
720 [V16SI V16SF V8DI V8DF
721 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
722 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
723 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
724 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; As VI48F plus the QI/HI integer vectors with their BW/VL requirements.
725 (define_mode_iterator VI12_VI48F_AVX512VLBW
726 [(V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
727 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
728 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
729 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
730 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
731 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
732 (V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
733 (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
734 (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
736 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
738 (define_mode_iterator VF_AVX512
739 [(V4SF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
740 (V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
;; Embedded-broadcast operand suffix ({1toN}) per vector mode.
743 (define_mode_attr avx512bcst
744 [(V4SI "%{1to4%}") (V2DI "%{1to2%}")
745 (V8SI "%{1to8%}") (V4DI "%{1to4%}")
746 (V16SI "%{1to16%}") (V8DI "%{1to8%}")
747 (V4SF "%{1to4%}") (V2DF "%{1to2%}")
748 (V8SF "%{1to8%}") (V4DF "%{1to4%}")
749 (V16SF "%{1to16%}") (V8DF "%{1to8%}")])
751 ;; Mapping from float mode to required SSE level
752 (define_mode_attr sse
753 [(SF "sse") (DF "sse2")
754 (V4SF "sse") (V2DF "sse2")
755 (V16SF "avx512f") (V8SF "avx")
756 (V8DF "avx512f") (V4DF "avx")])
;; ISA prefix for QI/DI element vectors by width.
758 (define_mode_attr sse2
759 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
760 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
762 (define_mode_attr sse3
763 [(V16QI "sse3") (V32QI "avx")])
765 (define_mode_attr sse4_1
766 [(V4SF "sse4_1") (V2DF "sse4_1")
767 (V8SF "avx") (V4DF "avx")
769 (V4DI "avx") (V2DI "sse4_1")
770 (V8SI "avx") (V4SI "sse4_1")
771 (V16QI "sse4_1") (V32QI "avx")
772 (V8HI "sse4_1") (V16HI "avx")])
;; "512"/"256"/"" size suffix appended to pattern names; 128-bit modes
;; deliberately map to the empty string.
774 (define_mode_attr avxsizesuffix
775 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
776 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
777 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
778 (V16SF "512") (V8DF "512")
779 (V8SF "256") (V4DF "256")
780 (V4SF "") (V2DF "")])
782 ;; SSE instruction mode
783 (define_mode_attr sseinsnmode
784 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
785 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
786 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
787 (V16SF "V16SF") (V8DF "V8DF")
788 (V8SF "V8SF") (V4DF "V4DF")
789 (V4SF "V4SF") (V2DF "V2DF")
792 ;; SSE constant -1 constraint: "BC" for integer vectors, "BF" for float.
793 (define_mode_attr sseconstm1
794 [(V64QI "BC") (V32HI "BC") (V16SI "BC") (V8DI "BC") (V4TI "BC")
795 (V32QI "BC") (V16HI "BC") (V8SI "BC") (V4DI "BC") (V2TI "BC")
796 (V16QI "BC") (V8HI "BC") (V4SI "BC") (V2DI "BC") (V1TI "BC")
797 (V16SF "BF") (V8DF "BF")
798 (V8SF "BF") (V4DF "BF")
799 (V4SF "BF") (V2DF "BF")])
801 ;; Mapping of vector modes to corresponding mask size
;; (one mask bit per vector element, so QI covers up to 8 elements, etc.)
802 (define_mode_attr avx512fmaskmode
803 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
804 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
805 (V16SI "HI") (V8SI "QI") (V4SI "QI")
806 (V8DI "QI") (V4DI "QI") (V2DI "QI")
807 (V16SF "HI") (V8SF "QI") (V4SF "QI")
808 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
810 ;; Mapping of vector modes to corresponding mask size (lower-case names)
811 (define_mode_attr avx512fmaskmodelower
812 [(V64QI "di") (V32QI "si") (V16QI "hi")
813 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
814 (V16SI "hi") (V8SI "qi") (V4SI "qi")
815 (V8DI "qi") (V4DI "qi") (V2DI "qi")
816 (V16SF "hi") (V8SF "qi") (V4SF "qi")
817 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
819 ;; Mapping of vector modes to corresponding mask half size
820 (define_mode_attr avx512fmaskhalfmode
821 [(V64QI "SI") (V32QI "HI") (V16QI "QI")
822 (V32HI "HI") (V16HI "QI") (V8HI "QI") (V4HI "QI")
823 (V16SI "QI") (V8SI "QI") (V4SI "QI")
824 (V8DI "QI") (V4DI "QI") (V2DI "QI")
825 (V16SF "QI") (V8SF "QI") (V4SF "QI")
826 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
828 ;; Mapping of vector float modes to an integer mode of the same size
;; (integer modes map to themselves).
829 (define_mode_attr sseintvecmode
830 [(V16SF "V16SI") (V8DF "V8DI")
831 (V8SF "V8SI") (V4DF "V4DI")
832 (V4SF "V4SI") (V2DF "V2DI")
833 (V16SI "V16SI") (V8DI "V8DI")
834 (V8SI "V8SI") (V4DI "V4DI")
835 (V4SI "V4SI") (V2DI "V2DI")
836 (V16HI "V16HI") (V8HI "V8HI")
837 (V32HI "V32HI") (V64QI "V64QI")
838 (V32QI "V32QI") (V16QI "V16QI")])
;; Integer insn mode (XI/OI/TI) matching a float vector's total width.
840 (define_mode_attr sseintvecmode2
841 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
842 (V8SF "OI") (V4SF "TI")])
;; Lower-case variant of sseintvecmode.
844 (define_mode_attr sseintvecmodelower
845 [(V16SF "v16si") (V8DF "v8di")
846 (V8SF "v8si") (V4DF "v4di")
847 (V4SF "v4si") (V2DF "v2di")
848 (V8SI "v8si") (V4DI "v4di")
849 (V4SI "v4si") (V2DI "v2di")
850 (V16HI "v16hi") (V8HI "v8hi")
851 (V32QI "v32qi") (V16QI "v16qi")])
853 ;; Mapping of vector modes to a vector mode of double size
854 (define_mode_attr ssedoublevecmode
855 [(V64QI "V128QI") (V32HI "V64HI") (V16SI "V32SI") (V8DI "V16DI")
856 (V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
857 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
858 (V16SF "V32SF") (V8DF "V16DF")
859 (V8SF "V16SF") (V4DF "V8DF")
860 (V4SF "V8SF") (V2DF "V4DF")])
862 ;; Mapping of vector modes to a vector mode of half size
863 ;; instead of V1DI/V1DF, DI/DF are used for V2DI/V2DF although they are scalar.
864 (define_mode_attr ssehalfvecmode
865 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
866 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
867 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V2DI "DI")
868 (V16SF "V8SF") (V8DF "V4DF")
869 (V8SF "V4SF") (V4DF "V2DF")
870 (V4SF "V2SF") (V2DF "DF")])
872 (define_mode_attr ssehalfvecmodelower
873 [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
874 (V32QI "v16qi") (V16HI "v8hi") (V8SI "v4si") (V4DI "v2di")
875 (V16QI "v8qi") (V8HI "v4hi") (V4SI "v2si")
876 (V16SF "v8sf") (V8DF "v4df")
877 (V8SF "v4sf") (V4DF "v2df")
880 ;; Mapping of vector modes to packed single mode of the same size
881 (define_mode_attr ssePSmode
882 [(V16SI "V16SF") (V8DF "V16SF")
883 (V16SF "V16SF") (V8DI "V16SF")
884 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
885 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
886 (V8SI "V8SF") (V4SI "V4SF")
887 (V4DI "V8SF") (V2DI "V4SF")
888 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
889 (V8SF "V8SF") (V4SF "V4SF")
890 (V4DF "V8SF") (V2DF "V4SF")])
;; Packed single mode with the same element count (not the same size).
892 (define_mode_attr ssePSmode2
893 [(V8DI "V8SF") (V4DI "V4SF")])
895 ;; Mapping of vector modes back to the scalar modes
896 (define_mode_attr ssescalarmode
897 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
898 (V32HI "HI") (V16HI "HI") (V8HI "HI")
899 (V16SI "SI") (V8SI "SI") (V4SI "SI")
900 (V8DI "DI") (V4DI "DI") (V2DI "DI")
901 (V16SF "SF") (V8SF "SF") (V4SF "SF")
902 (V8DF "DF") (V4DF "DF") (V2DF "DF")
903 (V4TI "TI") (V2TI "TI")])
905 ;; Mapping of vector modes back to the scalar modes (lower-case names)
906 (define_mode_attr ssescalarmodelower
907 [(V64QI "qi") (V32QI "qi") (V16QI "qi")
908 (V32HI "hi") (V16HI "hi") (V8HI "hi")
909 (V16SI "si") (V8SI "si") (V4SI "si")
910 (V8DI "di") (V4DI "di") (V2DI "di")
911 (V16SF "sf") (V8SF "sf") (V4SF "sf")
912 (V8DF "df") (V4DF "df") (V2DF "df")
913 (V4TI "ti") (V2TI "ti")])
915 ;; Mapping of vector modes to the 128bit modes with the same element type
916 (define_mode_attr ssexmmmode
917 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
918 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
919 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
920 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
921 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
922 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
924 ;; Pointer size override for scalar modes (Intel asm dialect)
925 (define_mode_attr iptr
926 [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
927 (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
928 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
929 (V16SF "k") (V8DF "q")
930 (V8SF "k") (V4DF "q")
931 (V4SF "k") (V2DF "q")
934 ;; Mapping of vector modes to VPTERNLOG suffix
;; (QI/HI vectors reuse the "d" form since VPTERNLOG has no b/w variants).
935 (define_mode_attr ternlogsuffix
936 [(V8DI "q") (V4DI "q") (V2DI "q")
937 (V16SI "d") (V8SI "d") (V4SI "d")
938 (V32HI "d") (V16HI "d") (V8HI "d")
939 (V64QI "d") (V32QI "d") (V16QI "d")])
941 ;; Number of scalar elements in each vector type
942 (define_mode_attr ssescalarnum
943 [(V64QI "64") (V16SI "16") (V8DI "8")
944 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
945 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
946 (V16SF "16") (V8DF "8")
947 (V8SF "8") (V4DF "4")
948 (V4SF "4") (V2DF "2")])
950 ;; Mask of scalar elements in each vector type (element count minus one)
951 (define_mode_attr ssescalarnummask
952 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
953 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
954 (V8SF "7") (V4DF "3")
955 (V4SF "3") (V2DF "1")])
;; Element size in bits (TI vectors treated as 64-bit granules).
957 (define_mode_attr ssescalarsize
958 [(V4TI "64") (V2TI "64") (V1TI "64")
959 (V8DI "64") (V4DI "64") (V2DI "64")
960 (V64QI "8") (V32QI "8") (V16QI "8")
961 (V32HI "16") (V16HI "16") (V8HI "16")
962 (V16SI "32") (V8SI "32") (V4SI "32")
963 (V16SF "32") (V8SF "32") (V4SF "32")
964 (V8DF "64") (V4DF "64") (V2DF "64")])
966 ;; SSE prefix for integer vector modes
967 (define_mode_attr sseintprefix
968 [(V2DI "p") (V2DF "")
973 (V16SI "p") (V16SF "")
974 (V16QI "p") (V8HI "p")
975 (V32QI "p") (V16HI "p")
976 (V64QI "p") (V32HI "p")])
978 ;; SSE scalar suffix for vector modes
979 (define_mode_attr ssescalarmodesuffix
981 (V16SF "ss") (V8DF "sd")
982 (V8SF "ss") (V4DF "sd")
983 (V4SF "ss") (V2DF "sd")
984 (V16SI "d") (V8DI "q")
985 (V8SI "d") (V4DI "q")
986 (V4SI "d") (V2DI "q")])
988 ;; Pack/unpack vector modes
989 (define_mode_attr sseunpackmode
990 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
991 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
992 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
;; Inverse of sseunpackmode: target mode when packing to narrower elements.
994 (define_mode_attr ssepackmode
995 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
996 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
997 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
999 ;; Mapping of the max integer size for xop rotate immediate constraint
1000 (define_mode_attr sserotatemax
1001 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
1003 ;; Mapping of mode to cast intrinsic name
1004 (define_mode_attr castmode
1005 [(V8SI "si") (V8SF "ps") (V4DF "pd")
1006 (V16SI "si") (V16SF "ps") (V8DF "pd")])
1008 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
1009 ;; i64x4 or f64x4 for 512bit modes.
1010 (define_mode_attr i128
1011 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
1012 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
1013 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
1015 ;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
1016 ;; i32x4, f32x4, i64x2 or f64x2 suffixes.
1017 (define_mode_attr i128vldq
1018 [(V8SF "f32x4") (V4DF "f64x2")
1019 (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
;; 256/512-bit modes composed of two half-width parts.
1022 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
1023 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
1025 ;; Mapping for dbpsadbw modes
1026 (define_mode_attr dbpsadbwmode
1027 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
1029 ;; Mapping suffixes for broadcast
1030 (define_mode_attr bcstscalarsuff
1031 [(V64QI "b") (V32QI "b") (V16QI "b")
1032 (V32HI "w") (V16HI "w") (V8HI "w")
1033 (V16SI "d") (V8SI "d") (V4SI "d")
1034 (V8DI "q") (V4DI "q") (V2DI "q")
1035 (V16SF "ss") (V8SF "ss") (V4SF "ss")
1036 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
1038 ;; Tie mode of assembler operand to mode iterator:
;; x = xmm (128-bit), t = ymm (256-bit), g = zmm (512-bit) operand modifier.
1039 (define_mode_attr xtg_mode
1040 [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
1041 (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
1042 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
1044 ;; Half mask mode for unpacks
1045 (define_mode_attr HALFMASKMODE
1046 [(DI "SI") (SI "HI")])
1048 ;; Double mask mode for packs
1049 (define_mode_attr DOUBLEMASKMODE
1050 [(HI "SI") (SI "DI")])
1053 ;; Include define_subst patterns for instructions with mask
1054 (include "subst.md")
1056 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
1058 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1062 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1064 ;; All of these patterns are enabled for SSE1 as well as SSE2.
1065 ;; This is essential for maintaining stable calling conventions.
1067 (define_expand "mov<mode>"
1068 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1069 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1072 ix86_expand_vector_move (<MODE>mode, operands);
;; Vector move insn.  Alternatives: zero (C), all-ones (<sseconstm1>),
;; load (vm), store/reg-move (v).  One side must be a register; the output
;; routine is chosen from the per-alternative "type" attribute below.
1076 (define_insn "mov<mode>_internal"
1077 [(set (match_operand:VMOVE 0 "nonimmediate_operand"
1079 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
1080 " C,<sseconstm1>,vm,v"))]
1082 && (register_operand (operands[0], <MODE>mode)
1083 || register_operand (operands[1], <MODE>mode))"
1085 switch (get_attr_type (insn))
1088 return standard_sse_constant_opcode (insn, operands);
1091 return ix86_output_ssemov (insn, operands);
1097 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
1098 (set_attr "prefix" "maybe_vex")
;; The "mode" attribute selects a cheaper V4SF encoding when the target
;; tuning prefers packed-single forms or SSE2 is unavailable.
1100 (cond [(match_test "TARGET_AVX")
1101 (const_string "<sseinsnmode>")
1102 (ior (not (match_test "TARGET_SSE2"))
1103 (match_test "optimize_function_for_size_p (cfun)"))
1104 (const_string "V4SF")
1105 (and (match_test "<MODE>mode == V2DFmode")
1106 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1107 (const_string "V4SF")
1108 (and (eq_attr "alternative" "3")
1109 (match_test "TARGET_SSE_TYPELESS_STORES"))
1110 (const_string "V4SF")
1111 (and (eq_attr "alternative" "0")
1112 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
1115 (const_string "<sseinsnmode>")))
;; The all-ones alternative needs PCMPEQ, i.e. SSE2 for 16 bytes and
;; AVX2 for 32 bytes.
1116 (set (attr "enabled")
1117 (cond [(and (match_test "<MODE_SIZE> == 16")
1118 (eq_attr "alternative" "1"))
1119 (symbol_ref "TARGET_SSE2")
1120 (and (match_test "<MODE_SIZE> == 32")
1121 (eq_attr "alternative" "1"))
1122 (symbol_ref "TARGET_AVX2")
1124 (symbol_ref "true")))])
1126 ;; If mem_addr points to a memory region with less than whole vector size bytes
1127 ;; of accessible memory and k is a mask that would prevent reading the inaccessible
1128 ;; bytes from mem_addr, add UNSPEC_MASKLOAD to prevent it to be transformed to vpblendd
;; Masked load expander for 32/64-bit element modes.  An all-ones mask
;; degenerates to a plain move; a memory source is wrapped in the unspec
;; so later passes cannot rewrite it into a (faulting) full-width load.
1130 (define_expand "<avx512>_load<mode>_mask"
1131 [(set (match_operand:V48_AVX512VL 0 "register_operand")
1132 (vec_merge:V48_AVX512VL
1133 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
1134 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
1135 (match_operand:<avx512fmaskmode> 3 "register_or_constm1_operand")))]
1138 if (CONST_INT_P (operands[3]))
1140 emit_insn (gen_rtx_SET (operands[0], operands[1]))
1143 else if (MEM_P (operands[1]))
1144 operands[1] = gen_rtx_UNSPEC (<MODE>mode,
1145 gen_rtvec(1, operands[1]),
;; Masked load insn: emits vmovu*/vmova* for float element modes and
;; vmovdqu*/vmovdqa* for integer ones; %N2 adds {z} for a zero merge.
1149 (define_insn "*<avx512>_load<mode>_mask"
1150 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1151 (vec_merge:V48_AVX512VL
1152 (unspec:V48_AVX512VL
1153 [(match_operand:V48_AVX512VL 1 "memory_operand" "m")]
1155 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C")
1156 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1159 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1161 if (misaligned_operand (operands[1], <MODE>mode))
1162 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1164 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1168 if (misaligned_operand (operands[1], <MODE>mode))
1169 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1171 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1174 [(set_attr "type" "ssemov")
1175 (set_attr "prefix" "evex")
1176 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked form of the UNSPEC_MASKLOAD wrapper: split back to a plain load.
1178 (define_insn_and_split "*<avx512>_load<mode>"
1179 [(set (match_operand:V48_AVX512VL 0 "register_operand")
1180 (unspec:V48_AVX512VL
1181 [(match_operand:V48_AVX512VL 1 "memory_operand")]
1186 [(set (match_dup 0) (match_dup 1))])
;; Byte/word-element counterpart of the masked load expander above;
;; same all-ones-mask shortcut and UNSPEC_MASKLOAD memory wrapping.
1188 (define_expand "<avx512>_load<mode>_mask"
1189 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
1190 (vec_merge:VI12_AVX512VL
1191 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
1192 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
1193 (match_operand:<avx512fmaskmode> 3 "register_or_constm1_operand")))]
1196 if (CONST_INT_P (operands[3]))
1198 emit_insn (gen_rtx_SET (operands[0], operands[1]));
1201 else if (MEM_P (operands[1]))
1202 operands[1] = gen_rtx_UNSPEC (<MODE>mode,
1203 gen_rtvec(1, operands[1]),
;; Byte/word masked load: only the unaligned vmovdqu8/16 form exists.
1208 (define_insn "*<avx512>_load<mode>_mask"
1209 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1210 (vec_merge:VI12_AVX512VL
1211 (unspec:VI12_AVX512VL
1212 [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
1214 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
1215 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1217 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1218 [(set_attr "type" "ssemov")
1219 (set_attr "prefix" "evex")
1220 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked UNSPEC_MASKLOAD wrapper for byte/word modes: split to plain load.
1222 (define_insn_and_split "*<avx512>_load<mode>"
1223 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1224 (unspec:VI12_AVX512VL
1225 [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
1230 [(set (match_dup 0) (match_dup 1))])
;; Masked scalar register-to-register move (vmovss/vmovsd with a {k} mask);
;; %N3 appends {z} when operand 3 is the zero vector.
1232 (define_insn "avx512f_mov<ssescalarmodelower>_mask"
1233 [(set (match_operand:VF_128 0 "register_operand" "=v")
1236 (match_operand:VF_128 2 "register_operand" "v")
1237 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
1238 (match_operand:QI 4 "register_operand" "Yk"))
1239 (match_operand:VF_128 1 "register_operand" "v")
1242 "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
1243 [(set_attr "type" "ssemov")
1244 (set_attr "prefix" "evex")
1245 (set_attr "mode" "<ssescalarmode>")])
;; Masked scalar load expander: broadcast-then-merge form whose upper
;; elements come from a zero vector (operand 4, created here).
1247 (define_expand "avx512f_load<mode>_mask"
1248 [(set (match_operand:<ssevecmode> 0 "register_operand")
1249 (vec_merge:<ssevecmode>
1250 (vec_merge:<ssevecmode>
1251 (vec_duplicate:<ssevecmode>
1252 (match_operand:MODEF 1 "memory_operand"))
1253 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand")
1254 (match_operand:QI 3 "register_operand"))
1258 "operands[4] = CONST0_RTX (<ssevecmode>mode);")
;; Masked scalar (SF/DF) load merging into the low element.  Fix: the
;; Intel-syntax half of the template must annotate the mask operand as
;; %{%3%}, matching the AT&T half and every sibling masked-move pattern;
;; the previous "%{3%}" was malformed and emitted a broken {k} annotation.
1260 (define_insn "*avx512f_load<mode>_mask"
1261 [(set (match_operand:<ssevecmode> 0 "register_operand" "=v")
1262 (vec_merge:<ssevecmode>
1263 (vec_merge:<ssevecmode>
1264 (vec_duplicate:<ssevecmode>
1265 (match_operand:MODEF 1 "memory_operand" "m"))
1266 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
1267 (match_operand:QI 3 "register_operand" "Yk"))
1268 (match_operand:<ssevecmode> 4 "const0_operand" "C")
1271 "vmov<ssescalarmodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1272 [(set_attr "type" "ssemov")
1273 (set_attr "prefix" "evex")
1274 (set_attr "memory" "load")
1275 (set_attr "mode" "<MODE>")])
;; Masked scalar store: writes the low element of operand 1 under mask
;; bit 0 of operand 2 (no zeroing form for stores).
1277 (define_insn "avx512f_store<mode>_mask"
1278 [(set (match_operand:MODEF 0 "memory_operand" "=m")
1280 (and:QI (match_operand:QI 2 "register_operand" "Yk")
1283 (match_operand:<ssevecmode> 1 "register_operand" "v")
1284 (parallel [(const_int 0)]))
1287 "vmov<ssescalarmodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1288 [(set_attr "type" "ssemov")
1289 (set_attr "prefix" "evex")
1290 (set_attr "memory" "store")
1291 (set_attr "mode" "<MODE>")])
;; Masked blend for 32/64-bit element modes.  When source and destination
;; registers differ a true vblendm* is required; otherwise a masked move
;; (vmovu*/vmova*/vmovdq*) is cheaper and emitted instead.
1293 (define_insn "<avx512>_blendm<mode>"
1294 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1295 (vec_merge:V48_AVX512VL
1296 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm,vm")
1297 (match_operand:V48_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
1298 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1301 if (REG_P (operands[1])
1302 && REGNO (operands[1]) != REGNO (operands[0]))
1303 return "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}";
1305 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1307 if (misaligned_operand (operands[2], <MODE>mode))
1308 return "vmovu<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1310 return "vmova<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1314 if (misaligned_operand (operands[2], <MODE>mode))
1315 return "vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1317 return "vmovdqa<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1320 [(set_attr "type" "ssemov")
1321 (set_attr "prefix" "evex")
1322 (set_attr "mode" "<sseinsnmode>")])
;; Byte/word-element blend: masked vmovdqu8/16 or vpblendmb/w.
1324 (define_insn "<avx512>_blendm<mode>"
1325 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1326 (vec_merge:VI12_AVX512VL
1327 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm,vm")
1328 (match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
1329 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1332 vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}
1333 vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1334 [(set_attr "type" "ssemov")
1335 (set_attr "prefix" "evex")
1336 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector store for 32/64-bit element modes; chooses the
;; aligned/unaligned float or integer move based on mode and alignment.
1338 (define_insn "<avx512>_store<mode>_mask"
1339 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1340 (vec_merge:V48_AVX512VL
1341 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1343 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1346 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1348 if (misaligned_operand (operands[0], <MODE>mode))
1349 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1351 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1355 if (misaligned_operand (operands[0], <MODE>mode))
1356 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1358 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1361 [(set_attr "type" "ssemov")
1362 (set_attr "prefix" "evex")
1363 (set_attr "memory" "store")
1364 (set_attr "mode" "<sseinsnmode>")])
;; Byte/word-element masked store: only the unaligned vmovdqu8/16 form.
1366 (define_insn "<avx512>_store<mode>_mask"
1367 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1368 (vec_merge:VI12_AVX512VL
1369 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1371 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1373 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1374 [(set_attr "type" "ssemov")
1375 (set_attr "prefix" "evex")
1376 (set_attr "memory" "store")
1377 (set_attr "mode" "<sseinsnmode>")])
;; movq: copy the low 64-bit element of a V2DI, zeroing the upper half.
1379 (define_insn "sse2_movq128"
1380 [(set (match_operand:V2DI 0 "register_operand" "=v")
1383 (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1384 (parallel [(const_int 0)]))
1387 "%vmovq\t{%1, %0|%0, %q1}"
1388 [(set_attr "type" "ssemov")
1389 (set_attr "prefix" "maybe_vex")
1390 (set_attr "mode" "TI")])
1392 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1393 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1394 ;; from memory, we'd prefer to load the memory directly into the %xmm
1395 ;; register. To facilitate this happy circumstance, this pattern won't
1396 ;; split until after register allocation. If the 64-bit value didn't
1397 ;; come from memory, this is the best we can do. This is much better
1398 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; Post-reload split: SSE4.1 uses pinsrd for the high half; otherwise a
;; second xmm scratch plus punpckldq assembles the two 32-bit halves.
1401 (define_insn_and_split "movdi_to_sse"
1402 [(set (match_operand:V4SI 0 "register_operand" "=x,x,?x")
1403 (unspec:V4SI [(match_operand:DI 1 "nonimmediate_operand" "r,m,r")]
1404 UNSPEC_MOVDI_TO_SSE))
1405 (clobber (match_scratch:V4SI 2 "=X,X,&x"))]
1406 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1408 "&& reload_completed"
1411 if (register_operand (operands[1], DImode))
1413 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1414 Assemble the 64-bit DImode value in an xmm register. */
1415 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1416 gen_lowpart (SImode, operands[1])));
1418 emit_insn (gen_sse4_1_pinsrd (operands[0], operands[0],
1419 gen_highpart (SImode, operands[1]),
1423 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1424 gen_highpart (SImode, operands[1])));
1425 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1429 else if (memory_operand (operands[1], DImode))
1430 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
1431 operands[1], const0_rtx));
1436 [(set_attr "isa" "sse4,*,*")])
;; Splitters: rewrite a vector load of a zero-extended scalar as a scalar
;; load merged with zeros (V4SF broadcast-merge / V2DF vec_concat form).
1439 [(set (match_operand:V4SF 0 "register_operand")
1440 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1441 "TARGET_SSE && reload_completed"
1444 (vec_duplicate:V4SF (match_dup 1))
1448 operands[1] = gen_lowpart (SFmode, operands[1]);
1449 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF variant of the splitter above.
1453 [(set (match_operand:V2DF 0 "register_operand")
1454 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1455 "TARGET_SSE2 && reload_completed"
1456 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1458 operands[1] = gen_lowpart (DFmode, operands[1]);
1459 operands[2] = CONST0_RTX (DFmode);
;; Misaligned vector move expander; legalization handled in the backend.
1462 (define_expand "movmisalign<mode>"
1463 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1464 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1467 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1471 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
;; Splitter: a low-half load-with-zero followed by a high-half load from
;; the adjacent address becomes one unaligned V2DF load.
1473 [(set (match_operand:V2DF 0 "sse_reg_operand")
1474 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1475 (match_operand:DF 4 "const0_operand")))
1476 (set (match_operand:V2DF 2 "sse_reg_operand")
1477 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1478 (parallel [(const_int 0)]))
1479 (match_operand:DF 3 "memory_operand")))]
1480 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1481 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1482 [(set (match_dup 2) (match_dup 5))]
1483 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
;; Same merge when the low half arrives as a scalar DF register load.
1486 [(set (match_operand:DF 0 "sse_reg_operand")
1487 (match_operand:DF 1 "memory_operand"))
1488 (set (match_operand:V2DF 2 "sse_reg_operand")
1489 (vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1490 (match_operand:DF 3 "memory_operand")))]
1491 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1492 && REGNO (operands[4]) == REGNO (operands[2])
1493 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1494 [(set (match_dup 2) (match_dup 5))]
1495 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1497 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
1499 [(set (match_operand:DF 0 "memory_operand")
1500 (vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1501 (parallel [(const_int 0)])))
1502 (set (match_operand:DF 2 "memory_operand")
1503 (vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1504 (parallel [(const_int 1)])))]
1505 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1506 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1507 [(set (match_dup 4) (match_dup 1))]
1508 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
;; SSE3 lddqu: unaligned integer vector load kept as an unspec so it is
;; not merged with ordinary loads (lddqu may read a wider cache window).
1510 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1511 [(set (match_operand:VI1 0 "register_operand" "=x")
1512 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1515 "%vlddqu\t{%1, %0|%0, %1}"
1516 [(set_attr "type" "ssemov")
1517 (set_attr "movu" "1")
1518 (set (attr "prefix_data16")
1520 (match_test "TARGET_AVX")
1522 (const_string "0")))
1523 (set (attr "prefix_rep")
1525 (match_test "TARGET_AVX")
1527 (const_string "1")))
1528 (set_attr "prefix" "maybe_vex")
1529 (set_attr "mode" "<sseinsnmode>")])
;; Non-temporal scalar integer store (movnti).
1531 (define_insn "sse2_movnti<mode>"
1532 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1533 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1536 "movnti\t{%1, %0|%0, %1}"
1537 [(set_attr "type" "ssemov")
1538 (set_attr "prefix_data16" "0")
1539 (set_attr "mode" "<MODE>")])
;; Non-temporal packed float store (movntps/movntpd).
1541 (define_insn "<sse>_movnt<mode>"
1542 [(set (match_operand:VF 0 "memory_operand" "=m")
1544 [(match_operand:VF 1 "register_operand" "v")]
1547 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1548 [(set_attr "type" "ssemov")
1549 (set_attr "prefix" "maybe_vex")
1550 (set_attr "mode" "<MODE>")])
;; Non-temporal packed integer store (movntdq).
1552 (define_insn "<sse2>_movnt<mode>"
1553 [(set (match_operand:VI8 0 "memory_operand" "=m")
1554 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1557 "%vmovntdq\t{%1, %0|%0, %1}"
1558 [(set_attr "type" "ssecvt")
1559 (set (attr "prefix_data16")
1561 (match_test "TARGET_AVX")
1563 (const_string "1")))
1564 (set_attr "prefix" "maybe_vex")
1565 (set_attr "mode" "<sseinsnmode>")])
1567 ; Expand patterns for non-temporal stores. At the moment, only those
1568 ; that directly map to insns are defined; it would be possible to
1569 ; define patterns for other modes that would expand to several insns.
1571 ;; Modes handled by storent patterns.
1572 (define_mode_iterator STORENT_MODE
1573 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1574 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1575 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1576 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1577 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Named non-temporal store expander used by the middle end.
1579 (define_expand "storent<mode>"
1580 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1581 (unspec:STORENT_MODE
1582 [(match_operand:STORENT_MODE 1 "register_operand")]
1586 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1590 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1592 ;; All integer modes with AVX512BW/DQ.
1593 (define_mode_iterator SWI1248_AVX512BWDQ
1594 [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1596 ;; All integer modes with AVX512BW, where HImode operation
1597 ;; can be used instead of QImode.
1598 (define_mode_iterator SWI1248_AVX512BW
1599 [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1601 ;; All integer modes with AVX512BW/DQ, even HImode requires DQ.
1602 (define_mode_iterator SWI1248_AVX512BWDQ2
1603 [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1604 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
;; kmov expander: mask-register move, rejecting mem-to-mem forms.
1606 (define_expand "kmov<mskmodesuffix>"
1607 [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1608 (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1610 && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
;; Mask-register logic (kand/kor/kxor).  The UNSPEC_MASKOP marker keeps
;; these from being treated as ordinary GPR logic.  QImode falls back to
;; the word form (k*w) when AVX512DQ's byte form is unavailable.
1612 (define_insn "k<code><mode>"
1613 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1614 (any_logic:SWI1248_AVX512BW
1615 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1616 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1617 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1620 if (get_attr_mode (insn) == MODE_HI)
1621 return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1623 return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1625 [(set_attr "type" "msklog")
1626 (set_attr "prefix" "vex")
1628 (cond [(and (match_test "<MODE>mode == QImode")
1629 (not (match_test "TARGET_AVX512DQ")))
1632 (const_string "<MODE>")))])
;; Post-reload splitter: GPR-style logic whose operands all live in mask
;; registers is rewritten into the flags-free k-instruction form above.
1635 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1636 (any_logic:SWI1248_AVX512BW
1637 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
1638 (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))
1639 (clobber (reg:CC FLAGS_REG))]
1640 "TARGET_AVX512F && reload_completed"
1643 (any_logic:SWI1248_AVX512BW (match_dup 1) (match_dup 2)))
1644 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
;; kandn: and-not on mask registers (operand 1 is inverted).
1646 (define_insn "kandn<mode>"
1647 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1648 (and:SWI1248_AVX512BW
1649 (not:SWI1248_AVX512BW
1650 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1651 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1652 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1655 if (get_attr_mode (insn) == MODE_HI)
1656 return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1658 return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1660 [(set_attr "type" "msklog")
1661 (set_attr "prefix" "vex")
1663 (cond [(and (match_test "<MODE>mode == QImode")
1664 (not (match_test "TARGET_AVX512DQ")))
1667 (const_string "<MODE>")))])
;; Post-reload splitter for and-not on mask registers.
1670 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1671 (and:SWI1248_AVX512BW
1672 (not:SWI1248_AVX512BW
1673 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand"))
1674 (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))
1675 (clobber (reg:CC FLAGS_REG))]
1676 "TARGET_AVX512F && reload_completed"
1679 (and:SWI1248_AVX512BW
1680 (not:SWI1248_AVX512BW (match_dup 1))
1682 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
;; kxnor: complemented xor on mask registers.
1684 (define_insn "kxnor<mode>"
1685 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1686 (not:SWI1248_AVX512BW
1687 (xor:SWI1248_AVX512BW
1688 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1689 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1690 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1693 if (get_attr_mode (insn) == MODE_HI)
1694 return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1696 return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1698 [(set_attr "type" "msklog")
1699 (set_attr "prefix" "vex")
1701 (cond [(and (match_test "<MODE>mode == QImode")
1702 (not (match_test "TARGET_AVX512DQ")))
1705 (const_string "<MODE>")))])
;; knot: bitwise complement of a mask register; QImode uses knotw
;; unless AVX512DQ provides the byte form.
1707 (define_insn "knot<mode>"
1708 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1709 (not:SWI1248_AVX512BW
1710 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1711 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1714 if (get_attr_mode (insn) == MODE_HI)
1715 return "knotw\t{%1, %0|%0, %1}";
1717 return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1719 [(set_attr "type" "msklog")
1720 (set_attr "prefix" "vex")
1722 (cond [(and (match_test "<MODE>mode == QImode")
1723 (not (match_test "TARGET_AVX512DQ")))
1726 (const_string "<MODE>")))])
;; Post-reload splitter: a plain NOT between mask registers becomes knot.
1729 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1730 (not:SWI1248_AVX512BW
1731 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")))]
1732 "TARGET_AVX512F && reload_completed"
1735 (not:SWI1248_AVX512BW (match_dup 1)))
1736 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
;; knotd with implicit zero-extension of the SI result to DI.
1738 (define_insn "*knotsi_1_zext"
1739 [(set (match_operand:DI 0 "register_operand" "=k")
1741 (not:SI (match_operand:SI 1 "register_operand" "k"))))
1742 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1744 "knotd\t{%1, %0|%0, %1}";
1745 [(set_attr "type" "msklog")
1746 (set_attr "prefix" "vex")
1747 (set_attr "mode" "SI")])
;; Post-reload splitter for the zero-extended knot form above.
1750 [(set (match_operand:DI 0 "mask_reg_operand")
1752 (not:SI (match_operand:SI 1 "mask_reg_operand"))))]
1753 "TARGET_AVX512BW && reload_completed"
1757 (not:SI (match_dup 1))))
1758 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
;; kadd: addition of mask registers (AVX512DQ for 8/16-bit,
;; AVX512BW for 32/64-bit, per the iterator's conditions).
1760 (define_insn "kadd<mode>"
1761 [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
1762 (plus:SWI1248_AVX512BWDQ2
1763 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
1764 (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
1765 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1767 "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1768 [(set_attr "type" "msklog")
1769 (set_attr "prefix" "vex")
1770 (set_attr "mode" "<MODE>")])
1772 ;; Mask variant shift mnemonics
1773 (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
;; kshiftl/kshiftr by an immediate count.
1775 (define_insn "k<code><mode>"
1776 [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
1777 (any_lshift:SWI1248_AVX512BWDQ
1778 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
1779 (match_operand 2 "const_0_to_255_operand" "n")))
1780 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1782 "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1783 [(set_attr "type" "msklog")
1784 (set_attr "prefix" "vex")
1785 (set_attr "mode" "<MODE>")])
;; Post-reload splitter: GPR-style shift of a mask register becomes kshift.
1788 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1789 (any_lshift:SWI1248_AVX512BW
1790 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
1791 (match_operand 2 "const_int_operand")))
1792 (clobber (reg:CC FLAGS_REG))]
1793 "TARGET_AVX512F && reload_completed"
1796 (any_lshift:SWI1248_AVX512BW
1799 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
;; ktest: sets flags from a pair of mask registers.
1801 (define_insn "ktest<mode>"
1802 [(set (reg:CC FLAGS_REG)
1804 [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
1805 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
1808 "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
1809 [(set_attr "mode" "<MODE>")
1810 (set_attr "type" "msklog")
1811 (set_attr "prefix" "vex")])
;; kortest: flags from the OR of two mask registers.
1813 (define_insn "kortest<mode>"
1814 [(set (reg:CC FLAGS_REG)
1816 [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
1817 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
1820 "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
1821 [(set_attr "mode" "<MODE>")
1822 (set_attr "type" "msklog")
1823 (set_attr "prefix" "vex")])
;; kunpckbw: concatenate two QI masks into an HI mask.
1825 (define_insn "kunpckhi"
1826 [(set (match_operand:HI 0 "register_operand" "=k")
1829 (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
1831 (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
1833 "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
1834 [(set_attr "mode" "HI")
1835 (set_attr "type" "msklog")
1836 (set_attr "prefix" "vex")])
;; kunpckwd: concatenate two HI masks into an SI mask.
1838 (define_insn "kunpcksi"
1839 [(set (match_operand:SI 0 "register_operand" "=k")
1842 (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
1844 (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
1846 "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
1847 [(set_attr "mode" "SI")])
;; kunpckdq: concatenate two SI masks into a DI mask.
1849 (define_insn "kunpckdi"
1850 [(set (match_operand:DI 0 "register_operand" "=k")
1853 (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
1855 (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
1857 "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
1858 [(set_attr "mode" "DI")])
1861 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1863 ;; Parallel floating point arithmetic
1865 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; abs/neg expander: implemented as a logic op against a sign-bit mask
;; generated by ix86_expand_fp_absneg_operator.
1867 (define_expand "<code><mode>2"
1868 [(set (match_operand:VF 0 "register_operand")
1870 (match_operand:VF 1 "register_operand")))]
1872 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; abs/neg insn carrying the mask constant as a (use ...); split after
;; reload into the plain and/xor, commuting operands for noavx forms.
1874 (define_insn_and_split "*<code><mode>2"
1875 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1877 (match_operand:VF 1 "vector_operand" "0,xBm,v,m")))
1878 (use (match_operand:VF 2 "vector_operand" "xBm,0,vm,v"))]
1881 "&& reload_completed"
1883 (<absneg_op>:VF (match_dup 1) (match_dup 2)))]
1887 if (MEM_P (operands[1]))
1888 std::swap (operands[1], operands[2]);
1892 if (operands_match_p (operands[0], operands[2]))
1893 std::swap (operands[1], operands[2]);
1896 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; nabs (negated absolute value): OR with the sign-bit mask.
1898 (define_insn_and_split "*nabs<mode>2"
1899 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1902 (match_operand:VF 1 "vector_operand" "0,xBm,v,m"))))
1903 (use (match_operand:VF 2 "vector_operand" "xBm,0,vm,v"))]
1906 "&& reload_completed"
1908 (ior:VF (match_dup 1) (match_dup 2)))]
1912 if (MEM_P (operands[1]))
1913 std::swap (operands[1], operands[2]);
1917 if (operands_match_p (operands[0], operands[2]))
1918 std::swap (operands[1], operands[2]);
1921 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Conditional (masked) add/sub expander; forwards to the _mask insn.
1923 (define_expand "cond_<insn><mode>"
1924 [(set (match_operand:VF 0 "register_operand")
1927 (match_operand:VF 2 "vector_operand")
1928 (match_operand:VF 3 "vector_operand"))
1929 (match_operand:VF 4 "nonimm_or_0_operand")
1930 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
1931 "<MODE_SIZE> == 64 || TARGET_AVX512VL"
1933 emit_insn (gen_<insn><mode>3_mask (operands[0],
;; Packed add/sub expander (with mask/rounding subst variants).
1941 (define_expand "<insn><mode>3<mask_name><round_name>"
1942 [(set (match_operand:VF 0 "register_operand")
1944 (match_operand:VF 1 "<round_nimm_predicate>")
1945 (match_operand:VF 2 "<round_nimm_predicate>")))]
1946 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1947 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Packed add/sub insn: legacy two-operand SSE or three-operand VEX/EVEX.
1949 (define_insn "*<insn><mode>3<mask_name><round_name>"
1950 [(set (match_operand:VF 0 "register_operand" "=x,v")
1952 (match_operand:VF 1 "<bcst_round_nimm_predicate>" "<comm>0,v")
1953 (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
1954 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1955 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1957 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1958 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1959 [(set_attr "isa" "noavx,avx")
1960 (set_attr "type" "sseadd")
1961 (set_attr "prefix" "<bcst_mask_prefix3>")
1962 (set_attr "mode" "<MODE>")])
1964 ;; Standard scalar operation patterns which preserve the rest of the
1965 ;; vector for combiner.
1966 (define_insn "*<sse>_vm<insn><mode>3"
1967 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1969 (vec_duplicate:VF_128
1970 (plusminus:<ssescalarmode>
1971 (vec_select:<ssescalarmode>
1972 (match_operand:VF_128 1 "register_operand" "0,v")
1973 (parallel [(const_int 0)]))
1974 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
1979 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1980 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1981 [(set_attr "isa" "noavx,avx")
1982 (set_attr "type" "sseadd")
1983 (set_attr "prefix" "orig,vex")
1984 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic-level scalar add/sub (addss/subsd etc.), masked/rounded forms.
1986 (define_insn "<sse>_vm<insn><mode>3<mask_scalar_name><round_scalar_name>"
1987 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1990 (match_operand:VF_128 1 "register_operand" "0,v")
1991 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
1996 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1997 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1998 [(set_attr "isa" "noavx,avx")
1999 (set_attr "type" "sseadd")
2000 (set_attr "prefix" "<round_scalar_prefix>")
2001 (set_attr "mode" "<ssescalarmode>")])
;; Conditional (masked) multiply expander; forwards to mul<mode>3_mask.
2003 (define_expand "cond_mul<mode>"
2004 [(set (match_operand:VF 0 "register_operand")
2007 (match_operand:VF 2 "vector_operand")
2008 (match_operand:VF 3 "vector_operand"))
2009 (match_operand:VF 4 "nonimm_or_0_operand")
2010 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
2011 "<MODE_SIZE> == 64 || TARGET_AVX512VL"
2013 emit_insn (gen_mul<mode>3_mask (operands[0],
;; Packed multiply expander (with mask/rounding subst variants).
2021 (define_expand "mul<mode>3<mask_name><round_name>"
2022 [(set (match_operand:VF 0 "register_operand")
2024 (match_operand:VF 1 "<round_nimm_predicate>")
2025 (match_operand:VF 2 "<round_nimm_predicate>")))]
2026 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2027 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Packed multiply insn; "%0" marks operand 1 commutative.
2029 (define_insn "*mul<mode>3<mask_name><round_name>"
2030 [(set (match_operand:VF 0 "register_operand" "=x,v")
2032 (match_operand:VF 1 "<bcst_round_nimm_predicate>" "%0,v")
2033 (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
2034 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
2035 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2037 mul<ssemodesuffix>\t{%2, %0|%0, %2}
2038 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
2039 [(set_attr "isa" "noavx,avx")
2040 (set_attr "type" "ssemul")
2041 (set_attr "prefix" "<bcst_mask_prefix3>")
2042 (set_attr "btver2_decode" "direct,double")
2043 (set_attr "mode" "<MODE>")])
2045 ;; Standard scalar operation patterns which preserve the rest of the
2046 ;; vector for combiner.
2047 (define_insn "*<sse>_vm<multdiv_mnemonic><mode>3"
2048 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2050 (vec_duplicate:VF_128
2051 (multdiv:<ssescalarmode>
2052 (vec_select:<ssescalarmode>
2053 (match_operand:VF_128 1 "register_operand" "0,v")
2054 (parallel [(const_int 0)]))
2055 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
2060 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
2061 v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2062 [(set_attr "isa" "noavx,avx")
2063 (set_attr "type" "sse<multdiv_mnemonic>")
2064 (set_attr "prefix" "orig,vex")
2065 (set_attr "btver2_decode" "direct,double")
2066 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic-level scalar mul/div (mulss/divsd etc.), masked/rounded forms.
2068 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
2069 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2072 (match_operand:VF_128 1 "register_operand" "0,v")
2073 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
2078 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2079 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
2080 [(set_attr "isa" "noavx,avx")
2081 (set_attr "type" "sse<multdiv_mnemonic>")
2082 (set_attr "prefix" "<round_scalar_prefix>")
2083 (set_attr "btver2_decode" "direct,double")
2084 (set_attr "mode" "<ssescalarmode>")])
;; Parallel double-precision division expander.
;; NOTE(review): the condition/body lines appear elided in this extraction.
2086 (define_expand "div<mode>3"
2087 [(set (match_operand:VF2 0 "register_operand")
2088 (div:VF2 (match_operand:VF2 1 "register_operand")
2089 (match_operand:VF2 2 "vector_operand")))]
;; Parallel single-precision division expander; under -mrecip-style flags
;; (TARGET_RECIP_VEC_DIV + unsafe/finite math, not optimizing for size) it
;; emits a software Newton-Raphson reciprocal sequence instead.
2092 (define_expand "div<mode>3"
2093 [(set (match_operand:VF1 0 "register_operand")
2094 (div:VF1 (match_operand:VF1 1 "register_operand")
2095 (match_operand:VF1 2 "vector_operand")))]
2099 && TARGET_RECIP_VEC_DIV
2100 && !optimize_insn_for_size_p ()
2101 && flag_finite_math_only && !flag_trapping_math
2102 && flag_unsafe_math_optimizations)
2104 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Conditional (masked) division: operand 1 is the AVX512 mask, operand 4
;; the fallback value; expands to the masked div insn below.
2109 (define_expand "cond_div<mode>"
2110 [(set (match_operand:VF 0 "register_operand")
2113 (match_operand:VF 2 "register_operand")
2114 (match_operand:VF 3 "vector_operand"))
2115 (match_operand:VF 4 "nonimm_or_0_operand")
2116 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
2117 "<MODE_SIZE> == 64 || TARGET_AVX512VL"
2119 emit_insn (gen_<sse>_div<mode>3_mask (operands[0],
;; divps/divpd and the VEX/EVEX forms (masking and embedded rounding via
;; the <mask_name>/<round_name> subst attributes).
2127 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
2128 [(set (match_operand:VF 0 "register_operand" "=x,v")
2130 (match_operand:VF 1 "register_operand" "0,v")
2131 (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
2132 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2134 div<ssemodesuffix>\t{%2, %0|%0, %2}
2135 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
2136 [(set_attr "isa" "noavx,avx")
2137 (set_attr "type" "ssediv")
2138 (set_attr "prefix" "<bcst_mask_prefix3>")
2139 (set_attr "mode" "<MODE>")])
;; rcpps: approximate parallel reciprocal (128/256-bit single precision).
2141 (define_insn "<sse>_rcp<mode>2"
2142 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2144 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
2146 "%vrcpps\t{%1, %0|%0, %1}"
2147 [(set_attr "type" "sse")
2148 (set_attr "atom_sse_attr" "rcp")
2149 (set_attr "btver2_sse_attr" "rcp")
2150 (set_attr "prefix" "maybe_vex")
2151 (set_attr "mode" "SF")]) ; NOTE(review): original attr text kept below
2153 (define_insn "sse_vmrcpv4sf2"
2154 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2156 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2158 (match_operand:V4SF 2 "register_operand" "0,x")
2162 rcpss\t{%1, %0|%0, %k1}
2163 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
2164 [(set_attr "isa" "noavx,avx")
2165 (set_attr "type" "sse")
2166 (set_attr "atom_sse_attr" "rcp")
2167 (set_attr "btver2_sse_attr" "rcp")
2168 (set_attr "prefix" "orig,vex")
2169 (set_attr "mode" "SF")])
;; Combiner variant of rcpss taking an SF (rather than V4SF) input.
2171 (define_insn "*sse_vmrcpv4sf2"
2172 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2175 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
2177 (match_operand:V4SF 2 "register_operand" "0,x")
2181 rcpss\t{%1, %0|%0, %1}
2182 vrcpss\t{%1, %2, %0|%0, %2, %1}"
2183 [(set_attr "isa" "noavx,avx")
2184 (set_attr "type" "sse")
2185 (set_attr "atom_sse_attr" "rcp")
2186 (set_attr "btver2_sse_attr" "rcp")
2187 (set_attr "prefix" "orig,vex")
2188 (set_attr "mode" "SF")])
;; vrcp14ps/pd: AVX512 14-bit-precision parallel reciprocal, maskable.
2190 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
2191 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2193 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2196 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2197 [(set_attr "type" "sse")
2198 (set_attr "prefix" "evex")
2199 (set_attr "mode" "<MODE>")])
;; vrcp14ss/sd: scalar 14-bit reciprocal; upper elements come from
;; operand 2.
2201 (define_insn "srcp14<mode>"
2202 [(set (match_operand:VF_128 0 "register_operand" "=v")
2205 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2207 (match_operand:VF_128 2 "register_operand" "v")
2210 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2211 [(set_attr "type" "sse")
2212 (set_attr "prefix" "evex")
2213 (set_attr "mode" "<MODE>")])
;; Masked scalar 14-bit reciprocal; merge- vs. zero-masking selected via
;; operand 3 ("0C") and the %N3 modifier in the template.
2215 (define_insn "srcp14<mode>_mask"
2216 [(set (match_operand:VF_128 0 "register_operand" "=v")
2220 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2222 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2223 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2224 (match_operand:VF_128 2 "register_operand" "v")
2227 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2228 [(set_attr "type" "sse")
2229 (set_attr "prefix" "evex")
2230 (set_attr "mode" "<MODE>")])
;; Parallel double-precision square-root expander.
;; NOTE(review): the condition line appears elided in this extraction.
2232 (define_expand "sqrt<mode>2"
2233 [(set (match_operand:VF2 0 "register_operand")
2234 (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
;; Parallel single-precision square-root expander; under
;; TARGET_RECIP_VEC_SQRT + unsafe/finite math it emits a software
;; rsqrt-based Newton-Raphson sequence instead.
2237 (define_expand "sqrt<mode>2"
2238 [(set (match_operand:VF1 0 "register_operand")
2239 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
2243 && TARGET_RECIP_VEC_SQRT
2244 && !optimize_insn_for_size_p ()
2245 && flag_finite_math_only && !flag_trapping_math
2246 && flag_unsafe_math_optimizations)
2248 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; sqrtps/sqrtpd and the VEX/EVEX (masked, embedded-rounding) forms.
2253 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
2254 [(set (match_operand:VF 0 "register_operand" "=x,v")
2255 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
2256 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2258 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
2259 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
2260 [(set_attr "isa" "noavx,avx")
2261 (set_attr "type" "sse")
2262 (set_attr "atom_sse_attr" "sqrt")
2263 (set_attr "btver2_sse_attr" "sqrt")
2264 (set_attr "prefix" "maybe_vex")
2265 (set_attr "mode" "<MODE>")])
;; sqrtss/sqrtsd: scalar square root merged into operand 2's vector.
2267 (define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2268 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2271 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
2272 (match_operand:VF_128 2 "register_operand" "0,v")
2276 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
2277 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_scalar_mask_op3>}"
2278 [(set_attr "isa" "noavx,avx")
2279 (set_attr "type" "sse")
2280 (set_attr "atom_sse_attr" "sqrt")
2281 (set_attr "prefix" "<round_scalar_prefix>")
2282 (set_attr "btver2_sse_attr" "sqrt")
2283 (set_attr "mode" "<ssescalarmode>")])
;; Combiner variant taking a scalar-mode operand directly.
2285 (define_insn "*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2286 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2288 (vec_duplicate:VF_128
2289 (sqrt:<ssescalarmode>
2290 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "xm,<round_scalar_constraint>")))
2291 (match_operand:VF_128 2 "register_operand" "0,v")
2295 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
2296 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_scalar_mask_op3>}"
2297 [(set_attr "isa" "noavx,avx")
2298 (set_attr "type" "sse")
2299 (set_attr "atom_sse_attr" "sqrt")
2300 (set_attr "prefix" "<round_scalar_prefix>")
2301 (set_attr "btver2_sse_attr" "sqrt")
2302 (set_attr "mode" "<ssescalarmode>")])
;; rsqrt expander: always lowered through ix86_emit_swsqrtsf with
;; recip=true (software reciprocal-sqrt refinement).
2304 (define_expand "rsqrt<mode>2"
2305 [(set (match_operand:VF1_AVX512ER_128_256 0 "register_operand")
2306 (unspec:VF1_AVX512ER_128_256
2307 [(match_operand:VF1_AVX512ER_128_256 1 "vector_operand")]
2309 "TARGET_SSE && TARGET_SSE_MATH"
2311 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; rsqrtps: approximate parallel reciprocal square root.
2315 (define_insn "<sse>_rsqrt<mode>2"
2316 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2318 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
2320 "%vrsqrtps\t{%1, %0|%0, %1}"
2321 [(set_attr "type" "sse")
2322 (set_attr "prefix" "maybe_vex")
2323 (set_attr "mode" "<MODE>")])
;; vrsqrt14ps/pd: AVX512 14-bit parallel reciprocal square root, maskable.
2325 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
2326 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2328 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2331 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2332 [(set_attr "type" "sse")
2333 (set_attr "prefix" "evex")
2334 (set_attr "mode" "<MODE>")])
;; vrsqrt14ss/sd: scalar form; upper elements come from operand 2.
2336 (define_insn "rsqrt14<mode>"
2337 [(set (match_operand:VF_128 0 "register_operand" "=v")
2340 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2342 (match_operand:VF_128 2 "register_operand" "v")
2345 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2346 [(set_attr "type" "sse")
2347 (set_attr "prefix" "evex")
2348 (set_attr "mode" "<MODE>")])
;; Masked scalar vrsqrt14; merge- vs. zero-masking via operand 3 / %N3.
2350 (define_insn "rsqrt14_<mode>_mask"
2351 [(set (match_operand:VF_128 0 "register_operand" "=v")
2355 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2357 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2358 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2359 (match_operand:VF_128 2 "register_operand" "v")
2362 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2363 [(set_attr "type" "sse")
2364 (set_attr "prefix" "evex")
2365 (set_attr "mode" "<MODE>")])
;; rsqrtss: scalar reciprocal square root merged into a V4SF operand.
2367 (define_insn "sse_vmrsqrtv4sf2"
2368 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2370 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2372 (match_operand:V4SF 2 "register_operand" "0,x")
2376 rsqrtss\t{%1, %0|%0, %k1}
2377 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
2378 [(set_attr "isa" "noavx,avx")
2379 (set_attr "type" "sse")
2380 (set_attr "prefix" "orig,vex")
2381 (set_attr "mode" "SF")])
;; Combiner variant of rsqrtss with an SF (rather than V4SF) input.
2383 (define_insn "*sse_vmrsqrtv4sf2"
2384 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2387 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
2389 (match_operand:V4SF 2 "register_operand" "0,x")
2393 rsqrtss\t{%1, %0|%0, %1}
2394 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
2395 [(set_attr "isa" "noavx,avx")
2396 (set_attr "type" "sse")
2397 (set_attr "prefix" "orig,vex")
2398 (set_attr "mode" "SF")])
;; Conditional (masked) smin/smax: operand 1 is the AVX512 mask, operand 4
;; the fallback value; expands to the masked min/max insn.
2400 (define_expand "cond_<code><mode>"
2401 [(set (match_operand:VF 0 "register_operand")
2404 (match_operand:VF 2 "vector_operand")
2405 (match_operand:VF 3 "vector_operand"))
2406 (match_operand:VF 4 "nonimm_or_0_operand")
2407 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
2408 "<MODE_SIZE> == 64 || TARGET_AVX512VL"
2410 emit_insn (gen_<code><mode>3_mask (operands[0],
;; smin/smax expander.  When -0.0 or NaN semantics matter
;; (!flag_finite_math_only || flag_signed_zeros), dispatch to the
;; IEEE-ordered unspec patterns below instead of the bare RTL min/max.
2418 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
2419 [(set (match_operand:VF 0 "register_operand")
2421 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
2422 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
2423 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2425 if (!flag_finite_math_only || flag_signed_zeros)
2427 operands[1] = force_reg (<MODE>mode, operands[1]);
2428 emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
2429 (operands[0], operands[1], operands[2]
2430 <mask_operand_arg34>
2431 <round_saeonly_mask_arg3>));
2435 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
2438 ;; These versions of the min/max patterns are intentionally ignorant of
2439 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
2440 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
2441 ;; are undefined in this condition, we're certain this is correct.
2443 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
2444 [(set (match_operand:VF 0 "register_operand" "=x,v")
2446 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
2447 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
2449 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
2450 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2452 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
2453 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2454 [(set_attr "isa" "noavx,avx")
2455 (set_attr "type" "sseadd")
2456 (set_attr "btver2_sse_attr" "maxmin")
2457 (set_attr "prefix" "<mask_prefix3>")
2458 (set_attr "mode" "<MODE>")])
2460 ;; These versions of the min/max patterns implement exactly the operations
2461 ;;   min = (op1 < op2 ? op1 : op2)
2462 ;;   max = (!(op1 < op2) ? op1 : op2)
2463 ;; Their operands are not commutative, and thus they may be used in the
2464 ;; presence of -0.0 and NaN.
2466 (define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
2467 [(set (match_operand:VF 0 "register_operand" "=x,v")
2469 [(match_operand:VF 1 "register_operand" "0,v")
2470 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
2473 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2475 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
2476 v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2477 [(set_attr "isa" "noavx,avx")
2478 (set_attr "type" "sseadd")
2479 (set_attr "btver2_sse_attr" "maxmin")
2480 (set_attr "prefix" "<mask_prefix3>")
2481 (set_attr "mode" "<MODE>")])
2483 ;; Standard scalar operation patterns which preserve the rest of the
2484 ;; vector for combiner.
;; Scalar IEEE-ordered min/max on element 0; upper elements kept from
;; operand 1.
2485 (define_insn "*ieee_<ieee_maxmin><mode>3"
2486 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2488 (vec_duplicate:VF_128
2489 (unspec:<ssescalarmode>
2490 [(vec_select:<ssescalarmode>
2491 (match_operand:VF_128 1 "register_operand" "0,v")
2492 (parallel [(const_int 0)]))
2493 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")]
2499 <ieee_maxmin><ssescalarmodesuffix>\t{%2, %0|%0, %2}
2500 v<ieee_maxmin><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2501 [(set_attr "isa" "noavx,avx")
2502 (set_attr "type" "sseadd")
2503 (set_attr "btver2_sse_attr" "maxmin")
2504 (set_attr "prefix" "orig,vex")
2505 (set_attr "mode" "<ssescalarmode>")])
;; Intrinsic-level scalar min/max with AVX512 masking and SAE support.
2507 (define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
2508 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2511 (match_operand:VF_128 1 "register_operand" "0,v")
2512 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_scalar_constraint>"))
2517 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2518 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
2519 [(set_attr "isa" "noavx,avx")
2520 (set_attr "type" "sse")
2521 (set_attr "btver2_sse_attr" "maxmin")
2522 (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2523 (set_attr "mode" "<ssescalarmode>")])
;; vec_merge selector for addsubps/addsubpd: set bits pick the MINUS arm
;; (even lanes), clear bits the PLUS arm -- 1 = 0b1, 5 = 0b0101,
;; 85 = 0b01010101.
2525 (define_mode_attr addsub_cst [(V4DF "5") (V2DF "1")
2526 (V4SF "5") (V8SF "85")])
;; SSE3/AVX addsubps/addsubpd: subtract in even lanes, add in odd lanes.
2528 (define_insn "vec_addsub<mode>3"
2529 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2530 (vec_merge:VF_128_256
2532 (match_operand:VF_128_256 1 "register_operand" "0,x")
2533 (match_operand:VF_128_256 2 "vector_operand" "xBm, xm"))
2534 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2535 (const_int <addsub_cst>)))]
2538 addsub<ssemodesuffix>\t{%2, %0|%0, %2}
2539 vaddsub<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2540 [(set_attr "isa" "noavx,avx")
2541 (set_attr "type" "sseadd")
2542 (set (attr "atom_unit")
2544 (match_test "<MODE>mode == V2DFmode")
2545 (const_string "complex")
2546 (const_string "*")))
2547 (set_attr "prefix" "orig,vex")
2548 (set (attr "prefix_rep")
2550 (and (match_test "<MODE>mode == V4SFmode")
2551 (eq_attr "alternative" "0"))
2553 (const_string "*")))
2554 (set_attr "mode" "<MODE>")])
;; NOTE(review): the opening (define_split ...) lines of the following four
;; splitters appear to be elided from this extraction.
;; Canonicalize a minus/plus vec_merge (in either operand order) into the
;; vec_addsub form when the same operand pair feeds both arms.
2557 [(set (match_operand:VF_128_256 0 "register_operand")
2558 (match_operator:VF_128_256 6 "addsub_vm_operator"
2560 (match_operand:VF_128_256 1 "register_operand")
2561 (match_operand:VF_128_256 2 "vector_operand"))
2563 (match_operand:VF_128_256 3 "vector_operand")
2564 (match_operand:VF_128_256 4 "vector_operand"))
2565 (match_operand 5 "const_int_operand")]))]
2567 && can_create_pseudo_p ()
2568 && ((rtx_equal_p (operands[1], operands[3])
2569 && rtx_equal_p (operands[2], operands[4]))
2570 || (rtx_equal_p (operands[1], operands[4])
2571 && rtx_equal_p (operands[2], operands[3])))"
2573 (vec_merge:VF_128_256
2574 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2575 (plus:VF_128_256 (match_dup 1) (match_dup 2))
;; Same, but with PLUS and MINUS arms swapped in the source; the merge
;; mask is bitwise-inverted below to compensate.
2579 [(set (match_operand:VF_128_256 0 "register_operand")
2580 (match_operator:VF_128_256 6 "addsub_vm_operator"
2582 (match_operand:VF_128_256 1 "vector_operand")
2583 (match_operand:VF_128_256 2 "vector_operand"))
2585 (match_operand:VF_128_256 3 "register_operand")
2586 (match_operand:VF_128_256 4 "vector_operand"))
2587 (match_operand 5 "const_int_operand")]))]
2589 && can_create_pseudo_p ()
2590 && ((rtx_equal_p (operands[1], operands[3])
2591 && rtx_equal_p (operands[2], operands[4]))
2592 || (rtx_equal_p (operands[1], operands[4])
2593 && rtx_equal_p (operands[2], operands[3])))"
2595 (vec_merge:VF_128_256
2596 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2597 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2600 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. */
2602 = GEN_INT (~INTVAL (operands[5])
2603 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
;; Recognize the vec_select-of-vec_concat form (lanes picked from a
;; double-width concat of the minus and plus results) as vec_addsub.
2607 [(set (match_operand:VF_128_256 0 "register_operand")
2608 (match_operator:VF_128_256 7 "addsub_vs_operator"
2609 [(vec_concat:<ssedoublemode>
2611 (match_operand:VF_128_256 1 "register_operand")
2612 (match_operand:VF_128_256 2 "vector_operand"))
2614 (match_operand:VF_128_256 3 "vector_operand")
2615 (match_operand:VF_128_256 4 "vector_operand")))
2616 (match_parallel 5 "addsub_vs_parallel"
2617 [(match_operand 6 "const_int_operand")])]))]
2619 && can_create_pseudo_p ()
2620 && ((rtx_equal_p (operands[1], operands[3])
2621 && rtx_equal_p (operands[2], operands[4]))
2622 || (rtx_equal_p (operands[1], operands[4])
2623 && rtx_equal_p (operands[2], operands[3])))"
2625 (vec_merge:VF_128_256
2626 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2627 (plus:VF_128_256 (match_dup 1) (match_dup 2))
;; Build the vec_merge mask: bit i set when lane i selects from the first
;; (here: MINUS) half of the concat (selector index < NUNITS).
2630 int i, nelt = XVECLEN (operands[5], 0);
2631 HOST_WIDE_INT ival = 0;
2633 for (i = 0; i < nelt; i++)
2634 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2635 ival |= HOST_WIDE_INT_1 << i;
2637 operands[5] = GEN_INT (ival);
;; Same, for the concat with the PLUS result first: MINUS lanes are the
;; ones whose selector index falls in the upper half (>= NUNITS).
2641 [(set (match_operand:VF_128_256 0 "register_operand")
2642 (match_operator:VF_128_256 7 "addsub_vs_operator"
2643 [(vec_concat:<ssedoublemode>
2645 (match_operand:VF_128_256 1 "vector_operand")
2646 (match_operand:VF_128_256 2 "vector_operand"))
2648 (match_operand:VF_128_256 3 "register_operand")
2649 (match_operand:VF_128_256 4 "vector_operand")))
2650 (match_parallel 5 "addsub_vs_parallel"
2651 [(match_operand 6 "const_int_operand")])]))]
2653 && can_create_pseudo_p ()
2654 && ((rtx_equal_p (operands[1], operands[3])
2655 && rtx_equal_p (operands[2], operands[4]))
2656 || (rtx_equal_p (operands[1], operands[4])
2657 && rtx_equal_p (operands[2], operands[3])))"
2659 (vec_merge:VF_128_256
2660 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2661 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2664 int i, nelt = XVECLEN (operands[5], 0);
2665 HOST_WIDE_INT ival = 0;
2667 for (i = 0; i < nelt; i++)
2668 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2669 ival |= HOST_WIDE_INT_1 << i;
2671 operands[5] = GEN_INT (ival);
;; vhaddpd/vhsubpd on V4DF: horizontal op applied within each 128-bit lane
;; (elements 0/1 from each operand in the low lane, 2/3 in the high lane).
2674 (define_insn "avx_h<insn>v4df3"
2675 [(set (match_operand:V4DF 0 "register_operand" "=x")
2680 (match_operand:V4DF 1 "register_operand" "x")
2681 (parallel [(const_int 0)]))
2682 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2685 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2686 (parallel [(const_int 0)]))
2687 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2690 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2691 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2693 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2694 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2696 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2697 [(set_attr "type" "sseadd")
2698 (set_attr "prefix" "vex")
2699 (set_attr "mode" "V4DF")])
;; haddpd expander with canonical element order.
2701 (define_expand "sse3_haddv2df3"
2702 [(set (match_operand:V2DF 0 "register_operand")
2706 (match_operand:V2DF 1 "register_operand")
2707 (parallel [(const_int 0)]))
2708 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2711 (match_operand:V2DF 2 "vector_operand")
2712 (parallel [(const_int 0)]))
2713 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; haddpd insn: since addition is commutative, any element order matches
;; as long as each pair selects two distinct elements (checked below).
2716 (define_insn "*sse3_haddv2df3"
2717 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2721 (match_operand:V2DF 1 "register_operand" "0,x")
2722 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2725 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2728 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2729 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2732 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2734 && INTVAL (operands[3]) != INTVAL (operands[4])
2735 && INTVAL (operands[5]) != INTVAL (operands[6])"
2737 haddpd\t{%2, %0|%0, %2}
2738 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2739 [(set_attr "isa" "noavx,avx")
2740 (set_attr "type" "sseadd")
2741 (set_attr "prefix" "orig,vex")
2742 (set_attr "mode" "V2DF")])
;; hsubpd: element order is fixed (subtraction is not commutative).
2744 (define_insn "sse3_hsubv2df3"
2745 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2749 (match_operand:V2DF 1 "register_operand" "0,x")
2750 (parallel [(const_int 0)]))
2751 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2754 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2755 (parallel [(const_int 0)]))
2756 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2759 hsubpd\t{%2, %0|%0, %2}
2760 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2761 [(set_attr "isa" "noavx,avx")
2762 (set_attr "type" "sseadd")
2763 (set_attr "prefix" "orig,vex")
2764 (set_attr "mode" "V2DF")])
;; DF-mode combiner pattern: haddpd of a register with itself produces
;; the sum of its two elements.
2766 (define_insn "*sse3_haddv2df3_low"
2767 [(set (match_operand:DF 0 "register_operand" "=x,x")
2770 (match_operand:V2DF 1 "register_operand" "0,x")
2771 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2774 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2776 && INTVAL (operands[2]) != INTVAL (operands[3])"
2778 haddpd\t{%0, %0|%0, %0}
2779 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2780 [(set_attr "isa" "noavx,avx")
2781 (set_attr "type" "sseadd1")
2782 (set_attr "prefix" "orig,vex")
2783 (set_attr "mode" "V2DF")])
;; DF-mode combiner pattern for hsubpd: element 0 minus element 1.
2785 (define_insn "*sse3_hsubv2df3_low"
2786 [(set (match_operand:DF 0 "register_operand" "=x,x")
2789 (match_operand:V2DF 1 "register_operand" "0,x")
2790 (parallel [(const_int 0)]))
2793 (parallel [(const_int 1)]))))]
2796 hsubpd\t{%0, %0|%0, %0}
2797 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2798 [(set_attr "isa" "noavx,avx")
2799 (set_attr "type" "sseadd1")
2800 (set_attr "prefix" "orig,vex")
2801 (set_attr "mode" "V2DF")])
;; vhaddps/vhsubps on V8SF: horizontal op within each 128-bit lane
;; (elements 0-3 of both operands feed the low lane, 4-7 the high lane).
2803 (define_insn "avx_h<insn>v8sf3"
2804 [(set (match_operand:V8SF 0 "register_operand" "=x")
2810 (match_operand:V8SF 1 "register_operand" "x")
2811 (parallel [(const_int 0)]))
2812 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2814 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2815 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2819 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2820 (parallel [(const_int 0)]))
2821 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2823 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2824 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2828 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2829 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2831 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2832 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2835 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2836 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2838 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2839 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2841 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2842 [(set_attr "type" "sseadd")
2843 (set_attr "prefix" "vex")
2844 (set_attr "mode" "V8SF")])
;; haddps/hsubps on V4SF.
2846 (define_insn "sse3_h<insn>v4sf3"
2847 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2852 (match_operand:V4SF 1 "register_operand" "0,x")
2853 (parallel [(const_int 0)]))
2854 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2856 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2857 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2861 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
2862 (parallel [(const_int 0)]))
2863 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2865 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2866 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2869 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2870 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2871 [(set_attr "isa" "noavx,avx")
2872 (set_attr "type" "sseadd")
2873 (set_attr "atom_unit" "complex")
2874 (set_attr "prefix" "orig,vex")
2875 (set_attr "prefix_rep" "1,*")
2876 (set_attr "mode" "V4SF")])
;; Modes handled by the SSE-only reduc_plus expander below.
2878 (define_mode_iterator REDUC_SSE_PLUS_MODE
2879 [(V2DF "TARGET_SSE") (V4SF "TARGET_SSE")])
;; Sum reduction to a scalar: ix86_expand_reduc builds the in-vector
;; reduction, then element 0 is extracted into operand 0.
2881 (define_expand "reduc_plus_scal_<mode>"
2882 [(plus:REDUC_SSE_PLUS_MODE
2883 (match_operand:<ssescalarmode> 0 "register_operand")
2884 (match_operand:REDUC_SSE_PLUS_MODE 1 "register_operand"))]
2887 rtx tmp = gen_reg_rtx (<MODE>mode);
2888 ix86_expand_reduc (gen_add<mode>3, tmp, operands[1]);
2889 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
;; V16QI sum reduction: shift the high half down and add, then PSADBW
;; against zero sums the remaining bytes; the result is in byte 0.
2894 (define_expand "reduc_plus_scal_v16qi"
2896 (match_operand:QI 0 "register_operand")
2897 (match_operand:V16QI 1 "register_operand"))]
2900 rtx tmp = gen_reg_rtx (V1TImode);
2901 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
2903 rtx tmp2 = gen_reg_rtx (V16QImode);
2904 emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp)));
2905 rtx tmp3 = gen_reg_rtx (V16QImode);
2906 emit_move_insn (tmp3, CONST0_RTX (V16QImode));
2907 rtx tmp4 = gen_reg_rtx (V2DImode);
2908 emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3));
2909 tmp4 = gen_lowpart (V16QImode, tmp4);
2910 emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx));
;; Wider vectors reduce by halving: extract the high half, add it to the
;; low half, and recurse on the half-width mode.
2914 (define_mode_iterator REDUC_PLUS_MODE
2915 [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
2916 (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2917 (V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
2919 (define_expand "reduc_plus_scal_<mode>"
2920 [(plus:REDUC_PLUS_MODE
2921 (match_operand:<ssescalarmode> 0 "register_operand")
2922 (match_operand:REDUC_PLUS_MODE 1 "register_operand"))]
2925 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2926 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2927 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2928 rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
2929 emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
2930 emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2934 ;; Modes handled by reduc_sm{in,ax}* patterns.
2935 (define_mode_iterator REDUC_SSE_SMINMAX_MODE
2936 [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE")
2937 (V4SI "TARGET_SSE2") (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
2938 (V2DI "TARGET_SSE4_2")])
;; 128-bit signed min/max reduction via ix86_expand_reduc, then extract
;; element 0.
2940 (define_expand "reduc_<code>_scal_<mode>"
2941 [(smaxmin:REDUC_SSE_SMINMAX_MODE
2942 (match_operand:<ssescalarmode> 0 "register_operand")
2943 (match_operand:REDUC_SSE_SMINMAX_MODE 1 "register_operand"))]
2946 rtx tmp = gen_reg_rtx (<MODE>mode);
2947 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2948 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
;; Wider signed min/max reductions: halve (extract high, min/max with the
;; low half) and recurse on the half-width mode.
2953 (define_mode_iterator REDUC_SMINMAX_MODE
2954 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2955 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2956 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2957 (V64QI "TARGET_AVX512BW")
2958 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2959 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2960 (V8DF "TARGET_AVX512F")])
2962 (define_expand "reduc_<code>_scal_<mode>"
2963 [(smaxmin:REDUC_SMINMAX_MODE
2964 (match_operand:<ssescalarmode> 0 "register_operand")
2965 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2968 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2969 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2970 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2971 emit_insn (gen_<code><ssehalfvecmodelower>3
2972 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2973 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
;; Unsigned min/max reductions for AVX512BW integer modes: same
;; halve-and-recurse scheme.
2977 (define_expand "reduc_<code>_scal_<mode>"
2978 [(umaxmin:VI_AVX512BW
2979 (match_operand:<ssescalarmode> 0 "register_operand")
2980 (match_operand:VI_AVX512BW 1 "register_operand"))]
2983 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2984 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2985 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2986 emit_insn (gen_<code><ssehalfvecmodelower>3
2987 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2988 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
;; 256-bit integer reductions: halve once, then finish the 128-bit half
;; with ix86_expand_reduc and extract element 0.
2992 (define_expand "reduc_<code>_scal_<mode>"
2994 (match_operand:<ssescalarmode> 0 "register_operand")
2995 (match_operand:VI_256 1 "register_operand"))]
2998 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2999 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
3000 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
3001 emit_insn (gen_<code><ssehalfvecmodelower>3
3002 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
3003 rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
3004 ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp3, tmp2);
3005 emit_insn (gen_vec_extract<ssehalfvecmodelower><ssescalarmodelower>
3006 (operands[0], tmp3, const0_rtx));
;; V8HI unsigned-min reduction via ix86_expand_reduc + extract.
3010 (define_expand "reduc_umin_scal_v8hi"
3012 (match_operand:HI 0 "register_operand")
3013 (match_operand:V8HI 1 "register_operand"))]
3016 rtx tmp = gen_reg_rtx (V8HImode);
3017 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
3018 emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
;; vreduceps/pd: parallel form; the imm8 (operand 2) selects the operation
;; variant.  Maskable and SAE-capable via the subst attributes.
3022 (define_insn "<mask_codefor>reducep<mode><mask_name><round_saeonly_name>"
3023 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3025 [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3026 (match_operand:SI 2 "const_0_to_255_operand")]
3029 "vreduce<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
3030 [(set_attr "type" "sse")
3031 (set_attr "prefix" "evex")
3032 (set_attr "mode" "<MODE>")])
;; vreducess/sd: scalar form; imm8 is operand 3, upper elements come from
;; operand 1.
3034 (define_insn "reduces<mode><mask_scalar_name><round_saeonly_scalar_name>"
3035 [(set (match_operand:VF_128 0 "register_operand" "=v")
3038 [(match_operand:VF_128 1 "register_operand" "v")
3039 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
3040 (match_operand:SI 3 "const_0_to_255_operand")]
3045 "vreduce<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
3046 [(set_attr "type" "sse")
3047 (set_attr "prefix" "evex")
3048 (set_attr "mode" "<MODE>")])
3050 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3052 ;; Parallel floating point comparisons
3054 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3056 (define_insn "avx_cmp<mode>3"
3057 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
3059 [(match_operand:VF_128_256 1 "register_operand" "x")
3060 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
3061 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3064 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3065 [(set_attr "type" "ssecmp")
3066 (set_attr "length_immediate" "1")
3067 (set_attr "prefix" "vex")
3068 (set_attr "mode" "<MODE>")])
;; Pre-reload split: a -1/0 vec_merge driven by an AVX512 mask compare of
;; 128/256-bit FP vectors becomes a plain AVX vcmp, whose vector result is
;; already all-ones/all-zeros per lane.  Operand 6 is a fresh FP-mode temp;
;; the elided operands[7] setup takes its integer-mode lowpart.
3070 (define_insn_and_split "*avx_cmp<mode>3_1"
3071 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3072 (vec_merge:<sseintvecmode>
3073 (match_operand:<sseintvecmode> 1 "vector_all_ones_operand")
3074 (match_operand:<sseintvecmode> 2 "const0_operand")
3075 (unspec:<avx512fmaskmode>
3076 [(match_operand:VF_128_256 3 "register_operand")
3077 (match_operand:VF_128_256 4 "nonimmediate_operand")
3078 (match_operand:SI 5 "const_0_to_31_operand")]
3080 "TARGET_AVX512VL && ix86_pre_reload_split ()"
3089 (set (match_dup 0) (match_dup 7))]
3091 operands[6] = gen_reg_rtx (<MODE>mode);
3093 = lowpart_subreg (GET_MODE (operands[0]), operands[6], <MODE>mode);
;; As *avx_cmp<mode>3_1 but the mask compare is negated: instead of a NOT,
;; invert the vcmp predicate by XOR-ing the immediate with 4.
3096 (define_insn_and_split "*avx_cmp<mode>3_2"
3097 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3098 (vec_merge:<sseintvecmode>
3099 (match_operand:<sseintvecmode> 1 "vector_all_ones_operand")
3100 (match_operand:<sseintvecmode> 2 "const0_operand")
3101 (not:<avx512fmaskmode>
3102 (unspec:<avx512fmaskmode>
3103 [(match_operand:VF_128_256 3 "register_operand")
3104 (match_operand:VF_128_256 4 "nonimmediate_operand")
3105 (match_operand:SI 5 "const_0_to_31_operand")]
3107 "TARGET_AVX512VL && ix86_pre_reload_split ()"
3116 (set (match_dup 0) (match_dup 7))]
3118 operands[5] = GEN_INT (INTVAL (operands[5]) ^ 4);
3119 operands[6] = gen_reg_rtx (<MODE>mode);
3121 = lowpart_subreg (GET_MODE (operands[0]), operands[6], <MODE>mode);
;; FP-destination variant of *avx_cmp<mode>3_1: the vec_merge selects
;; between a float all-ones vector and zero under an AVX512 mask compare.
3124 (define_insn_and_split "*avx_cmp<mode>3_3"
3125 [(set (match_operand:VF_128_256 0 "register_operand")
3126 (vec_merge:VF_128_256
3127 (match_operand:VF_128_256 1 "float_vector_all_ones_operand")
3128 (match_operand:VF_128_256 2 "const0_operand")
3129 (unspec:<avx512fmaskmode>
3130 [(match_operand:VF_128_256 3 "register_operand")
3131 (match_operand:VF_128_256 4 "nonimmediate_operand")
3132 (match_operand:SI 5 "const_0_to_31_operand")]
3134 "TARGET_AVX512VL && ix86_pre_reload_split ()"
;; FP-destination variant with the mask negated: drop the NOT and invert
;; the compare predicate (immediate ^ 4) in the split.
3144 (define_insn_and_split "*avx_cmp<mode>3_4"
3145 [(set (match_operand:VF_128_256 0 "register_operand")
3146 (vec_merge:VF_128_256
3147 (match_operand:VF_128_256 1 "float_vector_all_ones_operand")
3148 (match_operand:VF_128_256 2 "const0_operand")
3149 (not:<avx512fmaskmode>
3150 (unspec:<avx512fmaskmode>
3151 [(match_operand:VF_128_256 3 "register_operand")
3152 (match_operand:VF_128_256 4 "nonimmediate_operand")
3153 (match_operand:SI 5 "const_0_to_31_operand")]
3155 "TARGET_AVX512VL && ix86_pre_reload_split ()"
3164 "operands[5] = GEN_INT (INTVAL (operands[5]) ^ 4);")
;; Blend under an integer-vs-zero mask compare restricted to predicates
;; LT (1) and NLT (5); for NLT the blend operands are swapped so only the
;; LT form has to be emitted.
3166 (define_insn_and_split "*avx_cmp<mode>3_lt"
3167 [(set (match_operand:VF_128_256 0 "register_operand")
3168 (vec_merge:VF_128_256
3169 (match_operand:VF_128_256 1 "vector_operand")
3170 (match_operand:VF_128_256 2 "vector_operand")
3171 (unspec:<avx512fmaskmode>
3172 [(match_operand:<sseintvecmode> 3 "register_operand")
3173 (match_operand:<sseintvecmode> 4 "const0_operand")
3174 (match_operand:SI 5 "const_0_to_7_operand")]
3176 "TARGET_AVX512VL && ix86_pre_reload_split ()
3178 && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
3179 || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
3191 if (INTVAL (operands[5]) == 5)
3192 std::swap (operands[1], operands[2]);
;; Integer analogue of *avx_cmp<mode>3_lt: same LT(1)/NLT(5) restriction,
;; with all operands re-cast to the byte-vector mode <ssebytemode> for the
;; resulting blend.
3195 (define_insn_and_split "*avx_cmp<mode>3_ltint"
3196 [(set (match_operand:VI48_AVX 0 "register_operand")
3198 (match_operand:VI48_AVX 1 "vector_operand")
3199 (match_operand:VI48_AVX 2 "vector_operand")
3200 (unspec:<avx512fmaskmode>
3201 [(match_operand:VI48_AVX 3 "register_operand")
3202 (match_operand:VI48_AVX 4 "const0_operand")
3203 (match_operand:SI 5 "const_0_to_7_operand")]
3205 "TARGET_AVX512VL && ix86_pre_reload_split ()
3207 && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
3208 || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
3212 (unspec:<ssebytemode>
3215 (subreg:<ssebytemode>
3221 if (INTVAL (operands[5]) == 5)
3222 std::swap (operands[1], operands[2]);
3223 operands[0] = gen_lowpart (<ssebytemode>mode, operands[0]);
3224 operands[1] = gen_lowpart (<ssebytemode>mode, operands[1]);
3225 operands[2] = gen_lowpart (<ssebytemode>mode, operands[2]);
;; AVX scalar FP compare (vcmpss/vcmpsd) with 5-bit predicate immediate;
;; only the low element is compared.
3228 (define_insn "avx_vmcmp<mode>3"
3229 [(set (match_operand:VF_128 0 "register_operand" "=x")
3232 [(match_operand:VF_128 1 "register_operand" "x")
3233 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
3234 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3239 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
3240 [(set_attr "type" "ssecmp")
3241 (set_attr "length_immediate" "1")
3242 (set_attr "prefix" "vex")
3243 (set_attr "mode" "<ssescalarmode>")])
;; Commutative-compare-only variant of the mask compare: the '%0'
;; constraint lets the RA swap the inputs, which is only valid when the
;; operator class is RTX_COMM_COMPARE (checked in the condition).
3245 (define_insn "*<sse>_maskcmp<mode>3_comm"
3246 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
3247 (match_operator:VF_128_256 3 "sse_comparison_operator"
3248 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
3249 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
3251 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
3253 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
3254 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3255 [(set_attr "isa" "noavx,avx")
3256 (set_attr "type" "ssecmp")
3257 (set_attr "length_immediate" "1")
3258 (set_attr "prefix" "orig,vex")
3259 (set_attr "mode" "<MODE>")])
;; SSE/AVX packed FP compare producing a per-lane all-ones/zero mask;
;; %D3 prints the predicate suffix for the comparison operator.
3261 (define_insn "<sse>_maskcmp<mode>3"
3262 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
3263 (match_operator:VF_128_256 3 "sse_comparison_operator"
3264 [(match_operand:VF_128_256 1 "register_operand" "0,x")
3265 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
3268 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
3269 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3270 [(set_attr "isa" "noavx,avx")
3271 (set_attr "type" "ssecmp")
3272 (set_attr "length_immediate" "1")
3273 (set_attr "prefix" "orig,vex")
3274 (set_attr "mode" "<MODE>")])
;; Scalar (low-element) form of the mask compare; result mode is the
;; scalar element mode.
3276 (define_insn "<sse>_vmmaskcmp<mode>3"
3277 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3279 (match_operator:VF_128 3 "sse_comparison_operator"
3280 [(match_operand:VF_128 1 "register_operand" "0,x")
3281 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
3286 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
3287 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
3288 [(set_attr "isa" "noavx,avx")
3289 (set_attr "type" "ssecmp")
3290 (set_attr "length_immediate" "1,*")
3291 (set_attr "prefix" "orig,vex")
3292 (set_attr "mode" "<ssescalarmode>")])
;; Per-mode compare-immediate predicate: FP vcmp accepts the full 5-bit
;; range (0..31), integer vpcmp/vpcmpu only 3 bits (0..7).
3294 (define_mode_attr cmp_imm_predicate
3295 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
3296 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
3297 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
3298 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
3299 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
3300 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
3301 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
3302 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
3303 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
;; AVX512 compare of dword/qword element vectors into a mask register
;; (v{p}cmp); mask-merge and SAE come via the subst attributes.
3305 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
3306 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3307 (unspec:<avx512fmaskmode>
3308 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
3309 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
3310 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3312 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
3313 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
3314 [(set_attr "type" "ssecmp")
3315 (set_attr "length_immediate" "1")
3316 (set_attr "prefix" "evex")
3317 (set_attr "mode" "<sseinsnmode>")])
;; Fold (not (compare ... imm)) into a single compare with the inverted
;; predicate: operand 4 = operand 3 ^ 4.
3319 (define_insn_and_split "*<avx512>_cmp<mode>3"
3320 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3321 (not:<avx512fmaskmode>
3322 (unspec:<avx512fmaskmode>
3323 [(match_operand:V48_AVX512VL 1 "register_operand")
3324 (match_operand:V48_AVX512VL 2 "nonimmediate_operand")
3325 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3327 "TARGET_AVX512F && ix86_pre_reload_split ()"
3331 (unspec:<avx512fmaskmode>
3336 "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
;; AVX512BW byte/word signed compare into a mask register (vpcmpb/vpcmpw).
3338 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
3339 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3340 (unspec:<avx512fmaskmode>
3341 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
3342 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
3343 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3346 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3347 [(set_attr "type" "ssecmp")
3348 (set_attr "length_immediate" "1")
3349 (set_attr "prefix" "evex")
3350 (set_attr "mode" "<sseinsnmode>")])
;; Iterate patterns over both signed and unsigned mask compares.
3352 (define_int_iterator UNSPEC_PCMP_ITER
3353 [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP])
;; Byte/word analogue of the NOT-folding split, for both signed and
;; unsigned compares: invert the predicate via imm ^ 4.
3355 (define_insn_and_split "*<avx512>_cmp<mode>3"
3356 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3357 (not:<avx512fmaskmode>
3358 (unspec:<avx512fmaskmode>
3359 [(match_operand:VI12_AVX512VL 1 "register_operand")
3360 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
3361 (match_operand:SI 3 "<cmp_imm_predicate>")]
3362 UNSPEC_PCMP_ITER)))]
3363 "TARGET_AVX512BW && ix86_pre_reload_split ()"
3367 (unspec:<avx512fmaskmode>
3372 "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
;; Unsigned AVX512 compare into a mask register: byte/word form
;; (vpcmpub/vpcmpuw, AVX512BW)...
3374 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
3375 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3376 (unspec:<avx512fmaskmode>
3377 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
3378 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
3379 (match_operand:SI 3 "const_0_to_7_operand" "n")]
3380 UNSPEC_UNSIGNED_PCMP))]
3382 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3383 [(set_attr "type" "ssecmp")
3384 (set_attr "length_immediate" "1")
3385 (set_attr "prefix" "evex")
3386 (set_attr "mode" "<sseinsnmode>")])
;; ...and dword/qword form (vpcmpud/vpcmpuq).
3388 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
3389 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3390 (unspec:<avx512fmaskmode>
3391 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
3392 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
3393 (match_operand:SI 3 "const_0_to_7_operand" "n")]
3394 UNSPEC_UNSIGNED_PCMP))]
3396 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3397 [(set_attr "type" "ssecmp")
3398 (set_attr "length_immediate" "1")
3399 (set_attr "prefix" "evex")
3400 (set_attr "mode" "<sseinsnmode>")])
;; Fold (not (unsigned compare ... imm)) by inverting the predicate
;; (imm ^ 4), same trick as the signed variants above.
3402 (define_insn_and_split "*<avx512>_ucmp<mode>3"
3403 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3404 (not:<avx512fmaskmode>
3405 (unspec:<avx512fmaskmode>
3406 [(match_operand:VI48_AVX512VL 1 "register_operand")
3407 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
3408 (match_operand:SI 3 "const_0_to_7_operand")]
3409 UNSPEC_UNSIGNED_PCMP)))]
3410 "TARGET_AVX512F && ix86_pre_reload_split ()"
3414 (unspec:<avx512fmaskmode>
3418 UNSPEC_UNSIGNED_PCMP))]
3419 "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
;; Immediate bits that must be clear for the PR96906 splitter below:
;; mask 3 for signed compares (only EQ=0 / NEQ=4 allowed),
;; mask 1 for unsigned (additionally LE=2 / NLE=6).
3421 (define_int_attr pcmp_signed_mask
3422 [(UNSPEC_PCMP "3") (UNSPEC_UNSIGNED_PCMP "1")])
3424 ;; PR96906 - optimize vpsubusw compared to 0 into vpcmpleuw or vpcmpnltuw.
3425 ;; For a signed comparison only EQ (0) and NEQ (4) are handled;
3426 ;; for an unsigned comparison additionally LE (2) and NLE (6), which on a
;; saturating-subtract result are equivalent to EQ and NEQ.
3429 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3430 (unspec:<avx512fmaskmode>
3431 [(us_minus:VI12_AVX512VL
3432 (match_operand:VI12_AVX512VL 1 "vector_operand")
3433 (match_operand:VI12_AVX512VL 2 "vector_operand"))
3434 (match_operand:VI12_AVX512VL 3 "const0_operand")
3435 (match_operand:SI 4 "const_0_to_7_operand")]
3438 && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)
3439 && (INTVAL (operands[4]) & <pcmp_signed_mask>) == 0"
3442 /* LE: 2, NLT: 5, NLE: 6, LT: 1 */
3443 int cmp_predicate = 2; /* LE */
3444 if (MEM_P (operands[1]))
3446 std::swap (operands[1], operands[2]);
3447 cmp_predicate = 5; /* NLT (GE) */
3449 if ((INTVAL (operands[4]) & 4) != 0)
3450 cmp_predicate ^= 4; /* Invert the comparison to NLE (GT) or LT. */
3451 emit_insn (gen_<avx512>_ucmp<mode>3 (operands[0], operands[1],operands[2],
3452 GEN_INT (cmp_predicate)));
;; AVX512F scalar FP compare into a mask register (vcmpss/vcmpsd with
;; optional SAE); the compare result is ANDed (second AND operand elided
;; in this view — presumably restricting it to the low lane).
3456 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
3457 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3458 (and:<avx512fmaskmode>
3459 (unspec:<avx512fmaskmode>
3460 [(match_operand:VF_128 1 "register_operand" "v")
3461 (match_operand:VF_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3462 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3466 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
3467 [(set_attr "type" "ssecmp")
3468 (set_attr "length_immediate" "1")
3469 (set_attr "prefix" "evex")
3470 (set_attr "mode" "<ssescalarmode>")])
;; Masked variant of the scalar compare: the result is additionally ANDed
;; with mask-register operand 4 (emitted as the {%4} write-mask).
3472 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
3473 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3474 (and:<avx512fmaskmode>
3475 (unspec:<avx512fmaskmode>
3476 [(match_operand:VF_128 1 "register_operand" "v")
3477 (match_operand:VF_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3478 (match_operand:SI 3 "const_0_to_31_operand" "n")]
3480 (and:<avx512fmaskmode>
3481 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
3484 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}"
3485 [(set_attr "type" "ssecmp")
3486 (set_attr "length_immediate" "1")
3487 (set_attr "prefix" "evex")
3488 (set_attr "mode" "<ssescalarmode>")])
;; [v]comis[sd] / [v]ucomis[sd]: compare element 0 of the two vector
;; operands and set the FP condition flags in FLAGS_REG.
3490 (define_insn "<sse>_<unord>comi<round_saeonly_name>"
3491 [(set (reg:CCFP FLAGS_REG)
3494 (match_operand:<ssevecmode> 0 "register_operand" "v")
3495 (parallel [(const_int 0)]))
3497 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3498 (parallel [(const_int 0)]))))]
3499 "SSE_FLOAT_MODE_P (<MODE>mode)"
3500 "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
3501 [(set_attr "type" "ssecomi")
3502 (set_attr "prefix" "maybe_vex")
3503 (set_attr "prefix_rep" "0")
3504 (set (attr "prefix_data16")
3505 (if_then_else (eq_attr "mode" "DF")
3507 (const_string "0")))
3508 (set_attr "mode" "<MODE>")])
;; vec_cmp expanders.  AVX512 variants produce a mask register via
;; ix86_expand_mask_vec_cmp; the 128/256-bit variants produce full-width
;; boolean vectors via ix86_expand_int_vec_cmp / ix86_expand_fp_vec_cmp.
3510 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
3511 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3512 (match_operator:<avx512fmaskmode> 1 ""
3513 [(match_operand:V48_AVX512VL 2 "register_operand")
3514 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
3517 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3518 operands[2], operands[3]);
;; Byte/word mask-register compare.
3523 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
3524 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3525 (match_operator:<avx512fmaskmode> 1 ""
3526 [(match_operand:VI12_AVX512VL 2 "register_operand")
3527 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3530 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3531 operands[2], operands[3]);
;; 256-bit integer compare to a boolean vector.
3536 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3537 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3538 (match_operator:<sseintvecmode> 1 ""
3539 [(match_operand:VI_256 2 "register_operand")
3540 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3543 bool ok = ix86_expand_int_vec_cmp (operands);
;; 128-bit integer (byte/word/dword) compare.
3548 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3549 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3550 (match_operator:<sseintvecmode> 1 ""
3551 [(match_operand:VI124_128 2 "register_operand")
3552 (match_operand:VI124_128 3 "vector_operand")]))]
3555 bool ok = ix86_expand_int_vec_cmp (operands);
;; V2DI compare (separate because qword compares need special handling).
3560 (define_expand "vec_cmpv2div2di"
3561 [(set (match_operand:V2DI 0 "register_operand")
3562 (match_operator:V2DI 1 ""
3563 [(match_operand:V2DI 2 "register_operand")
3564 (match_operand:V2DI 3 "vector_operand")]))]
3567 bool ok = ix86_expand_int_vec_cmp (operands);
;; 256-bit FP compare to an integer boolean vector.
3572 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3573 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3574 (match_operator:<sseintvecmode> 1 ""
3575 [(match_operand:VF_256 2 "register_operand")
3576 (match_operand:VF_256 3 "nonimmediate_operand")]))]
3579 bool ok = ix86_expand_fp_vec_cmp (operands);
;; 128-bit FP compare to an integer boolean vector.
3584 (define_expand "vec_cmp<mode><sseintvecmodelower>"
3585 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3586 (match_operator:<sseintvecmode> 1 ""
3587 [(match_operand:VF_128 2 "register_operand")
3588 (match_operand:VF_128 3 "vector_operand")]))]
3591 bool ok = ix86_expand_fp_vec_cmp (operands);
;; Unsigned vec_cmpu expanders, same dispatch scheme as vec_cmp above.
3596 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3597 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3598 (match_operator:<avx512fmaskmode> 1 ""
3599 [(match_operand:VI48_AVX512VL 2 "register_operand")
3600 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
3603 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3604 operands[2], operands[3]);
;; Byte/word unsigned mask-register compare.
3609 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3610 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3611 (match_operator:<avx512fmaskmode> 1 ""
3612 [(match_operand:VI12_AVX512VL 2 "register_operand")
3613 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3616 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3617 operands[2], operands[3]);
;; 256-bit unsigned integer compare.
3622 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3623 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3624 (match_operator:<sseintvecmode> 1 ""
3625 [(match_operand:VI_256 2 "register_operand")
3626 (match_operand:VI_256 3 "nonimmediate_operand")]))]
3629 bool ok = ix86_expand_int_vec_cmp (operands);
;; 128-bit unsigned integer compare.
3634 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
3635 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3636 (match_operator:<sseintvecmode> 1 ""
3637 [(match_operand:VI124_128 2 "register_operand")
3638 (match_operand:VI124_128 3 "vector_operand")]))]
3641 bool ok = ix86_expand_int_vec_cmp (operands);
;; V2DI unsigned compare.
3646 (define_expand "vec_cmpuv2div2di"
3647 [(set (match_operand:V2DI 0 "register_operand")
3648 (match_operator:V2DI 1 ""
3649 [(match_operand:V2DI 2 "register_operand")
3650 (match_operand:V2DI 3 "vector_operand")]))]
3653 bool ok = ix86_expand_int_vec_cmp (operands);
;; V2DI equality compare.
3658 (define_expand "vec_cmpeqv2div2di"
3659 [(set (match_operand:V2DI 0 "register_operand")
3660 (match_operator:V2DI 1 ""
3661 [(match_operand:V2DI 2 "register_operand")
3662 (match_operand:V2DI 3 "vector_operand")]))]
3665 bool ok = ix86_expand_int_vec_cmp (operands);
;; vcond expanders: conditional select driven by an FP comparison,
;; lowered by ix86_expand_fp_vcond.  The condition requires equal element
;; counts between the value mode and the compare mode.
3670 (define_expand "vcond<V_512:mode><VF_512:mode>"
3671 [(set (match_operand:V_512 0 "register_operand")
3673 (match_operator 3 ""
3674 [(match_operand:VF_512 4 "nonimmediate_operand")
3675 (match_operand:VF_512 5 "nonimmediate_operand")])
3676 (match_operand:V_512 1 "general_operand")
3677 (match_operand:V_512 2 "general_operand")))]
3679 && (GET_MODE_NUNITS (<V_512:MODE>mode)
3680 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
3682 bool ok = ix86_expand_fp_vcond (operands);
;; 256-bit variant.
3687 (define_expand "vcond<V_256:mode><VF_256:mode>"
3688 [(set (match_operand:V_256 0 "register_operand")
3690 (match_operator 3 ""
3691 [(match_operand:VF_256 4 "nonimmediate_operand")
3692 (match_operand:VF_256 5 "nonimmediate_operand")])
3693 (match_operand:V_256 1 "general_operand")
3694 (match_operand:V_256 2 "general_operand")))]
3696 && (GET_MODE_NUNITS (<V_256:MODE>mode)
3697 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
3699 bool ok = ix86_expand_fp_vcond (operands);
;; 128-bit variant.
3704 (define_expand "vcond<V_128:mode><VF_128:mode>"
3705 [(set (match_operand:V_128 0 "register_operand")
3707 (match_operator 3 ""
3708 [(match_operand:VF_128 4 "vector_operand")
3709 (match_operand:VF_128 5 "vector_operand")])
3710 (match_operand:V_128 1 "general_operand")
3711 (match_operand:V_128 2 "general_operand")))]
3713 && (GET_MODE_NUNITS (<V_128:MODE>mode)
3714 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
3716 bool ok = ix86_expand_fp_vcond (operands);
;; vcond_mask expanders: select between two vectors under a pre-computed
;; condition.  AVX512 variants take a mask register and match the
;; vec_merge pattern directly; the others go through ix86_expand_sse_movcc
;; with a full-width boolean vector.
3721 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3722 [(set (match_operand:V48_AVX512VL 0 "register_operand")
3723 (vec_merge:V48_AVX512VL
3724 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3725 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
3726 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
;; Byte/word mask-register variant.
3729 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3730 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3731 (vec_merge:VI12_AVX512VL
3732 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3733 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
3734 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3737 ;; As vcondv4div4df and vcondv8siv8sf are enabled already with TARGET_AVX,
3738 ;; and their condition can be folded late into a constant, we need to
3739 ;; support vcond_mask_v4div4di and vcond_mask_v8siv8si for TARGET_AVX.
3740 (define_mode_iterator VI_256_AVX2 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
;; 256-bit integer blend.
3743 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3744 [(set (match_operand:VI_256_AVX2 0 "register_operand")
3745 (vec_merge:VI_256_AVX2
3746 (match_operand:VI_256_AVX2 1 "nonimmediate_operand")
3747 (match_operand:VI_256_AVX2 2 "nonimm_or_0_operand")
3748 (match_operand:<sseintvecmode> 3 "register_operand")))]
3751 ix86_expand_sse_movcc (operands[0], operands[3],
3752 operands[1], operands[2]);
;; 128-bit integer blend.
3756 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3757 [(set (match_operand:VI124_128 0 "register_operand")
3758 (vec_merge:VI124_128
3759 (match_operand:VI124_128 1 "vector_operand")
3760 (match_operand:VI124_128 2 "nonimm_or_0_operand")
3761 (match_operand:<sseintvecmode> 3 "register_operand")))]
3764 ix86_expand_sse_movcc (operands[0], operands[3],
3765 operands[1], operands[2]);
;; V2DI blend.
3769 (define_expand "vcond_mask_v2div2di"
3770 [(set (match_operand:V2DI 0 "register_operand")
3772 (match_operand:V2DI 1 "vector_operand")
3773 (match_operand:V2DI 2 "nonimm_or_0_operand")
3774 (match_operand:V2DI 3 "register_operand")))]
3777 ix86_expand_sse_movcc (operands[0], operands[3],
3778 operands[1], operands[2]);
;; 256-bit FP blend.
3782 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3783 [(set (match_operand:VF_256 0 "register_operand")
3785 (match_operand:VF_256 1 "nonimmediate_operand")
3786 (match_operand:VF_256 2 "nonimm_or_0_operand")
3787 (match_operand:<sseintvecmode> 3 "register_operand")))]
3790 ix86_expand_sse_movcc (operands[0], operands[3],
3791 operands[1], operands[2]);
;; 128-bit FP blend.
3795 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3796 [(set (match_operand:VF_128 0 "register_operand")
3798 (match_operand:VF_128 1 "vector_operand")
3799 (match_operand:VF_128 2 "nonimm_or_0_operand")
3800 (match_operand:<sseintvecmode> 3 "register_operand")))]
3803 ix86_expand_sse_movcc (operands[0], operands[3],
3804 operands[1], operands[2]);
3808 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3810 ;; Parallel floating point logical operations
3812 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FP and-not: dest = (~op1) & op2.  There is no EVEX-encoded vandnp[sd]
;; without AVX512DQ, so the plain-AVX512F alternative emits the
;; integer-domain vpandn[qd] instead (see the C body below).
3814 (define_insn "<sse>_andnot<mode>3<mask_name>"
3815 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3818 (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
3819 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3820 "TARGET_SSE && <mask_avx512vl_condition>"
3826 switch (which_alternative)
3829 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3834 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3840 switch (get_attr_mode (insn))
3848 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3849 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3850 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3853 suffix = "<ssemodesuffix>";
3856 snprintf (buf, sizeof (buf), ops, suffix);
3857 output_asm_insn (buf, operands);
3860 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3861 (set_attr "type" "sselog")
3862 (set_attr "prefix" "orig,maybe_vex,evex,evex")
3864 (cond [(and (match_test "<mask_applied>")
3865 (and (eq_attr "alternative" "1")
3866 (match_test "!TARGET_AVX512DQ")))
3867 (const_string "<sseintvecmode2>")
3868 (eq_attr "alternative" "3")
3869 (const_string "<sseintvecmode2>")
3870 (match_test "TARGET_AVX")
3871 (const_string "<MODE>")
3872 (match_test "optimize_function_for_size_p (cfun)")
3873 (const_string "V4SF")
3874 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3875 (const_string "V4SF")
3877 (const_string "<MODE>")))])
;; 512-bit FP and-not; without AVX512DQ falls back to the integer-domain
;; vpandn[qd] (and reports mode XI in the attributes accordingly).
3879 (define_insn "<sse>_andnot<mode>3<mask_name>"
3880 [(set (match_operand:VF_512 0 "register_operand" "=v")
3883 (match_operand:VF_512 1 "register_operand" "v"))
3884 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3891 suffix = "<ssemodesuffix>";
3894 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3895 if (!TARGET_AVX512DQ)
3897 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3901 snprintf (buf, sizeof (buf),
3902 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3904 output_asm_insn (buf, operands);
3907 [(set_attr "type" "sselog")
3908 (set_attr "prefix" "evex")
3910 (if_then_else (match_test "TARGET_AVX512DQ")
3911 (const_string "<sseinsnmode>")
3912 (const_string "XI")))])
;; Expanders for FP logical ops (and/ior/xor): canonicalize the operands
;; (at most one memory input) via ix86_fixup_binary_operands_no_copy.
3914 (define_expand "<code><mode>3<mask_name>"
3915 [(set (match_operand:VF_128_256 0 "register_operand")
3916 (any_logic:VF_128_256
3917 (match_operand:VF_128_256 1 "vector_operand")
3918 (match_operand:VF_128_256 2 "vector_operand")))]
3919 "TARGET_SSE && <mask_avx512vl_condition>"
3920 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; 512-bit variant.
3922 (define_expand "<code><mode>3<mask_name>"
3923 [(set (match_operand:VF_512 0 "register_operand")
3925 (match_operand:VF_512 1 "nonimmediate_operand")
3926 (match_operand:VF_512 2 "nonimmediate_operand")))]
3928 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; FP logical insn (and/ior/xor).  Like andnot above, plain AVX512F has
;; no v<logic>p[sd], so that alternative emits integer-domain vp<logic>[qd].
3930 (define_insn "*<code><mode>3<mask_name>"
3931 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3932 (any_logic:VF_128_256
3933 (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
3934 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3935 "TARGET_SSE && <mask_avx512vl_condition>
3936 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3942 switch (which_alternative)
3945 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3950 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3956 switch (get_attr_mode (insn))
3964 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
3965 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3966 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3969 suffix = "<ssemodesuffix>";
3972 snprintf (buf, sizeof (buf), ops, suffix);
3973 output_asm_insn (buf, operands);
3976 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3977 (set_attr "type" "sselog")
3978 (set_attr "prefix" "orig,maybe_evex,evex,evex")
3980 (cond [(and (match_test "<mask_applied>")
3981 (and (eq_attr "alternative" "1")
3982 (match_test "!TARGET_AVX512DQ")))
3983 (const_string "<sseintvecmode2>")
3984 (eq_attr "alternative" "3")
3985 (const_string "<sseintvecmode2>")
3986 (match_test "TARGET_AVX")
3987 (const_string "<MODE>")
3988 (match_test "optimize_function_for_size_p (cfun)")
3989 (const_string "V4SF")
3990 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3991 (const_string "V4SF")
3993 (const_string "<MODE>")))])
;; 512-bit FP logical insn; integer-domain vp<logic>[qd] fallback when
;; AVX512DQ is unavailable (mode attribute then reports XI).
3995 (define_insn "*<code><mode>3<mask_name>"
3996 [(set (match_operand:VF_512 0 "register_operand" "=v")
3998 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
3999 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
4000 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4006 suffix = "<ssemodesuffix>";
4009 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
4010 if (!TARGET_AVX512DQ)
4012 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
4016 snprintf (buf, sizeof (buf),
4017 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
4019 output_asm_insn (buf, operands);
4022 [(set_attr "type" "sselog")
4023 (set_attr "prefix" "evex")
4025 (if_then_else (match_test "TARGET_AVX512DQ")
4026 (const_string "<sseinsnmode>")
4027 (const_string "XI")))])
;; copysign: result = ((~signmask) & op1) | (signmask & op2), with
;; operand 3 the sign-bit mask from ix86_build_signbit_mask.
4029 (define_expand "copysign<mode>3"
4032 (not:VF (match_dup 3))
4033 (match_operand:VF 1 "vector_operand")))
4035 (and:VF (match_dup 3)
4036 (match_operand:VF 2 "vector_operand")))
4037 (set (match_operand:VF 0 "register_operand")
4038 (ior:VF (match_dup 4) (match_dup 5)))]
4041 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
4043 operands[4] = gen_reg_rtx (<MODE>mode);
4044 operands[5] = gen_reg_rtx (<MODE>mode);
;; xorsign: result = op1 ^ (signmask & op2) — flips op1's sign where
;; op2 is negative.
4047 (define_expand "xorsign<mode>3"
4049 (and:VF (match_dup 3)
4050 (match_operand:VF 2 "vector_operand")))
4051 (set (match_operand:VF 0 "register_operand")
4052 (xor:VF (match_dup 4)
4053 (match_operand:VF 1 "vector_operand")))]
4056 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
4058 operands[4] = gen_reg_rtx (<MODE>mode);
;; signbit: logical right shift of the bit-cast integer vector by
;; (element bits - 1), leaving 0 or 1 per lane.
4061 (define_expand "signbit<mode>2"
4062 [(set (match_operand:<sseintvecmode> 0 "register_operand")
4063 (lshiftrt:<sseintvecmode>
4064 (subreg:<sseintvecmode>
4065 (match_operand:VF1_AVX2 1 "register_operand") 0)
4068 "operands[2] = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)-1);")
4070 ;; Also define scalar versions. These are used for abs, neg, and
4071 ;; conditional move. Using subregs into vector modes causes register
4072 ;; allocation lossage. These patterns do not allow memory operands
4073 ;; because the native instructions read the full 128-bits.
;; Scalar-mode and-not.  The avx512f alternative (3) operates on the full
;; 512-bit register via the %g modifier; without AVX512DQ it uses the
;; integer-domain vpandn[qd].
4075 (define_insn "*andnot<mode>3"
4076 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
4079 (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
4080 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
4081 "SSE_FLOAT_MODE_P (<MODE>mode)"
4086 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
4088 switch (which_alternative)
4091 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
4094 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4097 if (TARGET_AVX512DQ)
4098 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4101 suffix = <MODE>mode == DFmode ? "q" : "d";
4102 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4106 if (TARGET_AVX512DQ)
4107 ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4110 suffix = <MODE>mode == DFmode ? "q" : "d";
4111 ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4118 snprintf (buf, sizeof (buf), ops, suffix);
4119 output_asm_insn (buf, operands);
4122 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
4123 (set_attr "type" "sselog")
4124 (set_attr "prefix" "orig,vex,evex,evex")
4126 (cond [(eq_attr "alternative" "2")
4127 (if_then_else (match_test "TARGET_AVX512DQ")
4128 (const_string "<ssevecmode>")
4129 (const_string "TI"))
4130 (eq_attr "alternative" "3")
4131 (if_then_else (match_test "TARGET_AVX512DQ")
4132 (const_string "<avx512fvecmode>")
4133 (const_string "XI"))
4134 (match_test "TARGET_AVX")
4135 (const_string "<ssevecmode>")
4136 (match_test "optimize_function_for_size_p (cfun)")
4137 (const_string "V4SF")
4138 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4139 (const_string "V4SF")
4141 (const_string "<ssevecmode>")))])
;; TF-mode and-not: andnps/pandn for SSE encodings, vpandnq for the
;; EVEX alternatives (alternative 3 widens to the full 512-bit register
;; via the %g modifier).
4143 (define_insn "*andnottf3"
4144 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
4146 (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
4147 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
4153 = (which_alternative >= 2 ? "pandnq"
4154 : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
4156 switch (which_alternative)
4159 ops = "%s\t{%%2, %%0|%%0, %%2}";
4163 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4166 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4172 snprintf (buf, sizeof (buf), ops, tmp);
4173 output_asm_insn (buf, operands);
4176 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
4177 (set_attr "type" "sselog")
4178 (set (attr "prefix_data16")
4180 (and (eq_attr "alternative" "0")
4181 (eq_attr "mode" "TI"))
4183 (const_string "*")))
4184 (set_attr "prefix" "orig,vex,evex,evex")
4186 (cond [(eq_attr "alternative" "2")
4188 (eq_attr "alternative" "3")
4190 (match_test "TARGET_AVX")
4192 (ior (not (match_test "TARGET_SSE2"))
4193 (match_test "optimize_function_for_size_p (cfun)"))
4194 (const_string "V4SF")
4195 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4196 (const_string "V4SF")
4198 (const_string "TI")))])
;; Scalar SF/DF bitwise logic (<code> iterates over and/ior/xor) done in the
;; SSE registers.  The whole vector register is operated on; suffix "ps/pd"
;; vs "d/q" picks FP vs integer-domain encodings depending on AVX512DQ.
4200 (define_insn "*<code><mode>3"
4201 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
4203 (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
4204 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
4205 "SSE_FLOAT_MODE_P (<MODE>mode)"
4210 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
4212 switch (which_alternative)
4215 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
;; AVX512VL without DQ: no vandps/vandpd with EVEX — fall back to the
;; integer-domain vp<logic>d/q forms.
4218 if (!TARGET_AVX512DQ)
4220 suffix = <MODE>mode == DFmode ? "q" : "d";
4221 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4226 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
;; AVX512F-only alternative: operate on the zmm view via %g modifiers.
4229 if (TARGET_AVX512DQ)
4230 ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4233 suffix = <MODE>mode == DFmode ? "q" : "d";
4234 ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4241 snprintf (buf, sizeof (buf), ops, suffix);
4242 output_asm_insn (buf, operands);
4245 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
4246 (set_attr "type" "sselog")
4247 (set_attr "prefix" "orig,vex,evex,evex")
4249 (cond [(eq_attr "alternative" "2")
4250 (if_then_else (match_test "TARGET_AVX512DQ")
4251 (const_string "<ssevecmode>")
4252 (const_string "TI"))
4253 (eq_attr "alternative" "3")
4254 (if_then_else (match_test "TARGET_AVX512DQ")
4255 (const_string "<avx512fvecmode>")
4256 (const_string "XI"))
4257 (match_test "TARGET_AVX")
4258 (const_string "<ssevecmode>")
4259 (match_test "optimize_function_for_size_p (cfun)")
4260 (const_string "V4SF")
4261 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4262 (const_string "V4SF")
4264 (const_string "<ssevecmode>")))])
;; Expander for TFmode logic ops; fixes up operands so at most one is
;; a memory reference before matching *<code>tf3 below.
4266 (define_expand "<code>tf3"
4267 [(set (match_operand:TF 0 "register_operand")
4269 (match_operand:TF 1 "vector_operand")
4270 (match_operand:TF 2 "vector_operand")))]
4272 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; TFmode bitwise logic insn.  Mirrors *andnottf3's alternative layout:
;; 0 = legacy SSE, 1 = AVX, 2 = AVX512VL, 3 = AVX512F-only (zmm view).
4274 (define_insn "*<code>tf3"
4275 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
4277 (match_operand:TF 1 "vector_operand" "%0,x,v,v")
4278 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
4279 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
;; EVEX alternatives always use the integer-domain p<logic>q form.
4284 = (which_alternative >= 2 ? "p<logic>q"
4285 : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>")
4287 switch (which_alternative)
4290 ops = "%s\t{%%2, %%0|%%0, %%2}";
4294 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4297 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4303 snprintf (buf, sizeof (buf), ops, tmp);
4304 output_asm_insn (buf, operands);
4307 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
4308 (set_attr "type" "sselog")
4309 (set (attr "prefix_data16")
4311 (and (eq_attr "alternative" "0")
4312 (eq_attr "mode" "TI"))
4314 (const_string "*")))
4315 (set_attr "prefix" "orig,vex,evex,evex")
4317 (cond [(eq_attr "alternative" "2")
4319 (eq_attr "alternative" "3")
4321 (match_test "TARGET_AVX")
4323 (ior (not (match_test "TARGET_SSE2"))
4324 (match_test "optimize_function_for_size_p (cfun)"))
4325 (const_string "V4SF")
4326 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4327 (const_string "V4SF")
4329 (const_string "TI")))])
4331 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4333 ;; FMA floating point multiply/accumulate instructions. These include
4334 ;; scalar versions of the instructions as well as vector versions.
4336 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4338 ;; The standard names for scalar FMA are only available with SSE math enabled.
4339 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
4340 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
4341 ;; and TARGET_FMA4 are both false.
4342 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
4343 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
4344 ;; GAS to allow proper prefix selection. However, for the moment all hardware
4345 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-named fma/fms/fnma/fnms patterns.  Scalar modes
;; additionally require SSE math; 128/256-bit need FMA, FMA4 or AVX512VL;
;; 512-bit needs AVX512F.
4346 (define_mode_iterator FMAMODEM
4347 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
4348 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
4349 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4350 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4351 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4352 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4353 (V16SF "TARGET_AVX512F")
4354 (V8DF "TARGET_AVX512F")])
;; fma<mode>4: dst = op1 * op2 + op3 (single rounding).
4356 (define_expand "fma<mode>4"
4357 [(set (match_operand:FMAMODEM 0 "register_operand")
4359 (match_operand:FMAMODEM 1 "nonimmediate_operand")
4360 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4361 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; fms<mode>4: dst = op1 * op2 - op3.
4363 (define_expand "fms<mode>4"
4364 [(set (match_operand:FMAMODEM 0 "register_operand")
4366 (match_operand:FMAMODEM 1 "nonimmediate_operand")
4367 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4368 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
;; fnma<mode>4: dst = -(op1 * op2) + op3.
4370 (define_expand "fnma<mode>4"
4371 [(set (match_operand:FMAMODEM 0 "register_operand")
4373 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
4374 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4375 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; fnms<mode>4: dst = -(op1 * op2) - op3.
4377 (define_expand "fnms<mode>4"
4378 [(set (match_operand:FMAMODEM 0 "register_operand")
4380 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
4381 (match_operand:FMAMODEM 2 "nonimmediate_operand")
4382 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
4384 ;; The builtins for intrinsics are not constrained by SSE math enabled.
4385 (define_mode_iterator FMAMODE_AVX512
4386 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4387 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
4388 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4389 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4390 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4391 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
4392 (V16SF "TARGET_AVX512F")
4393 (V8DF "TARGET_AVX512F")])
;; Modes usable by the VEX-encoded (FMA/FMA4) patterns only.
4395 (define_mode_iterator FMAMODE
4396 [SF DF V4SF V2DF V8SF V4DF])
;; Intrinsic-level expanders (not gated on SSE math), one per sign variant.
4398 (define_expand "fma4i_fmadd_<mode>"
4399 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4401 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
4402 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4403 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
4405 (define_expand "fma4i_fmsub_<mode>"
4406 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4408 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
4409 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4411 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
4413 (define_expand "fma4i_fnmadd_<mode>"
4414 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4417 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
4418 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4419 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
4421 (define_expand "fma4i_fnmsub_<mode>"
4422 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
4425 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
4426 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
4428 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
;; Zero-masked fmadd: expands to the maskz_1 insn with a zero merge vector.
4430 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
4431 [(match_operand:VF_AVX512VL 0 "register_operand")
4432 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4433 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4434 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4435 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4436 "TARGET_AVX512F && <round_mode512bit_condition>"
4438 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
4439 operands[0], operands[1], operands[2], operands[3],
4440 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; VEX-encoded fmadd.  FMA alternatives (0-2) pick the 132/213/231 form by
;; which input shares a register with the destination; FMA4 (3-4) is the
;; 4-operand non-destructive encoding.
4444 (define_insn "*fma_fmadd_<mode>"
4445 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4447 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
4448 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4449 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
4450 "TARGET_FMA || TARGET_FMA4"
4452 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4453 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4454 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4455 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4456 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4457 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4458 (set_attr "type" "ssemuladd")
4459 (set_attr "mode" "<MODE>")])
4461 ;; Assume AVX-512F as the baseline for the iterators/patterns below.
4462 (define_mode_iterator VF_SF_AVX512VL
4463 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
4464 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; EVEX fmadd, optionally zero-masked (<sd_maskz_name>) and with embedded
;; rounding (<round_name>); broadcast memory operands allowed via bcst_*.
4466 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
4467 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4469 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")
4470 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4471 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))]
4472 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4474 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4475 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4476 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4477 [(set_attr "type" "ssemuladd")
4478 (set_attr "mode" "<MODE>")])
;; Conditional (masked-with-fallback) fma: compute into a temp, then
;; vec_merge with operand 5 under mask operand 1.
4480 (define_expand "cond_fma<mode>"
4481 [(set (match_operand:VF_AVX512VL 0 "register_operand")
4482 (vec_merge:VF_AVX512VL
4484 (match_operand:VF_AVX512VL 2 "vector_operand")
4485 (match_operand:VF_AVX512VL 3 "vector_operand")
4486 (match_operand:VF_AVX512VL 4 "vector_operand"))
4487 (match_operand:VF_AVX512VL 5 "nonimm_or_0_operand")
4488 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
4491 rtx tmp = gen_reg_rtx (<MODE>mode);
4492 emit_insn (gen_fma<mode>4 (tmp,
4496 emit_move_insn (operands[0], gen_rtx_VEC_MERGE (<MODE>mode,
;; Merge-masked fmadd: masked-off lanes keep operand 1 (the accumulator
;; that is also destructed, hence the "0" constraints).
4503 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
4504 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4505 (vec_merge:VF_AVX512VL
4507 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4508 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4509 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4511 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4512 "TARGET_AVX512F && <round_mode512bit_condition>"
4514 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4515 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4516 [(set_attr "type" "ssemuladd")
4517 (set_attr "mode" "<MODE>")])
;; mask3 variant: masked-off lanes keep operand 3, so only the 231 form fits.
4519 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
4520 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4521 (vec_merge:VF_AVX512VL
4523 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4524 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4525 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4527 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4529 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4530 [(set_attr "type" "ssemuladd")
4531 (set_attr "mode" "<MODE>")])
;; fmsub family: dst = op1 * op2 - op3 (op3 negated in the RTL).
;; Structure parallels the fmadd patterns above.
4533 (define_insn "*fma_fmsub_<mode>"
4534 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4536 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
4537 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4539 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4540 "TARGET_FMA || TARGET_FMA4"
4542 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4543 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4544 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4545 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4546 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4547 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4548 (set_attr "type" "ssemuladd")
4549 (set_attr "mode" "<MODE>")])
;; Zero-masked fmsub expander (merge vector forced to zero).
4551 (define_expand "<avx512>_fmsub_<mode>_maskz<round_expand_name>"
4552 [(match_operand:VF_AVX512VL 0 "register_operand")
4553 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4554 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4555 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4556 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4557 "TARGET_AVX512F && <round_mode512bit_condition>"
4559 emit_insn (gen_fma_fmsub_<mode>_maskz_1<round_expand_name> (
4560 operands[0], operands[1], operands[2], operands[3],
4561 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; EVEX fmsub with optional zero-mask / embedded rounding.
4565 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
4566 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4568 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")
4569 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4571 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))]
4572 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4574 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4575 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4576 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4577 [(set_attr "type" "ssemuladd")
4578 (set_attr "mode" "<MODE>")])
;; Conditional fms: compute via gen_fms<mode>4 into a temp, then vec_merge.
4580 (define_expand "cond_fms<mode>"
4581 [(set (match_operand:VF_AVX512VL 0 "register_operand")
4582 (vec_merge:VF_AVX512VL
4584 (match_operand:VF_AVX512VL 2 "vector_operand")
4585 (match_operand:VF_AVX512VL 3 "vector_operand")
4587 (match_operand:VF_AVX512VL 4 "vector_operand")))
4588 (match_operand:VF_AVX512VL 5 "nonimm_or_0_operand")
4589 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
4592 rtx tmp = gen_reg_rtx (<MODE>mode);
4593 emit_insn (gen_fms<mode>4 (tmp,
4597 emit_move_insn (operands[0], gen_rtx_VEC_MERGE (<MODE>mode,
;; Merge-masked fmsub: masked-off lanes keep operand 1.
4604 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
4605 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4606 (vec_merge:VF_AVX512VL
4608 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4609 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4611 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4613 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4616 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4617 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4618 [(set_attr "type" "ssemuladd")
4619 (set_attr "mode" "<MODE>")])
;; mask3 variant: masked-off lanes keep operand 3 (231 form only).
4621 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
4622 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4623 (vec_merge:VF_AVX512VL
4625 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4626 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4628 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4630 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4631 "TARGET_AVX512F && <round_mode512bit_condition>"
4632 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4633 [(set_attr "type" "ssemuladd")
4634 (set_attr "mode" "<MODE>")])
;; fnmadd family: dst = -(op1 * op2) + op3 (op1 negated in the RTL).
4636 (define_insn "*fma_fnmadd_<mode>"
4637 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4640 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4641 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4642 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
4643 "TARGET_FMA || TARGET_FMA4"
4645 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4646 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4647 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4648 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4649 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4650 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4651 (set_attr "type" "ssemuladd")
4652 (set_attr "mode" "<MODE>")])
;; Zero-masked fnmadd expander.
4654 (define_expand "<avx512>_fnmadd_<mode>_maskz<round_expand_name>"
4655 [(match_operand:VF_AVX512VL 0 "register_operand")
4656 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4657 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4658 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4659 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4660 "TARGET_AVX512F && <round_mode512bit_condition>"
4662 emit_insn (gen_fma_fnmadd_<mode>_maskz_1<round_expand_name> (
4663 operands[0], operands[1], operands[2], operands[3],
4664 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; EVEX fnmadd with optional zero-mask / embedded rounding.
4668 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
4669 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4672 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v"))
4673 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4674 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))]
4675 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4677 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4678 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4679 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4680 [(set_attr "type" "ssemuladd")
4681 (set_attr "mode" "<MODE>")])
;; Conditional fnma: compute via gen_fnma<mode>4 into a temp, then vec_merge.
4683 (define_expand "cond_fnma<mode>"
4684 [(set (match_operand:VF_AVX512VL 0 "register_operand")
4685 (vec_merge:VF_AVX512VL
4688 (match_operand:VF_AVX512VL 2 "vector_operand"))
4689 (match_operand:VF_AVX512VL 3 "vector_operand")
4690 (match_operand:VF_AVX512VL 4 "vector_operand"))
4691 (match_operand:VF_AVX512VL 5 "nonimm_or_0_operand")
4692 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
4695 rtx tmp = gen_reg_rtx (<MODE>mode);
4696 emit_insn (gen_fnma<mode>4 (tmp,
4700 emit_move_insn (operands[0], gen_rtx_VEC_MERGE (<MODE>mode,
;; Merge-masked fnmadd: masked-off lanes keep operand 1.
4707 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
4708 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4709 (vec_merge:VF_AVX512VL
4712 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4713 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4714 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4716 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4717 "TARGET_AVX512F && <round_mode512bit_condition>"
4719 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4720 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4721 [(set_attr "type" "ssemuladd")
4722 (set_attr "mode" "<MODE>")])
;; mask3 variant: masked-off lanes keep operand 3 (231 form only).
4724 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
4725 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4726 (vec_merge:VF_AVX512VL
4729 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4730 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4731 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4733 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4734 "TARGET_AVX512F && <round_mode512bit_condition>"
4735 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4736 [(set_attr "type" "ssemuladd")
4737 (set_attr "mode" "<MODE>")])
;; fnmsub family: dst = -(op1 * op2) - op3.
;; VEX-encoded pattern gated on TARGET_FMA || TARGET_FMA4 only.
;; FIX(review): the first three alternatives wrongly carried
;; <round_sd_mask_op4>/<sd_mask_op4> substitutions, which only exist in the
;; <sd_maskz_name><round_name> AVX-512 variants of these patterns.  This
;; plain pattern has no such subst attributes; use the bare operand forms,
;; matching the sibling *fma_fmadd/*fma_fmsub/*fma_fnmadd patterns.
4739 (define_insn "*fma_fnmsub_<mode>"
4740 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4743 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4744 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
4746 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4747 "TARGET_FMA || TARGET_FMA4"
4749 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4750 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4751 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4752 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4753 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4754 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4755 (set_attr "type" "ssemuladd")
4756 (set_attr "mode" "<MODE>")])
;; Zero-masked fnmsub expander (merge vector forced to zero).
4758 (define_expand "<avx512>_fnmsub_<mode>_maskz<round_expand_name>"
4759 [(match_operand:VF_AVX512VL 0 "register_operand")
4760 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4761 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4762 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4763 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4764 "TARGET_AVX512F && <round_mode512bit_condition>"
4766 emit_insn (gen_fma_fnmsub_<mode>_maskz_1<round_expand_name> (
4767 operands[0], operands[1], operands[2], operands[3],
4768 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; EVEX fnmsub with optional zero-mask / embedded rounding.
4772 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
4773 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4776 (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v"))
4777 (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
4779 (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))]
4780 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4782 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4783 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4784 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4785 [(set_attr "type" "ssemuladd")
4786 (set_attr "mode" "<MODE>")])
;; Conditional fnms: compute via gen_fnms<mode>4 into a temp, then vec_merge.
4788 (define_expand "cond_fnms<mode>"
4789 [(set (match_operand:VF_AVX512VL 0 "register_operand")
4790 (vec_merge:VF_AVX512VL
4793 (match_operand:VF_AVX512VL 2 "vector_operand"))
4794 (match_operand:VF_AVX512VL 3 "vector_operand")
4796 (match_operand:VF_AVX512VL 4 "vector_operand")))
4797 (match_operand:VF_AVX512VL 5 "nonimm_or_0_operand")
4798 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
4801 rtx tmp = gen_reg_rtx (<MODE>mode);
4802 emit_insn (gen_fnms<mode>4 (tmp,
4806 emit_move_insn (operands[0], gen_rtx_VEC_MERGE (<MODE>mode,
;; Merge-masked fnmsub: masked-off lanes keep operand 1.
4813 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
4814 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4815 (vec_merge:VF_AVX512VL
4818 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4819 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4821 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4823 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4824 "TARGET_AVX512F && <round_mode512bit_condition>"
4826 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4827 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4828 [(set_attr "type" "ssemuladd")
4829 (set_attr "mode" "<MODE>")])
;; mask3 variant: masked-off lanes keep operand 3 (231 form only).
4831 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
4832 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4833 (vec_merge:VF_AVX512VL
4836 (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4837 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4839 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4841 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4843 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4844 [(set_attr "type" "ssemuladd")
4845 (set_attr "mode" "<MODE>")])
4847 ;; FMA parallel floating point multiply addsub and subadd operations.
4849 ;; It would be possible to represent these without the UNSPEC as
4852 ;; (fma op1 op2 op3)
4853 ;; (fma op1 op2 (neg op3))
4856 ;; But this doesn't seem useful in practice.
;; fmaddsub: even lanes subtract op3, odd lanes add (and vice versa for
;; fmsubadd); represented with an UNSPEC per the banner comment above.
4858 (define_expand "vec_fmaddsub<mode>4"
4859 [(set (match_operand:VF 0 "register_operand")
4861 [(match_operand:VF 1 "nonimmediate_operand")
4862 (match_operand:VF 2 "nonimmediate_operand")
4863 (match_operand:VF 3 "nonimmediate_operand")]
4865 "TARGET_FMA || TARGET_FMA4 || (<MODE_SIZE> == 64 || TARGET_AVX512VL)")
4867 (define_expand "vec_fmsubadd<mode>4"
4868 [(set (match_operand:VF 0 "register_operand")
4870 [(match_operand:VF 1 "nonimmediate_operand")
4871 (match_operand:VF 2 "nonimmediate_operand")
4873 (match_operand:VF 3 "nonimmediate_operand"))]
4875 "TARGET_FMA || TARGET_FMA4 || (<MODE_SIZE> == 64 || TARGET_AVX512VL)")
;; Intrinsic-level fmaddsub expander.
4877 (define_expand "fmaddsub_<mode>"
4878 [(set (match_operand:VF 0 "register_operand")
4880 [(match_operand:VF 1 "nonimmediate_operand")
4881 (match_operand:VF 2 "nonimmediate_operand")
4882 (match_operand:VF 3 "nonimmediate_operand")]
4884 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; Zero-masked fmaddsub expander (merge vector forced to zero).
4886 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
4887 [(match_operand:VF_AVX512VL 0 "register_operand")
4888 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4889 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4890 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4891 (match_operand:<avx512fmaskmode> 4 "register_operand")]
4894 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
4895 operands[0], operands[1], operands[2], operands[3],
4896 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; VEX-encoded fmaddsub (128/256-bit only; FMA 132/213/231 + FMA4 forms).
4900 (define_insn "*fma_fmaddsub_<mode>"
4901 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4903 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4904 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4905 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
4907 "TARGET_FMA || TARGET_FMA4"
4909 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4910 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4911 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4912 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4913 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4914 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4915 (set_attr "type" "ssemuladd")
4916 (set_attr "mode" "<MODE>")])
;; EVEX fmaddsub with optional zero-mask / embedded rounding.
4918 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
4919 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4920 (unspec:VF_SF_AVX512VL
4921 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4922 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4923 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
4925 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4927 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4928 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4929 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4930 [(set_attr "type" "ssemuladd")
4931 (set_attr "mode" "<MODE>")])
;; Merge-masked fmaddsub: masked-off lanes keep operand 1.
4933 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
4934 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4935 (vec_merge:VF_AVX512VL
4937 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4938 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4939 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")]
4942 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4945 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4946 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4947 [(set_attr "type" "ssemuladd")
4948 (set_attr "mode" "<MODE>")])
;; mask3 variant: masked-off lanes keep operand 3 (231 form only).
4950 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
4951 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4952 (vec_merge:VF_AVX512VL
4954 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4955 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4956 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
4959 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4961 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4962 [(set_attr "type" "ssemuladd")
4963 (set_attr "mode" "<MODE>")])
;; VEX-encoded fmsubadd (128/256-bit only), the lane-swapped counterpart
;; of fmaddsub above.
4965 (define_insn "*fma_fmsubadd_<mode>"
4966 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4968 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4969 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4971 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
4973 "TARGET_FMA || TARGET_FMA4"
4975 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4976 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4977 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4978 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4979 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4980 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4981 (set_attr "type" "ssemuladd")
4982 (set_attr "mode" "<MODE>")])
;; EVEX fmsubadd with optional zero-mask / embedded rounding.
4984 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4985 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4986 (unspec:VF_SF_AVX512VL
4987 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4988 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4990 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
4992 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4994 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4995 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4996 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4997 [(set_attr "type" "ssemuladd")
4998 (set_attr "mode" "<MODE>")])
;; Merge-masked fmsubadd: masked-off lanes keep operand 1.
5000 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
5001 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
5002 (vec_merge:VF_AVX512VL
5004 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
5005 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
5007 (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
5010 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
5013 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
5014 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
5015 [(set_attr "type" "ssemuladd")
5016 (set_attr "mode" "<MODE>")])
;; mask3 variant: masked-off lanes keep operand 3 (231 form only).
5018 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
5019 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
5020 (vec_merge:VF_AVX512VL
5022 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
5023 (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
5025 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
5028 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
5030 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
5031 [(set_attr "type" "ssemuladd")
5032 (set_attr "mode" "<MODE>")])
5034 ;; FMA3 floating point scalar intrinsics. These merge result with
5035 ;; high-order elements from the destination register.
5037 (define_expand "fmai_vmfmadd_<mode><round_name>"
5038 [(set (match_operand:VF_128 0 "register_operand")
5041 (match_operand:VF_128 1 "register_operand")
5042 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
5043 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
5048 (define_expand "fmai_vmfmsub_<mode><round_name>"
5049 [(set (match_operand:VF_128 0 "register_operand")
5052 (match_operand:VF_128 1 "register_operand")
5053 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
5055 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
5060 (define_expand "fmai_vmfnmadd_<mode><round_name>"
5061 [(set (match_operand:VF_128 0 "register_operand")
5065 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
5066 (match_operand:VF_128 1 "register_operand")
5067 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
5072 (define_expand "fmai_vmfnmsub_<mode><round_name>"
5073 [(set (match_operand:VF_128 0 "register_operand")
5077 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
5078 (match_operand:VF_128 1 "register_operand")
5080 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
5085 (define_insn "*fmai_fmadd_<mode>"
5086 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5089 (match_operand:VF_128 1 "register_operand" "0,0")
5090 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>, v")
5091 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
5094 "TARGET_FMA || TARGET_AVX512F"
5096 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
5097 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
5098 [(set_attr "type" "ssemuladd")
5099 (set_attr "mode" "<MODE>")])
5101 (define_insn "*fmai_fmsub_<mode>"
5102 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5105 (match_operand:VF_128 1 "register_operand" "0,0")
5106 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
5108 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
5111 "TARGET_FMA || TARGET_AVX512F"
5113 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
5114 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
5115 [(set_attr "type" "ssemuladd")
5116 (set_attr "mode" "<MODE>")])
5118 (define_insn "*fmai_fnmadd_<mode><round_name>"
5119 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5123 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5124 (match_operand:VF_128 1 "register_operand" "0,0")
5125 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
5128 "TARGET_FMA || TARGET_AVX512F"
5130 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
5131 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
5132 [(set_attr "type" "ssemuladd")
5133 (set_attr "mode" "<MODE>")])
5135 (define_insn "*fmai_fnmsub_<mode><round_name>"
5136 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5140 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5141 (match_operand:VF_128 1 "register_operand" "0,0")
5143 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
5146 "TARGET_FMA || TARGET_AVX512F"
5148 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
5149 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
5150 [(set_attr "type" "ssemuladd")
5151 (set_attr "mode" "<MODE>")])
5153 (define_insn "avx512f_vmfmadd_<mode>_mask<round_name>"
5154 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5158 (match_operand:VF_128 1 "register_operand" "0,0")
5159 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
5160 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
5162 (match_operand:QI 4 "register_operand" "Yk,Yk"))
5167 vfmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
5168 vfmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
5169 [(set_attr "type" "ssemuladd")
5170 (set_attr "mode" "<MODE>")])
5172 (define_insn "avx512f_vmfmadd_<mode>_mask3<round_name>"
5173 [(set (match_operand:VF_128 0 "register_operand" "=v")
5177 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
5178 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
5179 (match_operand:VF_128 3 "register_operand" "0"))
5181 (match_operand:QI 4 "register_operand" "Yk"))
5185 "vfmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
5186 [(set_attr "type" "ssemuladd")
5187 (set_attr "mode" "<MODE>")])
5189 (define_expand "avx512f_vmfmadd_<mode>_maskz<round_expand_name>"
5190 [(match_operand:VF_128 0 "register_operand")
5191 (match_operand:VF_128 1 "<round_expand_nimm_predicate>")
5192 (match_operand:VF_128 2 "<round_expand_nimm_predicate>")
5193 (match_operand:VF_128 3 "<round_expand_nimm_predicate>")
5194 (match_operand:QI 4 "register_operand")]
5197 emit_insn (gen_avx512f_vmfmadd_<mode>_maskz_1<round_expand_name> (
5198 operands[0], operands[1], operands[2], operands[3],
5199 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
5203 (define_insn "avx512f_vmfmadd_<mode>_maskz_1<round_name>"
5204 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5208 (match_operand:VF_128 1 "register_operand" "0,0")
5209 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
5210 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
5211 (match_operand:VF_128 4 "const0_operand" "C,C")
5212 (match_operand:QI 5 "register_operand" "Yk,Yk"))
5217 vfmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
5218 vfmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
5219 [(set_attr "type" "ssemuladd")
5220 (set_attr "mode" "<MODE>")])
5222 (define_insn "*avx512f_vmfmsub_<mode>_mask<round_name>"
5223 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5227 (match_operand:VF_128 1 "register_operand" "0,0")
5228 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
5230 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
5232 (match_operand:QI 4 "register_operand" "Yk,Yk"))
5237 vfmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
5238 vfmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
5239 [(set_attr "type" "ssemuladd")
5240 (set_attr "mode" "<MODE>")])
5242 (define_insn "avx512f_vmfmsub_<mode>_mask3<round_name>"
5243 [(set (match_operand:VF_128 0 "register_operand" "=v")
5247 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
5248 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
5250 (match_operand:VF_128 3 "register_operand" "0")))
5252 (match_operand:QI 4 "register_operand" "Yk"))
5256 "vfmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
5257 [(set_attr "type" "ssemuladd")
5258 (set_attr "mode" "<MODE>")])
5260 (define_insn "*avx512f_vmfmsub_<mode>_maskz_1<round_name>"
5261 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5265 (match_operand:VF_128 1 "register_operand" "0,0")
5266 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
5268 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
5269 (match_operand:VF_128 4 "const0_operand" "C,C")
5270 (match_operand:QI 5 "register_operand" "Yk,Yk"))
5275 vfmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
5276 vfmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
5277 [(set_attr "type" "ssemuladd")
5278 (set_attr "mode" "<MODE>")])
5280 (define_insn "*avx512f_vmfnmadd_<mode>_mask<round_name>"
5281 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5286 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5287 (match_operand:VF_128 1 "register_operand" "0,0")
5288 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
5290 (match_operand:QI 4 "register_operand" "Yk,Yk"))
5295 vfnmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
5296 vfnmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
5297 [(set_attr "type" "ssemuladd")
5298 (set_attr "mode" "<MODE>")])
5300 (define_insn "*avx512f_vmfnmadd_<mode>_mask3<round_name>"
5301 [(set (match_operand:VF_128 0 "register_operand" "=v")
5306 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
5307 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
5308 (match_operand:VF_128 3 "register_operand" "0"))
5310 (match_operand:QI 4 "register_operand" "Yk"))
5314 "vfnmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
5315 [(set_attr "type" "ssemuladd")
5316 (set_attr "mode" "<MODE>")])
5318 (define_insn "*avx512f_vmfnmadd_<mode>_maskz_1<round_name>"
5319 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5324 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5325 (match_operand:VF_128 1 "register_operand" "0,0")
5326 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
5327 (match_operand:VF_128 4 "const0_operand" "C,C")
5328 (match_operand:QI 5 "register_operand" "Yk,Yk"))
5333 vfnmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
5334 vfnmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
5335 [(set_attr "type" "ssemuladd")
5336 (set_attr "mode" "<MODE>")])
5338 (define_insn "*avx512f_vmfnmsub_<mode>_mask<round_name>"
5339 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5344 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5345 (match_operand:VF_128 1 "register_operand" "0,0")
5347 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
5349 (match_operand:QI 4 "register_operand" "Yk,Yk"))
5354 vfnmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
5355 vfnmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
5356 [(set_attr "type" "ssemuladd")
5357 (set_attr "mode" "<MODE>")])
5359 (define_insn "*avx512f_vmfnmsub_<mode>_mask3<round_name>"
5360 [(set (match_operand:VF_128 0 "register_operand" "=v")
5365 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
5366 (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
5368 (match_operand:VF_128 3 "register_operand" "0")))
5370 (match_operand:QI 4 "register_operand" "Yk"))
5374 "vfnmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
5375 [(set_attr "type" "ssemuladd")
5376 (set_attr "mode" "<MODE>")])
5378 (define_insn "*avx512f_vmfnmsub_<mode>_maskz_1<round_name>"
5379 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
5384 (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
5385 (match_operand:VF_128 1 "register_operand" "0,0")
5387 (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
5388 (match_operand:VF_128 4 "const0_operand" "C,C")
5389 (match_operand:QI 5 "register_operand" "Yk,Yk"))
5394 vfnmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
5395 vfnmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
5396 [(set_attr "type" "ssemuladd")
5397 (set_attr "mode" "<MODE>")])
5399 ;; FMA4 floating point scalar intrinsics. These write the
5400 ;; entire destination register, with the high-order elements zeroed.
5402 (define_expand "fma4i_vmfmadd_<mode>"
5403 [(set (match_operand:VF_128 0 "register_operand")
5406 (match_operand:VF_128 1 "nonimmediate_operand")
5407 (match_operand:VF_128 2 "nonimmediate_operand")
5408 (match_operand:VF_128 3 "nonimmediate_operand"))
5412 "operands[4] = CONST0_RTX (<MODE>mode);")
5414 (define_insn "*fma4i_vmfmadd_<mode>"
5415 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5418 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
5419 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5420 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
5421 (match_operand:VF_128 4 "const0_operand")
5424 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5425 [(set_attr "type" "ssemuladd")
5426 (set_attr "mode" "<MODE>")])
5428 (define_insn "*fma4i_vmfmsub_<mode>"
5429 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5432 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
5433 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5435 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
5436 (match_operand:VF_128 4 "const0_operand")
5439 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5440 [(set_attr "type" "ssemuladd")
5441 (set_attr "mode" "<MODE>")])
5443 (define_insn "*fma4i_vmfnmadd_<mode>"
5444 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5448 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
5449 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5450 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
5451 (match_operand:VF_128 4 "const0_operand")
5454 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5455 [(set_attr "type" "ssemuladd")
5456 (set_attr "mode" "<MODE>")])
5458 (define_insn "*fma4i_vmfnmsub_<mode>"
5459 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
5463 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
5464 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
5466 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
5467 (match_operand:VF_128 4 "const0_operand")
5470 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
5471 [(set_attr "type" "ssemuladd")
5472 (set_attr "mode" "<MODE>")])
5474 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5476 ;; Parallel single-precision floating point conversion operations
5478 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5480 (define_insn_and_split "sse_cvtpi2ps"
5481 [(set (match_operand:V4SF 0 "register_operand" "=x,x,Yv")
5484 (float:V2SF (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")))
5485 (match_operand:V4SF 1 "register_operand" "0,0,Yv")
5487 (clobber (match_scratch:V4SF 3 "=X,x,Yv"))]
5488 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5490 cvtpi2ps\t{%2, %0|%0, %2}
5493 "TARGET_SSE2 && reload_completed
5494 && SSE_REG_P (operands[2])"
5497 rtx op2 = lowpart_subreg (V4SImode, operands[2],
5498 GET_MODE (operands[2]));
5499 /* Generate SSE2 cvtdq2ps. */
5500 emit_insn (gen_floatv4siv4sf2 (operands[3], op2));
5502 /* Merge operands[3] with operands[0]. */
5506 mask = gen_rtx_PARALLEL (VOIDmode,
5507 gen_rtvec (4, GEN_INT (0), GEN_INT (1),
5508 GEN_INT (6), GEN_INT (7)));
5509 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
5510 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5511 emit_insn (gen_rtx_SET (operands[0], op2));
5515 /* NB: SSE can only concatenate OP0 and OP3 to OP0. */
5516 mask = gen_rtx_PARALLEL (VOIDmode,
5517 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5518 GEN_INT (4), GEN_INT (5)));
5519 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
5520 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
5521 emit_insn (gen_rtx_SET (operands[0], op2));
5523 /* Swap bits 0:63 with bits 64:127. */
5524 mask = gen_rtx_PARALLEL (VOIDmode,
5525 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
5526 GEN_INT (0), GEN_INT (1)));
5527 rtx dest = lowpart_subreg (V4SImode, operands[0],
5528 GET_MODE (operands[0]));
5529 op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
5530 emit_insn (gen_rtx_SET (dest, op1));
5534 [(set_attr "mmx_isa" "native,sse_noavx,avx")
5535 (set_attr "type" "ssecvt")
5536 (set_attr "mode" "V4SF")])
5538 (define_insn_and_split "sse_cvtps2pi"
5539 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5541 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm")]
5543 (parallel [(const_int 0) (const_int 1)])))]
5544 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5546 cvtps2pi\t{%1, %0|%0, %q1}
5548 "TARGET_SSE2 && reload_completed
5549 && SSE_REG_P (operands[0])"
5552 rtx op1 = lowpart_subreg (V2SFmode, operands[1],
5553 GET_MODE (operands[1]));
5554 rtx tmp = lowpart_subreg (V4SFmode, operands[0],
5555 GET_MODE (operands[0]));
5557 op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
5558 emit_insn (gen_rtx_SET (tmp, op1));
5560 rtx dest = lowpart_subreg (V4SImode, operands[0],
5561 GET_MODE (operands[0]));
5562 emit_insn (gen_sse2_fix_notruncv4sfv4si (dest, tmp));
5565 [(set_attr "isa" "*,sse2")
5566 (set_attr "mmx_isa" "native,*")
5567 (set_attr "type" "ssecvt")
5568 (set_attr "unit" "mmx,*")
5569 (set_attr "mode" "DI")])
5571 (define_insn_and_split "sse_cvttps2pi"
5572 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
5574 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm"))
5575 (parallel [(const_int 0) (const_int 1)])))]
5576 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
5578 cvttps2pi\t{%1, %0|%0, %q1}
5580 "TARGET_SSE2 && reload_completed
5581 && SSE_REG_P (operands[0])"
5584 rtx op1 = lowpart_subreg (V2SFmode, operands[1],
5585 GET_MODE (operands[1]));
5586 rtx tmp = lowpart_subreg (V4SFmode, operands[0],
5587 GET_MODE (operands[0]));
5589 op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
5590 emit_insn (gen_rtx_SET (tmp, op1));
5592 rtx dest = lowpart_subreg (V4SImode, operands[0],
5593 GET_MODE (operands[0]));
5594 emit_insn (gen_fix_truncv4sfv4si2 (dest, tmp));
5597 [(set_attr "isa" "*,sse2")
5598 (set_attr "mmx_isa" "native,*")
5599 (set_attr "type" "ssecvt")
5600 (set_attr "unit" "mmx,*")
5601 (set_attr "prefix_rep" "0")
5602 (set_attr "mode" "SF")])
5604 (define_insn "sse_cvtsi2ss<rex64namesuffix><round_name>"
5605 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5608 (float:SF (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
5609 (match_operand:V4SF 1 "register_operand" "0,0,v")
5613 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5614 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
5615 vcvtsi2ss<rex64suffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5616 [(set_attr "isa" "noavx,noavx,avx")
5617 (set_attr "type" "sseicvt")
5618 (set_attr "athlon_decode" "vector,double,*")
5619 (set_attr "amdfam10_decode" "vector,double,*")
5620 (set_attr "bdver1_decode" "double,direct,*")
5621 (set_attr "btver2_decode" "double,double,double")
5622 (set_attr "znver1_decode" "double,double,double")
5623 (set (attr "length_vex")
5625 (and (match_test "<MODE>mode == DImode")
5626 (eq_attr "alternative" "2"))
5628 (const_string "*")))
5629 (set (attr "prefix_rex")
5631 (and (match_test "<MODE>mode == DImode")
5632 (eq_attr "alternative" "0,1"))
5634 (const_string "*")))
5635 (set_attr "prefix" "orig,orig,maybe_evex")
5636 (set_attr "mode" "SF")])
5638 (define_insn "sse_cvtss2si<rex64namesuffix><round_name>"
5639 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5642 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
5643 (parallel [(const_int 0)]))]
5644 UNSPEC_FIX_NOTRUNC))]
5646 "%vcvtss2si<rex64suffix>\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
5647 [(set_attr "type" "sseicvt")
5648 (set_attr "athlon_decode" "double,vector")
5649 (set_attr "bdver1_decode" "double,double")
5650 (set_attr "prefix_rep" "1")
5651 (set_attr "prefix" "maybe_vex")
5652 (set_attr "mode" "<MODE>")])
5654 (define_insn "sse_cvtss2si<rex64namesuffix>_2"
5655 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5656 (unspec:SWI48 [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
5657 UNSPEC_FIX_NOTRUNC))]
5659 "%vcvtss2si<rex64suffix>\t{%1, %0|%0, %1}"
5660 [(set_attr "type" "sseicvt")
5661 (set_attr "athlon_decode" "double,vector")
5662 (set_attr "amdfam10_decode" "double,double")
5663 (set_attr "bdver1_decode" "double,double")
5664 (set_attr "prefix_rep" "1")
5665 (set_attr "prefix" "maybe_vex")
5666 (set_attr "mode" "<MODE>")])
5668 (define_insn "sse_cvttss2si<rex64namesuffix><round_saeonly_name>"
5669 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
5672 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
5673 (parallel [(const_int 0)]))))]
5675 "%vcvttss2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
5676 [(set_attr "type" "sseicvt")
5677 (set_attr "athlon_decode" "double,vector")
5678 (set_attr "amdfam10_decode" "double,double")
5679 (set_attr "bdver1_decode" "double,double")
5680 (set_attr "prefix_rep" "1")
5681 (set_attr "prefix" "maybe_vex")
5682 (set_attr "mode" "<MODE>")])
5684 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
5685 [(set (match_operand:VF_128 0 "register_operand" "=v")
5687 (vec_duplicate:VF_128
5688 (unsigned_float:<ssescalarmode>
5689 (match_operand:SI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
5690 (match_operand:VF_128 1 "register_operand" "v")
5692 "TARGET_AVX512F && <round_modev4sf_condition>"
5693 "vcvtusi2<ssescalarmodesuffix>{l}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5694 [(set_attr "type" "sseicvt")
5695 (set_attr "prefix" "evex")
5696 (set_attr "mode" "<ssescalarmode>")])
5698 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
5699 [(set (match_operand:VF_128 0 "register_operand" "=v")
5701 (vec_duplicate:VF_128
5702 (unsigned_float:<ssescalarmode>
5703 (match_operand:DI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
5704 (match_operand:VF_128 1 "register_operand" "v")
5706 "TARGET_AVX512F && TARGET_64BIT"
5707 "vcvtusi2<ssescalarmodesuffix>{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
5708 [(set_attr "type" "sseicvt")
5709 (set_attr "prefix" "evex")
5710 (set_attr "mode" "<ssescalarmode>")])
5712 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
5713 [(set (match_operand:VF1 0 "register_operand" "=x,v")
5715 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
5716 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
5718 cvtdq2ps\t{%1, %0|%0, %1}
5719 vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5720 [(set_attr "isa" "noavx,avx")
5721 (set_attr "type" "ssecvt")
5722 (set_attr "prefix" "maybe_vex")
5723 (set_attr "mode" "<sseinsnmode>")])
5725 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
5726 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
5727 (unsigned_float:VF1_AVX512VL
5728 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5730 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5731 [(set_attr "type" "ssecvt")
5732 (set_attr "prefix" "evex")
5733 (set_attr "mode" "<MODE>")])
5735 (define_expand "floatuns<sseintvecmodelower><mode>2"
5736 [(match_operand:VF1 0 "register_operand")
5737 (match_operand:<sseintvecmode> 1 "register_operand")]
5738 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
5740 if (<MODE>mode == V16SFmode)
5741 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
5743 if (TARGET_AVX512VL)
5745 if (<MODE>mode == V4SFmode)
5746 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
5748 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
5751 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
5757 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
5758 (define_mode_attr sf2simodelower
5759 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
5761 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
5762 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
5764 [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
5765 UNSPEC_FIX_NOTRUNC))]
5766 "TARGET_SSE2 && <mask_mode512bit_condition>"
5767 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5768 [(set_attr "type" "ssecvt")
5769 (set (attr "prefix_data16")
5771 (match_test "TARGET_AVX")
5773 (const_string "1")))
5774 (set_attr "prefix" "maybe_vex")
5775 (set_attr "mode" "<sseinsnmode>")])
5777 (define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
5778 [(set (match_operand:V16SI 0 "register_operand" "=v")
5780 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
5781 UNSPEC_FIX_NOTRUNC))]
5783 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5784 [(set_attr "type" "ssecvt")
5785 (set_attr "prefix" "evex")
5786 (set_attr "mode" "XI")])
5788 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
5789 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
5790 (unspec:VI4_AVX512VL
5791 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
5792 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5794 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5795 [(set_attr "type" "ssecvt")
5796 (set_attr "prefix" "evex")
5797 (set_attr "mode" "<sseinsnmode>")])
5799 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
5800 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5801 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5802 UNSPEC_FIX_NOTRUNC))]
5803 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5804 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5805 [(set_attr "type" "ssecvt")
5806 (set_attr "prefix" "evex")
5807 (set_attr "mode" "<sseinsnmode>")])
5809 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
5810 [(set (match_operand:V2DI 0 "register_operand" "=v")
5813 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5814 (parallel [(const_int 0) (const_int 1)]))]
5815 UNSPEC_FIX_NOTRUNC))]
5816 "TARGET_AVX512DQ && TARGET_AVX512VL"
5817 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5818 [(set_attr "type" "ssecvt")
5819 (set_attr "prefix" "evex")
5820 (set_attr "mode" "TI")])
5822 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
5823 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
5824 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
5825 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5826 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5827 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5828 [(set_attr "type" "ssecvt")
5829 (set_attr "prefix" "evex")
5830 (set_attr "mode" "<sseinsnmode>")])
5832 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
5833 [(set (match_operand:V2DI 0 "register_operand" "=v")
5836 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5837 (parallel [(const_int 0) (const_int 1)]))]
5838 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5839 "TARGET_AVX512DQ && TARGET_AVX512VL"
5840 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5841 [(set_attr "type" "ssecvt")
5842 (set_attr "prefix" "evex")
5843 (set_attr "mode" "TI")])
5845 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
5846 [(set (match_operand:V16SI 0 "register_operand" "=v")
5848 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5850 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5851 [(set_attr "type" "ssecvt")
5852 (set_attr "prefix" "evex")
5853 (set_attr "mode" "XI")])
5855 (define_insn "fix_truncv8sfv8si2<mask_name>"
5856 [(set (match_operand:V8SI 0 "register_operand" "=v")
5857 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
5858 "TARGET_AVX && <mask_avx512vl_condition>"
5859 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5860 [(set_attr "type" "ssecvt")
5861 (set_attr "prefix" "<mask_prefix>")
5862 (set_attr "mode" "OI")])
5864 (define_insn "fix_truncv4sfv4si2<mask_name>"
5865 [(set (match_operand:V4SI 0 "register_operand" "=v")
5866 (fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
5867 "TARGET_SSE2 && <mask_avx512vl_condition>"
5868 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5869 [(set_attr "type" "ssecvt")
5870 (set (attr "prefix_rep")
5872 (match_test "TARGET_AVX")
5874 (const_string "1")))
5875 (set (attr "prefix_data16")
5877 (match_test "TARGET_AVX")
5879 (const_string "0")))
5880 (set_attr "prefix_data16" "0")
5881 (set_attr "prefix" "<mask_prefix2>")
5882 (set_attr "mode" "TI")])
5884 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
5885 [(match_operand:<sseintvecmode> 0 "register_operand")
5886 (match_operand:VF1 1 "register_operand")]
5889 if (<MODE>mode == V16SFmode)
5890 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
5895 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5896 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
5897 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
5898 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
5903 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5905 ;; Parallel double-precision floating point conversion operations
5907 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Two packed signed 32-bit ints -> V2DF.  Alternative 0 keeps the value
;; in SSE registers and emits (v)cvtdq2pd; alternative 1 takes the input
;; from an MMX register via the legacy cvtpi2pd.
5909 (define_insn "sse2_cvtpi2pd"
5910 [(set (match_operand:V2DF 0 "register_operand" "=v,?!x")
5911 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "vBm,yBm")))]
5914 %vcvtdq2pd\t{%1, %0|%0, %1}
5915 cvtpi2pd\t{%1, %0|%0, %1}"
5916 [(set_attr "mmx_isa" "*,native")
5917 (set_attr "type" "ssecvt")
5918 (set_attr "unit" "*,mmx")
5919 (set_attr "prefix_data16" "*,1")
5920 (set_attr "prefix" "maybe_vex,*")
5921 (set_attr "mode" "V2DF")])
;; Standard-named signed V2SI -> V2DF expander; available when MMX-sized
;; values are handled in SSE registers (TARGET_MMX_WITH_SSE).
5923 (define_expand "floatv2siv2df2"
5924 [(set (match_operand:V2DF 0 "register_operand")
5925 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand")))]
5926 "TARGET_MMX_WITH_SSE")
;; Unsigned V2SI -> V2DF.  Only exists as the EVEX-encoded vcvtudq2pd,
;; hence the additional TARGET_AVX512VL requirement.
5928 (define_insn "floatunsv2siv2df2"
5929 [(set (match_operand:V2DF 0 "register_operand" "=v")
5930 (unsigned_float:V2DF
5931 (match_operand:V2SI 1 "nonimmediate_operand" "vm")))]
5932 "TARGET_MMX_WITH_SSE && TARGET_AVX512VL"
5933 "vcvtudq2pd\t{%1, %0|%0, %1}"
5934 [(set_attr "type" "ssecvt")
5935 (set_attr "prefix" "evex")
5936 (set_attr "mode" "V2DF")])
;; V2DF -> two packed 32-bit ints using the current rounding mode
;; (UNSPEC_FIX_NOTRUNC).  SSE destination uses (v)cvtpd2dq{x}; the MMX
;; destination alternative uses the legacy cvtpd2pi.
5938 (define_insn "sse2_cvtpd2pi"
5939 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5940 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm,xBm")]
5941 UNSPEC_FIX_NOTRUNC))]
5944 * return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvtpd2dq\t{%1, %0|%0, %1}\";
5945 cvtpd2pi\t{%1, %0|%0, %1}"
5946 [(set_attr "mmx_isa" "*,native")
5947 (set_attr "type" "ssecvt")
5948 (set_attr "unit" "*,mmx")
5949 (set_attr "amdfam10_decode" "double")
5950 (set_attr "athlon_decode" "vector")
5951 (set_attr "bdver1_decode" "double")
5952 (set_attr "prefix_data16" "*,1")
5953 (set_attr "prefix" "maybe_vex,*")
5954 (set_attr "mode" "TI")])
;; Truncating variant of the above (fix: RTL code = round toward zero),
;; mapping to (v)cvttpd2dq{x} / cvttpd2pi.
5956 (define_insn "sse2_cvttpd2pi"
5957 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
5958 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm,xBm")))]
5961 * return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvttpd2dq\t{%1, %0|%0, %1}\";
5962 cvttpd2pi\t{%1, %0|%0, %1}"
5963 [(set_attr "mmx_isa" "*,native")
5964 (set_attr "type" "ssecvt")
5965 (set_attr "unit" "*,mmx")
5966 (set_attr "amdfam10_decode" "double")
5967 (set_attr "athlon_decode" "vector")
5968 (set_attr "bdver1_decode" "double")
5969 (set_attr "prefix_data16" "*,1")
5970 (set_attr "prefix" "maybe_vex,*")
5971 (set_attr "mode" "TI")])
;; Standard-named signed truncating V2DF -> V2SI expander.
5973 (define_expand "fix_truncv2dfv2si2"
5974 [(set (match_operand:V2SI 0 "register_operand")
5975 (fix:V2SI (match_operand:V2DF 1 "vector_operand")))]
5976 "TARGET_MMX_WITH_SSE")
;; Unsigned truncating V2DF -> V2SI.  Only the EVEX vcvttpd2udq provides
;; this directly, so AVX512VL is required; {x} selects the 128-bit form.
5978 (define_insn "fixuns_truncv2dfv2si2"
5979 [(set (match_operand:V2SI 0 "register_operand" "=v")
5981 (match_operand:V2DF 1 "nonimmediate_operand" "vm")))]
5982 "TARGET_MMX_WITH_SSE && TARGET_AVX512VL"
5983 "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
5984 [(set_attr "type" "ssecvt")
5985 (set_attr "prefix" "evex")
5986 (set_attr "mode" "TI")])
;; Scalar 32-bit int -> DF, merged into the low element of a V2DF whose
;; upper element comes from operand 1 (SSE merge semantics).  Alternatives:
;; reg-input/mem-input legacy forms and the 3-operand AVX form.
5988 (define_insn "sse2_cvtsi2sd"
5989 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5992 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
5993 (match_operand:V2DF 1 "register_operand" "0,0,v")
5997 cvtsi2sd{l}\t{%2, %0|%0, %2}
5998 cvtsi2sd{l}\t{%2, %0|%0, %2}
5999 vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}"
6000 [(set_attr "isa" "noavx,noavx,avx")
6001 (set_attr "type" "sseicvt")
6002 (set_attr "athlon_decode" "double,direct,*")
6003 (set_attr "amdfam10_decode" "vector,double,*")
6004 (set_attr "bdver1_decode" "double,direct,*")
6005 (set_attr "btver2_decode" "double,double,double")
6006 (set_attr "znver1_decode" "double,double,double")
6007 (set_attr "prefix" "orig,orig,maybe_evex")
6008 (set_attr "mode" "DF")])
;; 64-bit variant (cvtsi2sd{q}); requires TARGET_64BIT and supports the
;; AVX512 embedded-rounding attribute machinery (<round_name> etc.).
6010 (define_insn "sse2_cvtsi2sdq<round_name>"
6011 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6014 (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
6015 (match_operand:V2DF 1 "register_operand" "0,0,v")
6017 "TARGET_SSE2 && TARGET_64BIT"
6019 cvtsi2sd{q}\t{%2, %0|%0, %2}
6020 cvtsi2sd{q}\t{%2, %0|%0, %2}
6021 vcvtsi2sd{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
6022 [(set_attr "isa" "noavx,noavx,avx")
6023 (set_attr "type" "sseicvt")
6024 (set_attr "athlon_decode" "double,direct,*")
6025 (set_attr "amdfam10_decode" "vector,double,*")
6026 (set_attr "bdver1_decode" "double,direct,*")
6027 (set_attr "length_vex" "*,*,4")
6028 (set_attr "prefix_rex" "1,1,*")
6029 (set_attr "prefix" "orig,orig,maybe_evex")
6030 (set_attr "mode" "DF")])
;; AVX512F scalar float -> unsigned integer conversions.  SWI48 iterates
;; over SImode/DImode results; the low vector element is extracted with
;; vec_select (const_int 0).  The cvt* forms round per MXCSR or an
;; embedded rounding override (<round_name>); the cvtt* forms truncate
;; and allow only SAE (<round_saeonly_name>).
6032 (define_insn "avx512f_vcvtss2usi<rex64namesuffix><round_name>"
6033 [(set (match_operand:SWI48 0 "register_operand" "=r")
6036 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
6037 (parallel [(const_int 0)]))]
6038 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6040 "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
6041 [(set_attr "type" "sseicvt")
6042 (set_attr "prefix" "evex")
6043 (set_attr "mode" "<MODE>")])
;; Truncating SS -> unsigned int (vcvttss2usi).
6045 (define_insn "avx512f_vcvttss2usi<rex64namesuffix><round_saeonly_name>"
6046 [(set (match_operand:SWI48 0 "register_operand" "=r")
6049 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
6050 (parallel [(const_int 0)]))))]
6052 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
6053 [(set_attr "type" "sseicvt")
6054 (set_attr "prefix" "evex")
6055 (set_attr "mode" "<MODE>")])
;; SD -> unsigned int, current/embedded rounding (vcvtsd2usi).
6057 (define_insn "avx512f_vcvtsd2usi<rex64namesuffix><round_name>"
6058 [(set (match_operand:SWI48 0 "register_operand" "=r")
6061 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
6062 (parallel [(const_int 0)]))]
6063 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6065 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
6066 [(set_attr "type" "sseicvt")
6067 (set_attr "prefix" "evex")
6068 (set_attr "mode" "<MODE>")])
;; Truncating SD -> unsigned int (vcvttsd2usi).
6070 (define_insn "avx512f_vcvttsd2usi<rex64namesuffix><round_saeonly_name>"
6071 [(set (match_operand:SWI48 0 "register_operand" "=r")
6074 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
6075 (parallel [(const_int 0)]))))]
6077 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
6078 [(set_attr "type" "sseicvt")
6079 (set_attr "prefix" "evex")
6080 (set_attr "mode" "<MODE>")])
;; SD (low element of a V2DF) -> signed SI/DI using current rounding
;; mode (UNSPEC_FIX_NOTRUNC), %vcvtsd2si.
6082 (define_insn "sse2_cvtsd2si<rex64namesuffix><round_name>"
6083 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
6086 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
6087 (parallel [(const_int 0)]))]
6088 UNSPEC_FIX_NOTRUNC))]
6090 "%vcvtsd2si<rex64suffix>\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
6091 [(set_attr "type" "sseicvt")
6092 (set_attr "athlon_decode" "double,vector")
6093 (set_attr "bdver1_decode" "double,double")
6094 (set_attr "btver2_decode" "double,double")
6095 (set_attr "prefix_rep" "1")
6096 (set_attr "prefix" "maybe_vex")
6097 (set_attr "mode" "<MODE>")])
;; Same conversion with a scalar DFmode source (e.g. a memory operand)
;; rather than a vector low element.
6099 (define_insn "sse2_cvtsd2si<rex64namesuffix>_2"
6100 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
6101 (unspec:SWI48 [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
6102 UNSPEC_FIX_NOTRUNC))]
6104 "%vcvtsd2si<rex64suffix>\t{%1, %0|%0, %q1}"
6105 [(set_attr "type" "sseicvt")
6106 (set_attr "athlon_decode" "double,vector")
6107 (set_attr "amdfam10_decode" "double,double")
6108 (set_attr "bdver1_decode" "double,double")
6109 (set_attr "prefix_rep" "1")
6110 (set_attr "prefix" "maybe_vex")
6111 (set_attr "mode" "<MODE>")])
;; Truncating SD -> signed SI/DI (%vcvttsd2si); SAE-only rounding hooks.
6113 (define_insn "sse2_cvttsd2si<rex64namesuffix><round_saeonly_name>"
6114 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
6117 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
6118 (parallel [(const_int 0)]))))]
6120 "%vcvttsd2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
6121 [(set_attr "type" "sseicvt")
6122 (set_attr "athlon_decode" "double,vector")
6123 (set_attr "amdfam10_decode" "double,double")
6124 (set_attr "bdver1_decode" "double,double")
6125 (set_attr "btver2_decode" "double,double")
6126 (set_attr "prefix_rep" "1")
6127 (set_attr "prefix" "maybe_vex")
6128 (set_attr "mode" "<MODE>")])
6130 ;; For the float<si2dfmodelower><mode>2 insn pattern below: map each
;; DF vector mode to the SI vector mode with half-width elements
;; (uppercase for modes, lowercase for pattern-name substitution).
6131 (define_mode_attr si2dfmode
6132 [(V8DF "V8SI") (V4DF "V4SI")])
6133 (define_mode_attr si2dfmodelower
6134 [(V8DF "v8si") (V4DF "v4si")])
;; Signed V8SI/V4SI -> V8DF/V4DF via vcvtdq2pd, optionally masked.
6136 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
6137 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
6138 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
6139 "TARGET_AVX && <mask_mode512bit_condition>"
6140 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6141 [(set_attr "type" "ssecvt")
6142 (set_attr "prefix" "maybe_vex")
6143 (set_attr "mode" "<MODE>")])
;; AVX512DQ signed/unsigned 64-bit int vector -> DF vector
;; (vcvtqq2pd / vcvtuqq2pd), with masking and embedded rounding.
6145 (define_insn "float<floatunssuffix><sseintvecmodelower><mode>2<mask_name><round_name>"
6146 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
6147 (any_float:VF2_AVX512VL
6148 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
6150 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6151 [(set_attr "type" "ssecvt")
6152 (set_attr "prefix" "evex")
6153 (set_attr "mode" "<MODE>")])
6155 ;; For the float<floatunssuffix><sselongvecmodelower><mode> insn patterns:
;; helper attributes mapping SF vector modes to same-width DI vector
;; modes, plus the {y} suffix needed to disambiguate the 128-bit result.
6156 (define_mode_attr qq2pssuff
6157 [(V8SF "") (V4SF "{y}")])
6159 (define_mode_attr sselongvecmode
6160 [(V8SF "V8DI") (V4SF "V4DI")])
6162 (define_mode_attr sselongvecmodelower
6163 [(V8SF "v8di") (V4SF "v4di")])
6165 (define_mode_attr sseintvecmode3
6166 [(V8SF "XI") (V4SF "OI")
6167 (V8DF "OI") (V4DF "TI")])
;; AVX512DQ signed/unsigned 64-bit int vector -> SF vector
;; (vcvtqq2ps / vcvtuqq2ps).
6169 (define_insn "float<floatunssuffix><sselongvecmodelower><mode>2<mask_name><round_name>"
6170 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
6171 (any_float:VF1_128_256VL
6172 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
6173 "TARGET_AVX512DQ && <round_modev8sf_condition>"
6174 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6175 [(set_attr "type" "ssecvt")
6176 (set_attr "prefix" "evex")
6177 (set_attr "mode" "<MODE>")])
;; V2DI -> V2SF, produced as a V4SF whose upper half is zero (the
;; hardware 128-bit vcvt(u)qq2ps writes zeros there).  Expander supplies
;; the zero vector for the insn pattern below.
6179 (define_expand "avx512dq_float<floatunssuffix>v2div2sf2"
6180 [(set (match_operand:V4SF 0 "register_operand" "=v")
6182 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
6184 "TARGET_AVX512DQ && TARGET_AVX512VL"
6185 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matching insn: converted low half concatenated with a zero V2SF.
6187 (define_insn "*avx512dq_float<floatunssuffix>v2div2sf2"
6188 [(set (match_operand:V4SF 0 "register_operand" "=v")
6190 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
6191 (match_operand:V2SF 2 "const0_operand" "C")))]
6192 "TARGET_AVX512DQ && TARGET_AVX512VL"
6193 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
6194 [(set_attr "type" "ssecvt")
6195 (set_attr "prefix" "evex")
6196 (set_attr "mode" "V4SF")])
;; Standard-named V2DI -> V2SF expander: operates on the V4SF subreg of
;; the V2SF destination and delegates to the pattern above.
6198 (define_expand "float<floatunssuffix>v2div2sf2"
6199 [(set (match_operand:V2SF 0 "register_operand")
6200 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand")))]
6201 "TARGET_AVX512DQ && TARGET_AVX512VL"
6203 operands[0] = simplify_gen_subreg (V4SFmode, operands[0], V2SFmode, 0);
6204 emit_insn (gen_avx512dq_float<floatunssuffix>v2div2sf2
6205 (operands[0], operands[1]));
;; Helpers for vec_pack<floatprefix>_float_<mode>: per-source-mode names
;; of the concat pattern, the temporary result mode, and the conversion
;; pattern operand mode.
6209 (define_mode_attr vpckfloat_concat_mode
6210 [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")])
6211 (define_mode_attr vpckfloat_temp_mode
6212 [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")])
6213 (define_mode_attr vpckfloat_op_mode
6214 [(V8DI "v8sf") (V4DI "v4sf") (V2DI "v2sf")])
;; Pack two 64-bit-int vectors into one float vector: convert each half
;; separately, then join the halves (movlhps for the V2DI case, where
;; each converted temp is a V4SF with a meaningful low half; vec_concat
;; otherwise).
6216 (define_expand "vec_pack<floatprefix>_float_<mode>"
6217 [(match_operand:<ssePSmode> 0 "register_operand")
6218 (any_float:<ssePSmode>
6219 (match_operand:VI8_AVX512VL 1 "register_operand"))
6220 (match_operand:VI8_AVX512VL 2 "register_operand")]
6223 rtx r1 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
6224 rtx r2 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
6225 rtx (*gen) (rtx, rtx);
6227 if (<MODE>mode == V2DImode)
6228 gen = gen_avx512dq_float<floatunssuffix>v2div2sf2;
6230 gen = gen_float<floatunssuffix><mode><vpckfloat_op_mode>2;
6231 emit_insn (gen (r1, operands[1]));
6232 emit_insn (gen (r2, operands[2]));
6233 if (<MODE>mode == V2DImode)
6234 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
6236 emit_insn (gen_avx_vec_concat<vpckfloat_concat_mode> (operands[0],
;; Masked V2DI -> V2SF conversions.  Merge-masking variant: unmasked
;; lanes come from the low half of operand 2; upper V2SF half is zero.
6241 (define_expand "float<floatunssuffix>v2div2sf2_mask"
6242 [(set (match_operand:V4SF 0 "register_operand" "=v")
6245 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
6247 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
6248 (parallel [(const_int 0) (const_int 1)]))
6249 (match_operand:QI 3 "register_operand" "Yk"))
6251 "TARGET_AVX512DQ && TARGET_AVX512VL"
6252 "operands[4] = CONST0_RTX (V2SFmode);")
;; Insn for the merge-masked form; %{%3%}%N2 emits the {k} mask and
;; optional {z} when operand 2 is zero.
6254 (define_insn "*float<floatunssuffix>v2div2sf2_mask"
6255 [(set (match_operand:V4SF 0 "register_operand" "=v")
6258 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
6260 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
6261 (parallel [(const_int 0) (const_int 1)]))
6262 (match_operand:QI 3 "register_operand" "Yk"))
6263 (match_operand:V2SF 4 "const0_operand" "C")))]
6264 "TARGET_AVX512DQ && TARGET_AVX512VL"
6265 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6266 [(set_attr "type" "ssecvt")
6267 (set_attr "prefix" "evex")
6268 (set_attr "mode" "V4SF")])
;; Zero-masking form: masked-off lanes are zeroed (explicit {z}).
6270 (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
6271 [(set (match_operand:V4SF 0 "register_operand" "=v")
6274 (any_float:V2SF (match_operand:V2DI 1
6275 "nonimmediate_operand" "vm"))
6276 (match_operand:V2SF 3 "const0_operand" "C")
6277 (match_operand:QI 2 "register_operand" "Yk"))
6278 (match_operand:V2SF 4 "const0_operand" "C")))]
6279 "TARGET_AVX512DQ && TARGET_AVX512VL"
6280 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6281 [(set_attr "type" "ssecvt")
6282 (set_attr "prefix" "evex")
6283 (set_attr "mode" "V4SF")])
;; Unsigned 32-bit int vector -> DF vector (vcvtudq2pd), 256/512-bit.
6285 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
6286 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
6287 (unsigned_float:VF2_512_256VL
6288 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
6290 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6291 [(set_attr "type" "ssecvt")
6292 (set_attr "prefix" "evex")
6293 (set_attr "mode" "<MODE>")])
;; 128-bit form: low two elements of a V4SI source, unsigned -> V2DF.
;; %q1 prints the 64-bit (low-half) memory reference.
6295 (define_insn "ufloatv2siv2df2<mask_name>"
6296 [(set (match_operand:V2DF 0 "register_operand" "=v")
6297 (unsigned_float:V2DF
6299 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
6300 (parallel [(const_int 0) (const_int 1)]))))]
6302 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6303 [(set_attr "type" "ssecvt")
6304 (set_attr "prefix" "evex")
6305 (set_attr "mode" "V2DF")])
;; vcvtdq2pd taking the low half of a wider int vector:
;; 512-bit form: low 8 elements of a V16SI -> V8DF (%t1 = low ymm half).
6307 (define_insn "avx512f_cvtdq2pd512_2"
6308 [(set (match_operand:V8DF 0 "register_operand" "=v")
6311 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
6312 (parallel [(const_int 0) (const_int 1)
6313 (const_int 2) (const_int 3)
6314 (const_int 4) (const_int 5)
6315 (const_int 6) (const_int 7)]))))]
6317 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
6318 [(set_attr "type" "ssecvt")
6319 (set_attr "prefix" "evex")
6320 (set_attr "mode" "V8DF")])
;; 256-bit form: low 4 elements of a V8SI -> V4DF (%x1 = low xmm half).
6322 (define_insn "avx_cvtdq2pd256_2"
6323 [(set (match_operand:V4DF 0 "register_operand" "=v")
6326 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
6327 (parallel [(const_int 0) (const_int 1)
6328 (const_int 2) (const_int 3)]))))]
6330 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
6331 [(set_attr "type" "ssecvt")
6332 (set_attr "prefix" "maybe_evex")
6333 (set_attr "mode" "V4DF")])
;; 128-bit form: low 2 elements of a V4SI -> V2DF, optionally masked.
6335 (define_insn "sse2_cvtdq2pd<mask_name>"
6336 [(set (match_operand:V2DF 0 "register_operand" "=v")
6339 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
6340 (parallel [(const_int 0) (const_int 1)]))))]
6341 "TARGET_SSE2 && <mask_avx512vl_condition>"
6342 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6343 [(set_attr "type" "ssecvt")
6344 (set_attr "prefix" "maybe_vex")
6345 (set_attr "mode" "V2DF")])
;; DF vector -> SI vector using current rounding mode (FIX_NOTRUNC):
;; 512-bit V8DF -> V8SI, maskable, embedded rounding.
6347 (define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
6348 [(set (match_operand:V8SI 0 "register_operand" "=v")
6350 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
6351 UNSPEC_FIX_NOTRUNC))]
6353 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6354 [(set_attr "type" "ssecvt")
6355 (set_attr "prefix" "evex")
6356 (set_attr "mode" "OI")])
;; 256-bit V4DF -> V4SI; {y} disambiguates the source width.
6358 (define_insn "avx_cvtpd2dq256<mask_name>"
6359 [(set (match_operand:V4SI 0 "register_operand" "=v")
6360 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
6361 UNSPEC_FIX_NOTRUNC))]
6362 "TARGET_AVX && <mask_avx512vl_condition>"
6363 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6364 [(set_attr "type" "ssecvt")
6365 (set_attr "prefix" "<mask_prefix>")
6366 (set_attr "mode" "OI")])
;; Variant producing a V8SI whose upper half is an explicit zero vector
;; (matches what the hardware writes to the upper lanes).
6368 (define_expand "avx_cvtpd2dq256_2"
6369 [(set (match_operand:V8SI 0 "register_operand")
6371 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
6375 "operands[2] = CONST0_RTX (V4SImode);")
6377 (define_insn "*avx_cvtpd2dq256_2"
6378 [(set (match_operand:V8SI 0 "register_operand" "=v")
6380 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
6382 (match_operand:V4SI 2 "const0_operand")))]
6384 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
6385 [(set_attr "type" "ssecvt")
6386 (set_attr "prefix" "vex")
6387 (set_attr "btver2_decode" "vector")
6388 (set_attr "mode" "OI")])
;; 128-bit V2DF -> V2SI (FIX_NOTRUNC), result padded to V4SI with zeros
;; in the upper half, as the hardware does.
6390 (define_insn "sse2_cvtpd2dq"
6391 [(set (match_operand:V4SI 0 "register_operand" "=v")
6393 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
6395 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6399 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
6401 return "cvtpd2dq\t{%1, %0|%0, %1}";
6403 [(set_attr "type" "ssecvt")
6404 (set_attr "prefix_rep" "1")
6405 (set_attr "prefix_data16" "0")
6406 (set_attr "prefix" "maybe_vex")
6407 (set_attr "mode" "TI")
6408 (set_attr "amdfam10_decode" "double")
6409 (set_attr "athlon_decode" "vector")
6410 (set_attr "bdver1_decode" "double")])
;; Merge-masked variant (AVX512VL): unmasked lanes from low half of
;; operand 2.
6412 (define_insn "sse2_cvtpd2dq_mask"
6413 [(set (match_operand:V4SI 0 "register_operand" "=v")
6416 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6419 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6420 (parallel [(const_int 0) (const_int 1)]))
6421 (match_operand:QI 3 "register_operand" "Yk"))
6422 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6424 "vcvtpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6425 [(set_attr "type" "ssecvt")
6426 (set_attr "prefix" "evex")
6427 (set_attr "mode" "TI")])
;; Zero-masking variant: masked-off lanes zeroed ({z}).
6429 (define_insn "*sse2_cvtpd2dq_mask_1"
6430 [(set (match_operand:V4SI 0 "register_operand" "=v")
6433 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6435 (const_vector:V2SI [(const_int 0) (const_int 0)])
6436 (match_operand:QI 2 "register_operand" "Yk"))
6437 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6439 "vcvtpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6440 [(set_attr "type" "ssecvt")
6441 (set_attr "prefix" "evex")
6442 (set_attr "mode" "TI")])
6444 ;; For ufix_notrunc* insn patterns: {y} suffix disambiguates the
;; 256-bit vcvtpd2udq form; the 512-bit form needs none.
6445 (define_mode_attr pd2udqsuff
6446 [(V8DF "") (V4DF "{y}")])
;; DF vector -> unsigned SI vector, current/embedded rounding
;; (vcvtpd2udq), 256/512-bit, maskable.
6448 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
6449 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
6451 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
6452 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6454 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6455 [(set_attr "type" "ssecvt")
6456 (set_attr "prefix" "evex")
6457 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit form: V2DF -> V2SI zero-padded to V4SI.
6459 (define_insn "ufix_notruncv2dfv2si2"
6460 [(set (match_operand:V4SI 0 "register_operand" "=v")
6463 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6464 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6465 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6467 "vcvtpd2udq{x}\t{%1, %0|%0, %1}"
6468 [(set_attr "type" "ssecvt")
6469 (set_attr "prefix" "evex")
6470 (set_attr "mode" "TI")])
;; Merge-masked 128-bit form.
6472 (define_insn "ufix_notruncv2dfv2si2_mask"
6473 [(set (match_operand:V4SI 0 "register_operand" "=v")
6477 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6478 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6480 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6481 (parallel [(const_int 0) (const_int 1)]))
6482 (match_operand:QI 3 "register_operand" "Yk"))
6483 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6485 "vcvtpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6486 [(set_attr "type" "ssecvt")
6487 (set_attr "prefix" "evex")
6488 (set_attr "mode" "TI")])
;; Zero-masked 128-bit form ({z}).
6490 (define_insn "*ufix_notruncv2dfv2si2_mask_1"
6491 [(set (match_operand:V4SI 0 "register_operand" "=v")
6495 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6496 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6497 (const_vector:V2SI [(const_int 0) (const_int 0)])
6498 (match_operand:QI 2 "register_operand" "Yk"))
6499 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6501 "vcvtpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6502 [(set_attr "type" "ssecvt")
6503 (set_attr "prefix" "evex")
6504 (set_attr "mode" "TI")])
;; Truncating V8DF -> V8SI, signed or unsigned by iterator
;; (vcvttpd2dq / vcvttpd2udq), maskable, SAE-only rounding hooks.
6506 (define_insn "fix<fixunssuffix>_truncv8dfv8si2<mask_name><round_saeonly_name>"
6507 [(set (match_operand:V8SI 0 "register_operand" "=v")
6509 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6511 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6512 [(set_attr "type" "ssecvt")
6513 (set_attr "prefix" "evex")
6514 (set_attr "mode" "OI")])
;; Unsigned truncating V2DF -> V2SI, zero-padded to V4SI.
6516 (define_insn "ufix_truncv2dfv2si2"
6517 [(set (match_operand:V4SI 0 "register_operand" "=v")
6519 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6520 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6522 "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
6523 [(set_attr "type" "ssecvt")
6524 (set_attr "prefix" "evex")
6525 (set_attr "mode" "TI")])
;; Merge-masked variant.
6527 (define_insn "ufix_truncv2dfv2si2_mask"
6528 [(set (match_operand:V4SI 0 "register_operand" "=v")
6531 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6533 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6534 (parallel [(const_int 0) (const_int 1)]))
6535 (match_operand:QI 3 "register_operand" "Yk"))
6536 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6538 "vcvttpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6539 [(set_attr "type" "ssecvt")
6540 (set_attr "prefix" "evex")
6541 (set_attr "mode" "TI")])
;; Zero-masked variant ({z}).
6543 (define_insn "*ufix_truncv2dfv2si2_mask_1"
6544 [(set (match_operand:V4SI 0 "register_operand" "=v")
6547 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6548 (const_vector:V2SI [(const_int 0) (const_int 0)])
6549 (match_operand:QI 2 "register_operand" "Yk"))
6550 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6552 "vcvttpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6553 [(set_attr "type" "ssecvt")
6554 (set_attr "prefix" "evex")
6555 (set_attr "mode" "TI")])
;; Signed truncating V4DF -> V4SI (vcvttpd2dq{y}); maskable with
;; AVX512VL.
6557 (define_insn "fix_truncv4dfv4si2<mask_name>"
6558 [(set (match_operand:V4SI 0 "register_operand" "=v")
6559 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6560 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
6561 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6562 [(set_attr "type" "ssecvt")
6563 (set_attr "prefix" "maybe_evex")
6564 (set_attr "mode" "OI")])
;; Unsigned truncating V4DF -> V4SI (vcvttpd2udq{y}); AVX512VL only.
6566 (define_insn "ufix_truncv4dfv4si2<mask_name>"
6567 [(set (match_operand:V4SI 0 "register_operand" "=v")
6568 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6569 "TARGET_AVX512VL && TARGET_AVX512F"
6570 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6571 [(set_attr "type" "ssecvt")
6572 (set_attr "prefix" "maybe_evex")
6573 (set_attr "mode" "OI")])
;; AVX512DQ truncating DF vector -> 64-bit int vector
;; (vcvttpd2qq / vcvttpd2uqq by iterator).
6575 (define_insn "fix<fixunssuffix>_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
6576 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6577 (any_fix:<sseintvecmode>
6578 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6579 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
6580 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6581 [(set_attr "type" "ssecvt")
6582 (set_attr "prefix" "evex")
6583 (set_attr "mode" "<sseintvecmode2>")])
;; DF vector -> signed 64-bit ints, current/embedded rounding
;; (vcvtpd2qq).
6585 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
6586 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6587 (unspec:<sseintvecmode>
6588 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
6589 UNSPEC_FIX_NOTRUNC))]
6590 "TARGET_AVX512DQ && <round_mode512bit_condition>"
6591 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6592 [(set_attr "type" "ssecvt")
6593 (set_attr "prefix" "evex")
6594 (set_attr "mode" "<sseintvecmode2>")])
;; DF vector -> unsigned 64-bit ints (vcvtpd2uqq).
6596 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
6597 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6598 (unspec:<sseintvecmode>
6599 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
6600 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6601 "TARGET_AVX512DQ && <round_mode512bit_condition>"
6602 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6603 [(set_attr "type" "ssecvt")
6604 (set_attr "prefix" "evex")
6605 (set_attr "mode" "<sseintvecmode2>")])
;; Truncating SF vector -> double-width 64-bit int vector
;; (vcvttps2qq / vcvttps2uqq).
6607 (define_insn "fix<fixunssuffix>_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
6608 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
6609 (any_fix:<sselongvecmode>
6610 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6611 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
6612 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6613 [(set_attr "type" "ssecvt")
6614 (set_attr "prefix" "evex")
6615 (set_attr "mode" "<sseintvecmode3>")])
;; Truncating low two SF elements of a V4SF -> V2DI
;; (vcvttps2qq / vcvttps2uqq); %q1 prints the 64-bit memory reference.
6617 (define_insn "avx512dq_fix<fixunssuffix>_truncv2sfv2di2<mask_name>"
6618 [(set (match_operand:V2DI 0 "register_operand" "=v")
6621 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
6622 (parallel [(const_int 0) (const_int 1)]))))]
6623 "TARGET_AVX512DQ && TARGET_AVX512VL"
6624 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6625 [(set_attr "type" "ssecvt")
6626 (set_attr "prefix" "evex")
6627 (set_attr "mode" "TI")])
;; Standard-named V2SF -> V2DI truncation: widen the V2SF operand to a
;; V4SF subreg and delegate to the pattern above.
6629 (define_expand "fix<fixunssuffix>_truncv2sfv2di2"
6630 [(set (match_operand:V2DI 0 "register_operand")
6632 (match_operand:V2SF 1 "register_operand")))]
6633 "TARGET_AVX512DQ && TARGET_AVX512VL"
6635 operands[1] = force_reg (V2SFmode, operands[1]);
6636 operands[1] = simplify_gen_subreg (V4SFmode, operands[1], V2SFmode, 0);
6637 emit_insn (gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2
6638 (operands[0], operands[1]));
;; Helpers for the vec_unpack fix-trunc expanders below: destination DI
;; vector mode (upper/lower-case) and the mode used for extracting one
;; half of the source.
6642 (define_mode_attr vunpckfixt_mode
6643 [(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")])
6644 (define_mode_attr vunpckfixt_model
6645 [(V16SF "v8di") (V8SF "v4di") (V4SF "v2di")])
6646 (define_mode_attr vunpckfixt_extract_mode
6647 [(V16SF "v16sf") (V8SF "v8sf") (V4SF "v8sf")])
;; Truncate the LOW half of an SF vector to a full-width DI vector:
;; extract the low half (except for V4SF, whose low half a 128-bit
;; vcvttps2qq reads directly), then convert.
6649 (define_expand "vec_unpack_<fixprefix>fix_trunc_lo_<mode>"
6650 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
6651 (any_fix:<vunpckfixt_mode>
6652 (match_operand:VF1_AVX512VL 1 "register_operand"))]
6655 rtx tem = operands[1];
6656 rtx (*gen) (rtx, rtx);
6658 if (<MODE>mode != V4SFmode)
6660 tem = gen_reg_rtx (<ssehalfvecmode>mode);
6661 emit_insn (gen_vec_extract_lo_<vunpckfixt_extract_mode> (tem,
6663 gen = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6666 gen = gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2;
6668 emit_insn (gen (operands[0], tem));
;; Truncate the HIGH half of an SF vector to a DI vector.  For V4SF the
;; high half is first moved down with vpermilps (imm 0x4e swaps the
;; 64-bit halves), then the 2-element convert is used.
6672 (define_expand "vec_unpack_<fixprefix>fix_trunc_hi_<mode>"
6673 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
6674 (any_fix:<vunpckfixt_mode>
6675 (match_operand:VF1_AVX512VL 1 "register_operand"))]
6679 rtx (*gen) (rtx, rtx);
6681 if (<MODE>mode != V4SFmode)
6683 tem = gen_reg_rtx (<ssehalfvecmode>mode);
6684 emit_insn (gen_vec_extract_hi_<vunpckfixt_extract_mode> (tem,
6686 gen = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6690 tem = gen_reg_rtx (V4SFmode);
6691 emit_insn (gen_avx_vpermilv4sf (tem, operands[1], GEN_INT (0x4e)));
6692 gen = gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2;
6695 emit_insn (gen (operands[0], tem));
;; Unsigned truncating SF vector -> SI vector (vcvttps2udq), 128/256-bit.
6699 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
6700 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6701 (unsigned_fix:<sseintvecmode>
6702 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
6704 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6705 [(set_attr "type" "ssecvt")
6706 (set_attr "prefix" "evex")
6707 (set_attr "mode" "<sseintvecmode2>")])
;; Truncating V4DF -> V4SI padded to V8SI with a zero upper half.
6709 (define_expand "avx_cvttpd2dq256_2"
6710 [(set (match_operand:V8SI 0 "register_operand")
6712 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
6715 "operands[2] = CONST0_RTX (V4SImode);")
;; Truncating V2DF -> V2SI zero-padded to V4SI ((v)cvttpd2dq).
6717 (define_insn "sse2_cvttpd2dq"
6718 [(set (match_operand:V4SI 0 "register_operand" "=v")
6720 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
6721 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6725 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
6727 return "cvttpd2dq\t{%1, %0|%0, %1}";
6729 [(set_attr "type" "ssecvt")
6730 (set_attr "amdfam10_decode" "double")
6731 (set_attr "athlon_decode" "vector")
6732 (set_attr "bdver1_decode" "double")
6733 (set_attr "prefix" "maybe_vex")
6734 (set_attr "mode" "TI")])
;; Merge-masked truncating V2DF -> V2SI (AVX512VL): unmasked lanes come
;; from the low half of operand 2; upper V2SI half is zero.
6736 (define_insn "sse2_cvttpd2dq_mask"
6737 [(set (match_operand:V4SI 0 "register_operand" "=v")
6740 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6742 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6743 (parallel [(const_int 0) (const_int 1)]))
6744 (match_operand:QI 3 "register_operand" "Yk"))
6745 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6747 "vcvttpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6748 [(set_attr "type" "ssecvt")
6749 (set_attr "prefix" "evex")
6750 (set_attr "mode" "TI")])
;; Zero-masking variant ({z}).
6752 (define_insn "*sse2_cvttpd2dq_mask_1"
6753 [(set (match_operand:V4SI 0 "register_operand" "=v")
6756 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6757 (const_vector:V2SI [(const_int 0) (const_int 0)])
6758 (match_operand:QI 2 "register_operand" "Yk"))
6759 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6761 "vcvttpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6762 [(set_attr "type" "ssecvt")
6763 (set_attr "prefix" "evex")
6764 (set_attr "mode" "TI")])
;; Scalar DF -> SF merged into the low element of a V4SF taken from
;; operand 1; maskable / embedded rounding in the AVX form.
6766 (define_insn "sse2_cvtsd2ss<mask_name><round_name>"
6767 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6770 (float_truncate:V2SF
6771 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
6772 (match_operand:V4SF 1 "register_operand" "0,0,v")
6776 cvtsd2ss\t{%2, %0|%0, %2}
6777 cvtsd2ss\t{%2, %0|%0, %q2}
6778 vcvtsd2ss\t{<round_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %q2<round_mask_op3>}"
6779 [(set_attr "isa" "noavx,noavx,avx")
6780 (set_attr "type" "ssecvt")
6781 (set_attr "athlon_decode" "vector,double,*")
6782 (set_attr "amdfam10_decode" "vector,double,*")
6783 (set_attr "bdver1_decode" "direct,direct,*")
6784 (set_attr "btver2_decode" "double,double,double")
6785 (set_attr "prefix" "orig,orig,<round_prefix>")
6786 (set_attr "mode" "SF")])
;; Same conversion with a plain scalar DFmode source operand.
6788 (define_insn "*sse2_vd_cvtsd2ss"
6789 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6792 (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
6793 (match_operand:V4SF 1 "register_operand" "0,0,v")
6797 cvtsd2ss\t{%2, %0|%0, %2}
6798 cvtsd2ss\t{%2, %0|%0, %2}
6799 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
6800 [(set_attr "isa" "noavx,noavx,avx")
6801 (set_attr "type" "ssecvt")
6802 (set_attr "athlon_decode" "vector,double,*")
6803 (set_attr "amdfam10_decode" "vector,double,*")
6804 (set_attr "bdver1_decode" "direct,direct,*")
6805 (set_attr "btver2_decode" "double,double,double")
6806 (set_attr "prefix" "orig,orig,vex")
6807 (set_attr "mode" "SF")])
;; Scalar SF -> DF merged into the low element of a V2DF taken from
;; operand 1; maskable / SAE in the AVX form.  %k2 prints the 32-bit
;; memory reference.
6809 (define_insn "sse2_cvtss2sd<mask_name><round_saeonly_name>"
6810 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6814 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
6815 (parallel [(const_int 0) (const_int 1)])))
6816 (match_operand:V2DF 1 "register_operand" "0,0,v")
6820 cvtss2sd\t{%2, %0|%0, %2}
6821 cvtss2sd\t{%2, %0|%0, %k2}
6822 vcvtss2sd\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %k2<round_saeonly_mask_op3>}"
6823 [(set_attr "isa" "noavx,noavx,avx")
6824 (set_attr "type" "ssecvt")
6825 (set_attr "amdfam10_decode" "vector,double,*")
6826 (set_attr "athlon_decode" "direct,direct,*")
6827 (set_attr "bdver1_decode" "direct,direct,*")
6828 (set_attr "btver2_decode" "double,double,double")
6829 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
6830 (set_attr "mode" "DF")])
;; Same conversion with a plain scalar SFmode source operand.
6832 (define_insn "*sse2_vd_cvtss2sd"
6833 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6836 (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
6837 (match_operand:V2DF 1 "register_operand" "0,0,v")
6841 cvtss2sd\t{%2, %0|%0, %2}
6842 cvtss2sd\t{%2, %0|%0, %2}
6843 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
6844 [(set_attr "isa" "noavx,noavx,avx")
6845 (set_attr "type" "ssecvt")
6846 (set_attr "amdfam10_decode" "vector,double,*")
6847 (set_attr "athlon_decode" "direct,direct,*")
6848 (set_attr "bdver1_decode" "direct,direct,*")
6849 (set_attr "btver2_decode" "double,double,double")
6850 (set_attr "prefix" "orig,orig,vex")
6851 (set_attr "mode" "DF")])
;; Packed DF -> SF narrowing conversions.
;; 512-bit form: V8DF -> V8SF (vcvtpd2ps), with optional masking and
;; embedded-rounding via the subst attributes.
6853 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
6854 [(set (match_operand:V8SF 0 "register_operand" "=v")
6855 (float_truncate:V8SF
6856 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
6858 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6859 [(set_attr "type" "ssecvt")
6860 (set_attr "prefix" "evex")
6861 (set_attr "mode" "V8SF")])
;; 256-bit form: V4DF -> V4SF.  The explicit {y} suffix disambiguates the
;; memory-operand size in AT&T syntax.  Masking requires AVX512VL.
6863 (define_insn "avx_cvtpd2ps256<mask_name>"
6864 [(set (match_operand:V4SF 0 "register_operand" "=v")
6865 (float_truncate:V4SF
6866 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6867 "TARGET_AVX && <mask_avx512vl_condition>"
6868 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6869 [(set_attr "type" "ssecvt")
6870 (set_attr "prefix" "maybe_evex")
6871 (set_attr "btver2_decode" "vector")
6872 (set_attr "mode" "V4SF")])
;; Expanders for V2DF -> V4SF narrowing (cvtpd2ps): the two converted
;; elements land in the low half of the V4SF result and the upper half is
;; zeroed (hence the CONST0_RTX operand initialized in the preparation
;; statement).  The _mask variant additionally merges under a QI mask.
;; NOTE(review): extraction gaps (6876, 6879-6880, 6885-6886, 6889,
;; 6893-6894) hide the vec_concat/vec_merge wrappers and conditions.
6874 (define_expand "sse2_cvtpd2ps"
6875 [(set (match_operand:V4SF 0 "register_operand")
6877 (float_truncate:V2SF
6878 (match_operand:V2DF 1 "vector_operand"))
6881 "operands[2] = CONST0_RTX (V2SFmode);")
6883 (define_expand "sse2_cvtpd2ps_mask"
6884 [(set (match_operand:V4SF 0 "register_operand")
6887 (float_truncate:V2SF
6888 (match_operand:V2DF 1 "vector_operand"))
6890 (match_operand:V4SF 2 "nonimm_or_0_operand")
6891 (parallel [(const_int 0) (const_int 1)]))
6892 (match_operand:QI 3 "register_operand"))
6895 "operands[4] = CONST0_RTX (V2SFmode);")
;; Insn for V2DF -> low-V2SF-of-V4SF narrowing; upper half must be the
;; zero vector (const0_operand).  Emits the AVX {x}-suffixed form (to pin
;; the operand size) or the legacy SSE2 form.
6897 (define_insn "*sse2_cvtpd2ps"
6898 [(set (match_operand:V4SF 0 "register_operand" "=v")
6900 (float_truncate:V2SF
6901 (match_operand:V2DF 1 "vector_operand" "vBm"))
6902 (match_operand:V2SF 2 "const0_operand" "C")))]
6906 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
6908 return "cvtpd2ps\t{%1, %0|%0, %1}";
6910 [(set_attr "type" "ssecvt")
6911 (set_attr "amdfam10_decode" "double")
6912 (set_attr "athlon_decode" "vector")
6913 (set_attr "bdver1_decode" "double")
6914 (set_attr "prefix_data16" "1")
6915 (set_attr "prefix" "maybe_vex")
6916 (set_attr "mode" "V4SF")])
;; Bare V2DF -> V2SF truncation for the 64-bit-vector-in-SSE-register
;; case (TARGET_MMX_WITH_SSE).  Same instruction selection as above; the
;; upper half of the XMM destination is simply ignored.
6918 (define_insn "truncv2dfv2sf2"
6919 [(set (match_operand:V2SF 0 "register_operand" "=v")
6920 (float_truncate:V2SF
6921 (match_operand:V2DF 1 "vector_operand" "vBm")))]
6922 "TARGET_MMX_WITH_SSE"
6925 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
6927 return "cvtpd2ps\t{%1, %0|%0, %1}";
6929 [(set_attr "type" "ssecvt")
6930 (set_attr "amdfam10_decode" "double")
6931 (set_attr "athlon_decode" "vector")
6932 (set_attr "bdver1_decode" "double")
6933 (set_attr "prefix_data16" "1")
6934 (set_attr "prefix" "maybe_vex")
6935 (set_attr "mode" "V4SF")])
;; Masked V2DF -> V2SF narrowing (AVX512VL).  Operand 3 is the k-mask;
;; %N2 selects merge- vs zero-masking from operand 2 ("0" = merge with
;; destination, "C" = zeroing).
6937 (define_insn "*sse2_cvtpd2ps_mask"
6938 [(set (match_operand:V4SF 0 "register_operand" "=v")
6941 (float_truncate:V2SF
6942 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6944 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
6945 (parallel [(const_int 0) (const_int 1)]))
6946 (match_operand:QI 3 "register_operand" "Yk"))
6947 (match_operand:V2SF 4 "const0_operand" "C")))]
6949 "vcvtpd2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6950 [(set_attr "type" "ssecvt")
6951 (set_attr "prefix" "evex")
6952 (set_attr "mode" "V4SF")])
;; Zero-masking-only variant: the merge source is the zero vector, so the
;; {z} modifier is emitted unconditionally.
6954 (define_insn "*sse2_cvtpd2ps_mask_1"
6955 [(set (match_operand:V4SF 0 "register_operand" "=v")
6958 (float_truncate:V2SF
6959 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6960 (match_operand:V2SF 3 "const0_operand" "C")
6961 (match_operand:QI 2 "register_operand" "Yk"))
6962 (match_operand:V2SF 4 "const0_operand" "C")))]
6964 "vcvtpd2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6965 [(set_attr "type" "ssecvt")
6966 (set_attr "prefix" "evex")
6967 (set_attr "mode" "V4SF")])
6969 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Mode attribute mapping a DF vector mode to the SF vector mode with the
;; same element count (V8DF->V8SF, V4DF->V4SF), plus a lower-case variant
;; for building pattern names.
6970 (define_mode_attr sf2dfmode
6971 [(V8DF "V8SF") (V4DF "V4SF")])
6972 (define_mode_attr sf2dfmode_lower
6973 [(V8DF "v8sf") (V4DF "v4sf")])
;; Standard-named expanders: narrowing (trunc<mode>...) and widening
;; (extend...<mode>) between same-count SF and DF vectors; both forward
;; to the insns below.
6975 (define_expand "trunc<mode><sf2dfmode_lower>2"
6976 [(set (match_operand:<sf2dfmode> 0 "register_operand")
6977 (float_truncate:<sf2dfmode>
6978 (match_operand:VF2_512_256 1 "vector_operand")))]
6981 (define_expand "extend<sf2dfmode_lower><mode>2"
6982 [(set (match_operand:VF2_512_256 0 "register_operand")
6983 (float_extend:VF2_512_256
6984 (match_operand:<sf2dfmode> 1 "vector_operand")))]
;; Widening SF -> DF conversion insn (vcvtps2pd), 256/512-bit, with
;; optional masking and SAE suppression.
6987 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
6988 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
6989 (float_extend:VF2_512_256
6990 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6991 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
6992 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6993 [(set_attr "type" "ssecvt")
6994 (set_attr "prefix" "maybe_vex")
6995 (set_attr "mode" "<MODE>")])
;; Widening conversion of the low half of a wider SF vector.
;; V8SF low half (elements 0-3) -> V4DF; %x1 prints the 128-bit view of
;; operand 1.  NOTE(review): the float_extend/vec_select opening lines
;; (6999-7000) are missing from this extraction.
6997 (define_insn "*avx_cvtps2pd256_2"
6998 [(set (match_operand:V4DF 0 "register_operand" "=v")
7001 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7002 (parallel [(const_int 0) (const_int 1)
7003 (const_int 2) (const_int 3)]))))]
7005 "vcvtps2pd\t{%x1, %0|%0, %x1}"
7006 [(set_attr "type" "ssecvt")
7007 (set_attr "prefix" "vex")
7008 (set_attr "mode" "V4DF")])
;; V16SF low half (elements 0-7) -> V8DF; %t1 prints the 256-bit view.
7010 (define_insn "vec_unpacks_lo_v16sf"
7011 [(set (match_operand:V8DF 0 "register_operand" "=v")
7014 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7015 (parallel [(const_int 0) (const_int 1)
7016 (const_int 2) (const_int 3)
7017 (const_int 4) (const_int 5)
7018 (const_int 6) (const_int 7)]))))]
7020 "vcvtps2pd\t{%t1, %0|%0, %t1}"
7021 [(set_attr "type" "ssecvt")
7022 (set_attr "prefix" "evex")
7023 (set_attr "mode" "V8DF")])
;; Vector <-> k-mask conversions (AVX512BW/DQ): vpmov<sz>2m extracts the
;; sign bits of each element into a mask register; vpmovm2<sz> broadcasts
;; each mask bit to an all-ones/all-zeros element.
;; QI/HI element variant (needs AVX512BW per the iterator conditions).
7025 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
7026 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
7027 (unspec:<avx512fmaskmode>
7028 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
7029 UNSPEC_CVTINT2MASK))]
7031 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
7032 [(set_attr "prefix" "evex")
7033 (set_attr "mode" "<sseinsnmode>")])
;; SI/DI element variant.
7035 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
7036 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
7037 (unspec:<avx512fmaskmode>
7038 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
7039 UNSPEC_CVTINT2MASK))]
7041 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
7042 [(set_attr "prefix" "evex")
7043 (set_attr "mode" "<sseinsnmode>")])
;; mask -> vector expanders: model the operation as a vec_merge of the
;; all-ones vector (operand 2) and the zero vector (operand 3) under the
;; mask, synthesizing those constants in the preparation statements.
7045 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
7046 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
7047 (vec_merge:VI12_AVX512VL
7050 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
7053 operands[2] = CONSTM1_RTX (<MODE>mode);
7054 operands[3] = CONST0_RTX (<MODE>mode);
7057 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
7058 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
7059 (vec_merge:VI12_AVX512VL
7060 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
7061 (match_operand:VI12_AVX512VL 3 "const0_operand")
7062 (match_operand:<avx512fmaskmode> 1 "register_operand" "k")))]
7064 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
7065 [(set_attr "prefix" "evex")
7066 (set_attr "mode" "<sseinsnmode>")])
7068 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
7069 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
7070 (vec_merge:VI48_AVX512VL
7073 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
7076 operands[2] = CONSTM1_RTX (<MODE>mode);
7077 operands[3] = CONST0_RTX (<MODE>mode);
;; SI/DI insn: with AVX512DQ use vpmovm2<sz>; otherwise fall back to a
;; zero-masked vpternlog with immediate 0x81 (writes all-ones under the
;; mask, zeros elsewhere).
7080 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
7081 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v,v")
7082 (vec_merge:VI48_AVX512VL
7083 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
7084 (match_operand:VI48_AVX512VL 3 "const0_operand")
7085 (match_operand:<avx512fmaskmode> 1 "register_operand" "k,Yk")))]
7088 vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}
7089 vpternlog<ssemodesuffix>\t{$0x81, %0, %0, %0%{%1%}%{z%}|%0%{%1%}%{z%}, %0, %0, 0x81}"
7090 [(set_attr "isa" "avx512dq,*")
7091 (set_attr "length_immediate" "0,1")
7092 (set_attr "prefix" "evex")
7093 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit widening: low two SF elements of a V4SF -> V2DF (cvtps2pd).
;; %q1 prints the 64-bit memory view in AT&T syntax.  NOTE(review): the
;; float_extend/vec_select opening lines (7097-7098) are missing from
;; this extraction.
7095 (define_insn "sse2_cvtps2pd<mask_name>"
7096 [(set (match_operand:V2DF 0 "register_operand" "=v")
7099 (match_operand:V4SF 1 "vector_operand" "vm")
7100 (parallel [(const_int 0) (const_int 1)]))))]
7101 "TARGET_SSE2 && <mask_avx512vl_condition>"
7102 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
7103 [(set_attr "type" "ssecvt")
7104 (set_attr "amdfam10_decode" "direct")
7105 (set_attr "athlon_decode" "double")
7106 (set_attr "bdver1_decode" "double")
7107 (set_attr "prefix_data16" "0")
7108 (set_attr "prefix" "maybe_vex")
7109 (set_attr "mode" "V2DF")])
;; Bare V2SF -> V2DF widening for 64-bit vectors held in SSE registers.
7111 (define_insn "extendv2sfv2df2"
7112 [(set (match_operand:V2DF 0 "register_operand" "=v")
7114 (match_operand:V2SF 1 "register_operand" "v")))]
7115 "TARGET_MMX_WITH_SSE"
7116 "%vcvtps2pd\t{%1, %0|%0, %1}"
7117 [(set_attr "type" "ssecvt")
7118 (set_attr "amdfam10_decode" "direct")
7119 (set_attr "athlon_decode" "double")
7120 (set_attr "bdver1_decode" "double")
7121 (set_attr "prefix_data16" "0")
7122 (set_attr "prefix" "maybe_vex")
7123 (set_attr "mode" "V2DF")])
;; Standard-named unpack-and-widen expanders (SF -> DF).  The hi variants
;; first shuffle the high elements into the low position of a scratch
;; register (operands[2]) and then convert its low half; the lo variants
;; convert the low half directly.  NOTE(review): several interior lines
;; are missing from this extraction (the intermediate sets and conditions).
7125 (define_expand "vec_unpacks_hi_v4sf"
7130 (match_operand:V4SF 1 "vector_operand"))
7131 (parallel [(const_int 6) (const_int 7)
7132 (const_int 2) (const_int 3)])))
7133 (set (match_operand:V2DF 0 "register_operand")
7137 (parallel [(const_int 0) (const_int 1)]))))]
7139 "operands[2] = gen_reg_rtx (V4SFmode);")
7141 (define_expand "vec_unpacks_hi_v8sf"
7144 (match_operand:V8SF 1 "register_operand")
7145 (parallel [(const_int 4) (const_int 5)
7146 (const_int 6) (const_int 7)])))
7147 (set (match_operand:V4DF 0 "register_operand")
7151 "operands[2] = gen_reg_rtx (V4SFmode);")
7153 (define_expand "vec_unpacks_hi_v16sf"
7156 (match_operand:V16SF 1 "register_operand")
7157 (parallel [(const_int 8) (const_int 9)
7158 (const_int 10) (const_int 11)
7159 (const_int 12) (const_int 13)
7160 (const_int 14) (const_int 15)])))
7161 (set (match_operand:V8DF 0 "register_operand")
7165 "operands[2] = gen_reg_rtx (V8SFmode);")
7167 (define_expand "vec_unpacks_lo_v4sf"
7168 [(set (match_operand:V2DF 0 "register_operand")
7171 (match_operand:V4SF 1 "vector_operand")
7172 (parallel [(const_int 0) (const_int 1)]))))]
7175 (define_expand "vec_unpacks_lo_v8sf"
7176 [(set (match_operand:V4DF 0 "register_operand")
7179 (match_operand:V8SF 1 "nonimmediate_operand")
7180 (parallel [(const_int 0) (const_int 1)
7181 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the float vector mode produced when its
;; low/high half is widened and converted.
7184 (define_mode_attr sseunpackfltmode
7185 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
7186 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; unpack-half + int->float expanders for HI-element vectors: widen the
;; requested half with the corresponding vec_unpack{s,u}_{hi,lo} pattern
;; into a scratch, then FLOAT-convert it into the destination.
7188 (define_expand "vec_unpacks_float_hi_<mode>"
7189 [(match_operand:<sseunpackfltmode> 0 "register_operand")
7190 (match_operand:VI2_AVX512F 1 "register_operand")]
7193 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
7195 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
7196 emit_insn (gen_rtx_SET (operands[0],
7197 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
7201 (define_expand "vec_unpacks_float_lo_<mode>"
7202 [(match_operand:<sseunpackfltmode> 0 "register_operand")
7203 (match_operand:VI2_AVX512F 1 "register_operand")]
7206 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
7208 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
7209 emit_insn (gen_rtx_SET (operands[0],
7210 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Unsigned variants: the halves are zero-extended (vec_unpacku), after
;; which a signed FLOAT conversion is correct since the values fit.
7214 (define_expand "vec_unpacku_float_hi_<mode>"
7215 [(match_operand:<sseunpackfltmode> 0 "register_operand")
7216 (match_operand:VI2_AVX512F 1 "register_operand")]
7219 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
7221 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
7222 emit_insn (gen_rtx_SET (operands[0],
7223 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
7227 (define_expand "vec_unpacku_float_lo_<mode>"
7228 [(match_operand:<sseunpackfltmode> 0 "register_operand")
7229 (match_operand:VI2_AVX512F 1 "register_operand")]
7232 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
7234 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
7235 emit_insn (gen_rtx_SET (operands[0],
7236 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed SI -> DF unpack-and-convert expanders.  The hi variants shuffle
;; the high SI elements down into a scratch (operands[2]) and then
;; convert; the lo variants convert the selected low elements directly.
;; NOTE(review): interior lines (intermediate sets, conditions) are
;; missing from this extraction.
7240 (define_expand "vec_unpacks_float_hi_v4si"
7243 (match_operand:V4SI 1 "vector_operand")
7244 (parallel [(const_int 2) (const_int 3)
7245 (const_int 2) (const_int 3)])))
7246 (set (match_operand:V2DF 0 "register_operand")
7250 (parallel [(const_int 0) (const_int 1)]))))]
7252 "operands[2] = gen_reg_rtx (V4SImode);")
7254 (define_expand "vec_unpacks_float_lo_v4si"
7255 [(set (match_operand:V2DF 0 "register_operand")
7258 (match_operand:V4SI 1 "vector_operand")
7259 (parallel [(const_int 0) (const_int 1)]))))]
7262 (define_expand "vec_unpacks_float_hi_v8si"
7265 (match_operand:V8SI 1 "vector_operand")
7266 (parallel [(const_int 4) (const_int 5)
7267 (const_int 6) (const_int 7)])))
7268 (set (match_operand:V4DF 0 "register_operand")
7272 "operands[2] = gen_reg_rtx (V4SImode);")
7274 (define_expand "vec_unpacks_float_lo_v8si"
7275 [(set (match_operand:V4DF 0 "register_operand")
7278 (match_operand:V8SI 1 "nonimmediate_operand")
7279 (parallel [(const_int 0) (const_int 1)
7280 (const_int 2) (const_int 3)]))))]
7283 (define_expand "vec_unpacks_float_hi_v16si"
7286 (match_operand:V16SI 1 "nonimmediate_operand")
7287 (parallel [(const_int 8) (const_int 9)
7288 (const_int 10) (const_int 11)
7289 (const_int 12) (const_int 13)
7290 (const_int 14) (const_int 15)])))
7291 (set (match_operand:V8DF 0 "register_operand")
7295 "operands[2] = gen_reg_rtx (V8SImode);")
7297 (define_expand "vec_unpacks_float_lo_v16si"
7298 [(set (match_operand:V8DF 0 "register_operand")
7301 (match_operand:V16SI 1 "nonimmediate_operand")
7302 (parallel [(const_int 0) (const_int 1)
7303 (const_int 2) (const_int 3)
7304 (const_int 4) (const_int 5)
7305 (const_int 6) (const_int 7)]))))]
;; Unsigned SI -> DF conversion without a native instruction (pre-AVX512):
;; convert as signed, then for elements that came out negative (input had
;; the sign bit set) add 2^32 back in.  operands[3] is the zero vector,
;; operands[4] is the splat of 2^32 (as DF); the lt/and/plus sequence
;; selects 2^32 per-element where the signed convert went negative.
7308 (define_expand "vec_unpacku_float_hi_v4si"
7311 (match_operand:V4SI 1 "vector_operand")
7312 (parallel [(const_int 2) (const_int 3)
7313 (const_int 2) (const_int 3)])))
7318 (parallel [(const_int 0) (const_int 1)]))))
7320 (lt:V2DF (match_dup 6) (match_dup 3)))
7322 (and:V2DF (match_dup 7) (match_dup 4)))
7323 (set (match_operand:V2DF 0 "register_operand")
7324 (plus:V2DF (match_dup 6) (match_dup 8)))]
7327 REAL_VALUE_TYPE TWO32r;
7331 real_ldexp (&TWO32r, &dconst1, 32);
7332 x = const_double_from_real_value (TWO32r, DFmode);
7334 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
7335 operands[4] = force_reg (V2DFmode,
7336 ix86_build_const_vector (V2DFmode, 1, x));
7338 operands[5] = gen_reg_rtx (V4SImode);
7340 for (i = 6; i < 9; i++)
7341 operands[i] = gen_reg_rtx (V2DFmode);
;; Same fix-up trick for the low half; no shuffle scratch is needed since
;; the low elements are converted in place.
7344 (define_expand "vec_unpacku_float_lo_v4si"
7348 (match_operand:V4SI 1 "vector_operand")
7349 (parallel [(const_int 0) (const_int 1)]))))
7351 (lt:V2DF (match_dup 5) (match_dup 3)))
7353 (and:V2DF (match_dup 6) (match_dup 4)))
7354 (set (match_operand:V2DF 0 "register_operand")
7355 (plus:V2DF (match_dup 5) (match_dup 7)))]
7358 REAL_VALUE_TYPE TWO32r;
7362 real_ldexp (&TWO32r, &dconst1, 32);
7363 x = const_double_from_real_value (TWO32r, DFmode);
7365 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
7366 operands[4] = force_reg (V2DFmode,
7367 ix86_build_const_vector (V2DFmode, 1, x));
7369 for (i = 5; i < 8; i++)
7370 operands[i] = gen_reg_rtx (V2DFmode);
;; 256-bit unsigned SI -> DF: extract the half, signed-convert, then add
;; 2^32 to the elements whose signed conversion came out negative
;; (compare-lt-zero mask ANDed with the 2^32 splat, then add).
7373 (define_expand "vec_unpacku_float_hi_v8si"
7374 [(match_operand:V4DF 0 "register_operand")
7375 (match_operand:V8SI 1 "register_operand")]
7378 REAL_VALUE_TYPE TWO32r;
7382 real_ldexp (&TWO32r, &dconst1, 32);
7383 x = const_double_from_real_value (TWO32r, DFmode);
7385 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
7386 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
7387 tmp[5] = gen_reg_rtx (V4SImode);
7389 for (i = 2; i < 5; i++)
7390 tmp[i] = gen_reg_rtx (V4DFmode);
7391 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
7392 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
7393 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
7394 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
7395 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; 512-bit variant: the fix-up uses a k-mask compare plus a masked add
;; instead of the and/add sequence.
7399 (define_expand "vec_unpacku_float_hi_v16si"
7400 [(match_operand:V8DF 0 "register_operand")
7401 (match_operand:V16SI 1 "register_operand")]
7404 REAL_VALUE_TYPE TWO32r;
7407 real_ldexp (&TWO32r, &dconst1, 32);
7408 x = const_double_from_real_value (TWO32r, DFmode);
7410 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
7411 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
7412 tmp[2] = gen_reg_rtx (V8DFmode);
7413 tmp[3] = gen_reg_rtx (V8SImode);
7414 k = gen_reg_rtx (QImode);
7416 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
7417 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
7418 ix86_expand_mask_vec_cmp (k, LT, tmp[2], tmp[0]);
7419 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
7420 emit_move_insn (operands[0], tmp[2]);
;; Low-half variants: the low elements are converted directly
;; (cvtdq2pd already reads only the low half), then fixed up as above.
7424 (define_expand "vec_unpacku_float_lo_v8si"
7425 [(match_operand:V4DF 0 "register_operand")
7426 (match_operand:V8SI 1 "nonimmediate_operand")]
7429 REAL_VALUE_TYPE TWO32r;
7433 real_ldexp (&TWO32r, &dconst1, 32);
7434 x = const_double_from_real_value (TWO32r, DFmode);
7436 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
7437 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
7439 for (i = 2; i < 5; i++)
7440 tmp[i] = gen_reg_rtx (V4DFmode);
7441 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
7442 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
7443 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
7444 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
7448 (define_expand "vec_unpacku_float_lo_v16si"
7449 [(match_operand:V8DF 0 "register_operand")
7450 (match_operand:V16SI 1 "nonimmediate_operand")]
7453 REAL_VALUE_TYPE TWO32r;
7456 real_ldexp (&TWO32r, &dconst1, 32);
7457 x = const_double_from_real_value (TWO32r, DFmode);
7459 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
7460 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
7461 tmp[2] = gen_reg_rtx (V8DFmode);
7462 k = gen_reg_rtx (QImode);
7464 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
7465 ix86_expand_mask_vec_cmp (k, LT, tmp[2], tmp[0]);
7466 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
7467 emit_move_insn (operands[0], tmp[2]);
;; Pack two DF vectors into one SF vector: narrow each input into a
;; scratch (operands[3]/[4]), then vec_concat the halves.
7471 (define_expand "vec_pack_trunc_<mode>"
7473 (float_truncate:<sf2dfmode>
7474 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
7476 (float_truncate:<sf2dfmode>
7477 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
7478 (set (match_operand:<ssePSmode> 0 "register_operand")
7479 (vec_concat:<ssePSmode>
7484 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
7485 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; V2DF+V2DF -> V4SF: with 256-bit AVX and a speed preference, concat to
;; V4DF and do one 256-bit cvtpd2ps; otherwise convert each half and
;; combine with movlhps.
7488 (define_expand "vec_pack_trunc_v2df"
7489 [(match_operand:V4SF 0 "register_operand")
7490 (match_operand:V2DF 1 "vector_operand")
7491 (match_operand:V2DF 2 "vector_operand")]
7496 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7498 tmp0 = gen_reg_rtx (V4DFmode);
7499 tmp1 = force_reg (V2DFmode, operands[1]);
7501 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7502 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
7506 tmp0 = gen_reg_rtx (V4SFmode);
7507 tmp1 = gen_reg_rtx (V4SFmode);
7509 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
7510 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
7511 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Pack two DF vectors into one SI vector with truncation toward zero:
;; fix_trunc each input then concatenate.
7516 (define_expand "vec_pack_sfix_trunc_v8df"
7517 [(match_operand:V16SI 0 "register_operand")
7518 (match_operand:V8DF 1 "nonimmediate_operand")
7519 (match_operand:V8DF 2 "nonimmediate_operand")]
7524 r1 = gen_reg_rtx (V8SImode);
7525 r2 = gen_reg_rtx (V8SImode);
7527 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
7528 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
7529 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7533 (define_expand "vec_pack_sfix_trunc_v4df"
7534 [(match_operand:V8SI 0 "register_operand")
7535 (match_operand:V4DF 1 "nonimmediate_operand")
7536 (match_operand:V4DF 2 "nonimmediate_operand")]
7541 r1 = gen_reg_rtx (V4SImode);
7542 r2 = gen_reg_rtx (V4SImode);
7544 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
7545 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
7546 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; V2DF case: each cvttpd2dq yields results in the low 64 bits, so the
;; two halves are combined with punpcklqdq via the V2DI interleave.
7550 (define_expand "vec_pack_sfix_trunc_v2df"
7551 [(match_operand:V4SI 0 "register_operand")
7552 (match_operand:V2DF 1 "vector_operand")
7553 (match_operand:V2DF 2 "vector_operand")]
7556 rtx tmp0, tmp1, tmp2;
7558 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7560 tmp0 = gen_reg_rtx (V4DFmode);
7561 tmp1 = force_reg (V2DFmode, operands[1]);
7563 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7564 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
7568 tmp0 = gen_reg_rtx (V4SImode);
7569 tmp1 = gen_reg_rtx (V4SImode);
7570 tmp2 = gen_reg_rtx (V2DImode);
7572 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
7573 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
7574 emit_insn (gen_vec_interleave_lowv2di (tmp2,
7575 gen_lowpart (V2DImode, tmp0),
7576 gen_lowpart (V2DImode, tmp1)));
7577 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; Maps a DF vector mode to the SI vector mode holding the packed result
;; of converting two such vectors.
7582 (define_mode_attr ssepackfltmode
7583 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; DF -> unsigned SI pack.  V8DF uses the native AVX512 unsigned convert;
;; narrower modes emulate it: bias the inputs into signed range
;; (ix86_expand_adjust_ufix_to_sfix_si), do the signed pack, then XOR the
;; interleaved correction terms back in.
7585 (define_expand "vec_pack_ufix_trunc_<mode>"
7586 [(match_operand:<ssepackfltmode> 0 "register_operand")
7587 (match_operand:VF2 1 "register_operand")
7588 (match_operand:VF2 2 "register_operand")]
7591 if (<MODE>mode == V8DFmode)
7595 r1 = gen_reg_rtx (V8SImode);
7596 r2 = gen_reg_rtx (V8SImode);
7598 emit_insn (gen_fixuns_truncv8dfv8si2 (r1, operands[1]));
7599 emit_insn (gen_fixuns_truncv8dfv8si2 (r2, operands[2]));
7600 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7605 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
7606 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
7607 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
7608 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
7609 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
7611 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
7612 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
;; Without AVX2 the V8SI even/odd extract is done through V8SF lowparts.
7616 tmp[5] = gen_reg_rtx (V8SFmode);
7617 ix86_expand_vec_extract_even_odd (tmp[5],
7618 gen_lowpart (V8SFmode, tmp[2]),
7619 gen_lowpart (V8SFmode, tmp[3]), 0);
7620 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
7622 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
7623 operands[0], 0, OPTAB_DIRECT);
7624 if (tmp[6] != operands[0])
7625 emit_move_insn (operands[0], tmp[6]);
;; Pack two DF vectors into one SI vector with round-to-nearest
;; (cvtpd2dq, as opposed to the truncating cvttpd2dq above), then
;; concatenate / interleave the halves.
7631 (define_expand "avx512f_vec_pack_sfix_v8df"
7632 [(match_operand:V16SI 0 "register_operand")
7633 (match_operand:V8DF 1 "nonimmediate_operand")
7634 (match_operand:V8DF 2 "nonimmediate_operand")]
7639 r1 = gen_reg_rtx (V8SImode);
7640 r2 = gen_reg_rtx (V8SImode);
7642 emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
7643 emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
7644 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7648 (define_expand "vec_pack_sfix_v4df"
7649 [(match_operand:V8SI 0 "register_operand")
7650 (match_operand:V4DF 1 "nonimmediate_operand")
7651 (match_operand:V4DF 2 "nonimmediate_operand")]
7656 r1 = gen_reg_rtx (V4SImode);
7657 r2 = gen_reg_rtx (V4SImode);
7659 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
7660 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
7661 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; V2DF case mirrors vec_pack_sfix_trunc_v2df: wide 256-bit convert when
;; profitable, otherwise two 128-bit converts joined by punpcklqdq.
7665 (define_expand "vec_pack_sfix_v2df"
7666 [(match_operand:V4SI 0 "register_operand")
7667 (match_operand:V2DF 1 "vector_operand")
7668 (match_operand:V2DF 2 "vector_operand")]
7671 rtx tmp0, tmp1, tmp2;
7673 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7675 tmp0 = gen_reg_rtx (V4DFmode);
7676 tmp1 = force_reg (V2DFmode, operands[1]);
7678 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7679 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
7683 tmp0 = gen_reg_rtx (V4SImode);
7684 tmp1 = gen_reg_rtx (V4SImode);
7685 tmp2 = gen_reg_rtx (V2DImode);
7687 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
7688 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
7689 emit_insn (gen_vec_interleave_lowv2di (tmp2,
7690 gen_lowpart (V2DImode, tmp0),
7691 gen_lowpart (V2DImode, tmp1)));
7692 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
7697 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7699 ;; Parallel single-precision floating point element swizzling
7701 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movhlps: copy the high two SF elements of operand 2 into the low half
;; of the destination, keeping the destination's own high half (selection
;; indices 6,7,... from the concat of the two sources).  The _exp expander
;; canonicalizes the operands and fixes up a non-matching destination.
7703 (define_expand "sse_movhlps_exp"
7704 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7707 (match_operand:V4SF 1 "nonimmediate_operand")
7708 (match_operand:V4SF 2 "nonimmediate_operand"))
7709 (parallel [(const_int 6)
7715 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7717 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
7719 /* Fix up the destination if needed.  */
7720 if (dst != operands[0])
7721 emit_move_insn (operands[0], dst);
;; Alternatives cover reg-reg movhlps/vmovhlps, loading the high half of
;; a memory operand via (v)movlps, and storing the high half with movhps.
7726 (define_insn "sse_movhlps"
7727 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
7730 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7731 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
7732 (parallel [(const_int 6)
7736 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7738 movhlps\t{%2, %0|%0, %2}
7739 vmovhlps\t{%2, %1, %0|%0, %1, %2}
7740 movlps\t{%H2, %0|%0, %H2}
7741 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
7742 %vmovhps\t{%2, %0|%q0, %2}"
7743 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7744 (set_attr "type" "ssemov")
7745 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7746 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movlhps: the mirror operation — copy the low two SF elements of
;; operand 2 into the high half of the destination.
7748 (define_expand "sse_movlhps_exp"
7749 [(set (match_operand:V4SF 0 "nonimmediate_operand")
7752 (match_operand:V4SF 1 "nonimmediate_operand")
7753 (match_operand:V4SF 2 "nonimmediate_operand"))
7754 (parallel [(const_int 0)
7760 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7762 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
7764 /* Fix up the destination if needed.  */
7765 if (dst != operands[0])
7766 emit_move_insn (operands[0], dst);
7771 (define_insn "sse_movlhps"
7772 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
7775 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7776 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
7777 (parallel [(const_int 0)
7781 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
7783 movlhps\t{%2, %0|%0, %2}
7784 vmovlhps\t{%2, %1, %0|%0, %1, %2}
7785 movhps\t{%2, %0|%0, %q2}
7786 vmovhps\t{%2, %1, %0|%0, %1, %q2}
7787 %vmovlps\t{%2, %H0|%H0, %2}"
7788 [(set_attr "isa" "noavx,avx,noavx,avx,*")
7789 (set_attr "type" "ssemov")
7790 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7791 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; unpckhps family: interleave the high SF elements of two sources,
;; operating within each 128-bit lane (visible in the selection indices).
;; 512-bit form with optional masking.
7793 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
7794 [(set (match_operand:V16SF 0 "register_operand" "=v")
7797 (match_operand:V16SF 1 "register_operand" "v")
7798 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7799 (parallel [(const_int 2) (const_int 18)
7800 (const_int 3) (const_int 19)
7801 (const_int 6) (const_int 22)
7802 (const_int 7) (const_int 23)
7803 (const_int 10) (const_int 26)
7804 (const_int 11) (const_int 27)
7805 (const_int 14) (const_int 30)
7806 (const_int 15) (const_int 31)])))]
7808 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7809 [(set_attr "type" "sselog")
7810 (set_attr "prefix" "evex")
7811 (set_attr "mode" "V16SF")])
7813 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7814 (define_insn "avx_unpckhps256<mask_name>"
7815 [(set (match_operand:V8SF 0 "register_operand" "=v")
7818 (match_operand:V8SF 1 "register_operand" "v")
7819 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7820 (parallel [(const_int 2) (const_int 10)
7821 (const_int 3) (const_int 11)
7822 (const_int 6) (const_int 14)
7823 (const_int 7) (const_int 15)])))]
7824 "TARGET_AVX && <mask_avx512vl_condition>"
7825 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7826 [(set_attr "type" "sselog")
7827 (set_attr "prefix" "vex")
7828 (set_attr "mode" "V8SF")])
;; Cross-lane high interleave for V8SF: built from in-lane unpcklps and
;; unpckhps into scratches (operands[3]/[4]) followed by a lane-fixing
;; permute.  NOTE(review): interior lines of the pattern are missing from
;; this extraction.
7830 (define_expand "vec_interleave_highv8sf"
7834 (match_operand:V8SF 1 "register_operand")
7835 (match_operand:V8SF 2 "nonimmediate_operand"))
7836 (parallel [(const_int 0) (const_int 8)
7837 (const_int 1) (const_int 9)
7838 (const_int 4) (const_int 12)
7839 (const_int 5) (const_int 13)])))
7845 (parallel [(const_int 2) (const_int 10)
7846 (const_int 3) (const_int 11)
7847 (const_int 6) (const_int 14)
7848 (const_int 7) (const_int 15)])))
7849 (set (match_operand:V8SF 0 "register_operand")
7854 (parallel [(const_int 4) (const_int 5)
7855 (const_int 6) (const_int 7)
7856 (const_int 12) (const_int 13)
7857 (const_int 14) (const_int 15)])))]
7860 operands[3] = gen_reg_rtx (V8SFmode);
7861 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit high interleave (unpckhps), with optional AVX512VL masking.
7864 (define_insn "vec_interleave_highv4sf<mask_name>"
7865 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7868 (match_operand:V4SF 1 "register_operand" "0,v")
7869 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7870 (parallel [(const_int 2) (const_int 6)
7871 (const_int 3) (const_int 7)])))]
7872 "TARGET_SSE && <mask_avx512vl_condition>"
7874 unpckhps\t{%2, %0|%0, %2}
7875 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7876 [(set_attr "isa" "noavx,avx")
7877 (set_attr "type" "sselog")
7878 (set_attr "prefix" "orig,vex")
7879 (set_attr "mode" "V4SF")])
7881 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
7882 [(set (match_operand:V16SF 0 "register_operand" "=v")
7885 (match_operand:V16SF 1 "register_operand" "v")
7886 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7887 (parallel [(const_int 0) (const_int 16)
7888 (const_int 1) (const_int 17)
7889 (const_int 4) (const_int 20)
7890 (const_int 5) (const_int 21)
7891 (const_int 8) (const_int 24)
7892 (const_int 9) (const_int 25)
7893 (const_int 12) (const_int 28)
7894 (const_int 13) (const_int 29)])))]
7896 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7897 [(set_attr "type" "sselog")
7898 (set_attr "prefix" "evex")
7899 (set_attr "mode" "V16SF")])
7901 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7902 (define_insn "avx_unpcklps256<mask_name>"
7903 [(set (match_operand:V8SF 0 "register_operand" "=v")
7906 (match_operand:V8SF 1 "register_operand" "v")
7907 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7908 (parallel [(const_int 0) (const_int 8)
7909 (const_int 1) (const_int 9)
7910 (const_int 4) (const_int 12)
7911 (const_int 5) (const_int 13)])))]
7912 "TARGET_AVX && <mask_avx512vl_condition>"
7913 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7914 [(set_attr "type" "sselog")
7915 (set_attr "prefix" "vex")
7916 (set_attr "mode" "V8SF")])
;; Masked 128-bit unpcklps: the interleave result is merged with operand 3
;; (register or zero, "0C") under the QImode mask in operand 4 (a "Yk"
;; mask register).  EVEX only.
7918 (define_insn "unpcklps128_mask"
7919 [(set (match_operand:V4SF 0 "register_operand" "=v")
7923 (match_operand:V4SF 1 "register_operand" "v")
7924 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7925 (parallel [(const_int 0) (const_int 4)
7926 (const_int 1) (const_int 5)]))
7927 (match_operand:V4SF 3 "nonimm_or_0_operand" "0C")
7928 (match_operand:QI 4 "register_operand" "Yk")))]
7930 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7931 [(set_attr "type" "sselog")
7932 (set_attr "prefix" "evex")
7933 (set_attr "mode" "V4SF")])
7935 (define_expand "vec_interleave_lowv8sf"
7939 (match_operand:V8SF 1 "register_operand")
7940 (match_operand:V8SF 2 "nonimmediate_operand"))
7941 (parallel [(const_int 0) (const_int 8)
7942 (const_int 1) (const_int 9)
7943 (const_int 4) (const_int 12)
7944 (const_int 5) (const_int 13)])))
7950 (parallel [(const_int 2) (const_int 10)
7951 (const_int 3) (const_int 11)
7952 (const_int 6) (const_int 14)
7953 (const_int 7) (const_int 15)])))
7954 (set (match_operand:V8SF 0 "register_operand")
7959 (parallel [(const_int 0) (const_int 1)
7960 (const_int 2) (const_int 3)
7961 (const_int 8) (const_int 9)
7962 (const_int 10) (const_int 11)])))]
7965 operands[3] = gen_reg_rtx (V8SFmode);
7966 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit unpcklps: interleave the two low elements of operands 1 and 2
;; (indices 0/4, 1/5).  Alternative 0 is the legacy two-operand SSE form,
;; alternative 1 the three-operand VEX/EVEX form.
7969 (define_insn "vec_interleave_lowv4sf"
7970 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7973 (match_operand:V4SF 1 "register_operand" "0,v")
7974 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7975 (parallel [(const_int 0) (const_int 4)
7976 (const_int 1) (const_int 5)])))]
7979 unpcklps\t{%2, %0|%0, %2}
7980 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
7981 [(set_attr "isa" "noavx,avx")
7982 (set_attr "type" "sselog")
7983 (set_attr "prefix" "orig,maybe_evex")
7984 (set_attr "mode" "V4SF")])
7986 ;; These are modeled with the same vec_concat as the others so that we
7987 ;; capture users of shufps that can use the new instructions
;; 256-bit VMOVSHDUP: duplicate the odd-indexed elements (1,3,5,7) into
;; both members of each even/odd pair.  Optional masking under AVX-512VL.
7988 (define_insn "avx_movshdup256<mask_name>"
7989 [(set (match_operand:V8SF 0 "register_operand" "=v")
7992 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7994 (parallel [(const_int 1) (const_int 1)
7995 (const_int 3) (const_int 3)
7996 (const_int 5) (const_int 5)
7997 (const_int 7) (const_int 7)])))]
7998 "TARGET_AVX && <mask_avx512vl_condition>"
7999 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8000 [(set_attr "type" "sse")
8001 (set_attr "prefix" "vex")
8002 (set_attr "mode" "V8SF")])
;; SSE3 MOVSHDUP (128-bit): duplicate the odd-indexed elements.  The %v
;; prefix in the template emits the VEX "v" mnemonic prefix when AVX is
;; enabled; masking is available under AVX-512VL.
8004 (define_insn "sse3_movshdup<mask_name>"
8005 [(set (match_operand:V4SF 0 "register_operand" "=v")
8008 (match_operand:V4SF 1 "vector_operand" "vBm")
8010 (parallel [(const_int 1)
8014 "TARGET_SSE3 && <mask_avx512vl_condition>"
8015 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8016 [(set_attr "type" "sse")
8017 (set_attr "prefix_rep" "1")
8018 (set_attr "prefix" "maybe_vex")
8019 (set_attr "mode" "V4SF")])
;; 512-bit VMOVSHDUP: duplicate the odd-indexed elements (1,3,...,15)
;; across all sixteen SF lanes.  EVEX-encoded; optional masking.
8021 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
8022 [(set (match_operand:V16SF 0 "register_operand" "=v")
8025 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
8027 (parallel [(const_int 1) (const_int 1)
8028 (const_int 3) (const_int 3)
8029 (const_int 5) (const_int 5)
8030 (const_int 7) (const_int 7)
8031 (const_int 9) (const_int 9)
8032 (const_int 11) (const_int 11)
8033 (const_int 13) (const_int 13)
8034 (const_int 15) (const_int 15)])))]
8036 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8037 [(set_attr "type" "sse")
8038 (set_attr "prefix" "evex")
8039 (set_attr "mode" "V16SF")])
;; 256-bit VMOVSLDUP: duplicate the even-indexed elements (0,2,4,6) into
;; both members of each even/odd pair.  Optional masking under AVX-512VL.
8041 (define_insn "avx_movsldup256<mask_name>"
8042 [(set (match_operand:V8SF 0 "register_operand" "=v")
8045 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
8047 (parallel [(const_int 0) (const_int 0)
8048 (const_int 2) (const_int 2)
8049 (const_int 4) (const_int 4)
8050 (const_int 6) (const_int 6)])))]
8051 "TARGET_AVX && <mask_avx512vl_condition>"
8052 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8053 [(set_attr "type" "sse")
8054 (set_attr "prefix" "vex")
8055 (set_attr "mode" "V8SF")])
;; SSE3 MOVSLDUP (128-bit): duplicate the even-indexed elements; mirror
;; image of sse3_movshdup above.
8057 (define_insn "sse3_movsldup<mask_name>"
8058 [(set (match_operand:V4SF 0 "register_operand" "=v")
8061 (match_operand:V4SF 1 "vector_operand" "vBm")
8063 (parallel [(const_int 0)
8067 "TARGET_SSE3 && <mask_avx512vl_condition>"
8068 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8069 [(set_attr "type" "sse")
8070 (set_attr "prefix_rep" "1")
8071 (set_attr "prefix" "maybe_vex")
8072 (set_attr "mode" "V4SF")])
;; 512-bit VMOVSLDUP: duplicate the even-indexed elements (0,2,...,14)
;; across all sixteen SF lanes.  EVEX-encoded; optional masking.
8074 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
8075 [(set (match_operand:V16SF 0 "register_operand" "=v")
8078 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
8080 (parallel [(const_int 0) (const_int 0)
8081 (const_int 2) (const_int 2)
8082 (const_int 4) (const_int 4)
8083 (const_int 6) (const_int 6)
8084 (const_int 8) (const_int 8)
8085 (const_int 10) (const_int 10)
8086 (const_int 12) (const_int 12)
8087 (const_int 14) (const_int 14)])))]
8089 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8090 [(set_attr "type" "sse")
8091 (set_attr "prefix" "evex")
8092 (set_attr "mode" "V16SF")])
;; Expand VSHUFPS for V8SF: decode the 8-bit immediate (four 2-bit fields)
;; into eight explicit per-element selectors for avx_shufps256_1.  In the
;; concatenated 16-element index space, elements 0-7 come from operand 1
;; and 8-15 from operand 2, hence the +8/+12 offsets for operand 2's low
;; and high lanes, and +4 for operand 1's high lane.
8094 (define_expand "avx_shufps256<mask_expand4_name>"
8095 [(match_operand:V8SF 0 "register_operand")
8096 (match_operand:V8SF 1 "register_operand")
8097 (match_operand:V8SF 2 "nonimmediate_operand")
8098 (match_operand:SI 3 "const_int_operand")]
8101 int mask = INTVAL (operands[3]);
8102 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
8105 GEN_INT ((mask >> 0) & 3),
8106 GEN_INT ((mask >> 2) & 3),
8107 GEN_INT (((mask >> 4) & 3) + 8),
8108 GEN_INT (((mask >> 6) & 3) + 8),
8109 GEN_INT (((mask >> 0) & 3) + 4),
8110 GEN_INT (((mask >> 2) & 3) + 4),
8111 GEN_INT (((mask >> 4) & 3) + 12),
8112 GEN_INT (((mask >> 6) & 3) + 12)
8113 <mask_expand4_args>));
8117 ;; One bit in mask selects 2 elements.
;; VSHUFPS (V8SF) with explicit selectors.  The insn condition requires
;; the high-lane selectors (operands 7-10) to mirror the low-lane ones
;; (operands 3-6) offset by 4, since the hardware applies one imm8 to
;; both 128-bit lanes.  The output routine re-encodes operands 3-6 into
;; that single 8-bit immediate.
8118 (define_insn "avx_shufps256_1<mask_name>"
8119 [(set (match_operand:V8SF 0 "register_operand" "=v")
8122 (match_operand:V8SF 1 "register_operand" "v")
8123 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
8124 (parallel [(match_operand 3 "const_0_to_3_operand" )
8125 (match_operand 4 "const_0_to_3_operand" )
8126 (match_operand 5 "const_8_to_11_operand" )
8127 (match_operand 6 "const_8_to_11_operand" )
8128 (match_operand 7 "const_4_to_7_operand" )
8129 (match_operand 8 "const_4_to_7_operand" )
8130 (match_operand 9 "const_12_to_15_operand")
8131 (match_operand 10 "const_12_to_15_operand")])))]
8133 && <mask_avx512vl_condition>
8134 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
8135 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
8136 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
8137 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
8140 mask = INTVAL (operands[3]);
8141 mask |= INTVAL (operands[4]) << 2;
8142 mask |= (INTVAL (operands[5]) - 8) << 4;
8143 mask |= (INTVAL (operands[6]) - 8) << 6;
8144 operands[3] = GEN_INT (mask);
8146 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
8148 [(set_attr "type" "sseshuf")
8149 (set_attr "length_immediate" "1")
8150 (set_attr "prefix" "<mask_prefix>")
8151 (set_attr "mode" "V8SF")])
;; Expand SHUFPS for V4SF: decode the 8-bit immediate into four explicit
;; selectors for sse_shufps_v4sf.  Fields 0-1 select from operand 1
;; (indices 0-3), fields 2-3 from operand 2 (hence the +4 offset into the
;; concatenated index space).
8153 (define_expand "sse_shufps<mask_expand4_name>"
8154 [(match_operand:V4SF 0 "register_operand")
8155 (match_operand:V4SF 1 "register_operand")
8156 (match_operand:V4SF 2 "vector_operand")
8157 (match_operand:SI 3 "const_int_operand")]
8160 int mask = INTVAL (operands[3]);
8161 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
8164 GEN_INT ((mask >> 0) & 3),
8165 GEN_INT ((mask >> 2) & 3),
8166 GEN_INT (((mask >> 4) & 3) + 4),
8167 GEN_INT (((mask >> 6) & 3) + 4)
8168 <mask_expand4_args>));
;; Masked VSHUFPS (V4SF): re-encodes the four explicit selectors
;; (operands 3-6) back into the imm8, and merges the shuffle result with
;; operand 7 under the QImode mask in operand 8.  EVEX only.
8172 (define_insn "sse_shufps_v4sf_mask"
8173 [(set (match_operand:V4SF 0 "register_operand" "=v")
8177 (match_operand:V4SF 1 "register_operand" "v")
8178 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
8179 (parallel [(match_operand 3 "const_0_to_3_operand")
8180 (match_operand 4 "const_0_to_3_operand")
8181 (match_operand 5 "const_4_to_7_operand")
8182 (match_operand 6 "const_4_to_7_operand")]))
8183 (match_operand:V4SF 7 "nonimm_or_0_operand" "0C")
8184 (match_operand:QI 8 "register_operand" "Yk")))]
8188 mask |= INTVAL (operands[3]) << 0;
8189 mask |= INTVAL (operands[4]) << 2;
8190 mask |= (INTVAL (operands[5]) - 4) << 4;
8191 mask |= (INTVAL (operands[6]) - 4) << 6;
8192 operands[3] = GEN_INT (mask);
8194 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
8196 [(set_attr "type" "sseshuf")
8197 (set_attr "length_immediate" "1")
8198 (set_attr "prefix" "evex")
8199 (set_attr "mode" "V4SF")])
;; SHUFPS/VSHUFPS for the 128-bit SI/SF vector modes (VI4F_128).
;; Operands 3-4 select from operand 1 and operands 5-6 (minus 4) from
;; operand 2; they are re-encoded into the single imm8.  Alternative 0
;; is the legacy two-operand SSE form, alternative 1 the VEX/EVEX form.
8201 (define_insn "sse_shufps_<mode>"
8202 [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
8203 (vec_select:VI4F_128
8204 (vec_concat:<ssedoublevecmode>
8205 (match_operand:VI4F_128 1 "register_operand" "0,v")
8206 (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
8207 (parallel [(match_operand 3 "const_0_to_3_operand")
8208 (match_operand 4 "const_0_to_3_operand")
8209 (match_operand 5 "const_4_to_7_operand")
8210 (match_operand 6 "const_4_to_7_operand")])))]
8214 mask |= INTVAL (operands[3]) << 0;
8215 mask |= INTVAL (operands[4]) << 2;
8216 mask |= (INTVAL (operands[5]) - 4) << 4;
8217 mask |= (INTVAL (operands[6]) - 4) << 6;
8218 operands[3] = GEN_INT (mask);
8220 switch (which_alternative)
8223 return "shufps\t{%3, %2, %0|%0, %2, %3}";
8225 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8230 [(set_attr "isa" "noavx,avx")
8231 (set_attr "type" "sseshuf")
8232 (set_attr "length_immediate" "1")
8233 (set_attr "prefix" "orig,maybe_evex")
8234 (set_attr "mode" "V4SF")])
;; Extract the high two SF elements (indices 2,3) of a V4SF: movhps for a
;; memory destination, movhlps register-to-register, or movlps loading the
;; high half (%H1) of a memory source.  At most one operand may be memory.
8236 (define_insn "sse_storehps"
8237 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
8239 (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
8240 (parallel [(const_int 2) (const_int 3)])))]
8241 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8243 %vmovhps\t{%1, %0|%q0, %1}
8244 %vmovhlps\t{%1, %d0|%d0, %1}
8245 %vmovlps\t{%H1, %d0|%d0, %H1}"
8246 [(set_attr "type" "ssemov")
8247 (set_attr "prefix" "maybe_vex")
8248 (set_attr "mode" "V2SF,V4SF,V2SF")])
8250 (define_expand "sse_loadhps_exp"
8251 [(set (match_operand:V4SF 0 "nonimmediate_operand")
8254 (match_operand:V4SF 1 "nonimmediate_operand")
8255 (parallel [(const_int 0) (const_int 1)]))
8256 (match_operand:V2SF 2 "nonimmediate_operand")))]
8259 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
8261 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
8263 /* Fix up the destination if needed. */
8264 if (dst != operands[0])
8265 emit_move_insn (operands[0], dst);
;; Replace the high half of a V4SF: the low two elements are kept from
;; operand 1 and the high half comes from operand 2 (movhps/vmovhps for a
;; memory source, movlhps/vmovlhps register-to-register, or movlps storing
;; into the high half %H0 of a memory destination).
8270 (define_insn "sse_loadhps"
8271 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
8274 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
8275 (parallel [(const_int 0) (const_int 1)]))
8276 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
8279 movhps\t{%2, %0|%0, %q2}
8280 vmovhps\t{%2, %1, %0|%0, %1, %q2}
8281 movlhps\t{%2, %0|%0, %2}
8282 vmovlhps\t{%2, %1, %0|%0, %1, %2}
8283 %vmovlps\t{%2, %H0|%H0, %2}"
8284 [(set_attr "isa" "noavx,avx,noavx,avx,*")
8285 (set_attr "type" "ssemov")
8286 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
8287 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract the low two SF elements (indices 0,1) of a V4SF: movlps to
;; memory, movaps register-to-register, or movlps from a memory source.
8289 (define_insn "sse_storelps"
8290 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
8292 (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
8293 (parallel [(const_int 0) (const_int 1)])))]
8294 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8296 %vmovlps\t{%1, %0|%q0, %1}
8297 %vmovaps\t{%1, %0|%0, %1}
8298 %vmovlps\t{%1, %d0|%d0, %q1}"
8299 [(set_attr "type" "ssemov")
8300 (set_attr "prefix" "maybe_vex")
8301 (set_attr "mode" "V2SF,V4SF,V2SF")])
8303 (define_expand "sse_loadlps_exp"
8304 [(set (match_operand:V4SF 0 "nonimmediate_operand")
8306 (match_operand:V2SF 2 "nonimmediate_operand")
8308 (match_operand:V4SF 1 "nonimmediate_operand")
8309 (parallel [(const_int 2) (const_int 3)]))))]
8312 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
8314 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
8316 /* Fix up the destination if needed. */
8317 if (dst != operands[0])
8318 emit_move_insn (operands[0], dst);
;; Replace the low half of a V4SF while keeping elements 2,3 of operand 1.
;; Alternatives 0-1 use shufps with immediate 0xe4 (a register source for
;; the low half); 2-4 use movlps for memory sources/destinations.
8323 (define_insn "sse_loadlps"
8324 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
8326 (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
8328 (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
8329 (parallel [(const_int 2) (const_int 3)]))))]
8332 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
8333 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
8334 movlps\t{%2, %0|%0, %q2}
8335 vmovlps\t{%2, %1, %0|%0, %1, %q2}
8336 %vmovlps\t{%2, %0|%q0, %2}"
8337 [(set_attr "isa" "noavx,avx,noavx,avx,*")
8338 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
8339 (set (attr "length_immediate")
8340 (if_then_else (eq_attr "alternative" "0,1")
8342 (const_string "*")))
8343 (set_attr "prefix" "orig,maybe_evex,maybe_vex")
8344 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; MOVSS/VMOVSS register form: combines the two register operands
;; (presumably low element from operand 2, remaining elements from
;; operand 1, via a vec_merge -- the merge RTL is not fully visible in
;; this extract; confirm against the full pattern).
8346 (define_insn "sse_movss"
8347 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
8349 (match_operand:V4SF 2 "register_operand" " x,v")
8350 (match_operand:V4SF 1 "register_operand" " 0,v")
8354 movss\t{%2, %0|%0, %2}
8355 vmovss\t{%2, %1, %0|%0, %1, %2}"
8356 [(set_attr "isa" "noavx,avx")
8357 (set_attr "type" "ssemov")
8358 (set_attr "prefix" "orig,maybe_evex")
8359 (set_attr "mode" "SF")])
;; VBROADCASTSS: broadcast element 0 of an XMM register into every lane
;; of a 128- or 256-bit SF vector (VF1_128_256).
8361 (define_insn "avx2_vec_dup<mode>"
8362 [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
8363 (vec_duplicate:VF1_128_256
8365 (match_operand:V4SF 1 "register_operand" "v")
8366 (parallel [(const_int 0)]))))]
8368 "vbroadcastss\t{%1, %0|%0, %1}"
8369 [(set_attr "type" "sselog1")
8370 (set_attr "prefix" "maybe_evex")
8371 (set_attr "mode" "<MODE>")])
8373 (define_insn "avx2_vec_dupv8sf_1"
8374 [(set (match_operand:V8SF 0 "register_operand" "=v")
8377 (match_operand:V8SF 1 "register_operand" "v")
8378 (parallel [(const_int 0)]))))]
8380 "vbroadcastss\t{%x1, %0|%0, %x1}"
8381 [(set_attr "type" "sselog1")
8382 (set_attr "prefix" "maybe_evex")
8383 (set_attr "mode" "V8SF")])
;; Broadcast the scalar element 0 of a 512-bit register source into every
;; lane (vbroadcastss/vbroadcastsd via <bcstscalarsuff>); %x1 refers to
;; the XMM view of the source register.
8385 (define_insn "avx512f_vec_dup<mode>_1"
8386 [(set (match_operand:VF_512 0 "register_operand" "=v")
8387 (vec_duplicate:VF_512
8388 (vec_select:<ssescalarmode>
8389 (match_operand:VF_512 1 "register_operand" "v")
8390 (parallel [(const_int 0)]))))]
8392 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
8393 [(set_attr "type" "sselog1")
8394 (set_attr "prefix" "evex")
8395 (set_attr "mode" "<MODE>")])
8397 ;; Although insertps takes register source, we prefer
8398 ;; unpcklps with register source since it is shorter.
8399 (define_insn "*vec_concatv2sf_sse4_1"
8400 [(set (match_operand:V2SF 0 "register_operand"
8401 "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
8403 (match_operand:SF 1 "nonimmediate_operand"
8404 " 0, 0,Yv, 0,0, v,m, 0 , m")
8405 (match_operand:SF 2 "nonimm_or_0_operand"
8406 " Yr,*x,Yv, m,m, m,C,*ym, C")))]
8407 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8409 unpcklps\t{%2, %0|%0, %2}
8410 unpcklps\t{%2, %0|%0, %2}
8411 vunpcklps\t{%2, %1, %0|%0, %1, %2}
8412 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
8413 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
8414 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
8415 %vmovss\t{%1, %0|%0, %1}
8416 punpckldq\t{%2, %0|%0, %2}
8417 movd\t{%1, %0|%0, %1}"
8419 (cond [(eq_attr "alternative" "0,1,3,4")
8420 (const_string "noavx")
8421 (eq_attr "alternative" "2,5")
8422 (const_string "avx")
8424 (const_string "*")))
8426 (cond [(eq_attr "alternative" "6")
8427 (const_string "ssemov")
8428 (eq_attr "alternative" "7")
8429 (const_string "mmxcvt")
8430 (eq_attr "alternative" "8")
8431 (const_string "mmxmov")
8433 (const_string "sselog")))
8434 (set (attr "mmx_isa")
8435 (if_then_else (eq_attr "alternative" "7,8")
8436 (const_string "native")
8437 (const_string "*")))
8438 (set (attr "prefix_data16")
8439 (if_then_else (eq_attr "alternative" "3,4")
8441 (const_string "*")))
8442 (set (attr "prefix_extra")
8443 (if_then_else (eq_attr "alternative" "3,4,5")
8445 (const_string "*")))
8446 (set (attr "length_immediate")
8447 (if_then_else (eq_attr "alternative" "3,4,5")
8449 (const_string "*")))
8450 (set (attr "prefix")
8451 (cond [(eq_attr "alternative" "2,5")
8452 (const_string "maybe_evex")
8453 (eq_attr "alternative" "6")
8454 (const_string "maybe_vex")
8456 (const_string "orig")))
8457 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
8459 ;; ??? In theory we can match memory for the MMX alternative, but allowing
8460 ;; vector_operand for operand 2 and *not* allowing memory for the SSE
8461 ;; alternatives pretty much forces the MMX alternative to be chosen.
8462 (define_insn "*vec_concatv2sf_sse"
8463 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
8465 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
8466 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
8469 unpcklps\t{%2, %0|%0, %2}
8470 movss\t{%1, %0|%0, %1}
8471 punpckldq\t{%2, %0|%0, %2}
8472 movd\t{%1, %0|%0, %1}"
8473 [(set_attr "mmx_isa" "*,*,native,native")
8474 (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
8475 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF halves: movlhps when the high half is in a
;; register, movhps when it comes from memory.
8477 (define_insn "*vec_concatv4sf"
8478 [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v")
8480 (match_operand:V2SF 1 "register_operand" " 0,v,0,v")
8481 (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
8484 movlhps\t{%2, %0|%0, %2}
8485 vmovlhps\t{%2, %1, %0|%0, %1, %2}
8486 movhps\t{%2, %0|%0, %q2}
8487 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
8488 [(set_attr "isa" "noavx,avx,noavx,avx")
8489 (set_attr "type" "ssemov")
8490 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
8491 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; V2SF concatenated with zeros: a single movq, which zero-fills the
;; upper half of the destination.
8493 (define_insn "*vec_concatv4sf_0"
8494 [(set (match_operand:V4SF 0 "register_operand" "=v")
8496 (match_operand:V2SF 1 "nonimmediate_operand" "vm")
8497 (match_operand:V2SF 2 "const0_operand" " C")))]
8499 "%vmovq\t{%1, %0|%0, %1}"
8500 [(set_attr "type" "ssemov")
8501 (set_attr "prefix" "maybe_vex")
8502 (set_attr "mode" "DF")])
8504 ;; Avoid combining registers from different units in a single alternative,
8505 ;; see comment above inline_secondary_memory_needed function in i386.c
8506 (define_insn "vec_set<mode>_0"
8507 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
8508 "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x ,m ,m ,m")
8510 (vec_duplicate:VI4F_128
8511 (match_operand:<ssescalarmode> 2 "general_operand"
8512 " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
8513 (match_operand:VI4F_128 1 "nonimm_or_0_operand"
8514 " C , C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
8518 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
8519 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
8520 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
8521 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
8522 %vmovd\t{%2, %0|%0, %2}
8523 movss\t{%2, %0|%0, %2}
8524 movss\t{%2, %0|%0, %2}
8525 vmovss\t{%2, %1, %0|%0, %1, %2}
8526 pinsrd\t{$0, %2, %0|%0, %2, 0}
8527 pinsrd\t{$0, %2, %0|%0, %2, 0}
8528 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
8533 (cond [(eq_attr "alternative" "0,1,8,9")
8534 (const_string "sse4_noavx")
8535 (eq_attr "alternative" "2,7,10")
8536 (const_string "avx")
8537 (eq_attr "alternative" "3,4")
8538 (const_string "sse2")
8539 (eq_attr "alternative" "5,6")
8540 (const_string "noavx")
8542 (const_string "*")))
8544 (cond [(eq_attr "alternative" "0,1,2,8,9,10")
8545 (const_string "sselog")
8546 (eq_attr "alternative" "12")
8547 (const_string "imov")
8548 (eq_attr "alternative" "13")
8549 (const_string "fmov")
8551 (const_string "ssemov")))
8552 (set (attr "prefix_extra")
8553 (if_then_else (eq_attr "alternative" "8,9,10")
8555 (const_string "*")))
8556 (set (attr "length_immediate")
8557 (if_then_else (eq_attr "alternative" "8,9,10")
8559 (const_string "*")))
8560 (set (attr "prefix")
8561 (cond [(eq_attr "alternative" "0,1,5,6,8,9")
8562 (const_string "orig")
8563 (eq_attr "alternative" "2")
8564 (const_string "maybe_evex")
8565 (eq_attr "alternative" "3,4")
8566 (const_string "maybe_vex")
8567 (eq_attr "alternative" "7,10")
8568 (const_string "vex")
8570 (const_string "*")))
8571 (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
8572 (set (attr "preferred_for_speed")
8573 (cond [(eq_attr "alternative" "4")
8574 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
8576 (symbol_ref "true")))])
8578 ;; A subset is vec_setv4sf.
8579 (define_insn "*vec_setv4sf_sse4_1"
8580 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
8583 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
8584 (match_operand:V4SF 1 "register_operand" "0,0,v")
8585 (match_operand:SI 3 "const_int_operand")))]
8587 && ((unsigned) exact_log2 (INTVAL (operands[3]))
8588 < GET_MODE_NUNITS (V4SFmode))"
8590 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
8591 switch (which_alternative)
8595 return "insertps\t{%3, %2, %0|%0, %2, %3}";
8597 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8602 [(set_attr "isa" "noavx,noavx,avx")
8603 (set_attr "type" "sselog")
8604 (set_attr "prefix_data16" "1,1,*")
8605 (set_attr "prefix_extra" "1")
8606 (set_attr "length_immediate" "1")
8607 (set_attr "prefix" "orig,orig,maybe_evex")
8608 (set_attr "mode" "V4SF")])
8610 ;; All of vinsertps, vmovss, vmovd clear also the higher bits.
8611 (define_insn "vec_set<mode>_0"
8612 [(set (match_operand:VI4F_256_512 0 "register_operand" "=v,v,v")
8613 (vec_merge:VI4F_256_512
8614 (vec_duplicate:VI4F_256_512
8615 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "v,m,r"))
8616 (match_operand:VI4F_256_512 1 "const0_operand" "C,C,C")
8620 vinsertps\t{$0xe, %2, %2, %x0|%x0, %2, %2, 0xe}
8621 vmov<ssescalarmodesuffix>\t{%x2, %x0|%x0, %2}
8622 vmovd\t{%2, %x0|%x0, %2}"
8624 (if_then_else (eq_attr "alternative" "0")
8625 (const_string "sselog")
8626 (const_string "ssemov")))
8627 (set_attr "prefix" "maybe_evex")
8628 (set_attr "mode" "SF,<ssescalarmode>,SI")
8629 (set (attr "preferred_for_speed")
8630 (cond [(eq_attr "alternative" "2")
8631 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
8633 (symbol_ref "true")))])
8635 (define_insn "sse4_1_insertps"
8636 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
8637 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
8638 (match_operand:V4SF 1 "register_operand" "0,0,v")
8639 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
8643 if (MEM_P (operands[2]))
8645 unsigned count_s = INTVAL (operands[3]) >> 6;
8647 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
8648 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
8650 switch (which_alternative)
8654 return "insertps\t{%3, %2, %0|%0, %2, %3}";
8656 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8661 [(set_attr "isa" "noavx,noavx,avx")
8662 (set_attr "type" "sselog")
8663 (set_attr "prefix_data16" "1,1,*")
8664 (set_attr "prefix_extra" "1")
8665 (set_attr "length_immediate" "1")
8666 (set_attr "prefix" "orig,orig,maybe_evex")
8667 (set_attr "mode" "V4SF")])
8670 [(set (match_operand:VI4F_128 0 "memory_operand")
8672 (vec_duplicate:VI4F_128
8673 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
8676 "TARGET_SSE && reload_completed"
8677 [(set (match_dup 0) (match_dup 1))]
8678 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
8680 ;; Standard scalar operation patterns which preserve the rest of the
8681 ;; vector for combiner.
;; Insert a DF scalar into element 0 of a V2DF while preserving the other
;; element of operand 1: movsd for register sources, movlpd for memory.
8682 (define_insn "vec_setv2df_0"
8683 [(set (match_operand:V2DF 0 "register_operand" "=x,v,x,v")
8686 (match_operand:DF 2 "nonimmediate_operand" " x,v,m,m"))
8687 (match_operand:V2DF 1 "register_operand" " 0,v,0,v")
8691 movsd\t{%2, %0|%0, %2}
8692 vmovsd\t{%2, %1, %0|%0, %1, %2}
8693 movlpd\t{%2, %0|%0, %2}
8694 vmovlpd\t{%2, %1, %0|%0, %1, %2}"
8695 [(set_attr "isa" "noavx,avx,noavx,avx")
8696 (set_attr "type" "ssemov")
8697 (set_attr "mode" "DF")])
;; vec_set expander for 128-bit vector modes: a constant element index
;; goes through ix86_expand_vector_set, a variable index through
;; ix86_expand_vector_set_var.
8699 (define_expand "vec_set<mode>"
8700 [(match_operand:V_128 0 "register_operand")
8701 (match_operand:<ssescalarmode> 1 "register_operand")
8702 (match_operand 2 "vec_setm_sse41_operand")]
8705 if (CONST_INT_P (operands[2]))
8706 ix86_expand_vector_set (false, operands[0], operands[1],
8707 INTVAL (operands[2]));
8709 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
;; Same as the V_128 vec_set expander above, for the 256/512-bit vector
;; modes (predicate vec_setm_avx2_operand instead of the SSE4.1 one).
8713 (define_expand "vec_set<mode>"
8714 [(match_operand:V_256_512 0 "register_operand")
8715 (match_operand:<ssescalarmode> 1 "register_operand")
8716 (match_operand 2 "vec_setm_avx2_operand")]
8719 if (CONST_INT_P (operands[2]))
8720 ix86_expand_vector_set (false, operands[0], operands[1],
8721 INTVAL (operands[2]));
8723 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
;; Extract element 0 of a V4SF: after reload this splits into a plain SF
;; move from the low part of operand 1 (gen_lowpart).  At most one
;; operand may be memory.
8727 (define_insn_and_split "*vec_extractv4sf_0"
8728 [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
8730 (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
8731 (parallel [(const_int 0)])))]
8732 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8734 "&& reload_completed"
8735 [(set (match_dup 0) (match_dup 1))]
8736 "operands[1] = gen_lowpart (SFmode, operands[1]);")
8738 (define_insn_and_split "*sse4_1_extractps"
8739 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
8741 (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
8742 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
8745 extractps\t{%2, %1, %0|%0, %1, %2}
8746 extractps\t{%2, %1, %0|%0, %1, %2}
8747 vextractps\t{%2, %1, %0|%0, %1, %2}
8750 "&& reload_completed && SSE_REG_P (operands[0])"
8753 rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
8754 switch (INTVAL (operands[2]))
8758 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
8759 operands[2], operands[2],
8760 GEN_INT (INTVAL (operands[2]) + 4),
8761 GEN_INT (INTVAL (operands[2]) + 4)));
8764 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
8767 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
8772 [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
8773 (set_attr "type" "sselog,sselog,sselog,*,*")
8774 (set_attr "prefix_data16" "1,1,1,*,*")
8775 (set_attr "prefix_extra" "1,1,1,*,*")
8776 (set_attr "length_immediate" "1,1,1,*,*")
8777 (set_attr "prefix" "orig,orig,maybe_evex,*,*")
8778 (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
;; Extract element i (0-3) from a V4SF in memory: after reload, split
;; into an SF load whose address is offset by 4*i bytes.
8780 (define_insn_and_split "*vec_extractv4sf_mem"
8781 [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
8783 (match_operand:V4SF 1 "memory_operand" "o,o,o")
8784 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
8787 "&& reload_completed"
8788 [(set (match_dup 0) (match_dup 1))]
8790 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
8793 (define_mode_attr extract_type
8794 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
8796 (define_mode_attr extract_suf
8797 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
8799 (define_mode_iterator AVX512_VEC
8800 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
8802 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
8803 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
8804 (match_operand:AVX512_VEC 1 "register_operand")
8805 (match_operand:SI 2 "const_0_to_3_operand")
8806 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
8807 (match_operand:QI 4 "register_operand")]
8811 mask = INTVAL (operands[2]);
8812 rtx dest = operands[0];
8814 if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
8815 dest = gen_reg_rtx (<ssequartermode>mode);
8817 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
8818 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
8819 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
8820 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
8823 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
8824 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
8826 if (dest != operands[0])
8827 emit_move_insn (operands[0], dest);
8831 (define_insn "avx512dq_vextract<shuffletype>64x2_1_mask"
8832 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
8833 (vec_merge:<ssequartermode>
8834 (vec_select:<ssequartermode>
8835 (match_operand:V8FI 1 "register_operand" "v,v")
8836 (parallel [(match_operand 2 "const_0_to_7_operand")
8837 (match_operand 3 "const_0_to_7_operand")]))
8838 (match_operand:<ssequartermode> 4 "nonimm_or_0_operand" "0C,0")
8839 (match_operand:QI 5 "register_operand" "Yk,Yk")))]
8841 && INTVAL (operands[2]) % 2 == 0
8842 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8843 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[4]))"
8845 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
8846 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2}";
8848 [(set_attr "type" "sselog1")
8849 (set_attr "prefix_extra" "1")
8850 (set_attr "length_immediate" "1")
8851 (set_attr "prefix" "evex")
8852 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked vextract{f,i}64x2: operands 2-3 must form an aligned
;; even/odd index pair (operand 2 even, operand 3 = operand 2 + 1); the
;; encoded immediate is the 128-bit chunk number, operand 2 >> 1.
8854 (define_insn "*avx512dq_vextract<shuffletype>64x2_1"
8855 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
8856 (vec_select:<ssequartermode>
8857 (match_operand:V8FI 1 "register_operand" "v")
8858 (parallel [(match_operand 2 "const_0_to_7_operand")
8859 (match_operand 3 "const_0_to_7_operand")])))]
8861 && INTVAL (operands[2]) % 2 == 0
8862 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8864 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
8865 return "vextract<shuffletype>64x2\t{%2, %1, %0|%0, %1, %2}";
8867 [(set_attr "type" "sselog1")
8868 (set_attr "prefix_extra" "1")
8869 (set_attr "length_immediate" "1")
8870 (set_attr "prefix" "evex")
8871 (set_attr "mode" "<sseinsnmode>")])
8874 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8875 (vec_select:<ssequartermode>
8876 (match_operand:V8FI 1 "register_operand")
8877 (parallel [(const_int 0) (const_int 1)])))]
8881 || REG_P (operands[0])
8882 || !EXT_REX_SSE_REG_P (operands[1]))"
8883 [(set (match_dup 0) (match_dup 1))]
8885 if (!TARGET_AVX512VL
8886 && REG_P (operands[0])
8887 && EXT_REX_SSE_REG_P (operands[1]))
8889 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8891 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
8894 (define_insn "avx512f_vextract<shuffletype>32x4_1_mask"
8895 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
8896 (vec_merge:<ssequartermode>
8897 (vec_select:<ssequartermode>
8898 (match_operand:V16FI 1 "register_operand" "v,v")
8899 (parallel [(match_operand 2 "const_0_to_15_operand")
8900 (match_operand 3 "const_0_to_15_operand")
8901 (match_operand 4 "const_0_to_15_operand")
8902 (match_operand 5 "const_0_to_15_operand")]))
8903 (match_operand:<ssequartermode> 6 "nonimm_or_0_operand" "0C,0")
8904 (match_operand:QI 7 "register_operand" "Yk,Yk")))]
8906 && INTVAL (operands[2]) % 4 == 0
8907 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8908 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8909 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
8910 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[6]))"
8912 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8913 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}%N6|%0%{%7%}%N6, %1, %2}";
8915 [(set_attr "type" "sselog1")
8916 (set_attr "prefix_extra" "1")
8917 (set_attr "length_immediate" "1")
8918 (set_attr "prefix" "evex")
8919 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked vextract{f,i}32x4: operands 2-5 must be four consecutive
;; indices starting at a multiple of 4; the encoded immediate is the
;; 128-bit chunk number, operand 2 >> 2.
8921 (define_insn "*avx512f_vextract<shuffletype>32x4_1"
8922 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
8923 (vec_select:<ssequartermode>
8924 (match_operand:V16FI 1 "register_operand" "v")
8925 (parallel [(match_operand 2 "const_0_to_15_operand")
8926 (match_operand 3 "const_0_to_15_operand")
8927 (match_operand 4 "const_0_to_15_operand")
8928 (match_operand 5 "const_0_to_15_operand")])))]
8930 && INTVAL (operands[2]) % 4 == 0
8931 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8932 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8933 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
8935 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8936 return "vextract<shuffletype>32x4\t{%2, %1, %0|%0, %1, %2}";
8938 [(set_attr "type" "sselog1")
8939 (set_attr "prefix_extra" "1")
8940 (set_attr "length_immediate" "1")
8941 (set_attr "prefix" "evex")
8942 (set_attr "mode" "<sseinsnmode>")])
;; Split: extracting lanes 0..3 of a 512-bit vector is just a lowpart move.
;; Without AVX512VL a 128-bit lowpart of an ext (xmm16+) register cannot be
;; accessed directly, so the destination is rewritten as a <MODE>mode subreg.
;; NOTE(review): the define_split header line is missing from this extract.
8945 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8946 (vec_select:<ssequartermode>
8947 (match_operand:V16FI 1 "register_operand")
8948 (parallel [(const_int 0) (const_int 1)
8949 (const_int 2) (const_int 3)])))]
8953 || REG_P (operands[0])
8954 || !EXT_REX_SSE_REG_P (operands[1]))"
8955 [(set (match_dup 0) (match_dup 1))]
8957 if (!TARGET_AVX512VL
8958 && REG_P (operands[0])
8959 && EXT_REX_SSE_REG_P (operands[1]))
8961 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8963 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
;; ISA name used in half-vector extract pattern names: 32-bit element
;; modes need AVX512DQ, 64-bit element modes only AVX512F.
8966 (define_mode_attr extract_type_2
8967 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
;; Mnemonic suffix for half-of-512-bit extracts: 32x8 for 32-bit elements,
;; 64x4 for 64-bit elements.
8969 (define_mode_attr extract_suf_2
8970 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
;; 512-bit modes whose half can be extracted; 32-bit-element forms are
;; gated on AVX512DQ (vextract{f,i}32x8 is a DQ instruction).
8972 (define_mode_iterator AVX512_VEC_2
8973 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
;; Expander for the masked half-of-512-bit extract builtins.  Dispatches on
;; the immediate (operand 2) to the lo/hi insn; when the destination is
;; memory but not equal to the merge source, goes through a temp register
;; because the store forms require dest == merge operand.
;; NOTE(review): several interior lines (switch braces/default) are missing
;; from this extract.
8975 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
8976 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8977 (match_operand:AVX512_VEC_2 1 "register_operand")
8978 (match_operand:SI 2 "const_0_to_1_operand")
8979 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
8980 (match_operand:QI 4 "register_operand")]
8983 rtx (*insn)(rtx, rtx, rtx, rtx);
8984 rtx dest = operands[0];
8986 if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
8987 dest = gen_reg_rtx (<ssehalfvecmode>mode);
8989 switch (INTVAL (operands[2]))
8992 insn = gen_vec_extract_lo_<mode>_mask;
8995 insn = gen_vec_extract_hi_<mode>_mask;
9001 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
9002 if (dest != operands[0])
9003 emit_move_insn (operands[0], dest);
;; Split: low-half extract of a V8-element 512-bit vector becomes a plain
;; lowpart move when no ext register restriction applies.
;; NOTE(review): the define_split header line is missing from this extract.
9008 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9009 (vec_select:<ssehalfvecmode>
9010 (match_operand:V8FI 1 "nonimmediate_operand")
9011 (parallel [(const_int 0) (const_int 1)
9012 (const_int 2) (const_int 3)])))]
9013 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
9016 || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
9017 [(set (match_dup 0) (match_dup 1))]
9018 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Masked extract of the low 256 bits of a V8-element 512-bit vector
;; (vextract{f,i}64x4 imm 0).  Store form requires dest == merge operand.
9020 (define_insn "vec_extract_lo_<mode>_mask"
9021 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9022 (vec_merge:<ssehalfvecmode>
9023 (vec_select:<ssehalfvecmode>
9024 (match_operand:V8FI 1 "register_operand" "v,v")
9025 (parallel [(const_int 0) (const_int 1)
9026 (const_int 2) (const_int 3)]))
9027 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9028 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9030 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9031 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
9032 [(set_attr "type" "sselog1")
9033 (set_attr "prefix_extra" "1")
9034 (set_attr "length_immediate" "1")
9035 (set_attr "memory" "none,store")
9036 (set_attr "prefix" "evex")
9037 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked low-half extract of a V8-element 512-bit vector.  Only emits
;; vextract*64x4 when the source is an ext register without AVX512VL;
;; otherwise the later split turns it into a plain move.
9039 (define_insn "vec_extract_lo_<mode>"
9040 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,vm,v")
9041 (vec_select:<ssehalfvecmode>
9042 (match_operand:V8FI 1 "nonimmediate_operand" "v,v,vm")
9043 (parallel [(const_int 0) (const_int 1)
9044 (const_int 2) (const_int 3)])))]
9045 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9047 if (!TARGET_AVX512VL && !MEM_P (operands[1]))
9048 return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9052 [(set_attr "type" "sselog1")
9053 (set_attr "prefix_extra" "1")
9054 (set_attr "length_immediate" "1")
9055 (set_attr "memory" "none,store,load")
9056 (set_attr "prefix" "evex")
9057 (set_attr "mode" "<sseinsnmode>")])
;; Masked extract of the high 256 bits of a V8-element 512-bit vector
;; (vextract{f,i}64x4 imm 1).
9059 (define_insn "vec_extract_hi_<mode>_mask"
9060 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9061 (vec_merge:<ssehalfvecmode>
9062 (vec_select:<ssehalfvecmode>
9063 (match_operand:V8FI 1 "register_operand" "v,v")
9064 (parallel [(const_int 4) (const_int 5)
9065 (const_int 6) (const_int 7)]))
9066 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9067 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9069 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9070 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
9071 [(set_attr "type" "sselog1")
9072 (set_attr "prefix_extra" "1")
9073 (set_attr "length_immediate" "1")
9074 (set_attr "prefix" "evex")
9075 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked extract of the high 256 bits of a V8-element 512-bit vector.
9077 (define_insn "vec_extract_hi_<mode>"
9078 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
9079 (vec_select:<ssehalfvecmode>
9080 (match_operand:V8FI 1 "register_operand" "v")
9081 (parallel [(const_int 4) (const_int 5)
9082 (const_int 6) (const_int 7)])))]
9084 "vextract<shuffletype>64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9085 [(set_attr "type" "sselog1")
9086 (set_attr "prefix_extra" "1")
9087 (set_attr "length_immediate" "1")
9088 (set_attr "prefix" "evex")
9089 (set_attr "mode" "<sseinsnmode>")])
;; Masked extract of the high 256 bits of a V16-element 512-bit vector
;; (vextract{f,i}32x8 imm 1, an AVX512DQ instruction).
9091 (define_insn "vec_extract_hi_<mode>_mask"
9092 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9093 (vec_merge:<ssehalfvecmode>
9094 (vec_select:<ssehalfvecmode>
9095 (match_operand:V16FI 1 "register_operand" "v,v")
9096 (parallel [(const_int 8) (const_int 9)
9097 (const_int 10) (const_int 11)
9098 (const_int 12) (const_int 13)
9099 (const_int 14) (const_int 15)]))
9100 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9101 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9103 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9104 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
9105 [(set_attr "type" "sselog1")
9106 (set_attr "prefix_extra" "1")
9107 (set_attr "length_immediate" "1")
9108 (set_attr "prefix" "evex")
9109 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked high-half extract of a V16-element 512-bit vector.  Uses the
;; natural 32x8 form with AVX512DQ, else falls back to vextracti64x4
;; (same bits, different element granularity).
9111 (define_insn "vec_extract_hi_<mode>"
9112 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,vm")
9113 (vec_select:<ssehalfvecmode>
9114 (match_operand:V16FI 1 "register_operand" "v,v")
9115 (parallel [(const_int 8) (const_int 9)
9116 (const_int 10) (const_int 11)
9117 (const_int 12) (const_int 13)
9118 (const_int 14) (const_int 15)])))]
9121 vextract<shuffletype>32x8\t{$0x1, %1, %0|%0, %1, 0x1}
9122 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9123 [(set_attr "type" "sselog1")
9124 (set_attr "prefix_extra" "1")
9125 (set_attr "isa" "avx512dq,noavx512dq")
9126 (set_attr "length_immediate" "1")
9127 (set_attr "prefix" "evex")
9128 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit modes for avx512vl_vextractf128: 64-bit element forms need
;; AVX512DQ (vextract*64x2).
9130 (define_mode_iterator VI48F_256_DQ
9131 [V8SI V8SF (V4DI "TARGET_AVX512DQ") (V4DF "TARGET_AVX512DQ")])
;; Expander for masked 128-bit extract from 256-bit vectors (AVX512VL
;; builtins).  A temp register is used when the memory destination cannot
;; be matched by the chosen lo/hi insn's constraints (see inline comments).
;; NOTE(review): some interior lines (switch braces/default, condition)
;; are missing from this extract.
9133 (define_expand "avx512vl_vextractf128<mode>"
9134 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9135 (match_operand:VI48F_256_DQ 1 "register_operand")
9136 (match_operand:SI 2 "const_0_to_1_operand")
9137 (match_operand:<ssehalfvecmode> 3 "nonimm_or_0_operand")
9138 (match_operand:QI 4 "register_operand")]
9141 rtx (*insn)(rtx, rtx, rtx, rtx);
9142 rtx dest = operands[0];
9145 && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
9146 /* For V8S[IF]mode there are maskm insns with =m and 0
9148 ? !rtx_equal_p (dest, operands[3])
9149 /* For V4D[IF]mode, hi insns don't allow memory, and
9150 lo insns have =m and 0C constraints. */
9151 : (operands[2] != const0_rtx
9152 || (!rtx_equal_p (dest, operands[3])
9153 && GET_CODE (operands[3]) != CONST_VECTOR))))
9154 dest = gen_reg_rtx (<ssehalfvecmode>mode);
9155 switch (INTVAL (operands[2]))
9158 insn = gen_vec_extract_lo_<mode>_mask;
9161 insn = gen_vec_extract_hi_<mode>_mask;
9167 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
9168 if (dest != operands[0])
9169 emit_move_insn (operands[0], dest);
;; Expander for the AVX vextractf128 builtin: simply dispatch on the
;; immediate to the unmasked lo/hi extract pattern.
;; NOTE(review): switch braces/default lines are missing from this extract.
9173 (define_expand "avx_vextractf128<mode>"
9174 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9175 (match_operand:V_256 1 "register_operand")
9176 (match_operand:SI 2 "const_0_to_1_operand")]
9179 rtx (*insn)(rtx, rtx);
9181 switch (INTVAL (operands[2]))
9184 insn = gen_vec_extract_lo_<mode>;
9187 insn = gen_vec_extract_hi_<mode>;
9193 emit_insn (insn (operands[0], operands[1]));
;; Masked extract of the low 256 bits of a V16-element 512-bit vector
;; (vextract{f,i}32x8 imm 0, AVX512DQ).
9197 (define_insn "vec_extract_lo_<mode>_mask"
9198 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9199 (vec_merge:<ssehalfvecmode>
9200 (vec_select:<ssehalfvecmode>
9201 (match_operand:V16FI 1 "register_operand" "v,v")
9202 (parallel [(const_int 0) (const_int 1)
9203 (const_int 2) (const_int 3)
9204 (const_int 4) (const_int 5)
9205 (const_int 6) (const_int 7)]))
9206 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9207 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9209 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9210 "vextract<shuffletype>32x8\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
9211 [(set_attr "type" "sselog1")
9212 (set_attr "prefix_extra" "1")
9213 (set_attr "length_immediate" "1")
9214 (set_attr "memory" "none,store")
9215 (set_attr "prefix" "evex")
9216 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked low-half extract of a V16-element 512-bit vector.  Only needs
;; a real vextract when the source is an ext register without AVX512VL;
;; prefers the 32x8 form (DQ), else 64x4.
;; NOTE(review): interior lines (condition, braces, fallback return) are
;; missing from this extract; the visible !REG_P test cannot be fully
;; judged without them.
9218 (define_insn "vec_extract_lo_<mode>"
9219 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
9220 (vec_select:<ssehalfvecmode>
9221 (match_operand:V16FI 1 "nonimmediate_operand" "v,m,v")
9222 (parallel [(const_int 0) (const_int 1)
9223 (const_int 2) (const_int 3)
9224 (const_int 4) (const_int 5)
9225 (const_int 6) (const_int 7)])))]
9227 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9229 if (!TARGET_AVX512VL
9230 && !REG_P (operands[0])
9231 && EXT_REX_SSE_REG_P (operands[1]))
9233 if (TARGET_AVX512DQ)
9234 return "vextract<shuffletype>32x8\t{$0x0, %1, %0|%0, %1, 0x0}";
9236 return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9241 [(set_attr "type" "sselog1")
9242 (set_attr "prefix_extra" "1")
9243 (set_attr "length_immediate" "1")
9244 (set_attr "memory" "none,load,store")
9245 (set_attr "prefix" "evex")
9246 (set_attr "mode" "<sseinsnmode>")])
;; Split: low-half extract of a V16-element 512-bit vector becomes a plain
;; move; for an ext source register without AVX512VL the destination is
;; rewritten as a full-width subreg so the move stays encodable.
;; NOTE(review): the define_split header line is missing from this extract.
9249 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9250 (vec_select:<ssehalfvecmode>
9251 (match_operand:V16FI 1 "nonimmediate_operand")
9252 (parallel [(const_int 0) (const_int 1)
9253 (const_int 2) (const_int 3)
9254 (const_int 4) (const_int 5)
9255 (const_int 6) (const_int 7)])))]
9256 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
9259 || REG_P (operands[0])
9260 || !EXT_REX_SSE_REG_P (operands[1]))"
9261 [(set (match_dup 0) (match_dup 1))]
9263 if (!TARGET_AVX512VL
9264 && REG_P (operands[0])
9265 && EXT_REX_SSE_REG_P (operands[1]))
9267 = lowpart_subreg (<MODE>mode, operands[0], <ssehalfvecmode>mode);
9269 operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
;; Masked extract of the low 128 bits of a 256-bit V4D[IF] vector
;; (vextract{f,i}64x2 imm 0, AVX512DQ+AVX512VL).
;; Fix: the mode attribute was "XI" (512-bit), but the insn operates on a
;; 256-bit source — use <sseinsnmode> like the sibling extract patterns
;; (e.g. the hi/_mask and VI4F_256 variants in this file).
9272 (define_insn "vec_extract_lo_<mode>_mask"
9273 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9274 (vec_merge:<ssehalfvecmode>
9275 (vec_select:<ssehalfvecmode>
9276 (match_operand:VI8F_256 1 "register_operand" "v,v")
9277 (parallel [(const_int 0) (const_int 1)]))
9278 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9279 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9282 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9283 "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
9284 [(set_attr "type" "sselog1")
9285 (set_attr "prefix_extra" "1")
9286 (set_attr "length_immediate" "1")
9287 (set_attr "memory" "none,store")
9288 (set_attr "prefix" "evex")
9289 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked low-128-bit extract of a 256-bit V4D[IF] vector: always a
;; plain lowpart move, handled by the split below.
;; NOTE(review): the "#" output template line appears to be missing from
;; this extract (embedded numbering skips 9298).
9291 (define_insn "vec_extract_lo_<mode>"
9292 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
9293 (vec_select:<ssehalfvecmode>
9294 (match_operand:VI8F_256 1 "nonimmediate_operand" "v,vm")
9295 (parallel [(const_int 0) (const_int 1)])))]
9297 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; Split after reload: low-half extract of a 256-bit V4D[IF] vector is a
;; simple 128-bit lowpart move.
;; NOTE(review): the define_split header line is missing from this extract.
9301 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9302 (vec_select:<ssehalfvecmode>
9303 (match_operand:VI8F_256 1 "nonimmediate_operand")
9304 (parallel [(const_int 0) (const_int 1)])))]
9305 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
9306 && reload_completed"
9307 [(set (match_dup 0) (match_dup 1))]
9308 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Masked extract of the high 128 bits of a 256-bit V4D[IF] vector
;; (vextract{f,i}64x2 imm 1, AVX512DQ+AVX512VL).
;; Fix: mask-merged forms are EVEX-encoded only, so the prefix attribute
;; must be "evex" (it was "vex"); this matches every other *_mask extract
;; pattern in this file.
9310 (define_insn "vec_extract_hi_<mode>_mask"
9311 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9312 (vec_merge:<ssehalfvecmode>
9313 (vec_select:<ssehalfvecmode>
9314 (match_operand:VI8F_256 1 "register_operand" "v,v")
9315 (parallel [(const_int 2) (const_int 3)]))
9316 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9317 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9320 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9321 "vextract<shuffletype>64x2\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
9322 [(set_attr "type" "sselog1")
9323 (set_attr "prefix_extra" "1")
9324 (set_attr "length_immediate" "1")
9325 (set_attr "prefix" "evex")
9326 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked high-128-bit extract of a 256-bit V4D[IF] vector.  Prefers the
;; AVX512VL 64x2 / 32x4 forms when available, else the VEX vextract<i128>.
9328 (define_insn "vec_extract_hi_<mode>"
9329 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
9330 (vec_select:<ssehalfvecmode>
9331 (match_operand:VI8F_256 1 "register_operand" "v")
9332 (parallel [(const_int 2) (const_int 3)])))]
9335 if (TARGET_AVX512VL)
9337 if (TARGET_AVX512DQ)
9338 return "vextract<shuffletype>64x2\t{$0x1, %1, %0|%0, %1, 0x1}";
9340 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
9343 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
9345 [(set_attr "type" "sselog1")
9346 (set_attr "prefix_extra" "1")
9347 (set_attr "length_immediate" "1")
9348 (set_attr "prefix" "vex")
9349 (set_attr "mode" "<sseinsnmode>")])
;; Split after reload: low-half extract of a 256-bit V8S[IF] vector is a
;; simple 128-bit lowpart move.
;; NOTE(review): the define_split header line is missing from this extract.
9352 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9353 (vec_select:<ssehalfvecmode>
9354 (match_operand:VI4F_256 1 "nonimmediate_operand")
9355 (parallel [(const_int 0) (const_int 1)
9356 (const_int 2) (const_int 3)])))]
9357 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
9358 && reload_completed"
9359 [(set (match_dup 0) (match_dup 1))]
9360 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Masked extract of the low 128 bits of a 256-bit V8S[IF] vector
;; (vextract{f,i}32x4 imm 0, AVX512VL).
9362 (define_insn "vec_extract_lo_<mode>_mask"
9363 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9364 (vec_merge:<ssehalfvecmode>
9365 (vec_select:<ssehalfvecmode>
9366 (match_operand:VI4F_256 1 "register_operand" "v,v")
9367 (parallel [(const_int 0) (const_int 1)
9368 (const_int 2) (const_int 3)]))
9369 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9370 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
9372 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9373 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
9374 [(set_attr "type" "sselog1")
9375 (set_attr "prefix_extra" "1")
9376 (set_attr "length_immediate" "1")
9377 (set_attr "prefix" "evex")
9378 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked low-128-bit extract of a 256-bit V8S[IF] vector; split above
;; reduces it to a plain move after reload.
;; NOTE(review): the output template line is missing from this extract
;; (embedded numbering skips 9388).
9380 (define_insn "vec_extract_lo_<mode>"
9381 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
9382 (vec_select:<ssehalfvecmode>
9383 (match_operand:VI4F_256 1 "nonimmediate_operand" "v,vm")
9384 (parallel [(const_int 0) (const_int 1)
9385 (const_int 2) (const_int 3)])))]
9387 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9389 [(set_attr "type" "sselog1")
9390 (set_attr "prefix_extra" "1")
9391 (set_attr "length_immediate" "1")
9392 (set_attr "prefix" "evex")
9393 (set_attr "mode" "<sseinsnmode>")])
;; Masked extract of the high 128 bits of a 256-bit V8S[IF] vector
;; (vextract{f,i}32x4 imm 1, AVX512VL).
;; Fix: operand 0's predicate was "register_operand", contradicting both
;; its own "=v,m" constraint and the MEM_P (operands[0]) test in the insn
;; condition; use "nonimmediate_operand" like the sibling masked extract
;; patterns in this file.
9395 (define_insn "vec_extract_hi_<mode>_mask"
9396 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
9397 (vec_merge:<ssehalfvecmode>
9398 (vec_select:<ssehalfvecmode>
9399 (match_operand:VI4F_256 1 "register_operand" "v,v")
9400 (parallel [(const_int 4) (const_int 5)
9401 (const_int 6) (const_int 7)]))
9402 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
9403 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9405 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
9406 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
9407 [(set_attr "type" "sselog1")
9408 (set_attr "length_immediate" "1")
9409 (set_attr "prefix" "evex")
9410 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked high-128-bit extract of a 256-bit V8S[IF] vector: VEX
;; vextract<i128> normally, EVEX 32x4 form when AVX512VL is available
;; (second alternative allows ext registers).
9412 (define_insn "vec_extract_hi_<mode>"
9413 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
9414 (vec_select:<ssehalfvecmode>
9415 (match_operand:VI4F_256 1 "register_operand" "x, v")
9416 (parallel [(const_int 4) (const_int 5)
9417 (const_int 6) (const_int 7)])))]
9420 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
9421 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9422 [(set_attr "isa" "*, avx512vl")
9423 (set_attr "prefix" "vex, evex")
9424 (set_attr "type" "sselog1")
9425 (set_attr "length_immediate" "1")
9426 (set_attr "mode" "<sseinsnmode>")])
;; Low-256-bit extract of V32HI.  Emits vextracti64x4 only when forced to
;; (ext source register without AVX512VL); otherwise splits to a plain
;; V16HI move after reload, with the ext-register case handled by taking
;; a V32HI subreg of the destination instead.
;; NOTE(review): some interior lines (vec_select keyword line, condition,
;; braces) are missing from this extract.
9428 (define_insn_and_split "vec_extract_lo_v32hi"
9429 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,v,m")
9431 (match_operand:V32HI 1 "nonimmediate_operand" "v,m,v")
9432 (parallel [(const_int 0) (const_int 1)
9433 (const_int 2) (const_int 3)
9434 (const_int 4) (const_int 5)
9435 (const_int 6) (const_int 7)
9436 (const_int 8) (const_int 9)
9437 (const_int 10) (const_int 11)
9438 (const_int 12) (const_int 13)
9439 (const_int 14) (const_int 15)])))]
9440 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9443 || REG_P (operands[0])
9444 || !EXT_REX_SSE_REG_P (operands[1]))
9447 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9449 "&& reload_completed
9451 || REG_P (operands[0])
9452 || !EXT_REX_SSE_REG_P (operands[1]))"
9453 [(set (match_dup 0) (match_dup 1))]
9455 if (!TARGET_AVX512VL
9456 && REG_P (operands[0])
9457 && EXT_REX_SSE_REG_P (operands[1]))
9458 operands[0] = lowpart_subreg (V32HImode, operands[0], V16HImode);
9460 operands[1] = gen_lowpart (V16HImode, operands[1]);
9462 [(set_attr "type" "sselog1")
9463 (set_attr "prefix_extra" "1")
9464 (set_attr "length_immediate" "1")
9465 (set_attr "memory" "none,load,store")
9466 (set_attr "prefix" "evex")
9467 (set_attr "mode" "XI")])
;; High-256-bit extract of V32HI via vextracti64x4 imm 1.
9469 (define_insn "vec_extract_hi_v32hi"
9470 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
9472 (match_operand:V32HI 1 "register_operand" "v")
9473 (parallel [(const_int 16) (const_int 17)
9474 (const_int 18) (const_int 19)
9475 (const_int 20) (const_int 21)
9476 (const_int 22) (const_int 23)
9477 (const_int 24) (const_int 25)
9478 (const_int 26) (const_int 27)
9479 (const_int 28) (const_int 29)
9480 (const_int 30) (const_int 31)])))]
9482 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9483 [(set_attr "type" "sselog1")
9484 (set_attr "prefix_extra" "1")
9485 (set_attr "length_immediate" "1")
9486 (set_attr "prefix" "evex")
9487 (set_attr "mode" "XI")])
;; Low-128-bit extract of V16HI: split after reload into a plain V8HI
;; lowpart move.
9489 (define_insn_and_split "vec_extract_lo_v16hi"
9490 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
9492 (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
9493 (parallel [(const_int 0) (const_int 1)
9494 (const_int 2) (const_int 3)
9495 (const_int 4) (const_int 5)
9496 (const_int 6) (const_int 7)])))]
9497 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9499 "&& reload_completed"
9500 [(set (match_dup 0) (match_dup 1))]
9501 "operands[1] = gen_lowpart (V8HImode, operands[1]);")
;; High-128-bit extract of V16HI.  VEX vextract*128 normally; with AVX512
;; the EVEX 32x4 form is used, and for ext registers without AVX512DQ the
;; source is widened to 512 bits via %g1 so plain AVX512F encoding works.
9503 (define_insn "vec_extract_hi_v16hi"
9504 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm,vm,vm")
9506 (match_operand:V16HI 1 "register_operand" "x,v,v")
9507 (parallel [(const_int 8) (const_int 9)
9508 (const_int 10) (const_int 11)
9509 (const_int 12) (const_int 13)
9510 (const_int 14) (const_int 15)])))]
9513 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
9514 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
9515 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
9516 [(set_attr "type" "sselog1")
9517 (set_attr "prefix_extra" "1")
9518 (set_attr "length_immediate" "1")
9519 (set_attr "isa" "*,avx512dq,avx512f")
9520 (set_attr "prefix" "vex,evex,evex")
9521 (set_attr "mode" "OI")])
;; Low-256-bit extract of V64QI — same scheme as vec_extract_lo_v32hi:
;; vextracti64x4 only for ext registers without AVX512VL, otherwise split
;; into a plain V32QI move after reload.
;; NOTE(review): some interior lines (vec_select keyword line, condition,
;; braces) are missing from this extract.
9523 (define_insn_and_split "vec_extract_lo_v64qi"
9524 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,v,m")
9526 (match_operand:V64QI 1 "nonimmediate_operand" "v,m,v")
9527 (parallel [(const_int 0) (const_int 1)
9528 (const_int 2) (const_int 3)
9529 (const_int 4) (const_int 5)
9530 (const_int 6) (const_int 7)
9531 (const_int 8) (const_int 9)
9532 (const_int 10) (const_int 11)
9533 (const_int 12) (const_int 13)
9534 (const_int 14) (const_int 15)
9535 (const_int 16) (const_int 17)
9536 (const_int 18) (const_int 19)
9537 (const_int 20) (const_int 21)
9538 (const_int 22) (const_int 23)
9539 (const_int 24) (const_int 25)
9540 (const_int 26) (const_int 27)
9541 (const_int 28) (const_int 29)
9542 (const_int 30) (const_int 31)])))]
9543 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9546 || REG_P (operands[0])
9547 || !EXT_REX_SSE_REG_P (operands[1]))
9550 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9552 "&& reload_completed
9554 || REG_P (operands[0])
9555 || !EXT_REX_SSE_REG_P (operands[1]))"
9556 [(set (match_dup 0) (match_dup 1))]
9558 if (!TARGET_AVX512VL
9559 && REG_P (operands[0])
9560 && EXT_REX_SSE_REG_P (operands[1]))
9561 operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode);
9563 operands[1] = gen_lowpart (V32QImode, operands[1]);
9565 [(set_attr "type" "sselog1")
9566 (set_attr "prefix_extra" "1")
9567 (set_attr "length_immediate" "1")
9568 (set_attr "memory" "none,load,store")
9569 (set_attr "prefix" "evex")
9570 (set_attr "mode" "XI")])
;; High-256-bit extract of V64QI via vextracti64x4 imm 1.
9572 (define_insn "vec_extract_hi_v64qi"
9573 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=vm")
9575 (match_operand:V64QI 1 "register_operand" "v")
9576 (parallel [(const_int 32) (const_int 33)
9577 (const_int 34) (const_int 35)
9578 (const_int 36) (const_int 37)
9579 (const_int 38) (const_int 39)
9580 (const_int 40) (const_int 41)
9581 (const_int 42) (const_int 43)
9582 (const_int 44) (const_int 45)
9583 (const_int 46) (const_int 47)
9584 (const_int 48) (const_int 49)
9585 (const_int 50) (const_int 51)
9586 (const_int 52) (const_int 53)
9587 (const_int 54) (const_int 55)
9588 (const_int 56) (const_int 57)
9589 (const_int 58) (const_int 59)
9590 (const_int 60) (const_int 61)
9591 (const_int 62) (const_int 63)])))]
9593 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9594 [(set_attr "type" "sselog1")
9595 (set_attr "prefix_extra" "1")
9596 (set_attr "length_immediate" "1")
9597 (set_attr "prefix" "evex")
9598 (set_attr "mode" "XI")])
;; Low-128-bit extract of V32QI: split after reload into a plain V16QI
;; lowpart move.
9600 (define_insn_and_split "vec_extract_lo_v32qi"
9601 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
9603 (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
9604 (parallel [(const_int 0) (const_int 1)
9605 (const_int 2) (const_int 3)
9606 (const_int 4) (const_int 5)
9607 (const_int 6) (const_int 7)
9608 (const_int 8) (const_int 9)
9609 (const_int 10) (const_int 11)
9610 (const_int 12) (const_int 13)
9611 (const_int 14) (const_int 15)])))]
9612 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9614 "&& reload_completed"
9615 [(set (match_dup 0) (match_dup 1))]
9616 "operands[1] = gen_lowpart (V16QImode, operands[1]);")
;; High-128-bit extract of V32QI — same three alternatives as
;; vec_extract_hi_v16hi (VEX, EVEX-DQ, and widened %g1 AVX512F form).
9618 (define_insn "vec_extract_hi_v32qi"
9619 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=xm,vm,vm")
9621 (match_operand:V32QI 1 "register_operand" "x,v,v")
9622 (parallel [(const_int 16) (const_int 17)
9623 (const_int 18) (const_int 19)
9624 (const_int 20) (const_int 21)
9625 (const_int 22) (const_int 23)
9626 (const_int 24) (const_int 25)
9627 (const_int 26) (const_int 27)
9628 (const_int 28) (const_int 29)
9629 (const_int 30) (const_int 31)])))]
9632 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
9633 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
9634 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
9635 [(set_attr "type" "sselog1")
9636 (set_attr "prefix_extra" "1")
9637 (set_attr "length_immediate" "1")
9638 (set_attr "isa" "*,avx512dq,avx512f")
9639 (set_attr "prefix" "vex,evex,evex")
9640 (set_attr "mode" "OI")])
9642 ;; Modes handled by vec_extract patterns.
;; 512-bit QI/HI forms need AVX512BW (the extract insns are byte/word
;; masked); 512-bit others need AVX512F; 256-bit forms need AVX.
9643 (define_mode_iterator VEC_EXTRACT_MODE
9644 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
9645 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
9646 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
9647 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
9648 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
9649 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
9650 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
;; Standard-name expander: extract scalar element operand 2 from a vector,
;; delegated to ix86_expand_vector_extract.
9652 (define_expand "vec_extract<mode><ssescalarmodelower>"
9653 [(match_operand:<ssescalarmode> 0 "register_operand")
9654 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
9655 (match_operand 2 "const_int_operand")]
9658 ix86_expand_vector_extract (false, operands[0], operands[1],
9659 INTVAL (operands[2]));
;; Standard-name expander: extract the low (imm 0) or high (imm 1) half
;; of a 256/512-bit vector via the lo/hi patterns above.
9663 (define_expand "vec_extract<mode><ssehalfvecmodelower>"
9664 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9665 (match_operand:V_256_512 1 "register_operand")
9666 (match_operand 2 "const_0_to_1_operand")]
9669 if (INTVAL (operands[2]))
9670 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]))
9672 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
9676 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9678 ;; Parallel double-precision floating point element swizzling
9680 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vunpckhpd on 512-bit doubles: interleaves the high element of each
;; 128-bit lane of operands 1 and 2 (lane-local shuffle).
9682 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
9683 [(set (match_operand:V8DF 0 "register_operand" "=v")
9686 (match_operand:V8DF 1 "register_operand" "v")
9687 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
9688 (parallel [(const_int 1) (const_int 9)
9689 (const_int 3) (const_int 11)
9690 (const_int 5) (const_int 13)
9691 (const_int 7) (const_int 15)])))]
9693 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9694 [(set_attr "type" "sselog")
9695 (set_attr "prefix" "evex")
9696 (set_attr "mode" "V8DF")])
9698 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
9699 (define_insn "avx_unpckhpd256<mask_name>"
9700 [(set (match_operand:V4DF 0 "register_operand" "=v")
9703 (match_operand:V4DF 1 "register_operand" "v")
9704 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
9705 (parallel [(const_int 1) (const_int 5)
9706 (const_int 3) (const_int 7)])))]
9707 "TARGET_AVX && <mask_avx512vl_condition>"
9708 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9709 [(set_attr "type" "sselog")
9710 (set_attr "prefix" "vex")
9711 (set_attr "mode" "V4DF")])
;; True cross-lane high interleave of V4DF, synthesized from two
;; lane-local unpck results (temps operands 3/4) plus a lane permute.
;; NOTE(review): several interior lines of the RTL template are missing
;; from this extract.
9713 (define_expand "vec_interleave_highv4df"
9717 (match_operand:V4DF 1 "register_operand")
9718 (match_operand:V4DF 2 "nonimmediate_operand"))
9719 (parallel [(const_int 0) (const_int 4)
9720 (const_int 2) (const_int 6)])))
9726 (parallel [(const_int 1) (const_int 5)
9727 (const_int 3) (const_int 7)])))
9728 (set (match_operand:V4DF 0 "register_operand")
9733 (parallel [(const_int 2) (const_int 3)
9734 (const_int 6) (const_int 7)])))]
9737 operands[3] = gen_reg_rtx (V4DFmode);
9738 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit vunpckhpd: high elements of operands 1/2 merged under
;; mask operand 4 with fallback operand 3.
9742 (define_insn "avx512vl_unpckhpd128_mask"
9743 [(set (match_operand:V2DF 0 "register_operand" "=v")
9747 (match_operand:V2DF 1 "register_operand" "v")
9748 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9749 (parallel [(const_int 1) (const_int 3)]))
9750 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9751 (match_operand:QI 4 "register_operand" "Yk")))]
9753 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9754 [(set_attr "type" "sselog")
9755 (set_attr "prefix" "evex")
9756 (set_attr "mode" "V2DF")])
;; Expander for V2DF high interleave; forces operand 2 into a register
;; when the operand combination is not directly encodable.
9758 (define_expand "vec_interleave_highv2df"
9759 [(set (match_operand:V2DF 0 "register_operand")
9762 (match_operand:V2DF 1 "nonimmediate_operand")
9763 (match_operand:V2DF 2 "nonimmediate_operand"))
9764 (parallel [(const_int 1)
9768 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
9769 operands[2] = force_reg (V2DFmode, operands[2]);
;; V2DF high interleave: unpckhpd/vunpckhpd for reg forms, movddup of the
;; high half for SSE3, and movlpd/movhpd forms for memory operands.
9772 (define_insn "*vec_interleave_highv2df"
9773 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
9776 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
9777 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
9778 (parallel [(const_int 1)
9780 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
9782 unpckhpd\t{%2, %0|%0, %2}
9783 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
9784 %vmovddup\t{%H1, %0|%0, %H1}
9785 movlpd\t{%H1, %0|%0, %H1}
9786 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
9787 %vmovhpd\t{%1, %0|%q0, %1}"
9788 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9789 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9790 (set (attr "prefix_data16")
9791 (if_then_else (eq_attr "alternative" "3,5")
9793 (const_string "*")))
9794 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9795 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; vmovddup on 512-bit doubles: duplicate the even-indexed element of
;; each 128-bit lane (selection 0,8,2,10,... from a concat of op1 with
;; itself).
9797 (define_expand "avx512f_movddup512<mask_name>"
9798 [(set (match_operand:V8DF 0 "register_operand")
9801 (match_operand:V8DF 1 "nonimmediate_operand")
9803 (parallel [(const_int 0) (const_int 8)
9804 (const_int 2) (const_int 10)
9805 (const_int 4) (const_int 12)
9806 (const_int 6) (const_int 14)])))]
;; vunpcklpd on 512-bit doubles: interleave the low element of each
;; 128-bit lane of operands 1 and 2.
9809 (define_expand "avx512f_unpcklpd512<mask_name>"
9810 [(set (match_operand:V8DF 0 "register_operand")
9813 (match_operand:V8DF 1 "register_operand")
9814 (match_operand:V8DF 2 "nonimmediate_operand"))
9815 (parallel [(const_int 0) (const_int 8)
9816 (const_int 2) (const_int 10)
9817 (const_int 4) (const_int 12)
9818 (const_int 6) (const_int 14)])))]
;; Matcher covering both vmovddup (when op2 duplicates op1, constraint
;; "1") and the general vunpcklpd form.
9821 (define_insn "*avx512f_unpcklpd512<mask_name>"
9822 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
9825 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
9826 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
9827 (parallel [(const_int 0) (const_int 8)
9828 (const_int 2) (const_int 10)
9829 (const_int 4) (const_int 12)
9830 (const_int 6) (const_int 14)])))]
9833 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
9834 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9835 [(set_attr "type" "sselog")
9836 (set_attr "prefix" "evex")
9837 (set_attr "mode" "V8DF")])
9839 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vmovddup on 256-bit doubles: duplicate the even element of each lane.
9840 (define_expand "avx_movddup256<mask_name>"
9841 [(set (match_operand:V4DF 0 "register_operand")
9844 (match_operand:V4DF 1 "nonimmediate_operand")
9846 (parallel [(const_int 0) (const_int 4)
9847 (const_int 2) (const_int 6)])))]
9848 "TARGET_AVX && <mask_avx512vl_condition>")
;; vunpcklpd on 256-bit doubles: lane-local low-element interleave.
9850 (define_expand "avx_unpcklpd256<mask_name>"
9851 [(set (match_operand:V4DF 0 "register_operand")
9854 (match_operand:V4DF 1 "register_operand")
9855 (match_operand:V4DF 2 "nonimmediate_operand"))
9856 (parallel [(const_int 0) (const_int 4)
9857 (const_int 2) (const_int 6)])))]
9858 "TARGET_AVX && <mask_avx512vl_condition>")
;; Matcher covering both vunpcklpd and the vmovddup special case (op2
;; duplicating op1, constraint "1" with memory op1).
9860 (define_insn "*avx_unpcklpd256<mask_name>"
9861 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
9864 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
9865 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
9866 (parallel [(const_int 0) (const_int 4)
9867 (const_int 2) (const_int 6)])))]
9868 "TARGET_AVX && <mask_avx512vl_condition>"
9870 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
9871 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
9872 [(set_attr "type" "sselog")
9873 (set_attr "prefix" "vex")
9874 (set_attr "mode" "V4DF")])
;; True cross-lane low interleave of V4DF, synthesized like the high
;; variant from two lane-local unpck temps plus a lane permute.
;; NOTE(review): several interior lines of the RTL template are missing
;; from this extract.
9876 (define_expand "vec_interleave_lowv4df"
9880 (match_operand:V4DF 1 "register_operand")
9881 (match_operand:V4DF 2 "nonimmediate_operand"))
9882 (parallel [(const_int 0) (const_int 4)
9883 (const_int 2) (const_int 6)])))
9889 (parallel [(const_int 1) (const_int 5)
9890 (const_int 3) (const_int 7)])))
9891 (set (match_operand:V4DF 0 "register_operand")
9896 (parallel [(const_int 0) (const_int 1)
9897 (const_int 4) (const_int 5)])))]
9900 operands[3] = gen_reg_rtx (V4DFmode);
9901 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit vunpcklpd: low elements of operands 1/2 merged under
;; mask operand 4 with fallback operand 3.
9904 (define_insn "avx512vl_unpcklpd128_mask"
9905 [(set (match_operand:V2DF 0 "register_operand" "=v")
9909 (match_operand:V2DF 1 "register_operand" "v")
9910 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9911 (parallel [(const_int 0) (const_int 2)]))
9912 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9913 (match_operand:QI 4 "register_operand" "Yk")))]
9915 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9916 [(set_attr "type" "sselog")
9917 (set_attr "prefix" "evex")
9918 (set_attr "mode" "V2DF")])
9920 (define_expand "vec_interleave_lowv2df"
9921 [(set (match_operand:V2DF 0 "register_operand")
9924 (match_operand:V2DF 1 "nonimmediate_operand")
9925 (match_operand:V2DF 2 "nonimmediate_operand"))
9926 (parallel [(const_int 0)
9930 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
9931 operands[1] = force_reg (V2DFmode, operands[1]);
9934 (define_insn "*vec_interleave_lowv2df"
9935 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
9938 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
9939 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
9940 (parallel [(const_int 0)
9942 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
9944 unpcklpd\t{%2, %0|%0, %2}
9945 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9946 %vmovddup\t{%1, %0|%0, %q1}
9947 movhpd\t{%2, %0|%0, %q2}
9948 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
9949 %vmovlpd\t{%2, %H0|%H0, %2}"
9950 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9951 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9952 (set (attr "prefix_data16")
9953 (if_then_else (eq_attr "alternative" "3,5")
9955 (const_string "*")))
9956 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9957 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
9960 [(set (match_operand:V2DF 0 "memory_operand")
9963 (match_operand:V2DF 1 "register_operand")
9965 (parallel [(const_int 0)
9967 "TARGET_SSE3 && reload_completed"
9970 rtx low = gen_lowpart (DFmode, operands[1]);
9972 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
9973 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
9978 [(set (match_operand:V2DF 0 "register_operand")
9981 (match_operand:V2DF 1 "memory_operand")
9983 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
9984 (match_operand:SI 3 "const_int_operand")])))]
9985 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
9986 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
9988 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
9991 (define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
9992 [(set (match_operand:VF_128 0 "register_operand" "=v")
9995 [(match_operand:VF_128 1 "register_operand" "v")
9996 (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
10001 "vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_scalar_mask_op3>}"
10002 [(set_attr "prefix" "evex")
10003 (set_attr "mode" "<ssescalarmode>")])
10005 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
10006 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
10007 (unspec:VF_AVX512VL
10008 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
10009 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
10012 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
10013 [(set_attr "prefix" "evex")
10014 (set_attr "mode" "<MODE>")])
10016 (define_expand "<avx512>_vternlog<mode>_maskz"
10017 [(match_operand:VI48_AVX512VL 0 "register_operand")
10018 (match_operand:VI48_AVX512VL 1 "register_operand")
10019 (match_operand:VI48_AVX512VL 2 "register_operand")
10020 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
10021 (match_operand:SI 4 "const_0_to_255_operand")
10022 (match_operand:<avx512fmaskmode> 5 "register_operand")]
10025 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
10026 operands[0], operands[1], operands[2], operands[3],
10027 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
10031 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
10032 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10033 (unspec:VI48_AVX512VL
10034 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
10035 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
10036 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
10037 (match_operand:SI 4 "const_0_to_255_operand")]
10040 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
10041 [(set_attr "type" "sselog")
10042 (set_attr "prefix" "evex")
10043 (set_attr "mode" "<sseinsnmode>")])
10045 (define_insn "<avx512>_vternlog<mode>_mask"
10046 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10047 (vec_merge:VI48_AVX512VL
10048 (unspec:VI48_AVX512VL
10049 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
10050 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
10051 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
10052 (match_operand:SI 4 "const_0_to_255_operand")]
10055 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
10057 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
10058 [(set_attr "type" "sselog")
10059 (set_attr "prefix" "evex")
10060 (set_attr "mode" "<sseinsnmode>")])
10062 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
10063 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
10064 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
10067 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
10068 [(set_attr "prefix" "evex")
10069 (set_attr "mode" "<MODE>")])
10071 (define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
10072 [(set (match_operand:VF_128 0 "register_operand" "=v")
10075 [(match_operand:VF_128 1 "register_operand" "v")
10076 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
10081 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}";
10082 [(set_attr "prefix" "evex")
10083 (set_attr "mode" "<ssescalarmode>")])
10085 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
10086 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10087 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
10088 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
10089 (match_operand:SI 3 "const_0_to_255_operand")]
10092 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
10093 [(set_attr "prefix" "evex")
10094 (set_attr "mode" "<sseinsnmode>")])
10096 (define_expand "avx512f_shufps512_mask"
10097 [(match_operand:V16SF 0 "register_operand")
10098 (match_operand:V16SF 1 "register_operand")
10099 (match_operand:V16SF 2 "nonimmediate_operand")
10100 (match_operand:SI 3 "const_0_to_255_operand")
10101 (match_operand:V16SF 4 "register_operand")
10102 (match_operand:HI 5 "register_operand")]
10105 int mask = INTVAL (operands[3]);
10106 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
10107 GEN_INT ((mask >> 0) & 3),
10108 GEN_INT ((mask >> 2) & 3),
10109 GEN_INT (((mask >> 4) & 3) + 16),
10110 GEN_INT (((mask >> 6) & 3) + 16),
10111 GEN_INT (((mask >> 0) & 3) + 4),
10112 GEN_INT (((mask >> 2) & 3) + 4),
10113 GEN_INT (((mask >> 4) & 3) + 20),
10114 GEN_INT (((mask >> 6) & 3) + 20),
10115 GEN_INT (((mask >> 0) & 3) + 8),
10116 GEN_INT (((mask >> 2) & 3) + 8),
10117 GEN_INT (((mask >> 4) & 3) + 24),
10118 GEN_INT (((mask >> 6) & 3) + 24),
10119 GEN_INT (((mask >> 0) & 3) + 12),
10120 GEN_INT (((mask >> 2) & 3) + 12),
10121 GEN_INT (((mask >> 4) & 3) + 28),
10122 GEN_INT (((mask >> 6) & 3) + 28),
10123 operands[4], operands[5]));
10128 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
10129 [(match_operand:VF_AVX512VL 0 "register_operand")
10130 (match_operand:VF_AVX512VL 1 "register_operand")
10131 (match_operand:VF_AVX512VL 2 "register_operand")
10132 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
10133 (match_operand:SI 4 "const_0_to_255_operand")
10134 (match_operand:<avx512fmaskmode> 5 "register_operand")]
10137 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
10138 operands[0], operands[1], operands[2], operands[3],
10139 operands[4], CONST0_RTX (<MODE>mode), operands[5]
10140 <round_saeonly_expand_operand6>));
10144 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
10145 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
10146 (unspec:VF_AVX512VL
10147 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
10148 (match_operand:VF_AVX512VL 2 "register_operand" "v")
10149 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
10150 (match_operand:SI 4 "const_0_to_255_operand")]
10153 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
10154 [(set_attr "prefix" "evex")
10155 (set_attr "mode" "<MODE>")])
10157 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
10158 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
10159 (vec_merge:VF_AVX512VL
10160 (unspec:VF_AVX512VL
10161 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
10162 (match_operand:VF_AVX512VL 2 "register_operand" "v")
10163 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
10164 (match_operand:SI 4 "const_0_to_255_operand")]
10167 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
10169 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
10170 [(set_attr "prefix" "evex")
10171 (set_attr "mode" "<MODE>")])
10173 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
10174 [(match_operand:VF_128 0 "register_operand")
10175 (match_operand:VF_128 1 "register_operand")
10176 (match_operand:VF_128 2 "register_operand")
10177 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
10178 (match_operand:SI 4 "const_0_to_255_operand")
10179 (match_operand:<avx512fmaskmode> 5 "register_operand")]
10182 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
10183 operands[0], operands[1], operands[2], operands[3],
10184 operands[4], CONST0_RTX (<MODE>mode), operands[5]
10185 <round_saeonly_expand_operand6>));
10189 (define_insn "avx512f_sfixupimm<mode><maskz_scalar_name><round_saeonly_name>"
10190 [(set (match_operand:VF_128 0 "register_operand" "=v")
10193 [(match_operand:VF_128 1 "register_operand" "0")
10194 (match_operand:VF_128 2 "register_operand" "v")
10195 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
10196 (match_operand:SI 4 "const_0_to_255_operand")]
10201 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_maskz_scalar_op5>%3, %2, %0<maskz_scalar_op5>|%0<maskz_scalar_op5>, %2, %<iptr>3<round_saeonly_maskz_scalar_op5>, %4}";
10202 [(set_attr "prefix" "evex")
10203 (set_attr "mode" "<ssescalarmode>")])
10205 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
10206 [(set (match_operand:VF_128 0 "register_operand" "=v")
10210 [(match_operand:VF_128 1 "register_operand" "0")
10211 (match_operand:VF_128 2 "register_operand" "v")
10212 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
10213 (match_operand:SI 4 "const_0_to_255_operand")]
10218 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
10220 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %<iptr>3<round_saeonly_op6>, %4}";
10221 [(set_attr "prefix" "evex")
10222 (set_attr "mode" "<ssescalarmode>")])
10224 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
10225 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
10226 (unspec:VF_AVX512VL
10227 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
10228 (match_operand:SI 2 "const_0_to_255_operand")]
10231 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
10232 [(set_attr "length_immediate" "1")
10233 (set_attr "prefix" "evex")
10234 (set_attr "mode" "<MODE>")])
10236 (define_insn "avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>"
10237 [(set (match_operand:VF_128 0 "register_operand" "=v")
10240 [(match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
10241 (match_operand:SI 3 "const_0_to_255_operand")]
10243 (match_operand:VF_128 1 "register_operand" "v")
10246 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
10247 [(set_attr "length_immediate" "1")
10248 (set_attr "prefix" "evex")
10249 (set_attr "mode" "<MODE>")])
10251 (define_insn "*avx512f_rndscale<mode><round_saeonly_name>"
10252 [(set (match_operand:VF_128 0 "register_operand" "=v")
10254 (vec_duplicate:VF_128
10255 (unspec:<ssescalarmode>
10256 [(match_operand:<ssescalarmode> 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
10257 (match_operand:SI 3 "const_0_to_255_operand")]
10259 (match_operand:VF_128 1 "register_operand" "v")
10262 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
10263 [(set_attr "length_immediate" "1")
10264 (set_attr "prefix" "evex")
10265 (set_attr "mode" "<MODE>")])
10267 ;; One bit in mask selects 2 elements.
10268 (define_insn "avx512f_shufps512_1<mask_name>"
10269 [(set (match_operand:V16SF 0 "register_operand" "=v")
10272 (match_operand:V16SF 1 "register_operand" "v")
10273 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
10274 (parallel [(match_operand 3 "const_0_to_3_operand")
10275 (match_operand 4 "const_0_to_3_operand")
10276 (match_operand 5 "const_16_to_19_operand")
10277 (match_operand 6 "const_16_to_19_operand")
10278 (match_operand 7 "const_4_to_7_operand")
10279 (match_operand 8 "const_4_to_7_operand")
10280 (match_operand 9 "const_20_to_23_operand")
10281 (match_operand 10 "const_20_to_23_operand")
10282 (match_operand 11 "const_8_to_11_operand")
10283 (match_operand 12 "const_8_to_11_operand")
10284 (match_operand 13 "const_24_to_27_operand")
10285 (match_operand 14 "const_24_to_27_operand")
10286 (match_operand 15 "const_12_to_15_operand")
10287 (match_operand 16 "const_12_to_15_operand")
10288 (match_operand 17 "const_28_to_31_operand")
10289 (match_operand 18 "const_28_to_31_operand")])))]
10291 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
10292 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
10293 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
10294 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
10295 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
10296 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
10297 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
10298 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
10299 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
10300 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
10301 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
10302 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
10305 mask = INTVAL (operands[3]);
10306 mask |= INTVAL (operands[4]) << 2;
10307 mask |= (INTVAL (operands[5]) - 16) << 4;
10308 mask |= (INTVAL (operands[6]) - 16) << 6;
10309 operands[3] = GEN_INT (mask);
10311 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
10313 [(set_attr "type" "sselog")
10314 (set_attr "length_immediate" "1")
10315 (set_attr "prefix" "evex")
10316 (set_attr "mode" "V16SF")])
10318 (define_expand "avx512f_shufpd512_mask"
10319 [(match_operand:V8DF 0 "register_operand")
10320 (match_operand:V8DF 1 "register_operand")
10321 (match_operand:V8DF 2 "nonimmediate_operand")
10322 (match_operand:SI 3 "const_0_to_255_operand")
10323 (match_operand:V8DF 4 "register_operand")
10324 (match_operand:QI 5 "register_operand")]
10327 int mask = INTVAL (operands[3]);
10328 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
10329 GEN_INT (mask & 1),
10330 GEN_INT (mask & 2 ? 9 : 8),
10331 GEN_INT (mask & 4 ? 3 : 2),
10332 GEN_INT (mask & 8 ? 11 : 10),
10333 GEN_INT (mask & 16 ? 5 : 4),
10334 GEN_INT (mask & 32 ? 13 : 12),
10335 GEN_INT (mask & 64 ? 7 : 6),
10336 GEN_INT (mask & 128 ? 15 : 14),
10337 operands[4], operands[5]));
10341 (define_insn "avx512f_shufpd512_1<mask_name>"
10342 [(set (match_operand:V8DF 0 "register_operand" "=v")
10345 (match_operand:V8DF 1 "register_operand" "v")
10346 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
10347 (parallel [(match_operand 3 "const_0_to_1_operand")
10348 (match_operand 4 "const_8_to_9_operand")
10349 (match_operand 5 "const_2_to_3_operand")
10350 (match_operand 6 "const_10_to_11_operand")
10351 (match_operand 7 "const_4_to_5_operand")
10352 (match_operand 8 "const_12_to_13_operand")
10353 (match_operand 9 "const_6_to_7_operand")
10354 (match_operand 10 "const_14_to_15_operand")])))]
10358 mask = INTVAL (operands[3]);
10359 mask |= (INTVAL (operands[4]) - 8) << 1;
10360 mask |= (INTVAL (operands[5]) - 2) << 2;
10361 mask |= (INTVAL (operands[6]) - 10) << 3;
10362 mask |= (INTVAL (operands[7]) - 4) << 4;
10363 mask |= (INTVAL (operands[8]) - 12) << 5;
10364 mask |= (INTVAL (operands[9]) - 6) << 6;
10365 mask |= (INTVAL (operands[10]) - 14) << 7;
10366 operands[3] = GEN_INT (mask);
10368 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
10370 [(set_attr "type" "sselog")
10371 (set_attr "length_immediate" "1")
10372 (set_attr "prefix" "evex")
10373 (set_attr "mode" "V8DF")])
10375 (define_expand "avx_shufpd256<mask_expand4_name>"
10376 [(match_operand:V4DF 0 "register_operand")
10377 (match_operand:V4DF 1 "register_operand")
10378 (match_operand:V4DF 2 "nonimmediate_operand")
10379 (match_operand:SI 3 "const_int_operand")]
10382 int mask = INTVAL (operands[3]);
10383 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
10386 GEN_INT (mask & 1),
10387 GEN_INT (mask & 2 ? 5 : 4),
10388 GEN_INT (mask & 4 ? 3 : 2),
10389 GEN_INT (mask & 8 ? 7 : 6)
10390 <mask_expand4_args>));
10394 (define_insn "avx_shufpd256_1<mask_name>"
10395 [(set (match_operand:V4DF 0 "register_operand" "=v")
10398 (match_operand:V4DF 1 "register_operand" "v")
10399 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
10400 (parallel [(match_operand 3 "const_0_to_1_operand")
10401 (match_operand 4 "const_4_to_5_operand")
10402 (match_operand 5 "const_2_to_3_operand")
10403 (match_operand 6 "const_6_to_7_operand")])))]
10404 "TARGET_AVX && <mask_avx512vl_condition>"
10407 mask = INTVAL (operands[3]);
10408 mask |= (INTVAL (operands[4]) - 4) << 1;
10409 mask |= (INTVAL (operands[5]) - 2) << 2;
10410 mask |= (INTVAL (operands[6]) - 6) << 3;
10411 operands[3] = GEN_INT (mask);
10413 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
10415 [(set_attr "type" "sseshuf")
10416 (set_attr "length_immediate" "1")
10417 (set_attr "prefix" "vex")
10418 (set_attr "mode" "V4DF")])
10420 (define_expand "sse2_shufpd<mask_expand4_name>"
10421 [(match_operand:V2DF 0 "register_operand")
10422 (match_operand:V2DF 1 "register_operand")
10423 (match_operand:V2DF 2 "vector_operand")
10424 (match_operand:SI 3 "const_int_operand")]
10427 int mask = INTVAL (operands[3]);
10428 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
10429 operands[2], GEN_INT (mask & 1),
10430 GEN_INT (mask & 2 ? 3 : 2)
10431 <mask_expand4_args>));
10435 (define_insn "sse2_shufpd_v2df_mask"
10436 [(set (match_operand:V2DF 0 "register_operand" "=v")
10440 (match_operand:V2DF 1 "register_operand" "v")
10441 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
10442 (parallel [(match_operand 3 "const_0_to_1_operand")
10443 (match_operand 4 "const_2_to_3_operand")]))
10444 (match_operand:V2DF 5 "nonimm_or_0_operand" "0C")
10445 (match_operand:QI 6 "register_operand" "Yk")))]
10449 mask = INTVAL (operands[3]);
10450 mask |= (INTVAL (operands[4]) - 2) << 1;
10451 operands[3] = GEN_INT (mask);
10453 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
10455 [(set_attr "type" "sseshuf")
10456 (set_attr "length_immediate" "1")
10457 (set_attr "prefix" "evex")
10458 (set_attr "mode" "V2DF")])
10460 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
10461 (define_insn "avx2_interleave_highv4di<mask_name>"
10462 [(set (match_operand:V4DI 0 "register_operand" "=v")
10465 (match_operand:V4DI 1 "register_operand" "v")
10466 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
10467 (parallel [(const_int 1)
10471 "TARGET_AVX2 && <mask_avx512vl_condition>"
10472 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10473 [(set_attr "type" "sselog")
10474 (set_attr "prefix" "vex")
10475 (set_attr "mode" "OI")])
10477 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
10478 [(set (match_operand:V8DI 0 "register_operand" "=v")
10481 (match_operand:V8DI 1 "register_operand" "v")
10482 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
10483 (parallel [(const_int 1) (const_int 9)
10484 (const_int 3) (const_int 11)
10485 (const_int 5) (const_int 13)
10486 (const_int 7) (const_int 15)])))]
10488 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10489 [(set_attr "type" "sselog")
10490 (set_attr "prefix" "evex")
10491 (set_attr "mode" "XI")])
10493 (define_insn "vec_interleave_highv2di<mask_name>"
10494 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10497 (match_operand:V2DI 1 "register_operand" "0,v")
10498 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
10499 (parallel [(const_int 1)
10501 "TARGET_SSE2 && <mask_avx512vl_condition>"
10503 punpckhqdq\t{%2, %0|%0, %2}
10504 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10505 [(set_attr "isa" "noavx,avx")
10506 (set_attr "type" "sselog")
10507 (set_attr "prefix_data16" "1,*")
10508 (set_attr "prefix" "orig,<mask_prefix>")
10509 (set_attr "mode" "TI")])
10511 (define_insn "avx2_interleave_lowv4di<mask_name>"
10512 [(set (match_operand:V4DI 0 "register_operand" "=v")
10515 (match_operand:V4DI 1 "register_operand" "v")
10516 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
10517 (parallel [(const_int 0)
10521 "TARGET_AVX2 && <mask_avx512vl_condition>"
10522 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10523 [(set_attr "type" "sselog")
10524 (set_attr "prefix" "vex")
10525 (set_attr "mode" "OI")])
10527 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
10528 [(set (match_operand:V8DI 0 "register_operand" "=v")
10531 (match_operand:V8DI 1 "register_operand" "v")
10532 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
10533 (parallel [(const_int 0) (const_int 8)
10534 (const_int 2) (const_int 10)
10535 (const_int 4) (const_int 12)
10536 (const_int 6) (const_int 14)])))]
10538 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10539 [(set_attr "type" "sselog")
10540 (set_attr "prefix" "evex")
10541 (set_attr "mode" "XI")])
10543 (define_insn "vec_interleave_lowv2di<mask_name>"
10544 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10547 (match_operand:V2DI 1 "register_operand" "0,v")
10548 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
10549 (parallel [(const_int 0)
10551 "TARGET_SSE2 && <mask_avx512vl_condition>"
10553 punpcklqdq\t{%2, %0|%0, %2}
10554 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10555 [(set_attr "isa" "noavx,avx")
10556 (set_attr "type" "sselog")
10557 (set_attr "prefix_data16" "1,*")
10558 (set_attr "prefix" "orig,vex")
10559 (set_attr "mode" "TI")])
10561 (define_insn "sse2_shufpd_<mode>"
10562 [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
10563 (vec_select:VI8F_128
10564 (vec_concat:<ssedoublevecmode>
10565 (match_operand:VI8F_128 1 "register_operand" "0,v")
10566 (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
10567 (parallel [(match_operand 3 "const_0_to_1_operand")
10568 (match_operand 4 "const_2_to_3_operand")])))]
10572 mask = INTVAL (operands[3]);
10573 mask |= (INTVAL (operands[4]) - 2) << 1;
10574 operands[3] = GEN_INT (mask);
10576 switch (which_alternative)
10579 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
10581 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10583 gcc_unreachable ();
10586 [(set_attr "isa" "noavx,avx")
10587 (set_attr "type" "sseshuf")
10588 (set_attr "length_immediate" "1")
10589 (set_attr "prefix" "orig,maybe_evex")
10590 (set_attr "mode" "V2DF")])
10592 ;; Avoid combining registers from different units in a single alternative,
10593 ;; see comment above inline_secondary_memory_needed function in i386.c
10594 (define_insn "sse2_storehpd"
10595 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
10597 (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
10598 (parallel [(const_int 1)])))]
10599 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10601 %vmovhpd\t{%1, %0|%0, %1}
10603 vunpckhpd\t{%d1, %0|%0, %d1}
10607 [(set_attr "isa" "*,noavx,avx,*,*,*")
10608 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
10609 (set (attr "prefix_data16")
10611 (and (eq_attr "alternative" "0")
10612 (not (match_test "TARGET_AVX")))
10614 (const_string "*")))
10615 (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
10616 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
10619 [(set (match_operand:DF 0 "register_operand")
10621 (match_operand:V2DF 1 "memory_operand")
10622 (parallel [(const_int 1)])))]
10623 "TARGET_SSE2 && reload_completed"
10624 [(set (match_dup 0) (match_dup 1))]
10625 "operands[1] = adjust_address (operands[1], DFmode, 8);")
10627 (define_insn "*vec_extractv2df_1_sse"
10628 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
10630 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
10631 (parallel [(const_int 1)])))]
10632 "!TARGET_SSE2 && TARGET_SSE
10633 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10635 movhps\t{%1, %0|%0, %1}
10636 movhlps\t{%1, %0|%0, %1}
10637 movlps\t{%H1, %0|%0, %H1}"
10638 [(set_attr "type" "ssemov")
10639 (set_attr "mode" "V2SF,V4SF,V2SF")])
10641 ;; Avoid combining registers from different units in a single alternative,
10642 ;; see comment above inline_secondary_memory_needed function in i386.c
10643 (define_insn "sse2_storelpd"
10644 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
10646 (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
10647 (parallel [(const_int 0)])))]
10648 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10650 %vmovlpd\t{%1, %0|%0, %1}
10655 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
10656 (set (attr "prefix_data16")
10657 (if_then_else (eq_attr "alternative" "0")
10659 (const_string "*")))
10660 (set_attr "prefix" "maybe_vex")
10661 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
10664 [(set (match_operand:DF 0 "register_operand")
10666 (match_operand:V2DF 1 "nonimmediate_operand")
10667 (parallel [(const_int 0)])))]
10668 "TARGET_SSE2 && reload_completed"
10669 [(set (match_dup 0) (match_dup 1))]
10670 "operands[1] = gen_lowpart (DFmode, operands[1]);")
10672 (define_insn "*vec_extractv2df_0_sse"
10673 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
10675 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
10676 (parallel [(const_int 0)])))]
10677 "!TARGET_SSE2 && TARGET_SSE
10678 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10680 movlps\t{%1, %0|%0, %1}
10681 movaps\t{%1, %0|%0, %1}
10682 movlps\t{%1, %0|%0, %q1}"
10683 [(set_attr "type" "ssemov")
10684 (set_attr "mode" "V2SF,V4SF,V2SF")])
10686 (define_expand "sse2_loadhpd_exp"
10687 [(set (match_operand:V2DF 0 "nonimmediate_operand")
10690 (match_operand:V2DF 1 "nonimmediate_operand")
10691 (parallel [(const_int 0)]))
10692 (match_operand:DF 2 "nonimmediate_operand")))]
10695 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
10697 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
10699 /* Fix up the destination if needed. */
10700 if (dst != operands[0])
10701 emit_move_insn (operands[0], dst);
10706 ;; Avoid combining registers from different units in a single alternative,
10707 ;; see comment above inline_secondary_memory_needed function in i386.c
10708 (define_insn "sse2_loadhpd"
10709 [(set (match_operand:V2DF 0 "nonimmediate_operand"
10710 "=x,v,x,v ,o,o ,o")
10713 (match_operand:V2DF 1 "nonimmediate_operand"
10714 " 0,v,0,v ,0,0 ,0")
10715 (parallel [(const_int 0)]))
10716 (match_operand:DF 2 "nonimmediate_operand"
10717 " m,m,x,Yv,x,*f,r")))]
10718 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10720 movhpd\t{%2, %0|%0, %2}
10721 vmovhpd\t{%2, %1, %0|%0, %1, %2}
10722 unpcklpd\t{%2, %0|%0, %2}
10723 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10727 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
10728 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
10729 (set (attr "prefix_data16")
10730 (if_then_else (eq_attr "alternative" "0")
10732 (const_string "*")))
10733 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
10734 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
10737 [(set (match_operand:V2DF 0 "memory_operand")
10739 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
10740 (match_operand:DF 1 "register_operand")))]
10741 "TARGET_SSE2 && reload_completed"
10742 [(set (match_dup 0) (match_dup 1))]
10743 "operands[0] = adjust_address (operands[0], DFmode, 8);")
10745 (define_expand "sse2_loadlpd_exp"
10746 [(set (match_operand:V2DF 0 "nonimmediate_operand")
10748 (match_operand:DF 2 "nonimmediate_operand")
10750 (match_operand:V2DF 1 "nonimmediate_operand")
10751 (parallel [(const_int 1)]))))]
10754 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
10756 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
10758 /* Fix up the destination if needed. */
10759 if (dst != operands[0])
10760 emit_move_insn (operands[0], dst);
10765 ;; Avoid combining registers from different units in a single alternative,
10766 ;; see comment above inline_secondary_memory_needed function in i386.c
10767 (define_insn "sse2_loadlpd"
10768 [(set (match_operand:V2DF 0 "nonimmediate_operand"
10769 "=v,x,v,x,v,x,x,v,m,m ,m")
10771 (match_operand:DF 2 "nonimmediate_operand"
10772 "vm,m,m,x,v,0,0,v,x,*f,r")
10774 (match_operand:V2DF 1 "nonimm_or_0_operand"
10775 " C,0,v,0,v,x,o,o,0,0 ,0")
10776 (parallel [(const_int 1)]))))]
10777 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10779 %vmovq\t{%2, %0|%0, %2}
10780 movlpd\t{%2, %0|%0, %2}
10781 vmovlpd\t{%2, %1, %0|%0, %1, %2}
10782 movsd\t{%2, %0|%0, %2}
10783 vmovsd\t{%2, %1, %0|%0, %1, %2}
10784 shufpd\t{$2, %1, %0|%0, %1, 2}
10785 movhpd\t{%H1, %0|%0, %H1}
10786 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
10790 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
10792 (cond [(eq_attr "alternative" "5")
10793 (const_string "sselog")
10794 (eq_attr "alternative" "9")
10795 (const_string "fmov")
10796 (eq_attr "alternative" "10")
10797 (const_string "imov")
10799 (const_string "ssemov")))
10800 (set (attr "prefix_data16")
10801 (if_then_else (eq_attr "alternative" "1,6")
10803 (const_string "*")))
10804 (set (attr "length_immediate")
10805 (if_then_else (eq_attr "alternative" "5")
10807 (const_string "*")))
10808 (set (attr "prefix")
10809 (cond [(eq_attr "alternative" "0")
10810 (const_string "maybe_vex")
10811 (eq_attr "alternative" "1,3,5,6")
10812 (const_string "orig")
10813 (eq_attr "alternative" "2,4,7")
10814 (const_string "maybe_evex")
10816 (const_string "*")))
10817 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Split (after reload) a "store DF register into the low half of a V2DF
;; in memory, keeping the high half" into a plain DFmode store to the
;; same address.
;; NOTE(review): the "(define_split" header line itself is not visible in
;; this excerpt -- confirm against the full sse.md.
10820 [(set (match_operand:V2DF 0 "memory_operand")
10822 (match_operand:DF 1 "register_operand")
10823 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
10824 "TARGET_SSE2 && reload_completed"
10825 [(set (match_dup 0) (match_dup 1))]
;; Narrow the V2DF memory reference to DFmode at offset 0 (the low half).
10826 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; sse2_movsd: merge the low DF element of operand 2 into operand 1
;; (movsd/movlpd family).  Memory-destination alternatives store either
;; the low or high half directly.
;; NOTE(review): some original lines (the vec_merge wrapper and the
;; const_int merge selector) are missing from this excerpt.
10828 (define_insn "sse2_movsd"
10829 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
10831 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
10832 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
10836 movsd\t{%2, %0|%0, %2}
10837 vmovsd\t{%2, %1, %0|%0, %1, %2}
10838 movlpd\t{%2, %0|%0, %q2}
10839 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
10840 %vmovlpd\t{%2, %0|%q0, %2}
10841 shufpd\t{$2, %1, %0|%0, %1, 2}
10842 movhps\t{%H1, %0|%0, %H1}
10843 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
10844 %vmovhps\t{%1, %H0|%H0, %1}"
;; Alt 5 is the shufpd form (sselog with an immediate); the rest are moves.
10845 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
10848 (eq_attr "alternative" "5")
10849 (const_string "sselog")
10850 (const_string "ssemov")))
10851 (set (attr "prefix_data16")
10853 (and (eq_attr "alternative" "2,4")
10854 (not (match_test "TARGET_AVX")))
10856 (const_string "*")))
10857 (set (attr "length_immediate")
10858 (if_then_else (eq_attr "alternative" "5")
10860 (const_string "*")))
10861 (set (attr "prefix")
10862 (cond [(eq_attr "alternative" "1,3,7")
10863 (const_string "maybe_evex")
10864 (eq_attr "alternative" "4,8")
10865 (const_string "maybe_vex")
10867 (const_string "orig")))
10868 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Broadcast a DFmode scalar into both elements of a V2DF, with optional
;; AVX512 masking (<mask_name>/<mask_operand2> substitutions).
;; NOTE(review): the output template for alternative 0 (noavx) is missing
;; from this excerpt; only the two movddup forms are visible.
10870 (define_insn "vec_dupv2df<mask_name>"
10871 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
10872 (vec_duplicate:V2DF
10873 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
10874 "TARGET_SSE2 && <mask_avx512vl_condition>"
10877 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
10878 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
10879 [(set_attr "isa" "noavx,sse3,avx512vl")
10880 (set_attr "type" "sselog1")
10881 (set_attr "prefix" "orig,maybe_vex,evex")
10882 (set_attr "mode" "V2DF,DF,DF")])
;; Concatenate two DFmode scalars into a V2DF (operand 1 low, operand 2
;; high).  Special alternatives: matching memory operands use movddup
;; (alts 3/4), a zero high half uses vmovq (alt 7), and MMX-register
;; sources fall back to movlhps/movhps (alts 8/9).
10884 (define_insn "vec_concatv2df"
10885 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x, v,x,x")
10887 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,vm,0,0")
10888 (match_operand:DF 2 "nonimm_or_0_operand" " x,x,v,1,1,m,m, C,x,m")))]
;; Two memory operands are only allowed when they are the same location
;; (the movddup alternatives), and movddup needs SSE3.
10890 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
10891 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
10893 unpcklpd\t{%2, %0|%0, %2}
10894 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10895 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10896 %vmovddup\t{%1, %0|%0, %1}
10897 vmovddup\t{%1, %0|%0, %1}
10898 movhpd\t{%2, %0|%0, %2}
10899 vmovhpd\t{%2, %1, %0|%0, %1, %2}
10900 %vmovq\t{%1, %0|%0, %1}
10901 movlhps\t{%2, %0|%0, %2}
10902 movhps\t{%2, %0|%0, %2}"
10904 (cond [(eq_attr "alternative" "0,5")
10905 (const_string "sse2_noavx")
10906 (eq_attr "alternative" "1,6")
10907 (const_string "avx")
10908 (eq_attr "alternative" "2,4")
10909 (const_string "avx512vl")
10910 (eq_attr "alternative" "3")
10911 (const_string "sse3")
10912 (eq_attr "alternative" "7")
10913 (const_string "sse2")
10915 (const_string "noavx")))
;; Alts 0-4 are unpack/dup shuffles (sselog); the rest are plain moves.
10918 (eq_attr "alternative" "0,1,2,3,4")
10919 (const_string "sselog")
10920 (const_string "ssemov")))
10921 (set (attr "prefix_data16")
10922 (if_then_else (eq_attr "alternative" "5")
10924 (const_string "*")))
10925 (set (attr "prefix")
10926 (cond [(eq_attr "alternative" "1,6")
10927 (const_string "vex")
10928 (eq_attr "alternative" "2,4")
10929 (const_string "evex")
10930 (eq_attr "alternative" "3,7")
10931 (const_string "maybe_vex")
10933 (const_string "orig")))
10934 (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
10936 ;; vmovq clears also the higher bits.
;; Set element 0 of a 256/512-bit DF vector from a scalar and zero every
;; other element (operand 1 is required to be the zero vector).  %x0
;; selects the 128-bit low part of the destination register name.
10937 (define_insn "vec_set<mode>_0"
10938 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
10939 (vec_merge:VF2_512_256
10940 (vec_duplicate:VF2_512_256
10941 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "vm"))
10942 (match_operand:VF2_512_256 1 "const0_operand" "C")
10945 "vmovq\t{%2, %x0|%x0, %2}"
10946 [(set_attr "type" "ssemov")
10947 (set_attr "prefix" "maybe_evex")
10948 (set_attr "mode" "DF")])
10950 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10952 ;; Parallel integer down-conversion operations
10954 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Full-width AVX512F truncations: 512-bit source, destination fills the
;; whole (128/256-bit) result vector.  pmov_suff_1 selects the vpmov
;; mnemonic suffix (db/dw/qd/qw) for each source/destination pair.
10956 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
10957 (define_mode_attr pmov_src_mode
10958 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
10959 (define_mode_attr pmov_src_lower
10960 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
10961 (define_mode_attr pmov_suff_1
10962 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
;; Standard-named truncation expander; matched by the insn below.
10964 (define_expand "trunc<pmov_src_lower><mode>2"
10965 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand")
10966 (truncate:PMOV_DST_MODE_1
10967 (match_operand:<pmov_src_mode> 1 "register_operand")))]
;; vpmov{,s,us}<suffix>: truncating/saturating down-convert, register or
;; memory destination (any_truncate covers all three via <trunsuffix>).
10970 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
10971 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10972 (any_truncate:PMOV_DST_MODE_1
10973 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
10975 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
10976 [(set_attr "type" "ssemov")
10977 (set_attr "memory" "none,store")
10978 (set_attr "prefix" "evex")
10979 (set_attr "mode" "<sseinsnmode>")])
;; Masked variant: elements where mask operand 3 is clear come from
;; operand 2 (%N2 prints {z} when operand 2 is the zero vector).
10981 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
10982 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10983 (vec_merge:PMOV_DST_MODE_1
10984 (any_truncate:PMOV_DST_MODE_1
10985 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
10986 (match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0")
10987 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
10989 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10990 [(set_attr "type" "ssemov")
10991 (set_attr "memory" "none,store")
10992 (set_attr "prefix" "evex")
10993 (set_attr "mode" "<sseinsnmode>")])
;; Builtin expander for the masked truncating store form.
10995 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
10996 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
10997 (vec_merge:PMOV_DST_MODE_1
10998 (any_truncate:PMOV_DST_MODE_1
10999 (match_operand:<pmov_src_mode> 1 "register_operand"))
11001 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; AVX512BW V32HI -> V32QI truncation (vpmovwb) plus masked and
;; masked-store variants; mirrors the PMOV_DST_MODE_1 patterns above.
11004 (define_expand "truncv32hiv32qi2"
11005 [(set (match_operand:V32QI 0 "nonimmediate_operand")
11007 (match_operand:V32HI 1 "register_operand")))]
11010 (define_insn "avx512bw_<code>v32hiv32qi2"
11011 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
11012 (any_truncate:V32QI
11013 (match_operand:V32HI 1 "register_operand" "v,v")))]
11015 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
11016 [(set_attr "type" "ssemov")
11017 (set_attr "memory" "none,store")
11018 (set_attr "prefix" "evex")
11019 (set_attr "mode" "XI")])
;; Masked variant; SImode mask operand 3 selects per-byte results.
11021 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
11022 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
11024 (any_truncate:V32QI
11025 (match_operand:V32HI 1 "register_operand" "v,v"))
11026 (match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0")
11027 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
11029 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11030 [(set_attr "type" "ssemov")
11031 (set_attr "memory" "none,store")
11032 (set_attr "prefix" "evex")
11033 (set_attr "mode" "XI")])
;; Builtin expander for the masked truncating store form.
11035 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
11036 [(set (match_operand:V32QI 0 "nonimmediate_operand")
11038 (any_truncate:V32QI
11039 (match_operand:V32HI 1 "register_operand"))
11041 (match_operand:SI 2 "register_operand")))]
;; Half-width AVX512VL truncations where the destination still fills a
;; whole 128-bit vector (source is the double-size mode of the dest).
11044 (define_mode_iterator PMOV_DST_MODE_2
11045 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
11046 (define_mode_attr pmov_suff_2
11047 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
;; Standard-named truncation expander for these mode pairs.
11049 (define_expand "trunc<ssedoublemodelower><mode>2"
11050 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
11051 (truncate:PMOV_DST_MODE_2
11052 (match_operand:<ssedoublemode> 1 "register_operand")))]
11055 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
11056 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
11057 (any_truncate:PMOV_DST_MODE_2
11058 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
11060 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
11061 [(set_attr "type" "ssemov")
11062 (set_attr "memory" "none,store")
11063 (set_attr "prefix" "evex")
11064 (set_attr "mode" "<sseinsnmode>")])
;; Masked variant (merge with operand 2 under mask operand 3).
11066 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
11067 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
11068 (vec_merge:PMOV_DST_MODE_2
11069 (any_truncate:PMOV_DST_MODE_2
11070 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
11071 (match_operand:PMOV_DST_MODE_2 2 "nonimm_or_0_operand" "0C,0")
11072 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
11074 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11075 [(set_attr "type" "ssemov")
11076 (set_attr "memory" "none,store")
11077 (set_attr "prefix" "evex")
11078 (set_attr "mode" "<sseinsnmode>")])
;; Builtin expander for the masked truncating store form.
11080 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
11081 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
11082 (vec_merge:PMOV_DST_MODE_2
11083 (any_truncate:PMOV_DST_MODE_2
11084 (match_operand:<ssedoublemode> 1 "register_operand"))
11086 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Truncations to QImode sub-vectors narrower than 128 bits.  The result
;; occupies only the low bytes of an XMM register, so the insn pattern
;; below pads the destination to V16QI with an explicit zero vector
;; (pmov_dst_zeroed_3 gives the zero-filled remainder mode).
11089 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
11090 (define_mode_attr pmov_dst_3_lower
11091 [(V4DI "v4qi") (V2DI "v2qi") (V8SI "v8qi") (V4SI "v4qi") (V8HI "v8qi")])
11092 (define_mode_attr pmov_dst_3
11093 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
11094 (define_mode_attr pmov_dst_zeroed_3
11095 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
11096 (define_mode_attr pmov_suff_3
11097 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
;; Standard-named expander: rewrite the narrow destination as a V16QI
;; subreg and emit the zero-padded insn below.
11099 (define_expand "trunc<mode><pmov_dst_3_lower>2"
11100 [(set (match_operand:<pmov_dst_3> 0 "register_operand")
11101 (truncate:<pmov_dst_3>
11102 (match_operand:PMOV_SRC_MODE_3 1 "register_operand")))]
11105 operands[0] = simplify_gen_subreg (V16QImode, operands[0], <pmov_dst_3>mode, 0);
11106 emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>qi2 (operands[0],
11108 CONST0_RTX (<pmov_dst_zeroed_3>mode)));
;; Narrow truncation into a V16QI register with the unused high bytes
;; required to be zero (operand 2 is the const0 padding).
11112 (define_insn "avx512vl_<code><mode>v<ssescalarnum>qi2"
11113 [(set (match_operand:V16QI 0 "register_operand" "=v")
11115 (any_truncate:<pmov_dst_3>
11116 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
11117 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
11119 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
11120 [(set_attr "type" "ssemov")
11121 (set_attr "prefix" "evex")
11122 (set_attr "mode" "TI")])
;; V2DI -> V2QI truncating store (vpmovqb writes just 2 bytes to memory).
11124 (define_insn "*avx512vl_<code>v2div2qi2_store_1"
11125 [(set (match_operand:V2QI 0 "memory_operand" "=m")
11127 (match_operand:V2DI 1 "register_operand" "v")))]
11129 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
11130 [(set_attr "type" "ssemov")
11131 (set_attr "memory" "store")
11132 (set_attr "prefix" "evex")
11133 (set_attr "mode" "TI")])
;; Same store expressed through an HImode subreg; split before reload
;; into the V2QI-destination form above.
11135 (define_insn_and_split "*avx512vl_<code>v2div2qi2_store_2"
11136 [(set (match_operand:HI 0 "memory_operand")
11139 (match_operand:V2DI 1 "register_operand")) 0))]
11140 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11143 [(set (match_dup 0)
11144 (any_truncate:V2QI (match_dup 1)))]
11145 "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);")
;; Masked truncation into a register: low 2 bytes are the merge of the
;; truncated result with operand 2 under mask operand 3; the remaining
;; 14 bytes are explicit zeros.
11147 (define_insn "avx512vl_<code>v2div2qi2_mask"
11148 [(set (match_operand:V16QI 0 "register_operand" "=v")
11152 (match_operand:V2DI 1 "register_operand" "v"))
11154 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
11155 (parallel [(const_int 0) (const_int 1)]))
11156 (match_operand:QI 3 "register_operand" "Yk"))
11157 (const_vector:V14QI [(const_int 0) (const_int 0)
11158 (const_int 0) (const_int 0)
11159 (const_int 0) (const_int 0)
11160 (const_int 0) (const_int 0)
11161 (const_int 0) (const_int 0)
11162 (const_int 0) (const_int 0)
11163 (const_int 0) (const_int 0)])))]
11165 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11166 [(set_attr "type" "ssemov")
11167 (set_attr "prefix" "evex")
11168 (set_attr "mode" "TI")])
;; Zero-masking variant: masked-off lanes become zero ({z} form).
11170 (define_insn "*avx512vl_<code>v2div2qi2_mask_1"
11171 [(set (match_operand:V16QI 0 "register_operand" "=v")
11175 (match_operand:V2DI 1 "register_operand" "v"))
11176 (const_vector:V2QI [(const_int 0) (const_int 0)])
11177 (match_operand:QI 2 "register_operand" "Yk"))
11178 (const_vector:V14QI [(const_int 0) (const_int 0)
11179 (const_int 0) (const_int 0)
11180 (const_int 0) (const_int 0)
11181 (const_int 0) (const_int 0)
11182 (const_int 0) (const_int 0)
11183 (const_int 0) (const_int 0)
11184 (const_int 0) (const_int 0)])))]
11186 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11187 [(set_attr "type" "ssemov")
11188 (set_attr "prefix" "evex")
11189 (set_attr "mode" "TI")])
;; Masked truncating store: bytes with the mask clear are left untouched.
11191 (define_insn "*avx512vl_<code>v2div2qi2_mask_store_1"
11192 [(set (match_operand:V2QI 0 "memory_operand" "=m")
11195 (match_operand:V2DI 1 "register_operand" "v"))
11197 (match_operand:QI 2 "register_operand" "Yk")))]
11199 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11200 [(set_attr "type" "ssemov")
11201 (set_attr "memory" "store")
11202 (set_attr "prefix" "evex")
11203 (set_attr "mode" "TI")])
;; HImode-subreg spelling of the masked store; split before reload into
;; the V2QI-destination form above.
11205 (define_insn_and_split "avx512vl_<code>v2div2qi2_mask_store_2"
11206 [(set (match_operand:HI 0 "memory_operand")
11210 (match_operand:V2DI 1 "register_operand"))
11216 (parallel [(const_int 0) (const_int 1)]))
11217 (match_operand:QI 2 "register_operand")) 0))]
11218 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11221 [(set (match_dup 0)
11223 (any_truncate:V2QI (match_dup 1))
11226 "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);")
;; VI4_128_8_256 (V4SI/V4DI) -> V4QI truncating store (4 bytes written).
11228 (define_insn "*avx512vl_<code><mode>v4qi2_store_1"
11229 [(set (match_operand:V4QI 0 "memory_operand" "=m")
11231 (match_operand:VI4_128_8_256 1 "register_operand" "v")))]
11233 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
11234 [(set_attr "type" "ssemov")
11235 (set_attr "memory" "store")
11236 (set_attr "prefix" "evex")
11237 (set_attr "mode" "TI")])
;; SImode-subreg spelling of the same store; split before reload into
;; the V4QI-destination form above.
11239 (define_insn_and_split "*avx512vl_<code><mode>v4qi2_store_2"
11240 [(set (match_operand:SI 0 "memory_operand")
11243 (match_operand:VI4_128_8_256 1 "register_operand")) 0))]
11244 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11247 [(set (match_dup 0)
11248 (any_truncate:V4QI (match_dup 1)))]
11249 "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);")
;; Masked truncation into a register; low 4 bytes merged under mask
;; operand 3, upper 12 bytes explicitly zero.
11251 (define_insn "avx512vl_<code><mode>v4qi2_mask"
11252 [(set (match_operand:V16QI 0 "register_operand" "=v")
11256 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11258 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
11259 (parallel [(const_int 0) (const_int 1)
11260 (const_int 2) (const_int 3)]))
11261 (match_operand:QI 3 "register_operand" "Yk"))
11262 (const_vector:V12QI [(const_int 0) (const_int 0)
11263 (const_int 0) (const_int 0)
11264 (const_int 0) (const_int 0)
11265 (const_int 0) (const_int 0)
11266 (const_int 0) (const_int 0)
11267 (const_int 0) (const_int 0)])))]
11269 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11270 [(set_attr "type" "ssemov")
11271 (set_attr "prefix" "evex")
11272 (set_attr "mode" "TI")])
;; Zero-masking variant ({z} form).
11274 (define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
11275 [(set (match_operand:V16QI 0 "register_operand" "=v")
11279 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11280 (const_vector:V4QI [(const_int 0) (const_int 0)
11281 (const_int 0) (const_int 0)])
11282 (match_operand:QI 2 "register_operand" "Yk"))
11283 (const_vector:V12QI [(const_int 0) (const_int 0)
11284 (const_int 0) (const_int 0)
11285 (const_int 0) (const_int 0)
11286 (const_int 0) (const_int 0)
11287 (const_int 0) (const_int 0)
11288 (const_int 0) (const_int 0)])))]
11290 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11291 [(set_attr "type" "ssemov")
11292 (set_attr "prefix" "evex")
11293 (set_attr "mode" "TI")])
;; Masked truncating store (bytes with mask clear left untouched).
11295 (define_insn "*avx512vl_<code><mode>v4qi2_mask_store_1"
11296 [(set (match_operand:V4QI 0 "memory_operand" "=m")
11299 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11301 (match_operand:QI 2 "register_operand" "Yk")))]
11303 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11304 [(set_attr "type" "ssemov")
11305 (set_attr "memory" "store")
11306 (set_attr "prefix" "evex")
11307 (set_attr "mode" "TI")])
;; SImode-subreg spelling of the masked store; split before reload.
11309 (define_insn_and_split "avx512vl_<code><mode>v4qi2_mask_store_2"
11310 [(set (match_operand:SI 0 "memory_operand")
11314 (match_operand:VI4_128_8_256 1 "register_operand"))
11320 (parallel [(const_int 0) (const_int 1)
11321 (const_int 2) (const_int 3)]))
11322 (match_operand:QI 2 "register_operand")) 0))]
11323 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11326 [(set (match_dup 0)
11328 (any_truncate:V4QI (match_dup 1))
11331 "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);")
;; Sources that truncate to V8QI: V8HI (needs AVX512BW for vpmovwb) and
;; V8SI.
11333 (define_mode_iterator VI2_128_BW_4_256
11334 [(V8HI "TARGET_AVX512BW") V8SI])
;; -> V8QI truncating store (8 bytes written).
11336 (define_insn "*avx512vl_<code><mode>v8qi2_store_1"
11337 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11339 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")))]
11341 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
11342 [(set_attr "type" "ssemov")
11343 (set_attr "memory" "store")
11344 (set_attr "prefix" "evex")
11345 (set_attr "mode" "TI")])
;; DImode-subreg spelling of the same store; split before reload into
;; the V8QI-destination form above.
11347 (define_insn_and_split "*avx512vl_<code><mode>v8qi2_store_2"
11348 [(set (match_operand:DI 0 "memory_operand" "=m")
11351 (match_operand:VI2_128_BW_4_256 1 "register_operand")) 0))]
11352 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11355 [(set (match_dup 0)
11356 (any_truncate:V8QI (match_dup 1)))]
11357 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
;; Masked truncation into a register; low 8 bytes merged under mask
;; operand 3, upper 8 bytes explicitly zero.
11359 (define_insn "avx512vl_<code><mode>v8qi2_mask"
11360 [(set (match_operand:V16QI 0 "register_operand" "=v")
11364 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
11366 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
11367 (parallel [(const_int 0) (const_int 1)
11368 (const_int 2) (const_int 3)
11369 (const_int 4) (const_int 5)
11370 (const_int 6) (const_int 7)]))
11371 (match_operand:QI 3 "register_operand" "Yk"))
11372 (const_vector:V8QI [(const_int 0) (const_int 0)
11373 (const_int 0) (const_int 0)
11374 (const_int 0) (const_int 0)
11375 (const_int 0) (const_int 0)])))]
11377 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11378 [(set_attr "type" "ssemov")
11379 (set_attr "prefix" "evex")
11380 (set_attr "mode" "TI")])
;; Zero-masking variant ({z} form).
11382 (define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
11383 [(set (match_operand:V16QI 0 "register_operand" "=v")
11387 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
11388 (const_vector:V8QI [(const_int 0) (const_int 0)
11389 (const_int 0) (const_int 0)
11390 (const_int 0) (const_int 0)
11391 (const_int 0) (const_int 0)])
11392 (match_operand:QI 2 "register_operand" "Yk"))
11393 (const_vector:V8QI [(const_int 0) (const_int 0)
11394 (const_int 0) (const_int 0)
11395 (const_int 0) (const_int 0)
11396 (const_int 0) (const_int 0)])))]
11398 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11399 [(set_attr "type" "ssemov")
11400 (set_attr "prefix" "evex")
11401 (set_attr "mode" "TI")])
;; Masked truncating store (bytes with mask clear left untouched).
11403 (define_insn "*avx512vl_<code><mode>v8qi2_mask_store_1"
11404 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11407 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
11409 (match_operand:QI 2 "register_operand" "Yk")))]
11411 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11412 [(set_attr "type" "ssemov")
11413 (set_attr "memory" "store")
11414 (set_attr "prefix" "evex")
11415 (set_attr "mode" "TI")])
;; DImode-subreg spelling of the masked store; split before reload.
11417 (define_insn_and_split "avx512vl_<code><mode>v8qi2_mask_store_2"
11418 [(set (match_operand:DI 0 "memory_operand")
11422 (match_operand:VI2_128_BW_4_256 1 "register_operand"))
11428 (parallel [(const_int 0) (const_int 1)
11429 (const_int 2) (const_int 3)
11430 (const_int 4) (const_int 5)
11431 (const_int 6) (const_int 7)]))
11432 (match_operand:QI 2 "register_operand")) 0))]
11433 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11436 [(set (match_dup 0)
11438 (any_truncate:V8QI (match_dup 1))
11441 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
;; Truncations to HImode sub-vectors narrower than 128 bits; same
;; zero-padding scheme as the QImode group above, destination padded to
;; a full V8HI register.
11443 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
11444 (define_mode_attr pmov_dst_4
11445 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
11446 (define_mode_attr pmov_dst_4_lower
11447 [(V4DI "v4hi") (V2DI "v2hi") (V4SI "v4hi")])
11448 (define_mode_attr pmov_dst_zeroed_4
11449 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
11450 (define_mode_attr pmov_suff_4
11451 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
;; Standard-named expander: rewrite the narrow destination as a V8HI
;; subreg and emit the zero-padded insn below.
11453 (define_expand "trunc<mode><pmov_dst_4_lower>2"
11454 [(set (match_operand:<pmov_dst_4> 0 "register_operand")
11455 (truncate:<pmov_dst_4>
11456 (match_operand:PMOV_SRC_MODE_4 1 "register_operand")))]
11459 operands[0] = simplify_gen_subreg (V8HImode, operands[0], <pmov_dst_4>mode, 0);
11460 emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>hi2 (operands[0],
11462 CONST0_RTX (<pmov_dst_zeroed_4>mode)));
;; Narrow truncation into a V8HI register, unused high words zeroed
;; (operand 2 is the const0 padding).
11467 (define_insn "avx512vl_<code><mode>v<ssescalarnum>hi2"
11468 [(set (match_operand:V8HI 0 "register_operand" "=v")
11470 (any_truncate:<pmov_dst_4>
11471 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
11472 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
11474 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
11475 [(set_attr "type" "ssemov")
11476 (set_attr "prefix" "evex")
11477 (set_attr "mode" "TI")])
;; VI4_128_8_256 -> V4HI truncating store (8 bytes written).
11479 (define_insn "*avx512vl_<code><mode>v4hi2_store_1"
11480 [(set (match_operand:V4HI 0 "memory_operand" "=m")
11482 (match_operand:VI4_128_8_256 1 "register_operand" "v")))]
11484 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
11485 [(set_attr "type" "ssemov")
11486 (set_attr "memory" "store")
11487 (set_attr "prefix" "evex")
11488 (set_attr "mode" "TI")])
;; DImode-subreg spelling of the same store; split before reload into
;; the V4HI-destination form above.
11490 (define_insn_and_split "*avx512vl_<code><mode>v4hi2_store_2"
11491 [(set (match_operand:DI 0 "memory_operand")
11494 (match_operand:VI4_128_8_256 1 "register_operand")) 0))]
11495 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11498 [(set (match_dup 0)
11499 (any_truncate:V4HI (match_dup 1)))]
11500 "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);")
;; Masked truncation into a register; low 4 words merged under mask
;; operand 3, upper 4 words explicitly zero.
11502 (define_insn "avx512vl_<code><mode>v4hi2_mask"
11503 [(set (match_operand:V8HI 0 "register_operand" "=v")
11507 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11509 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
11510 (parallel [(const_int 0) (const_int 1)
11511 (const_int 2) (const_int 3)]))
11512 (match_operand:QI 3 "register_operand" "Yk"))
11513 (const_vector:V4HI [(const_int 0) (const_int 0)
11514 (const_int 0) (const_int 0)])))]
11516 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11517 [(set_attr "type" "ssemov")
11518 (set_attr "prefix" "evex")
11519 (set_attr "mode" "TI")])
;; Zero-masking variant ({z} form).
11521 (define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
11522 [(set (match_operand:V8HI 0 "register_operand" "=v")
11526 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11527 (const_vector:V4HI [(const_int 0) (const_int 0)
11528 (const_int 0) (const_int 0)])
11529 (match_operand:QI 2 "register_operand" "Yk"))
11530 (const_vector:V4HI [(const_int 0) (const_int 0)
11531 (const_int 0) (const_int 0)])))]
11533 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11534 [(set_attr "type" "ssemov")
11535 (set_attr "prefix" "evex")
11536 (set_attr "mode" "TI")])
;; Masked truncating store.  The %t1/%g1 choice prints the source with
;; the register width matching the element size (see the size test on
;; GET_MODE_INNER below).
11538 (define_insn "*avx512vl_<code><mode>v4hi2_mask_store_1"
11539 [(set (match_operand:V4HI 0 "memory_operand" "=m")
11542 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
11544 (match_operand:QI 2 "register_operand" "Yk")))]
11547 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
11548 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
11549 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
11551 [(set_attr "type" "ssemov")
11552 (set_attr "memory" "store")
11553 (set_attr "prefix" "evex")
11554 (set_attr "mode" "TI")])
;; DImode-subreg spelling of the masked store; split before reload.
11556 (define_insn_and_split "avx512vl_<code><mode>v4hi2_mask_store_2"
11557 [(set (match_operand:DI 0 "memory_operand")
11561 (match_operand:VI4_128_8_256 1 "register_operand"))
11567 (parallel [(const_int 0) (const_int 1)
11568 (const_int 2) (const_int 3)]))
11569 (match_operand:QI 2 "register_operand")) 0))]
11570 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11573 [(set (match_dup 0)
11575 (any_truncate:V4HI (match_dup 1))
11578 "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);")
;; V2DI -> V2HI truncating store (vpmovqw writes 4 bytes).
11581 (define_insn "*avx512vl_<code>v2div2hi2_store_1"
11582 [(set (match_operand:V2HI 0 "memory_operand" "=m")
11584 (match_operand:V2DI 1 "register_operand" "v")))]
11586 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
11587 [(set_attr "type" "ssemov")
11588 (set_attr "memory" "store")
11589 (set_attr "prefix" "evex")
11590 (set_attr "mode" "TI")])
;; SImode-subreg spelling of the same store; split before reload into
;; the V2HI-destination form above.
11592 (define_insn_and_split "*avx512vl_<code>v2div2hi2_store_2"
11593 [(set (match_operand:SI 0 "memory_operand")
11596 (match_operand:V2DI 1 "register_operand")) 0))]
11597 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11600 [(set (match_dup 0)
11601 (any_truncate:V2HI (match_dup 1)))]
11602 "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);")
;; Masked truncation into a register; low 2 words merged under mask
;; operand 3, upper 6 words explicitly zero.
11604 (define_insn "avx512vl_<code>v2div2hi2_mask"
11605 [(set (match_operand:V8HI 0 "register_operand" "=v")
11609 (match_operand:V2DI 1 "register_operand" "v"))
11611 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
11612 (parallel [(const_int 0) (const_int 1)]))
11613 (match_operand:QI 3 "register_operand" "Yk"))
11614 (const_vector:V6HI [(const_int 0) (const_int 0)
11615 (const_int 0) (const_int 0)
11616 (const_int 0) (const_int 0)])))]
11618 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11619 [(set_attr "type" "ssemov")
11620 (set_attr "prefix" "evex")
11621 (set_attr "mode" "TI")])
;; Zero-masking variant ({z} form).
11623 (define_insn "*avx512vl_<code>v2div2hi2_mask_1"
11624 [(set (match_operand:V8HI 0 "register_operand" "=v")
11628 (match_operand:V2DI 1 "register_operand" "v"))
11629 (const_vector:V2HI [(const_int 0) (const_int 0)])
11630 (match_operand:QI 2 "register_operand" "Yk"))
11631 (const_vector:V6HI [(const_int 0) (const_int 0)
11632 (const_int 0) (const_int 0)
11633 (const_int 0) (const_int 0)])))]
11635 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11636 [(set_attr "type" "ssemov")
11637 (set_attr "prefix" "evex")
11638 (set_attr "mode" "TI")])
;; Masked truncating store (%g1 prints the wider register name).
11640 (define_insn "*avx512vl_<code>v2div2hi2_mask_store_1"
11641 [(set (match_operand:V2HI 0 "memory_operand" "=m")
11644 (match_operand:V2DI 1 "register_operand" "v"))
11646 (match_operand:QI 2 "register_operand" "Yk")))]
11648 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
11649 [(set_attr "type" "ssemov")
11650 (set_attr "memory" "store")
11651 (set_attr "prefix" "evex")
11652 (set_attr "mode" "TI")])
;; SImode-subreg spelling of the masked store; split before reload.
11654 (define_insn_and_split "avx512vl_<code>v2div2hi2_mask_store_2"
11655 [(set (match_operand:SI 0 "memory_operand")
11659 (match_operand:V2DI 1 "register_operand"))
11665 (parallel [(const_int 0) (const_int 1)]))
11666 (match_operand:QI 2 "register_operand")) 0))]
11667 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11670 [(set (match_dup 0)
11672 (any_truncate:V2HI (match_dup 1))
11675 "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);")
;; V2DI -> V2SI truncations (vpmovqd).  The register form pads the
;; destination to a V4SI with an explicit zero upper half.
11677 (define_expand "truncv2div2si2"
11678 [(set (match_operand:V2SI 0 "register_operand")
11680 (match_operand:V2DI 1 "register_operand")))]
11683 operands[0] = simplify_gen_subreg (V4SImode, operands[0], V2SImode, 0);
11684 emit_insn (gen_avx512vl_truncatev2div2si2 (operands[0],
11686 CONST0_RTX (V2SImode)));
;; Truncation into a V4SI register, upper V2SI half zeroed (operand 2).
11690 (define_insn "avx512vl_<code>v2div2si2"
11691 [(set (match_operand:V4SI 0 "register_operand" "=v")
11694 (match_operand:V2DI 1 "register_operand" "v"))
11695 (match_operand:V2SI 2 "const0_operand")))]
11697 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
11698 [(set_attr "type" "ssemov")
11699 (set_attr "prefix" "evex")
11700 (set_attr "mode" "TI")])
;; V2SI truncating store (8 bytes written).
11702 (define_insn "*avx512vl_<code>v2div2si2_store_1"
11703 [(set (match_operand:V2SI 0 "memory_operand" "=m")
11705 (match_operand:V2DI 1 "register_operand" "v")))]
11707 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
11708 [(set_attr "type" "ssemov")
11709 (set_attr "memory" "store")
11710 (set_attr "prefix" "evex")
11711 (set_attr "mode" "TI")])
;; DImode-subreg spelling of the same store; split before reload into
;; the V2SI-destination form above.
11713 (define_insn_and_split "*avx512vl_<code>v2div2si2_store_2"
11714 [(set (match_operand:DI 0 "memory_operand")
11717 (match_operand:V2DI 1 "register_operand")) 0))]
11718 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11721 [(set (match_dup 0)
11722 (any_truncate:V2SI (match_dup 1)))]
11723 "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);")
;; Masked truncation into a register; low 2 dwords merged under mask
;; operand 3, upper 2 dwords explicitly zero.
11725 (define_insn "avx512vl_<code>v2div2si2_mask"
11726 [(set (match_operand:V4SI 0 "register_operand" "=v")
11730 (match_operand:V2DI 1 "register_operand" "v"))
11732 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
11733 (parallel [(const_int 0) (const_int 1)]))
11734 (match_operand:QI 3 "register_operand" "Yk"))
11735 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
11737 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11738 [(set_attr "type" "ssemov")
11739 (set_attr "prefix" "evex")
11740 (set_attr "mode" "TI")])
;; Zero-masking variant ({z} form).
11742 (define_insn "*avx512vl_<code>v2div2si2_mask_1"
11743 [(set (match_operand:V4SI 0 "register_operand" "=v")
11747 (match_operand:V2DI 1 "register_operand" "v"))
11748 (const_vector:V2SI [(const_int 0) (const_int 0)])
11749 (match_operand:QI 2 "register_operand" "Yk"))
11750 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
11752 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11753 [(set_attr "type" "ssemov")
11754 (set_attr "prefix" "evex")
11755 (set_attr "mode" "TI")])
;; Masked truncating store (dwords with mask clear left untouched).
11757 (define_insn "*avx512vl_<code>v2div2si2_mask_store_1"
11758 [(set (match_operand:V2SI 0 "memory_operand" "=m")
11761 (match_operand:V2DI 1 "register_operand" "v"))
11763 (match_operand:QI 2 "register_operand" "Yk")))]
11765 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11766 [(set_attr "type" "ssemov")
11767 (set_attr "memory" "store")
11768 (set_attr "prefix" "evex")
11769 (set_attr "mode" "TI")])
;; DImode-subreg spelling of the masked store; split before reload.
11771 (define_insn_and_split "avx512vl_<code>v2div2si2_mask_store_2"
11772 [(set (match_operand:DI 0 "memory_operand")
11776 (match_operand:V2DI 1 "register_operand"))
11782 (parallel [(const_int 0) (const_int 1)]))
11783 (match_operand:QI 2 "register_operand")) 0))]
11784 "TARGET_AVX512VL && ix86_pre_reload_split ()"
11787 [(set (match_dup 0)
11789 (any_truncate:V2SI (match_dup 1))
11792 "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);")
11794 (define_expand "truncv8div8qi2"
11795 [(set (match_operand:V8QI 0 "register_operand")
11797 (match_operand:V8DI 1 "register_operand")))]
11800 operands[0] = simplify_gen_subreg (V16QImode, operands[0], V8QImode, 0);
11801 emit_insn (gen_avx512f_truncatev8div16qi2 (operands[0], operands[1]));
11805 (define_insn "avx512f_<code>v8div16qi2"
11806 [(set (match_operand:V16QI 0 "register_operand" "=v")
11809 (match_operand:V8DI 1 "register_operand" "v"))
11810 (const_vector:V8QI [(const_int 0) (const_int 0)
11811 (const_int 0) (const_int 0)
11812 (const_int 0) (const_int 0)
11813 (const_int 0) (const_int 0)])))]
11815 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
11816 [(set_attr "type" "ssemov")
11817 (set_attr "prefix" "evex")
11818 (set_attr "mode" "TI")])
11820 (define_insn "*avx512f_<code>v8div16qi2_store_1"
11821 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11823 (match_operand:V8DI 1 "register_operand" "v")))]
11825 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
11826 [(set_attr "type" "ssemov")
11827 (set_attr "memory" "store")
11828 (set_attr "prefix" "evex")
11829 (set_attr "mode" "TI")])
11831 (define_insn_and_split "*avx512f_<code>v8div16qi2_store_2"
11832 [(set (match_operand:DI 0 "memory_operand")
11835 (match_operand:V8DI 1 "register_operand")) 0))]
11836 "TARGET_AVX512F && ix86_pre_reload_split ()"
11839 [(set (match_dup 0)
11840 (any_truncate:V8QI (match_dup 1)))]
11841 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
;; Masked vpmov{,s,us}qb, merge form: per-element mask %3 selects between
;; the truncated result and the low 8 bytes of merge source %2 (%N3 prints
;; {z} when %2 is the zero constant); upper half of the destination is
;; always zero.
11843 (define_insn "avx512f_<code>v8div16qi2_mask"
11844 [(set (match_operand:V16QI 0 "register_operand" "=v")
11848 (match_operand:V8DI 1 "register_operand" "v"))
11850 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
11851 (parallel [(const_int 0) (const_int 1)
11852 (const_int 2) (const_int 3)
11853 (const_int 4) (const_int 5)
11854 (const_int 6) (const_int 7)]))
11855 (match_operand:QI 3 "register_operand" "Yk"))
11856 (const_vector:V8QI [(const_int 0) (const_int 0)
11857 (const_int 0) (const_int 0)
11858 (const_int 0) (const_int 0)
11859 (const_int 0) (const_int 0)])))]
11861 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11862 [(set_attr "type" "ssemov")
11863 (set_attr "prefix" "evex")
11864 (set_attr "mode" "TI")])
;; Zero-masked variant: merge source is the zero vector, so emit the {z}
;; (zeroing) form of vpmov{,s,us}qb explicitly.
11866 (define_insn "*avx512f_<code>v8div16qi2_mask_1"
11867 [(set (match_operand:V16QI 0 "register_operand" "=v")
11871 (match_operand:V8DI 1 "register_operand" "v"))
11872 (const_vector:V8QI [(const_int 0) (const_int 0)
11873 (const_int 0) (const_int 0)
11874 (const_int 0) (const_int 0)
11875 (const_int 0) (const_int 0)])
11876 (match_operand:QI 2 "register_operand" "Yk"))
11877 (const_vector:V8QI [(const_int 0) (const_int 0)
11878 (const_int 0) (const_int 0)
11879 (const_int 0) (const_int 0)
11880 (const_int 0) (const_int 0)])))]
11882 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11883 [(set_attr "type" "ssemov")
11884 (set_attr "prefix" "evex")
11885 (set_attr "mode" "TI")])
;; Masked truncating store to V8QI memory: only bytes whose mask bit in %2
;; is set are written.
11887 (define_insn "*avx512f_<code>v8div16qi2_mask_store_1"
11888 [(set (match_operand:V8QI 0 "memory_operand" "=m")
11891 (match_operand:V8DI 1 "register_operand" "v"))
11893 (match_operand:QI 2 "register_operand" "Yk")))]
11895 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
11896 [(set_attr "type" "ssemov")
11897 (set_attr "memory" "store")
11898 (set_attr "prefix" "evex")
11899 (set_attr "mode" "TI")])
;; Masked truncating store matched through a DI-mode MEM; split before
;; reload by retyping the MEM as V8QImode so _mask_store_1 can match.
;; NOTE(review): interior RTL lines appear dropped in this extract --
;; verify against upstream sse.md.
11901 (define_insn_and_split "avx512f_<code>v8div16qi2_mask_store_2"
11902 [(set (match_operand:DI 0 "memory_operand")
11906 (match_operand:V8DI 1 "register_operand"))
11912 (parallel [(const_int 0) (const_int 1)
11913 (const_int 2) (const_int 3)
11914 (const_int 4) (const_int 5)
11915 (const_int 6) (const_int 7)]))
11916 (match_operand:QI 2 "register_operand")) 0))]
11917 "TARGET_AVX512F && ix86_pre_reload_split ()"
11920 [(set (match_dup 0)
11922 (any_truncate:V8QI (match_dup 1))
11925 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
11927 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11929 ;; Parallel integral arithmetic
11931 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negation: there is no vector NEG insn, so expand as
;; (0 - x) using the subtraction patterns; operand 2 is the forced-to-reg
;; zero vector.
11933 (define_expand "neg<mode>2"
11934 [(set (match_operand:VI_AVX2 0 "register_operand")
11937 (match_operand:VI_AVX2 1 "vector_operand")))]
11939 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Vector integer add/sub expander; fix up operands so at most one is a
;; memory reference (and canonicalize commutative PLUS).
11941 (define_expand "<insn><mode>3"
11942 [(set (match_operand:VI_AVX2 0 "register_operand")
11944 (match_operand:VI_AVX2 1 "vector_operand")
11945 (match_operand:VI_AVX2 2 "vector_operand")))]
11947 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Conditional (masked) add/sub for the cond_* optab: operand 1 is the
;; mask, 4 the else-value; forwards to the <insn><mode>3_mask pattern.
11949 (define_expand "cond_<insn><mode>"
11950 [(set (match_operand:VI1248_AVX512VLBW 0 "register_operand")
11951 (vec_merge:VI1248_AVX512VLBW
11952 (plusminus:VI1248_AVX512VLBW
11953 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand")
11954 (match_operand:VI1248_AVX512VLBW 3 "nonimmediate_operand"))
11955 (match_operand:VI1248_AVX512VLBW 4 "nonimm_or_0_operand")
11956 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
11959 emit_insn (gen_<insn><mode>3_mask (operands[0],
;; Masked add/sub expander for 32/64-bit element vectors (AVX512F/VL).
11967 (define_expand "<insn><mode>3_mask"
11968 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11969 (vec_merge:VI48_AVX512VL
11970 (plusminus:VI48_AVX512VL
11971 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11972 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11973 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
11974 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11976 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Masked add/sub expander for 8/16-bit element vectors (AVX512BW/VL).
11978 (define_expand "<insn><mode>3_mask"
11979 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
11980 (vec_merge:VI12_AVX512VL
11981 (plusminus:VI12_AVX512VL
11982 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
11983 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
11984 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
11985 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11987 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; padd/psub: two alternatives -- legacy SSE2 two-operand form (dest
;; matches operand 1) and the VEX/EVEX three-operand form, which also
;; accepts embedded broadcast (Br) for the second source.
11989 (define_insn "*<insn><mode>3"
11990 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,<v_Yw>")
11992 (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,<v_Yw>")
11993 (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,<v_Yw>mBr")))]
11994 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11996 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
11997 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11998 [(set_attr "isa" "noavx,avx")
11999 (set_attr "type" "sseiadd")
12000 (set_attr "prefix_data16" "1,*")
12001 (set_attr "prefix" "orig,maybe_evex")
12002 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpadd/vpsub for 32/64-bit elements; %N3 prints {z} when the
;; merge source (operand 3) is the zero constant.
12004 (define_insn "*<insn><mode>3_mask"
12005 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12006 (vec_merge:VI48_AVX512VL
12007 (plusminus:VI48_AVX512VL
12008 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
12009 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
12010 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
12011 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
12012 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
12013 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
12014 [(set_attr "type" "sseiadd")
12015 (set_attr "prefix" "evex")
12016 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpadd/vpsub for 8/16-bit elements; requires AVX512BW since byte
;; and word masking is a BW feature.
12018 (define_insn "*<insn><mode>3_mask"
12019 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
12020 (vec_merge:VI12_AVX512VL
12021 (plusminus:VI12_AVX512VL
12022 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
12023 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
12024 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand" "0C")
12025 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
12026 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
12027 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
12028 [(set_attr "type" "sseiadd")
12029 (set_attr "prefix" "evex")
12030 (set_attr "mode" "<sseinsnmode>")])
;; Saturating add/sub (padds/paddus/psubs/psubus) expander, with optional
;; AVX512 masking via <mask_name>.
12032 (define_expand "<sse2_avx2>_<insn><mode>3<mask_name>"
12033 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
12034 (sat_plusminus:VI12_AVX2_AVX512BW
12035 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand")
12036 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))]
12037 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12038 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Saturating add/sub insn: legacy two-operand SSE2 alternative plus the
;; VEX/EVEX three-operand (optionally masked) alternative.
12040 (define_insn "*<sse2_avx2>_<insn><mode>3<mask_name>"
12041 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,<v_Yw>")
12042 (sat_plusminus:VI12_AVX2_AVX512BW
12043 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "<comm>0,<v_Yw>")
12044 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,<v_Yw>m")))]
12045 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
12046 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
12048 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
12049 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12050 [(set_attr "isa" "noavx,avx")
12051 (set_attr "type" "sseiadd")
12052 (set_attr "prefix_data16" "1,*")
12053 (set_attr "prefix" "orig,maybe_evex")
12054 (set_attr "mode" "TI")])
12056 ;; PR96906 - optimize psubusw compared to 0 into pminuw compared to op0.
;; (x -us y) == 0  <=>  umin (x, y) == x; pminuw+pcmpeq avoids the
;; saturating subtract.  V8HImode additionally needs SSE4.1 for pminuw.
;; NOTE(review): the "(define_split" opener line appears to be missing
;; from this extract -- confirm against upstream sse.md.
12058 [(set (match_operand:VI12_AVX2 0 "register_operand")
12060 (us_minus:VI12_AVX2
12061 (match_operand:VI12_AVX2 1 "vector_operand")
12062 (match_operand:VI12_AVX2 2 "vector_operand"))
12063 (match_operand:VI12_AVX2 3 "const0_operand")))]
12065 && (<MODE>mode != V8HImode || TARGET_SSE4_1)
12066 && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)"
12067 [(set (match_dup 4)
12068 (umin:VI12_AVX2 (match_dup 1) (match_dup 2)))
12070 (eq:VI12_AVX2 (match_dup 4) (match_dup 1)))]
12071 "operands[4] = gen_reg_rtx (<MODE>mode);")
;; V8QI multiply: no byte-multiply insn exists, so synthesize it via
;; widening to HImode halves (ix86_expand_vecop_qihi).
12073 (define_expand "mulv8qi3"
12074 [(set (match_operand:V8QI 0 "register_operand")
12075 (mult:V8QI (match_operand:V8QI 1 "register_operand")
12076 (match_operand:V8QI 2 "register_operand")))]
12077 "TARGET_AVX512VL && TARGET_AVX512BW && TARGET_64BIT"
12079 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Byte-element vector multiply for the wider VI1 modes, likewise
;; synthesized through HImode operations.
12083 (define_expand "mul<mode>3"
12084 [(set (match_operand:VI1_AVX512 0 "register_operand")
12085 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
12086 (match_operand:VI1_AVX512 2 "register_operand")))]
12089 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Conditional (masked) word multiply for the cond_* optab; forwards to
;; mul<mode>3_mask.
12093 (define_expand "cond_mul<mode>"
12094 [(set (match_operand:VI2_AVX512VL 0 "register_operand")
12095 (vec_merge:VI2_AVX512VL
12097 (match_operand:VI2_AVX512VL 2 "vector_operand")
12098 (match_operand:VI2_AVX512VL 3 "vector_operand"))
12099 (match_operand:VI2_AVX512VL 4 "nonimm_or_0_operand")
12100 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
12103 emit_insn (gen_mul<mode>3_mask (operands[0],
;; Low-part word multiply (pmullw) expander, with optional AVX512 masking.
12111 (define_expand "mul<mode>3<mask_name>"
12112 [(set (match_operand:VI2_AVX2 0 "register_operand")
12113 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
12114 (match_operand:VI2_AVX2 2 "vector_operand")))]
12115 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12116 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; pmullw/vpmullw insn.  MULT is commutative ("%0"); at most one memory
;; operand is allowed.
12118 (define_insn "*mul<mode>3<mask_name>"
12119 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
12120 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>")
12121 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))]
12122 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
12123 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12125 pmullw\t{%2, %0|%0, %2}
12126 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12127 [(set_attr "isa" "noavx,avx")
12128 (set_attr "type" "sseimul")
12129 (set_attr "prefix_data16" "1,*")
12130 (set_attr "prefix" "orig,vex")
12131 (set_attr "mode" "<sseinsnmode>")])
;; High-part word multiply (pmulhw/pmulhuw) expander: widen both inputs,
;; multiply, shift the double-wide product right, truncate.
12133 (define_expand "<s>mul<mode>3_highpart<mask_name>"
12134 [(set (match_operand:VI2_AVX2 0 "register_operand")
12136 (lshiftrt:<ssedoublemode>
12137 (mult:<ssedoublemode>
12138 (any_extend:<ssedoublemode>
12139 (match_operand:VI2_AVX2 1 "vector_operand"))
12140 (any_extend:<ssedoublemode>
12141 (match_operand:VI2_AVX2 2 "vector_operand")))
12144 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12145 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; pmulh<u>w insn; <u> selects the signed (pmulhw) or unsigned (pmulhuw)
;; form from any_extend.
12147 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
12148 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
12150 (lshiftrt:<ssedoublemode>
12151 (mult:<ssedoublemode>
12152 (any_extend:<ssedoublemode>
12153 (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>"))
12154 (any_extend:<ssedoublemode>
12155 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))
12157 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
12158 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12160 pmulh<u>w\t{%2, %0|%0, %2}
12161 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12162 [(set_attr "isa" "noavx,avx")
12163 (set_attr "type" "sseimul")
12164 (set_attr "prefix_data16" "1,*")
12165 (set_attr "prefix" "orig,vex")
12166 (set_attr "mode" "<sseinsnmode>")])
;; vpmuludq (512-bit) expander: multiply the even-numbered unsigned SI
;; elements of each input to produce a full V8DI product.
12168 (define_expand "vec_widen_umult_even_v16si<mask_name>"
12169 [(set (match_operand:V8DI 0 "register_operand")
12173 (match_operand:V16SI 1 "nonimmediate_operand")
12174 (parallel [(const_int 0) (const_int 2)
12175 (const_int 4) (const_int 6)
12176 (const_int 8) (const_int 10)
12177 (const_int 12) (const_int 14)])))
12180 (match_operand:V16SI 2 "nonimmediate_operand")
12181 (parallel [(const_int 0) (const_int 2)
12182 (const_int 4) (const_int 6)
12183 (const_int 8) (const_int 10)
12184 (const_int 12) (const_int 14)])))))]
12186 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; vpmuludq (512-bit) insn matching the expander above.
12188 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
12189 [(set (match_operand:V8DI 0 "register_operand" "=v")
12193 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
12194 (parallel [(const_int 0) (const_int 2)
12195 (const_int 4) (const_int 6)
12196 (const_int 8) (const_int 10)
12197 (const_int 12) (const_int 14)])))
12200 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
12201 (parallel [(const_int 0) (const_int 2)
12202 (const_int 4) (const_int 6)
12203 (const_int 8) (const_int 10)
12204 (const_int 12) (const_int 14)])))))]
12205 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12206 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12207 [(set_attr "type" "sseimul")
12208 (set_attr "prefix_extra" "1")
12209 (set_attr "prefix" "evex")
12210 (set_attr "mode" "XI")])
;; vpmuludq (256-bit) expander: even unsigned SI elements -> V4DI.
12212 (define_expand "vec_widen_umult_even_v8si<mask_name>"
12213 [(set (match_operand:V4DI 0 "register_operand")
12217 (match_operand:V8SI 1 "nonimmediate_operand")
12218 (parallel [(const_int 0) (const_int 2)
12219 (const_int 4) (const_int 6)])))
12222 (match_operand:V8SI 2 "nonimmediate_operand")
12223 (parallel [(const_int 0) (const_int 2)
12224 (const_int 4) (const_int 6)])))))]
12225 "TARGET_AVX2 && <mask_avx512vl_condition>"
12226 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuludq (256-bit) insn matching the expander above.
12228 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
12229 [(set (match_operand:V4DI 0 "register_operand" "=v")
12233 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
12234 (parallel [(const_int 0) (const_int 2)
12235 (const_int 4) (const_int 6)])))
12238 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
12239 (parallel [(const_int 0) (const_int 2)
12240 (const_int 4) (const_int 6)])))))]
12241 "TARGET_AVX2 && <mask_avx512vl_condition>
12242 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12243 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12244 [(set_attr "type" "sseimul")
12245 (set_attr "prefix" "maybe_evex")
12246 (set_attr "mode" "OI")])
;; pmuludq (128-bit) expander: even unsigned SI elements -> V2DI.
12248 (define_expand "vec_widen_umult_even_v4si<mask_name>"
12249 [(set (match_operand:V2DI 0 "register_operand")
12253 (match_operand:V4SI 1 "vector_operand")
12254 (parallel [(const_int 0) (const_int 2)])))
12257 (match_operand:V4SI 2 "vector_operand")
12258 (parallel [(const_int 0) (const_int 2)])))))]
12259 "TARGET_SSE2 && <mask_avx512vl_condition>"
12260 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuludq/vpmuludq (128-bit) insn: legacy two-operand and VEX/EVEX
;; three-operand alternatives.
12262 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
12263 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
12267 (match_operand:V4SI 1 "vector_operand" "%0,v")
12268 (parallel [(const_int 0) (const_int 2)])))
12271 (match_operand:V4SI 2 "vector_operand" "xBm,vm")
12272 (parallel [(const_int 0) (const_int 2)])))))]
12273 "TARGET_SSE2 && <mask_avx512vl_condition>
12274 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12276 pmuludq\t{%2, %0|%0, %2}
12277 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12278 [(set_attr "isa" "noavx,avx")
12279 (set_attr "type" "sseimul")
12280 (set_attr "prefix_data16" "1,*")
12281 (set_attr "prefix" "orig,maybe_evex")
12282 (set_attr "mode" "TI")])
;; vpmuldq (512-bit) expander: multiply the even-numbered signed SI
;; elements to produce a full V8DI product.
12284 (define_expand "vec_widen_smult_even_v16si<mask_name>"
12285 [(set (match_operand:V8DI 0 "register_operand")
12289 (match_operand:V16SI 1 "nonimmediate_operand")
12290 (parallel [(const_int 0) (const_int 2)
12291 (const_int 4) (const_int 6)
12292 (const_int 8) (const_int 10)
12293 (const_int 12) (const_int 14)])))
12296 (match_operand:V16SI 2 "nonimmediate_operand")
12297 (parallel [(const_int 0) (const_int 2)
12298 (const_int 4) (const_int 6)
12299 (const_int 8) (const_int 10)
12300 (const_int 12) (const_int 14)])))))]
12302 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; vpmuldq (512-bit) insn matching the expander above.
12304 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
12305 [(set (match_operand:V8DI 0 "register_operand" "=v")
12309 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
12310 (parallel [(const_int 0) (const_int 2)
12311 (const_int 4) (const_int 6)
12312 (const_int 8) (const_int 10)
12313 (const_int 12) (const_int 14)])))
12316 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
12317 (parallel [(const_int 0) (const_int 2)
12318 (const_int 4) (const_int 6)
12319 (const_int 8) (const_int 10)
12320 (const_int 12) (const_int 14)])))))]
12321 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12322 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12323 [(set_attr "type" "sseimul")
12324 (set_attr "prefix_extra" "1")
12325 (set_attr "prefix" "evex")
12326 (set_attr "mode" "XI")])
;; vpmuldq (256-bit) expander: even signed SI elements -> V4DI.
12328 (define_expand "vec_widen_smult_even_v8si<mask_name>"
12329 [(set (match_operand:V4DI 0 "register_operand")
12333 (match_operand:V8SI 1 "nonimmediate_operand")
12334 (parallel [(const_int 0) (const_int 2)
12335 (const_int 4) (const_int 6)])))
12338 (match_operand:V8SI 2 "nonimmediate_operand")
12339 (parallel [(const_int 0) (const_int 2)
12340 (const_int 4) (const_int 6)])))))]
12341 "TARGET_AVX2 && <mask_avx512vl_condition>"
12342 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuldq (256-bit) insn matching the expander above.
12344 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
12345 [(set (match_operand:V4DI 0 "register_operand" "=v")
12349 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
12350 (parallel [(const_int 0) (const_int 2)
12351 (const_int 4) (const_int 6)])))
12354 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
12355 (parallel [(const_int 0) (const_int 2)
12356 (const_int 4) (const_int 6)])))))]
12357 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12358 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12359 [(set_attr "type" "sseimul")
12360 (set_attr "prefix_extra" "1")
12361 (set_attr "prefix" "vex")
12362 (set_attr "mode" "OI")])
;; pmuldq (128-bit, SSE4.1) expander: even signed SI elements -> V2DI.
12364 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
12365 [(set (match_operand:V2DI 0 "register_operand")
12369 (match_operand:V4SI 1 "vector_operand")
12370 (parallel [(const_int 0) (const_int 2)])))
12373 (match_operand:V4SI 2 "vector_operand")
12374 (parallel [(const_int 0) (const_int 2)])))))]
12375 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
12376 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuldq/vpmuldq (128-bit) insn; the Yr alternative prefers the
;; non-REX-encodable registers for smaller encodings.
12378 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
12379 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
12383 (match_operand:V4SI 1 "vector_operand" "%0,0,v")
12384 (parallel [(const_int 0) (const_int 2)])))
12387 (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
12388 (parallel [(const_int 0) (const_int 2)])))))]
12389 "TARGET_SSE4_1 && <mask_avx512vl_condition>
12390 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12392 pmuldq\t{%2, %0|%0, %2}
12393 pmuldq\t{%2, %0|%0, %2}
12394 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12395 [(set_attr "isa" "noavx,noavx,avx")
12396 (set_attr "type" "sseimul")
12397 (set_attr "prefix_data16" "1,1,*")
12398 (set_attr "prefix_extra" "1")
12399 (set_attr "prefix" "orig,orig,vex")
12400 (set_attr "mode" "TI")])
;; vpmaddwd expressed as an unspec (UNSPEC_PMADDWD512) for the AVX512BW
;; builtin; the explicit-RTL forms are the avx2/sse2 patterns below.
12402 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
12403 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
12404 (unspec:<sseunpackmode>
12405 [(match_operand:VI2_AVX2 1 "register_operand" "v")
12406 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
12407 UNSPEC_PMADDWD512))]
12408 "TARGET_AVX512BW && <mask_mode512bit_condition>"
12409 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
12410 [(set_attr "type" "sseiadd")
12411 (set_attr "prefix" "evex")
12412 (set_attr "mode" "XI")])
;; vpmaddwd (256-bit) expander: multiply adjacent signed word pairs and
;; add each pair, i.e. even-lane products plus odd-lane products.
12414 (define_expand "avx2_pmaddwd"
12415 [(set (match_operand:V8SI 0 "register_operand")
12420 (match_operand:V16HI 1 "nonimmediate_operand")
12421 (parallel [(const_int 0) (const_int 2)
12422 (const_int 4) (const_int 6)
12423 (const_int 8) (const_int 10)
12424 (const_int 12) (const_int 14)])))
12427 (match_operand:V16HI 2 "nonimmediate_operand")
12428 (parallel [(const_int 0) (const_int 2)
12429 (const_int 4) (const_int 6)
12430 (const_int 8) (const_int 10)
12431 (const_int 12) (const_int 14)]))))
12434 (vec_select:V8HI (match_dup 1)
12435 (parallel [(const_int 1) (const_int 3)
12436 (const_int 5) (const_int 7)
12437 (const_int 9) (const_int 11)
12438 (const_int 13) (const_int 15)])))
12440 (vec_select:V8HI (match_dup 2)
12441 (parallel [(const_int 1) (const_int 3)
12442 (const_int 5) (const_int 7)
12443 (const_int 9) (const_int 11)
12444 (const_int 13) (const_int 15)]))))))]
12446 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; vpmaddwd (256-bit) insn matching the expander above.
12448 (define_insn "*avx2_pmaddwd"
12449 [(set (match_operand:V8SI 0 "register_operand" "=Yw")
12454 (match_operand:V16HI 1 "nonimmediate_operand" "%Yw")
12455 (parallel [(const_int 0) (const_int 2)
12456 (const_int 4) (const_int 6)
12457 (const_int 8) (const_int 10)
12458 (const_int 12) (const_int 14)])))
12461 (match_operand:V16HI 2 "nonimmediate_operand" "Ywm")
12462 (parallel [(const_int 0) (const_int 2)
12463 (const_int 4) (const_int 6)
12464 (const_int 8) (const_int 10)
12465 (const_int 12) (const_int 14)]))))
12468 (vec_select:V8HI (match_dup 1)
12469 (parallel [(const_int 1) (const_int 3)
12470 (const_int 5) (const_int 7)
12471 (const_int 9) (const_int 11)
12472 (const_int 13) (const_int 15)])))
12474 (vec_select:V8HI (match_dup 2)
12475 (parallel [(const_int 1) (const_int 3)
12476 (const_int 5) (const_int 7)
12477 (const_int 9) (const_int 11)
12478 (const_int 13) (const_int 15)]))))))]
12479 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12480 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
12481 [(set_attr "type" "sseiadd")
12482 (set_attr "prefix" "vex")
12483 (set_attr "mode" "OI")])
;; pmaddwd (128-bit) expander: same even*even + odd*odd structure as the
;; 256-bit version above.
12485 (define_expand "sse2_pmaddwd"
12486 [(set (match_operand:V4SI 0 "register_operand")
12491 (match_operand:V8HI 1 "vector_operand")
12492 (parallel [(const_int 0) (const_int 2)
12493 (const_int 4) (const_int 6)])))
12496 (match_operand:V8HI 2 "vector_operand")
12497 (parallel [(const_int 0) (const_int 2)
12498 (const_int 4) (const_int 6)]))))
12501 (vec_select:V4HI (match_dup 1)
12502 (parallel [(const_int 1) (const_int 3)
12503 (const_int 5) (const_int 7)])))
12505 (vec_select:V4HI (match_dup 2)
12506 (parallel [(const_int 1) (const_int 3)
12507 (const_int 5) (const_int 7)]))))))]
12509 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmaddwd/vpmaddwd (128-bit) insn: legacy two-operand and VEX
;; three-operand alternatives.
12511 (define_insn "*sse2_pmaddwd"
12512 [(set (match_operand:V4SI 0 "register_operand" "=x,Yw")
12517 (match_operand:V8HI 1 "vector_operand" "%0,Yw")
12518 (parallel [(const_int 0) (const_int 2)
12519 (const_int 4) (const_int 6)])))
12522 (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")
12523 (parallel [(const_int 0) (const_int 2)
12524 (const_int 4) (const_int 6)]))))
12527 (vec_select:V4HI (match_dup 1)
12528 (parallel [(const_int 1) (const_int 3)
12529 (const_int 5) (const_int 7)])))
12531 (vec_select:V4HI (match_dup 2)
12532 (parallel [(const_int 1) (const_int 3)
12533 (const_int 5) (const_int 7)]))))))]
12534 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12536 pmaddwd\t{%2, %0|%0, %2}
12537 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
12538 [(set_attr "isa" "noavx,avx")
12539 (set_attr "type" "sseiadd")
12540 (set_attr "atom_unit" "simul")
12541 (set_attr "prefix_data16" "1,*")
12542 (set_attr "prefix" "orig,vex")
12543 (set_attr "mode" "TI")])
;; Conditional (masked) QI-mode-element... no: conditional 64-bit-element
;; multiply; forwards to the AVX512DQ vpmullq masked pattern.
12545 (define_expand "cond_mul<mode>"
12546 [(set (match_operand:VI8_AVX512VL 0 "register_operand")
12547 (vec_merge:VI8_AVX512VL
12549 (match_operand:VI8_AVX512VL 2 "vector_operand")
12550 (match_operand:VI8_AVX512VL 3 "vector_operand"))
12551 (match_operand:VI8_AVX512VL 4 "nonimm_or_0_operand")
12552 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
12555 emit_insn (gen_avx512dq_mul<mode>3_mask (operands[0],
;; vpmullq: low-part 64-bit element multiply (AVX512DQ), with embedded
;; broadcast support on the second source.
12563 (define_insn "avx512dq_mul<mode>3<mask_name>"
12564 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
12566 (match_operand:VI8_AVX512VL 1 "bcst_vector_operand" "%v")
12567 (match_operand:VI8_AVX512VL 2 "bcst_vector_operand" "vmBr")))]
12568 "TARGET_AVX512DQ && <mask_mode512bit_condition>
12569 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
12570 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12571 [(set_attr "type" "sseimul")
12572 (set_attr "prefix" "evex")
12573 (set_attr "mode" "<sseinsnmode>")])
;; Conditional (masked) 32-bit-element multiply; forwards to
;; mul<mode>3_mask (vpmulld).
12575 (define_expand "cond_mul<mode>"
12576 [(set (match_operand:VI4_AVX512VL 0 "register_operand")
12577 (vec_merge:VI4_AVX512VL
12579 (match_operand:VI4_AVX512VL 2 "vector_operand")
12580 (match_operand:VI4_AVX512VL 3 "vector_operand"))
12581 (match_operand:VI4_AVX512VL 4 "nonimm_or_0_operand")
12582 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
12585 emit_insn (gen_mul<mode>3_mask (operands[0],
;; 32-bit-element multiply expander.  With SSE4.1+ the pmulld pattern
;; matches directly; plain SSE2 falls through to the multi-insn sequence
;; in ix86_expand_sse2_mulv4si3.
12593 (define_expand "mul<mode>3<mask_name>"
12594 [(set (match_operand:VI4_AVX512F 0 "register_operand")
12596 (match_operand:VI4_AVX512F 1 "general_vector_operand")
12597 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
12598 "TARGET_SSE2 && <mask_mode512bit_condition>"
12602 if (!vector_operand (operands[1], <MODE>mode))
12603 operands[1] = force_reg (<MODE>mode, operands[1]);
12604 if (!vector_operand (operands[2], <MODE>mode))
12605 operands[2] = force_reg (<MODE>mode, operands[2]);
12606 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
12610 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; pmulld/vpmulld insn (SSE4.1 two-operand, plus VEX/EVEX three-operand
;; with embedded broadcast); vector decode on btver2.
12615 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
12616 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
12618 (match_operand:VI4_AVX512F 1 "bcst_vector_operand" "%0,0,v")
12619 (match_operand:VI4_AVX512F 2 "bcst_vector_operand" "YrBm,*xBm,vmBr")))]
12620 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
12621 && <mask_mode512bit_condition>"
12623 pmulld\t{%2, %0|%0, %2}
12624 pmulld\t{%2, %0|%0, %2}
12625 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12626 [(set_attr "isa" "noavx,noavx,avx")
12627 (set_attr "type" "sseimul")
12628 (set_attr "prefix_extra" "1")
12629 (set_attr "prefix" "<bcst_mask_prefix4>")
12630 (set_attr "btver2_decode" "vector,vector,vector")
12631 (set_attr "mode" "<sseinsnmode>")])
;; 64-bit-element multiply without AVX512DQ: synthesized from 32x32->64
;; partial products in ix86_expand_sse2_mulvxdi3.
12633 (define_expand "mul<mode>3"
12634 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
12635 (mult:VI8_AVX2_AVX512F
12636 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
12637 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
12640 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Widening multiply of the high input halves, lowered by
;; ix86_expand_mul_widen_hilo.
12644 (define_expand "vec_widen_<s>mult_hi_<mode>"
12645 [(match_operand:<sseunpackmode> 0 "register_operand")
12646 (any_extend:<sseunpackmode>
12647 (match_operand:VI124_AVX2 1 "register_operand"))
12648 (match_operand:VI124_AVX2 2 "register_operand")]
12651 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Widening multiply of the low input halves, lowered by
;; ix86_expand_mul_widen_hilo.
12656 (define_expand "vec_widen_<s>mult_lo_<mode>"
12657 [(match_operand:<sseunpackmode> 0 "register_operand")
12658 (any_extend:<sseunpackmode>
12659 (match_operand:VI124_AVX2 1 "register_operand"))
12660 (match_operand:VI124_AVX2 2 "register_operand")]
12663 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
12668 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
12669 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; (pmuldq is SSE4.1-only, so pre-SSE4.1 this is emulated in
;; ix86_expand_mul_widen_evenodd.)
12670 (define_expand "vec_widen_smult_even_v4si"
12671 [(match_operand:V2DI 0 "register_operand")
12672 (match_operand:V4SI 1 "vector_operand")
12673 (match_operand:V4SI 2 "vector_operand")]
12676 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Widening multiply of the odd-numbered elements, lowered by
;; ix86_expand_mul_widen_evenodd.
12681 (define_expand "vec_widen_<s>mult_odd_<mode>"
12682 [(match_operand:<sseunpackmode> 0 "register_operand")
12683 (any_extend:<sseunpackmode>
12684 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
12685 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
12688 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Suffix used to pick the right pmaddwd gen function in sdot_prod<mode>:
;; the 512-bit mode needs the explicit "512v32hi" name.
12693 (define_mode_attr SDOT_PMADD_SUF
12694 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
;; Signed dot-product of HImode vectors: pmaddwd then add the SImode
;; accumulator (operand 3).
12696 (define_expand "sdot_prod<mode>"
12697 [(match_operand:<sseunpackmode> 0 "register_operand")
12698 (match_operand:VI2_AVX2 1 "register_operand")
12699 (match_operand:VI2_AVX2 2 "register_operand")
12700 (match_operand:<sseunpackmode> 3 "register_operand")]
12703 rtx t = gen_reg_rtx (<sseunpackmode>mode);
12704 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
12705 emit_insn (gen_rtx_SET (operands[0],
12706 gen_rtx_PLUS (<sseunpackmode>mode,
12711 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
12712 ;; back together when madd is available.
;; XOP-only: pmacsdqh/pmacsdql multiply-accumulate the high/low signed
;; dword pairs directly into the V2DI accumulator.
12713 (define_expand "sdot_prodv4si"
12714 [(match_operand:V2DI 0 "register_operand")
12715 (match_operand:V4SI 1 "register_operand")
12716 (match_operand:V4SI 2 "register_operand")
12717 (match_operand:V2DI 3 "register_operand")]
12720 rtx t = gen_reg_rtx (V2DImode);
12721 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
12722 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t))
;; Unsigned rounding average, (a + b + 1) >> 1, matching pavgb/pavgw
;; semantics; operand 3 is the all-ones rounding constant.
12726 (define_expand "uavg<mode>3_ceil"
12727 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
12728 (truncate:VI12_AVX2_AVX512BW
12729 (lshiftrt:<ssedoublemode>
12730 (plus:<ssedoublemode>
12731 (plus:<ssedoublemode>
12732 (zero_extend:<ssedoublemode>
12733 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
12734 (zero_extend:<ssedoublemode>
12735 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
12740 operands[3] = CONST1_RTX(<ssedoublemode>mode);
12741 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Unsigned sum-of-absolute-differences reduction (128-bit): psadbw
;; produces per-qword sums, which are converted to V4SI and added to the
;; accumulator (operand 3).
12744 (define_expand "usadv16qi"
12745 [(match_operand:V4SI 0 "register_operand")
12746 (match_operand:V16QI 1 "register_operand")
12747 (match_operand:V16QI 2 "vector_operand")
12748 (match_operand:V4SI 3 "vector_operand")]
12751 rtx t1 = gen_reg_rtx (V2DImode);
12752 rtx t2 = gen_reg_rtx (V4SImode);
12753 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
12754 convert_move (t2, t1, 0);
12755 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
;; 256-bit variant of usadv16qi using vpsadbw (AVX2).
12759 (define_expand "usadv32qi"
12760 [(match_operand:V8SI 0 "register_operand")
12761 (match_operand:V32QI 1 "register_operand")
12762 (match_operand:V32QI 2 "nonimmediate_operand")
12763 (match_operand:V8SI 3 "nonimmediate_operand")]
12766 rtx t1 = gen_reg_rtx (V4DImode);
12767 rtx t2 = gen_reg_rtx (V8SImode);
12768 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
12769 convert_move (t2, t1, 0);
12770 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
;; 512-bit variant of usadv16qi using vpsadbw (AVX512BW).
12774 (define_expand "usadv64qi"
12775 [(match_operand:V16SI 0 "register_operand")
12776 (match_operand:V64QI 1 "register_operand")
12777 (match_operand:V64QI 2 "nonimmediate_operand")
12778 (match_operand:V16SI 3 "nonimmediate_operand")]
12781 rtx t1 = gen_reg_rtx (V8DImode);
12782 rtx t2 = gen_reg_rtx (V16SImode);
12783 emit_insn (gen_avx512f_psadbw (t1, operands[1], operands[2]));
12784 convert_move (t2, t1, 0);
12785 emit_insn (gen_addv16si3 (operands[0], t2, operands[3]));
;; vpsra{w,d,q} with a scalar count: register-count (xmm) or immediate
;; alternative; count has no length_immediate unless it is a CONST_INT.
12789 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
12790 [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
12791 (ashiftrt:VI248_AVX512BW_1
12792 (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
12793 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
12795 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12796 [(set_attr "type" "sseishft")
12797 (set (attr "length_immediate")
12798 (if_then_else (match_operand 2 "const_int_operand")
12800 (const_string "0")))
12801 (set_attr "mode" "<sseinsnmode>")])
12803 (define_insn "ashr<mode>3"
12804 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,<v_Yw>")
12805 (ashiftrt:VI24_AVX2
12806 (match_operand:VI24_AVX2 1 "register_operand" "0,<v_Yw>")
12807 (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
12810 psra<ssemodesuffix>\t{%2, %0|%0, %2}
12811 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12812 [(set_attr "isa" "noavx,avx")
12813 (set_attr "type" "sseishft")
12814 (set (attr "length_immediate")
12815 (if_then_else (match_operand 2 "const_int_operand")
12817 (const_string "0")))
12818 (set_attr "prefix_data16" "1,*")
12819 (set_attr "prefix" "orig,vex")
12820 (set_attr "mode" "<sseinsnmode>")])
12822 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
12823 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
12824 (ashiftrt:VI248_AVX512BW_AVX512VL
12825 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
12826 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
12828 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12829 [(set_attr "type" "sseishft")
12830 (set (attr "length_immediate")
12831 (if_then_else (match_operand 2 "const_int_operand")
12833 (const_string "0")))
12834 (set_attr "mode" "<sseinsnmode>")])
12836 (define_expand "ashr<mode>3"
12837 [(set (match_operand:VI248_AVX512BW 0 "register_operand")
12838 (ashiftrt:VI248_AVX512BW
12839 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand")
12840 (match_operand:DI 2 "nonmemory_operand")))]
;; V4DI arithmetic right shift.  AVX2 has no VPSRAQ, so without
;; AVX512VL the shift is emulated from the pieces AVX2 does provide.
;; NOTE(review): several interior lines (braces, sel[] index
;; assignments, else arms) are missing from this extracted chunk.
12843 (define_expand "ashrv4di3"
12844 [(set (match_operand:V4DI 0 "register_operand")
12846 (match_operand:V4DI 1 "nonimmediate_operand")
12847 (match_operand:DI 2 "nonmemory_operand")))]
;; With AVX512VL the pattern matches directly; only emulate otherwise.
12850 if (!TARGET_AVX512VL)
;; Count >= 63: every element becomes its sign mask, computed as
;; (0 > x) with a signed 64-bit compare.
12852 if (CONST_INT_P (operands[2]) && UINTVAL (operands[2]) >= 63)
12854 rtx zero = force_reg (V4DImode, CONST0_RTX (V4DImode));
12855 emit_insn (gen_avx2_gtv4di3 (operands[0], zero, operands[1]));
;; Count == 0: plain copy.
12858 if (operands[2] == const0_rtx)
12860 emit_move_insn (operands[0], operands[1]);
12863 operands[1] = force_reg (V4DImode, operands[1]);
;; Constant count: build the result from V8SI arithmetic shifts of the
;; 32-bit halves plus a cross-half permutation selected via sel[].
12864 if (CONST_INT_P (operands[2]))
12866 vec_perm_builder sel (8, 8, 1);
12867 sel.quick_grow (8);
12869 rtx op1 = lowpart_subreg (V8SImode, operands[1], V4DImode);
12870 rtx target = gen_reg_rtx (V8SImode);
;; Count > 32: low half = high half >> (count-32); high half = sign
;; replication (high half >> 31).
12871 if (INTVAL (operands[2]) > 32)
12873 arg0 = gen_reg_rtx (V8SImode);
12874 arg1 = gen_reg_rtx (V8SImode);
12875 emit_insn (gen_ashrv8si3 (arg1, op1, GEN_INT (31)));
12876 emit_insn (gen_ashrv8si3 (arg0, op1,
12877 GEN_INT (INTVAL (operands[2]) - 32)));
;; Count == 32: low half is the old high half; new high half is its
;; sign replication.
12887 else if (INTVAL (operands[2]) == 32)
12890 arg1 = gen_reg_rtx (V8SImode);
12891 emit_insn (gen_ashrv8si3 (arg1, op1, GEN_INT (31)));
;; Count < 32: logical 64-bit shift supplies the low words, V8SI
;; arithmetic shift supplies sign-correct high words.
12903 arg0 = gen_reg_rtx (V4DImode);
12904 arg1 = gen_reg_rtx (V8SImode);
12905 emit_insn (gen_lshrv4di3 (arg0, operands[1], operands[2]));
12906 emit_insn (gen_ashrv8si3 (arg1, op1, operands[2]));
12907 arg0 = lowpart_subreg (V8SImode, arg0, V4DImode);
;; Combine the two V8SI halves with a constant permutation.
12917 vec_perm_indices indices (sel, 2, 8);
12918 bool ok = targetm.vectorize.vec_perm_const (V8SImode, target,
12919 arg0, arg1, indices);
12921 emit_move_insn (operands[0],
12922 lowpart_subreg (V4DImode, target, V8SImode));
;; Variable count: x >>a n  ==  (x >>l n) | (signmask(x) << (64-n)).
12926 rtx zero = force_reg (V4DImode, CONST0_RTX (V4DImode));
12927 rtx zero_or_all_ones = gen_reg_rtx (V4DImode);
12928 emit_insn (gen_avx2_gtv4di3 (zero_or_all_ones, zero, operands[1]));
12929 rtx lshr_res = gen_reg_rtx (V4DImode);
12930 emit_insn (gen_lshrv4di3 (lshr_res, operands[1], operands[2]));
12931 rtx ashl_res = gen_reg_rtx (V4DImode);
;; Compute 64 - count in DImode ...
12935 amount = gen_reg_rtx (DImode);
12936 emit_insn (gen_subdi3 (amount, force_reg (DImode, GEN_INT (64)),
;; ... or in SImode inserted into element 0 of a V4SI, presumably for
;; the !TARGET_64BIT path — TODO confirm (guard line not visible).
12941 rtx temp = gen_reg_rtx (SImode);
12942 emit_insn (gen_subsi3 (temp, force_reg (SImode, GEN_INT (64)),
12943 lowpart_subreg (SImode, operands[2],
12945 amount = gen_reg_rtx (V4SImode);
12946 emit_insn (gen_vec_setv4si_0 (amount, CONST0_RTX (V4SImode),
12949 amount = lowpart_subreg (DImode, amount, GET_MODE (amount));
12950 emit_insn (gen_ashlv4di3 (ashl_res, zero_or_all_ones, amount));
12951 emit_insn (gen_iorv4di3 (operands[0], lshr_res, ashl_res));
;; Logical shifts (any_lshift: VPSLL/VPSRL families).  Same structure as
;; the arithmetic-shift patterns above: DImode count in operand 2,
;; register/memory count or immediate 'N' alternatives.
;; NOTE(review): insn conditions and the (const_string "1") arms of the
;; length_immediate attributes are missing from this chunk.

;; EVEX-encoded, optionally masked variant.
12956 (define_insn "<mask_codefor><insn><mode>3<mask_name>"
12957 [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
12958 (any_lshift:VI248_AVX512BW_2
12959 (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
12960 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
12962 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12963 [(set_attr "type" "sseishft")
12964 (set (attr "length_immediate")
12965 (if_then_else (match_operand 2 "const_int_operand")
12967 (const_string "0")))
12968 (set_attr "mode" "<sseinsnmode>")])

;; Legacy-SSE / VEX two-alternative variant.
12970 (define_insn "<insn><mode>3"
12971 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,<v_Yw>")
12972 (any_lshift:VI248_AVX2
12973 (match_operand:VI248_AVX2 1 "register_operand" "0,<v_Yw>")
12974 (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
12977 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
12978 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12979 [(set_attr "isa" "noavx,avx")
12980 (set_attr "type" "sseishft")
12981 (set (attr "length_immediate")
12982 (if_then_else (match_operand 2 "const_int_operand")
12984 (const_string "0")))
12985 (set_attr "prefix_data16" "1,*")
12986 (set_attr "prefix" "orig,vex")
12987 (set_attr "mode" "<sseinsnmode>")])

;; Masked AVX512BW variant; note operand 1 may be a full memory operand
;; only when the count is an immediate (second alternative).
12989 (define_insn "<insn><mode>3<mask_name>"
12990 [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
12991 (any_lshift:VI248_AVX512BW
12992 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
12993 (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
12995 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12996 [(set_attr "type" "sseishft")
12997 (set (attr "length_immediate")
12998 (if_then_else (match_operand 2 "const_int_operand")
13000 (const_string "0")))
13001 (set_attr "mode" "<sseinsnmode>")])
;; Whole-vector byte shifts (PSLLDQ/PSRLDQ).  The vec_shl/vec_shr
;; expanders view the 128-bit vector as V1TImode, shift it there
;; (operand 2 is a bit count that must be a multiple of 8, per the
;; const_0_to_255_mul_8_operand predicate), and view the result back
;; in the original mode.
;; NOTE(review): the shift rtx codes inside the expanders and the insn
;; conditions are on lines missing from this extracted chunk.
13004 (define_expand "vec_shl_<mode>"
13005 [(set (match_dup 3)
13007 (match_operand:V_128 1 "register_operand")
13008 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
13009 (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
13012 operands[1] = gen_lowpart (V1TImode, operands[1]);
13013 operands[3] = gen_reg_rtx (V1TImode);
13014 operands[4] = gen_lowpart (<MODE>mode, operands[3]);

13017 (define_expand "vec_shr_<mode>"
13018 [(set (match_dup 3)
13020 (match_operand:V_128 1 "register_operand")
13021 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
13022 (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
13025 operands[1] = gen_lowpart (V1TImode, operands[1]);
13026 operands[3] = gen_reg_rtx (V1TImode);
13027 operands[4] = gen_lowpart (<MODE>mode, operands[3]);

;; EVEX-encoded byte-shift insn; the output routine converts the bit
;; count to PSLLDQ/PSRLDQ's byte count by dividing by 8.
13030 (define_insn "avx512bw_<insn><mode>3"
13031 [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
13032 (any_lshift:VIMAX_AVX512VL
13033 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
13034 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
13037 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
13038 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
13040 [(set_attr "type" "sseishft")
13041 (set_attr "length_immediate" "1")
13042 (set_attr "prefix" "maybe_evex")
13043 (set_attr "mode" "<sseinsnmode>")])

;; SSE2/AVX2 byte-shift insn, destructive vs. three-operand alternative.
13045 (define_insn "<sse2_avx2>_<insn><mode>3"
13046 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,Yw")
13047 (any_lshift:VIMAX_AVX2
13048 (match_operand:VIMAX_AVX2 1 "register_operand" "0,Yw")
13049 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
13052 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
13054 switch (which_alternative)
13057 return "p<vshift>dq\t{%2, %0|%0, %2}";
13059 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
13061 gcc_unreachable ();
13064 [(set_attr "isa" "noavx,avx")
13065 (set_attr "type" "sseishft")
13066 (set_attr "length_immediate" "1")
13067 (set_attr "atom_unit" "sishuf")
13068 (set_attr "prefix_data16" "1,*")
13069 (set_attr "prefix" "orig,vex")
13070 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 element rotates (any_rotate: rol/ror), always EVEX-encoded
;; and optionally masked.
;; NOTE(review): the insn condition lines are missing from this chunk.

;; Variable per-element rotate count (VPROLV/VPRORV).
13072 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
13073 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13074 (any_rotate:VI48_AVX512VL
13075 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
13076 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
13078 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13079 [(set_attr "prefix" "evex")
13080 (set_attr "mode" "<sseinsnmode>")])

;; Immediate rotate count (VPROL/VPROR).
13082 (define_insn "<avx512>_<rotate><mode><mask_name>"
13083 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13084 (any_rotate:VI48_AVX512VL
13085 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
13086 (match_operand:SI 2 "const_0_to_255_operand")))]
13088 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13089 [(set_attr "prefix" "evex")
13090 (set_attr "mode" "<sseinsnmode>")])
;; Integer min/max (maxmin: smax/smin/umax/umin) for wide vectors.
;; NOTE(review): insn conditions and several brace/else lines are
;; missing from this extracted chunk.

;; Straightforward optab expander; canonicalizes operands.
13092 (define_expand "<code><mode>3"
13093 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
13094 (maxmin:VI124_256_AVX512F_AVX512BW
13095 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
13096 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
13098 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; AVX2 256-bit VPMAX/VPMIN; '%' marks operand 1 as commutative.
13100 (define_insn "*avx2_<code><mode>3"
13101 [(set (match_operand:VI124_256 0 "register_operand" "=<v_Yw>")
13103 (match_operand:VI124_256 1 "nonimmediate_operand" "%<v_Yw>")
13104 (match_operand:VI124_256 2 "nonimmediate_operand" "<v_Yw>m")))]
13105 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13106 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13107 [(set_attr "type" "sseiadd")
13108 (set_attr "prefix_extra" "1")
13109 (set_attr "prefix" "vex")
13110 (set_attr "mode" "OI")])

;; Conditional (masked) min/max: forwards to the _mask pattern with the
;; mask in operand 1 and the merge source in operand 4.
13112 (define_expand "cond_<code><mode>"
13113 [(set (match_operand:VI1248_AVX512VLBW 0 "register_operand")
13114 (vec_merge:VI1248_AVX512VLBW
13115 (maxmin:VI1248_AVX512VLBW
13116 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand")
13117 (match_operand:VI1248_AVX512VLBW 3 "nonimmediate_operand"))
13118 (match_operand:VI1248_AVX512VLBW 4 "nonimm_or_0_operand")
13119 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
13122 emit_insn (gen_<code><mode>3_mask (operands[0],

;; Masked min/max expander (vec_merge form) for dword/qword elements.
13130 (define_expand "<code><mode>3_mask"
13131 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
13132 (vec_merge:VI48_AVX512VL
13133 (maxmin:VI48_AVX512VL
13134 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
13135 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
13136 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
13137 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
13139 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; AVX512F dword/qword min/max insn, optionally masked.
13141 (define_insn "*avx512f_<code><mode>3<mask_name>"
13142 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13143 (maxmin:VI48_AVX512VL
13144 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
13145 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
13146 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13147 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13148 [(set_attr "type" "sseiadd")
13149 (set_attr "prefix_extra" "1")
13150 (set_attr "prefix" "maybe_evex")
13151 (set_attr "mode" "<sseinsnmode>")])

;; Byte/word min/max insn (AVX512BW element sizes), EVEX-encoded.
13153 (define_insn "<mask_codefor><code><mode>3<mask_name>"
13154 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
13155 (maxmin:VI12_AVX512VL
13156 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
13157 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
13159 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13160 [(set_attr "type" "sseiadd")
13161 (set_attr "prefix" "evex")
13162 (set_attr "mode" "<sseinsnmode>")])

;; 64-bit element min/max.  Native only for V8DI or with AVX512VL;
;; otherwise emulated as a vector conditional: max(a,b) selects a when
;; a > b, min swaps the select arms.  GTU is used for the unsigned codes.
13164 (define_expand "<code><mode>3"
13165 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
13166 (maxmin:VI8_AVX2_AVX512F
13167 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
13168 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
13172 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
13176 enum rtx_code code;
13181 xops[0] = operands[0];
13183 if (<CODE> == SMAX || <CODE> == UMAX)
13185 xops[1] = operands[1];
13186 xops[2] = operands[2];
13190 xops[1] = operands[2];
13191 xops[2] = operands[1];
13194 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
13196 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
13197 xops[4] = operands[1];
13198 xops[5] = operands[2];
13200 ok = ix86_expand_int_vcond (xops);
;; Signed 128-bit min/max.  SSE4.1 (or V8HI, which SSE2 handles
;; natively with PMAXSW/PMINSW) maps straight to an insn; otherwise the
;; operation is emulated through ix86_expand_int_vcond with a signed
;; GT comparison (select arms swapped for min).
;; NOTE(review): condition lines and some braces are missing from this
;; extracted chunk.
13206 (define_expand "<code><mode>3"
13207 [(set (match_operand:VI124_128 0 "register_operand")
13209 (match_operand:VI124_128 1 "vector_operand")
13210 (match_operand:VI124_128 2 "vector_operand")))]
13213 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
13214 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
13220 xops[0] = operands[0];
13221 operands[1] = force_reg (<MODE>mode, operands[1]);
13222 operands[2] = force_reg (<MODE>mode, operands[2]);
13224 if (<CODE> == SMAX)
13226 xops[1] = operands[1];
13227 xops[2] = operands[2];
13231 xops[1] = operands[2];
13232 xops[2] = operands[1];
13235 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
13236 xops[4] = operands[1];
13237 xops[5] = operands[2];
13239 ok = ix86_expand_int_vcond (xops);

;; SSE4.1 byte/dword signed min/max, optionally masked under AVX512.
13245 (define_insn "*sse4_1_<code><mode>3<mask_name>"
13246 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,<v_Yw>")
13248 (match_operand:VI14_128 1 "vector_operand" "%0,0,<v_Yw>")
13249 (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))]
13251 && <mask_mode512bit_condition>
13252 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13254 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
13255 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
13256 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13257 [(set_attr "isa" "noavx,noavx,avx")
13258 (set_attr "type" "sseiadd")
13259 (set_attr "prefix_extra" "1,1,*")
13260 (set_attr "prefix" "orig,orig,vex")
13261 (set_attr "mode" "TI")])

;; SSE2 PMAXSW/PMINSW: the one signed min/max SSE2 has natively.
13263 (define_insn "*<code>v8hi3"
13264 [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
13266 (match_operand:V8HI 1 "vector_operand" "%0,Yw")
13267 (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")))]
13268 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13270 p<maxmin_int>w\t{%2, %0|%0, %2}
13271 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
13272 [(set_attr "isa" "noavx,avx")
13273 (set_attr "type" "sseiadd")
13274 (set_attr "prefix_data16" "1,*")
13275 (set_attr "prefix_extra" "*,1")
13276 (set_attr "prefix" "orig,vex")
13277 (set_attr "mode" "TI")])
;; Unsigned 128-bit min/max.  SSE4.1 (or V16QI, native in SSE2 via
;; PMAXUB/PMINUB) maps to an insn.  UMAX of V8HI is synthesized with
;; saturating arithmetic: umax(a,b) = (a -us b) + b.  Remaining cases
;; go through ix86_expand_int_vcond with an unsigned GTU comparison.
;; NOTE(review): condition lines and some braces are missing from this
;; extracted chunk.
13279 (define_expand "<code><mode>3"
13280 [(set (match_operand:VI124_128 0 "register_operand")
13282 (match_operand:VI124_128 1 "vector_operand")
13283 (match_operand:VI124_128 2 "vector_operand")))]
13286 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
13287 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
13288 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
13290 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
13291 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Avoid clobbering op2 when the destination aliases it.
13292 if (rtx_equal_p (op3, op2))
13293 op3 = gen_reg_rtx (V8HImode);
13294 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
13295 emit_insn (gen_addv8hi3 (op0, op3, op2));
13303 operands[1] = force_reg (<MODE>mode, operands[1]);
13304 operands[2] = force_reg (<MODE>mode, operands[2]);
13306 xops[0] = operands[0];
13308 if (<CODE> == UMAX)
13310 xops[1] = operands[1];
13311 xops[2] = operands[2];
13315 xops[1] = operands[2];
13316 xops[2] = operands[1];
13319 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
13320 xops[4] = operands[1];
13321 xops[5] = operands[2];
13323 ok = ix86_expand_int_vcond (xops);

;; SSE4.1 word/dword unsigned min/max, optionally masked under AVX512.
13329 (define_insn "*sse4_1_<code><mode>3<mask_name>"
13330 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,<v_Yw>")
13332 (match_operand:VI24_128 1 "vector_operand" "%0,0,<v_Yw>")
13333 (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))]
13335 && <mask_mode512bit_condition>
13336 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13338 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
13339 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
13340 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13341 [(set_attr "isa" "noavx,noavx,avx")
13342 (set_attr "type" "sseiadd")
13343 (set_attr "prefix_extra" "1,1,*")
13344 (set_attr "prefix" "orig,orig,vex")
13345 (set_attr "mode" "TI")])

;; SSE2 PMAXUB/PMINUB: the one unsigned min/max SSE2 has natively.
13347 (define_insn "*<code>v16qi3"
13348 [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
13350 (match_operand:V16QI 1 "vector_operand" "%0,Yw")
13351 (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")))]
13352 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13354 p<maxmin_int>b\t{%2, %0|%0, %2}
13355 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
13356 [(set_attr "isa" "noavx,avx")
13357 (set_attr "type" "sseiadd")
13358 (set_attr "prefix_data16" "1,*")
13359 (set_attr "prefix_extra" "*,1")
13360 (set_attr "prefix" "orig,vex")
13361 (set_attr "mode" "TI")])
13363 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13365 ;; Parallel integral comparisons
13367 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 256-bit integer equality (VPCMPEQ) producing an all-ones/zero vector.
13369 (define_insn "*avx2_eq<mode>3"
13370 [(set (match_operand:VI_256 0 "register_operand" "=x")
13372 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
13373 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
13374 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13375 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13376 [(set_attr "type" "ssecmp")
13377 (set_attr "prefix_extra" "1")
13378 (set_attr "prefix" "vex")
13379 (set_attr "mode" "OI")])

;; Pre-reload splitter: a masked-EQ unspec merged between all-ones and
;; zero is just a plain vector equality; rewrite it as one so the
;; cheaper non-mask VPCMPEQ form can be used.
;; NOTE(review): the split body lines after 13394 are missing from this
;; extracted chunk.
13381 (define_insn_and_split "*avx2_eq<mode>3"
13382 [(set (match_operand:VI_128_256 0 "register_operand")
13383 (vec_merge:VI_128_256
13384 (match_operand:VI_128_256 1 "vector_all_ones_operand")
13385 (match_operand:VI_128_256 2 "const0_operand")
13386 (unspec:<avx512fmaskmode>
13387 [(match_operand:VI_128_256 3 "nonimmediate_operand")
13388 (match_operand:VI_128_256 4 "nonimmediate_operand")]
13389 UNSPEC_MASKED_EQ)))]
13390 "TARGET_AVX512VL && ix86_pre_reload_split ()
13391 && !(MEM_P (operands[3]) && MEM_P (operands[4]))"
13394 [(set (match_dup 0)
;; Pre-reload splitters turning AVX512 UNSPEC_PCMP mask comparisons
;; (merged between all-ones and zero, i.e. plain vector compares) back
;; into AVX2-style vector comparison rtl.  Operand 5 is the VPCMP
;; predicate immediate; the conditions below use 0=EQ, 1=LT, 2=LE,
;; 4=NE, 5=NLT(GE), 6=NLE(GT), swapping operands where needed so only
;; EQ/GT remain.

;; Non-negated predicate: EQ, GT (NLE), or LT via operand swap.
13399 (define_insn_and_split "*avx2_pcmp<mode>3_1"
13400 [(set (match_operand:VI_128_256 0 "register_operand")
13401 (vec_merge:VI_128_256
13402 (match_operand:VI_128_256 1 "vector_all_ones_operand")
13403 (match_operand:VI_128_256 2 "const0_operand")
13404 (unspec:<avx512fmaskmode>
13405 [(match_operand:VI_128_256 3 "nonimmediate_operand")
13406 (match_operand:VI_128_256 4 "nonimmediate_operand")
13407 (match_operand:SI 5 "const_0_to_7_operand")]
13409 "TARGET_AVX512VL && ix86_pre_reload_split ()
13410 /* EQ is commutative.  */
13411 && ((INTVAL (operands[5]) == 0
13412 && !(MEM_P (operands[3]) && MEM_P (operands[4])))
13413 /* NLE aka GT, 3 must be register.  */
13414 || (INTVAL (operands[5]) == 6
13415 && !MEM_P (operands[3]))
13416 /* LT, 4 must be register and we swap operands.  */
13417 || (INTVAL (operands[5]) == 1
13418 && !MEM_P (operands[4])))"
13423 if (INTVAL (operands[5]) == 1)
13424 std::swap (operands[3], operands[4]);
13425 enum rtx_code code = INTVAL (operands[5]) ? GT : EQ;
13426 emit_move_insn (operands[0], gen_rtx_fmt_ee (code, <MODE>mode,
13427 operands[3], operands[4]));

;; Negated predicate (not: around the unspec): NE, LE, or GE; the
;; negation of each is again expressible as EQ or GT.
13431 (define_insn_and_split "*avx2_pcmp<mode>3_2"
13432 [(set (match_operand:VI_128_256 0 "register_operand")
13433 (vec_merge:VI_128_256
13434 (match_operand:VI_128_256 1 "vector_all_ones_operand")
13435 (match_operand:VI_128_256 2 "const0_operand")
13436 (not:<avx512fmaskmode>
13437 (unspec:<avx512fmaskmode>
13438 [(match_operand:VI_128_256 3 "nonimmediate_operand")
13439 (match_operand:VI_128_256 4 "nonimmediate_operand")
13440 (match_operand:SI 5 "const_0_to_7_operand")]
13442 "TARGET_AVX512VL && ix86_pre_reload_split ()
13443 /* NE is commutative.  */
13444 && ((INTVAL (operands[5]) == 4
13445 && !(MEM_P (operands[3]) && MEM_P (operands[4])))
13446 /* LE, 3 must be register.  */
13447 || (INTVAL (operands[5]) == 2
13448 && !MEM_P (operands[3]))
13449 /* NLT aka GE, 4 must be register and we swap operands.  */
13450 || (INTVAL (operands[5]) == 5
13451 && !MEM_P (operands[4])))"
13456 if (INTVAL (operands[5]) == 5)
13457 std::swap (operands[3], operands[4]);
13458 enum rtx_code code = INTVAL (operands[5]) != 4 ? GT : EQ;
13459 emit_move_insn (operands[0], gen_rtx_fmt_ee (code, <MODE>mode,
13460 operands[3], operands[4]));

;; Blend controlled by a sign-compare of operand 3 against zero
;; (predicate 1=LT or 5=NLT); arms are swapped for the NLT case.
;; NOTE(review): split-pattern bodies after the match_dup lines are
;; missing from this extracted chunk.
13464 (define_insn_and_split "*avx2_pcmp<mode>3_3"
13465 [(set (match_operand:VI1_AVX2 0 "register_operand")
13466 (vec_merge:VI1_AVX2
13467 (match_operand:VI1_AVX2 1 "vector_operand")
13468 (match_operand:VI1_AVX2 2 "vector_operand")
13469 (unspec:<avx512fmaskmode>
13470 [(match_operand:VI1_AVX2 3 "register_operand")
13471 (match_operand:VI1_AVX2 4 "const0_operand")
13472 (match_operand:SI 5 "const_0_to_7_operand")]
13474 "TARGET_AVX512VL && ix86_pre_reload_split ()
13476 && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
13477 || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
13480 [(set (match_dup 0)
13489 if (INTVAL (operands[5]) == 5)
13490 std::swap (operands[1], operands[2]);

;; Same as _3 but the compared value arrives as a paradoxical subreg of
;; a negated register of equal size; the not: cancels against the
;; predicate's arm swap, and the operand is re-viewed in <MODE>mode.
13493 (define_insn_and_split "*avx2_pcmp<mode>3_4"
13494 [(set (match_operand:VI1_AVX2 0 "register_operand")
13495 (vec_merge:VI1_AVX2
13496 (match_operand:VI1_AVX2 1 "vector_operand")
13497 (match_operand:VI1_AVX2 2 "vector_operand")
13498 (unspec:<avx512fmaskmode>
13499 [(subreg:VI1_AVX2 (not (match_operand 3 "register_operand")) 0)
13500 (match_operand:VI1_AVX2 4 "const0_operand")
13501 (match_operand:SI 5 "const_0_to_7_operand")]
13503 "TARGET_AVX512VL && ix86_pre_reload_split ()
13504 && GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT
13505 && GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE>
13507 && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[1]))
13508 || (INTVAL (operands[5]) == 5 && !MEM_P (operands[2])))"
13511 [(set (match_dup 0)
13520 if (INTVAL (operands[5]) == 1)
13521 std::swap (operands[1], operands[2]);
13522 operands[3] = gen_lowpart (<MODE>mode, operands[3]);

;; Same as _4 but with a same-mode not: (no subreg wrapper).
13525 (define_insn_and_split "*avx2_pcmp<mode>3_5"
13526 [(set (match_operand:VI1_AVX2 0 "register_operand")
13527 (vec_merge:VI1_AVX2
13528 (match_operand:VI1_AVX2 1 "vector_operand")
13529 (match_operand:VI1_AVX2 2 "vector_operand")
13530 (unspec:<avx512fmaskmode>
13531 [(not:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand"))
13532 (match_operand:VI1_AVX2 4 "const0_operand")
13533 (match_operand:SI 5 "const_0_to_7_operand")]
13535 "TARGET_AVX512VL && ix86_pre_reload_split ()
13537 && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[1]))
13538 || (INTVAL (operands[5]) == 5 && !MEM_P (operands[2])))"
13541 [(set (match_dup 0)
13550 if (INTVAL (operands[5]) == 1)
13551 std::swap (operands[1], operands[2]);
;; AVX512 equality comparisons producing a mask register (UNSPEC_MASKED_EQ).
;; Split by element size: byte/word needs AVX512BW, dword/qword AVX512F.

;; Byte/word expander; canonicalizes operands (EQ is commutative).
13554 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
13555 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
13556 (unspec:<avx512fmaskmode>
13557 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
13558 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
13559 UNSPEC_MASKED_EQ))]
13561 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")

;; Dword/qword expander.
13563 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
13564 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
13565 (unspec:<avx512fmaskmode>
13566 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
13567 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
13568 UNSPEC_MASKED_EQ))]
13570 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")

;; Byte/word insn: VPCMPEQ in general; when operand 2 is the zero
;; vector ('C' alternative), compare-against-self VPTESTNM is used.
13572 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
13573 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
13574 (unspec:<avx512fmaskmode>
13575 [(match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
13576 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
13577 UNSPEC_MASKED_EQ))]
13578 "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13580 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
13581 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
13582 [(set_attr "type" "ssecmp")
13583 (set_attr "prefix_extra" "1")
13584 (set_attr "prefix" "evex")
13585 (set_attr "mode" "<sseinsnmode>")])

;; Dword/qword insn, same VPCMPEQ / VPTESTNM selection.
13587 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
13588 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
13589 (unspec:<avx512fmaskmode>
13590 [(match_operand:VI48_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
13591 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
13592 UNSPEC_MASKED_EQ))]
13593 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13595 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
13596 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
13597 [(set_attr "type" "ssecmp")
13598 (set_attr "prefix_extra" "1")
13599 (set_attr "prefix" "evex")
13600 (set_attr "mode" "<sseinsnmode>")])
;; Legacy 128-bit vector compares producing all-ones/zero vectors.
;; NOTE(review): some condition lines are missing from this chunk.

;; SSE4.1 PCMPEQQ (64-bit element equality).
13602 (define_insn "*sse4_1_eqv2di3"
13603 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
13605 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
13606 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
13607 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13609 pcmpeqq\t{%2, %0|%0, %2}
13610 pcmpeqq\t{%2, %0|%0, %2}
13611 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
13612 [(set_attr "isa" "noavx,noavx,avx")
13613 (set_attr "type" "ssecmp")
13614 (set_attr "prefix_extra" "1")
13615 (set_attr "prefix" "orig,orig,vex")
13616 (set_attr "mode" "TI")])

;; SSE2 PCMPEQB/W/D (8/16/32-bit element equality).
13618 (define_insn "*sse2_eq<mode>3"
13619 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
13621 (match_operand:VI124_128 1 "vector_operand" "%0,x")
13622 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
13624 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13626 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
13627 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13628 [(set_attr "isa" "noavx,avx")
13629 (set_attr "type" "ssecmp")
13630 (set_attr "prefix_data16" "1,*")
13631 (set_attr "prefix" "orig,vex")
13632 (set_attr "mode" "TI")])

;; SSE4.2 PCMPGTQ (signed 64-bit element greater-than).
13634 (define_insn "sse4_2_gtv2di3"
13635 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
13637 (match_operand:V2DI 1 "register_operand" "0,0,x")
13638 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
13641 pcmpgtq\t{%2, %0|%0, %2}
13642 pcmpgtq\t{%2, %0|%0, %2}
13643 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
13644 [(set_attr "isa" "noavx,noavx,avx")
13645 (set_attr "type" "ssecmp")
13646 (set_attr "prefix_extra" "1")
13647 (set_attr "prefix" "orig,orig,vex")
13648 (set_attr "mode" "TI")])
;; Signed greater-than comparisons.
;; NOTE(review): some condition and split-body lines are missing from
;; this extracted chunk.

;; AVX2 256-bit VPCMPGT, all-ones/zero vector result.
13650 (define_insn "avx2_gt<mode>3"
13651 [(set (match_operand:VI_256 0 "register_operand" "=x")
13653 (match_operand:VI_256 1 "register_operand" "x")
13654 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
13656 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13657 [(set_attr "type" "ssecmp")
13658 (set_attr "prefix_extra" "1")
13659 (set_attr "prefix" "vex")
13660 (set_attr "mode" "OI")])

;; Pre-reload splitter: masked-GT unspec merged between all-ones and
;; zero is a plain vector GT; rewrite to the non-mask form.
13662 (define_insn_and_split "*avx2_gt<mode>3"
13663 [(set (match_operand:VI_128_256 0 "register_operand")
13664 (vec_merge:VI_128_256
13665 (match_operand:VI_128_256 1 "vector_all_ones_operand")
13666 (match_operand:VI_128_256 2 "const0_operand")
13667 (unspec:<avx512fmaskmode>
13668 [(match_operand:VI_128_256 3 "register_operand")
13669 (match_operand:VI_128_256 4 "nonimmediate_operand")]
13670 UNSPEC_MASKED_GT)))]
13672 && ix86_pre_reload_split ()"
13675 [(set (match_dup 0)

;; AVX512 mask-producing VPCMPGT, dword/qword elements.
13680 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
13681 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13682 (unspec:<avx512fmaskmode>
13683 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
13684 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
13686 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13687 [(set_attr "type" "ssecmp")
13688 (set_attr "prefix_extra" "1")
13689 (set_attr "prefix" "evex")
13690 (set_attr "mode" "<sseinsnmode>")])

;; AVX512 mask-producing VPCMPGT, byte/word elements.
13692 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
13693 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13694 (unspec:<avx512fmaskmode>
13695 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
13696 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
13698 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13699 [(set_attr "type" "ssecmp")
13700 (set_attr "prefix_extra" "1")
13701 (set_attr "prefix" "evex")
13702 (set_attr "mode" "<sseinsnmode>")])

;; SSE2 PCMPGTB/W/D.
13704 (define_insn "*sse2_gt<mode>3"
13705 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
13707 (match_operand:VI124_128 1 "register_operand" "0,x")
13708 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
13711 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
13712 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13713 [(set_attr "isa" "noavx,avx")
13714 (set_attr "type" "ssecmp")
13715 (set_attr "prefix_data16" "1,*")
13716 (set_attr "prefix" "orig,vex")
13717 (set_attr "mode" "TI")])
;; Vector conditional-select (vcond*) expanders.  All delegate to
;; ix86_expand_int_vcond and require the data mode and the comparison
;; mode to have the same number of elements.  vcond = signed compare,
;; vcondu = unsigned compare, vcondeq = equality-only for V2DI.
;; NOTE(review): the enabling conditions and the FAIL/DONE tails of
;; these expanders are on lines missing from this extracted chunk.

;; 512-bit, signed.
13719 (define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
13720 [(set (match_operand:V_512 0 "register_operand")
13721 (if_then_else:V_512
13722 (match_operator 3 ""
13723 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
13724 (match_operand:VI_AVX512BW 5 "general_operand")])
13725 (match_operand:V_512 1)
13726 (match_operand:V_512 2)))]
13728 && (GET_MODE_NUNITS (<V_512:MODE>mode)
13729 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
13731 bool ok = ix86_expand_int_vcond (operands);

;; 256-bit, signed.
13736 (define_expand "vcond<V_256:mode><VI_256:mode>"
13737 [(set (match_operand:V_256 0 "register_operand")
13738 (if_then_else:V_256
13739 (match_operator 3 ""
13740 [(match_operand:VI_256 4 "nonimmediate_operand")
13741 (match_operand:VI_256 5 "general_operand")])
13742 (match_operand:V_256 1)
13743 (match_operand:V_256 2)))]
13745 && (GET_MODE_NUNITS (<V_256:MODE>mode)
13746 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
13748 bool ok = ix86_expand_int_vcond (operands);

;; 128-bit, signed, 8/16/32-bit elements.
13753 (define_expand "vcond<V_128:mode><VI124_128:mode>"
13754 [(set (match_operand:V_128 0 "register_operand")
13755 (if_then_else:V_128
13756 (match_operator 3 ""
13757 [(match_operand:VI124_128 4 "vector_operand")
13758 (match_operand:VI124_128 5 "general_operand")])
13759 (match_operand:V_128 1)
13760 (match_operand:V_128 2)))]
13762 && (GET_MODE_NUNITS (<V_128:MODE>mode)
13763 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
13765 bool ok = ix86_expand_int_vcond (operands);

;; 128-bit, signed, 64-bit element comparison.
13770 (define_expand "vcond<VI8F_128:mode>v2di"
13771 [(set (match_operand:VI8F_128 0 "register_operand")
13772 (if_then_else:VI8F_128
13773 (match_operator 3 ""
13774 [(match_operand:V2DI 4 "vector_operand")
13775 (match_operand:V2DI 5 "general_operand")])
13776 (match_operand:VI8F_128 1)
13777 (match_operand:VI8F_128 2)))]
13780 bool ok = ix86_expand_int_vcond (operands);

;; 512-bit, unsigned.
13785 (define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
13786 [(set (match_operand:V_512 0 "register_operand")
13787 (if_then_else:V_512
13788 (match_operator 3 ""
13789 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
13790 (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
13791 (match_operand:V_512 1 "general_operand")
13792 (match_operand:V_512 2 "general_operand")))]
13794 && (GET_MODE_NUNITS (<V_512:MODE>mode)
13795 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
13797 bool ok = ix86_expand_int_vcond (operands);

;; 256-bit, unsigned.
13802 (define_expand "vcondu<V_256:mode><VI_256:mode>"
13803 [(set (match_operand:V_256 0 "register_operand")
13804 (if_then_else:V_256
13805 (match_operator 3 ""
13806 [(match_operand:VI_256 4 "nonimmediate_operand")
13807 (match_operand:VI_256 5 "nonimmediate_operand")])
13808 (match_operand:V_256 1 "general_operand")
13809 (match_operand:V_256 2 "general_operand")))]
13811 && (GET_MODE_NUNITS (<V_256:MODE>mode)
13812 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
13814 bool ok = ix86_expand_int_vcond (operands);

;; 128-bit, unsigned, 8/16/32-bit elements.
13819 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
13820 [(set (match_operand:V_128 0 "register_operand")
13821 (if_then_else:V_128
13822 (match_operator 3 ""
13823 [(match_operand:VI124_128 4 "vector_operand")
13824 (match_operand:VI124_128 5 "vector_operand")])
13825 (match_operand:V_128 1 "general_operand")
13826 (match_operand:V_128 2 "general_operand")))]
13828 && (GET_MODE_NUNITS (<V_128:MODE>mode)
13829 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
13831 bool ok = ix86_expand_int_vcond (operands);

;; 128-bit, unsigned, 64-bit element comparison.
13836 (define_expand "vcondu<VI8F_128:mode>v2di"
13837 [(set (match_operand:VI8F_128 0 "register_operand")
13838 (if_then_else:VI8F_128
13839 (match_operator 3 ""
13840 [(match_operand:V2DI 4 "vector_operand")
13841 (match_operand:V2DI 5 "vector_operand")])
13842 (match_operand:VI8F_128 1 "general_operand")
13843 (match_operand:VI8F_128 2 "general_operand")))]
13846 bool ok = ix86_expand_int_vcond (operands);

;; 128-bit, equality-only 64-bit element comparison.
13851 (define_expand "vcondeq<VI8F_128:mode>v2di"
13852 [(set (match_operand:VI8F_128 0 "register_operand")
13853 (if_then_else:VI8F_128
13854 (match_operator 3 ""
13855 [(match_operand:V2DI 4 "vector_operand")
13856 (match_operand:V2DI 5 "general_operand")])
13857 (match_operand:VI8F_128 1)
13858 (match_operand:VI8F_128 2)))]
13861 bool ok = ix86_expand_int_vcond (operands);
;; Modes for which a generic two-input vector permute can be expanded:
;; all 128-bit modes unconditionally, 256-bit under AVX2, 512-bit under
;; AVX512F (V32HI/V64QI need AVX512BW/AVX512VBMI respectively).
13866 (define_mode_iterator VEC_PERM_AVX2
13867 [V16QI V8HI V4SI V2DI V4SF V2DF
13868 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
13869 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
13870 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
13871 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
13872 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
13873 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
;; vec_perm: permute operands 1/2 by integer selector operand 3, lowered
;; by ix86_expand_vec_perm.  (Expander epilogue not visible in this extract.)
13875 (define_expand "vec_perm<mode>"
13876 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
13877 (match_operand:VEC_PERM_AVX2 1 "register_operand")
13878 (match_operand:VEC_PERM_AVX2 2 "register_operand")
13879 (match_operand:<sseintvecmode> 3 "register_operand")]
13880 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
13882 ix86_expand_vec_perm (operands);
13886 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13888 ;; Parallel bitwise logical operations
13890 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement: expanded as XOR with an all-ones constant.  Without
;; AVX512F the all-ones mask is forced into a register; with AVX512F the
;; constant form is kept (vpternlog can absorb it).
13892 (define_expand "one_cmpl<mode>2"
13893 [(set (match_operand:VI 0 "register_operand")
13894 (xor:VI (match_operand:VI 1 "vector_operand")
13898 if (!TARGET_AVX512F)
13899 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
13901 operands[2] = CONSTM1_RTX (<MODE>mode);
;; one_cmpl insn, emitted as vpternlog with immediate 0x55 (= NOT of the
;; first source in the ternary truth table).  Without AVX512VL the operands
;; are widened to 512-bit registers via the %g modifiers.
13904 (define_insn "<mask_codefor>one_cmpl<mode>2<mask_name>"
13905 [(set (match_operand:VI 0 "register_operand" "=v,v")
13906 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "v,m")
13907 (match_operand:VI 2 "vector_all_ones_operand" "BC,BC")))]
13909 && (!<mask_applied>
13910 || <ssescalarmode>mode == SImode
13911 || <ssescalarmode>mode == DImode)"
13913 if (TARGET_AVX512VL)
13914 return "vpternlog<ternlogsuffix>\t{$0x55, %1, %0, %0<mask_operand3>|%0<mask_operand3>, %0, %1, 0x55}";
13916 return "vpternlog<ternlogsuffix>\t{$0x55, %g1, %g0, %g0<mask_operand3>|%g0<mask_operand3>, %g0, %g1, 0x55}";
13918 [(set_attr "type" "sselog")
13919 (set_attr "prefix" "evex")
13921 (if_then_else (match_test "TARGET_AVX512VL")
13922 (const_string "<sseinsnmode>")
13923 (const_string "XI")))
;; Memory alternative is only valid at 512 bits or with AVX512VL.
13924 (set (attr "enabled")
13925 (if_then_else (eq_attr "alternative" "1")
13926 (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
;; pandn/vpandn expanders: dest = ~op1 & op2 (note the NOT applies to
;; operand 1, matching the hardware andnot semantics).
13929 (define_expand "<sse2_avx2>_andnot<mode>3"
13930 [(set (match_operand:VI_AVX2 0 "register_operand")
13932 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
13933 (match_operand:VI_AVX2 2 "vector_operand")))]
;; Masked andnot for 4/8-byte elements: result merged with operand 3 under
;; mask operand 4.
13936 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
13937 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
13938 (vec_merge:VI48_AVX512VL
13941 (match_operand:VI48_AVX512VL 1 "register_operand"))
13942 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
13943 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
13944 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
;; Masked andnot for 1/2-byte elements (AVX512BW-style modes).
13947 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
13948 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
13949 (vec_merge:VI12_AVX512VL
13952 (match_operand:VI12_AVX512VL 1 "register_operand"))
13953 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
13954 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
13955 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
;; andnot insn: dest = ~op1 & op2.  The C body picks the mnemonic prefix
;; and element-size suffix from the insn's mode attribute and the selected
;; alternative (SSE / AVX / EVEX forms).  NOTE(review): several case labels
;; and assignments are missing from this extract — gaps in the embedded
;; original line numbers.
13958 (define_insn "*andnot<mode>3"
13959 [(set (match_operand:VI 0 "register_operand" "=x,x,v")
13961 (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
13962 (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
13968 const char *ssesuffix;
13970 switch (get_attr_mode (insn))
13973 gcc_assert (TARGET_AVX512F);
13976 gcc_assert (TARGET_AVX2);
13979 gcc_assert (TARGET_SSE2);
13981 switch (<MODE>mode)
;; Byte/word element modes have no vpandnb/vpandnw; the quadword form is
;; used instead (bitwise ops are element-size agnostic).
13985 /* There is no vpandnb or vpandnw instruction, nor vpandn for
13986 512-bit vectors. Use vpandnq instead. */
13991 ssesuffix = "<ssemodesuffix>";
13997 ssesuffix = (TARGET_AVX512VL && which_alternative == 2
13998 ? "<ssemodesuffix>" : "");
14001 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
14006 gcc_assert (TARGET_AVX512F);
14009 gcc_assert (TARGET_AVX);
14012 gcc_assert (TARGET_SSE);
14018 gcc_unreachable ();
;; Alternative 0 = legacy two-operand form, others = VEX/EVEX three-operand.
14021 switch (which_alternative)
14024 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
14028 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
14031 gcc_unreachable ();
14034 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
14035 output_asm_insn (buf, operands);
14038 [(set_attr "isa" "noavx,avx,avx")
14039 (set_attr "type" "sselog")
14040 (set (attr "prefix_data16")
14042 (and (eq_attr "alternative" "0")
14043 (eq_attr "mode" "TI"))
14045 (const_string "*")))
14046 (set_attr "prefix" "orig,vex,evex")
14048 (cond [(match_test "TARGET_AVX2")
14049 (const_string "<sseinsnmode>")
14050 (match_test "TARGET_AVX")
14052 (match_test "<MODE_SIZE> > 16")
14053 (const_string "V8SF")
14054 (const_string "<sseinsnmode>"))
14055 (ior (not (match_test "TARGET_SSE2"))
14056 (match_test "optimize_function_for_size_p (cfun)"))
14057 (const_string "V4SF")
14059 (const_string "<sseinsnmode>")))])
;; Masked andnot insn: vpandn{d,q} with merge (%N3) / zero masking via %{%4%}.
14061 (define_insn "*andnot<mode>3_mask"
14062 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
14063 (vec_merge:VI48_AVX512VL
14066 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
14067 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
14068 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
14069 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
14071 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
14072 [(set_attr "type" "sselog")
14073 (set_attr "prefix" "evex")
14074 (set_attr "mode" "<sseinsnmode>")])
;; Generic and/ior/xor expander (<code> iterates over the logic codes);
;; lowered by ix86_expand_vector_logical_operator.
14076 (define_expand "<code><mode>3"
14077 [(set (match_operand:VI 0 "register_operand")
14079 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
14080 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
14083 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Conditional (masked) logic expander: forwards to the _mask insn pattern.
;; Note operand 1 is the mask here, per the cond_* optab convention.
14087 (define_expand "cond_<code><mode>"
14088 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
14089 (vec_merge:VI48_AVX512VL
14090 (any_logic:VI48_AVX512VL
14091 (match_operand:VI48_AVX512VL 2 "vector_operand")
14092 (match_operand:VI48_AVX512VL 3 "vector_operand"))
14093 (match_operand:VI48_AVX512VL 4 "nonimm_or_0_operand")
14094 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
14097 emit_insn (gen_<code><mode>3_mask (operands[0],
;; and/ior/xor insn for 4/8-byte elements, optionally masked.  The C body
;; composes the mnemonic (p<logic> / vp<logic><suffix>) from the attribute
;; mode and alternative, as in *andnot<mode>3 above.  NOTE(review): some
;; case labels/assignments are missing from this extract.
14105 (define_insn "<mask_codefor><code><mode>3<mask_name>"
14106 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
14107 (any_logic:VI48_AVX_AVX512F
14108 (match_operand:VI48_AVX_AVX512F 1 "bcst_vector_operand" "%0,x,v")
14109 (match_operand:VI48_AVX_AVX512F 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
14110 "TARGET_SSE && <mask_mode512bit_condition>
14111 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
14116 const char *ssesuffix;
14118 switch (get_attr_mode (insn))
14121 gcc_assert (TARGET_AVX512F);
14124 gcc_assert (TARGET_AVX2);
14127 gcc_assert (TARGET_SSE2);
14129 switch (<MODE>mode)
14133 ssesuffix = "<ssemodesuffix>";
;; Element-size suffix only needed when masking or using EVEX regs.
14139 ssesuffix = (TARGET_AVX512VL
14140 && (<mask_applied> || which_alternative == 2)
14141 ? "<ssemodesuffix>" : "");
14144 gcc_unreachable ();
14149 gcc_assert (TARGET_AVX);
14152 gcc_assert (TARGET_SSE);
14158 gcc_unreachable ();
14161 switch (which_alternative)
;; Alternative 0: two-operand legacy form unless a mask is applied.
14164 if (<mask_applied>)
14165 ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
14167 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
14171 ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
14174 gcc_unreachable ();
14177 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
14178 output_asm_insn (buf, operands);
14181 [(set_attr "isa" "noavx,avx,avx")
14182 (set_attr "type" "sselog")
14183 (set (attr "prefix_data16")
14185 (and (eq_attr "alternative" "0")
14186 (eq_attr "mode" "TI"))
14188 (const_string "*")))
14189 (set_attr "prefix" "<mask_prefix3>,evex")
14191 (cond [(match_test "TARGET_AVX2")
14192 (const_string "<sseinsnmode>")
14193 (match_test "TARGET_AVX")
14195 (match_test "<MODE_SIZE> > 16")
14196 (const_string "V8SF")
14197 (const_string "<sseinsnmode>"))
14198 (ior (not (match_test "TARGET_SSE2"))
14199 (match_test "optimize_function_for_size_p (cfun)"))
14200 (const_string "V4SF")
14202 (const_string "<sseinsnmode>")))])
;; and/ior/xor insn for 1/2-byte elements (no masking: AVX512 has no
;; byte/word logic forms, so the "q" suffix is used under AVX512VL).
;; NOTE(review): some case labels/assignments are missing from this extract.
14204 (define_insn "*<code><mode>3"
14205 [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
14206 (any_logic:VI12_AVX_AVX512F
14207 (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
14208 (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
14209 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
14214 const char *ssesuffix;
14216 switch (get_attr_mode (insn))
14219 gcc_assert (TARGET_AVX512F);
14222 gcc_assert (TARGET_AVX2);
14225 gcc_assert (TARGET_SSE2);
14227 switch (<MODE>mode)
14237 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
14240 gcc_unreachable ();
14245 gcc_assert (TARGET_AVX);
14248 gcc_assert (TARGET_SSE);
14254 gcc_unreachable ();
14257 switch (which_alternative)
14260 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
14264 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
14267 gcc_unreachable ();
14270 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
14271 output_asm_insn (buf, operands);
14274 [(set_attr "isa" "noavx,avx,avx")
14275 (set_attr "type" "sselog")
14276 (set (attr "prefix_data16")
14278 (and (eq_attr "alternative" "0")
14279 (eq_attr "mode" "TI"))
14281 (const_string "*")))
14282 (set_attr "prefix" "orig,vex,evex")
14284 (cond [(match_test "TARGET_AVX2")
14285 (const_string "<sseinsnmode>")
14286 (match_test "TARGET_AVX")
14288 (match_test "<MODE_SIZE> > 16")
14289 (const_string "V8SF")
14290 (const_string "<sseinsnmode>"))
14291 (ior (not (match_test "TARGET_SSE2"))
14292 (match_test "optimize_function_for_size_p (cfun)"))
14293 (const_string "V4SF")
14295 (const_string "<sseinsnmode>")))])
;; Mask modes wider than the tested vector's natural mask mode; SI/DI masks
;; require AVX512BW.
14297 (define_mode_iterator AVX512ZEXTMASK
14298 [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
;; vptestm: mask bit set where (op1 & op2) element is nonzero.
14300 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
14301 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
14302 (unspec:<avx512fmaskmode>
14303 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
14304 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
14307 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
14308 [(set_attr "prefix" "evex")
14309 (set_attr "mode" "<sseinsnmode>")])
;; vptestnm: mask bit set where (op1 & op2) element is zero.
14311 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
14312 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
14313 (unspec:<avx512fmaskmode>
14314 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
14315 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
14318 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
14319 [(set_attr "prefix" "evex")
14320 (set_attr "mode" "<sseinsnmode>")])
;; vptestm/vptestnm variants whose mask result is zero-extended into a wider
;; mask register (the instruction itself writes zeros into the upper mask
;; bits, so the zero_extend is free).  The condition restricts these to
;; genuinely wider destination masks.
14322 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext"
14323 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
14324 (zero_extend:AVX512ZEXTMASK
14325 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
14326 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
14327 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
14330 && (<AVX512ZEXTMASK:MODE_SIZE>
14331 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
14332 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14333 [(set_attr "prefix" "evex")
14334 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
;; As above but with an additional mask operand ANDed into the result
;; (write-masked test, %{%3%}).
14336 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask"
14337 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
14338 (zero_extend:AVX512ZEXTMASK
14339 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
14340 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
14341 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
14342 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
14344 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
14346 && (<AVX512ZEXTMASK:MODE_SIZE>
14347 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
14348 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
14349 [(set_attr "prefix" "evex")
14350 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
;; Zero-extended vptestnm (test-for-zero) variant.
14352 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext"
14353 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
14354 (zero_extend:AVX512ZEXTMASK
14355 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
14356 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
14357 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
14360 && (<AVX512ZEXTMASK:MODE_SIZE>
14361 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
14362 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14363 [(set_attr "prefix" "evex")
14364 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
;; Zero-extended, write-masked vptestnm variant.
14366 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask"
14367 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
14368 (zero_extend:AVX512ZEXTMASK
14369 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
14370 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
14371 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
14372 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
14374 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
14376 && (<AVX512ZEXTMASK:MODE_SIZE>
14377 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
14378 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
14379 [(set_attr "prefix" "evex")
14380 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
14382 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14384 ;; Parallel integral element swizzling
14386 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector pack-with-truncation: implemented by viewing both inputs in the
;; packed (narrow) mode and extracting the even elements of the pair.
14388 (define_expand "vec_pack_trunc_<mode>"
14389 [(match_operand:<ssepackmode> 0 "register_operand")
14390 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
14391 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
14394 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
14395 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
14396 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; Pack two QImode mask values into HImode: op2 shifted high, op1 low.
14400 (define_expand "vec_pack_trunc_qi"
14401 [(set (match_operand:HI 0 "register_operand")
14402 (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand"))
14404 (zero_extend:HI (match_operand:QI 1 "register_operand"))))]
;; Same shape for HI/SI mask modes; operand 3 (the shift count) is set to
;; the source mode's bit size.
14407 (define_expand "vec_pack_trunc_<mode>"
14408 [(set (match_operand:<DOUBLEMASKMODE> 0 "register_operand")
14409 (ior:<DOUBLEMASKMODE>
14410 (ashift:<DOUBLEMASKMODE>
14411 (zero_extend:<DOUBLEMASKMODE>
14412 (match_operand:SWI24 2 "register_operand"))
14414 (zero_extend:<DOUBLEMASKMODE>
14415 (match_operand:SWI24 1 "register_operand"))))]
14418 operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
;; Pack two sub-byte boolean masks (operand 3 = total element count, 4 or 8)
;; into one QImode mask: mask op1 to its low half, shift op2 up, OR them.
;; Without AVX512DQ the shift is done in HImode (no kshiftb).
14421 (define_expand "vec_pack_sbool_trunc_qi"
14422 [(match_operand:QI 0 "register_operand")
14423 (match_operand:QI 1 "register_operand")
14424 (match_operand:QI 2 "register_operand")
14425 (match_operand:QI 3 "const_int_operand")]
14428 HOST_WIDE_INT nunits = INTVAL (operands[3]);
14429 rtx mask, tem1, tem2;
14430 if (nunits != 8 && nunits != 4)
14432 mask = gen_reg_rtx (QImode);
14433 emit_move_insn (mask, GEN_INT ((1 << (nunits / 2)) - 1));
14434 tem1 = gen_reg_rtx (QImode);
14435 emit_insn (gen_kandqi (tem1, operands[1], mask));
14436 if (TARGET_AVX512DQ)
14438 tem2 = gen_reg_rtx (QImode);
14439 emit_insn (gen_kashiftqi (tem2, operands[2],
14440 GEN_INT (nunits / 2)));
14444 tem2 = gen_reg_rtx (HImode);
14445 emit_insn (gen_kashifthi (tem2, lowpart_subreg (HImode, operands[2],
14447 GEN_INT (nunits / 2)));
14448 tem2 = lowpart_subreg (QImode, tem2, HImode);
14450 emit_insn (gen_kiorqi (operands[0], tem1, tem2));
;; packsswb: signed-saturating word→byte pack, concatenating the truncated
;; halves of operands 1 and 2.  Alternative 0 = legacy SSE2, 1 = VEX/EVEX.
14454 (define_insn "<sse2_avx2>_packsswb<mask_name>"
14455 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
14456 (vec_concat:VI1_AVX512
14457 (ss_truncate:<ssehalfvecmode>
14458 (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
14459 (ss_truncate:<ssehalfvecmode>
14460 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
14461 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14463 packsswb\t{%2, %0|%0, %2}
14464 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14465 [(set_attr "isa" "noavx,avx")
14466 (set_attr "type" "sselog")
14467 (set_attr "prefix_data16" "1,*")
14468 (set_attr "prefix" "orig,<mask_prefix>")
14469 (set_attr "mode" "<sseinsnmode>")])
;; packssdw: signed-saturating dword→word pack.
14471 (define_insn "<sse2_avx2>_packssdw<mask_name>"
14472 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
14473 (vec_concat:VI2_AVX2
14474 (ss_truncate:<ssehalfvecmode>
14475 (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
14476 (ss_truncate:<ssehalfvecmode>
14477 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
14478 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14480 packssdw\t{%2, %0|%0, %2}
14481 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14482 [(set_attr "isa" "noavx,avx")
14483 (set_attr "type" "sselog")
14484 (set_attr "prefix_data16" "1,*")
14485 (set_attr "prefix" "orig,<mask_prefix>")
14486 (set_attr "mode" "<sseinsnmode>")])
;; packuswb: unsigned-saturating word→byte pack.
14488 (define_insn "<sse2_avx2>_packuswb<mask_name>"
14489 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
14490 (vec_concat:VI1_AVX512
14491 (us_truncate:<ssehalfvecmode>
14492 (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
14493 (us_truncate:<ssehalfvecmode>
14494 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
14495 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14497 packuswb\t{%2, %0|%0, %2}
14498 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14499 [(set_attr "isa" "noavx,avx")
14500 (set_attr "type" "sselog")
14501 (set_attr "prefix_data16" "1,*")
14502 (set_attr "prefix" "orig,<mask_prefix>")
14503 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw, 512-bit: interleave the high bytes of each 128-bit lane of
;; operands 1 and 2 (the vec_select indices encode the per-lane pairing).
14505 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
14506 [(set (match_operand:V64QI 0 "register_operand" "=v")
14509 (match_operand:V64QI 1 "register_operand" "v")
14510 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
14511 (parallel [(const_int 8) (const_int 72)
14512 (const_int 9) (const_int 73)
14513 (const_int 10) (const_int 74)
14514 (const_int 11) (const_int 75)
14515 (const_int 12) (const_int 76)
14516 (const_int 13) (const_int 77)
14517 (const_int 14) (const_int 78)
14518 (const_int 15) (const_int 79)
14519 (const_int 24) (const_int 88)
14520 (const_int 25) (const_int 89)
14521 (const_int 26) (const_int 90)
14522 (const_int 27) (const_int 91)
14523 (const_int 28) (const_int 92)
14524 (const_int 29) (const_int 93)
14525 (const_int 30) (const_int 94)
14526 (const_int 31) (const_int 95)
14527 (const_int 40) (const_int 104)
14528 (const_int 41) (const_int 105)
14529 (const_int 42) (const_int 106)
14530 (const_int 43) (const_int 107)
14531 (const_int 44) (const_int 108)
14532 (const_int 45) (const_int 109)
14533 (const_int 46) (const_int 110)
14534 (const_int 47) (const_int 111)
14535 (const_int 56) (const_int 120)
14536 (const_int 57) (const_int 121)
14537 (const_int 58) (const_int 122)
14538 (const_int 59) (const_int 123)
14539 (const_int 60) (const_int 124)
14540 (const_int 61) (const_int 125)
14541 (const_int 62) (const_int 126)
14542 (const_int 63) (const_int 127)])))]
14544 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14545 [(set_attr "type" "sselog")
14546 (set_attr "prefix" "evex")
14547 (set_attr "mode" "XI")])
;; vpunpckhbw, 256-bit (AVX2), same per-128-bit-lane interleave.
14549 (define_insn "avx2_interleave_highv32qi<mask_name>"
14550 [(set (match_operand:V32QI 0 "register_operand" "=Yw")
14553 (match_operand:V32QI 1 "register_operand" "Yw")
14554 (match_operand:V32QI 2 "nonimmediate_operand" "Ywm"))
14555 (parallel [(const_int 8) (const_int 40)
14556 (const_int 9) (const_int 41)
14557 (const_int 10) (const_int 42)
14558 (const_int 11) (const_int 43)
14559 (const_int 12) (const_int 44)
14560 (const_int 13) (const_int 45)
14561 (const_int 14) (const_int 46)
14562 (const_int 15) (const_int 47)
14563 (const_int 24) (const_int 56)
14564 (const_int 25) (const_int 57)
14565 (const_int 26) (const_int 58)
14566 (const_int 27) (const_int 59)
14567 (const_int 28) (const_int 60)
14568 (const_int 29) (const_int 61)
14569 (const_int 30) (const_int 62)
14570 (const_int 31) (const_int 63)])))]
14571 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14572 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14573 [(set_attr "type" "sselog")
14574 (set_attr "prefix" "<mask_prefix>")
14575 (set_attr "mode" "OI")])
;; punpckhbw/vpunpckhbw, 128-bit: alternative 0 = legacy SSE2 two-operand,
;; alternative 1 = VEX/EVEX three-operand.
14577 (define_insn "vec_interleave_highv16qi<mask_name>"
14578 [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
14581 (match_operand:V16QI 1 "register_operand" "0,Yw")
14582 (match_operand:V16QI 2 "vector_operand" "xBm,Ywm"))
14583 (parallel [(const_int 8) (const_int 24)
14584 (const_int 9) (const_int 25)
14585 (const_int 10) (const_int 26)
14586 (const_int 11) (const_int 27)
14587 (const_int 12) (const_int 28)
14588 (const_int 13) (const_int 29)
14589 (const_int 14) (const_int 30)
14590 (const_int 15) (const_int 31)])))]
14591 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14593 punpckhbw\t{%2, %0|%0, %2}
14594 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14595 [(set_attr "isa" "noavx,avx")
14596 (set_attr "type" "sselog")
14597 (set_attr "prefix_data16" "1,*")
14598 (set_attr "prefix" "orig,<mask_prefix>")
14599 (set_attr "mode" "TI")])
;; vpunpcklbw, 512-bit: interleave the low bytes of each 128-bit lane.
14601 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
14602 [(set (match_operand:V64QI 0 "register_operand" "=v")
14605 (match_operand:V64QI 1 "register_operand" "v")
14606 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
14607 (parallel [(const_int 0) (const_int 64)
14608 (const_int 1) (const_int 65)
14609 (const_int 2) (const_int 66)
14610 (const_int 3) (const_int 67)
14611 (const_int 4) (const_int 68)
14612 (const_int 5) (const_int 69)
14613 (const_int 6) (const_int 70)
14614 (const_int 7) (const_int 71)
14615 (const_int 16) (const_int 80)
14616 (const_int 17) (const_int 81)
14617 (const_int 18) (const_int 82)
14618 (const_int 19) (const_int 83)
14619 (const_int 20) (const_int 84)
14620 (const_int 21) (const_int 85)
14621 (const_int 22) (const_int 86)
14622 (const_int 23) (const_int 87)
14623 (const_int 32) (const_int 96)
14624 (const_int 33) (const_int 97)
14625 (const_int 34) (const_int 98)
14626 (const_int 35) (const_int 99)
14627 (const_int 36) (const_int 100)
14628 (const_int 37) (const_int 101)
14629 (const_int 38) (const_int 102)
14630 (const_int 39) (const_int 103)
14631 (const_int 48) (const_int 112)
14632 (const_int 49) (const_int 113)
14633 (const_int 50) (const_int 114)
14634 (const_int 51) (const_int 115)
14635 (const_int 52) (const_int 116)
14636 (const_int 53) (const_int 117)
14637 (const_int 54) (const_int 118)
14638 (const_int 55) (const_int 119)])))]
14640 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14641 [(set_attr "type" "sselog")
14642 (set_attr "prefix" "evex")
14643 (set_attr "mode" "XI")])
;; vpunpcklbw, 256-bit (AVX2).
14645 (define_insn "avx2_interleave_lowv32qi<mask_name>"
14646 [(set (match_operand:V32QI 0 "register_operand" "=Yw")
14649 (match_operand:V32QI 1 "register_operand" "Yw")
14650 (match_operand:V32QI 2 "nonimmediate_operand" "Ywm"))
14651 (parallel [(const_int 0) (const_int 32)
14652 (const_int 1) (const_int 33)
14653 (const_int 2) (const_int 34)
14654 (const_int 3) (const_int 35)
14655 (const_int 4) (const_int 36)
14656 (const_int 5) (const_int 37)
14657 (const_int 6) (const_int 38)
14658 (const_int 7) (const_int 39)
14659 (const_int 16) (const_int 48)
14660 (const_int 17) (const_int 49)
14661 (const_int 18) (const_int 50)
14662 (const_int 19) (const_int 51)
14663 (const_int 20) (const_int 52)
14664 (const_int 21) (const_int 53)
14665 (const_int 22) (const_int 54)
14666 (const_int 23) (const_int 55)])))]
14667 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14668 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14669 [(set_attr "type" "sselog")
14670 (set_attr "prefix" "maybe_vex")
14671 (set_attr "mode" "OI")])
;; punpcklbw/vpunpcklbw, 128-bit (legacy + VEX alternatives).
14673 (define_insn "vec_interleave_lowv16qi<mask_name>"
14674 [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
14677 (match_operand:V16QI 1 "register_operand" "0,Yw")
14678 (match_operand:V16QI 2 "vector_operand" "xBm,Ywm"))
14679 (parallel [(const_int 0) (const_int 16)
14680 (const_int 1) (const_int 17)
14681 (const_int 2) (const_int 18)
14682 (const_int 3) (const_int 19)
14683 (const_int 4) (const_int 20)
14684 (const_int 5) (const_int 21)
14685 (const_int 6) (const_int 22)
14686 (const_int 7) (const_int 23)])))]
14687 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14689 punpcklbw\t{%2, %0|%0, %2}
14690 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14691 [(set_attr "isa" "noavx,avx")
14692 (set_attr "type" "sselog")
14693 (set_attr "prefix_data16" "1,*")
14694 (set_attr "prefix" "orig,vex")
14695 (set_attr "mode" "TI")])
;; vpunpckhwd, 512-bit: interleave the high words of each 128-bit lane.
14697 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
14698 [(set (match_operand:V32HI 0 "register_operand" "=v")
14701 (match_operand:V32HI 1 "register_operand" "v")
14702 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
14703 (parallel [(const_int 4) (const_int 36)
14704 (const_int 5) (const_int 37)
14705 (const_int 6) (const_int 38)
14706 (const_int 7) (const_int 39)
14707 (const_int 12) (const_int 44)
14708 (const_int 13) (const_int 45)
14709 (const_int 14) (const_int 46)
14710 (const_int 15) (const_int 47)
14711 (const_int 20) (const_int 52)
14712 (const_int 21) (const_int 53)
14713 (const_int 22) (const_int 54)
14714 (const_int 23) (const_int 55)
14715 (const_int 28) (const_int 60)
14716 (const_int 29) (const_int 61)
14717 (const_int 30) (const_int 62)
14718 (const_int 31) (const_int 63)])))]
14720 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14721 [(set_attr "type" "sselog")
14722 (set_attr "prefix" "evex")
14723 (set_attr "mode" "XI")])
;; vpunpckhwd, 256-bit (AVX2).
14725 (define_insn "avx2_interleave_highv16hi<mask_name>"
14726 [(set (match_operand:V16HI 0 "register_operand" "=Yw")
14729 (match_operand:V16HI 1 "register_operand" "Yw")
14730 (match_operand:V16HI 2 "nonimmediate_operand" "Ywm"))
14731 (parallel [(const_int 4) (const_int 20)
14732 (const_int 5) (const_int 21)
14733 (const_int 6) (const_int 22)
14734 (const_int 7) (const_int 23)
14735 (const_int 12) (const_int 28)
14736 (const_int 13) (const_int 29)
14737 (const_int 14) (const_int 30)
14738 (const_int 15) (const_int 31)])))]
14739 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14740 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14741 [(set_attr "type" "sselog")
14742 (set_attr "prefix" "maybe_evex")
14743 (set_attr "mode" "OI")])
;; punpckhwd/vpunpckhwd, 128-bit (legacy + VEX alternatives).
14745 (define_insn "vec_interleave_highv8hi<mask_name>"
14746 [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
14749 (match_operand:V8HI 1 "register_operand" "0,Yw")
14750 (match_operand:V8HI 2 "vector_operand" "xBm,Ywm"))
14751 (parallel [(const_int 4) (const_int 12)
14752 (const_int 5) (const_int 13)
14753 (const_int 6) (const_int 14)
14754 (const_int 7) (const_int 15)])))]
14755 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14757 punpckhwd\t{%2, %0|%0, %2}
14758 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14759 [(set_attr "isa" "noavx,avx")
14760 (set_attr "type" "sselog")
14761 (set_attr "prefix_data16" "1,*")
14762 (set_attr "prefix" "orig,maybe_vex")
14763 (set_attr "mode" "TI")])
;; vpunpcklwd, 512-bit: interleave the low words of each 128-bit lane.
14765 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
14766 [(set (match_operand:V32HI 0 "register_operand" "=v")
14769 (match_operand:V32HI 1 "register_operand" "v")
14770 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
14771 (parallel [(const_int 0) (const_int 32)
14772 (const_int 1) (const_int 33)
14773 (const_int 2) (const_int 34)
14774 (const_int 3) (const_int 35)
14775 (const_int 8) (const_int 40)
14776 (const_int 9) (const_int 41)
14777 (const_int 10) (const_int 42)
14778 (const_int 11) (const_int 43)
14779 (const_int 16) (const_int 48)
14780 (const_int 17) (const_int 49)
14781 (const_int 18) (const_int 50)
14782 (const_int 19) (const_int 51)
14783 (const_int 24) (const_int 56)
14784 (const_int 25) (const_int 57)
14785 (const_int 26) (const_int 58)
14786 (const_int 27) (const_int 59)])))]
14788 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14789 [(set_attr "type" "sselog")
14790 (set_attr "prefix" "evex")
14791 (set_attr "mode" "XI")])
;; vpunpcklwd, 256-bit (AVX2).
14793 (define_insn "avx2_interleave_lowv16hi<mask_name>"
14794 [(set (match_operand:V16HI 0 "register_operand" "=Yw")
14797 (match_operand:V16HI 1 "register_operand" "Yw")
14798 (match_operand:V16HI 2 "nonimmediate_operand" "Ywm"))
14799 (parallel [(const_int 0) (const_int 16)
14800 (const_int 1) (const_int 17)
14801 (const_int 2) (const_int 18)
14802 (const_int 3) (const_int 19)
14803 (const_int 8) (const_int 24)
14804 (const_int 9) (const_int 25)
14805 (const_int 10) (const_int 26)
14806 (const_int 11) (const_int 27)])))]
14807 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14808 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14809 [(set_attr "type" "sselog")
14810 (set_attr "prefix" "maybe_evex")
14811 (set_attr "mode" "OI")])
;; punpcklwd/vpunpcklwd, 128-bit (legacy + VEX alternatives).
14813 (define_insn "vec_interleave_lowv8hi<mask_name>"
14814 [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
14817 (match_operand:V8HI 1 "register_operand" "0,Yw")
14818 (match_operand:V8HI 2 "vector_operand" "xBm,Ywm"))
14819 (parallel [(const_int 0) (const_int 8)
14820 (const_int 1) (const_int 9)
14821 (const_int 2) (const_int 10)
14822 (const_int 3) (const_int 11)])))]
14823 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
14825 punpcklwd\t{%2, %0|%0, %2}
14826 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14827 [(set_attr "isa" "noavx,avx")
14828 (set_attr "type" "sselog")
14829 (set_attr "prefix_data16" "1,*")
14830 (set_attr "prefix" "orig,maybe_evex")
14831 (set_attr "mode" "TI")])
;; 256-bit VPUNPCKHDQ: interleave the high doublewords of each 128-bit
;; lane (indices 2/10, 3/11 for the low lane; 6/14, 7/15 for the high
;; lane).  Maskable under AVX512VL via <mask_name>.
14833 (define_insn "avx2_interleave_highv8si<mask_name>"
14834 [(set (match_operand:V8SI 0 "register_operand" "=v")
14837 (match_operand:V8SI 1 "register_operand" "v")
14838 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
14839 (parallel [(const_int 2) (const_int 10)
14840 (const_int 3) (const_int 11)
14841 (const_int 6) (const_int 14)
14842 (const_int 7) (const_int 15)])))]
14843 "TARGET_AVX2 && <mask_avx512vl_condition>"
14844 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14845 [(set_attr "type" "sselog")
14846 (set_attr "prefix" "maybe_evex")
14847 (set_attr "mode" "OI")])
;; 512-bit VPUNPCKHDQ: interleave high doublewords of all four 128-bit
;; lanes (2/18, 3/19 | 6/22, 7/23 | 10/26, 11/27 | 14/30, 15/31).
;; NOTE(review): the insn condition line appears to be missing from
;; this excerpt (numbering gap at 14863); code kept verbatim.
14849 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
14850 [(set (match_operand:V16SI 0 "register_operand" "=v")
14853 (match_operand:V16SI 1 "register_operand" "v")
14854 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
14855 (parallel [(const_int 2) (const_int 18)
14856 (const_int 3) (const_int 19)
14857 (const_int 6) (const_int 22)
14858 (const_int 7) (const_int 23)
14859 (const_int 10) (const_int 26)
14860 (const_int 11) (const_int 27)
14861 (const_int 14) (const_int 30)
14862 (const_int 15) (const_int 31)])))]
14864 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14865 [(set_attr "type" "sselog")
14866 (set_attr "prefix" "evex")
14867 (set_attr "mode" "XI")])
;; 128-bit PUNPCKHDQ: interleave the two high doublewords of operands
;; 1 and 2 (indices 2/6, 3/7).  noavx alternative ties operand 0 to
;; operand 1; the avx alternative is three-operand and maskable.
14870 (define_insn "vec_interleave_highv4si<mask_name>"
14871 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
14874 (match_operand:V4SI 1 "register_operand" "0,v")
14875 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
14876 (parallel [(const_int 2) (const_int 6)
14877 (const_int 3) (const_int 7)])))]
14878 "TARGET_SSE2 && <mask_avx512vl_condition>"
14880 punpckhdq\t{%2, %0|%0, %2}
14881 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14882 [(set_attr "isa" "noavx,avx")
14883 (set_attr "type" "sselog")
14884 (set_attr "prefix_data16" "1,*")
14885 (set_attr "prefix" "orig,maybe_vex")
14886 (set_attr "mode" "TI")])
;; 256-bit VPUNPCKLDQ: interleave the low doublewords of each 128-bit
;; lane (0/8, 1/9 for the low lane; 4/12, 5/13 for the high lane).
14888 (define_insn "avx2_interleave_lowv8si<mask_name>"
14889 [(set (match_operand:V8SI 0 "register_operand" "=v")
14892 (match_operand:V8SI 1 "register_operand" "v")
14893 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
14894 (parallel [(const_int 0) (const_int 8)
14895 (const_int 1) (const_int 9)
14896 (const_int 4) (const_int 12)
14897 (const_int 5) (const_int 13)])))]
14898 "TARGET_AVX2 && <mask_avx512vl_condition>"
14899 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14900 [(set_attr "type" "sselog")
14901 (set_attr "prefix" "maybe_evex")
14902 (set_attr "mode" "OI")])
;; 512-bit VPUNPCKLDQ: interleave low doublewords of all four 128-bit
;; lanes (0/16, 1/17 | 4/20, 5/21 | 8/24, 9/25 | 12/28, 13/29).
;; NOTE(review): the insn condition line appears to be missing from
;; this excerpt (numbering gap at 14918); code kept verbatim.
14904 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
14905 [(set (match_operand:V16SI 0 "register_operand" "=v")
14908 (match_operand:V16SI 1 "register_operand" "v")
14909 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
14910 (parallel [(const_int 0) (const_int 16)
14911 (const_int 1) (const_int 17)
14912 (const_int 4) (const_int 20)
14913 (const_int 5) (const_int 21)
14914 (const_int 8) (const_int 24)
14915 (const_int 9) (const_int 25)
14916 (const_int 12) (const_int 28)
14917 (const_int 13) (const_int 29)])))]
14919 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14920 [(set_attr "type" "sselog")
14921 (set_attr "prefix" "evex")
14922 (set_attr "mode" "XI")])
;; 128-bit PUNPCKLDQ: interleave the two low doublewords of operands
;; 1 and 2 (indices 0/4, 1/5).  Legacy two-operand and AVX
;; three-operand alternatives, as in the other vec_interleave insns.
14924 (define_insn "vec_interleave_lowv4si<mask_name>"
14925 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
14928 (match_operand:V4SI 1 "register_operand" "0,v")
14929 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
14930 (parallel [(const_int 0) (const_int 4)
14931 (const_int 1) (const_int 5)])))]
14932 "TARGET_SSE2 && <mask_avx512vl_condition>"
14934 punpckldq\t{%2, %0|%0, %2}
14935 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14936 [(set_attr "isa" "noavx,avx")
14937 (set_attr "type" "sselog")
14938 (set_attr "prefix_data16" "1,*")
14939 (set_attr "prefix" "orig,vex")
14940 (set_attr "mode" "TI")])
;; Whole-vector high interleave for 256-bit integer modes.  The AVX2
;; punpck insns above work per 128-bit lane, so this expander does a
;; lane-local low and high interleave, then uses vperm2i128
;; (avx2_permv2ti, selector 1 | 3<<4 = high halves of t1 and t2) to
;; stitch the two high lanes into the cross-lane result.
14942 (define_expand "vec_interleave_high<mode>"
14943 [(match_operand:VI_256 0 "register_operand")
14944 (match_operand:VI_256 1 "register_operand")
14945 (match_operand:VI_256 2 "nonimmediate_operand")]
14948 rtx t1 = gen_reg_rtx (<MODE>mode);
14949 rtx t2 = gen_reg_rtx (<MODE>mode);
14950 rtx t3 = gen_reg_rtx (V4DImode);
14951 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
14952 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
14953 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
14954 gen_lowpart (V4DImode, t2),
14955 GEN_INT (1 + (3 << 4))));
14956 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; Whole-vector low interleave for 256-bit integer modes.  Mirror of
;; vec_interleave_high<mode>: the permv2ti selector 0 | 2<<4 picks the
;; low 128-bit halves of the lane-local low/high interleave results.
14960 (define_expand "vec_interleave_low<mode>"
14961 [(match_operand:VI_256 0 "register_operand")
14962 (match_operand:VI_256 1 "register_operand")
14963 (match_operand:VI_256 2 "nonimmediate_operand")]
14966 rtx t1 = gen_reg_rtx (<MODE>mode);
14967 rtx t2 = gen_reg_rtx (<MODE>mode);
14968 rtx t3 = gen_reg_rtx (V4DImode);
14969 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
14970 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
14971 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
14972 gen_lowpart (V4DImode, t2),
14973 GEN_INT (0 + (2 << 4))));
14974 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
14978 ;; Modes handled by pinsr patterns.
;; V8HI (pinsrw) is baseline SSE2; byte/dword inserts need SSE4.1 and
;; qword insert additionally needs 64-bit mode (REX.W encoding).
14979 (define_mode_iterator PINSR_MODE
14980 [(V16QI "TARGET_SSE4_1") V8HI
14981 (V4SI "TARGET_SSE4_1")
14982 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; ISA tag used to build the pattern name (<sse2p4_1>_pinsr...).
14984 (define_mode_attr sse2p4_1
14985 [(V16QI "sse4_1") (V8HI "sse2")
14986 (V4SI "sse4_1") (V2DI "sse4_1")])
;; Which AVX-512 subset provides the EVEX-encoded insert for each
;; element size: byte/word need AVX512BW, dword/qword need AVX512DQ.
14988 (define_mode_attr pinsr_evex_isa
14989 [(V16QI "avx512bw") (V8HI "avx512bw")
14990 (V4SI "avx512dq") (V2DI "avx512dq")])
14992 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; PINSRB/W/D/Q: insert scalar operand 2 into vector operand 1 at the
;; element selected by operand 3.  Operand 3 arrives as a one-hot
;; vec_merge mask; the condition requires it to be a power of two and
;; output code converts it to the element index via exact_log2.
;; Alternatives: reg/mem source x {noavx tied-dest, avx 3-op, evex}.
;; For sub-SI elements the register source is printed as %k2 (32-bit
;; register name), since the insn reads the low bits of a GPR.
14993 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
14994 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
14995 (vec_merge:PINSR_MODE
14996 (vec_duplicate:PINSR_MODE
14997 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
14998 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
14999 (match_operand:SI 3 "const_int_operand")))]
15001 && ((unsigned) exact_log2 (INTVAL (operands[3]))
15002 < GET_MODE_NUNITS (<MODE>mode))"
15004 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
15006 switch (which_alternative)
15009 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
15010 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
15013 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
15016 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
15017 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
15021 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
15023 gcc_unreachable ();
15026 [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
15027 (set_attr "type" "sselog")
15028 (set (attr "prefix_rex")
15030 (and (not (match_test "TARGET_AVX"))
15031 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
15033 (const_string "*")))
15034 (set (attr "prefix_data16")
15036 (and (not (match_test "TARGET_AVX"))
15037 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
15039 (const_string "*")))
15040 (set (attr "prefix_extra")
15042 (and (not (match_test "TARGET_AVX"))
15043 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
15045 (const_string "1")))
15046 (set_attr "length_immediate" "1")
15047 (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
15048 (set_attr "mode" "TI")])
;; Masked VINSERTx32x4/64x2 expander: insert quarter-width operand 2
;; into 512-bit operand 1 at position operand 3 (0..3), merging under
;; mask operand 5 with fallback operand 4.  Builds the vec_merge
;; selector by clearing the 4 (dword) or 2 (qword) element bits that
;; correspond to the inserted quarter and passes it to the _1 insn.
15050 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
15051 [(match_operand:AVX512_VEC 0 "register_operand")
15052 (match_operand:AVX512_VEC 1 "register_operand")
15053 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
15054 (match_operand:SI 3 "const_0_to_3_operand")
15055 (match_operand:AVX512_VEC 4 "register_operand")
15056 (match_operand:<avx512fmaskmode> 5 "register_operand")]
15059 int mask, selector;
15060 mask = INTVAL (operands[3]);
15061 selector = (GET_MODE_UNIT_SIZE (<MODE>mode) == 4
15062 ? 0xFFFF ^ (0x000F << mask * 4)
15063 : 0xFF ^ (0x03 << mask * 2));
15064 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
15065 (operands[0], operands[1], operands[2], GEN_INT (selector),
15066 operands[4], operands[5]));
;; Insert into quarter 0 of a 512-bit vector.  Alternative 0 is the
;; real vinsert (operand 1 is a register); alternatives 1 and 2 have a
;; zero vector for operand 1 (reg_or_0 "C"), so the whole operation
;; degenerates to a zero-extending 128-bit load/move, emitted as the
;; appropriate vmovap*/vmovup*/vmovdq* depending on mode, alignment of
;; the memory operand, and whether an EVEX-only register forces the
;; ...64/...32 EVEX forms (which_alternative == 2).
15070 (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
15071 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v,x,Yv")
15072 (vec_merge:AVX512_VEC
15073 (match_operand:AVX512_VEC 1 "reg_or_0_operand" "v,C,C")
15074 (vec_duplicate:AVX512_VEC
15075 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm,xm,vm"))
15076 (match_operand:SI 3 "const_int_operand" "n,n,n")))]
15078 && (INTVAL (operands[3])
15079 == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))"
15081 if (which_alternative == 0)
15082 return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
15083 switch (<MODE>mode)
15086 if (misaligned_operand (operands[2], <ssequartermode>mode))
15087 return "vmovupd\t{%2, %x0|%x0, %2}";
15089 return "vmovapd\t{%2, %x0|%x0, %2}";
15091 if (misaligned_operand (operands[2], <ssequartermode>mode))
15092 return "vmovups\t{%2, %x0|%x0, %2}";
15094 return "vmovaps\t{%2, %x0|%x0, %2}";
15096 if (misaligned_operand (operands[2], <ssequartermode>mode))
15097 return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}"
15098 : "vmovdqu\t{%2, %x0|%x0, %2}";
15100 return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
15101 : "vmovdqa\t{%2, %x0|%x0, %2}";
15103 if (misaligned_operand (operands[2], <ssequartermode>mode))
15104 return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}"
15105 : "vmovdqu\t{%2, %x0|%x0, %2}";
15107 return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
15108 : "vmovdqa\t{%2, %x0|%x0, %2}";
15110 gcc_unreachable ();
15113 [(set_attr "type" "sselog,ssemov,ssemov")
15114 (set_attr "length_immediate" "1,0,0")
15115 (set_attr "prefix" "evex,vex,evex")
15116 (set_attr "mode" "<sseinsnmode>,<ssequarterinsnmode>,<ssequarterinsnmode>")])
;; General quarter insert: decode the vec_merge selector (one of four
;; valid one-quarter-cleared values per unit size) back into a 0..3
;; position immediate and emit VINSERTx32x4/64x2.
15118 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
15119 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
15120 (vec_merge:AVX512_VEC
15121 (match_operand:AVX512_VEC 1 "register_operand" "v")
15122 (vec_duplicate:AVX512_VEC
15123 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
15124 (match_operand:SI 3 "const_int_operand" "n")))]
15128 int selector = INTVAL (operands[3]);
;; Map selector -> quarter index: 0xFFF0/0xFC => 0, 0xFF0F/0xF3 => 1,
;; 0xF0FF/0xCF => 2, 0x0FFF/0x3F => 3 (dword/qword element sizes).
15130 if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))
15132 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFF0F : 0xF3))
15134 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xF0FF : 0xCF))
15136 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0x0FFF : 0x3F))
15139 gcc_unreachable ();
15141 operands[3] = GEN_INT (mask);
15143 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
15145 [(set_attr "type" "sselog")
15146 (set_attr "length_immediate" "1")
15147 (set_attr "prefix" "evex")
15148 (set_attr "mode" "<sseinsnmode>")])
;; Masked insert of a 256-bit half into a 512-bit vector: dispatch to
;; the vec_set_lo/vec_set_hi insn based on position operand 3 (0/1).
15150 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
15151 [(match_operand:AVX512_VEC_2 0 "register_operand")
15152 (match_operand:AVX512_VEC_2 1 "register_operand")
15153 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
15154 (match_operand:SI 3 "const_0_to_1_operand")
15155 (match_operand:AVX512_VEC_2 4 "register_operand")
15156 (match_operand:<avx512fmaskmode> 5 "register_operand")]
15159 int mask = INTVAL (operands[3]);
15161 emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
15162 operands[2], operands[4],
15165 emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
15166 operands[2], operands[4],
;; Replace the low 256-bit half of a 16-element (32-bit unit) vector:
;; concat new half (operand 2) with elements 8..15 of operand 1, then
;; emit VINSERTx32x8 with immediate 0.
15171 (define_insn "vec_set_lo_<mode><mask_name>"
15172 [(set (match_operand:V16FI 0 "register_operand" "=v")
15174 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
15175 (vec_select:<ssehalfvecmode>
15176 (match_operand:V16FI 1 "register_operand" "v")
15177 (parallel [(const_int 8) (const_int 9)
15178 (const_int 10) (const_int 11)
15179 (const_int 12) (const_int 13)
15180 (const_int 14) (const_int 15)]))))]
15182 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
15183 [(set_attr "type" "sselog")
15184 (set_attr "length_immediate" "1")
15185 (set_attr "prefix" "evex")
15186 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high 256-bit half: keep elements 0..7 of operand 1 and
;; append operand 2; VINSERTx32x8 with immediate 1.
15188 (define_insn "vec_set_hi_<mode><mask_name>"
15189 [(set (match_operand:V16FI 0 "register_operand" "=v")
15191 (vec_select:<ssehalfvecmode>
15192 (match_operand:V16FI 1 "register_operand" "v")
15193 (parallel [(const_int 0) (const_int 1)
15194 (const_int 2) (const_int 3)
15195 (const_int 4) (const_int 5)
15196 (const_int 6) (const_int 7)]))
15197 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
15199 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
15200 [(set_attr "type" "sselog")
15201 (set_attr "length_immediate" "1")
15202 (set_attr "prefix" "evex")
15203 (set_attr "mode" "XI")])
;; 64-bit-unit variant of vec_set_lo: new low half plus elements 4..7
;; of operand 1; VINSERTx64x4 with immediate 0.
15205 (define_insn "vec_set_lo_<mode><mask_name>"
15206 [(set (match_operand:V8FI 0 "register_operand" "=v")
15208 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
15209 (vec_select:<ssehalfvecmode>
15210 (match_operand:V8FI 1 "register_operand" "v")
15211 (parallel [(const_int 4) (const_int 5)
15212 (const_int 6) (const_int 7)]))))]
15214 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
15215 [(set_attr "type" "sselog")
15216 (set_attr "length_immediate" "1")
15217 (set_attr "prefix" "evex")
15218 (set_attr "mode" "XI")])
;; 64-bit-unit variant of vec_set_hi: keep elements 0..3, append
;; operand 2; VINSERTx64x4 with immediate 1.
15220 (define_insn "vec_set_hi_<mode><mask_name>"
15221 [(set (match_operand:V8FI 0 "register_operand" "=v")
15223 (vec_select:<ssehalfvecmode>
15224 (match_operand:V8FI 1 "register_operand" "v")
15225 (parallel [(const_int 0) (const_int 1)
15226 (const_int 2) (const_int 3)]))
15227 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
15229 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
15230 [(set_attr "type" "sselog")
15231 (set_attr "length_immediate" "1")
15232 (set_attr "prefix" "evex")
15233 (set_attr "mode" "XI")])
;; Masked 256-bit VSHUFx64x2: decode the 2-bit immediate (operand 3)
;; into explicit element indices for the _1 insn.  Bit 0 selects which
;; 128-bit pair of operand 1 goes low; bit 1 selects the pair of
;; operand 2 that goes high (indices 4..7 address operand 2 in the
;; concatenated vector).
15235 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
15236 [(match_operand:VI8F_256 0 "register_operand")
15237 (match_operand:VI8F_256 1 "register_operand")
15238 (match_operand:VI8F_256 2 "nonimmediate_operand")
15239 (match_operand:SI 3 "const_0_to_3_operand")
15240 (match_operand:VI8F_256 4 "register_operand")
15241 (match_operand:QI 5 "register_operand")]
15244 int mask = INTVAL (operands[3]);
15245 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
15246 (operands[0], operands[1], operands[2],
15247 GEN_INT (((mask >> 0) & 1) * 2 + 0),
15248 GEN_INT (((mask >> 0) & 1) * 2 + 1),
15249 GEN_INT (((mask >> 1) & 1) * 2 + 4),
15250 GEN_INT (((mask >> 1) & 1) * 2 + 5),
15251 operands[4], operands[5]));
;; 256-bit VSHUFx64x2 insn.  The condition requires each index pair to
;; be an aligned, consecutive 128-bit chunk (even first index, second
;; = first + 1); output code re-encodes the chunk numbers into the
;; 2-bit immediate.
15255 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
15256 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
15257 (vec_select:VI8F_256
15258 (vec_concat:<ssedoublemode>
15259 (match_operand:VI8F_256 1 "register_operand" "v")
15260 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
15261 (parallel [(match_operand 3 "const_0_to_3_operand")
15262 (match_operand 4 "const_0_to_3_operand")
15263 (match_operand 5 "const_4_to_7_operand")
15264 (match_operand 6 "const_4_to_7_operand")])))]
15266 && (INTVAL (operands[3]) & 1) == 0
15267 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
15268 && (INTVAL (operands[5]) & 1) == 0
15269 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1"
15272 mask = INTVAL (operands[3]) / 2;
15273 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
15274 operands[3] = GEN_INT (mask);
15275 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
15277 [(set_attr "type" "sselog")
15278 (set_attr "length_immediate" "1")
15279 (set_attr "prefix" "evex")
15280 (set_attr "mode" "XI")])
;; Masked 512-bit VSHUFx64x2: expand the 8-bit immediate into eight
;; explicit indices.  Each 2-bit field selects one of four 128-bit
;; chunks; fields 0-1 index operand 1 (indices 0..7), fields 2-3 index
;; operand 2 (indices 8..15 in the concatenated vector).
15282 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
15283 [(match_operand:V8FI 0 "register_operand")
15284 (match_operand:V8FI 1 "register_operand")
15285 (match_operand:V8FI 2 "nonimmediate_operand")
15286 (match_operand:SI 3 "const_0_to_255_operand")
15287 (match_operand:V8FI 4 "register_operand")
15288 (match_operand:QI 5 "register_operand")]
15291 int mask = INTVAL (operands[3]);
15292 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
15293 (operands[0], operands[1], operands[2],
15294 GEN_INT (((mask >> 0) & 3) * 2),
15295 GEN_INT (((mask >> 0) & 3) * 2 + 1),
15296 GEN_INT (((mask >> 2) & 3) * 2),
15297 GEN_INT (((mask >> 2) & 3) * 2 + 1),
15298 GEN_INT (((mask >> 4) & 3) * 2 + 8),
15299 GEN_INT (((mask >> 4) & 3) * 2 + 9),
15300 GEN_INT (((mask >> 6) & 3) * 2 + 8),
15301 GEN_INT (((mask >> 6) & 3) * 2 + 9),
15302 operands[4], operands[5]));
;; 512-bit VSHUFx64x2 insn: four index pairs, each an even-aligned
;; consecutive 128-bit chunk (low two pairs from operand 1, high two
;; pairs from operand 2).  Output code folds the chunk numbers back
;; into the 8-bit immediate, 2 bits per field.
15306 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
15307 [(set (match_operand:V8FI 0 "register_operand" "=v")
15309 (vec_concat:<ssedoublemode>
15310 (match_operand:V8FI 1 "register_operand" "v")
15311 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
15312 (parallel [(match_operand 3 "const_0_to_7_operand")
15313 (match_operand 4 "const_0_to_7_operand")
15314 (match_operand 5 "const_0_to_7_operand")
15315 (match_operand 6 "const_0_to_7_operand")
15316 (match_operand 7 "const_8_to_15_operand")
15317 (match_operand 8 "const_8_to_15_operand")
15318 (match_operand 9 "const_8_to_15_operand")
15319 (match_operand 10 "const_8_to_15_operand")])))]
15321 && (INTVAL (operands[3]) & 1) == 0
15322 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
15323 && (INTVAL (operands[5]) & 1) == 0
15324 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1
15325 && (INTVAL (operands[7]) & 1) == 0
15326 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
15327 && (INTVAL (operands[9]) & 1) == 0
15328 && INTVAL (operands[9]) == INTVAL (operands[10]) - 1"
15331 mask = INTVAL (operands[3]) / 2;
15332 mask |= INTVAL (operands[5]) / 2 << 2;
15333 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
15334 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
15335 operands[3] = GEN_INT (mask);
15337 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
15339 [(set_attr "type" "sselog")
15340 (set_attr "length_immediate" "1")
15341 (set_attr "prefix" "evex")
15342 (set_attr "mode" "<sseinsnmode>")])
;; Single-source form: a vec_select directly on operand 1 is matched
;; as VSHUFx64x2 with both sources equal (%1, %1), permuting the four
;; 128-bit chunks of one register.  Same alignment/consecutiveness
;; requirements and immediate re-encoding as the two-source insn.
15344 (define_insn "*avx512f_shuf_<shuffletype>64x2_1<mask_name>_1"
15345 [(set (match_operand:V8FI 0 "register_operand" "=v")
15347 (match_operand:V8FI 1 "register_operand" "v")
15348 (parallel [(match_operand 2 "const_0_to_7_operand")
15349 (match_operand 3 "const_0_to_7_operand")
15350 (match_operand 4 "const_0_to_7_operand")
15351 (match_operand 5 "const_0_to_7_operand")
15352 (match_operand 6 "const_0_to_7_operand")
15353 (match_operand 7 "const_0_to_7_operand")
15354 (match_operand 8 "const_0_to_7_operand")
15355 (match_operand 9 "const_0_to_7_operand")])))]
15357 && (INTVAL (operands[2]) & 1) == 0
15358 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
15359 && (INTVAL (operands[4]) & 1) == 0
15360 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
15361 && (INTVAL (operands[6]) & 1) == 0
15362 && INTVAL (operands[6]) == INTVAL (operands[7]) - 1
15363 && (INTVAL (operands[8]) & 1) == 0
15364 && INTVAL (operands[8]) == INTVAL (operands[9]) - 1"
15367 mask = INTVAL (operands[2]) / 2;
15368 mask |= INTVAL (operands[4]) / 2 << 2;
15369 mask |= INTVAL (operands[6]) / 2 << 4;
15370 mask |= INTVAL (operands[8]) / 2 << 6;
15371 operands[2] = GEN_INT (mask);
15373 return "vshuf<shuffletype>64x2\t{%2, %1, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %1, %2}";
15375 [(set_attr "type" "sselog")
15376 (set_attr "length_immediate" "1")
15377 (set_attr "prefix" "evex")
15378 (set_attr "mode" "<sseinsnmode>")])
;; Masked 256-bit VSHUFx32x4: expand the 2-bit immediate into eight
;; explicit dword indices (four per selected 128-bit chunk).  Bit 0
;; selects operand 1's chunk (indices 0..7), bit 1 selects operand 2's
;; chunk (indices 8..15 in the concatenation).
15380 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
15381 [(match_operand:VI4F_256 0 "register_operand")
15382 (match_operand:VI4F_256 1 "register_operand")
15383 (match_operand:VI4F_256 2 "nonimmediate_operand")
15384 (match_operand:SI 3 "const_0_to_3_operand")
15385 (match_operand:VI4F_256 4 "register_operand")
15386 (match_operand:QI 5 "register_operand")]
15389 int mask = INTVAL (operands[3]);
15390 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
15391 (operands[0], operands[1], operands[2],
15392 GEN_INT (((mask >> 0) & 1) * 4 + 0),
15393 GEN_INT (((mask >> 0) & 1) * 4 + 1),
15394 GEN_INT (((mask >> 0) & 1) * 4 + 2),
15395 GEN_INT (((mask >> 0) & 1) * 4 + 3),
15396 GEN_INT (((mask >> 1) & 1) * 4 + 8),
15397 GEN_INT (((mask >> 1) & 1) * 4 + 9),
15398 GEN_INT (((mask >> 1) & 1) * 4 + 10),
15399 GEN_INT (((mask >> 1) & 1) * 4 + 11),
15400 operands[4], operands[5]));
;; 256-bit VSHUFx32x4 insn: each group of four indices must be an
;; aligned run of four consecutive dwords (one 128-bit chunk); low
;; group from operand 1, high group from operand 2.  Output code packs
;; the chunk numbers into a 2-bit immediate.
15404 (define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
15405 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
15406 (vec_select:VI4F_256
15407 (vec_concat:<ssedoublemode>
15408 (match_operand:VI4F_256 1 "register_operand" "v")
15409 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
15410 (parallel [(match_operand 3 "const_0_to_7_operand")
15411 (match_operand 4 "const_0_to_7_operand")
15412 (match_operand 5 "const_0_to_7_operand")
15413 (match_operand 6 "const_0_to_7_operand")
15414 (match_operand 7 "const_8_to_15_operand")
15415 (match_operand 8 "const_8_to_15_operand")
15416 (match_operand 9 "const_8_to_15_operand")
15417 (match_operand 10 "const_8_to_15_operand")])))]
15419 && (INTVAL (operands[3]) & 3) == 0
15420 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
15421 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
15422 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
15423 && (INTVAL (operands[7]) & 3) == 0
15424 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
15425 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
15426 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3"
15429 mask = INTVAL (operands[3]) / 4;
15430 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
15431 operands[3] = GEN_INT (mask);
15433 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
15435 [(set_attr "type" "sselog")
15436 (set_attr "length_immediate" "1")
15437 (set_attr "prefix" "evex")
15438 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit VSHUFx32x4: expand the 8-bit immediate into sixteen
;; dword indices.  2-bit fields 0-1 select chunks of operand 1
;; (indices 0..15); fields 2-3 select chunks of operand 2 (indices
;; 16..31 in the concatenated vector).
15440 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
15441 [(match_operand:V16FI 0 "register_operand")
15442 (match_operand:V16FI 1 "register_operand")
15443 (match_operand:V16FI 2 "nonimmediate_operand")
15444 (match_operand:SI 3 "const_0_to_255_operand")
15445 (match_operand:V16FI 4 "register_operand")
15446 (match_operand:HI 5 "register_operand")]
15449 int mask = INTVAL (operands[3]);
15450 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
15451 (operands[0], operands[1], operands[2],
15452 GEN_INT (((mask >> 0) & 3) * 4),
15453 GEN_INT (((mask >> 0) & 3) * 4 + 1),
15454 GEN_INT (((mask >> 0) & 3) * 4 + 2),
15455 GEN_INT (((mask >> 0) & 3) * 4 + 3),
15456 GEN_INT (((mask >> 2) & 3) * 4),
15457 GEN_INT (((mask >> 2) & 3) * 4 + 1),
15458 GEN_INT (((mask >> 2) & 3) * 4 + 2),
15459 GEN_INT (((mask >> 2) & 3) * 4 + 3),
15460 GEN_INT (((mask >> 4) & 3) * 4 + 16),
15461 GEN_INT (((mask >> 4) & 3) * 4 + 17),
15462 GEN_INT (((mask >> 4) & 3) * 4 + 18),
15463 GEN_INT (((mask >> 4) & 3) * 4 + 19),
15464 GEN_INT (((mask >> 6) & 3) * 4 + 16),
15465 GEN_INT (((mask >> 6) & 3) * 4 + 17),
15466 GEN_INT (((mask >> 6) & 3) * 4 + 18),
15467 GEN_INT (((mask >> 6) & 3) * 4 + 19),
15468 operands[4], operands[5]));
;; 512-bit VSHUFx32x4 insn: four groups of four indices, each an
;; aligned consecutive dword quadruple (128-bit chunk); first two
;; groups come from operand 1, last two from operand 2.  Output code
;; folds the chunk numbers back into the 8-bit immediate.
15472 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
15473 [(set (match_operand:V16FI 0 "register_operand" "=v")
15475 (vec_concat:<ssedoublemode>
15476 (match_operand:V16FI 1 "register_operand" "v")
15477 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
15478 (parallel [(match_operand 3 "const_0_to_15_operand")
15479 (match_operand 4 "const_0_to_15_operand")
15480 (match_operand 5 "const_0_to_15_operand")
15481 (match_operand 6 "const_0_to_15_operand")
15482 (match_operand 7 "const_0_to_15_operand")
15483 (match_operand 8 "const_0_to_15_operand")
15484 (match_operand 9 "const_0_to_15_operand")
15485 (match_operand 10 "const_0_to_15_operand")
15486 (match_operand 11 "const_16_to_31_operand")
15487 (match_operand 12 "const_16_to_31_operand")
15488 (match_operand 13 "const_16_to_31_operand")
15489 (match_operand 14 "const_16_to_31_operand")
15490 (match_operand 15 "const_16_to_31_operand")
15491 (match_operand 16 "const_16_to_31_operand")
15492 (match_operand 17 "const_16_to_31_operand")
15493 (match_operand 18 "const_16_to_31_operand")])))]
15495 && (INTVAL (operands[3]) & 3) == 0
15496 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
15497 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
15498 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
15499 && (INTVAL (operands[7]) & 3) == 0
15500 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
15501 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
15502 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3
15503 && (INTVAL (operands[11]) & 3) == 0
15504 && INTVAL (operands[11]) == INTVAL (operands[12]) - 1
15505 && INTVAL (operands[11]) == INTVAL (operands[13]) - 2
15506 && INTVAL (operands[11]) == INTVAL (operands[14]) - 3
15507 && (INTVAL (operands[15]) & 3) == 0
15508 && INTVAL (operands[15]) == INTVAL (operands[16]) - 1
15509 && INTVAL (operands[15]) == INTVAL (operands[17]) - 2
15510 && INTVAL (operands[15]) == INTVAL (operands[18]) - 3"
15513 mask = INTVAL (operands[3]) / 4;
15514 mask |= INTVAL (operands[7]) / 4 << 2;
15515 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
15516 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
15517 operands[3] = GEN_INT (mask);
15519 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
15521 [(set_attr "type" "sselog")
15522 (set_attr "length_immediate" "1")
15523 (set_attr "prefix" "evex")
15524 (set_attr "mode" "<sseinsnmode>")])
;; Single-source form of the 512-bit 32x4 shuffle: a plain vec_select
;; on operand 1 is emitted as VSHUFx32x4 with both sources equal
;; (%1, %1).  Same chunk-alignment requirements and immediate packing.
15526 (define_insn "*avx512f_shuf_<shuffletype>32x4_1<mask_name>_1"
15527 [(set (match_operand:V16FI 0 "register_operand" "=v")
15529 (match_operand:V16FI 1 "register_operand" "v")
15530 (parallel [(match_operand 2 "const_0_to_15_operand")
15531 (match_operand 3 "const_0_to_15_operand")
15532 (match_operand 4 "const_0_to_15_operand")
15533 (match_operand 5 "const_0_to_15_operand")
15534 (match_operand 6 "const_0_to_15_operand")
15535 (match_operand 7 "const_0_to_15_operand")
15536 (match_operand 8 "const_0_to_15_operand")
15537 (match_operand 9 "const_0_to_15_operand")
15538 (match_operand 10 "const_0_to_15_operand")
15539 (match_operand 11 "const_0_to_15_operand")
15540 (match_operand 12 "const_0_to_15_operand")
15541 (match_operand 13 "const_0_to_15_operand")
15542 (match_operand 14 "const_0_to_15_operand")
15543 (match_operand 15 "const_0_to_15_operand")
15544 (match_operand 16 "const_0_to_15_operand")
15545 (match_operand 17 "const_0_to_15_operand")])))]
15547 && (INTVAL (operands[2]) & 3) == 0
15548 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
15549 && INTVAL (operands[2]) == INTVAL (operands[4]) - 2
15550 && INTVAL (operands[2]) == INTVAL (operands[5]) - 3
15551 && (INTVAL (operands[6]) & 3) == 0
15552 && INTVAL (operands[6]) == INTVAL (operands[7]) - 1
15553 && INTVAL (operands[6]) == INTVAL (operands[8]) - 2
15554 && INTVAL (operands[6]) == INTVAL (operands[9]) - 3
15555 && (INTVAL (operands[10]) & 3) == 0
15556 && INTVAL (operands[10]) == INTVAL (operands[11]) - 1
15557 && INTVAL (operands[10]) == INTVAL (operands[12]) - 2
15558 && INTVAL (operands[10]) == INTVAL (operands[13]) - 3
15559 && (INTVAL (operands[14]) & 3) == 0
15560 && INTVAL (operands[14]) == INTVAL (operands[15]) - 1
15561 && INTVAL (operands[14]) == INTVAL (operands[16]) - 2
15562 && INTVAL (operands[14]) == INTVAL (operands[17]) - 3"
15565 mask = INTVAL (operands[2]) / 4;
15566 mask |= INTVAL (operands[6]) / 4 << 2;
15567 mask |= INTVAL (operands[10]) / 4 << 4;
15568 mask |= INTVAL (operands[14]) / 4 << 6;
15569 operands[2] = GEN_INT (mask);
15571 return "vshuf<shuffletype>32x4\t{%2, %1, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %1, %2}";
15573 [(set_attr "type" "sselog")
15574 (set_attr "length_immediate" "1")
15575 (set_attr "prefix" "evex")
15576 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit VPSHUFD: decode the 8-bit immediate (four 2-bit
;; source positions) and replicate the pattern into each of the four
;; 128-bit lanes (offsets +0, +4, +8, +12) for the _1 insn.
15578 (define_expand "avx512f_pshufdv3_mask"
15579 [(match_operand:V16SI 0 "register_operand")
15580 (match_operand:V16SI 1 "nonimmediate_operand")
15581 (match_operand:SI 2 "const_0_to_255_operand")
15582 (match_operand:V16SI 3 "register_operand")
15583 (match_operand:HI 4 "register_operand")]
15586 int mask = INTVAL (operands[2]);
15587 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
15588 GEN_INT ((mask >> 0) & 3),
15589 GEN_INT ((mask >> 2) & 3),
15590 GEN_INT ((mask >> 4) & 3),
15591 GEN_INT ((mask >> 6) & 3),
15592 GEN_INT (((mask >> 0) & 3) + 4),
15593 GEN_INT (((mask >> 2) & 3) + 4),
15594 GEN_INT (((mask >> 4) & 3) + 4),
15595 GEN_INT (((mask >> 6) & 3) + 4),
15596 GEN_INT (((mask >> 0) & 3) + 8),
15597 GEN_INT (((mask >> 2) & 3) + 8),
15598 GEN_INT (((mask >> 4) & 3) + 8),
15599 GEN_INT (((mask >> 6) & 3) + 8),
15600 GEN_INT (((mask >> 0) & 3) + 12),
15601 GEN_INT (((mask >> 2) & 3) + 12),
15602 GEN_INT (((mask >> 4) & 3) + 12),
15603 GEN_INT (((mask >> 6) & 3) + 12),
15604 operands[3], operands[4]));
;; 512-bit VPSHUFD insn: the condition verifies the same 2-bit shuffle
;; pattern repeats in every 128-bit lane (lane k's indices equal lane
;; 0's plus 4k); output code rebuilds the 8-bit immediate from the
;; lane-0 indices.
15608 (define_insn "avx512f_pshufd_1<mask_name>"
15609 [(set (match_operand:V16SI 0 "register_operand" "=v")
15611 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
15612 (parallel [(match_operand 2 "const_0_to_3_operand")
15613 (match_operand 3 "const_0_to_3_operand")
15614 (match_operand 4 "const_0_to_3_operand")
15615 (match_operand 5 "const_0_to_3_operand")
15616 (match_operand 6 "const_4_to_7_operand")
15617 (match_operand 7 "const_4_to_7_operand")
15618 (match_operand 8 "const_4_to_7_operand")
15619 (match_operand 9 "const_4_to_7_operand")
15620 (match_operand 10 "const_8_to_11_operand")
15621 (match_operand 11 "const_8_to_11_operand")
15622 (match_operand 12 "const_8_to_11_operand")
15623 (match_operand 13 "const_8_to_11_operand")
15624 (match_operand 14 "const_12_to_15_operand")
15625 (match_operand 15 "const_12_to_15_operand")
15626 (match_operand 16 "const_12_to_15_operand")
15627 (match_operand 17 "const_12_to_15_operand")])))]
15629 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
15630 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
15631 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
15632 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
15633 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
15634 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
15635 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
15636 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
15637 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
15638 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
15639 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
15640 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
15643 mask |= INTVAL (operands[2]) << 0;
15644 mask |= INTVAL (operands[3]) << 2;
15645 mask |= INTVAL (operands[4]) << 4;
15646 mask |= INTVAL (operands[5]) << 6;
15647 operands[2] = GEN_INT (mask);
15649 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
15651 [(set_attr "type" "sselog1")
15652 (set_attr "prefix" "evex")
15653 (set_attr "length_immediate" "1")
15654 (set_attr "mode" "XI")])
;; Masked 256-bit VPSHUFD: same immediate decoding as the 512-bit
;; expander, but with only two 128-bit lanes (offsets +0 and +4).
15656 (define_expand "avx512vl_pshufdv3_mask"
15657 [(match_operand:V8SI 0 "register_operand")
15658 (match_operand:V8SI 1 "nonimmediate_operand")
15659 (match_operand:SI 2 "const_0_to_255_operand")
15660 (match_operand:V8SI 3 "register_operand")
15661 (match_operand:QI 4 "register_operand")]
15664 int mask = INTVAL (operands[2]);
15665 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
15666 GEN_INT ((mask >> 0) & 3),
15667 GEN_INT ((mask >> 2) & 3),
15668 GEN_INT ((mask >> 4) & 3),
15669 GEN_INT ((mask >> 6) & 3),
15670 GEN_INT (((mask >> 0) & 3) + 4),
15671 GEN_INT (((mask >> 2) & 3) + 4),
15672 GEN_INT (((mask >> 4) & 3) + 4),
15673 GEN_INT (((mask >> 6) & 3) + 4),
15674 operands[3], operands[4]));
15678 (define_expand "avx2_pshufdv3"
15679 [(match_operand:V8SI 0 "register_operand")
15680 (match_operand:V8SI 1 "nonimmediate_operand")
15681 (match_operand:SI 2 "const_0_to_255_operand")]
15684 int mask = INTVAL (operands[2]);
15685 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
15686 GEN_INT ((mask >> 0) & 3),
15687 GEN_INT ((mask >> 2) & 3),
15688 GEN_INT ((mask >> 4) & 3),
15689 GEN_INT ((mask >> 6) & 3),
15690 GEN_INT (((mask >> 0) & 3) + 4),
15691 GEN_INT (((mask >> 2) & 3) + 4),
15692 GEN_INT (((mask >> 4) & 3) + 4),
15693 GEN_INT (((mask >> 6) & 3) + 4)));
15697 (define_insn "avx2_pshufd_1<mask_name>"
15698 [(set (match_operand:V8SI 0 "register_operand" "=v")
15700 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
15701 (parallel [(match_operand 2 "const_0_to_3_operand")
15702 (match_operand 3 "const_0_to_3_operand")
15703 (match_operand 4 "const_0_to_3_operand")
15704 (match_operand 5 "const_0_to_3_operand")
15705 (match_operand 6 "const_4_to_7_operand")
15706 (match_operand 7 "const_4_to_7_operand")
15707 (match_operand 8 "const_4_to_7_operand")
15708 (match_operand 9 "const_4_to_7_operand")])))]
15710 && <mask_avx512vl_condition>
15711 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
15712 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
15713 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
15714 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
15717 mask |= INTVAL (operands[2]) << 0;
15718 mask |= INTVAL (operands[3]) << 2;
15719 mask |= INTVAL (operands[4]) << 4;
15720 mask |= INTVAL (operands[5]) << 6;
15721 operands[2] = GEN_INT (mask);
15723 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
15725 [(set_attr "type" "sselog1")
15726 (set_attr "prefix" "maybe_evex")
15727 (set_attr "length_immediate" "1")
15728 (set_attr "mode" "OI")])
15730 (define_expand "avx512vl_pshufd_mask"
15731 [(match_operand:V4SI 0 "register_operand")
15732 (match_operand:V4SI 1 "nonimmediate_operand")
15733 (match_operand:SI 2 "const_0_to_255_operand")
15734 (match_operand:V4SI 3 "register_operand")
15735 (match_operand:QI 4 "register_operand")]
15738 int mask = INTVAL (operands[2]);
15739 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
15740 GEN_INT ((mask >> 0) & 3),
15741 GEN_INT ((mask >> 2) & 3),
15742 GEN_INT ((mask >> 4) & 3),
15743 GEN_INT ((mask >> 6) & 3),
15744 operands[3], operands[4]));
15748 (define_expand "sse2_pshufd"
15749 [(match_operand:V4SI 0 "register_operand")
15750 (match_operand:V4SI 1 "vector_operand")
15751 (match_operand:SI 2 "const_int_operand")]
15754 int mask = INTVAL (operands[2]);
15755 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
15756 GEN_INT ((mask >> 0) & 3),
15757 GEN_INT ((mask >> 2) & 3),
15758 GEN_INT ((mask >> 4) & 3),
15759 GEN_INT ((mask >> 6) & 3)));
15763 (define_insn "sse2_pshufd_1<mask_name>"
15764 [(set (match_operand:V4SI 0 "register_operand" "=v")
15766 (match_operand:V4SI 1 "vector_operand" "vBm")
15767 (parallel [(match_operand 2 "const_0_to_3_operand")
15768 (match_operand 3 "const_0_to_3_operand")
15769 (match_operand 4 "const_0_to_3_operand")
15770 (match_operand 5 "const_0_to_3_operand")])))]
15771 "TARGET_SSE2 && <mask_avx512vl_condition>"
15774 mask |= INTVAL (operands[2]) << 0;
15775 mask |= INTVAL (operands[3]) << 2;
15776 mask |= INTVAL (operands[4]) << 4;
15777 mask |= INTVAL (operands[5]) << 6;
15778 operands[2] = GEN_INT (mask);
15780 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
15782 [(set_attr "type" "sselog1")
15783 (set_attr "prefix_data16" "1")
15784 (set_attr "prefix" "<mask_prefix2>")
15785 (set_attr "length_immediate" "1")
15786 (set_attr "mode" "TI")])
;; 512-bit vpshuflw: the immediate is passed straight through as an
;; unspec operand (no per-lane decomposition at this width).
15788 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
15789 [(set (match_operand:V32HI 0 "register_operand" "=v")
15791 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
15792 (match_operand:SI 2 "const_0_to_255_operand" "n")]
15795 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15796 [(set_attr "type" "sselog")
15797 (set_attr "prefix" "evex")
15798 (set_attr "mode" "XI")])
;; Masked 256-bit PSHUFLW expander: decode the immediate into 2-bit
;; word selectors; the second 128-bit half uses the same selectors at
;; +8 (word indices 8..11), matching avx2_pshuflw_1's predicates.
15800 (define_expand "avx512vl_pshuflwv3_mask"
15801 [(match_operand:V16HI 0 "register_operand")
15802 (match_operand:V16HI 1 "nonimmediate_operand")
15803 (match_operand:SI 2 "const_0_to_255_operand")
15804 (match_operand:V16HI 3 "register_operand")
15805 (match_operand:HI 4 "register_operand")]
15806 "TARGET_AVX512VL && TARGET_AVX512BW"
15808 int mask = INTVAL (operands[2]);
15809 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
15810 GEN_INT ((mask >> 0) & 3),
15811 GEN_INT ((mask >> 2) & 3),
15812 GEN_INT ((mask >> 4) & 3),
15813 GEN_INT ((mask >> 6) & 3),
15814 GEN_INT (((mask >> 0) & 3) + 8),
15815 GEN_INT (((mask >> 2) & 3) + 8),
15816 GEN_INT (((mask >> 4) & 3) + 8),
15817 GEN_INT (((mask >> 6) & 3) + 8),
15818 operands[3], operands[4]));
;; Unmasked 256-bit PSHUFLW expander; same decoding as above.
15822 (define_expand "avx2_pshuflwv3"
15823 [(match_operand:V16HI 0 "register_operand")
15824 (match_operand:V16HI 1 "nonimmediate_operand")
15825 (match_operand:SI 2 "const_0_to_255_operand")]
15828 int mask = INTVAL (operands[2]);
15829 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
15830 GEN_INT ((mask >> 0) & 3),
15831 GEN_INT ((mask >> 2) & 3),
15832 GEN_INT ((mask >> 4) & 3),
15833 GEN_INT ((mask >> 6) & 3),
15834 GEN_INT (((mask >> 0) & 3) + 8),
15835 GEN_INT (((mask >> 2) & 3) + 8),
15836 GEN_INT (((mask >> 4) & 3) + 8),
15837 GEN_INT (((mask >> 6) & 3) + 8)));
;; V16HI low-word shuffle insn: operands 2-5 select within the low four
;; words of each half (6-9 must equal 2-5 plus 8 per the condition);
;; the body re-packs them into the vpshuflw immediate.
15841 (define_insn "avx2_pshuflw_1<mask_name>"
15842 [(set (match_operand:V16HI 0 "register_operand" "=Yw")
15844 (match_operand:V16HI 1 "nonimmediate_operand" "Ywm")
15845 (parallel [(match_operand 2 "const_0_to_3_operand")
15846 (match_operand 3 "const_0_to_3_operand")
15847 (match_operand 4 "const_0_to_3_operand")
15848 (match_operand 5 "const_0_to_3_operand")
15853 (match_operand 6 "const_8_to_11_operand")
15854 (match_operand 7 "const_8_to_11_operand")
15855 (match_operand 8 "const_8_to_11_operand")
15856 (match_operand 9 "const_8_to_11_operand")
15860 (const_int 15)])))]
15862 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
15863 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
15864 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
15865 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
15866 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
15869 mask |= INTVAL (operands[2]) << 0;
15870 mask |= INTVAL (operands[3]) << 2;
15871 mask |= INTVAL (operands[4]) << 4;
15872 mask |= INTVAL (operands[5]) << 6;
15873 operands[2] = GEN_INT (mask);
15875 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
15877 [(set_attr "type" "sselog")
15878 (set_attr "prefix" "maybe_evex")
15879 (set_attr "length_immediate" "1")
15880 (set_attr "mode" "OI")])
;; Masked 128-bit PSHUFLW expander lowering to sse2_pshuflw_1_mask.
15882 (define_expand "avx512vl_pshuflw_mask"
15883 [(match_operand:V8HI 0 "register_operand")
15884 (match_operand:V8HI 1 "nonimmediate_operand")
15885 (match_operand:SI 2 "const_0_to_255_operand")
15886 (match_operand:V8HI 3 "register_operand")
15887 (match_operand:QI 4 "register_operand")]
15888 "TARGET_AVX512VL && TARGET_AVX512BW"
15890 int mask = INTVAL (operands[2]);
15891 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
15892 GEN_INT ((mask >> 0) & 3),
15893 GEN_INT ((mask >> 2) & 3),
15894 GEN_INT ((mask >> 4) & 3),
15895 GEN_INT ((mask >> 6) & 3),
15896 operands[3], operands[4]));
;; Plain 128-bit PSHUFLW expander.
15900 (define_expand "sse2_pshuflw"
15901 [(match_operand:V8HI 0 "register_operand")
15902 (match_operand:V8HI 1 "vector_operand")
15903 (match_operand:SI 2 "const_int_operand")]
15906 int mask = INTVAL (operands[2]);
15907 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
15908 GEN_INT ((mask >> 0) & 3),
15909 GEN_INT ((mask >> 2) & 3),
15910 GEN_INT ((mask >> 4) & 3),
15911 GEN_INT ((mask >> 6) & 3)));
;; V8HI low-word shuffle insn; re-packs selectors 0..3 into the
;; pshuflw immediate in the output body.
15915 (define_insn "sse2_pshuflw_1<mask_name>"
15916 [(set (match_operand:V8HI 0 "register_operand" "=Yw")
15918 (match_operand:V8HI 1 "vector_operand" "YwBm")
15919 (parallel [(match_operand 2 "const_0_to_3_operand")
15920 (match_operand 3 "const_0_to_3_operand")
15921 (match_operand 4 "const_0_to_3_operand")
15922 (match_operand 5 "const_0_to_3_operand")
15927 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15930 mask |= INTVAL (operands[2]) << 0;
15931 mask |= INTVAL (operands[3]) << 2;
15932 mask |= INTVAL (operands[4]) << 4;
15933 mask |= INTVAL (operands[5]) << 6;
15934 operands[2] = GEN_INT (mask);
15936 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
15938 [(set_attr "type" "sselog")
15939 (set_attr "prefix_data16" "0")
15940 (set_attr "prefix_rep" "1")
15941 (set_attr "prefix" "maybe_vex")
15942 (set_attr "length_immediate" "1")
15943 (set_attr "mode" "TI")])
;; 256-bit PSHUFHW expander: the high-word selectors are biased by +4
;; (first half, word indices 4..7) and +12 (second half, 12..15) when
;; lowering to avx2_pshufhw_1's explicit-selector form.
15945 (define_expand "avx2_pshufhwv3"
15946 [(match_operand:V16HI 0 "register_operand")
15947 (match_operand:V16HI 1 "nonimmediate_operand")
15948 (match_operand:SI 2 "const_0_to_255_operand")]
15951 int mask = INTVAL (operands[2]);
15952 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
15953 GEN_INT (((mask >> 0) & 3) + 4),
15954 GEN_INT (((mask >> 2) & 3) + 4),
15955 GEN_INT (((mask >> 4) & 3) + 4),
15956 GEN_INT (((mask >> 6) & 3) + 4),
15957 GEN_INT (((mask >> 0) & 3) + 12),
15958 GEN_INT (((mask >> 2) & 3) + 12),
15959 GEN_INT (((mask >> 4) & 3) + 12),
15960 GEN_INT (((mask >> 6) & 3) + 12)));
;; 512-bit vpshufhw: immediate passed through as an unspec operand.
15964 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
15965 [(set (match_operand:V32HI 0 "register_operand" "=v")
15967 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
15968 (match_operand:SI 2 "const_0_to_255_operand" "n")]
15971 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15972 [(set_attr "type" "sselog")
15973 (set_attr "prefix" "evex")
15974 (set_attr "mode" "XI")])
;; Masked 256-bit PSHUFHW expander; same +4/+12 selector bias as the
;; unmasked expander above.
15976 (define_expand "avx512vl_pshufhwv3_mask"
15977 [(match_operand:V16HI 0 "register_operand")
15978 (match_operand:V16HI 1 "nonimmediate_operand")
15979 (match_operand:SI 2 "const_0_to_255_operand")
15980 (match_operand:V16HI 3 "register_operand")
15981 (match_operand:HI 4 "register_operand")]
15982 "TARGET_AVX512VL && TARGET_AVX512BW"
15984 int mask = INTVAL (operands[2]);
15985 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
15986 GEN_INT (((mask >> 0) & 3) + 4),
15987 GEN_INT (((mask >> 2) & 3) + 4),
15988 GEN_INT (((mask >> 4) & 3) + 4),
15989 GEN_INT (((mask >> 6) & 3) + 4),
15990 GEN_INT (((mask >> 0) & 3) + 12),
15991 GEN_INT (((mask >> 2) & 3) + 12),
15992 GEN_INT (((mask >> 4) & 3) + 12),
15993 GEN_INT (((mask >> 6) & 3) + 12),
15994 operands[3], operands[4]));
;; V16HI high-word shuffle insn: selectors are in the 4..7 / 12..15
;; ranges, so the body subtracts 4 from operands 2-5 when re-packing
;; the 8-bit vpshufhw immediate.
15998 (define_insn "avx2_pshufhw_1<mask_name>"
15999 [(set (match_operand:V16HI 0 "register_operand" "=Yw")
16001 (match_operand:V16HI 1 "nonimmediate_operand" "Ywm")
16002 (parallel [(const_int 0)
16006 (match_operand 2 "const_4_to_7_operand")
16007 (match_operand 3 "const_4_to_7_operand")
16008 (match_operand 4 "const_4_to_7_operand")
16009 (match_operand 5 "const_4_to_7_operand")
16014 (match_operand 6 "const_12_to_15_operand")
16015 (match_operand 7 "const_12_to_15_operand")
16016 (match_operand 8 "const_12_to_15_operand")
16017 (match_operand 9 "const_12_to_15_operand")])))]
16019 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
16020 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
16021 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
16022 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
16023 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
16026 mask |= (INTVAL (operands[2]) - 4) << 0;
16027 mask |= (INTVAL (operands[3]) - 4) << 2;
16028 mask |= (INTVAL (operands[4]) - 4) << 4;
16029 mask |= (INTVAL (operands[5]) - 4) << 6;
16030 operands[2] = GEN_INT (mask);
16032 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
16034 [(set_attr "type" "sselog")
16035 (set_attr "prefix" "maybe_evex")
16036 (set_attr "length_immediate" "1")
16037 (set_attr "mode" "OI")])
;; Masked 128-bit PSHUFHW expander lowering to sse2_pshufhw_1_mask.
16039 (define_expand "avx512vl_pshufhw_mask"
16040 [(match_operand:V8HI 0 "register_operand")
16041 (match_operand:V8HI 1 "nonimmediate_operand")
16042 (match_operand:SI 2 "const_0_to_255_operand")
16043 (match_operand:V8HI 3 "register_operand")
16044 (match_operand:QI 4 "register_operand")]
16045 "TARGET_AVX512VL && TARGET_AVX512BW"
16047 int mask = INTVAL (operands[2]);
16048 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
16049 GEN_INT (((mask >> 0) & 3) + 4),
16050 GEN_INT (((mask >> 2) & 3) + 4),
16051 GEN_INT (((mask >> 4) & 3) + 4),
16052 GEN_INT (((mask >> 6) & 3) + 4),
16053 operands[3], operands[4]));
;; Plain 128-bit PSHUFHW expander (selectors biased by +4).
16057 (define_expand "sse2_pshufhw"
16058 [(match_operand:V8HI 0 "register_operand")
16059 (match_operand:V8HI 1 "vector_operand")
16060 (match_operand:SI 2 "const_int_operand")]
16063 int mask = INTVAL (operands[2]);
16064 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
16065 GEN_INT (((mask >> 0) & 3) + 4),
16066 GEN_INT (((mask >> 2) & 3) + 4),
16067 GEN_INT (((mask >> 4) & 3) + 4),
16068 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI high-word shuffle insn; selectors 4..7, re-packed minus 4.
16072 (define_insn "sse2_pshufhw_1<mask_name>"
16073 [(set (match_operand:V8HI 0 "register_operand" "=Yw")
16075 (match_operand:V8HI 1 "vector_operand" "YwBm")
16076 (parallel [(const_int 0)
16080 (match_operand 2 "const_4_to_7_operand")
16081 (match_operand 3 "const_4_to_7_operand")
16082 (match_operand 4 "const_4_to_7_operand")
16083 (match_operand 5 "const_4_to_7_operand")])))]
16084 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
16087 mask |= (INTVAL (operands[2]) - 4) << 0;
16088 mask |= (INTVAL (operands[3]) - 4) << 2;
16089 mask |= (INTVAL (operands[4]) - 4) << 4;
16090 mask |= (INTVAL (operands[5]) - 4) << 6;
16091 operands[2] = GEN_INT (mask);
16093 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
16095 [(set_attr "type" "sselog")
16096 (set_attr "prefix_rep" "1")
16097 (set_attr "prefix_data16" "0")
16098 (set_attr "prefix" "maybe_vex")
16099 (set_attr "length_immediate" "1")
16100 (set_attr "mode" "TI")])
;; Load a 32-bit scalar into element 0 of a V4SI register; the expander
;; supplies a zero vector (operands[2]) for the remaining elements.
16102 (define_expand "sse2_loadd"
16103 [(set (match_operand:V4SI 0 "register_operand")
16105 (vec_duplicate:V4SI
16106 (match_operand:SI 1 "nonimmediate_operand"))
16110 "operands[2] = CONST0_RTX (V4SImode);")
;; The matching insn: movd/movss pick the low element from a GPR,
;; memory, or another vector, merging with operand 1 (zero or register).
16112 (define_insn "sse2_loadld"
16113 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,x,v")
16115 (vec_duplicate:V4SI
16116 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
16117 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,v")
16121 %vmovd\t{%2, %0|%0, %2}
16122 %vmovd\t{%2, %0|%0, %2}
16123 movss\t{%2, %0|%0, %2}
16124 movss\t{%2, %0|%0, %2}
16125 vmovss\t{%2, %1, %0|%0, %1, %2}"
16126 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
16127 (set_attr "type" "ssemov")
;; Alternative 1 moves from a GPR; prefer it only when GPR->vector
;; moves are cheap on the target (preferred_for_speed below).
16128 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
16129 (set_attr "mode" "TI,TI,V4SF,SF,SF")
16130 (set (attr "preferred_for_speed")
16131 (cond [(eq_attr "alternative" "1")
16132 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
16134 (symbol_ref "true")))])
16136 ;; QI and HI modes handled by pextr patterns.
16137 (define_mode_iterator PEXTR_MODE12
16138 [(V16QI "TARGET_SSE4_1") V8HI])
;; Extract a QI/HI element to a GPR (via %k0, the 32-bit view) or,
;; with SSE4, directly to memory (second alternative).
16140 (define_insn "*vec_extract<mode>"
16141 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m")
16142 (vec_select:<ssescalarmode>
16143 (match_operand:PEXTR_MODE12 1 "register_operand" "YW,YW")
16145 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
16148 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
16149 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16150 [(set_attr "isa" "*,sse4")
16151 (set_attr "type" "sselog1")
16152 (set_attr "prefix_data16" "1")
16153 (set (attr "prefix_extra")
16155 (and (eq_attr "alternative" "0,2")
16156 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
16158 (const_string "1")))
16159 (set_attr "length_immediate" "1")
16160 (set_attr "prefix" "maybe_vex,maybe_vex")
16161 (set_attr "mode" "TI")])
;; Element extract combined with zero extension to SI/DI: pextr
;; writing a 32-bit GPR already zero-extends, so a single insn suffices.
16163 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
16164 [(set (match_operand:SWI48 0 "register_operand" "=r")
16166 (vec_select:<PEXTR_MODE12:ssescalarmode>
16167 (match_operand:PEXTR_MODE12 1 "register_operand" "YW")
16169 [(match_operand:SI 2
16170 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
16172 "%vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
16173 [(set_attr "type" "sselog1")
16174 (set_attr "prefix_data16" "1")
16175 (set (attr "prefix_extra")
16177 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
16179 (const_string "1")))
16180 (set_attr "length_immediate" "1")
16181 (set_attr "prefix" "maybe_vex")
16182 (set_attr "mode" "TI")])
;; Byte extract zero-extended to HImode, again a single vpextrb.
16184 (define_insn "*vec_extractv16qi_zext"
16185 [(set (match_operand:HI 0 "register_operand" "=r")
16188 (match_operand:V16QI 1 "register_operand" "YW")
16190 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
16192 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
16193 [(set_attr "type" "sselog1")
16194 (set_attr "prefix_data16" "1")
16195 (set_attr "prefix_extra" "1")
16196 (set_attr "length_immediate" "1")
16197 (set_attr "prefix" "maybe_vex")
16198 (set_attr "mode" "TI")])
;; Element extract from a vector that lives in memory: matched so a
;; later split can turn it into a scalar load at the right offset.
16200 (define_insn "*vec_extract<mode>_mem"
16201 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
16202 (vec_select:<ssescalarmode>
16203 (match_operand:VI12_128 1 "memory_operand" "o")
16205 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extracting element 0 of a SI/DI vector is just a scalar move; the
;; alternatives cover load-from-memory, vector->GPR, vector->vector
;; and store-to-memory.
16209 (define_insn "*vec_extract<ssevecmodelower>_0"
16210 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,v ,m")
16212 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "m ,v,vm,v")
16213 (parallel [(const_int 0)])))]
16214 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
16216 [(set_attr "isa" "*,sse2,*,*")
;; Vector->GPR (alternative 1) only preferred when such inter-unit
;; moves are fast on the target.
16217 (set (attr "preferred_for_speed")
16218 (cond [(eq_attr "alternative" "1")
16219 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
16221 (symbol_ref "true")))])
;; Element-0 DI extract for 32-bit targets, where DImode does not fit
;; in one GPR; split after reload (see the following split).
16223 (define_insn "*vec_extractv2di_0_sse"
16224 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,x ,m")
16226 (match_operand:V2DI 1 "nonimmediate_operand" " x,xm,x")
16227 (parallel [(const_int 0)])))]
16228 "TARGET_SSE && !TARGET_64BIT
16229 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
16231 [(set_attr "isa" "sse4,*,*")
16232 (set (attr "preferred_for_speed")
16233 (cond [(eq_attr "alternative" "0")
16234 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
16236 (symbol_ref "true")))])
;; Split a 32-bit-target DI extract into two SImode extracts (elements
;; 0 and 1 of the V4SImode view), targeting the two GPR halves.
16239 [(set (match_operand:DI 0 "general_reg_operand")
16241 (match_operand:V2DI 1 "register_operand")
16242 (parallel [(const_int 0)])))]
16243 "TARGET_SSE4_1 && !TARGET_64BIT
16244 && reload_completed"
16245 [(set (match_dup 2) (match_dup 4))
16249 (parallel [(const_int 1)])))]
16251 operands[4] = gen_lowpart (SImode, operands[1]);
16252 operands[5] = gen_lowpart (V4SImode, operands[1]);
16253 split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
;; Split any element-0 extract into a plain scalar move of the vector
;; register's low part, once register allocation is done.
16257 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
16259 (match_operand:<ssevecmode> 1 "register_operand")
16260 (parallel [(const_int 0)])))]
16261 "TARGET_SSE && reload_completed"
16262 [(set (match_dup 0) (match_dup 1))]
16263 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
;; Element-0 SI extract zero-extended to DI.  Three alternatives:
;; GPR destination (x64), and two vector destinations; the zeroing of
;; the upper bits comes from the move itself.
16265 (define_insn "*vec_extractv4si_0_zext_sse4"
16266 [(set (match_operand:DI 0 "register_operand" "=r,x,v")
16269 (match_operand:V4SI 1 "register_operand" "v,x,v")
16270 (parallel [(const_int 0)]))))]
16273 [(set_attr "isa" "x64,*,avx512f")
16274 (set (attr "preferred_for_speed")
16275 (cond [(eq_attr "alternative" "0")
16276 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
16278 (symbol_ref "true")))])
;; Same, pre-SSE4 form restricted to fast vector->GPR targets.
16280 (define_insn "*vec_extractv4si_0_zext"
16281 [(set (match_operand:DI 0 "register_operand" "=r")
16284 (match_operand:V4SI 1 "register_operand" "x")
16285 (parallel [(const_int 0)]))))]
16286 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
;; Split the zero-extending element-0 extract into a zero-extend of
;; the vector register's SImode lowpart after reload.
16290 [(set (match_operand:DI 0 "register_operand")
16293 (match_operand:V4SI 1 "register_operand")
16294 (parallel [(const_int 0)]))))]
16295 "TARGET_SSE2 && reload_completed"
16296 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
16297 "operands[1] = gen_lowpart (SImode, operands[1]);")
;; General V4SI element extract: pextrd for GPR/memory destinations,
;; otherwise shift the wanted element down with psrldq (immediate is
;; the element index scaled to a byte count).
16299 (define_insn "*vec_extractv4si"
16300 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,Yw")
16302 (match_operand:V4SI 1 "register_operand" " x, v, 0, 0,Yw")
16303 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
16306 switch (which_alternative)
16310 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
16314 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
16315 return "psrldq\t{%2, %0|%0, %2}";
16318 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
16319 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
16322 gcc_unreachable ();
16325 [(set_attr "isa" "*,avx512dq,noavx,noavx,avx")
16326 (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1")
16327 (set (attr "prefix_extra")
16328 (if_then_else (eq_attr "alternative" "0,1")
16330 (const_string "*")))
16331 (set_attr "length_immediate" "1")
16332 (set_attr "prefix" "maybe_vex,evex,orig,orig,maybe_vex")
16333 (set_attr "mode" "TI")])
;; Arbitrary-index SI extract zero-extended to DI via pextrd to %k0.
16335 (define_insn "*vec_extractv4si_zext"
16336 [(set (match_operand:DI 0 "register_operand" "=r,r")
16339 (match_operand:V4SI 1 "register_operand" "x,v")
16340 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
16341 "TARGET_64BIT && TARGET_SSE4_1"
16342 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
16343 [(set_attr "isa" "*,avx512dq")
16344 (set_attr "type" "sselog1")
16345 (set_attr "prefix_extra" "1")
16346 (set_attr "length_immediate" "1")
16347 (set_attr "prefix" "maybe_vex")
16348 (set_attr "mode" "TI")])
;; SI extract from an in-memory vector (split later into a scalar load).
16350 (define_insn "*vec_extractv4si_mem"
16351 [(set (match_operand:SI 0 "register_operand" "=x,r")
16353 (match_operand:V4SI 1 "memory_operand" "o,o")
16354 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; Memory variant with zero extension: after reload, rewrite as a
;; zero-extending SImode load at offset index*4.
16358 (define_insn_and_split "*vec_extractv4si_zext_mem"
16359 [(set (match_operand:DI 0 "register_operand" "=x,r")
16362 (match_operand:V4SI 1 "memory_operand" "o,o")
16363 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
16364 "TARGET_64BIT && TARGET_SSE"
16366 "&& reload_completed"
16367 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
16369 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extract the high DImode element of a V2DI: pextrq to GPR/memory,
;; movhps for a memory store, psrldq/movhlps to move it into the low
;; half of a vector register, or plain loads from an in-memory vector.
16372 (define_insn "*vec_extractv2di_1"
16373 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm,m,x,x,Yv,x,v,r")
16375 (match_operand:V2DI 1 "nonimmediate_operand" "x ,v ,v,0,x, v,x,o,o")
16376 (parallel [(const_int 1)])))]
16377 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
16379 %vpextrq\t{$1, %1, %0|%0, %1, 1}
16380 vpextrq\t{$1, %1, %0|%0, %1, 1}
16381 %vmovhps\t{%1, %0|%0, %1}
16382 psrldq\t{$8, %0|%0, 8}
16383 vpsrldq\t{$8, %1, %0|%0, %1, 8}
16384 vpsrldq\t{$8, %1, %0|%0, %1, 8}
16385 movhlps\t{%1, %0|%0, %1}
;; Per-alternative ISA requirements (pextrq needs 64-bit + SSE4.1 or
;; AVX512DQ for EVEX registers; the final alternatives are GPR loads).
16389 (cond [(eq_attr "alternative" "0")
16390 (const_string "x64_sse4")
16391 (eq_attr "alternative" "1")
16392 (const_string "x64_avx512dq")
16393 (eq_attr "alternative" "3")
16394 (const_string "sse2_noavx")
16395 (eq_attr "alternative" "4")
16396 (const_string "avx")
16397 (eq_attr "alternative" "5")
16398 (const_string "avx512bw")
16399 (eq_attr "alternative" "6")
16400 (const_string "noavx")
16401 (eq_attr "alternative" "8")
16402 (const_string "x64")
16404 (const_string "*")))
16406 (cond [(eq_attr "alternative" "2,6,7")
16407 (const_string "ssemov")
16408 (eq_attr "alternative" "3,4,5")
16409 (const_string "sseishft1")
16410 (eq_attr "alternative" "8")
16411 (const_string "imov")
16413 (const_string "sselog1")))
16414 (set (attr "length_immediate")
16415 (if_then_else (eq_attr "alternative" "0,1,3,4,5")
16417 (const_string "*")))
16418 (set (attr "prefix_rex")
16419 (if_then_else (eq_attr "alternative" "0,1")
16421 (const_string "*")))
16422 (set (attr "prefix_extra")
16423 (if_then_else (eq_attr "alternative" "0,1")
16425 (const_string "*")))
16426 (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
16427 (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
;; After reload, turn an element extract from an in-memory vector into
;; a scalar load at the element's byte offset.
16430 [(set (match_operand:<ssescalarmode> 0 "register_operand")
16431 (vec_select:<ssescalarmode>
16432 (match_operand:VI_128 1 "memory_operand")
16434 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
16435 "TARGET_SSE && reload_completed"
16436 [(set (match_dup 0) (match_dup 1))]
16438 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
16440 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; Extract one 128-bit lane of a V2TI: vextracti128 (or the AVX512
;; vextracti32x4 form, using %g1 to name the full 512-bit register).
16443 (define_insn "*vec_extractv2ti"
16444 [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
16446 (match_operand:V2TI 1 "register_operand" "x,v")
16448 [(match_operand:SI 2 "const_0_to_1_operand")])))]
16451 vextract%~128\t{%2, %1, %0|%0, %1, %2}
16452 vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
16453 [(set_attr "type" "sselog")
16454 (set_attr "prefix_extra" "1")
16455 (set_attr "length_immediate" "1")
16456 (set_attr "prefix" "vex,evex")
16457 (set_attr "mode" "OI")])
;; Extract one 128-bit lane of a V4TI (512-bit source).
16459 (define_insn "*vec_extractv4ti"
16460 [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
16462 (match_operand:V4TI 1 "register_operand" "v")
16464 [(match_operand:SI 2 "const_0_to_3_operand")])))]
16466 "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
16467 [(set_attr "type" "sselog")
16468 (set_attr "prefix_extra" "1")
16469 (set_attr "length_immediate" "1")
16470 (set_attr "prefix" "evex")
16471 (set_attr "mode" "XI")])
16473 (define_mode_iterator VEXTRACTI128_MODE
16474 [(V4TI "TARGET_AVX512F") V2TI])
;; Lane 0 of a V2TI/V4TI is just the TImode lowpart: split to a plain
;; move after reload (guarded so EVEX-only registers are not lowparted
;; without AVX512VL).
16477 [(set (match_operand:TI 0 "nonimmediate_operand")
16479 (match_operand:VEXTRACTI128_MODE 1 "register_operand")
16480 (parallel [(const_int 0)])))]
16482 && reload_completed
16483 && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
16484 [(set (match_dup 0) (match_dup 1))]
16485 "operands[1] = gen_lowpart (TImode, operands[1]);")
16487 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
16488 ;; vector modes into vec_extract*.
16490 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
16491 (subreg:SWI48x (match_operand 1 "register_operand") 0))]
16492 "can_create_pseudo_p ()
16493 && REG_P (operands[1])
16494 && VECTOR_MODE_P (GET_MODE (operands[1]))
16495 && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
16496 || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
16497 || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
16498 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
16499 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
16500 (parallel [(const_int 0)])))]
;; Preparation: for 64- and 32-byte sources, first extract the low
;; 128-bit part (via vec_extract_lo_*) so the final vec_select always
;; reads element 0 of a 128-bit vector in <ssevecmode>mode.
16504 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
16507 if (<MODE>mode == SImode)
16509 tmp = gen_reg_rtx (V8SImode);
16510 emit_insn (gen_vec_extract_lo_v16si (tmp,
16511 gen_lowpart (V16SImode,
16516 tmp = gen_reg_rtx (V4DImode);
16517 emit_insn (gen_vec_extract_lo_v8di (tmp,
16518 gen_lowpart (V8DImode,
16524 tmp = gen_reg_rtx (<ssevecmode>mode);
16525 if (<MODE>mode == SImode)
16526 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
16529 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
16534 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
;; Build a V2SI from two SImode values with SSE4.1: pinsrd inserts the
;; second element, punpckldq interleaves two registers, movd loads a
;; single element with zeroing, and the *y alternatives use MMX.
16539 (define_insn "*vec_concatv2si_sse4_1"
16540 [(set (match_operand:V2SI 0 "register_operand"
16541 "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
16543 (match_operand:SI 1 "nonimmediate_operand"
16544 " 0, 0, x,Yv, 0, 0,Yv,rm, 0,rm")
16545 (match_operand:SI 2 "nonimm_or_0_operand"
16546 " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
16547 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16549 pinsrd\t{$1, %2, %0|%0, %2, 1}
16550 pinsrd\t{$1, %2, %0|%0, %2, 1}
16551 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
16552 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
16553 punpckldq\t{%2, %0|%0, %2}
16554 punpckldq\t{%2, %0|%0, %2}
16555 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
16556 %vmovd\t{%1, %0|%0, %1}
16557 punpckldq\t{%2, %0|%0, %2}
16558 movd\t{%1, %0|%0, %1}"
16559 [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
16560 (set (attr "mmx_isa")
16561 (if_then_else (eq_attr "alternative" "8,9")
16562 (const_string "native")
16563 (const_string "*")))
16565 (cond [(eq_attr "alternative" "7")
16566 (const_string "ssemov")
16567 (eq_attr "alternative" "8")
16568 (const_string "mmxcvt")
16569 (eq_attr "alternative" "9")
16570 (const_string "mmxmov")
16572 (const_string "sselog")))
16573 (set (attr "prefix_extra")
16574 (if_then_else (eq_attr "alternative" "0,1,2,3")
16576 (const_string "*")))
16577 (set (attr "length_immediate")
16578 (if_then_else (eq_attr "alternative" "0,1,2,3")
16580 (const_string "*")))
16581 (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
16582 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
16584 ;; ??? In theory we can match memory for the MMX alternative, but allowing
16585 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
16586 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Pre-SSE4.1 V2SI concatenation: punpckldq/unpcklps to interleave,
;; movd/movss when the second element is zero (C constraint).
16587 (define_insn "*vec_concatv2si"
16588 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,x,x,*y,*y")
16590 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,0,m, 0,rm")
16591 (match_operand:SI 2 "reg_or_0_operand" " x,C ,x,C,*y,C")))]
16592 "TARGET_SSE && !TARGET_SSE4_1"
16594 punpckldq\t{%2, %0|%0, %2}
16595 movd\t{%1, %0|%0, %1}
16596 unpcklps\t{%2, %0|%0, %2}
16597 movss\t{%1, %0|%0, %1}
16598 punpckldq\t{%2, %0|%0, %2}
16599 movd\t{%1, %0|%0, %1}"
16600 [(set_attr "isa" "sse2,sse2,*,*,*,*")
16601 (set_attr "mmx_isa" "*,*,*,*,native,native")
16602 (set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov")
16603 (set_attr "mode" "TI,TI,V4SF,SF,DI,DI")])
;; Concatenate two V2SI halves into a V4SI: punpcklqdq/movlhps for
;; register sources, movhps to merge the high half from memory.
16605 (define_insn "*vec_concatv4si"
16606 [(set (match_operand:V4SI 0 "register_operand" "=x,v,x,x,v")
16608 (match_operand:V2SI 1 "register_operand" " 0,v,0,0,v")
16609 (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
16612 punpcklqdq\t{%2, %0|%0, %2}
16613 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
16614 movlhps\t{%2, %0|%0, %2}
16615 movhps\t{%2, %0|%0, %q2}
16616 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
16617 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
16618 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
16619 (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
16620 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
;; Concatenation with a zero upper half is a zero-extending low-half
;; move: movq, or movq2dq when the source is an MMX register.
16622 (define_insn "*vec_concat<mode>_0"
16623 [(set (match_operand:VI124_128 0 "register_operand" "=v,x")
16624 (vec_concat:VI124_128
16625 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "vm,?!*y")
16626 (match_operand:<ssehalfvecmode> 2 "const0_operand" " C,C")))]
16629 %vmovq\t{%1, %0|%0, %1}
16630 movq2dq\t{%1, %0|%0, %1}"
16631 [(set_attr "mmx_isa" "*,native")
16632 (set_attr "type" "ssemov")
16633 (set_attr "prefix" "maybe_vex,orig")
16634 (set_attr "mode" "TI")])
;; Build a V2DI from two DI operands.  Nine alternatives cover
;; pinsrq/vpinsrq (SSE4.1/AVX/AVX512DQ, 64-bit only), punpcklqdq, and the
;; SSE movlhps/movhps forms; per-alternative isa/prefix attrs below select
;; the matching encoding.
16636 (define_insn "vec_concatv2di"
16637 [(set (match_operand:V2DI 0 "register_operand"
16638 "=Yr,*x,x ,v ,x,v ,x,x,v")
16640 (match_operand:DI 1 "register_operand"
16641 " 0, 0,x ,Yv,0,Yv,0,0,v")
16642 (match_operand:DI 2 "nonimmediate_operand"
16643 " rm,rm,rm,rm,x,Yv,x,m,m")))]
16646 pinsrq\t{$1, %2, %0|%0, %2, 1}
16647 pinsrq\t{$1, %2, %0|%0, %2, 1}
16648 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
16649 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
16650 punpcklqdq\t{%2, %0|%0, %2}
16651 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
16652 movlhps\t{%2, %0|%0, %2}
16653 movhps\t{%2, %0|%0, %2}
16654 vmovhps\t{%2, %1, %0|%0, %1, %2}"
;; Per-alternative ISA requirements (pinsrq alternatives need 64-bit mode).
16656 (cond [(eq_attr "alternative" "0,1")
16657 (const_string "x64_sse4_noavx")
16658 (eq_attr "alternative" "2")
16659 (const_string "x64_avx")
16660 (eq_attr "alternative" "3")
16661 (const_string "x64_avx512dq")
16662 (eq_attr "alternative" "4")
16663 (const_string "sse2_noavx")
16664 (eq_attr "alternative" "5,8")
16665 (const_string "avx")
16667 (const_string "noavx")))
16670 (eq_attr "alternative" "0,1,2,3,4,5")
16671 (const_string "sselog")
16672 (const_string "ssemov")))
;; The pinsrq forms (alternatives 0-3) carry a REX prefix, a two-byte
;; opcode extension, and an immediate byte; others do not.
16673 (set (attr "prefix_rex")
16674 (if_then_else (eq_attr "alternative" "0,1,2,3")
16676 (const_string "*")))
16677 (set (attr "prefix_extra")
16678 (if_then_else (eq_attr "alternative" "0,1,2,3")
16680 (const_string "*")))
16681 (set (attr "length_immediate")
16682 (if_then_else (eq_attr "alternative" "0,1,2,3")
16684 (const_string "*")))
16685 (set (attr "prefix")
16686 (cond [(eq_attr "alternative" "2")
16687 (const_string "vex")
16688 (eq_attr "alternative" "3")
16689 (const_string "evex")
16690 (eq_attr "alternative" "5,8")
16691 (const_string "maybe_evex")
16693 (const_string "orig")))
16694 (set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; V2DI concat with a zero high half.  Alternative 0 moves from a GPR
;; (64-bit only; movq vs. movd depends on assembler inter-unit support),
;; alternative 1 from SSE reg/mem, alternative 2 from MMX via movq2dq.
;; preferred_for_speed gates the GPR->vector form on the tuning flag.
16696 (define_insn "*vec_concatv2di_0"
16697 [(set (match_operand:V2DI 0 "register_operand" "=v,v ,x")
16699 (match_operand:DI 1 "nonimmediate_operand" " r,vm,?!*y")
16700 (match_operand:DI 2 "const0_operand" " C,C ,C")))]
16703 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
16704 %vmovq\t{%1, %0|%0, %1}
16705 movq2dq\t{%1, %0|%0, %1}"
16706 [(set_attr "isa" "x64,*,*")
16707 (set_attr "mmx_isa" "*,*,native")
16708 (set_attr "type" "ssemov")
16709 (set_attr "prefix_rex" "1,*,*")
16710 (set_attr "prefix" "maybe_vex,maybe_vex,orig")
16711 (set_attr "mode" "TI")
16712 (set (attr "preferred_for_speed")
16713 (cond [(eq_attr "alternative" "0")
16714 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
16716 (symbol_ref "true")))])
16718 ;; vmovq clears also the higher bits.
;; Set element 0 of a 64-bit-element vector, zeroing the other elements,
;; via vmovq on the low 128-bit lane (%x0).  Alternative 0 takes a GPR
;; source (64-bit mode only); speed preference gated on inter-unit moves.
16719 (define_insn "vec_set<mode>_0"
16720 [(set (match_operand:VI8_AVX_AVX512F 0 "register_operand" "=v,v")
16721 (vec_merge:VI8_AVX_AVX512F
16722 (vec_duplicate:VI8_AVX_AVX512F
16723 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,vm"))
16724 (match_operand:VI8_AVX_AVX512F 1 "const0_operand" "C,C")
16727 "vmovq\t{%2, %x0|%x0, %2}"
16728 [(set_attr "isa" "x64,*")
16729 (set_attr "type" "ssemov")
16730 (set_attr "prefix_rex" "1,*")
16731 (set_attr "prefix" "maybe_evex")
16732 (set_attr "mode" "TI")
16733 (set (attr "preferred_for_speed")
16734 (cond [(eq_attr "alternative" "0")
16735 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
16737 (symbol_ref "true")))])
;; Vector widening (unpack) expanders.  The vector forms delegate to
;; ix86_expand_sse_unpack (operands, unsigned_p, high_p); the scalar-mode
;; forms below operate on AVX512 mask registers (QI/HI/SI/DI), where the
;; low half is just a subreg and the high half is a mask-register shift.
16739 (define_expand "vec_unpacks_lo_<mode>"
16740 [(match_operand:<sseunpackmode> 0 "register_operand")
16741 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
16743 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
16745 (define_expand "vec_unpacks_hi_<mode>"
16746 [(match_operand:<sseunpackmode> 0 "register_operand")
16747 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
16749 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
16751 (define_expand "vec_unpacku_lo_<mode>"
16752 [(match_operand:<sseunpackmode> 0 "register_operand")
16753 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
16755 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Low half of a QI mask unpack: only total unit counts 8 or 4 are valid;
;; the low half is simply a copy of the mask.
16757 (define_expand "vec_unpacks_sbool_lo_qi"
16758 [(match_operand:QI 0 "register_operand")
16759 (match_operand:QI 1 "register_operand")
16760 (match_operand:QI 2 "const_int_operand")]
16763 if (INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 4)
16765 emit_move_insn (operands[0], operands[1]);
;; Low-half mask unpacks for HI/SI/DI masks: a plain lowpart subreg move.
16769 (define_expand "vec_unpacks_lo_hi"
16770 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
16771 (match_operand:HI 1 "register_operand"))]
16774 (define_expand "vec_unpacks_lo_si"
16775 [(set (match_operand:HI 0 "register_operand")
16776 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
16779 (define_expand "vec_unpacks_lo_di"
16780 [(set (match_operand:SI 0 "register_operand")
16781 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
16784 (define_expand "vec_unpacku_hi_<mode>"
16785 [(match_operand:<sseunpackmode> 0 "register_operand")
16786 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
16788 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
;; High half of a QI mask unpack: shift the mask right by nunits/2 using
;; the k-register shift (kshiftrb with AVX512DQ, else kshiftrw via HImode).
16790 (define_expand "vec_unpacks_sbool_hi_qi"
16791 [(match_operand:QI 0 "register_operand")
16792 (match_operand:QI 1 "register_operand")
16793 (match_operand:QI 2 "const_int_operand")]
16796 HOST_WIDE_INT nunits = INTVAL (operands[2]);
16797 if (nunits != 8 && nunits != 4)
16799 if (TARGET_AVX512DQ)
16800 emit_insn (gen_klshiftrtqi (operands[0], operands[1],
16801 GEN_INT (nunits / 2)));
16804 rtx tem = gen_reg_rtx (HImode);
16805 emit_insn (gen_klshiftrthi (tem, lowpart_subreg (HImode, operands[1],
16807 GEN_INT (nunits / 2)));
16808 emit_move_insn (operands[0], lowpart_subreg (QImode, tem, HImode));
;; High-half mask unpacks: logical right shift of the mask register,
;; tagged with UNSPEC_MASKOP so it stays a k-register operation.
16813 (define_expand "vec_unpacks_hi_hi"
16815 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
16816 (lshiftrt:HI (match_operand:HI 1 "register_operand")
16818 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
16821 (define_expand "vec_unpacks_hi_<mode>"
16823 [(set (subreg:SWI48x
16824 (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
16825 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
16827 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
16829 "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
16831 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16835 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unsigned average with rounding: (a + b + 1) >> 1 computed in the
;; double-width mode, then truncated back.  Matches pavgb/pavgw.
16837 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
16838 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
16839 (truncate:VI12_AVX2_AVX512BW
16840 (lshiftrt:<ssedoublemode>
16841 (plus:<ssedoublemode>
16842 (plus:<ssedoublemode>
16843 (zero_extend:<ssedoublemode>
16844 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
16845 (zero_extend:<ssedoublemode>
16846 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
16847 (match_dup <mask_expand_op3>))
16849 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
;; The rounding addend is the all-ones-valued constant vector (+1 lanes).
16851 operands[<mask_expand_op3>] = CONST1_RTX(<ssedoublemode>mode);
16852 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Matcher for the rounded unsigned average above; emits pavgb/pavgw
;; (legacy SSE) or the 3-operand vpavg* form, optionally masked.
16855 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
16856 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,<v_Yw>")
16857 (truncate:VI12_AVX2_AVX512BW
16858 (lshiftrt:<ssedoublemode>
16859 (plus:<ssedoublemode>
16860 (plus:<ssedoublemode>
16861 (zero_extend:<ssedoublemode>
16862 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,<v_Yw>"))
16863 (zero_extend:<ssedoublemode>
16864 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,<v_Yw>m")))
16865 (match_operand:<ssedoublemode> <mask_expand_op3> "const1_operand"))
16867 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
16868 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16870 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
16871 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16872 [(set_attr "isa" "noavx,avx")
16873 (set_attr "type" "sseiadd")
16874 (set_attr "prefix_data16" "1,*")
16875 (set_attr "prefix" "orig,<mask_prefix>")
16876 (set_attr "mode" "<sseinsnmode>")])
16878 ;; The correct representation for this is absolutely enormous, and
16879 ;; surely not generally useful.
;; Sum of absolute differences of packed unsigned bytes; kept as an
;; unspec rather than expanded RTL for the reason above.
16880 (define_insn "<sse2_avx2>_psadbw"
16881 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,YW")
16882 (unspec:VI8_AVX2_AVX512BW
16883 [(match_operand:<ssebytemode> 1 "register_operand" "0,YW")
16884 (match_operand:<ssebytemode> 2 "vector_operand" "xBm,YWm")]
16888 psadbw\t{%2, %0|%0, %2}
16889 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
16890 [(set_attr "isa" "noavx,avx")
16891 (set_attr "type" "sseiadd")
16892 (set_attr "atom_unit" "simul")
16893 (set_attr "prefix_data16" "1,*")
16894 (set_attr "prefix" "orig,maybe_evex")
16895 (set_attr "mode" "<sseinsnmode>")])
;; movmskps/movmskpd: extract the sign bits of each FP element into a GPR.
16897 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
16898 [(set (match_operand:SI 0 "register_operand" "=r")
16900 [(match_operand:VF_128_256 1 "register_operand" "x")]
16903 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
16904 [(set_attr "type" "ssemov")
16905 (set_attr "prefix" "maybe_vex")
16906 (set_attr "mode" "<MODE>")])
;; DImode-extended variant: the SI result is written to %k0, which the
;; hardware zero-extends (sign bits are non-negative, so sign-extend
;; and zero-extend coincide).
16908 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext"
16909 [(set (match_operand:DI 0 "register_operand" "=r")
16912 [(match_operand:VF_128_256 1 "register_operand" "x")]
16914 "TARGET_64BIT && TARGET_SSE"
16915 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
16916 [(set_attr "type" "ssemov")
16917 (set_attr "prefix" "maybe_vex")
16918 (set_attr "mode" "<MODE>")])
;; A (x < 0) integer-vector compare feeding movmsk is equivalent to
;; movmsk on the FP view of x; split to the plain movmsk after reload.
16920 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt"
16921 [(set (match_operand:SI 0 "register_operand" "=r")
16924 (match_operand:<sseintvecmode> 1 "register_operand" "x")
16925 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
16929 "&& reload_completed"
16930 [(set (match_dup 0)
16931 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
16932 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
16933 [(set_attr "type" "ssemov")
16934 (set_attr "prefix" "maybe_vex")
16935 (set_attr "mode" "<MODE>")])
;; Same "< 0" simplification for the DImode-extended movmsk.
16937 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
16938 [(set (match_operand:DI 0 "register_operand" "=r")
16942 (match_operand:<sseintvecmode> 1 "register_operand" "x")
16943 (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
16945 "TARGET_64BIT && TARGET_SSE"
16947 "&& reload_completed"
16948 [(set (match_dup 0)
16949 (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
16950 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
16951 [(set_attr "type" "ssemov")
16952 (set_attr "prefix" "maybe_vex")
16953 (set_attr "mode" "<MODE>")])
;; An arithmetic right shift feeding movmsk leaves the sign bits
;; unchanged, so the shift can be dropped; split after reload.
16955 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
16956 [(set (match_operand:SI 0 "register_operand" "=r")
16958 [(subreg:VF_128_256
16959 (ashiftrt:<sseintvecmode>
16960 (match_operand:<sseintvecmode> 1 "register_operand" "x")
16961 (match_operand:QI 2 "const_int_operand" "n")) 0)]
16965 "&& reload_completed"
16966 [(set (match_dup 0)
16967 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
16968 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
16969 [(set_attr "type" "ssemov")
16970 (set_attr "prefix" "maybe_vex")
16971 (set_attr "mode" "<MODE>")])
;; Shift simplification, DImode-extended result.
16973 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift"
16974 [(set (match_operand:DI 0 "register_operand" "=r")
16977 [(subreg:VF_128_256
16978 (ashiftrt:<sseintvecmode>
16979 (match_operand:<sseintvecmode> 1 "register_operand" "x")
16980 (match_operand:QI 2 "const_int_operand" "n")) 0)]
16982 "TARGET_64BIT && TARGET_SSE"
16984 "&& reload_completed"
16985 [(set (match_dup 0)
16986 (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
16987 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
16988 [(set_attr "type" "ssemov")
16989 (set_attr "prefix" "maybe_vex")
16990 (set_attr "mode" "<MODE>")])
;; pmovmskb: extract the sign bit of each byte element into a GPR.
16992 (define_insn "<sse2_avx2>_pmovmskb"
16993 [(set (match_operand:SI 0 "register_operand" "=r")
16995 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
16998 "%vpmovmskb\t{%1, %0|%0, %1}"
16999 [(set_attr "type" "ssemov")
17000 (set (attr "prefix_data16")
17002 (match_test "TARGET_AVX")
17004 (const_string "1")))
17005 (set_attr "prefix" "maybe_vex")
17006 (set_attr "mode" "SI")])
;; Zero-extended DImode result: write to %k0 and rely on implicit
;; zero-extension of 32-bit destinations.
17008 (define_insn "*<sse2_avx2>_pmovmskb_zext"
17009 [(set (match_operand:DI 0 "register_operand" "=r")
17012 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
17014 "TARGET_64BIT && TARGET_SSE2"
17015 "%vpmovmskb\t{%1, %k0|%k0, %1}"
17016 [(set_attr "type" "ssemov")
17017 (set (attr "prefix_data16")
17019 (match_test "TARGET_AVX")
17021 (const_string "1")))
17022 (set_attr "prefix" "maybe_vex")
17023 (set_attr "mode" "SI")])
;; Sign-extended DImode result for the 16-byte mask (value fits in 16
;; bits, so it is non-negative and the same instruction works).
17025 (define_insn "*sse2_pmovmskb_ext"
17026 [(set (match_operand:DI 0 "register_operand" "=r")
17029 [(match_operand:V16QI 1 "register_operand" "x")]
17031 "TARGET_64BIT && TARGET_SSE2"
17032 "%vpmovmskb\t{%1, %k0|%k0, %1}"
17033 [(set_attr "type" "ssemov")
17034 (set (attr "prefix_data16")
17036 (match_test "TARGET_AVX")
17038 (const_string "1")))
17039 (set_attr "prefix" "maybe_vex")
17040 (set_attr "mode" "SI")])
;; The HI->SI zero-extension of a 16-byte pmovmskb result is a no-op:
;; split to the plain SImode pmovmskb before reload.
17042 (define_insn_and_split "*sse2_pmovskb_zexthisi"
17043 [(set (match_operand:SI 0 "register_operand")
17047 [(match_operand:V16QI 1 "register_operand")]
17048 UNSPEC_MOVMSK) 0)))]
17049 "TARGET_SSE2 && ix86_pre_reload_split ()"
17052 [(set (match_dup 0)
17053 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))])
;; NOTE(review): splitter head line elided in this extraction.
;; Rewrites a negated/complemented pmovmskb result as pmovmskb followed
;; by xor with 0xffff (the 16-bit all-ones mask).
17056 [(set (match_operand:SI 0 "register_operand")
17061 [(match_operand:V16QI 1 "register_operand")]
17062 UNSPEC_MOVMSK) 0))))]
17064 [(set (match_dup 2)
17065 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
17067 (xor:SI (match_dup 2) (const_int 65535)))]
17068 "operands[2] = gen_reg_rtx (SImode);")
;; pmovmskb of a bitwise-NOT vector == complement of pmovmskb; for
;; 32-element vectors a full NOT is used, otherwise xor with the
;; (1 << nelts) - 1 mask.
17071 [(set (match_operand:SI 0 "register_operand")
17073 [(not:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand"))]
17076 [(set (match_dup 2)
17077 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
17078 (set (match_dup 0) (match_dup 3))]
17080 operands[2] = gen_reg_rtx (SImode);
17081 if (GET_MODE_NUNITS (<MODE>mode) == 32)
17082 operands[3] = gen_rtx_NOT (SImode, operands[2]);
17086 = gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
17088 operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
;; Same NOT-folding when the NOT is wrapped in a same-size subreg of
;; another integer-vector mode.
17093 [(set (match_operand:SI 0 "register_operand")
17095 [(subreg:VI1_AVX2 (not (match_operand 1 "register_operand")) 0)]
17098 && GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_INT
17099 && GET_MODE_SIZE (GET_MODE (operands[1])) == <MODE_SIZE>"
17100 [(set (match_dup 2)
17101 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
17102 (set (match_dup 0) (match_dup 3))]
17104 operands[2] = gen_reg_rtx (SImode);
17105 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
17106 if (GET_MODE_NUNITS (<MODE>mode) == 32)
17107 operands[3] = gen_rtx_NOT (SImode, operands[2]);
17111 = gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
17113 operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
;; (x < 0) byte compare feeding pmovmskb is redundant: split it away.
17117 (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
17118 [(set (match_operand:SI 0 "register_operand" "=r")
17120 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
17121 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
17126 [(set (match_dup 0)
17127 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
17129 [(set_attr "type" "ssemov")
17130 (set (attr "prefix_data16")
17132 (match_test "TARGET_AVX")
17134 (const_string "1")))
17135 (set_attr "prefix" "maybe_vex")
17136 (set_attr "mode" "SI")])
;; "< 0" simplification, zero-extended DImode variant.
17138 (define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
17139 [(set (match_operand:DI 0 "register_operand" "=r")
17142 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
17143 (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
17145 "TARGET_64BIT && TARGET_SSE2"
17148 [(set (match_dup 0)
17149 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
17151 [(set_attr "type" "ssemov")
17152 (set (attr "prefix_data16")
17154 (match_test "TARGET_AVX")
17156 (const_string "1")))
17157 (set_attr "prefix" "maybe_vex")
17158 (set_attr "mode" "SI")])
;; "< 0" simplification, sign-extended DImode variant (V16QI only).
17160 (define_insn_and_split "*sse2_pmovmskb_ext_lt"
17161 [(set (match_operand:DI 0 "register_operand" "=r")
17164 [(lt:V16QI (match_operand:V16QI 1 "register_operand" "x")
17165 (match_operand:V16QI 2 "const0_operand" "C"))]
17167 "TARGET_64BIT && TARGET_SSE2"
17170 [(set (match_dup 0)
17171 (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
17173 [(set_attr "type" "ssemov")
17174 (set (attr "prefix_data16")
17176 (match_test "TARGET_AVX")
17178 (const_string "1")))
17179 (set_attr "prefix" "maybe_vex")
17180 (set_attr "mode" "SI")])
;; maskmovdqu: byte-masked store of operand 1 under mask operand 2 to the
;; address implicitly held in %rdi/%edi (operand 0 of the insn below).
17182 (define_expand "sse2_maskmovdqu"
17183 [(set (match_operand:V16QI 0 "memory_operand")
17184 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
17185 (match_operand:V16QI 2 "register_operand")
17190 (define_insn "*sse2_maskmovdqu"
17191 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
17192 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
17193 (match_operand:V16QI 2 "register_operand" "x")
17194 (mem:V16QI (match_dup 0))]
17198 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
17199 that requires %v to be at the beginning of the opcode name. */
17200 if (Pmode != word_mode)
17201 fputs ("\taddr32", asm_out_file);
17202 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
17204 [(set_attr "type" "ssemov")
17205 (set_attr "prefix_data16" "1")
17206 (set (attr "length_address")
17207 (symbol_ref ("Pmode != word_mode")))
17208 ;; The implicit %rdi operand confuses default length_vex computation.
17209 (set (attr "length_vex")
17210 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
17211 (set_attr "prefix" "maybe_vex")
17212 (set_attr "znver1_decode" "vector")
17213 (set_attr "mode" "TI")])
;; ldmxcsr: load the SSE control/status register from memory.
17215 (define_insn "sse_ldmxcsr"
17216 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
17220 [(set_attr "type" "sse")
17221 (set_attr "atom_sse_attr" "mxcsr")
17222 (set_attr "prefix" "maybe_vex")
17223 (set_attr "memory" "load")])
;; stmxcsr: store the SSE control/status register to memory.
17225 (define_insn "sse_stmxcsr"
17226 [(set (match_operand:SI 0 "memory_operand" "=m")
17227 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
17230 [(set_attr "type" "sse")
17231 (set_attr "atom_sse_attr" "mxcsr")
17232 (set_attr "prefix" "maybe_vex")
17233 (set_attr "memory" "store")])
;; clflush: flush the cache line containing the given address.
17235 (define_insn "sse2_clflush"
17236 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
17240 [(set_attr "type" "sse")
17241 (set_attr "atom_sse_attr" "fence")
17242 (set_attr "memory" "unknown")])
17244 ;; As per AMD and Intel ISA manuals, the first operand is extensions
17245 ;; and it goes to %ecx. The second operand received is hints and it goes
;; mwait: wait for a store to the monitored range (extensions in %ecx,
;; hints in %eax, both hard-registered via constraints).
17247 (define_insn "sse3_mwait"
17248 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
17249 (match_operand:SI 1 "register_operand" "a")]
17252 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
17253 ;; Since 32bit register operands are implicitly zero extended to 64bit,
17254 ;; we only need to set up 32bit registers.
17256 [(set_attr "length" "3")])
;; monitor: arm address monitoring on %rax/%eax with %ecx/%edx hints.
17258 (define_insn "@sse3_monitor_<mode>"
17259 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
17260 (match_operand:SI 1 "register_operand" "c")
17261 (match_operand:SI 2 "register_operand" "d")]
17264 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
17265 ;; RCX and RDX are used. Since 32bit register operands are implicitly
17266 ;; zero extended to 64bit, we only need to set up 32bit registers.
;; An addr32 prefix byte is needed when Pmode is narrower than word_mode.
17268 [(set (attr "length")
17269 (symbol_ref ("(Pmode != word_mode) + 3")))])
17271 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17273 ;; SSSE3 instructions
17275 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over the four horizontal add/subtract variants
;; (wrapping and saturating).
17277 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; 256-bit horizontal add/sub of words: pairs are combined within each
;; 128-bit lane, hence the interleaved lane selection indices.
17279 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
17280 [(set (match_operand:V16HI 0 "register_operand" "=x")
17281 (ssse3_plusminus:V16HI
17284 (match_operand:V16HI 1 "register_operand" "x")
17285 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
17287 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
17288 (const_int 16) (const_int 18) (const_int 20) (const_int 22)
17289 (const_int 8) (const_int 10) (const_int 12) (const_int 14)
17290 (const_int 24) (const_int 26) (const_int 28) (const_int 30)]))
17292 (vec_concat:V32HI (match_dup 1) (match_dup 2))
17294 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
17295 (const_int 17) (const_int 19) (const_int 21) (const_int 23)
17296 (const_int 9) (const_int 11) (const_int 13) (const_int 15)
17297 (const_int 25) (const_int 27) (const_int 29) (const_int 31)]))))]
17299 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
17300 [(set_attr "type" "sseiadd")
17301 (set_attr "prefix_extra" "1")
17302 (set_attr "prefix" "vex")
17303 (set_attr "mode" "OI")])
;; 128-bit horizontal add/sub of words (phaddw/phsubw and saturating
;; forms); even-indexed sums and odd-indexed sums are concatenated.
17305 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
17306 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
17307 (ssse3_plusminus:V8HI
17310 (match_operand:V8HI 1 "register_operand" "0,x")
17311 (match_operand:V8HI 2 "vector_operand" "xBm,xm"))
17313 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
17314 (const_int 8) (const_int 10) (const_int 12) (const_int 14)]))
17316 (vec_concat:V16HI (match_dup 1) (match_dup 2))
17318 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
17319 (const_int 9) (const_int 11) (const_int 13) (const_int 15)]))))]
17322 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
17323 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
17324 [(set_attr "isa" "noavx,avx")
17325 (set_attr "type" "sseiadd")
17326 (set_attr "atom_unit" "complex")
17327 (set_attr "prefix_data16" "1,*")
17328 (set_attr "prefix_extra" "1")
17329 (set_attr "prefix" "orig,vex")
17330 (set_attr "mode" "TI")])
;; 64-bit (MMX-width) horizontal add/sub of words.  When allocated to an
;; SSE register, split after reload into the V8HI SSE pattern operating
;; on lowpart subregs, then fix up the high half.
17332 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>wv4hi3"
17333 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
17334 (ssse3_plusminus:V4HI
17337 (match_operand:V4HI 1 "register_operand" "0,0,Yv")
17338 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
17340 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))
17342 (vec_concat:V8HI (match_dup 1) (match_dup 2))
17344 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))]
17345 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17347 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
17350 "TARGET_SSSE3 && reload_completed
17351 && SSE_REGNO_P (REGNO (operands[0]))"
17354 /* Generate SSE version of the operation. */
17355 rtx op0 = lowpart_subreg (V8HImode, operands[0],
17356 GET_MODE (operands[0]));
17357 rtx op1 = lowpart_subreg (V8HImode, operands[1],
17358 GET_MODE (operands[1]));
17359 rtx op2 = lowpart_subreg (V8HImode, operands[2],
17360 GET_MODE (operands[2]));
17361 emit_insn (gen_ssse3_ph<plusminus_mnemonic>wv8hi3 (op0, op1, op2));
17362 ix86_move_vector_high_sse_to_mmx (op0);
17365 [(set_attr "mmx_isa" "native,sse_noavx,avx")
17366 (set_attr "type" "sseiadd")
17367 (set_attr "atom_unit" "complex")
17368 (set_attr "prefix_extra" "1")
17369 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17370 (set_attr "mode" "DI,TI,TI")])
;; 256-bit horizontal add/sub of dwords (per-128-bit-lane pairing).
17372 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
17373 [(set (match_operand:V8SI 0 "register_operand" "=x")
17377 (match_operand:V8SI 1 "register_operand" "x")
17378 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
17380 [(const_int 0) (const_int 2) (const_int 8) (const_int 10)
17381 (const_int 4) (const_int 6) (const_int 12) (const_int 14)]))
17383 (vec_concat:V16SI (match_dup 1) (match_dup 2))
17385 [(const_int 1) (const_int 3) (const_int 9) (const_int 11)
17386 (const_int 5) (const_int 7) (const_int 13) (const_int 15)]))))]
17388 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
17389 [(set_attr "type" "sseiadd")
17390 (set_attr "prefix_extra" "1")
17391 (set_attr "prefix" "vex")
17392 (set_attr "mode" "OI")])
;; 128-bit horizontal add/sub of dwords.
17394 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
17395 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
17399 (match_operand:V4SI 1 "register_operand" "0,x")
17400 (match_operand:V4SI 2 "vector_operand" "xBm,xm"))
17402 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))
17404 (vec_concat:V8SI (match_dup 1) (match_dup 2))
17406 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))]
17409 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
17410 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
17411 [(set_attr "isa" "noavx,avx")
17412 (set_attr "type" "sseiadd")
17413 (set_attr "atom_unit" "complex")
17414 (set_attr "prefix_data16" "1,*")
17415 (set_attr "prefix_extra" "1")
17416 (set_attr "prefix" "orig,vex")
17417 (set_attr "mode" "TI")])
;; 64-bit (MMX-width) horizontal add/sub of dwords; same SSE-register
;; split strategy as the word-sized pattern above.
17419 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>dv2si3"
17420 [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
17424 (match_operand:V2SI 1 "register_operand" "0,0,Yv")
17425 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
17426 (parallel [(const_int 0) (const_int 2)]))
17428 (vec_concat:V4SI (match_dup 1) (match_dup 2))
17429 (parallel [(const_int 1) (const_int 3)]))))]
17430 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17432 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
17435 "TARGET_SSSE3 && reload_completed
17436 && SSE_REGNO_P (REGNO (operands[0]))"
17439 /* Generate SSE version of the operation. */
17440 rtx op0 = lowpart_subreg (V4SImode, operands[0],
17441 GET_MODE (operands[0]));
17442 rtx op1 = lowpart_subreg (V4SImode, operands[1],
17443 GET_MODE (operands[1]));
17444 rtx op2 = lowpart_subreg (V4SImode, operands[2],
17445 GET_MODE (operands[2]));
17446 emit_insn (gen_ssse3_ph<plusminus_mnemonic>dv4si3 (op0, op1, op2));
17447 ix86_move_vector_high_sse_to_mmx (op0);
17450 [(set_attr "mmx_isa" "native,sse_noavx,avx")
17451 (set_attr "type" "sseiadd")
17452 (set_attr "atom_unit" "complex")
17453 (set_attr "prefix_extra" "1")
17454 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17455 (set_attr "mode" "DI,TI,TI")])
;; vpmaddubsw (256-bit): multiply unsigned bytes of operand 1 by signed
;; bytes of operand 2 and add adjacent (even/odd) products into words;
;; modelled explicitly via even/odd vec_selects.
17457 (define_insn "avx2_pmaddubsw256"
17458 [(set (match_operand:V16HI 0 "register_operand" "=Yw")
17463 (match_operand:V32QI 1 "register_operand" "Yw")
17464 (parallel [(const_int 0) (const_int 2)
17465 (const_int 4) (const_int 6)
17466 (const_int 8) (const_int 10)
17467 (const_int 12) (const_int 14)
17468 (const_int 16) (const_int 18)
17469 (const_int 20) (const_int 22)
17470 (const_int 24) (const_int 26)
17471 (const_int 28) (const_int 30)])))
17474 (match_operand:V32QI 2 "nonimmediate_operand" "Ywm")
17475 (parallel [(const_int 0) (const_int 2)
17476 (const_int 4) (const_int 6)
17477 (const_int 8) (const_int 10)
17478 (const_int 12) (const_int 14)
17479 (const_int 16) (const_int 18)
17480 (const_int 20) (const_int 22)
17481 (const_int 24) (const_int 26)
17482 (const_int 28) (const_int 30)]))))
17485 (vec_select:V16QI (match_dup 1)
17486 (parallel [(const_int 1) (const_int 3)
17487 (const_int 5) (const_int 7)
17488 (const_int 9) (const_int 11)
17489 (const_int 13) (const_int 15)
17490 (const_int 17) (const_int 19)
17491 (const_int 21) (const_int 23)
17492 (const_int 25) (const_int 27)
17493 (const_int 29) (const_int 31)])))
17495 (vec_select:V16QI (match_dup 2)
17496 (parallel [(const_int 1) (const_int 3)
17497 (const_int 5) (const_int 7)
17498 (const_int 9) (const_int 11)
17499 (const_int 13) (const_int 15)
17500 (const_int 17) (const_int 19)
17501 (const_int 21) (const_int 23)
17502 (const_int 25) (const_int 27)
17503 (const_int 29) (const_int 31)]))))))]
17505 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
17506 [(set_attr "type" "sseiadd")
17507 (set_attr "prefix_extra" "1")
17508 (set_attr "prefix" "vex")
17509 (set_attr "mode" "OI")])
17511 ;; The correct representation for this is absolutely enormous, and
17512 ;; surely not generally useful.
;; 512-bit vpmaddubsw kept as an opaque unspec (see comment above),
;; optionally masked.
17513 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
17514 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17515 (unspec:VI2_AVX512VL
17516 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
17517 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
17518 UNSPEC_PMADDUBSW512))]
17520 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
17521 [(set_attr "type" "sseiadd")
17522 (set_attr "prefix" "evex")
17523 (set_attr "mode" "XI")])
;; 512-bit vpmulhrsw: high part of the rounded signed multiply,
;; (((a * b) >> 14) + 1) >> 1, with the +1 rounding spelled as the
;; explicit all-ones V32HI constant vector.
17525 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
17526 [(set (match_operand:V32HI 0 "register_operand" "=v")
17533 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
17535 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
17537 (const_vector:V32HI [(const_int 1) (const_int 1)
17538 (const_int 1) (const_int 1)
17539 (const_int 1) (const_int 1)
17540 (const_int 1) (const_int 1)
17541 (const_int 1) (const_int 1)
17542 (const_int 1) (const_int 1)
17543 (const_int 1) (const_int 1)
17544 (const_int 1) (const_int 1)
17545 (const_int 1) (const_int 1)
17546 (const_int 1) (const_int 1)
17547 (const_int 1) (const_int 1)
17548 (const_int 1) (const_int 1)
17549 (const_int 1) (const_int 1)
17550 (const_int 1) (const_int 1)
17551 (const_int 1) (const_int 1)
17552 (const_int 1) (const_int 1)]))
17555 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17556 [(set_attr "type" "sseimul")
17557 (set_attr "prefix" "evex")
17558 (set_attr "mode" "XI")])
;; 128-bit pmaddubsw: u8 x s8 products of even and odd byte pairs,
;; summed into signed words.
17560 (define_insn "ssse3_pmaddubsw128"
17561 [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
17566 (match_operand:V16QI 1 "register_operand" "0,Yw")
17567 (parallel [(const_int 0) (const_int 2)
17568 (const_int 4) (const_int 6)
17569 (const_int 8) (const_int 10)
17570 (const_int 12) (const_int 14)])))
17573 (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")
17574 (parallel [(const_int 0) (const_int 2)
17575 (const_int 4) (const_int 6)
17576 (const_int 8) (const_int 10)
17577 (const_int 12) (const_int 14)]))))
17580 (vec_select:V8QI (match_dup 1)
17581 (parallel [(const_int 1) (const_int 3)
17582 (const_int 5) (const_int 7)
17583 (const_int 9) (const_int 11)
17584 (const_int 13) (const_int 15)])))
17586 (vec_select:V8QI (match_dup 2)
17587 (parallel [(const_int 1) (const_int 3)
17588 (const_int 5) (const_int 7)
17589 (const_int 9) (const_int 11)
17590 (const_int 13) (const_int 15)]))))))]
17593 pmaddubsw\t{%2, %0|%0, %2}
17594 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
17595 [(set_attr "isa" "noavx,avx")
17596 (set_attr "type" "sseiadd")
17597 (set_attr "atom_unit" "simul")
17598 (set_attr "prefix_data16" "1,*")
17599 (set_attr "prefix_extra" "1")
17600 (set_attr "prefix" "orig,vex")
17601 (set_attr "mode" "TI")])
;; 64-bit (MMX-width) pmaddubsw; native MMX alternative plus SSE-register
;; alternatives (noavx/avx).
17603 (define_insn "ssse3_pmaddubsw"
17604 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
17609 (match_operand:V8QI 1 "register_operand" "0,0,Yv")
17610 (parallel [(const_int 0) (const_int 2)
17611 (const_int 4) (const_int 6)])))
17614 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
17615 (parallel [(const_int 0) (const_int 2)
17616 (const_int 4) (const_int 6)]))))
17619 (vec_select:V4QI (match_dup 1)
17620 (parallel [(const_int 1) (const_int 3)
17621 (const_int 5) (const_int 7)])))
17623 (vec_select:V4QI (match_dup 2)
17624 (parallel [(const_int 1) (const_int 3)
17625 (const_int 5) (const_int 7)]))))))]
17626 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17628 pmaddubsw\t{%2, %0|%0, %2}
17629 pmaddubsw\t{%2, %0|%0, %2}
17630 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
17631 [(set_attr "isa" "*,noavx,avx")
17632 (set_attr "mmx_isa" "native,*,*")
17633 (set_attr "type" "sseiadd")
17634 (set_attr "atom_unit" "simul")
17635 (set_attr "prefix_extra" "1")
17636 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17637 (set_attr "mode" "DI,TI,TI")])
;; Modes with a masked pmulhrsw expander (requires AVX512BW+VL below).
17639 (define_mode_iterator PMULHRSW
17640 [V8HI (V16HI "TARGET_AVX2")])
;; Masked pmulhrsw: rounded high-half signed multiply merged under a
;; mask register with operand 3 as the pass-through.
17642 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
17643 [(set (match_operand:PMULHRSW 0 "register_operand")
17644 (vec_merge:PMULHRSW
17646 (lshiftrt:<ssedoublemode>
17647 (plus:<ssedoublemode>
17648 (lshiftrt:<ssedoublemode>
17649 (mult:<ssedoublemode>
17650 (sign_extend:<ssedoublemode>
17651 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
17652 (sign_extend:<ssedoublemode>
17653 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
17657 (match_operand:PMULHRSW 3 "register_operand")
17658 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
17659 "TARGET_AVX512BW && TARGET_AVX512VL"
;; The +1 rounding term; commutative-operand fixup for the MULT.
17661 operands[5] = CONST1_RTX(<MODE>mode);
17662 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Unmasked pmulhrsw expander.
17665 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
17666 [(set (match_operand:PMULHRSW 0 "register_operand")
17668 (lshiftrt:<ssedoublemode>
17669 (plus:<ssedoublemode>
17670 (lshiftrt:<ssedoublemode>
17671 (mult:<ssedoublemode>
17672 (sign_extend:<ssedoublemode>
17673 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
17674 (sign_extend:<ssedoublemode>
17675 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
17681 operands[3] = CONST1_RTX(<MODE>mode);
17682 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Standard-named rounded-high-multiply expander for the vectorizer.
17685 (define_expand "smulhrs<mode>3"
17686 [(set (match_operand:VI2_AVX2 0 "register_operand")
17688 (lshiftrt:<ssedoublemode>
17689 (plus:<ssedoublemode>
17690 (lshiftrt:<ssedoublemode>
17691 (mult:<ssedoublemode>
17692 (sign_extend:<ssedoublemode>
17693 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
17694 (sign_extend:<ssedoublemode>
17695 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
17701 operands[3] = CONST1_RTX(<MODE>mode);
17702 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matcher for pmulhrsw/vpmulhrsw, optionally masked (<mask_name>).
;; Alternative 0 is the legacy two-operand SSE form (operand 1 tied to
;; the destination, hence commutative "%0"); alternative 1 is the
;; three-operand VEX/EVEX form.  Both operands may not be in memory.
;; NOTE(review): some interior lines are elided in this extract.
17705 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
17706 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
17708 (lshiftrt:<ssedoublemode>
17709 (plus:<ssedoublemode>
17710 (lshiftrt:<ssedoublemode>
17711 (mult:<ssedoublemode>
17712 (sign_extend:<ssedoublemode>
17713 (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>"))
17714 (sign_extend:<ssedoublemode>
17715 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))
17717 (match_operand:VI2_AVX2 3 "const1_operand"))
17719 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
17720 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
17722 pmulhrsw\t{%2, %0|%0, %2}
17723 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
17724 [(set_attr "isa" "noavx,avx")
17725 (set_attr "type" "sseimul")
17726 (set_attr "prefix_data16" "1,*")
17727 (set_attr "prefix_extra" "1")
17728 (set_attr "prefix" "orig,maybe_evex")
17729 (set_attr "mode" "<sseinsnmode>")])
;; MMX-register-free V4HI variant of smulhrs, available only when MMX
;; operations are emulated with SSE (TARGET_MMX_WITH_SSE).
;; NOTE(review): some interior lines are elided in this extract.
17731 (define_expand "smulhrsv4hi3"
17732 [(set (match_operand:V4HI 0 "register_operand")
17739 (match_operand:V4HI 1 "register_operand"))
17741 (match_operand:V4HI 2 "register_operand")))
17745 "TARGET_MMX_WITH_SSE && TARGET_SSSE3"
17746 "operands[3] = CONST1_RTX(V4HImode);")
;; Builtin expander for MMX pmulhrsw; also usable with native MMX regs.
17748 (define_expand "ssse3_pmulhrswv4hi3"
17749 [(set (match_operand:V4HI 0 "register_operand")
17756 (match_operand:V4HI 1 "register_mmxmem_operand"))
17758 (match_operand:V4HI 2 "register_mmxmem_operand")))
17762 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17764 operands[3] = CONST1_RTX(V4HImode);
17765 ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
;; V4HI pmulhrsw matcher: alternative 0 native MMX, 1 legacy SSE,
;; 2 AVX three-operand form (see "mmx_isa"/"isa" attributes).
17768 (define_insn "*ssse3_pmulhrswv4hi3"
17769 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
17776 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
17778 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
17780 (match_operand:V4HI 3 "const1_operand"))
17782 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
17784 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
17786 pmulhrsw\t{%2, %0|%0, %2}
17787 pmulhrsw\t{%2, %0|%0, %2}
17788 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
17789 [(set_attr "isa" "*,noavx,avx")
17790 (set_attr "mmx_isa" "native,*,*")
17791 (set_attr "type" "sseimul")
17792 (set_attr "prefix_extra" "1")
17793 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17794 (set_attr "mode" "DI,TI,TI")])
;; V2HI (32-bit vector) rounded high-part multiply, expander and
;; matcher; register-only operands.  NOTE(review): some interior lines
;; are elided in this extract.
17796 (define_expand "smulhrsv2hi3"
17797 [(set (match_operand:V2HI 0 "register_operand")
17804 (match_operand:V2HI 1 "register_operand"))
17806 (match_operand:V2HI 2 "register_operand")))
17811 "operands[3] = CONST1_RTX(V2HImode);")
;; Matcher: alternative 0 legacy SSE (operand 1 tied), 1 AVX.
17813 (define_insn "*smulhrsv2hi3"
17814 [(set (match_operand:V2HI 0 "register_operand" "=x,Yv")
17821 (match_operand:V2HI 1 "register_operand" "%0,Yv"))
17823 (match_operand:V2HI 2 "register_operand" "x,Yv")))
17825 (match_operand:V2HI 3 "const1_operand"))
17828 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
17830 pmulhrsw\t{%2, %0|%0, %2}
17831 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
17832 [(set_attr "isa" "noavx,avx")
17833 (set_attr "type" "sseimul")
17834 (set_attr "prefix_extra" "1")
17835 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17836 (set_attr "mode" "TI")])
;; pshufb/vpshufb byte shuffle (optionally masked): operand 2 supplies
;; per-byte shuffle control.  NOTE(review): some interior lines are
;; elided in this extract.
17838 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
17839 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
17841 [(match_operand:VI1_AVX512 1 "register_operand" "0,<v_Yw>")
17842 (match_operand:VI1_AVX512 2 "vector_operand" "xBm,<v_Yw>m")]
17844 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
17846 pshufb\t{%2, %0|%0, %2}
17847 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17848 [(set_attr "isa" "noavx,avx")
17849 (set_attr "type" "sselog1")
17850 (set_attr "prefix_data16" "1,*")
17851 (set_attr "prefix_extra" "1")
17852 (set_attr "prefix" "orig,maybe_evex")
17853 (set_attr "btver2_decode" "vector")
17854 (set_attr "mode" "<sseinsnmode>")])
;; MMX (V8QI) pshufb expander.  Operand 3 is the 0xf7f7f7f7 mask used
;; by the split below to emulate MMX pshufb with the SSE instruction.
17856 (define_expand "ssse3_pshufbv8qi3"
17858 [(set (match_operand:V8QI 0 "register_operand")
17859 (unspec:V8QI [(match_operand:V8QI 1 "register_operand")
17860 (match_operand:V8QI 2 "register_mmxmem_operand")
17861 (match_dup 3)] UNSPEC_PSHUFB))
17862 (clobber (match_scratch:V4SI 4))])]
17863 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17865 operands[3] = ix86_build_const_vector (V4SImode, true,
17866 gen_int_mode (0xf7f7f7f7, SImode));
;; Matcher + post-reload split: when the destination landed in an SSE
;; register, rewrite the MMX pshufb as mask + 128-bit vpshufb.
17869 (define_insn_and_split "*ssse3_pshufbv8qi3"
17870 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
17871 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
17872 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
17873 (match_operand:V4SI 4 "reg_or_const_vector_operand"
17876 (clobber (match_scratch:V4SI 3 "=X,&x,&Yv"))]
17877 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17879 pshufb\t{%2, %0|%0, %2}
17882 "TARGET_SSSE3 && reload_completed
17883 && SSE_REGNO_P (REGNO (operands[0]))"
17884 [(set (match_dup 3)
17885 (and:V4SI (match_dup 3) (match_dup 2)))
17887 (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
17889 /* Emulate MMX version of pshufb with SSE version by masking out the
17890 bit 3 of the shuffle control byte. */
17891 operands[0] = lowpart_subreg (V16QImode, operands[0],
17892 GET_MODE (operands[0]));
17893 operands[1] = lowpart_subreg (V16QImode, operands[1],
17894 GET_MODE (operands[1]));
17895 operands[2] = lowpart_subreg (V4SImode, operands[2],
17896 GET_MODE (operands[2]));
17897 operands[4] = lowpart_subreg (V16QImode, operands[3],
17898 GET_MODE (operands[3]));
17900 [(set_attr "mmx_isa" "native,sse_noavx,avx")
17901 (set_attr "prefix_extra" "1")
17902 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17903 (set_attr "mode" "DI,TI,TI")])
;; psignb/w/d (SSSE3/AVX2): copy/negate/zero operand 1 elements
;; according to the sign of operand 2 elements.  NOTE(review): some
;; interior lines are elided in this extract.
17905 (define_insn "<ssse3_avx2>_psign<mode>3"
17906 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
17908 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
17909 (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
17913 psign<ssemodesuffix>\t{%2, %0|%0, %2}
17914 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17915 [(set_attr "isa" "noavx,avx")
17916 (set_attr "type" "sselog1")
17917 (set_attr "prefix_data16" "1,*")
17918 (set_attr "prefix_extra" "1")
17919 (set_attr "prefix" "orig,vex")
17920 (set_attr "mode" "<sseinsnmode>")])
;; MMX-width psign: alternative 0 native MMX, 1 legacy SSE, 2 AVX.
17922 (define_insn "ssse3_psign<mode>3"
17923 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
17925 [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")
17926 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")]
17928 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
17930 psign<mmxvecsize>\t{%2, %0|%0, %2}
17931 psign<mmxvecsize>\t{%2, %0|%0, %2}
17932 vpsign<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
17933 [(set_attr "isa" "*,noavx,avx")
17934 (set_attr "mmx_isa" "native,*,*")
17935 (set_attr "type" "sselog1")
17936 (set_attr "prefix_extra" "1")
17937 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
17938 (set_attr "mode" "DI,TI,TI")])
;; Masked vpalignr (AVX512BW).  The immediate arrives as a bit count
;; (multiple of 8) and is converted to the byte count the hardware
;; expects.  NOTE(review): some interior lines are elided in this
;; extract.
17940 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
17941 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
17942 (vec_merge:VI1_AVX512
17944 [(match_operand:VI1_AVX512 1 "register_operand" "v")
17945 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
17946 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
17948 (match_operand:VI1_AVX512 4 "nonimm_or_0_operand" "0C")
17949 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
17950 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
17952 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
17953 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
17955 [(set_attr "type" "sseishft")
17956 (set_attr "atom_unit" "sishuf")
17957 (set_attr "prefix_extra" "1")
17958 (set_attr "length_immediate" "1")
17959 (set_attr "prefix" "evex")
17960 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked palignr/vpalignr; same bits-to-bytes immediate conversion.
17962 (define_insn "<ssse3_avx2>_palignr<mode>"
17963 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,<v_Yw>")
17964 (unspec:SSESCALARMODE
17965 [(match_operand:SSESCALARMODE 1 "register_operand" "0,<v_Yw>")
17966 (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,<v_Yw>m")
17967 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
17971 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
17973 switch (which_alternative)
17976 return "palignr\t{%3, %2, %0|%0, %2, %3}";
17978 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
17980 gcc_unreachable ();
17983 [(set_attr "isa" "noavx,avx")
17984 (set_attr "type" "sseishft")
17985 (set_attr "atom_unit" "sishuf")
17986 (set_attr "prefix_data16" "1,*")
17987 (set_attr "prefix_extra" "1")
17988 (set_attr "length_immediate" "1")
17989 (set_attr "prefix" "orig,vex")
17990 (set_attr "mode" "<sseinsnmode>")])
;; MMX (DImode) palignr.  When the result is allocated to an SSE
;; register, split after reload into vec_concat + psrldq emulation
;; (see the comments in the split body).  NOTE(review): some interior
;; lines are elided in this extract.
17992 (define_insn_and_split "ssse3_palignrdi"
17993 [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
17994 (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
17995 (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv")
17996 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
17998 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
18000 switch (which_alternative)
18003 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
18004 return "palignr\t{%3, %2, %0|%0, %2, %3}";
18009 gcc_unreachable ();
18012 "TARGET_SSSE3 && reload_completed
18013 && SSE_REGNO_P (REGNO (operands[0]))"
18014 [(set (match_dup 0)
18015 (lshiftrt:V1TI (match_dup 0) (match_dup 3)))]
18017 /* Emulate MMX palignrdi with SSE psrldq. */
18018 rtx op0 = lowpart_subreg (V2DImode, operands[0],
18019 GET_MODE (operands[0]));
18021 emit_insn (gen_vec_concatv2di (op0, operands[2], operands[1]));
18024 /* NB: SSE can only concatenate OP0 and OP1 to OP0. */
18025 emit_insn (gen_vec_concatv2di (op0, operands[1], operands[2]));
18026 /* Swap bits 0:63 with bits 64:127. */
18027 rtx mask = gen_rtx_PARALLEL (VOIDmode,
18028 gen_rtvec (4, GEN_INT (2),
18032 rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0));
18033 rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
18034 emit_insn (gen_rtx_SET (op1, op2));
18036 operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0));
18038 [(set_attr "mmx_isa" "native,sse_noavx,avx")
18039 (set_attr "type" "sseishft")
18040 (set_attr "atom_unit" "sishuf")
18041 (set_attr "prefix_extra" "1")
18042 (set_attr "length_immediate" "1")
18043 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
18044 (set_attr "mode" "DI,TI,TI")])
18046 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
18047 ;; modes for abs instruction on pre AVX-512 targets.
18048 (define_mode_iterator VI1248_AVX512VL_AVX512BW
18049 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
18050 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
18051 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
18052 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; pabsb/w/d/q matcher ("%v" emits the v-prefix under AVX).
;; NOTE(review): some interior lines are elided in this extract.
18054 (define_insn "*abs<mode>2"
18055 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=<v_Yw>")
18056 (abs:VI1248_AVX512VL_AVX512BW
18057 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "<v_Yw>Bm")))]
18059 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
18060 [(set_attr "type" "sselog1")
18061 (set_attr "prefix_data16" "1")
18062 (set_attr "prefix_extra" "1")
18063 (set_attr "prefix" "maybe_vex")
18064 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpabs for dword/qword element modes (AVX512VL mask merge).
18066 (define_insn "abs<mode>2_mask"
18067 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18068 (vec_merge:VI48_AVX512VL
18070 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
18071 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "0C")
18072 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
18074 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18075 [(set_attr "type" "sselog1")
18076 (set_attr "prefix" "evex")
18077 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpabs for byte/word element modes (requires AVX512BW family).
18079 (define_insn "abs<mode>2_mask"
18080 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
18081 (vec_merge:VI12_AVX512VL
18083 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
18084 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
18085 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
18087 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18088 [(set_attr "type" "sselog1")
18089 (set_attr "prefix" "evex")
18090 (set_attr "mode" "<sseinsnmode>")])
;; abs optab: fall back to an SSE2 open-coded sequence when the target
;; has no native pabs for this mode (e.g. 64-bit elements pre-AVX512VL).
18092 (define_expand "abs<mode>2"
18093 [(set (match_operand:VI_AVX2 0 "register_operand")
18095 (match_operand:VI_AVX2 1 "vector_operand")))]
18099 || ((<MODE>mode == V2DImode || <MODE>mode == V4DImode)
18100 && !TARGET_AVX512VL))
18102 ix86_expand_sse2_abs (operands[0], operands[1]);
18107 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18109 ;; AMD SSE4A instructions
18111 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A scalar non-temporal store (movntss/movntsd).
;; NOTE(review): some interior lines are elided in this extract.
18113 (define_insn "sse4a_movnt<mode>"
18114 [(set (match_operand:MODEF 0 "memory_operand" "=m")
18116 [(match_operand:MODEF 1 "register_operand" "x")]
18119 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
18120 [(set_attr "type" "ssemov")
18121 (set_attr "mode" "<MODE>")])
;; Non-temporal store of element 0 of a 128-bit FP vector.
18123 (define_insn "sse4a_vmmovnt<mode>"
18124 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
18125 (unspec:<ssescalarmode>
18126 [(vec_select:<ssescalarmode>
18127 (match_operand:VF_128 1 "register_operand" "x")
18128 (parallel [(const_int 0)]))]
18131 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
18132 [(set_attr "type" "ssemov")
18133 (set_attr "mode" "<ssescalarmode>")])
;; extrq with immediate length (operand 2) and index (operand 3).
18135 (define_insn "sse4a_extrqi"
18136 [(set (match_operand:V2DI 0 "register_operand" "=x")
18137 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
18138 (match_operand 2 "const_0_to_255_operand")
18139 (match_operand 3 "const_0_to_255_operand")]
18142 "extrq\t{%3, %2, %0|%0, %2, %3}"
18143 [(set_attr "type" "sse")
18144 (set_attr "prefix_data16" "1")
18145 (set_attr "length_immediate" "2")
18146 (set_attr "mode" "TI")])
;; extrq with the length/index taken from register operand 2.
18148 (define_insn "sse4a_extrq"
18149 [(set (match_operand:V2DI 0 "register_operand" "=x")
18150 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
18151 (match_operand:V16QI 2 "register_operand" "x")]
18154 "extrq\t{%2, %0|%0, %2}"
18155 [(set_attr "type" "sse")
18156 (set_attr "prefix_data16" "1")
18157 (set_attr "mode" "TI")])
;; insertq with immediate length (operand 3) and index (operand 4).
18159 (define_insn "sse4a_insertqi"
18160 [(set (match_operand:V2DI 0 "register_operand" "=x")
18161 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
18162 (match_operand:V2DI 2 "register_operand" "x")
18163 (match_operand 3 "const_0_to_255_operand")
18164 (match_operand 4 "const_0_to_255_operand")]
18167 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
18168 [(set_attr "type" "sseins")
18169 (set_attr "prefix_data16" "0")
18170 (set_attr "prefix_rep" "1")
18171 (set_attr "length_immediate" "2")
18172 (set_attr "mode" "TI")])
;; insertq with the field description encoded in register operand 2.
18174 (define_insn "sse4a_insertq"
18175 [(set (match_operand:V2DI 0 "register_operand" "=x")
18176 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
18177 (match_operand:V2DI 2 "register_operand" "x")]
18180 "insertq\t{%2, %0|%0, %2}"
18181 [(set_attr "type" "sseins")
18182 (set_attr "prefix_data16" "0")
18183 (set_attr "prefix_rep" "1")
18184 (set_attr "mode" "TI")])
18186 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18188 ;; Intel SSE4.1 instructions
18190 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18192 ;; Mapping of immediate bits for blend instructions
18193 (define_mode_attr blendbits
18194 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
;; blendps/blendpd with immediate selector (operand 3).  Note operands
;; 1 and 2 are swapped in the vec_merge relative to the asm operand
;; order.  NOTE(review): some interior lines are elided in this extract.
18196 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
18197 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
18198 (vec_merge:VF_128_256
18199 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
18200 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
18201 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
18204 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
18205 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
18206 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18207 [(set_attr "isa" "noavx,noavx,avx")
18208 (set_attr "type" "ssemov")
18209 (set_attr "length_immediate" "1")
18210 (set_attr "prefix_data16" "1,1,*")
18211 (set_attr "prefix_extra" "1")
18212 (set_attr "prefix" "orig,orig,vex")
18213 (set_attr "mode" "<MODE>")])
;; Variable blendvps/blendvpd: operand 3 is the per-element mask;
;; the legacy SSE form requires it in xmm0 (constraint "Yz").
18215 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
18216 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
18218 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
18219 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
18220 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
18224 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
18225 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
18226 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18227 [(set_attr "isa" "noavx,noavx,avx")
18228 (set_attr "type" "ssemov")
18229 (set_attr "length_immediate" "1")
18230 (set_attr "prefix_data16" "1,1,*")
18231 (set_attr "prefix_extra" "1")
18232 (set_attr "prefix" "orig,orig,vex")
18233 (set_attr "btver2_decode" "vector,vector,vector")
18234 (set_attr "mode" "<MODE>")])
18236 ;; Also define scalar versions. These are used for conditional move.
18237 ;; Using subregs into vector modes causes register allocation lossage.
18238 ;; These patterns do not allow memory operands because the native
18239 ;; instructions read the full 128-bits.
;; Scalar (SF/DF) blendv for conditional move.  The "mode" attribute
;; computation below prefers the packed-single form when it is smaller
;; or the tuning asks for it.  NOTE(review): some interior lines are
;; elided in this extract.
18241 (define_insn "sse4_1_blendv<ssemodesuffix>"
18242 [(set (match_operand:MODEF 0 "register_operand" "=Yr,*x,x")
18244 [(match_operand:MODEF 1 "register_operand" "0,0,x")
18245 (match_operand:MODEF 2 "register_operand" "Yr,*x,x")
18246 (match_operand:MODEF 3 "register_operand" "Yz,Yz,x")]
18250 if (get_attr_mode (insn) == MODE_V4SF)
18251 return (which_alternative == 2
18252 ? "vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18253 : "blendvps\t{%3, %2, %0|%0, %2, %3}");
18255 return (which_alternative == 2
18256 ? "vblendv<ssevecmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18257 : "blendv<ssevecmodesuffix>\t{%3, %2, %0|%0, %2, %3}");
18259 [(set_attr "isa" "noavx,noavx,avx")
18260 (set_attr "type" "ssemov")
18261 (set_attr "length_immediate" "1")
18262 (set_attr "prefix_data16" "1,1,*")
18263 (set_attr "prefix_extra" "1")
18264 (set_attr "prefix" "orig,orig,vex")
18265 (set_attr "btver2_decode" "vector,vector,vector")
18267 (cond [(match_test "TARGET_AVX")
18268 (const_string "<ssevecmode>")
18269 (match_test "optimize_function_for_size_p (cfun)")
18270 (const_string "V4SF")
18271 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
18272 (const_string "V4SF")
18274 (const_string "<ssevecmode>")))])
;; Recognize blendv whose mask is a "x < 0" comparison and canonicalize
;; it back to the plain UNSPEC_BLENDV form after reload (blendv only
;; inspects the sign bit anyway).  NOTE(review): some interior lines
;; are elided in this extract.
18276 (define_insn_and_split "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
18277 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
18279 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
18280 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
18282 (match_operand:<sseintvecmode> 3 "register_operand" "Yz,Yz,x")
18283 (match_operand:<sseintvecmode> 4 "const0_operand" "C,C,C"))]
18287 "&& reload_completed"
18288 [(set (match_dup 0)
18290 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
18291 "operands[3] = gen_lowpart (<MODE>mode, operands[3]);"
18292 [(set_attr "isa" "noavx,noavx,avx")
18293 (set_attr "type" "ssemov")
18294 (set_attr "length_immediate" "1")
18295 (set_attr "prefix_data16" "1,1,*")
18296 (set_attr "prefix_extra" "1")
18297 (set_attr "prefix" "orig,orig,vex")
18298 (set_attr "btver2_decode" "vector,vector,vector")
18299 (set_attr "mode" "<MODE>")])
;; FP suffix / FP vector mode corresponding to an integer vector mode.
18301 (define_mode_attr ssefltmodesuffix
18302 [(V2DI "pd") (V4DI "pd") (V4SI "ps") (V8SI "ps")])
18304 (define_mode_attr ssefltvecmode
18305 [(V2DI "V2DF") (V4DI "V4DF") (V4SI "V4SF") (V8SI "V8SF")])
;; Same sign-bit canonicalization, for an integer-mode "x < 0" mask
;; wrapped in a paradoxical subreg; rewrites everything into the FP
;; vector mode of matching width.
18307 (define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint"
18308 [(set (match_operand:<ssebytemode> 0 "register_operand" "=Yr,*x,x")
18309 (unspec:<ssebytemode>
18310 [(match_operand:<ssebytemode> 1 "register_operand" "0,0,x")
18311 (match_operand:<ssebytemode> 2 "vector_operand" "YrBm,*xBm,xm")
18312 (subreg:<ssebytemode>
18314 (match_operand:VI48_AVX 3 "register_operand" "Yz,Yz,x")
18315 (match_operand:VI48_AVX 4 "const0_operand" "C,C,C")) 0)]
18319 "&& reload_completed"
18320 [(set (match_dup 0)
18321 (unspec:<ssefltvecmode>
18322 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
18324 operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
18325 operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
18326 operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]);
18327 operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
18329 [(set_attr "isa" "noavx,noavx,avx")
18330 (set_attr "type" "ssemov")
18331 (set_attr "length_immediate" "1")
18332 (set_attr "prefix_data16" "1,1,*")
18333 (set_attr "prefix_extra" "1")
18334 (set_attr "prefix" "orig,orig,vex")
18335 (set_attr "btver2_decode" "vector,vector,vector")
18336 (set_attr "mode" "<ssefltvecmode>")])
;; dpps/dppd dot product; immediate operand 3 selects input/output
;; lanes.  Operand 1 is commutative ("%0").  NOTE(review): some
;; interior lines are elided in this extract.
18338 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
18339 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
18341 [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
18342 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
18343 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
18347 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
18348 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
18349 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18350 [(set_attr "isa" "noavx,noavx,avx")
18351 (set_attr "type" "ssemul")
18352 (set_attr "length_immediate" "1")
18353 (set_attr "prefix_data16" "1,1,*")
18354 (set_attr "prefix_extra" "1")
18355 (set_attr "prefix" "orig,orig,vex")
18356 (set_attr "btver2_decode" "vector,vector,vector")
18357 (set_attr "znver1_decode" "vector,vector,vector")
18358 (set_attr "mode" "<MODE>")])
18360 ;; Mode attribute used by `vmovntdqa' pattern
18361 (define_mode_attr vi8_sse4_1_avx2_avx512
18362 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
;; Non-temporal aligned load (movntdqa) for 128/256/512-bit vectors.
;; NOTE(review): some interior lines are elided in this extract.
18364 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
18365 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
18366 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
18369 "%vmovntdqa\t{%1, %0|%0, %1}"
18370 [(set_attr "isa" "noavx,noavx,avx")
18371 (set_attr "type" "ssemov")
18372 (set_attr "prefix_extra" "1,1,*")
18373 (set_attr "prefix" "orig,orig,maybe_evex")
18374 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw multiple sum-of-absolute-differences; immediate operand 3
;; selects source sub-blocks.
18376 (define_insn "<sse4_1_avx2>_mpsadbw"
18377 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
18379 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
18380 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
18381 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
18385 mpsadbw\t{%3, %2, %0|%0, %2, %3}
18386 mpsadbw\t{%3, %2, %0|%0, %2, %3}
18387 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18388 [(set_attr "isa" "noavx,noavx,avx")
18389 (set_attr "type" "sselog1")
18390 (set_attr "length_immediate" "1")
18391 (set_attr "prefix_extra" "1")
18392 (set_attr "prefix" "orig,orig,vex")
18393 (set_attr "btver2_decode" "vector,vector,vector")
18394 (set_attr "znver1_decode" "vector,vector,vector")
18395 (set_attr "mode" "<sseinsnmode>")])
;; packusdw: unsigned-saturating pack of two dword vectors into words,
;; optionally masked.  NOTE(review): some interior lines are elided in
;; this extract.
18397 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
18398 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,<v_Yw>")
18399 (vec_concat:VI2_AVX2
18400 (us_truncate:<ssehalfvecmode>
18401 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,<v_Yw>"))
18402 (us_truncate:<ssehalfvecmode>
18403 (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,<v_Yw>m"))))]
18404 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
18406 packusdw\t{%2, %0|%0, %2}
18407 packusdw\t{%2, %0|%0, %2}
18408 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18409 [(set_attr "isa" "noavx,noavx,avx")
18410 (set_attr "type" "sselog")
18411 (set_attr "prefix_extra" "1")
18412 (set_attr "prefix" "orig,orig,<mask_prefix>")
18413 (set_attr "mode" "<sseinsnmode>")])
;; pblendvb variable byte blend; legacy SSE form requires the mask
;; (operand 3) in xmm0 ("Yz").
18415 (define_insn "<sse4_1_avx2>_pblendvb"
18416 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
18418 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
18419 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
18420 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
18424 pblendvb\t{%3, %2, %0|%0, %2, %3}
18425 pblendvb\t{%3, %2, %0|%0, %2, %3}
18426 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18427 [(set_attr "isa" "noavx,noavx,avx")
18428 (set_attr "type" "ssemov")
18429 (set_attr "prefix_extra" "1")
18430 (set_attr "length_immediate" "*,*,1")
18431 (set_attr "prefix" "orig,orig,vex")
18432 (set_attr "btver2_decode" "vector,vector,vector")
18433 (set_attr "mode" "<sseinsnmode>")])
;; Simplify pblendvb with an inverted mask by swapping the two data
;; operands instead of negating the mask.  NOTE(review): the
;; define_split header lines and several interior lines are elided in
;; this extract.
18436 [(set (match_operand:VI1_AVX2 0 "register_operand")
18438 [(match_operand:VI1_AVX2 1 "vector_operand")
18439 (match_operand:VI1_AVX2 2 "register_operand")
18440 (not:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand"))]
18443 [(set (match_dup 0)
18445 [(match_dup 2) (match_dup 1) (match_dup 3)]
;; Same transformation when the inverted mask is hidden behind a
;; same-size integer-vector subreg.
18449 [(set (match_operand:VI1_AVX2 0 "register_operand")
18451 [(match_operand:VI1_AVX2 1 "vector_operand")
18452 (match_operand:VI1_AVX2 2 "register_operand")
18453 (subreg:VI1_AVX2 (not (match_operand 3 "register_operand")) 0)]
18456 && GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT
18457 && GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE>"
18458 [(set (match_dup 0)
18460 [(match_dup 2) (match_dup 1) (match_dup 4)]
18462 "operands[4] = gen_lowpart (<MODE>mode, operands[3]);")
;; pblendvb whose mask is "x < 0": only the sign bit matters, so split
;; back to the plain UNSPEC_BLENDV form after reload.
18464 (define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt"
18465 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
18467 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
18468 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
18469 (lt:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")
18470 (match_operand:VI1_AVX2 4 "const0_operand" "C,C,C"))]
18475 [(set (match_dup 0)
18477 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
18479 [(set_attr "isa" "noavx,noavx,avx")
18480 (set_attr "type" "ssemov")
18481 (set_attr "prefix_extra" "1")
18482 (set_attr "length_immediate" "*,*,1")
18483 (set_attr "prefix" "orig,orig,vex")
18484 (set_attr "btver2_decode" "vector,vector,vector")
18485 (set_attr "mode" "<sseinsnmode>")])
;; Combined form: "(not x) < 0" mask with operands swapped; pre-reload
;; split into the canonical lt-form above.
18487 (define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt_subreg_not"
18488 [(set (match_operand:VI1_AVX2 0 "register_operand")
18490 [(match_operand:VI1_AVX2 2 "vector_operand")
18491 (match_operand:VI1_AVX2 1 "register_operand")
18494 (not (match_operand 3 "register_operand")) 0)
18495 (match_operand:VI1_AVX2 4 "const0_operand"))]
18498 && GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT
18499 && GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE>
18500 && ix86_pre_reload_split ()"
18503 [(set (match_dup 0)
18505 [(match_dup 1) (match_dup 2)
18506 (lt:VI1_AVX2 (match_dup 3) (match_dup 4))] UNSPEC_BLENDV))]
18507 "operands[3] = gen_lowpart (<MODE>mode, operands[3]);"
;; pblendw word blend with 8-bit immediate selector.  NOTE(review):
;; some interior lines are elided in this extract.
18509 (define_insn "sse4_1_pblendw"
18510 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
18512 (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
18513 (match_operand:V8HI 1 "register_operand" "0,0,x")
18514 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
18517 pblendw\t{%3, %2, %0|%0, %2, %3}
18518 pblendw\t{%3, %2, %0|%0, %2, %3}
18519 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18520 [(set_attr "isa" "noavx,noavx,avx")
18521 (set_attr "type" "ssemov")
18522 (set_attr "prefix_extra" "1")
18523 (set_attr "length_immediate" "1")
18524 (set_attr "prefix" "orig,orig,vex")
18525 (set_attr "mode" "TI")])
18527 ;; The builtin uses an 8-bit immediate. Expand that.
;; AVX2 vpblendw replicates the 8-bit immediate to both 128-bit lanes,
;; so widen the builtin's immediate to 16 selector bits here.
18528 (define_expand "avx2_pblendw"
18529 [(set (match_operand:V16HI 0 "register_operand")
18531 (match_operand:V16HI 2 "nonimmediate_operand")
18532 (match_operand:V16HI 1 "register_operand")
18533 (match_operand:SI 3 "const_0_to_255_operand")))]
18536 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
18537 operands[3] = GEN_INT (val << 8 | val);
;; Matcher; truncates the replicated immediate back to 8 bits for the
;; actual encoding.
18540 (define_insn "*avx2_pblendw"
18541 [(set (match_operand:V16HI 0 "register_operand" "=x")
18543 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
18544 (match_operand:V16HI 1 "register_operand" "x")
18545 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
18548 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
18549 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
18551 [(set_attr "type" "ssemov")
18552 (set_attr "prefix_extra" "1")
18553 (set_attr "length_immediate" "1")
18554 (set_attr "prefix" "vex")
18555 (set_attr "mode" "OI")])
;; AVX2 vpblendd dword blend with immediate selector.  NOTE(review):
;; some interior lines are elided in this extract.
18557 (define_insn "avx2_pblendd<mode>"
18558 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
18559 (vec_merge:VI4_AVX2
18560 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
18561 (match_operand:VI4_AVX2 1 "register_operand" "x")
18562 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
18564 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18565 [(set_attr "type" "ssemov")
18566 (set_attr "prefix_extra" "1")
18567 (set_attr "length_immediate" "1")
18568 (set_attr "prefix" "vex")
18569 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw: horizontal minimum of unsigned words plus its index.
18571 (define_insn "sse4_1_phminposuw"
18572 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
18573 (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
18574 UNSPEC_PHMINPOSUW))]
18576 "%vphminposuw\t{%1, %0|%0, %1}"
18577 [(set_attr "isa" "noavx,noavx,avx")
18578 (set_attr "type" "sselog1")
18579 (set_attr "prefix_extra" "1")
18580 (set_attr "prefix" "orig,orig,vex")
18581 (set_attr "mode" "TI")])
;; vpmovsxbw/vpmovzxbw, byte -> word widening (AVX2 256-bit form),
;; optionally masked.  NOTE(review): some interior lines are elided in
;; this extract.
18583 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
18584 [(set (match_operand:V16HI 0 "register_operand" "=Yw")
18586 (match_operand:V16QI 1 "nonimmediate_operand" "Ywm")))]
18587 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
18588 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18589 [(set_attr "type" "ssemov")
18590 (set_attr "prefix_extra" "1")
18591 (set_attr "prefix" "maybe_evex")
18592 (set_attr "mode" "OI")])
;; Recognize a vec_select of a zero-interleaved vector (pmovzx-shaped
;; shuffle) and rewrite it as a plain zero_extend after reload.
18594 (define_insn_and_split "*avx2_zero_extendv16qiv16hi2_1"
18595 [(set (match_operand:V32QI 0 "register_operand" "=v")
18598 (match_operand:V32QI 1 "nonimmediate_operand" "vm")
18599 (match_operand:V32QI 2 "const0_operand" "C"))
18600 (match_parallel 3 "pmovzx_parallel"
18601 [(match_operand 4 "const_int_operand" "n")])))]
18604 "&& reload_completed"
18605 [(set (match_dup 0) (zero_extend:V16HI (match_dup 1)))]
18607 operands[0] = lowpart_subreg (V16HImode, operands[0], V32QImode);
18608 operands[1] = lowpart_subreg (V16QImode, operands[1], V32QImode);
;; Variant with the zero half expressed as a vec_concat subreg.
18611 (define_insn_and_split "*avx2_zero_extendv16qiv16hi2_2"
18612 [(set (match_operand:V32QI 0 "register_operand" "=v")
18616 (vec_concat:VI248_256
18617 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "vm")
18618 (match_operand:<ssehalfvecmode> 2 "const0_operand" "C")) 0)
18619 (match_operand:V32QI 3 "const0_operand" "C"))
18620 (match_parallel 4 "pmovzx_parallel"
18621 [(match_operand 5 "const_int_operand" "n")])))]
18624 "&& reload_completed"
18625 [(set (match_dup 0) (zero_extend:V16HI (match_dup 1)))]
18627 operands[0] = lowpart_subreg (V16HImode, operands[0], V32QImode);
18628 operands[1] = lowpart_subreg (V16QImode, operands[1], <ssehalfvecmode>mode);
;; Optab entry point for V16QI -> V16HI sign/zero extension.
18631 (define_expand "<insn>v16qiv16hi2"
18632 [(set (match_operand:V16HI 0 "register_operand")
18634 (match_operand:V16QI 1 "nonimmediate_operand")))]
;; 512-bit byte -> word widening (AVX512BW), optionally masked.
18637 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
18638 [(set (match_operand:V32HI 0 "register_operand" "=v")
18640 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
18642 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18643 [(set_attr "type" "ssemov")
18644 (set_attr "prefix_extra" "1")
18645 (set_attr "prefix" "evex")
18646 (set_attr "mode" "XI")])
;; 512-bit analogue of the pmovzx-shaped shuffle recognizer above.
18648 (define_insn_and_split "*avx512bw_zero_extendv32qiv32hi2_1"
18649 [(set (match_operand:V64QI 0 "register_operand" "=v")
18652 (match_operand:V64QI 1 "nonimmediate_operand" "vm")
18653 (match_operand:V64QI 2 "const0_operand" "C"))
18654 (match_parallel 3 "pmovzx_parallel"
18655 [(match_operand 4 "const_int_operand" "n")])))]
18658 "&& reload_completed"
18659 [(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))]
18661 operands[0] = lowpart_subreg (V32HImode, operands[0], V64QImode);
18662 operands[1] = lowpart_subreg (V32QImode, operands[1], V64QImode);
;; 512-bit variant with the zero half as a vec_concat subreg.
18665 (define_insn_and_split "*avx512bw_zero_extendv32qiv32hi2_2"
18666 [(set (match_operand:V64QI 0 "register_operand" "=v")
18670 (vec_concat:VI248_512
18671 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "vm")
18672 (match_operand:<ssehalfvecmode> 2 "const0_operand" "C")) 0)
18673 (match_operand:V64QI 3 "const0_operand" "C"))
18674 (match_parallel 4 "pmovzx_parallel"
18675 [(match_operand 5 "const_int_operand" "n")])))]
18678 "&& reload_completed"
18679 [(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))]
18681 operands[0] = lowpart_subreg (V32HImode, operands[0], V64QImode);
18682 operands[1] = lowpart_subreg (V32QImode, operands[1], <ssehalfvecmode>mode);
;; Optab entry point for V32QI -> V32HI sign/zero extension.
18685 (define_expand "<insn>v32qiv32hi2"
18686 [(set (match_operand:V32HI 0 "register_operand")
18688 (match_operand:V32QI 1 "nonimmediate_operand")))]
18691 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
18692 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw")
18695 (match_operand:V16QI 1 "register_operand" "Yr,*x,Yw")
18696 (parallel [(const_int 0) (const_int 1)
18697 (const_int 2) (const_int 3)
18698 (const_int 4) (const_int 5)
18699 (const_int 6) (const_int 7)]))))]
18700 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
18701 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18702 [(set_attr "isa" "noavx,noavx,avx")
18703 (set_attr "type" "ssemov")
18704 (set_attr "prefix_extra" "1")
18705 (set_attr "prefix" "orig,orig,maybe_evex")
18706 (set_attr "mode" "TI")])
18708 (define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1"
18709 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw")
18711 (match_operand:V8QI 1 "memory_operand" "m,m,m")))]
18712 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
18713 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18714 [(set_attr "isa" "noavx,noavx,avx")
18715 (set_attr "type" "ssemov")
18716 (set_attr "prefix_extra" "1")
18717 (set_attr "prefix" "orig,orig,maybe_evex")
18718 (set_attr "mode" "TI")])
18720 (define_insn_and_split "*sse4_1_<code>v8qiv8hi2<mask_name>_2"
18721 [(set (match_operand:V8HI 0 "register_operand")
18726 (match_operand:DI 1 "memory_operand")
18728 (parallel [(const_int 0) (const_int 1)
18729 (const_int 2) (const_int 3)
18730 (const_int 4) (const_int 5)
18731 (const_int 6) (const_int 7)]))))]
18732 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
18733 && ix86_pre_reload_split ()"
18736 [(set (match_dup 0)
18737 (any_extend:V8HI (match_dup 1)))]
18738 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
18740 (define_insn_and_split "*sse4_1_zero_extendv8qiv8hi2_3"
18741 [(set (match_operand:V16QI 0 "register_operand" "=Yr,*x,Yw")
18744 (match_operand:V16QI 1 "vector_operand" "YrBm,*xBm,Ywm")
18745 (match_operand:V16QI 2 "const0_operand" "C,C,C"))
18746 (match_parallel 3 "pmovzx_parallel"
18747 [(match_operand 4 "const_int_operand" "n,n,n")])))]
18750 "&& reload_completed"
18751 [(set (match_dup 0)
18755 (parallel [(const_int 0) (const_int 1)
18756 (const_int 2) (const_int 3)
18757 (const_int 4) (const_int 5)
18758 (const_int 6) (const_int 7)]))))]
18760 operands[0] = lowpart_subreg (V8HImode, operands[0], V16QImode);
18761 if (MEM_P (operands[1]))
18763 operands[1] = lowpart_subreg (V8QImode, operands[1], V16QImode);
18764 operands[1] = gen_rtx_ZERO_EXTEND (V8HImode, operands[1]);
18765 emit_insn (gen_rtx_SET (operands[0], operands[1]));
18769 [(set_attr "isa" "noavx,noavx,avx")])
18771 (define_insn_and_split "*sse4_1_zero_extendv8qiv8hi2_4"
18772 [(set (match_operand:V16QI 0 "register_operand" "=Yr,*x,Yw")
18776 (vec_concat:VI248_128
18777 (match_operand:<ssehalfvecmode> 1 "vector_operand" "YrBm,*xBm,Ywm")
18778 (match_operand:<ssehalfvecmode> 2 "const0_operand" "C,C,C")) 0)
18779 (match_operand:V16QI 3 "const0_operand" "C,C,C"))
18780 (match_parallel 4 "pmovzx_parallel"
18781 [(match_operand 5 "const_int_operand" "n,n,n")])))]
18784 "&& reload_completed"
18785 [(set (match_dup 0)
18789 (parallel [(const_int 0) (const_int 1)
18790 (const_int 2) (const_int 3)
18791 (const_int 4) (const_int 5)
18792 (const_int 6) (const_int 7)]))))]
18794 operands[0] = lowpart_subreg (V8HImode, operands[0], V16QImode);
18795 if (MEM_P (operands[1]))
18797 operands[1] = lowpart_subreg (V8QImode, operands[1], <ssehalfvecmode>mode);
18798 operands[1] = gen_rtx_ZERO_EXTEND (V8HImode, operands[1]);
18799 emit_insn (gen_rtx_SET (operands[0], operands[1]));
18802 operands[1] = lowpart_subreg (V16QImode, operands[1], <ssehalfvecmode>mode);
18804 [(set_attr "isa" "noavx,noavx,avx")])
18806 (define_expand "<insn>v8qiv8hi2"
18807 [(set (match_operand:V8HI 0 "register_operand")
18809 (match_operand:V8QI 1 "nonimmediate_operand")))]
18812 if (!MEM_P (operands[1]))
18814 operands[1] = force_reg (V8QImode, operands[1]);
18815 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
18816 emit_insn (gen_sse4_1_<code>v8qiv8hi2 (operands[0], operands[1]));
18821 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
18822 [(set (match_operand:V16SI 0 "register_operand" "=v")
18824 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
18826 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18827 [(set_attr "type" "ssemov")
18828 (set_attr "prefix" "evex")
18829 (set_attr "mode" "XI")])
18831 (define_expand "<insn>v16qiv16si2"
18832 [(set (match_operand:V16SI 0 "register_operand")
18834 (match_operand:V16QI 1 "nonimmediate_operand")))]
18837 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
18838 [(set (match_operand:V8SI 0 "register_operand" "=v")
18841 (match_operand:V16QI 1 "register_operand" "v")
18842 (parallel [(const_int 0) (const_int 1)
18843 (const_int 2) (const_int 3)
18844 (const_int 4) (const_int 5)
18845 (const_int 6) (const_int 7)]))))]
18846 "TARGET_AVX2 && <mask_avx512vl_condition>"
18847 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18848 [(set_attr "type" "ssemov")
18849 (set_attr "prefix_extra" "1")
18850 (set_attr "prefix" "maybe_evex")
18851 (set_attr "mode" "OI")])
18853 (define_insn "*avx2_<code>v8qiv8si2<mask_name>_1"
18854 [(set (match_operand:V8SI 0 "register_operand" "=v")
18856 (match_operand:V8QI 1 "memory_operand" "m")))]
18857 "TARGET_AVX2 && <mask_avx512vl_condition>"
18858 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18859 [(set_attr "type" "ssemov")
18860 (set_attr "prefix_extra" "1")
18861 (set_attr "prefix" "maybe_evex")
18862 (set_attr "mode" "OI")])
18864 (define_insn_and_split "*avx2_<code>v8qiv8si2<mask_name>_2"
18865 [(set (match_operand:V8SI 0 "register_operand")
18870 (match_operand:DI 1 "memory_operand")
18872 (parallel [(const_int 0) (const_int 1)
18873 (const_int 2) (const_int 3)
18874 (const_int 4) (const_int 5)
18875 (const_int 6) (const_int 7)]))))]
18876 "TARGET_AVX2 && <mask_avx512vl_condition>
18877 && ix86_pre_reload_split ()"
18880 [(set (match_dup 0)
18881 (any_extend:V8SI (match_dup 1)))]
18882 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
18884 (define_expand "<insn>v8qiv8si2"
18885 [(set (match_operand:V8SI 0 "register_operand")
18887 (match_operand:V8QI 1 "nonimmediate_operand")))]
18890 if (!MEM_P (operands[1]))
18892 operands[1] = force_reg (V8QImode, operands[1]);
18893 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
18894 emit_insn (gen_avx2_<code>v8qiv8si2 (operands[0], operands[1]));
18899 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
18900 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
18903 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
18904 (parallel [(const_int 0) (const_int 1)
18905 (const_int 2) (const_int 3)]))))]
18906 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18907 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18908 [(set_attr "isa" "noavx,noavx,avx")
18909 (set_attr "type" "ssemov")
18910 (set_attr "prefix_extra" "1")
18911 (set_attr "prefix" "orig,orig,maybe_evex")
18912 (set_attr "mode" "TI")])
18914 (define_insn "*sse4_1_<code>v4qiv4si2<mask_name>_1"
18915 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
18917 (match_operand:V4QI 1 "memory_operand" "m,m,m")))]
18918 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
18919 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18920 [(set_attr "isa" "noavx,noavx,avx")
18921 (set_attr "type" "ssemov")
18922 (set_attr "prefix_extra" "1")
18923 (set_attr "prefix" "orig,orig,maybe_evex")
18924 (set_attr "mode" "TI")])
18926 (define_insn_and_split "*sse4_1_<code>v4qiv4si2<mask_name>_2"
18927 [(set (match_operand:V4SI 0 "register_operand")
18932 (vec_duplicate:V4SI
18933 (match_operand:SI 1 "memory_operand"))
18935 [(const_int 0) (const_int 0)
18936 (const_int 0) (const_int 0)])
18938 (parallel [(const_int 0) (const_int 1)
18939 (const_int 2) (const_int 3)]))))]
18940 "TARGET_SSE4_1 && <mask_avx512vl_condition>
18941 && ix86_pre_reload_split ()"
18944 [(set (match_dup 0)
18945 (any_extend:V4SI (match_dup 1)))]
18946 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
18948 (define_expand "<insn>v4qiv4si2"
18949 [(set (match_operand:V4SI 0 "register_operand")
18951 (match_operand:V4QI 1 "nonimmediate_operand")))]
18954 if (!MEM_P (operands[1]))
18956 operands[1] = force_reg (V4QImode, operands[1]);
18957 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V4QImode, 0);
18958 emit_insn (gen_sse4_1_<code>v4qiv4si2 (operands[0], operands[1]));
18963 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
18964 [(set (match_operand:V16SI 0 "register_operand" "=v")
18966 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
18968 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18969 [(set_attr "type" "ssemov")
18970 (set_attr "prefix" "evex")
18971 (set_attr "mode" "XI")])
18973 (define_expand "<insn>v16hiv16si2"
18974 [(set (match_operand:V16SI 0 "register_operand")
18976 (match_operand:V16HI 1 "nonimmediate_operand")))]
18979 (define_insn_and_split "avx512f_zero_extendv16hiv16si2_1"
18980 [(set (match_operand:V32HI 0 "register_operand" "=v")
18983 (match_operand:V32HI 1 "nonimmediate_operand" "vm")
18984 (match_operand:V32HI 2 "const0_operand" "C"))
18985 (match_parallel 3 "pmovzx_parallel"
18986 [(match_operand 4 "const_int_operand" "n")])))]
18989 "&& reload_completed"
18990 [(set (match_dup 0) (zero_extend:V16SI (match_dup 1)))]
18992 operands[0] = lowpart_subreg (V16SImode, operands[0], V32HImode);
18993 operands[1] = lowpart_subreg (V16HImode, operands[1], V32HImode);
18996 (define_insn_and_split "*avx512f_zero_extendv16hiv16si2_2"
18997 [(set (match_operand:V32HI 0 "register_operand" "=v")
19001 (vec_concat:VI148_512
19002 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "vm")
19003 (match_operand:<ssehalfvecmode> 2 "const0_operand" "C")) 0)
19004 (match_operand:V32HI 3 "const0_operand" "C"))
19005 (match_parallel 4 "pmovzx_parallel"
19006 [(match_operand 5 "const_int_operand" "n")])))]
19009 "&& reload_completed"
19010 [(set (match_dup 0) (zero_extend:V16SI (match_dup 1)))]
19012 operands[0] = lowpart_subreg (V16SImode, operands[0], V32HImode);
19013 operands[1] = lowpart_subreg (V16HImode, operands[1], <ssehalfvecmode>mode);
19016 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
19017 [(set (match_operand:V8SI 0 "register_operand" "=v")
19019 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
19020 "TARGET_AVX2 && <mask_avx512vl_condition>"
19021 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19022 [(set_attr "type" "ssemov")
19023 (set_attr "prefix_extra" "1")
19024 (set_attr "prefix" "maybe_evex")
19025 (set_attr "mode" "OI")])
19027 (define_expand "<insn>v8hiv8si2"
19028 [(set (match_operand:V8SI 0 "register_operand")
19030 (match_operand:V8HI 1 "nonimmediate_operand")))]
19033 (define_insn_and_split "avx2_zero_extendv8hiv8si2_1"
19034 [(set (match_operand:V16HI 0 "register_operand" "=v")
19037 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
19038 (match_operand:V16HI 2 "const0_operand" "C"))
19039 (match_parallel 3 "pmovzx_parallel"
19040 [(match_operand 4 "const_int_operand" "n")])))]
19043 "&& reload_completed"
19044 [(set (match_dup 0) (zero_extend:V8SI (match_dup 1)))]
19046 operands[0] = lowpart_subreg (V8SImode, operands[0], V16HImode);
19047 operands[1] = lowpart_subreg (V8HImode, operands[1], V16HImode);
19050 (define_insn_and_split "*avx2_zero_extendv8hiv8si2_2"
19051 [(set (match_operand:V16HI 0 "register_operand" "=v")
19055 (vec_concat:VI148_256
19056 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "vm")
19057 (match_operand:<ssehalfvecmode> 2 "const0_operand" "C")) 0)
19058 (match_operand:V16HI 3 "const0_operand" "C"))
19059 (match_parallel 4 "pmovzx_parallel"
19060 [(match_operand 5 "const_int_operand" "n")])))]
19063 "&& reload_completed"
19064 [(set (match_dup 0) (zero_extend:V8SI (match_dup 1)))]
19066 operands[0] = lowpart_subreg (V8SImode, operands[0], V16HImode);
19067 operands[1] = lowpart_subreg (V8HImode, operands[1], <ssehalfvecmode>mode);
19071 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
19072 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
19075 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
19076 (parallel [(const_int 0) (const_int 1)
19077 (const_int 2) (const_int 3)]))))]
19078 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
19079 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19080 [(set_attr "isa" "noavx,noavx,avx")
19081 (set_attr "type" "ssemov")
19082 (set_attr "prefix_extra" "1")
19083 (set_attr "prefix" "orig,orig,maybe_evex")
19084 (set_attr "mode" "TI")])
19086 (define_insn "*sse4_1_<code>v4hiv4si2<mask_name>_1"
19087 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
19089 (match_operand:V4HI 1 "memory_operand" "m,m,m")))]
19090 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
19091 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19092 [(set_attr "isa" "noavx,noavx,avx")
19093 (set_attr "type" "ssemov")
19094 (set_attr "prefix_extra" "1")
19095 (set_attr "prefix" "orig,orig,maybe_evex")
19096 (set_attr "mode" "TI")])
19098 (define_insn_and_split "*sse4_1_<code>v4hiv4si2<mask_name>_2"
19099 [(set (match_operand:V4SI 0 "register_operand")
19104 (match_operand:DI 1 "memory_operand")
19106 (parallel [(const_int 0) (const_int 1)
19107 (const_int 2) (const_int 3)]))))]
19108 "TARGET_SSE4_1 && <mask_avx512vl_condition>
19109 && ix86_pre_reload_split ()"
19112 [(set (match_dup 0)
19113 (any_extend:V4SI (match_dup 1)))]
19114 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
19116 (define_expand "<insn>v4hiv4si2"
19117 [(set (match_operand:V4SI 0 "register_operand")
19119 (match_operand:V4HI 1 "nonimmediate_operand")))]
19122 if (!MEM_P (operands[1]))
19124 operands[1] = force_reg (V4HImode, operands[1]);
19125 operands[1] = simplify_gen_subreg (V8HImode, operands[1], V4HImode, 0);
19126 emit_insn (gen_sse4_1_<code>v4hiv4si2 (operands[0], operands[1]));
19131 (define_insn_and_split "*sse4_1_zero_extendv4hiv4si2_3"
19132 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
19135 (match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,vm")
19136 (match_operand:V8HI 2 "const0_operand" "C,C,C"))
19137 (match_parallel 3 "pmovzx_parallel"
19138 [(match_operand 4 "const_int_operand" "n,n,n")])))]
19141 "&& reload_completed"
19142 [(set (match_dup 0)
19146 (parallel [(const_int 0) (const_int 1)
19147 (const_int 2) (const_int 3)]))))]
19149 operands[0] = lowpart_subreg (V4SImode, operands[0], V8HImode);
19150 if (MEM_P (operands[1]))
19152 operands[1] = lowpart_subreg (V4HImode, operands[1], V8HImode);
19153 operands[1] = gen_rtx_ZERO_EXTEND (V4SImode, operands[1]);
19154 emit_insn (gen_rtx_SET (operands[0], operands[1]));
19158 [(set_attr "isa" "noavx,noavx,avx")])
19160 (define_insn_and_split "*sse4_1_zero_extendv4hiv4si2_4"
19161 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
19165 (vec_concat:VI148_128
19166 (match_operand:<ssehalfvecmode> 1 "vector_operand" "YrBm,*xBm,vm")
19167 (match_operand:<ssehalfvecmode> 2 "const0_operand" "C,C,C")) 0)
19168 (match_operand:V8HI 3 "const0_operand" "C,C,C"))
19169 (match_parallel 4 "pmovzx_parallel"
19170 [(match_operand 5 "const_int_operand" "n,n,n")])))]
19173 "&& reload_completed"
19174 [(set (match_dup 0)
19178 (parallel [(const_int 0) (const_int 1)
19179 (const_int 2) (const_int 3)]))))]
19181 operands[0] = lowpart_subreg (V4SImode, operands[0], V8HImode);
19182 if (MEM_P (operands[1]))
19184 operands[1] = lowpart_subreg (V4HImode, operands[1], <ssehalfvecmode>mode);
19185 operands[1] = gen_rtx_ZERO_EXTEND (V4SImode, operands[1]);
19186 emit_insn (gen_rtx_SET (operands[0], operands[1]));
19189 operands[1] = lowpart_subreg (V8HImode, operands[1], <ssehalfvecmode>mode);
19191 [(set_attr "isa" "noavx,noavx,avx")])
19193 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
19194 [(set (match_operand:V8DI 0 "register_operand" "=v")
19197 (match_operand:V16QI 1 "register_operand" "v")
19198 (parallel [(const_int 0) (const_int 1)
19199 (const_int 2) (const_int 3)
19200 (const_int 4) (const_int 5)
19201 (const_int 6) (const_int 7)]))))]
19203 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19204 [(set_attr "type" "ssemov")
19205 (set_attr "prefix" "evex")
19206 (set_attr "mode" "XI")])
19208 (define_insn "*avx512f_<code>v8qiv8di2<mask_name>_1"
19209 [(set (match_operand:V8DI 0 "register_operand" "=v")
19211 (match_operand:V8QI 1 "memory_operand" "m")))]
19213 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19214 [(set_attr "type" "ssemov")
19215 (set_attr "prefix" "evex")
19216 (set_attr "mode" "XI")])
19218 (define_insn_and_split "*avx512f_<code>v8qiv8di2<mask_name>_2"
19219 [(set (match_operand:V8DI 0 "register_operand")
19224 (match_operand:DI 1 "memory_operand")
19226 (parallel [(const_int 0) (const_int 1)
19227 (const_int 2) (const_int 3)
19228 (const_int 4) (const_int 5)
19229 (const_int 6) (const_int 7)]))))]
19230 "TARGET_AVX512F && ix86_pre_reload_split ()"
19233 [(set (match_dup 0)
19234 (any_extend:V8DI (match_dup 1)))]
19235 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
19237 (define_expand "<insn>v8qiv8di2"
19238 [(set (match_operand:V8DI 0 "register_operand")
19240 (match_operand:V8QI 1 "nonimmediate_operand")))]
19243 if (!MEM_P (operands[1]))
19245 operands[1] = force_reg (V8QImode, operands[1]);
19246 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V8QImode, 0);
19247 emit_insn (gen_avx512f_<code>v8qiv8di2 (operands[0], operands[1]));
19252 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
19253 [(set (match_operand:V4DI 0 "register_operand" "=v")
19256 (match_operand:V16QI 1 "register_operand" "v")
19257 (parallel [(const_int 0) (const_int 1)
19258 (const_int 2) (const_int 3)]))))]
19259 "TARGET_AVX2 && <mask_avx512vl_condition>"
19260 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19261 [(set_attr "type" "ssemov")
19262 (set_attr "prefix_extra" "1")
19263 (set_attr "prefix" "maybe_evex")
19264 (set_attr "mode" "OI")])
19266 (define_insn "*avx2_<code>v4qiv4di2<mask_name>_1"
19267 [(set (match_operand:V4DI 0 "register_operand" "=v")
19269 (match_operand:V4QI 1 "memory_operand" "m")))]
19270 "TARGET_AVX2 && <mask_avx512vl_condition>"
19271 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19272 [(set_attr "type" "ssemov")
19273 (set_attr "prefix_extra" "1")
19274 (set_attr "prefix" "maybe_evex")
19275 (set_attr "mode" "OI")])
19277 (define_insn_and_split "*avx2_<code>v4qiv4di2<mask_name>_2"
19278 [(set (match_operand:V4DI 0 "register_operand")
19283 (vec_duplicate:V4SI
19284 (match_operand:SI 1 "memory_operand"))
19286 [(const_int 0) (const_int 0)
19287 (const_int 0) (const_int 0)])
19289 (parallel [(const_int 0) (const_int 1)
19290 (const_int 2) (const_int 3)]))))]
19291 "TARGET_AVX2 && <mask_avx512vl_condition>
19292 && ix86_pre_reload_split ()"
19295 [(set (match_dup 0)
19296 (any_extend:V4DI (match_dup 1)))]
19297 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
19299 (define_expand "<insn>v4qiv4di2"
19300 [(set (match_operand:V4DI 0 "register_operand")
19302 (match_operand:V4QI 1 "nonimmediate_operand")))]
19305 if (!MEM_P (operands[1]))
19307 operands[1] = force_reg (V4QImode, operands[1]);
19308 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V4QImode, 0);
19309 emit_insn (gen_avx2_<code>v4qiv4di2 (operands[0], operands[1]));
19314 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
19315 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
19318 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
19319 (parallel [(const_int 0) (const_int 1)]))))]
19320 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
19321 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19322 [(set_attr "isa" "noavx,noavx,avx")
19323 (set_attr "type" "ssemov")
19324 (set_attr "prefix_extra" "1")
19325 (set_attr "prefix" "orig,orig,maybe_evex")
19326 (set_attr "mode" "TI")])
19328 (define_expand "<insn>v2qiv2di2"
19329 [(set (match_operand:V2DI 0 "register_operand")
19331 (match_operand:V2QI 1 "register_operand")))]
19334 operands[1] = force_reg (V2QImode, operands[1]);
19335 operands[1] = simplify_gen_subreg (V16QImode, operands[1], V2QImode, 0);
19336 emit_insn (gen_sse4_1_<code>v2qiv2di2 (operands[0], operands[1]));
19340 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
19341 [(set (match_operand:V8DI 0 "register_operand" "=v")
19343 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
19345 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19346 [(set_attr "type" "ssemov")
19347 (set_attr "prefix" "evex")
19348 (set_attr "mode" "XI")])
19350 (define_expand "<insn>v8hiv8di2"
19351 [(set (match_operand:V8DI 0 "register_operand")
19353 (match_operand:V8HI 1 "nonimmediate_operand")))]
19356 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
19357 [(set (match_operand:V4DI 0 "register_operand" "=v")
19360 (match_operand:V8HI 1 "register_operand" "v")
19361 (parallel [(const_int 0) (const_int 1)
19362 (const_int 2) (const_int 3)]))))]
19363 "TARGET_AVX2 && <mask_avx512vl_condition>"
19364 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19365 [(set_attr "type" "ssemov")
19366 (set_attr "prefix_extra" "1")
19367 (set_attr "prefix" "maybe_evex")
19368 (set_attr "mode" "OI")])
19370 (define_insn "*avx2_<code>v4hiv4di2<mask_name>_1"
19371 [(set (match_operand:V4DI 0 "register_operand" "=v")
19373 (match_operand:V4HI 1 "memory_operand" "m")))]
19374 "TARGET_AVX2 && <mask_avx512vl_condition>"
19375 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19376 [(set_attr "type" "ssemov")
19377 (set_attr "prefix_extra" "1")
19378 (set_attr "prefix" "maybe_evex")
19379 (set_attr "mode" "OI")])
19381 (define_insn_and_split "*avx2_<code>v4hiv4di2<mask_name>_2"
19382 [(set (match_operand:V4DI 0 "register_operand")
19387 (match_operand:DI 1 "memory_operand")
19389 (parallel [(const_int 0) (const_int 1)
19390 (const_int 2) (const_int 3)]))))]
19391 "TARGET_AVX2 && <mask_avx512vl_condition>
19392 && ix86_pre_reload_split ()"
19395 [(set (match_dup 0)
19396 (any_extend:V4DI (match_dup 1)))]
19397 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
19399 (define_expand "<insn>v4hiv4di2"
19400 [(set (match_operand:V4DI 0 "register_operand")
19402 (match_operand:V4HI 1 "nonimmediate_operand")))]
19405 if (!MEM_P (operands[1]))
19407 operands[1] = force_reg (V4HImode, operands[1]);
19408 operands[1] = simplify_gen_subreg (V8HImode, operands[1], V4HImode, 0);
19409 emit_insn (gen_avx2_<code>v4hiv4di2 (operands[0], operands[1]));
19414 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
19415 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
19418 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
19419 (parallel [(const_int 0) (const_int 1)]))))]
19420 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
19421 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19422 [(set_attr "isa" "noavx,noavx,avx")
19423 (set_attr "type" "ssemov")
19424 (set_attr "prefix_extra" "1")
19425 (set_attr "prefix" "orig,orig,maybe_evex")
19426 (set_attr "mode" "TI")])
19428 (define_insn "*sse4_1_<code>v2hiv2di2<mask_name>_1"
19429 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
19431 (match_operand:V2HI 1 "memory_operand" "m,m,m")))]
19432 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
19433 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19434 [(set_attr "isa" "noavx,noavx,avx")
19435 (set_attr "type" "ssemov")
19436 (set_attr "prefix_extra" "1")
19437 (set_attr "prefix" "orig,orig,maybe_evex")
19438 (set_attr "mode" "TI")])
19440 (define_insn_and_split "*sse4_1_<code>v2hiv2di2<mask_name>_2"
19441 [(set (match_operand:V2DI 0 "register_operand")
19446 (vec_duplicate:V4SI
19447 (match_operand:SI 1 "memory_operand"))
19449 [(const_int 0) (const_int 0)
19450 (const_int 0) (const_int 0)])
19452 (parallel [(const_int 0) (const_int 1)]))))]
19453 "TARGET_SSE4_1 && <mask_avx512vl_condition>
19454 && ix86_pre_reload_split ()"
19457 [(set (match_dup 0)
19458 (any_extend:V2DI (match_dup 1)))]
19459 "operands[1] = adjust_address_nv (operands[1], V2HImode, 0);")
19461 (define_expand "<insn>v2hiv2di2"
19462 [(set (match_operand:V2DI 0 "register_operand")
19464 (match_operand:V2HI 1 "nonimmediate_operand")))]
19467 if (!MEM_P (operands[1]))
19469 operands[1] = force_reg (V2HImode, operands[1]);
19470 operands[1] = simplify_gen_subreg (V8HImode, operands[1], V2HImode, 0);
19471 emit_insn (gen_sse4_1_<code>v2hiv2di2 (operands[0], operands[1]));
19476 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
19477 [(set (match_operand:V8DI 0 "register_operand" "=v")
19479 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
19481 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19482 [(set_attr "type" "ssemov")
19483 (set_attr "prefix" "evex")
19484 (set_attr "mode" "XI")])
19486 (define_insn_and_split "*avx512f_zero_extendv8siv8di2_1"
19487 [(set (match_operand:V16SI 0 "register_operand" "=v")
19490 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
19491 (match_operand:V16SI 2 "const0_operand" "C"))
19492 (match_parallel 3 "pmovzx_parallel"
19493 [(match_operand 4 "const_int_operand" "n")])))]
19496 "&& reload_completed"
19497 [(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))]
19499 operands[0] = lowpart_subreg (V8DImode, operands[0], V16SImode);
19500 operands[1] = lowpart_subreg (V8SImode, operands[1], V16SImode);
19503 (define_insn_and_split "*avx512f_zero_extendv8siv8di2_2"
19504 [(set (match_operand:V16SI 0 "register_operand" "=v")
19508 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
19509 (match_operand:V8SI 2 "const0_operand" "C"))
19510 (match_operand:V16SI 3 "const0_operand" "C"))
19511 (match_parallel 4 "pmovzx_parallel"
19512 [(match_operand 5 "const_int_operand" "n")])))]
19515 "&& reload_completed"
19516 [(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))]
19518 operands[0] = lowpart_subreg (V8DImode, operands[0], V16SImode);
19521 (define_expand "<insn>v8siv8di2"
19522 [(set (match_operand:V8DI 0 "register_operand" "=v")
19524 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
19527 (define_insn "avx2_<code>v4siv4di2<mask_name>"
19528 [(set (match_operand:V4DI 0 "register_operand" "=v")
19530 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
19531 "TARGET_AVX2 && <mask_avx512vl_condition>"
19532 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19533 [(set_attr "type" "ssemov")
19534 (set_attr "prefix" "maybe_evex")
19535 (set_attr "prefix_extra" "1")
19536 (set_attr "mode" "OI")])
19538 (define_insn_and_split "*avx2_zero_extendv4siv4di2_1"
19539 [(set (match_operand:V8SI 0 "register_operand" "=v")
19542 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
19543 (match_operand:V8SI 2 "const0_operand" "C"))
19544 (match_parallel 3 "pmovzx_parallel"
19545 [(match_operand 4 "const_int_operand" "n")])))]
19548 "&& reload_completed"
19549 [(set (match_dup 0) (zero_extend:V4DI (match_dup 1)))]
19551 operands[0] = lowpart_subreg (V4DImode, operands[0], V8SImode);
19552 operands[1] = lowpart_subreg (V4SImode, operands[1], V8SImode);
19555 (define_insn_and_split "*avx2_zero_extendv4siv4di2_2"
19556 [(set (match_operand:V8SI 0 "register_operand" "=v")
19560 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
19561 (match_operand:V4SI 2 "const0_operand" "C"))
19562 (match_operand:V8SI 3 "const0_operand" "C"))
19563 (match_parallel 4 "pmovzx_parallel"
19564 [(match_operand 5 "const_int_operand" "n")])))]
19567 "&& reload_completed"
19568 [(set (match_dup 0) (zero_extend:V4DI (match_dup 1)))]
19570 operands[0] = lowpart_subreg (V4DImode, operands[0], V8SImode);
19573 (define_expand "<insn>v4siv4di2"
19574 [(set (match_operand:V4DI 0 "register_operand")
19576 (match_operand:V4SI 1 "nonimmediate_operand")))]
19579 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
19580 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
19583 (match_operand:V4SI 1 "register_operand" "Yr,*x,v")
19584 (parallel [(const_int 0) (const_int 1)]))))]
19585 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
19586 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19587 [(set_attr "isa" "noavx,noavx,avx")
19588 (set_attr "type" "ssemov")
19589 (set_attr "prefix_extra" "1")
19590 (set_attr "prefix" "orig,orig,maybe_evex")
19591 (set_attr "mode" "TI")])
19593 (define_insn "*sse4_1_<code>v2siv2di2<mask_name>_1"
19594 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
19596 (match_operand:V2SI 1 "memory_operand" "m,m,m")))]
19597 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
19598 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19599 [(set_attr "isa" "noavx,noavx,avx")
19600 (set_attr "type" "ssemov")
19601 (set_attr "prefix_extra" "1")
19602 (set_attr "prefix" "orig,orig,maybe_evex")
19603 (set_attr "mode" "TI")])
19605 (define_insn_and_split "*sse4_1_<code>v2siv2di2<mask_name>_2"
19606 [(set (match_operand:V2DI 0 "register_operand")
19611 (match_operand:DI 1 "memory_operand")
19613 (parallel [(const_int 0) (const_int 1)]))))]
19614 "TARGET_SSE4_1 && <mask_avx512vl_condition>
19615 && ix86_pre_reload_split ()"
19618 [(set (match_dup 0)
19619 (any_extend:V2DI (match_dup 1)))]
19620 "operands[1] = adjust_address_nv (operands[1], V2SImode, 0);")
19622 (define_insn_and_split "*sse4_1_zero_extendv2siv2di2_3"
19623 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
19626 (match_operand:V4SI 1 "vector_operand" "YrBm,*xBm,vm")
19627 (match_operand:V4SI 2 "const0_operand" "C,C,C"))
19628 (match_parallel 3 "pmovzx_parallel"
19629 [(match_operand 4 "const_int_operand" "n,n,n")])))]
19632 "&& reload_completed"
19633 [(set (match_dup 0)
19635 (vec_select:V2SI (match_dup 1)
19636 (parallel [(const_int 0) (const_int 1)]))))]
19638 operands[0] = lowpart_subreg (V2DImode, operands[0], V4SImode);
19639 if (MEM_P (operands[1]))
19641 operands[1] = lowpart_subreg (V2SImode, operands[1], V4SImode);
19642 operands[1] = gen_rtx_ZERO_EXTEND (V2DImode, operands[1]);
19643 emit_insn (gen_rtx_SET (operands[0], operands[1]));
19647 [(set_attr "isa" "noavx,noavx,avx")])
19649 (define_insn_and_split "*sse4_1_zero_extendv2siv2di2_4"
19650 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
19654 (match_operand:V2SI 1 "vector_operand" "YrBm, *xBm, vm")
19655 (match_operand:V2SI 2 "const0_operand" "C,C,C"))
19656 (match_operand:V4SI 3 "const0_operand" "C,C,C"))
19657 (match_parallel 4 "pmovzx_parallel"
19658 [(match_operand 5 "const_int_operand" "n,n,n")])))]
19661 "&& reload_completed"
19662 [(set (match_dup 0)
19664 (vec_select:V2SI (match_dup 1)
19665 (parallel [(const_int 0) (const_int 1)]))))]
19667 operands[0] = lowpart_subreg (V2DImode, operands[0], V4SImode);
19668 if (MEM_P (operands[1]))
19670 operands[1] = gen_rtx_ZERO_EXTEND (V2DImode, operands[1]);
19671 emit_insn (gen_rtx_SET (operands[0], operands[1]));
19674 operands[1] = lowpart_subreg (V4SImode, operands[1], V2SImode);
19676 [(set_attr "isa" "noavx,noavx,avx")])
19678 (define_expand "<insn>v2siv2di2"
19679 [(set (match_operand:V2DI 0 "register_operand")
19681 (match_operand:V2SI 1 "nonimmediate_operand")))]
19684 if (!MEM_P (operands[1]))
19686 operands[1] = force_reg (V2SImode, operands[1]);
19687 operands[1] = simplify_gen_subreg (V4SImode, operands[1], V2SImode, 0);
19688 emit_insn (gen_sse4_1_<code>v2siv2di2 (operands[0], operands[1]));
19693 ;; ptestps/ptestpd are very similar to comiss and ucomiss when
19694 ;; setting FLAGS_REG. But it is not a really compare instruction.
19695 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
19696 [(set (reg:CC FLAGS_REG)
19697 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
19698 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
19701 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
19702 [(set_attr "type" "ssecomi")
19703 (set_attr "prefix_extra" "1")
19704 (set_attr "prefix" "vex")
19705 (set_attr "mode" "<MODE>")])
19707 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
19708 ;; But it is not a really compare instruction.
19709 (define_insn "<sse4_1>_ptest<mode>"
19710 [(set (reg:CC FLAGS_REG)
19711 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
19712 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
19715 "%vptest\t{%1, %0|%0, %1}"
19716 [(set_attr "isa" "noavx,noavx,avx")
19717 (set_attr "type" "ssecomi")
19718 (set_attr "prefix_extra" "1")
19719 (set_attr "prefix" "orig,orig,vex")
19720 (set (attr "btver2_decode")
19722 (match_test "<sseinsnmode>mode==OImode")
19723 (const_string "vector")
19724 (const_string "*")))
19725 (set_attr "mode" "<sseinsnmode>")])
19727 (define_insn "ptesttf2"
19728 [(set (reg:CC FLAGS_REG)
19729 (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
19730 (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
19733 "%vptest\t{%1, %0|%0, %1}"
19734 [(set_attr "isa" "noavx,noavx,avx")
19735 (set_attr "type" "ssecomi")
19736 (set_attr "prefix_extra" "1")
19737 (set_attr "prefix" "orig,orig,vex")
19738 (set_attr "mode" "TI")])
19740 (define_expand "nearbyint<mode>2"
19741 [(set (match_operand:VF 0 "register_operand")
19743 [(match_operand:VF 1 "vector_operand")
19747 "operands[2] = GEN_INT (ROUND_MXCSR | ROUND_NO_EXC);")
19749 (define_expand "rint<mode>2"
19750 [(set (match_operand:VF 0 "register_operand")
19752 [(match_operand:VF 1 "vector_operand")
19756 "operands[2] = GEN_INT (ROUND_MXCSR);")
19758 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
19759 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
19761 [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
19762 (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
19765 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19766 [(set_attr "isa" "noavx,noavx,avx")
19767 (set_attr "type" "ssecvt")
19768 (set_attr "prefix_data16" "1,1,*")
19769 (set_attr "prefix_extra" "1")
19770 (set_attr "length_immediate" "1")
19771 (set_attr "prefix" "orig,orig,vex")
19772 (set_attr "mode" "<MODE>")])
19774 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
19775 [(match_operand:<sseintvecmode> 0 "register_operand")
19776 (match_operand:VF1_128_256 1 "vector_operand")
19777 (match_operand:SI 2 "const_0_to_15_operand")]
19780 rtx tmp = gen_reg_rtx (<MODE>mode);
19783 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
19786 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
19790 (define_expand "avx512f_round<castmode>512"
19791 [(match_operand:VF_512 0 "register_operand")
19792 (match_operand:VF_512 1 "nonimmediate_operand")
19793 (match_operand:SI 2 "const_0_to_15_operand")]
19796 emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
19800 (define_expand "avx512f_roundps512_sfix"
19801 [(match_operand:V16SI 0 "register_operand")
19802 (match_operand:V16SF 1 "nonimmediate_operand")
19803 (match_operand:SI 2 "const_0_to_15_operand")]
19806 rtx tmp = gen_reg_rtx (V16SFmode);
19807 emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
19808 emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
19812 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
19813 [(match_operand:<ssepackfltmode> 0 "register_operand")
19814 (match_operand:VF2 1 "vector_operand")
19815 (match_operand:VF2 2 "vector_operand")
19816 (match_operand:SI 3 "const_0_to_15_operand")]
19821 if (<MODE>mode == V2DFmode
19822 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
19824 rtx tmp2 = gen_reg_rtx (V4DFmode);
19826 tmp0 = gen_reg_rtx (V4DFmode);
19827 tmp1 = force_reg (V2DFmode, operands[1]);
19829 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
19830 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
19831 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
19835 tmp0 = gen_reg_rtx (<MODE>mode);
19836 tmp1 = gen_reg_rtx (<MODE>mode);
19839 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
19842 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
19845 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
19850 (define_insn "sse4_1_round<ssescalarmodesuffix>"
19851 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
19854 [(match_operand:VF_128 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
19855 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
19857 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
19861 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
19862 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
19863 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}
19864 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
19865 [(set_attr "isa" "noavx,noavx,avx,avx512f")
19866 (set_attr "type" "ssecvt")
19867 (set_attr "length_immediate" "1")
19868 (set_attr "prefix_data16" "1,1,*,*")
19869 (set_attr "prefix_extra" "1")
19870 (set_attr "prefix" "orig,orig,vex,evex")
19871 (set_attr "mode" "<MODE>")])
19873 (define_insn "*sse4_1_round<ssescalarmodesuffix>"
19874 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
19876 (vec_duplicate:VF_128
19877 (unspec:<ssescalarmode>
19878 [(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
19879 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
19881 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
19885 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
19886 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
19887 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
19888 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19889 [(set_attr "isa" "noavx,noavx,avx,avx512f")
19890 (set_attr "type" "ssecvt")
19891 (set_attr "length_immediate" "1")
19892 (set_attr "prefix_data16" "1,1,*,*")
19893 (set_attr "prefix_extra" "1")
19894 (set_attr "prefix" "orig,orig,vex,evex")
19895 (set_attr "mode" "<MODE>")])
19897 (define_expand "round<mode>2"
19898 [(set (match_dup 3)
19900 (match_operand:VF 1 "register_operand")
19902 (set (match_operand:VF 0 "register_operand")
19904 [(match_dup 3) (match_dup 4)]
19906 "TARGET_SSE4_1 && !flag_trapping_math"
19908 machine_mode scalar_mode;
19909 const struct real_format *fmt;
19910 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
19911 rtx half, vec_half;
19913 scalar_mode = GET_MODE_INNER (<MODE>mode);
19915 /* load nextafter (0.5, 0.0) */
19916 fmt = REAL_MODE_FORMAT (scalar_mode);
19917 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
19918 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
19919 half = const_double_from_real_value (pred_half, scalar_mode);
19921 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
19922 vec_half = force_reg (<MODE>mode, vec_half);
19924 operands[2] = gen_reg_rtx (<MODE>mode);
19925 emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]));
19927 operands[3] = gen_reg_rtx (<MODE>mode);
19928 operands[4] = GEN_INT (ROUND_TRUNC);
19931 (define_expand "round<mode>2_sfix"
19932 [(match_operand:<sseintvecmode> 0 "register_operand")
19933 (match_operand:VF1 1 "register_operand")]
19934 "TARGET_SSE4_1 && !flag_trapping_math"
19936 rtx tmp = gen_reg_rtx (<MODE>mode);
19938 emit_insn (gen_round<mode>2 (tmp, operands[1]));
19941 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
19945 (define_expand "round<mode>2_vec_pack_sfix"
19946 [(match_operand:<ssepackfltmode> 0 "register_operand")
19947 (match_operand:VF2 1 "register_operand")
19948 (match_operand:VF2 2 "register_operand")]
19949 "TARGET_SSE4_1 && !flag_trapping_math"
19953 if (<MODE>mode == V2DFmode
19954 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
19956 rtx tmp2 = gen_reg_rtx (V4DFmode);
19958 tmp0 = gen_reg_rtx (V4DFmode);
19959 tmp1 = force_reg (V2DFmode, operands[1]);
19961 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
19962 emit_insn (gen_roundv4df2 (tmp2, tmp0));
19963 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
19967 tmp0 = gen_reg_rtx (<MODE>mode);
19968 tmp1 = gen_reg_rtx (<MODE>mode);
19970 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
19971 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
19974 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
19979 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19981 ;; Intel SSE4.2 string/text processing instructions
19983 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19985 (define_insn_and_split "sse4_2_pcmpestr"
19986 [(set (match_operand:SI 0 "register_operand" "=c,c")
19988 [(match_operand:V16QI 2 "register_operand" "x,x")
19989 (match_operand:SI 3 "register_operand" "a,a")
19990 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
19991 (match_operand:SI 5 "register_operand" "d,d")
19992 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
19994 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
20002 (set (reg:CC FLAGS_REG)
20011 && ix86_pre_reload_split ()"
20016 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
20017 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
20018 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
20021 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
20022 operands[3], operands[4],
20023 operands[5], operands[6]));
20025 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
20026 operands[3], operands[4],
20027 operands[5], operands[6]));
20028 if (flags && !(ecx || xmm0))
20029 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
20030 operands[2], operands[3],
20031 operands[4], operands[5],
20033 if (!(flags || ecx || xmm0))
20034 emit_note (NOTE_INSN_DELETED);
20038 [(set_attr "type" "sselog")
20039 (set_attr "prefix_data16" "1")
20040 (set_attr "prefix_extra" "1")
20041 (set_attr "length_immediate" "1")
20042 (set_attr "memory" "none,load")
20043 (set_attr "mode" "TI")])
20045 (define_insn "sse4_2_pcmpestri"
20046 [(set (match_operand:SI 0 "register_operand" "=c,c")
20048 [(match_operand:V16QI 1 "register_operand" "x,x")
20049 (match_operand:SI 2 "register_operand" "a,a")
20050 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
20051 (match_operand:SI 4 "register_operand" "d,d")
20052 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
20054 (set (reg:CC FLAGS_REG)
20063 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
20064 [(set_attr "type" "sselog")
20065 (set_attr "prefix_data16" "1")
20066 (set_attr "prefix_extra" "1")
20067 (set_attr "prefix" "maybe_vex")
20068 (set_attr "length_immediate" "1")
20069 (set_attr "btver2_decode" "vector")
20070 (set_attr "memory" "none,load")
20071 (set_attr "mode" "TI")])
20073 (define_insn "sse4_2_pcmpestrm"
20074 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
20076 [(match_operand:V16QI 1 "register_operand" "x,x")
20077 (match_operand:SI 2 "register_operand" "a,a")
20078 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
20079 (match_operand:SI 4 "register_operand" "d,d")
20080 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
20082 (set (reg:CC FLAGS_REG)
20091 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
20092 [(set_attr "type" "sselog")
20093 (set_attr "prefix_data16" "1")
20094 (set_attr "prefix_extra" "1")
20095 (set_attr "length_immediate" "1")
20096 (set_attr "prefix" "maybe_vex")
20097 (set_attr "btver2_decode" "vector")
20098 (set_attr "memory" "none,load")
20099 (set_attr "mode" "TI")])
20101 (define_insn "sse4_2_pcmpestr_cconly"
20102 [(set (reg:CC FLAGS_REG)
20104 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
20105 (match_operand:SI 3 "register_operand" "a,a,a,a")
20106 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
20107 (match_operand:SI 5 "register_operand" "d,d,d,d")
20108 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
20110 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
20111 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
20114 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
20115 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
20116 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
20117 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
20118 [(set_attr "type" "sselog")
20119 (set_attr "prefix_data16" "1")
20120 (set_attr "prefix_extra" "1")
20121 (set_attr "length_immediate" "1")
20122 (set_attr "memory" "none,load,none,load")
20123 (set_attr "btver2_decode" "vector,vector,vector,vector")
20124 (set_attr "prefix" "maybe_vex")
20125 (set_attr "mode" "TI")])
20127 (define_insn_and_split "sse4_2_pcmpistr"
20128 [(set (match_operand:SI 0 "register_operand" "=c,c")
20130 [(match_operand:V16QI 2 "register_operand" "x,x")
20131 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
20132 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
20134 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
20140 (set (reg:CC FLAGS_REG)
20147 && ix86_pre_reload_split ()"
20152 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
20153 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
20154 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
20157 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
20158 operands[3], operands[4]));
20160 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
20161 operands[3], operands[4]));
20162 if (flags && !(ecx || xmm0))
20163 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
20164 operands[2], operands[3],
20166 if (!(flags || ecx || xmm0))
20167 emit_note (NOTE_INSN_DELETED);
20171 [(set_attr "type" "sselog")
20172 (set_attr "prefix_data16" "1")
20173 (set_attr "prefix_extra" "1")
20174 (set_attr "length_immediate" "1")
20175 (set_attr "memory" "none,load")
20176 (set_attr "mode" "TI")])
20178 (define_insn "sse4_2_pcmpistri"
20179 [(set (match_operand:SI 0 "register_operand" "=c,c")
20181 [(match_operand:V16QI 1 "register_operand" "x,x")
20182 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
20183 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
20185 (set (reg:CC FLAGS_REG)
20192 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
20193 [(set_attr "type" "sselog")
20194 (set_attr "prefix_data16" "1")
20195 (set_attr "prefix_extra" "1")
20196 (set_attr "length_immediate" "1")
20197 (set_attr "prefix" "maybe_vex")
20198 (set_attr "memory" "none,load")
20199 (set_attr "btver2_decode" "vector")
20200 (set_attr "mode" "TI")])
20202 (define_insn "sse4_2_pcmpistrm"
20203 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
20205 [(match_operand:V16QI 1 "register_operand" "x,x")
20206 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
20207 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
20209 (set (reg:CC FLAGS_REG)
20216 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
20217 [(set_attr "type" "sselog")
20218 (set_attr "prefix_data16" "1")
20219 (set_attr "prefix_extra" "1")
20220 (set_attr "length_immediate" "1")
20221 (set_attr "prefix" "maybe_vex")
20222 (set_attr "memory" "none,load")
20223 (set_attr "btver2_decode" "vector")
20224 (set_attr "mode" "TI")])
20226 (define_insn "sse4_2_pcmpistr_cconly"
20227 [(set (reg:CC FLAGS_REG)
20229 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
20230 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
20231 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
20233 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
20234 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
20237 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
20238 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
20239 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
20240 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
20241 [(set_attr "type" "sselog")
20242 (set_attr "prefix_data16" "1")
20243 (set_attr "prefix_extra" "1")
20244 (set_attr "length_immediate" "1")
20245 (set_attr "memory" "none,load,none,load")
20246 (set_attr "prefix" "maybe_vex")
20247 (set_attr "btver2_decode" "vector,vector,vector,vector")
20248 (set_attr "mode" "TI")])
20250 ;; Packed float variants
20251 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
20252 [(V8DI "V8SF") (V16SI "V16SF")])
20254 (define_expand "avx512pf_gatherpf<mode>sf"
20256 [(match_operand:<avx512fmaskmode> 0 "register_operand")
20257 (mem:<GATHER_SCATTER_SF_MEM_MODE>
20259 [(match_operand 2 "vsib_address_operand")
20260 (match_operand:VI48_512 1 "register_operand")
20261 (match_operand:SI 3 "const1248_operand")]))
20262 (match_operand:SI 4 "const_2_to_3_operand")]
20263 UNSPEC_GATHER_PREFETCH)]
20267 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
20268 operands[3]), UNSPEC_VSIBADDR);
20271 (define_insn "*avx512pf_gatherpf<VI48_512:mode>sf_mask"
20273 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
20274 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
20276 [(match_operand:P 2 "vsib_address_operand" "Tv")
20277 (match_operand:VI48_512 1 "register_operand" "v")
20278 (match_operand:SI 3 "const1248_operand" "n")]
20280 (match_operand:SI 4 "const_2_to_3_operand" "n")]
20281 UNSPEC_GATHER_PREFETCH)]
20284 switch (INTVAL (operands[4]))
20287 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
20288 gas changed what it requires incompatibly. */
20289 return "%M2vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
20291 return "%M2vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
20293 gcc_unreachable ();
20296 [(set_attr "type" "sse")
20297 (set_attr "prefix" "evex")
20298 (set_attr "mode" "XI")])
20300 ;; Packed double variants
20301 (define_expand "avx512pf_gatherpf<mode>df"
20303 [(match_operand:<avx512fmaskmode> 0 "register_operand")
20306 [(match_operand 2 "vsib_address_operand")
20307 (match_operand:VI4_256_8_512 1 "register_operand")
20308 (match_operand:SI 3 "const1248_operand")]))
20309 (match_operand:SI 4 "const_2_to_3_operand")]
20310 UNSPEC_GATHER_PREFETCH)]
20314 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
20315 operands[3]), UNSPEC_VSIBADDR);
20318 (define_insn "*avx512pf_gatherpf<VI4_256_8_512:mode>df_mask"
20320 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
20321 (match_operator:V8DF 5 "vsib_mem_operator"
20323 [(match_operand:P 2 "vsib_address_operand" "Tv")
20324 (match_operand:VI4_256_8_512 1 "register_operand" "v")
20325 (match_operand:SI 3 "const1248_operand" "n")]
20327 (match_operand:SI 4 "const_2_to_3_operand" "n")]
20328 UNSPEC_GATHER_PREFETCH)]
20331 switch (INTVAL (operands[4]))
20334 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
20335 gas changed what it requires incompatibly. */
20336 return "%M2vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
20338 return "%M2vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
20340 gcc_unreachable ();
20343 [(set_attr "type" "sse")
20344 (set_attr "prefix" "evex")
20345 (set_attr "mode" "XI")])
20347 ;; Packed float variants
20348 (define_expand "avx512pf_scatterpf<mode>sf"
20350 [(match_operand:<avx512fmaskmode> 0 "register_operand")
20351 (mem:<GATHER_SCATTER_SF_MEM_MODE>
20353 [(match_operand 2 "vsib_address_operand")
20354 (match_operand:VI48_512 1 "register_operand")
20355 (match_operand:SI 3 "const1248_operand")]))
20356 (match_operand:SI 4 "const2367_operand")]
20357 UNSPEC_SCATTER_PREFETCH)]
20361 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
20362 operands[3]), UNSPEC_VSIBADDR);
20365 (define_insn "*avx512pf_scatterpf<VI48_512:mode>sf_mask"
20367 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
20368 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
20370 [(match_operand:P 2 "vsib_address_operand" "Tv")
20371 (match_operand:VI48_512 1 "register_operand" "v")
20372 (match_operand:SI 3 "const1248_operand" "n")]
20374 (match_operand:SI 4 "const2367_operand" "n")]
20375 UNSPEC_SCATTER_PREFETCH)]
20378 switch (INTVAL (operands[4]))
20382 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
20383 gas changed what it requires incompatibly. */
20384 return "%M2vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
20387 return "%M2vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
20389 gcc_unreachable ();
20392 [(set_attr "type" "sse")
20393 (set_attr "prefix" "evex")
20394 (set_attr "mode" "XI")])
20396 ;; Packed double variants
20397 (define_expand "avx512pf_scatterpf<mode>df"
20399 [(match_operand:<avx512fmaskmode> 0 "register_operand")
20402 [(match_operand 2 "vsib_address_operand")
20403 (match_operand:VI4_256_8_512 1 "register_operand")
20404 (match_operand:SI 3 "const1248_operand")]))
20405 (match_operand:SI 4 "const2367_operand")]
20406 UNSPEC_SCATTER_PREFETCH)]
20410 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
20411 operands[3]), UNSPEC_VSIBADDR);
20414 (define_insn "*avx512pf_scatterpf<VI4_256_8_512:mode>df_mask"
20416 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
20417 (match_operator:V8DF 5 "vsib_mem_operator"
20419 [(match_operand:P 2 "vsib_address_operand" "Tv")
20420 (match_operand:VI4_256_8_512 1 "register_operand" "v")
20421 (match_operand:SI 3 "const1248_operand" "n")]
20423 (match_operand:SI 4 "const2367_operand" "n")]
20424 UNSPEC_SCATTER_PREFETCH)]
20427 switch (INTVAL (operands[4]))
20431 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
20432 gas changed what it requires incompatibly. */
20433 return "%M2vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
20436 return "%M2vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
20438 gcc_unreachable ();
20441 [(set_attr "type" "sse")
20442 (set_attr "prefix" "evex")
20443 (set_attr "mode" "XI")])
20445 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
20446 [(set (match_operand:VF_512 0 "register_operand" "=v")
20448 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
20451 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
20452 [(set_attr "prefix" "evex")
20453 (set_attr "type" "sse")
20454 (set_attr "mode" "<MODE>")])
20456 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
20457 [(set (match_operand:VF_512 0 "register_operand" "=v")
20459 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
20462 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
20463 [(set_attr "prefix" "evex")
20464 (set_attr "type" "sse")
20465 (set_attr "mode" "<MODE>")])
;; Scalar vrcp28ss/vrcp28sd: element 0 is the 28-bit reciprocal
;; approximation of operand 1; remaining elements come from operand 2.
;; Fixed: the Intel-dialect template used "<mask_opernad3>" — a typo
;; for the "<mask_operand3>" subst attribute (cf. avx512er_vmrsqrt28
;; below, which spells it correctly).
20467 (define_insn "avx512er_vmrcp28<mode><mask_name><round_saeonly_name>"
20468 [(set (match_operand:VF_128 0 "register_operand" "=v")
20471 [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
20473 (match_operand:VF_128 2 "register_operand" "v")
20476 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_operand3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
20477 [(set_attr "length_immediate" "1")
20478 (set_attr "prefix" "evex")
20479 (set_attr "type" "sse")
20480 (set_attr "mode" "<MODE>")])
20482 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
20483 [(set (match_operand:VF_512 0 "register_operand" "=v")
20485 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
20488 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
20489 [(set_attr "prefix" "evex")
20490 (set_attr "type" "sse")
20491 (set_attr "mode" "<MODE>")])
20493 (define_insn "avx512er_vmrsqrt28<mode><mask_name><round_saeonly_name>"
20494 [(set (match_operand:VF_128 0 "register_operand" "=v")
20497 [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
20499 (match_operand:VF_128 2 "register_operand" "v")
20502 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_operand3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
20503 [(set_attr "length_immediate" "1")
20504 (set_attr "type" "sse")
20505 (set_attr "prefix" "evex")
20506 (set_attr "mode" "<MODE>")])
20508 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20510 ;; XOP instructions
20512 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20514 (define_code_iterator xop_plus [plus ss_plus])
20516 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
20517 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
20519 ;; XOP parallel integer multiply/add instructions.
20521 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
20522 [(set (match_operand:VI24_128 0 "register_operand" "=x")
20525 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
20526 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
20527 (match_operand:VI24_128 3 "register_operand" "x")))]
20529 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20530 [(set_attr "type" "ssemuladd")
20531 (set_attr "mode" "TI")])
20533 (define_insn "xop_p<macs>dql"
20534 [(set (match_operand:V2DI 0 "register_operand" "=x")
20539 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
20540 (parallel [(const_int 0) (const_int 2)])))
20543 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
20544 (parallel [(const_int 0) (const_int 2)]))))
20545 (match_operand:V2DI 3 "register_operand" "x")))]
20547 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20548 [(set_attr "type" "ssemuladd")
20549 (set_attr "mode" "TI")])
20551 (define_insn "xop_p<macs>dqh"
20552 [(set (match_operand:V2DI 0 "register_operand" "=x")
20557 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
20558 (parallel [(const_int 1) (const_int 3)])))
20561 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
20562 (parallel [(const_int 1) (const_int 3)]))))
20563 (match_operand:V2DI 3 "register_operand" "x")))]
20565 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20566 [(set_attr "type" "ssemuladd")
20567 (set_attr "mode" "TI")])
20569 ;; XOP parallel integer multiply/add instructions for the intrinisics
20570 (define_insn "xop_p<macs>wd"
20571 [(set (match_operand:V4SI 0 "register_operand" "=x")
20576 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
20577 (parallel [(const_int 1) (const_int 3)
20578 (const_int 5) (const_int 7)])))
20581 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
20582 (parallel [(const_int 1) (const_int 3)
20583 (const_int 5) (const_int 7)]))))
20584 (match_operand:V4SI 3 "register_operand" "x")))]
20586 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20587 [(set_attr "type" "ssemuladd")
20588 (set_attr "mode" "TI")])
20590 (define_insn "xop_p<madcs>wd"
20591 [(set (match_operand:V4SI 0 "register_operand" "=x")
20597 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
20598 (parallel [(const_int 0) (const_int 2)
20599 (const_int 4) (const_int 6)])))
20602 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
20603 (parallel [(const_int 0) (const_int 2)
20604 (const_int 4) (const_int 6)]))))
20609 (parallel [(const_int 1) (const_int 3)
20610 (const_int 5) (const_int 7)])))
20614 (parallel [(const_int 1) (const_int 3)
20615 (const_int 5) (const_int 7)])))))
20616 (match_operand:V4SI 3 "register_operand" "x")))]
20618 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20619 [(set_attr "type" "ssemuladd")
20620 (set_attr "mode" "TI")])
20622 ;; XOP parallel XMM conditional moves
20623 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
20624 [(set (match_operand:V_128_256 0 "register_operand" "=x,x")
20625 (if_then_else:V_128_256
20626 (match_operand:V_128_256 3 "nonimmediate_operand" "x,m")
20627 (match_operand:V_128_256 1 "register_operand" "x,x")
20628 (match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))]
20630 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20631 [(set_attr "type" "sse4arg")])
20633 ;; XOP horizontal add/subtract instructions
20634 (define_insn "xop_phadd<u>bw"
20635 [(set (match_operand:V8HI 0 "register_operand" "=x")
20639 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
20640 (parallel [(const_int 0) (const_int 2)
20641 (const_int 4) (const_int 6)
20642 (const_int 8) (const_int 10)
20643 (const_int 12) (const_int 14)])))
20647 (parallel [(const_int 1) (const_int 3)
20648 (const_int 5) (const_int 7)
20649 (const_int 9) (const_int 11)
20650 (const_int 13) (const_int 15)])))))]
20652 "vphadd<u>bw\t{%1, %0|%0, %1}"
20653 [(set_attr "type" "sseiadd1")])
20655 (define_insn "xop_phadd<u>bd"
20656 [(set (match_operand:V4SI 0 "register_operand" "=x")
20661 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
20662 (parallel [(const_int 0) (const_int 4)
20663 (const_int 8) (const_int 12)])))
20667 (parallel [(const_int 1) (const_int 5)
20668 (const_int 9) (const_int 13)]))))
20673 (parallel [(const_int 2) (const_int 6)
20674 (const_int 10) (const_int 14)])))
20678 (parallel [(const_int 3) (const_int 7)
20679 (const_int 11) (const_int 15)]))))))]
20681 "vphadd<u>bd\t{%1, %0|%0, %1}"
20682 [(set_attr "type" "sseiadd1")])
;; NOTE(review): this extract is missing interior lines of every pattern
;; below (the embedded numbering is discontinuous), so the RTL bodies shown
;; are incomplete.  Verify against the full sse.md before editing.

;; XOP horizontal add, byte source widened to quad-word (V16QI -> V2DI).
;; The visible vec_selects pair element k with element k+8; the widening
;; add RTL itself is among the missing lines.  <u> selects the signed
;; (vphaddbq) vs. unsigned (vphaddubq) form.
20684 (define_insn "xop_phadd<u>bq"
20685 [(set (match_operand:V2DI 0 "register_operand" "=x")
20691 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
20692 (parallel [(const_int 0) (const_int 8)])))
20696 (parallel [(const_int 1) (const_int 9)]))))
20701 (parallel [(const_int 2) (const_int 10)])))
20705 (parallel [(const_int 3) (const_int 11)])))))
20711 (parallel [(const_int 4) (const_int 12)])))
20715 (parallel [(const_int 5) (const_int 13)]))))
20720 (parallel [(const_int 6) (const_int 14)])))
20724 (parallel [(const_int 7) (const_int 15)])))))))]
20726 "vphadd<u>bq\t{%1, %0|%0, %1}"
20727 [(set_attr "type" "sseiadd1")])

;; XOP horizontal add, word source widened to double-word (V8HI -> V4SI);
;; even elements (0,2,4,6) are paired with odd elements (1,3,5,7).
20729 (define_insn "xop_phadd<u>wd"
20730 [(set (match_operand:V4SI 0 "register_operand" "=x")
20734 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
20735 (parallel [(const_int 0) (const_int 2)
20736 (const_int 4) (const_int 6)])))
20740 (parallel [(const_int 1) (const_int 3)
20741 (const_int 5) (const_int 7)])))))]
20743 "vphadd<u>wd\t{%1, %0|%0, %1}"
20744 [(set_attr "type" "sseiadd1")])

;; XOP horizontal add, word source widened to quad-word (V8HI -> V2DI);
;; the visible selects combine elements k and k+4.
20746 (define_insn "xop_phadd<u>wq"
20747 [(set (match_operand:V2DI 0 "register_operand" "=x")
20752 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
20753 (parallel [(const_int 0) (const_int 4)])))
20757 (parallel [(const_int 1) (const_int 5)]))))
20762 (parallel [(const_int 2) (const_int 6)])))
20766 (parallel [(const_int 3) (const_int 7)]))))))]
20768 "vphadd<u>wq\t{%1, %0|%0, %1}"
20769 [(set_attr "type" "sseiadd1")])

;; XOP horizontal add, double-word source widened to quad-word
;; (V4SI -> V2DI); even elements paired with odd elements.
20771 (define_insn "xop_phadd<u>dq"
20772 [(set (match_operand:V2DI 0 "register_operand" "=x")
20776 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
20777 (parallel [(const_int 0) (const_int 2)])))
20781 (parallel [(const_int 1) (const_int 3)])))))]
20783 "vphadd<u>dq\t{%1, %0|%0, %1}"
20784 [(set_attr "type" "sseiadd1")])

;; XOP horizontal subtract, byte source widened to word (V16QI -> V8HI).
;; Even-index lanes are combined with the following odd-index lanes;
;; presumably odd is subtracted from even per the vphsubbw semantics --
;; the arithmetic RTL is not visible in this extract.
20786 (define_insn "xop_phsubbw"
20787 [(set (match_operand:V8HI 0 "register_operand" "=x")
20791 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
20792 (parallel [(const_int 0) (const_int 2)
20793 (const_int 4) (const_int 6)
20794 (const_int 8) (const_int 10)
20795 (const_int 12) (const_int 14)])))
20799 (parallel [(const_int 1) (const_int 3)
20800 (const_int 5) (const_int 7)
20801 (const_int 9) (const_int 11)
20802 (const_int 13) (const_int 15)])))))]
20804 "vphsubbw\t{%1, %0|%0, %1}"
20805 [(set_attr "type" "sseiadd1")])

;; XOP horizontal subtract, word source widened to double-word
;; (V8HI -> V4SI).
20807 (define_insn "xop_phsubwd"
20808 [(set (match_operand:V4SI 0 "register_operand" "=x")
20812 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
20813 (parallel [(const_int 0) (const_int 2)
20814 (const_int 4) (const_int 6)])))
20818 (parallel [(const_int 1) (const_int 3)
20819 (const_int 5) (const_int 7)])))))]
20821 "vphsubwd\t{%1, %0|%0, %1}"
20822 [(set_attr "type" "sseiadd1")])

;; XOP horizontal subtract, double-word source widened to quad-word
;; (V4SI -> V2DI).
20824 (define_insn "xop_phsubdq"
20825 [(set (match_operand:V2DI 0 "register_operand" "=x")
20829 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
20830 (parallel [(const_int 0) (const_int 2)])))
20834 (parallel [(const_int 1) (const_int 3)])))))]
20836 "vphsubdq\t{%1, %0|%0, %1}"
20837 [(set_attr "type" "sseiadd1")])
;; NOTE(review): interior lines of these patterns are missing from this
;; extract (discontinuous embedded numbering); verify against full sse.md.
20839 ;; XOP permute instructions
;; Full byte permute: vpperm selects bytes from operands 1 and 2 under
;; control of selector operand 3 (UNSPEC because the semantics are not
;; expressible as plain RTL).  Two alternatives let either operand 2 or
;; operand 3 be a memory operand, but never both (see the condition).
20840 (define_insn "xop_pperm"
20841 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
20843 [(match_operand:V16QI 1 "register_operand" "x,x")
20844 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
20845 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
20846 UNSPEC_XOP_PERMUTE))]
20847 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
20848 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20849 [(set_attr "type" "sse4arg")
20850 (set_attr "mode" "TI")])

20852 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack 2xV2DI -> V4SI via vpperm; operand 3 is the byte-selector and is
;; only (use)d, i.e. it must encode the truncating pack for the pattern
;; to be valid -- the truncate/concat RTL is among the missing lines.
20853 (define_insn "xop_pperm_pack_v2di_v4si"
20854 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
20857 (match_operand:V2DI 1 "register_operand" "x,x"))
20859 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
20860 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
20861 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
20862 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20863 [(set_attr "type" "sse4arg")
20864 (set_attr "mode" "TI")])

;; Pack 2xV4SI -> V8HI via vpperm (same structure as above).
20866 (define_insn "xop_pperm_pack_v4si_v8hi"
20867 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
20870 (match_operand:V4SI 1 "register_operand" "x,x"))
20872 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
20873 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
20874 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
20875 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20876 [(set_attr "type" "sse4arg")
20877 (set_attr "mode" "TI")])

;; Pack 2xV8HI -> V16QI via vpperm (same structure as above).
20879 (define_insn "xop_pperm_pack_v8hi_v16qi"
20880 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
20883 (match_operand:V8HI 1 "register_operand" "x,x"))
20885 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
20886 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
20887 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
20888 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20889 [(set_attr "type" "sse4arg")
20890 (set_attr "mode" "TI")])
;; NOTE(review): interior lines (conditions, braces, closing forms) of the
;; patterns below are missing from this extract; verify against full sse.md.
20892 ;; XOP packed rotate instructions
;; Expander for rotate-left with a scalar SImode count.  If the count is
;; not a small constant it is broadcast into a vector register and the
;; variable-rotate insn xop_vrotl<mode>3 is emitted instead.
20893 (define_expand "rotl<mode>3"
20894 [(set (match_operand:VI_128 0 "register_operand")
20896 (match_operand:VI_128 1 "nonimmediate_operand")
20897 (match_operand:SI 2 "general_operand")))]
20900 /* If we were given a scalar, convert it to parallel */
20901 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
20903 rtvec vs = rtvec_alloc (<ssescalarnum>);
20904 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
20905 rtx reg = gen_reg_rtx (<MODE>mode);
20906 rtx op2 = operands[2];
;; Widen/narrow the scalar count to the vector's element mode if needed.
20909 if (GET_MODE (op2) != <ssescalarmode>mode)
20911 op2 = gen_reg_rtx (<ssescalarmode>mode);
20912 convert_move (op2, operands[2], false);
;; Broadcast the count into every element of the vector.
20915 for (i = 0; i < <ssescalarnum>; i++)
20916 RTVEC_ELT (vs, i) = op2;
20918 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
20919 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));

;; Expander for rotate-right: same broadcast as rotl<mode>3 but the
;; count vector is negated first, since XOP only has a rotate-left insn.
20924 (define_expand "rotr<mode>3"
20925 [(set (match_operand:VI_128 0 "register_operand")
20927 (match_operand:VI_128 1 "nonimmediate_operand")
20928 (match_operand:SI 2 "general_operand")))]
20931 /* If we were given a scalar, convert it to parallel */
20932 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
20934 rtvec vs = rtvec_alloc (<ssescalarnum>);
20935 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
20936 rtx neg = gen_reg_rtx (<MODE>mode);
20937 rtx reg = gen_reg_rtx (<MODE>mode);
20938 rtx op2 = operands[2];
20941 if (GET_MODE (op2) != <ssescalarmode>mode)
20943 op2 = gen_reg_rtx (<ssescalarmode>mode);
20944 convert_move (op2, operands[2], false);
20947 for (i = 0; i < <ssescalarnum>; i++)
20948 RTVEC_ELT (vs, i) = op2;
20950 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
;; rotr x,n == rotl x,-n: negate the broadcast count, then rotate left.
20951 emit_insn (gen_neg<mode>2 (neg, reg));
20952 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));

;; Immediate-count rotate-left insn (vprotb/w/d/q with an imm8 count).
20957 (define_insn "xop_rotl<mode>3"
20958 [(set (match_operand:VI_128 0 "register_operand" "=x")
20960 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
20961 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
20963 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
20964 [(set_attr "type" "sseishft")
20965 (set_attr "length_immediate" "1")
20966 (set_attr "mode" "TI")])

;; Immediate-count rotate-right: implemented as vprot with the
;; complementary count (elt_bits - n), computed into operands[3].
20968 (define_insn "xop_rotr<mode>3"
20969 [(set (match_operand:VI_128 0 "register_operand" "=x")
20971 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
20972 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
20976 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
20977 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
20979 [(set_attr "type" "sseishft")
20980 (set_attr "length_immediate" "1")
20981 (set_attr "mode" "TI")])

;; Variable (per-element) rotate-right: negate the count vector and
;; reuse the variable rotate-left insn.
20983 (define_expand "vrotr<mode>3"
20984 [(match_operand:VI_128 0 "register_operand")
20985 (match_operand:VI_128 1 "register_operand")
20986 (match_operand:VI_128 2 "register_operand")]
20989 rtx reg = gen_reg_rtx (<MODE>mode);
20990 emit_insn (gen_neg<mode>2 (reg, operands[2]));
20991 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));

;; Variable rotate-left: direct dispatch to the XOP insn.
20995 (define_expand "vrotl<mode>3"
20996 [(match_operand:VI_128 0 "register_operand")
20997 (match_operand:VI_128 1 "register_operand")
20998 (match_operand:VI_128 2 "register_operand")]
21001 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));

;; Variable per-element rotate insn (vprotb/w/d/q with a vector count).
;; Modeled as an if_then_else on the count's sign (negative counts rotate
;; the other way); the comparison RTL is among the missing lines here.
21005 (define_insn "xop_vrotl<mode>3"
21006 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
21007 (if_then_else:VI_128
21009 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
21012 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
21016 (neg:VI_128 (match_dup 2)))))]
21017 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
21018 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
21019 [(set_attr "type" "sseishft")
21020 (set_attr "prefix_data16" "0")
21021 (set_attr "prefix_extra" "2")
21022 (set_attr "mode" "TI")])
;; NOTE(review): interior lines (conditions, else-branches, closing forms)
;; of the expanders below are missing from this extract.
21024 ;; XOP packed shift instructions.
;; Variable logical-right shift for byte/word 128-bit vectors.  On the
;; XOP path the count vector is negated and vpshl (xop_shl) is used,
;; since XOP shifts left for positive counts and right for negative.
;; The V16QI fallback lowers through ix86_expand_vecop_qihi.
21025 (define_expand "vlshr<mode>3"
21026 [(set (match_operand:VI12_128 0 "register_operand")
21028 (match_operand:VI12_128 1 "register_operand")
21029 (match_operand:VI12_128 2 "nonimmediate_operand")))]
21030 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
21034 rtx neg = gen_reg_rtx (<MODE>mode);
21035 emit_insn (gen_neg<mode>2 (neg, operands[2]));
21036 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
21039 else if (<MODE>mode == V16QImode)
21041 ix86_expand_vecop_qihi (LSHIFTRT, operands[0], operands[1], operands[2]);

;; Variable logical-right shift for dword/qword 128-bit vectors;
;; the XOP negate-and-vpshl trick again (AVX2 path presumably uses the
;; native vpsrlv insns -- those lines are not visible here).
21046 (define_expand "vlshr<mode>3"
21047 [(set (match_operand:VI48_128 0 "register_operand")
21049 (match_operand:VI48_128 1 "register_operand")
21050 (match_operand:VI48_128 2 "nonimmediate_operand")))]
21051 "TARGET_AVX2 || TARGET_XOP"
21055 rtx neg = gen_reg_rtx (<MODE>mode);
21056 emit_insn (gen_neg<mode>2 (neg, operands[2]));
21057 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));

;; Variable shifts (any_shift iterator) for 256/512-bit byte/word
;; vectors; QImode element modes are lowered via ix86_expand_vecop_qihi.
21062 (define_expand "v<insn><mode>3"
21063 [(set (match_operand:VI12_256_512_AVX512VL 0 "register_operand")
21064 (any_shift:VI12_256_512_AVX512VL
21065 (match_operand:VI12_256_512_AVX512VL 1 "register_operand")
21066 (match_operand:VI12_256_512_AVX512VL 2 "nonimmediate_operand")))]
21069 if (<MODE>mode == V32QImode || <MODE>mode == V64QImode)
21071 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);

;; Variable shifts for 64-bit V8QI vectors (mmx-sized, 64-bit only).
21076 (define_expand "v<insn>v8qi3"
21077 [(set (match_operand:V8QI 0 "register_operand")
21079 (match_operand:V8QI 1 "register_operand")
21080 (match_operand:V8QI 2 "nonimmediate_operand")))]
21081 "TARGET_AVX512BW && TARGET_AVX512VL && TARGET_64BIT"
21083 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);

;; 512-bit and 256-bit variable logical-right-shift expanders; their
;; conditions/bodies are among the missing lines (presumably trivial
;; pass-throughs to the native AVX512/AVX2 insns).
21087 (define_expand "vlshr<mode>3"
21088 [(set (match_operand:VI48_512 0 "register_operand")
21090 (match_operand:VI48_512 1 "register_operand")
21091 (match_operand:VI48_512 2 "nonimmediate_operand")))]
21094 (define_expand "vlshr<mode>3"
21095 [(set (match_operand:VI48_256 0 "register_operand")
21097 (match_operand:VI48_256 1 "register_operand")
21098 (match_operand:VI48_256 2 "nonimmediate_operand")))]

;; Variable arithmetic-right shift, V8DI (AVX512F has native vpsravq).
21101 (define_expand "vashrv8di3"
21102 [(set (match_operand:V8DI 0 "register_operand")
21104 (match_operand:V8DI 1 "register_operand")
21105 (match_operand:V8DI 2 "nonimmediate_operand")))]

;; Variable arithmetic-right shift, V4DI.  Without AVX512VL there is no
;; native vpsravq, so emulate via the identity
;;   x >>a n  ==  ((x >>l n) ^ (m >>l n)) - (m >>l n),  m = sign-bit mask.
21108 (define_expand "vashrv4di3"
21109 [(set (match_operand:V4DI 0 "register_operand")
21111 (match_operand:V4DI 1 "register_operand")
21112 (match_operand:V4DI 2 "nonimmediate_operand")))]
21115 if (!TARGET_AVX512VL)
21117 rtx mask = ix86_build_signbit_mask (V4DImode, 1, 0);
21118 rtx t1 = gen_reg_rtx (V4DImode);
21119 rtx t2 = gen_reg_rtx (V4DImode);
21120 rtx t3 = gen_reg_rtx (V4DImode);
21121 emit_insn (gen_vlshrv4di3 (t1, operands[1], operands[2]));
21122 emit_insn (gen_vlshrv4di3 (t2, mask, operands[2]));
21123 emit_insn (gen_xorv4di3 (t3, t1, t2));
21124 emit_insn (gen_subv4di3 (operands[0], t3, t2));
;; NOTE(review): interior lines of the expanders below are missing from
;; this extract.  Also note the spacing inconsistencies on the visible
;; lines "else if(<MODE>mode" and "operands[0],operands[1]" -- worth
;; normalizing in the full file.
;; Variable arithmetic-right shift for byte/word 128-bit vectors: the XOP
;; path negates the count and uses vpsha (positive count = left shift,
;; negative = arithmetic right); V16QI falls back to vecop_qihi lowering.
21129 (define_expand "vashr<mode>3"
21130 [(set (match_operand:VI12_128 0 "register_operand")
21132 (match_operand:VI12_128 1 "register_operand")
21133 (match_operand:VI12_128 2 "nonimmediate_operand")))]
21134 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
21138 rtx neg = gen_reg_rtx (<MODE>mode);
21139 emit_insn (gen_neg<mode>2 (neg, operands[2]));
21140 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
21143 else if(<MODE>mode == V16QImode)
21145 ix86_expand_vecop_qihi (ASHIFTRT, operands[0],operands[1], operands[2]);

;; Variable arithmetic-right shift, V2DI.  XOP path: negate + vpshaq.
;; Non-XOP/non-AVX512VL path: the same xor/sub sign-mask emulation used
;; by vashrv4di3 (lshr of value and of the sign-bit mask, xor, sub).
21150 (define_expand "vashrv2di3"
21151 [(set (match_operand:V2DI 0 "register_operand")
21153 (match_operand:V2DI 1 "register_operand")
21154 (match_operand:V2DI 2 "nonimmediate_operand")))]
21155 "TARGET_XOP || TARGET_AVX2"
21159 rtx neg = gen_reg_rtx (V2DImode);
21160 emit_insn (gen_negv2di2 (neg, operands[2]));
21161 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
21164 if (!TARGET_AVX512VL)
21166 rtx mask = ix86_build_signbit_mask (V2DImode, 1, 0);
21167 rtx t1 = gen_reg_rtx (V2DImode);
21168 rtx t2 = gen_reg_rtx (V2DImode);
21169 rtx t3 = gen_reg_rtx (V2DImode);
21170 emit_insn (gen_vlshrv2di3 (t1, operands[1], operands[2]));
21171 emit_insn (gen_vlshrv2di3 (t2, mask, operands[2]));
21172 emit_insn (gen_xorv2di3 (t3, t1, t2));
21173 emit_insn (gen_subv2di3 (operands[0], t3, t2));

;; Variable arithmetic-right shift, V4SI: XOP negate + vpshad path
;; (AVX2 has native vpsravd; that branch is not visible here).
21178 (define_expand "vashrv4si3"
21179 [(set (match_operand:V4SI 0 "register_operand")
21180 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
21181 (match_operand:V4SI 2 "nonimmediate_operand")))]
21182 "TARGET_AVX2 || TARGET_XOP"
21186 rtx neg = gen_reg_rtx (V4SImode);
21187 emit_insn (gen_negv4si2 (neg, operands[2]));
21188 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));

;; 512-bit and 256-bit variable arithmetic-right-shift expanders;
;; conditions/bodies are among the missing lines.
21193 (define_expand "vashrv16si3"
21194 [(set (match_operand:V16SI 0 "register_operand")
21195 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
21196 (match_operand:V16SI 2 "nonimmediate_operand")))]
21199 (define_expand "vashrv8si3"
21200 [(set (match_operand:V8SI 0 "register_operand")
21201 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
21202 (match_operand:V8SI 2 "nonimmediate_operand")))]

;; Variable left shift for byte/word 128-bit vectors: vpsha with a
;; positive count already shifts left, so no negation is needed here.
21205 (define_expand "vashl<mode>3"
21206 [(set (match_operand:VI12_128 0 "register_operand")
21208 (match_operand:VI12_128 1 "register_operand")
21209 (match_operand:VI12_128 2 "nonimmediate_operand")))]
21210 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
21214 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
21217 else if (<MODE>mode == V16QImode)
21219 ix86_expand_vecop_qihi (ASHIFT, operands[0], operands[1], operands[2]);

;; Variable left shift for dword/qword 128-bit vectors; the XOP path
;; forces the count into a register before emitting vpsha.
21224 (define_expand "vashl<mode>3"
21225 [(set (match_operand:VI48_128 0 "register_operand")
21227 (match_operand:VI48_128 1 "register_operand")
21228 (match_operand:VI48_128 2 "nonimmediate_operand")))]
21229 "TARGET_AVX2 || TARGET_XOP"
21233 operands[2] = force_reg (<MODE>mode, operands[2]);
21234 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));

;; 512-bit and 256-bit variable left-shift expanders; conditions/bodies
;; are among the missing lines.
21239 (define_expand "vashl<mode>3"
21240 [(set (match_operand:VI48_512 0 "register_operand")
21242 (match_operand:VI48_512 1 "register_operand")
21243 (match_operand:VI48_512 2 "nonimmediate_operand")))]
21246 (define_expand "vashl<mode>3"
21247 [(set (match_operand:VI48_256 0 "register_operand")
21249 (match_operand:VI48_256 1 "register_operand")
21250 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; NOTE(review): the comparison arm of each if_then_else below is among
;; the lines missing from this extract.
;; XOP variable per-element arithmetic shift (vpsha*): positive counts
;; shift left, negative counts shift right -- modeled as an if_then_else
;; on the count's sign, with (neg (match_dup 2)) as the right-shift count.
21253 (define_insn "xop_sha<mode>3"
21254 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
21255 (if_then_else:VI_128
21257 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
21260 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
21264 (neg:VI_128 (match_dup 2)))))]
21265 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
21266 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
21267 [(set_attr "type" "sseishft")
21268 (set_attr "prefix_data16" "0")
21269 (set_attr "prefix_extra" "2")
21270 (set_attr "mode" "TI")])

;; XOP variable per-element logical shift (vpshl*): same sign convention
;; as vpsha but logical right shift for negative counts.
21272 (define_insn "xop_shl<mode>3"
21273 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
21274 (if_then_else:VI_128
21276 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
21279 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
21283 (neg:VI_128 (match_dup 2)))))]
21284 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
21285 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
21286 [(set_attr "type" "sseishft")
21287 (set_attr "prefix_data16" "0")
21288 (set_attr "prefix_extra" "2")
21289 (set_attr "mode" "TI")])

;; Uniform (scalar-count) shift expander for byte vectors.  On the XOP
;; V16QI path: for right shifts the count is negated (constant directly,
;; otherwise via negv16qi2 after broadcasting), the count is splatted
;; into a V16QI register, and vpshl/vpsha is emitted; everything else
;; goes through the generic ix86_expand_vecop_qihi lowering.
21291 (define_expand "<insn><mode>3"
21292 [(set (match_operand:VI1_AVX512 0 "register_operand")
21293 (any_shift:VI1_AVX512
21294 (match_operand:VI1_AVX512 1 "register_operand")
21295 (match_operand:SI 2 "nonmemory_operand")))]
21298 if (TARGET_XOP && <MODE>mode == V16QImode)
21300 bool negate = false;
21301 rtx (*gen) (rtx, rtx, rtx);
;; Right shifts are expressed as left shifts by a negated count.
21305 if (<CODE> != ASHIFT)
21307 if (CONST_INT_P (operands[2]))
21308 operands[2] = GEN_INT (-INTVAL (operands[2]));
;; Splat the (possibly negated) scalar count into all 16 byte lanes.
21312 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
21313 for (i = 0; i < 16; i++)
21314 XVECEXP (par, 0, i) = operands[2];
21316 tmp = gen_reg_rtx (V16QImode);
21317 emit_insn (gen_vec_initv16qiqi (tmp, par));
;; Non-constant counts are negated after the broadcast (negate flag).
21320 emit_insn (gen_negv16qi2 (tmp, tmp));
;; Logical shifts use vpshl, arithmetic use vpsha.
21322 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
21323 emit_insn (gen (operands[0], operands[1], tmp));
21326 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; Uniform arithmetic-right shift of V2DI with a scalar DImode count.
;; SSE2 has no native psraq, so without AVX512VL this expander emulates
;; it through several strategies chosen by the (mostly not visible here)
;; conditions.  NOTE(review): many interior lines of this expander are
;; missing from this extract -- the branch structure below is partially
;; inferred and must be confirmed against the full sse.md.
21330 (define_expand "ashrv2di3"
21331 [(set (match_operand:V2DI 0 "register_operand")
21333 (match_operand:V2DI 1 "register_operand")
21334 (match_operand:DI 2 "nonmemory_operand")))]
21337 if (!TARGET_AVX512VL)
;; Shift by >= 63 turns every element into 0 or -1: computable as the
;; SSE4.2 signed compare 0 > x.
21340 && CONST_INT_P (operands[2])
21341 && UINTVAL (operands[2]) >= 63)
21343 rtx zero = force_reg (V2DImode, CONST0_RTX (V2DImode));
21344 emit_insn (gen_sse4_2_gtv2di3 (operands[0], zero, operands[1]));
;; Shift by 0 is a plain move.
21347 if (operands[2] == const0_rtx)
21349 emit_move_insn (operands[0], operands[1]);
;; Constant-count path built from V4SI arithmetic shifts plus a
;; 4-element permutation reassembling the V2DI result.
21352 if (CONST_INT_P (operands[2])
21353 && (!TARGET_XOP || UINTVAL (operands[2]) >= 63))
21355 vec_perm_builder sel (4, 4, 1);
21356 sel.quick_grow (4);
21358 rtx op1 = lowpart_subreg (V4SImode, operands[1], V2DImode);
21359 rtx target = gen_reg_rtx (V4SImode);
;; count >= 63: both halves become the replicated sign word.
21360 if (UINTVAL (operands[2]) >= 63)
21362 arg0 = arg1 = gen_reg_rtx (V4SImode);
21363 emit_insn (gen_ashrv4si3 (arg0, op1, GEN_INT (31)));
;; 32 < count < 63: low result word = high source word >> (count-32),
;; high result word = sign replication.
21369 else if (INTVAL (operands[2]) > 32)
21371 arg0 = gen_reg_rtx (V4SImode);
21372 arg1 = gen_reg_rtx (V4SImode);
21373 emit_insn (gen_ashrv4si3 (arg1, op1, GEN_INT (31)));
21374 emit_insn (gen_ashrv4si3 (arg0, op1,
21375 GEN_INT (INTVAL (operands[2]) - 32)));
;; count == 32: select the high words directly, sign-fill the rest.
21381 else if (INTVAL (operands[2]) == 32)
21384 arg1 = gen_reg_rtx (V4SImode);
21385 emit_insn (gen_ashrv4si3 (arg1, op1, GEN_INT (31)));
;; count < 32: logical qword shift supplies the low bits, V4SI
;; arithmetic shift supplies the sign-extended high word.
21393 arg0 = gen_reg_rtx (V2DImode);
21394 arg1 = gen_reg_rtx (V4SImode);
21395 emit_insn (gen_lshrv2di3 (arg0, operands[1], operands[2]));
21396 emit_insn (gen_ashrv4si3 (arg1, op1, operands[2]));
21397 arg0 = lowpart_subreg (V4SImode, arg0, V2DImode);
;; Combine the prepared halves with a (target-supported) permutation.
21403 vec_perm_indices indices (sel, arg0 != arg1 ? 2 : 1, 4);
21404 bool ok = targetm.vectorize.vec_perm_const (V4SImode, target,
21405 arg0, arg1, indices);
21407 emit_move_insn (operands[0],
21408 lowpart_subreg (V2DImode, target, V4SImode));
;; Variable-count non-XOP path: compute 0/-1 sign masks per element
;; (via pcmpgt or an ashr+pshufd of the high words), then
;;   result = (x >>l n) | (signmask << (64 - n)).
21413 rtx zero = force_reg (V2DImode, CONST0_RTX (V2DImode));
21414 rtx zero_or_all_ones;
21417 zero_or_all_ones = gen_reg_rtx (V2DImode);
21418 emit_insn (gen_sse4_2_gtv2di3 (zero_or_all_ones, zero,
21423 rtx temp = gen_reg_rtx (V4SImode);
21424 emit_insn (gen_ashrv4si3 (temp, lowpart_subreg (V4SImode,
;; Without SSE4.2: replicate the sign words into both lanes of each
;; qword with pshufd (elements 1,1,3,3).
21428 zero_or_all_ones = gen_reg_rtx (V4SImode);
21429 emit_insn (gen_sse2_pshufd_1 (zero_or_all_ones, temp,
21430 const1_rtx, const1_rtx,
21431 GEN_INT (3), GEN_INT (3)));
21432 zero_or_all_ones = lowpart_subreg (V2DImode, zero_or_all_ones,
21435 rtx lshr_res = gen_reg_rtx (V2DImode);
21436 emit_insn (gen_lshrv2di3 (lshr_res, operands[1], operands[2]));
21437 rtx ashl_res = gen_reg_rtx (V2DImode);
;; Build the complementary count 64 - n (DImode or via SImode + vec_set
;; depending on target word size -- the selector lines are missing).
21441 amount = gen_reg_rtx (DImode);
21442 emit_insn (gen_subdi3 (amount, force_reg (DImode, GEN_INT (64)),
21447 rtx temp = gen_reg_rtx (SImode);
21448 emit_insn (gen_subsi3 (temp, force_reg (SImode, GEN_INT (64)),
21449 lowpart_subreg (SImode, operands[2],
21451 amount = gen_reg_rtx (V4SImode);
21452 emit_insn (gen_vec_setv4si_0 (amount, CONST0_RTX (V4SImode),
21455 amount = lowpart_subreg (DImode, amount, GET_MODE (amount));
21456 emit_insn (gen_ashlv2di3 (ashl_res, zero_or_all_ones, amount));
21457 emit_insn (gen_iorv2di3 (operands[0], lshr_res, ashl_res));
;; XOP path: splat -n into a V2DI register and use vpshaq.
21461 rtx reg = gen_reg_rtx (V2DImode);
21463 bool negate = false;
21466 if (CONST_INT_P (operands[2]))
21467 operands[2] = GEN_INT (-INTVAL (operands[2]));
21471 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
21472 for (i = 0; i < 2; i++)
21473 XVECEXP (par, 0, i) = operands[2];
21475 emit_insn (gen_vec_initv2didi (reg, par));
21478 emit_insn (gen_negv2di2 (reg, reg));
21480 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg))
;; NOTE(review): interior lines of the patterns below are missing from
;; this extract; verify against full sse.md.
21485 ;; XOP FRCZ support
;; Extract the fractional part of each element (vfrczps/pd/ss/sd).
21486 (define_insn "xop_frcz<mode>2"
21487 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
21489 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
21492 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
21493 [(set_attr "type" "ssecvt1")
21494 (set_attr "mode" "<MODE>")])

;; Scalar FRCZ expander: operand 2 is set to a zero vector, which the
;; matching insn below requires for the merged upper elements.
21496 (define_expand "xop_vmfrcz<mode>2"
21497 [(set (match_operand:VF_128 0 "register_operand")
21500 [(match_operand:VF_128 1 "nonimmediate_operand")]
21505 "operands[2] = CONST0_RTX (<MODE>mode);")

;; Scalar FRCZ insn (vfrczss/sd): fractional part in element 0, upper
;; elements taken from the const0 operand 2.
21507 (define_insn "*xop_vmfrcz<mode>2"
21508 [(set (match_operand:VF_128 0 "register_operand" "=x")
21511 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
21513 (match_operand:VF_128 2 "const0_operand")
21516 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
21517 [(set_attr "type" "ssecvt1")
21518 (set_attr "mode" "<MODE>")])

;; XOP signed integer compare producing an all-0/all-1 mask; the %Y1
;; output modifier prints the comparison code as a vpcom suffix.
21520 (define_insn "xop_maskcmp<mode>3"
21521 [(set (match_operand:VI_128 0 "register_operand" "=x")
21522 (match_operator:VI_128 1 "ix86_comparison_int_operator"
21523 [(match_operand:VI_128 2 "register_operand" "x")
21524 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
21526 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
21527 [(set_attr "type" "sse4arg")
21528 (set_attr "prefix_data16" "0")
21529 (set_attr "prefix_rep" "0")
21530 (set_attr "prefix_extra" "2")
21531 (set_attr "length_immediate" "1")
21532 (set_attr "mode" "TI")])

;; Unsigned variant of the above (vpcom*u*).
21534 (define_insn "xop_maskcmp_uns<mode>3"
21535 [(set (match_operand:VI_128 0 "register_operand" "=x")
21536 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
21537 [(match_operand:VI_128 2 "register_operand" "x")
21538 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
21540 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
21541 [(set_attr "type" "ssecmp")
21542 (set_attr "prefix_data16" "0")
21543 (set_attr "prefix_rep" "0")
21544 (set_attr "prefix_extra" "2")
21545 (set_attr "length_immediate" "1")
21546 (set_attr "mode" "TI")])

21548 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
21549 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
21550 ;; the exact instruction generated for the intrinsic.
;; The UNSPEC wrapper prevents the optimizers from canonicalizing the
;; comparison away from the exact unsigned form.
21551 (define_insn "xop_maskcmp_uns2<mode>3"
21552 [(set (match_operand:VI_128 0 "register_operand" "=x")
21554 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
21555 [(match_operand:VI_128 2 "register_operand" "x")
21556 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
21557 UNSPEC_XOP_UNSIGNED_CMP))]
21559 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
21560 [(set_attr "type" "ssecmp")
21561 (set_attr "prefix_data16" "0")
21562 (set_attr "prefix_extra" "2")
21563 (set_attr "length_immediate" "1")
21564 (set_attr "mode" "TI")])

21566 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
21567 ;; being added here to be complete.
;; Operand 3 selects which constant-result compare to emit: nonzero ->
;; vpcomtrue (all ones), zero -> vpcomfalse (all zeros).
21568 (define_insn "xop_pcom_tf<mode>3"
21569 [(set (match_operand:VI_128 0 "register_operand" "=x")
21571 [(match_operand:VI_128 1 "register_operand" "x")
21572 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
21573 (match_operand:SI 3 "const_int_operand" "n")]
21574 UNSPEC_XOP_TRUEFALSE))]
21577 return ((INTVAL (operands[3]) != 0)
21578 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
21579 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
21581 [(set_attr "type" "ssecmp")
21582 (set_attr "prefix_data16" "0")
21583 (set_attr "prefix_extra" "2")
21584 (set_attr "length_immediate" "1")
21585 (set_attr "mode" "TI")])

;; Two-source permute vpermil2ps/pd: selector vector operand 3 plus a
;; 2-bit immediate (operand 4) controlling the zeroing behavior.
21587 (define_insn "xop_vpermil2<mode>3"
21588 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
21590 [(match_operand:VF_128_256 1 "register_operand" "x,x")
21591 (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
21592 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
21593 (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
21596 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
21597 [(set_attr "type" "sse4arg")
21598 (set_attr "length_immediate" "1")
21599 (set_attr "mode" "<MODE>")])
;; NOTE(review): interior lines (unspec names, insn conditions) of the
;; patterns below are missing from this extract.
21601 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AES-NI instructions.  Each has a legacy 2-operand SSE form (dest = op1,
;; "0" constraint) and a 3-operand VEX form; isa attr selects noavx/avx.

;; One AES encryption round: state op1, round key op2.
21603 (define_insn "aesenc"
21604 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
21605 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
21606 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
21610 aesenc\t{%2, %0|%0, %2}
21611 vaesenc\t{%2, %1, %0|%0, %1, %2}"
21612 [(set_attr "isa" "noavx,avx")
21613 (set_attr "type" "sselog1")
21614 (set_attr "prefix_extra" "1")
21615 (set_attr "prefix" "orig,vex")
21616 (set_attr "btver2_decode" "double,double")
21617 (set_attr "mode" "TI")])

;; Final AES encryption round (no MixColumns).
21619 (define_insn "aesenclast"
21620 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
21621 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
21622 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
21623 UNSPEC_AESENCLAST))]
21626 aesenclast\t{%2, %0|%0, %2}
21627 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
21628 [(set_attr "isa" "noavx,avx")
21629 (set_attr "type" "sselog1")
21630 (set_attr "prefix_extra" "1")
21631 (set_attr "prefix" "orig,vex")
21632 (set_attr "btver2_decode" "double,double")
21633 (set_attr "mode" "TI")])

;; One AES decryption round.
21635 (define_insn "aesdec"
21636 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
21637 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
21638 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
21642 aesdec\t{%2, %0|%0, %2}
21643 vaesdec\t{%2, %1, %0|%0, %1, %2}"
21644 [(set_attr "isa" "noavx,avx")
21645 (set_attr "type" "sselog1")
21646 (set_attr "prefix_extra" "1")
21647 (set_attr "prefix" "orig,vex")
21648 (set_attr "btver2_decode" "double,double")
21649 (set_attr "mode" "TI")])

;; Final AES decryption round.
21651 (define_insn "aesdeclast"
21652 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
21653 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
21654 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
21655 UNSPEC_AESDECLAST))]
21658 aesdeclast\t{%2, %0|%0, %2}
21659 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
21660 [(set_attr "isa" "noavx,avx")
21661 (set_attr "type" "sselog1")
21662 (set_attr "prefix_extra" "1")
21663 (set_attr "prefix" "orig,vex")
21664 (set_attr "btver2_decode" "double,double")
21665 (set_attr "mode" "TI")])

;; AES InverseMixColumns; %v prefixes 'v' when AVX is enabled.
21667 (define_insn "aesimc"
21668 [(set (match_operand:V2DI 0 "register_operand" "=x")
21669 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
21672 "%vaesimc\t{%1, %0|%0, %1}"
21673 [(set_attr "type" "sselog1")
21674 (set_attr "prefix_extra" "1")
21675 (set_attr "prefix" "maybe_vex")
21676 (set_attr "mode" "TI")])

;; AES round-key generation helper; operand 2 is the imm8 round constant.
21678 (define_insn "aeskeygenassist"
21679 [(set (match_operand:V2DI 0 "register_operand" "=x")
21680 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
21681 (match_operand:SI 2 "const_0_to_255_operand" "n")]
21682 UNSPEC_AESKEYGENASSIST))]
21684 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
21685 [(set_attr "type" "sselog1")
21686 (set_attr "prefix_extra" "1")
21687 (set_attr "length_immediate" "1")
21688 (set_attr "prefix" "maybe_vex")
21689 (set_attr "mode" "TI")])

;; Carry-less multiply of selected qwords; imm8 operand 3 picks which
;; qword of each source participates.
21691 (define_insn "pclmulqdq"
21692 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
21693 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
21694 (match_operand:V2DI 2 "vector_operand" "xBm,xm")
21695 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
21699 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
21700 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21701 [(set_attr "isa" "noavx,avx")
21702 (set_attr "type" "sselog1")
21703 (set_attr "prefix_extra" "1")
21704 (set_attr "length_immediate" "1")
21705 (set_attr "prefix" "orig,vex")
21706 (set_attr "mode" "TI")])
;; vzeroall: build a PARALLEL containing the UNSPEC_VOLATILE plus one
;; (set reg 0) per SSE register (16 in 64-bit mode, 8 otherwise) so the
;; register allocator sees every register clobbered.
21708 (define_expand "avx_vzeroall"
21709 [(match_par_dup 0 [(const_int 0)])]
21712 int nregs = TARGET_64BIT ? 16 : 8;
21715 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
21717 XVECEXP (operands[0], 0, 0)
21718 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
;; Element 0 is the volatile marker; elements 1..nregs zero each
;; V8SI view of xmm/ymm regno.
21721 for (regno = 0; regno < nregs; regno++)
21722 XVECEXP (operands[0], 0, regno + 1)
21723 = gen_rtx_SET (gen_rtx_REG (V8SImode, GET_SSE_REGNO (regno)),
21724 CONST0_RTX (V8SImode))

;; Matching insn for the PARALLEL built above.
21727 (define_insn "*avx_vzeroall"
21728 [(match_parallel 0 "vzeroall_operation"
21729 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
21732 [(set_attr "type" "sse")
21733 (set_attr "modrm" "0")
21734 (set_attr "memory" "none")
21735 (set_attr "prefix" "vex")
21736 (set_attr "btver2_decode" "vector")
21737 (set_attr "mode" "OI")])

21739 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
21740 ;; if the upper 128bits are unused. Initially we expand the instructions
21741 ;; as though they had no effect on the SSE registers, but later add SETs and
21742 ;; CLOBBERs to the PARALLEL to model the real effect.
;; Modeled as a call with its own callee ABI (ABI_VZEROUPPER) so the RA
;; treats register effects via the ABI machinery; see
;; ix86_expand_avx_vzeroupper.
21744 (define_expand "avx_vzeroupper"
21745 [(parallel [(call (mem:QI (const_int 0))
21747 (unspec [(const_int ABI_VZEROUPPER)] UNSPEC_CALLEE_ABI)])]
21750 ix86_expand_avx_vzeroupper ();

;; The insn actually emitted for vzeroupper (fake-call form).
21754 (define_insn "avx_vzeroupper_callee_abi"
21755 [(call (mem:QI (const_int 0))
21757 (unspec [(const_int ABI_VZEROUPPER)] UNSPEC_CALLEE_ABI)]
21760 [(set_attr "type" "sse")
21761 (set_attr "modrm" "0")
21762 (set_attr "memory" "none")
21763 (set_attr "prefix" "vex")
21764 (set_attr "btver2_decode" "vector")
21765 (set_attr "mode" "OI")])
;; ISA attribute for the EVEX-encoded vpbroadcast alternative: byte/word
;; element modes need AVX512BW, dword/qword need only AVX512F.
21767 (define_mode_attr pbroadcast_evex_isa
21768 [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
21769 (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
21770 (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
21771 (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])

;; Broadcast element 0 of an xmm-sized source to all elements of the
;; destination (vpbroadcastb/w/d/q); second alternative is the EVEX form.
21773 (define_insn "avx2_pbroadcast<mode>"
21774 [(set (match_operand:VI 0 "register_operand" "=x,v")
21776 (vec_select:<ssescalarmode>
21777 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
21778 (parallel [(const_int 0)]))))]
21780 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
21781 [(set_attr "isa" "*,<pbroadcast_evex_isa>")
21782 (set_attr "type" "ssemov")
21783 (set_attr "prefix_extra" "1")
21784 (set_attr "prefix" "vex,evex")
21785 (set_attr "mode" "<sseinsnmode>")])

;; Same broadcast but with a 256-bit source: register sources use the
;; low xmm part (%x1); memory sources broadcast the scalar directly.
21787 (define_insn "avx2_pbroadcast<mode>_1"
21788 [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
21789 (vec_duplicate:VI_256
21790 (vec_select:<ssescalarmode>
21791 (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
21792 (parallel [(const_int 0)]))))]
21795 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
21796 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
21797 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
21798 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
21799 [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
21800 (set_attr "type" "ssemov")
21801 (set_attr "prefix_extra" "1")
21802 (set_attr "prefix" "vex")
21803 (set_attr "mode" "<sseinsnmode>")])

;; Cross-lane variable permute (vpermd/q/ps/pd): operand 2 holds the
;; per-element indices.  Note the operand order swap in the template.
21805 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
21806 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
21807 (unspec:VI48F_256_512
21808 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
21809 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
21811 "TARGET_AVX2 && <mask_mode512bit_condition>"
21812 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
21813 [(set_attr "type" "sselog")
21814 (set_attr "prefix" "<mask_prefix2>")
21815 (set_attr "mode" "<sseinsnmode>")])

;; Byte-element variable permute (vpermb), AVX512VBMI.
21817 (define_insn "<avx512>_permvar<mode><mask_name>"
21818 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
21819 (unspec:VI1_AVX512VL
21820 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
21821 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
21823 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
21824 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
21825 [(set_attr "type" "sselog")
21826 (set_attr "prefix" "<mask_prefix2>")
21827 (set_attr "mode" "<sseinsnmode>")])

;; Word-element variable permute (vpermw), AVX512BW.
21829 (define_insn "<avx512>_permvar<mode><mask_name>"
21830 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
21831 (unspec:VI2_AVX512VL
21832 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
21833 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
21835 "TARGET_AVX512BW && <mask_mode512bit_condition>"
21836 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
21837 [(set_attr "type" "sselog")
21838 (set_attr "prefix" "<mask_prefix2>")
21839 (set_attr "mode" "<sseinsnmode>")])
21841 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
21842 ;; If it so happens that the input is in memory, use vbroadcast.
21843 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; Three alternatives: (0) memory source, element 0 -> vbroadcastss of
;; the scalar; (1) memory source, any element -> adjust the address by
;; elt*4 and vbroadcastss; (2) register source -> vpermilps with a
;; replicated 2-bit selector (elt * 0x55).
21844 (define_insn "*avx_vperm_broadcast_v4sf"
21845 [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
21847 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
21848 (match_parallel 2 "avx_vbroadcast_operand"
21849 [(match_operand 3 "const_int_operand" "C,n,n")])))]
21852 int elt = INTVAL (operands[3]);
21853 switch (which_alternative)
;; Offset the memory operand to the selected SFmode element.
21857 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
21858 return "vbroadcastss\t{%1, %0|%0, %k1}";
;; elt*0x55 replicates the 2-bit element index into all four fields
;; of the vpermilps immediate.
21860 operands[2] = GEN_INT (elt * 0x55);
21861 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
21863 gcc_unreachable ();
21866 [(set_attr "type" "ssemov,ssemov,sselog1")
21867 (set_attr "prefix_extra" "1")
21868 (set_attr "length_immediate" "0,0,1")
21869 (set_attr "prefix" "maybe_evex")
21870 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit FP broadcast of element `elt' recognized as a constant
;; vec_select.  After reload this splits to a plain vec_duplicate:
;;  - memory source: rewrite the address to the selected scalar element;
;;  - AVX2 with elt == 0: direct register broadcast (gen_vec_dup<mode>);
;;  - otherwise: replicate the element within its 128-bit lane via
;;    vpermil (mask 0x55/15 patterns), then copy that lane to both
;;    halves.  For EVEX-only registers there is no VPERM2F128, so V8SF
;;    uses vbroadcastss-style dup of the low lane or vshuff32x4 for the
;;    high lane; otherwise vperm2f128 with mask (elt / half) * 0x11.
21872 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
21873 [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
21875 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
21876 (match_parallel 2 "avx_vbroadcast_operand"
21877 [(match_operand 3 "const_int_operand" "C,n,n")])))]
21879 && (<MODE>mode != V4DFmode || !TARGET_AVX2 || operands[3] == const0_rtx)"
21881 "&& reload_completed"
21882 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
21884 rtx op0 = operands[0], op1 = operands[1];
21885 int elt = INTVAL (operands[3]);
21891 if (TARGET_AVX2 && elt == 0)
21893 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
21898 /* Shuffle element we care about into all elements of the 128-bit lane.
21899 The other lane gets shuffled too, but we don't care. */
21900 if (<MODE>mode == V4DFmode)
21901 mask = (elt & 1 ? 15 : 0);
21903 mask = (elt & 3) * 0x55;
21904 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
21906 /* Shuffle the lane we care about into both lanes of the dest. */
21907 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
21908 if (EXT_REX_SSE_REG_P (op0))
21910 /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
21912 gcc_assert (<MODE>mode == V8SFmode);
21913 if ((mask & 1) == 0)
21914 emit_insn (gen_avx2_vec_dupv8sf (op0,
21915 gen_lowpart (V4SFmode, op0)));
21917 emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
21918 GEN_INT (4), GEN_INT (5),
21919 GEN_INT (6), GEN_INT (7),
21920 GEN_INT (12), GEN_INT (13),
21921 GEN_INT (14), GEN_INT (15)));
21925 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
21929 operands[1] = adjust_address (op1, <ssescalarmode>mode,
21930 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for the DF-element vpermilpd immediate form: turn the imm8
;; into an explicit vec_select PARALLEL.  One mask bit per element,
;; offset by the base index i of each 2-element lane.
21933 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
21934 [(set (match_operand:VF2 0 "register_operand")
21936 (match_operand:VF2 1 "nonimmediate_operand")
21937 (match_operand:SI 2 "const_0_to_255_operand")))]
21938 "TARGET_AVX && <mask_mode512bit_condition>"
21940 int mask = INTVAL (operands[2]);
21941 rtx perm[<ssescalarnum>];
21944 for (i = 0; i < <ssescalarnum>; i = i + 2)
21946 perm[i] = GEN_INT (((mask >> i) & 1) + i);
21947 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
21951 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for the SF-element vpermilps immediate form: turn the imm8
;; into an explicit vec_select PARALLEL.  Four 2-bit index fields, the
;; same fields reused in every 4-element lane (offset by the lane base i).
21954 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
21955 [(set (match_operand:VF1 0 "register_operand")
21957 (match_operand:VF1 1 "nonimmediate_operand")
21958 (match_operand:SI 2 "const_0_to_255_operand")))]
21959 "TARGET_AVX && <mask_mode512bit_condition>"
21961 int mask = INTVAL (operands[2]);
21962 rtx perm[<ssescalarnum>];
21965 for (i = 0; i < <ssescalarnum>; i = i + 4)
21967 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
21968 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
21969 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
21970 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
21974 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
21977 ;; This pattern needs to come before the avx2_perm*/avx512f_perm*
21978 ;; patterns, as they have the same RTL representation (vpermilp*
21979 ;; being a subset of what vpermp* can do), but vpermilp* has shorter
21980 ;; latency as it never crosses lanes.
;; Matches a constant vec_select that avx_vpermilp_parallel recognizes as
;; an in-lane permute.  The helper returns the imm8 encoding plus one
;; (zero meaning "no match"), hence the `- 1' before emitting vpermilps/pd.
21981 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
21982 [(set (match_operand:VF 0 "register_operand" "=v")
21984 (match_operand:VF 1 "nonimmediate_operand" "vm")
21985 (match_parallel 2 ""
21986 [(match_operand 3 "const_int_operand")])))]
21987 "TARGET_AVX && <mask_mode512bit_condition>
21988 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
21990 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
21991 operands[2] = GEN_INT (mask);
21992 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
21994 [(set_attr "type" "sselog")
21995 (set_attr "prefix_extra" "1")
21996 (set_attr "length_immediate" "1")
21997 (set_attr "prefix" "<mask_prefix>")
21998 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the 256-bit 64-bit-element permute (vpermq/vpermpd):
;; decompose the imm8 into four 2-bit element selectors and forward to
;; the avx2_perm<mode>_1 pattern.
22000 (define_expand "avx2_perm<mode>"
22001 [(match_operand:VI8F_256 0 "register_operand")
22002 (match_operand:VI8F_256 1 "nonimmediate_operand")
22003 (match_operand:SI 2 "const_0_to_255_operand")]
22006 int mask = INTVAL (operands[2]);
22007 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
22008 GEN_INT ((mask >> 0) & 3),
22009 GEN_INT ((mask >> 2) & 3),
22010 GEN_INT ((mask >> 4) & 3),
22011 GEN_INT ((mask >> 6) & 3)));
;; Masked variant of the 256-bit 64-bit-element permute: same imm8
;; decomposition as avx2_perm<mode>, additionally passing the merge
;; operand (operand 3) and the write-mask register (operand 4).
22015 (define_expand "avx512vl_perm<mode>_mask"
22016 [(match_operand:VI8F_256 0 "register_operand")
22017 (match_operand:VI8F_256 1 "nonimmediate_operand")
22018 (match_operand:SI 2 "const_0_to_255_operand")
22019 (match_operand:VI8F_256 3 "nonimm_or_0_operand")
22020 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22023 int mask = INTVAL (operands[2]);
22024 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
22025 GEN_INT ((mask >> 0) & 3),
22026 GEN_INT ((mask >> 2) & 3),
22027 GEN_INT ((mask >> 4) & 3),
22028 GEN_INT ((mask >> 6) & 3),
22029 operands[3], operands[4]));
;; Matcher for the 256-bit 64-bit-element permute as a constant
;; vec_select of four 0..3 indices.  Output reassembles the imm8 from
;; the four selectors (2 bits each) before printing vpermq/vpermpd.
22033 (define_insn "avx2_perm<mode>_1<mask_name>"
22034 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
22035 (vec_select:VI8F_256
22036 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
22037 (parallel [(match_operand 2 "const_0_to_3_operand")
22038 (match_operand 3 "const_0_to_3_operand")
22039 (match_operand 4 "const_0_to_3_operand")
22040 (match_operand 5 "const_0_to_3_operand")])))]
22041 "TARGET_AVX2 && <mask_mode512bit_condition>"
22044 mask |= INTVAL (operands[2]) << 0;
22045 mask |= INTVAL (operands[3]) << 2;
22046 mask |= INTVAL (operands[4]) << 4;
22047 mask |= INTVAL (operands[5]) << 6;
22048 operands[2] = GEN_INT (mask);
22049 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
22051 [(set_attr "type" "sselog")
22052 (set_attr "prefix" "<mask_prefix2>")
22053 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit 64-bit-element immediate permute: the imm8 selects within each
;; 256-bit half independently, so the same four 2-bit fields are emitted
;; twice — once as-is for the low half and once offset by 4 for the high
;; half (indices 4..7).
22055 (define_expand "avx512f_perm<mode>"
22056 [(match_operand:V8FI 0 "register_operand")
22057 (match_operand:V8FI 1 "nonimmediate_operand")
22058 (match_operand:SI 2 "const_0_to_255_operand")]
22061 int mask = INTVAL (operands[2]);
22062 emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
22063 GEN_INT ((mask >> 0) & 3),
22064 GEN_INT ((mask >> 2) & 3),
22065 GEN_INT ((mask >> 4) & 3),
22066 GEN_INT ((mask >> 6) & 3),
22067 GEN_INT (((mask >> 0) & 3) + 4),
22068 GEN_INT (((mask >> 2) & 3) + 4),
22069 GEN_INT (((mask >> 4) & 3) + 4),
22070 GEN_INT (((mask >> 6) & 3) + 4)));
;; Masked variant of avx512f_perm<mode>: identical imm8 decomposition
;; (low-half fields plus the same fields offset by 4 for the high half),
;; plus the merge operand (3) and write-mask register (4).
22074 (define_expand "avx512f_perm<mode>_mask"
22075 [(match_operand:V8FI 0 "register_operand")
22076 (match_operand:V8FI 1 "nonimmediate_operand")
22077 (match_operand:SI 2 "const_0_to_255_operand")
22078 (match_operand:V8FI 3 "nonimm_or_0_operand")
22079 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22082 int mask = INTVAL (operands[2]);
22083 emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
22084 GEN_INT ((mask >> 0) & 3),
22085 GEN_INT ((mask >> 2) & 3),
22086 GEN_INT ((mask >> 4) & 3),
22087 GEN_INT ((mask >> 6) & 3),
22088 GEN_INT (((mask >> 0) & 3) + 4),
22089 GEN_INT (((mask >> 2) & 3) + 4),
22090 GEN_INT (((mask >> 4) & 3) + 4),
22091 GEN_INT (((mask >> 6) & 3) + 4),
22092 operands[3], operands[4]));
;; Matcher for the 512-bit immediate permute as an 8-index vec_select.
;; The insn condition enforces the encodable shape: each high-half
;; selector (operands 6-9) must equal its low-half counterpart plus 4,
;; since the hardware applies one imm8 to both 256-bit halves.  Output
;; rebuilds the imm8 from the four low-half selectors.
22096 (define_insn "avx512f_perm<mode>_1<mask_name>"
22097 [(set (match_operand:V8FI 0 "register_operand" "=v")
22099 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
22100 (parallel [(match_operand 2 "const_0_to_3_operand")
22101 (match_operand 3 "const_0_to_3_operand")
22102 (match_operand 4 "const_0_to_3_operand")
22103 (match_operand 5 "const_0_to_3_operand")
22104 (match_operand 6 "const_4_to_7_operand")
22105 (match_operand 7 "const_4_to_7_operand")
22106 (match_operand 8 "const_4_to_7_operand")
22107 (match_operand 9 "const_4_to_7_operand")])))]
22108 "TARGET_AVX512F && <mask_mode512bit_condition>
22109 && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
22110 && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
22111 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
22112 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
22115 mask |= INTVAL (operands[2]) << 0;
22116 mask |= INTVAL (operands[3]) << 2;
22117 mask |= INTVAL (operands[4]) << 4;
22118 mask |= INTVAL (operands[5]) << 6;
22119 operands[2] = GEN_INT (mask);
22120 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
22122 [(set_attr "type" "sselog")
22123 (set_attr "prefix" "<mask_prefix2>")
22124 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit-lane permute of two 256-bit integer operands (vperm2i128):
;; the imm8 in operand 3 selects which lane of which source goes to
;; each result lane.  VEX-only (no EVEX encoding exists).
22126 (define_insn "avx2_permv2ti"
22127 [(set (match_operand:V4DI 0 "register_operand" "=x")
22129 [(match_operand:V4DI 1 "register_operand" "x")
22130 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
22131 (match_operand:SI 3 "const_0_to_255_operand" "n")]
22134 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
22135 [(set_attr "type" "sselog")
22136 (set_attr "prefix" "vex")
22137 (set_attr "mode" "OI")])
;; Broadcast element 0 of a V2DF register to all four V4DF lanes
;; via vbroadcastsd.
22139 (define_insn "avx2_vec_dupv4df"
22140 [(set (match_operand:V4DF 0 "register_operand" "=v")
22141 (vec_duplicate:V4DF
22143 (match_operand:V2DF 1 "register_operand" "v")
22144 (parallel [(const_int 0)]))))]
22146 "vbroadcastsd\t{%1, %0|%0, %1}"
22147 [(set_attr "type" "sselog1")
22148 (set_attr "prefix" "maybe_evex")
22149 (set_attr "mode" "V4DF")])
;; Broadcast element 0 of an integer vector to every element via
;; vpbroadcast.  Alternative 0 broadcasts from the low part (%x1) of a
;; vector register; alternative 1 broadcasts a scalar-sized load (%<iptr>1)
;; from memory.
22151 (define_insn "<avx512>_vec_dup<mode>_1"
22152 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
22153 (vec_duplicate:VI_AVX512BW
22154 (vec_select:<ssescalarmode>
22155 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
22156 (parallel [(const_int 0)]))))]
22159 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
22160 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
22161 [(set_attr "type" "ssemov")
22162 (set_attr "prefix" "evex")
22163 (set_attr "mode" "<sseinsnmode>")])
;; Maskable broadcast of element 0 of an XMM-sized source to a 4/8-byte
;; element vector.  V2DF has no DF broadcast to a 128-bit register in
;; AVX-512, so it is mimicked with the integer vpbroadcastq.
22165 (define_insn "<avx512>_vec_dup<mode><mask_name>"
22166 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
22167 (vec_duplicate:V48_AVX512VL
22168 (vec_select:<ssescalarmode>
22169 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
22170 (parallel [(const_int 0)]))))]
22173 /* There is no DF broadcast (in AVX-512*) to 128b register.
22174 Mimic it with integer variant. */
22175 if (<MODE>mode == V2DFmode)
22176 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
22178 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}";
22180 [(set_attr "type" "ssemov")
22181 (set_attr "prefix" "evex")
22182 (set_attr "mode" "<sseinsnmode>")])
;; Maskable broadcast of element 0 of an XMM-sized source for byte/word
;; element vectors, via vpbroadcastb/vpbroadcastw.
22184 (define_insn "<avx512>_vec_dup<mode><mask_name>"
22185 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
22186 (vec_duplicate:VI12_AVX512VL
22187 (vec_select:<ssescalarmode>
22188 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
22189 (parallel [(const_int 0)]))))]
22191 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}"
22192 [(set_attr "type" "ssemov")
22193 (set_attr "prefix" "evex")
22194 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a 128-bit source to all four lanes of a 512-bit 32-bit
;; element vector.  Register source: vshufi/f32x4 of the zero-extended
;; source against itself with imm 0; memory source: vbroadcasti/f32x4.
22196 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
22197 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
22198 (vec_duplicate:V16FI
22199 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
22202 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
22203 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22204 [(set_attr "type" "ssemov")
22205 (set_attr "prefix" "evex")
22206 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a 256-bit source to both halves of a 512-bit 64-bit element
;; vector.  Register source: vshufi/f64x2 with imm 0x44 (low two 128-bit
;; lanes duplicated); memory source: vbroadcasti/f64x4.
22208 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
22209 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
22210 (vec_duplicate:V8FI
22211 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
22214 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
22215 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22216 [(set_attr "type" "ssemov")
22217 (set_attr "prefix" "evex")
22218 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a byte/word scalar to a vector.  Alternative 0 takes a
;; vector register or memory; alternative 1 broadcasts directly from a
;; GPR (%k1 selects the 32-bit register name for the b/w forms).
22220 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
22221 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
22222 (vec_duplicate:VI12_AVX512VL
22223 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
22226 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
22227 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
22228 [(set_attr "type" "ssemov")
22229 (set_attr "prefix" "evex")
22230 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a 4/8-byte scalar to a vector.  The GPR alternative (1) is
;; enabled only for integer element modes, and for DImode only on
;; 64-bit targets (no 64-bit GPR broadcast in 32-bit mode).
22232 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
22233 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
22234 (vec_duplicate:V48_AVX512VL
22235 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
22237 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22238 [(set_attr "type" "ssemov")
22239 (set_attr "prefix" "evex")
22240 (set_attr "mode" "<sseinsnmode>")
22241 (set (attr "enabled")
22242 (if_then_else (eq_attr "alternative" "1")
22243 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
22244 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
;; Duplicate an SF scalar into all four V4SF elements.
;;  alt 0: AVX register source  -> vshufps $0 (3-operand)
;;  alt 1: AVX memory source    -> vbroadcastss
;;  alt 2: pre-AVX, in-place    -> shufps $0 (destructive, op must be %0)
22247 (define_insn "vec_dupv4sf"
22248 [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
22249 (vec_duplicate:V4SF
22250 (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
22253 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
22254 vbroadcastss\t{%1, %0|%0, %1}
22255 shufps\t{$0, %0, %0|%0, %0, 0}"
22256 [(set_attr "isa" "avx,avx,noavx")
22257 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
22258 (set_attr "length_immediate" "1,0,1")
22259 (set_attr "prefix_extra" "0,1,*")
22260 (set_attr "prefix" "maybe_evex,maybe_evex,orig")
22261 (set_attr "mode" "V4SF")])
;; Duplicate an SI scalar into all four V4SI elements.
;;  alt 0: SSE2 register source -> pshufd $0
;;  alt 1: AVX memory source    -> vbroadcastss (bitwise-equivalent FP form)
;;  alt 2: pre-AVX, in-place    -> shufps $0
22263 (define_insn "*vec_dupv4si"
22264 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
22265 (vec_duplicate:V4SI
22266 (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
22269 %vpshufd\t{$0, %1, %0|%0, %1, 0}
22270 vbroadcastss\t{%1, %0|%0, %1}
22271 shufps\t{$0, %0, %0|%0, %0, 0}"
22272 [(set_attr "isa" "sse2,avx,noavx")
22273 (set_attr "type" "sselog1,ssemov,sselog1")
22274 (set_attr "length_immediate" "1,0,1")
22275 (set_attr "prefix_extra" "0,1,*")
22276 (set_attr "prefix" "maybe_vex,maybe_evex,orig")
22277 (set_attr "mode" "TI,V4SF,V4SF")])
;; Duplicate a DI scalar into both V2DI elements; alternatives cover
;; SSE2 punpcklqdq (in-place), AVX vpunpcklqdq, SSE3 movddup from
;; memory, and a pre-AVX fallback.
22279 (define_insn "*vec_dupv2di"
22280 [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
22281 (vec_duplicate:V2DI
22282 (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,vm,0")))]
22286 vpunpcklqdq\t{%d1, %0|%0, %d1}
22287 %vmovddup\t{%1, %0|%0, %1}
22289 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
22290 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
22291 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
22292 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Broadcast a 128-bit memory operand to both halves of a 256-bit
;; integer vector: plain AVX2 vbroadcasti128, or the EVEX-encodable
;; AVX512DQ/AVX512VL element-typed forms.
22294 (define_insn "avx2_vbroadcasti128_<mode>"
22295 [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
22297 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
22301 vbroadcasti128\t{%1, %0|%0, %1}
22302 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
22303 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
22304 [(set_attr "isa" "*,avx512dq,avx512vl")
22305 (set_attr "type" "ssemov")
22306 (set_attr "prefix_extra" "1")
22307 (set_attr "prefix" "vex,evex,evex")
22308 (set_attr "mode" "OI")])
22310 ;; Modes handled by AVX vec_dup patterns.
;; vecdupssescalarmodesuffix maps each mode to the FP broadcast suffix
;; (ss/sd) used by the non-AVX2 vbroadcast alternative of vec_dup<mode>,
;; including for the integer modes (bitwise-equivalent FP broadcast).
22311 (define_mode_iterator AVX_VEC_DUP_MODE
22312 [V8SI V8SF V4DI V4DF])
22313 (define_mode_attr vecdupssescalarmodesuffix
22314 [(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")])
22315 ;; Modes handled by AVX2 vec_dup patterns.
22316 (define_mode_iterator AVX2_VEC_DUP_MODE
22317 [V32QI V16QI V16HI V8HI V8SI V4SI])
;; AVX2 scalar->vector broadcast: from memory (alt 0), from the low part
;; of a vector register (alt 1), or from a GPR (alt 2, disabled when
;; AVX512VL handles GPR broadcasts natively).  The GPR alternative is
;; preferred only when inter-unit GPR->vector moves are fast.
22319 (define_insn "*vec_dup<mode>"
22320 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,v")
22321 (vec_duplicate:AVX2_VEC_DUP_MODE
22322 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
22325 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
22326 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
22328 [(set_attr "isa" "*,*,noavx512vl")
22329 (set_attr "type" "ssemov")
22330 (set_attr "prefix_extra" "1")
22331 (set_attr "prefix" "maybe_evex")
22332 (set_attr "mode" "<sseinsnmode>")
22333 (set (attr "preferred_for_speed")
22334 (cond [(eq_attr "alternative" "2")
22335 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
22337 (symbol_ref "true")))])
;; 256-bit scalar->vector broadcast.  Alternatives: AVX2 element-typed
;; broadcast from memory; AVX1 FP-typed vbroadcastss/sd from memory
;; (via vecdupssescalarmodesuffix); AVX2 broadcast from the low XMM;
;; AVX512F broadcast through the zmm view (%g0); and an AVX1 register
;; fallback that a later splitter expands.
22339 (define_insn "vec_dup<mode>"
22340 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
22341 (vec_duplicate:AVX_VEC_DUP_MODE
22342 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
22345 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
22346 vbroadcast<vecdupssescalarmodesuffix>\t{%1, %0|%0, %1}
22347 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
22348 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
22350 [(set_attr "type" "ssemov")
22351 (set_attr "prefix_extra" "1")
22352 (set_attr "prefix" "maybe_evex")
22353 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
22354 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
;; Splitter: broadcast from a GPR when no direct GPR-broadcast insn is
;; available.  Moves the GPR into element 0 of an XMM (vec_setv4si_0),
;; then broadcasts that XMM with avx2_pbroadcast.  Disabled when
;; AVX512VL(+BW) provides vpbroadcast from a GPR directly.
22357 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
22358 (vec_duplicate:AVX2_VEC_DUP_MODE
22359 (match_operand:<ssescalarmode> 1 "register_operand")))]
22361 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
22362 available, because then we can broadcast from GPRs directly.
22363 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
22364 for V*SI mode it requires just -mavx512vl. */
22365 && !(TARGET_AVX512VL
22366 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
22367 && reload_completed && GENERAL_REG_P (operands[1])"
22370 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
22371 CONST0_RTX (V4SImode),
22372 gen_lowpart (SImode, operands[1])));
22373 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
22374 gen_lowpart (<ssexmmmode>mode,
;; Splitter for AVX without AVX2: a 256-bit scalar broadcast becomes a
;; 128-bit vec_duplicate into the low half of the destination, followed
;; by a vec_concat of that half with itself.
22380 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
22381 (vec_duplicate:AVX_VEC_DUP_MODE
22382 (match_operand:<ssescalarmode> 1 "register_operand")))]
22383 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
22384 [(set (match_dup 2)
22385 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
22387 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
22388 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
;; Broadcast a 128-bit half to both lanes of a 256-bit vector.  Memory
;; sources use vbroadcast; register sources whose input is already in
;; the destination use vinsert of the low half into the high lane
;; (or vperm2f128 $0 for a different register).  EVEX-register columns
;; use the AVX512DQ/AVX512VL element-typed encodings.
22390 (define_insn "avx_vbroadcastf128_<mode>"
22391 [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
22393 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
22397 vbroadcast<i128>\t{%1, %0|%0, %1}
22398 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
22399 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
22400 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
22401 vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
22402 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
22403 vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
22404 [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
22405 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
22406 (set_attr "prefix_extra" "1")
22407 (set_attr "length_immediate" "0,1,1,0,1,0,1")
22408 (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
22409 (set_attr "mode" "<sseinsnmode>")])
22411 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
;; Iterator of destination modes for the broadcast[i|f]32x2 patterns,
;; plus mode attributes giving the 2-element source modes for the
;; 64x2 and 32x2 broadcast/shuffle patterns below.
22412 (define_mode_iterator VI4F_BRCST32x2
22413 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
22414 V16SF (V8SF "TARGET_AVX512VL")])
22416 (define_mode_attr 64x2mode
22417 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
22419 (define_mode_attr 32x2mode
22420 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
22421 (V8SF "V2SF") (V4SI "V2SI")])
;; Broadcast the low two 32-bit elements (a 64-bit pair, hence %q1) of
;; an XMM-sized source to every 64-bit pair of the destination, via
;; vbroadcasti32x2 / vbroadcastf32x2.
22423 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
22424 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
22425 (vec_duplicate:VI4F_BRCST32x2
22426 (vec_select:<32x2mode>
22427 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
22428 (parallel [(const_int 0) (const_int 1)]))))]
22430 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
22431 [(set_attr "type" "ssemov")
22432 (set_attr "prefix_extra" "1")
22433 (set_attr "prefix" "evex")
22434 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a 128-bit source to both halves of a 256-bit 32-bit element
;; vector.  Register source: vshufi/f32x4 $0 on the 256-bit view (%t1);
;; memory source: vbroadcasti/f32x4.
22436 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
22437 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
22438 (vec_duplicate:VI4F_256
22439 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
22442 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
22443 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22444 [(set_attr "type" "ssemov")
22445 (set_attr "prefix_extra" "1")
22446 (set_attr "prefix" "evex")
22447 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a 256-bit source to both halves of a 512-bit 32-bit element
;; vector.  Register source: vshufi/f32x4 $0x44 on the zmm view (%g1);
;; memory source: vbroadcasti/f32x8.
22449 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
22450 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
22451 (vec_duplicate:V16FI
22452 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
22455 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
22456 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22457 [(set_attr "type" "ssemov")
22458 (set_attr "prefix_extra" "1")
22459 (set_attr "prefix" "evex")
22460 (set_attr "mode" "<sseinsnmode>")])
22462 ;; For broadcast[i|f]64x2
;; Destination modes for broadcast[i|f]64x2, and the matching insn:
;; broadcast a 128-bit (2 x 64-bit) source to every 128-bit lane.
;; Register source: vshufi/f64x2 $0 on the widened view (%<xtg_mode>1);
;; memory source: vbroadcasti/f64x2.
22463 (define_mode_iterator VI8F_BRCST64x2
22464 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
22466 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
22467 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
22468 (vec_duplicate:VI8F_BRCST64x2
22469 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
22472 vshuf<shuffletype>64x2\t{$0x0, %<xtg_mode>1, %<xtg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<xtg_mode>1, %<xtg_mode>1, 0x0}
22473 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22474 [(set_attr "type" "ssemov")
22475 (set_attr "prefix_extra" "1")
22476 (set_attr "prefix" "evex")
22477 (set_attr "mode" "<sseinsnmode>")])
;; AVX512CD vpbroadcastmb2q: broadcast the 8-bit mask register k1 into
;; every 64-bit element of the destination vector.
22479 (define_insn "avx512cd_maskb_vec_dup<mode>"
22480 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
22481 (vec_duplicate:VI8_AVX512VL
22483 (match_operand:QI 1 "register_operand" "k"))))]
22485 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
22486 [(set_attr "type" "mskmov")
22487 (set_attr "prefix" "evex")
22488 (set_attr "mode" "XI")])
;; AVX512CD vpbroadcastmw2d: broadcast the 16-bit mask register k1 into
;; every 32-bit element of the destination vector.
22490 (define_insn "avx512cd_maskw_vec_dup<mode>"
22491 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22492 (vec_duplicate:VI4_AVX512VL
22494 (match_operand:HI 1 "register_operand" "k"))))]
22496 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
22497 [(set_attr "type" "mskmov")
22498 (set_attr "prefix" "evex")
22499 (set_attr "mode" "XI")])
;; Variable in-lane FP permute (vpermilps/vpermilpd): per-element
;; control vector in operand 2 selects elements of operand 1.
22501 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
22502 [(set (match_operand:VF 0 "register_operand" "=v")
22504 [(match_operand:VF 1 "register_operand" "v")
22505 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
22507 "TARGET_AVX && <mask_mode512bit_condition>"
22508 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22509 [(set_attr "type" "sselog")
22510 (set_attr "prefix_extra" "1")
22511 (set_attr "btver2_decode" "vector")
22512 (set_attr "prefix" "<mask_prefix>")
22513 (set_attr "mode" "<sseinsnmode>")])
;; Modes supported by the two-source (vpermi2/vpermt2) permute patterns.
;; Byte modes need AVX512VBMI, word modes AVX512BW; sub-512-bit widths
;; additionally need AVX512VL.  VPERMI2I is the integer-only subset.
22515 (define_mode_iterator VPERMI2
22516 [V16SI V16SF V8DI V8DF
22517 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
22518 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
22519 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
22520 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
22521 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
22522 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
22523 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
22524 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
22526 (define_mode_iterator VPERMI2I
22528 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
22529 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
22530 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
22531 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
22532 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
22533 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
;; Masked vpermi2: masked-out destination elements keep the index
;; operand's value, so operand 2 is forced into a register and its
;; <MODE>mode lowpart becomes the vec_merge fallthrough (operand 5).
22535 (define_expand "<avx512>_vpermi2var<mode>3_mask"
22536 [(set (match_operand:VPERMI2 0 "register_operand")
22539 [(match_operand:<sseintvecmode> 2 "register_operand")
22540 (match_operand:VPERMI2 1 "register_operand")
22541 (match_operand:VPERMI2 3 "nonimmediate_operand")]
22544 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
22547 operands[2] = force_reg (<sseintvecmode>mode, operands[2]);
22548 operands[5] = gen_lowpart (<MODE>mode, operands[2]);
;; Integer masked vpermi2: the index register doubles as the destination
;; (constraint "0"), and the write-mask (%{%4%}) merges masked-out
;; elements from it.
22551 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
22552 [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
22553 (vec_merge:VPERMI2I
22555 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
22556 (match_operand:VPERMI2I 1 "register_operand" "v")
22557 (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
22560 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22562 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
22563 [(set_attr "type" "sselog")
22564 (set_attr "prefix" "evex")
22565 (set_attr "mode" "<sseinsnmode>")])
;; FP masked vpermi2: like the integer form, but the merge fallthrough
;; is an FP-mode subreg of the integer index operand (subreg of dup 2).
22567 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
22568 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
22569 (vec_merge:VF_AVX512VL
22570 (unspec:VF_AVX512VL
22571 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
22572 (match_operand:VF_AVX512VL 1 "register_operand" "v")
22573 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
22575 (subreg:VF_AVX512VL (match_dup 2) 0)
22576 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22578 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
22579 [(set_attr "type" "sselog")
22580 (set_attr "prefix" "evex")
22581 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masked vpermt2: forwards to the _maskz_1 pattern with a zero
;; vector as the merge source, so masked-out elements become 0.
22583 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
22584 [(match_operand:VPERMI2 0 "register_operand")
22585 (match_operand:<sseintvecmode> 1 "register_operand")
22586 (match_operand:VPERMI2 2 "register_operand")
22587 (match_operand:VPERMI2 3 "nonimmediate_operand")
22588 (match_operand:<avx512fmaskmode> 4 "register_operand")]
22591 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
22592 operands[0], operands[1], operands[2], operands[3],
22593 CONST0_RTX (<MODE>mode), operands[4]));
;; Unmasked/zero-masked two-source permute.  Two register allocations:
;; if data operand 2 is tied to the destination, emit vpermt2; if the
;; index operand 1 is tied to the destination, emit vpermi2 instead —
;; the two instructions differ only in which input the result overwrites.
22597 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
22598 [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
22600 [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
22601 (match_operand:VPERMI2 2 "register_operand" "0,v")
22602 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
22606 vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
22607 vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
22608 [(set_attr "type" "sselog")
22609 (set_attr "prefix" "evex")
22610 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermt2: data operand 2 is tied to the destination so
;; masked-out elements keep its value.
22612 (define_insn "<avx512>_vpermt2var<mode>3_mask"
22613 [(set (match_operand:VPERMI2 0 "register_operand" "=v")
22616 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
22617 (match_operand:VPERMI2 2 "register_operand" "0")
22618 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
22621 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22623 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
22624 [(set_attr "type" "sselog")
22625 (set_attr "prefix" "evex")
22626 (set_attr "mode" "<sseinsnmode>")])
;; vperm2f128 expander.  When no zeroing bits (3/7) are set in the imm8,
;; rewrite the operation as a vec_select from the concatenation of the
;; two sources: bits 0-1 pick the source lane for the low half, bits 4-5
;; for the high half, each expanded to nelt/2 consecutive element
;; indices.  Otherwise the unspec form is kept (matched by
;; *avx_vperm2f128<mode>_full below).
22628 (define_expand "avx_vperm2f128<mode>3"
22629 [(set (match_operand:AVX256MODE2P 0 "register_operand")
22630 (unspec:AVX256MODE2P
22631 [(match_operand:AVX256MODE2P 1 "register_operand")
22632 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
22633 (match_operand:SI 3 "const_0_to_255_operand")]
22634 UNSPEC_VPERMIL2F128))]
22637 int mask = INTVAL (operands[3]);
22638 if ((mask & 0x88) == 0)
22640 rtx perm[<ssescalarnum>], t1, t2;
22641 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
22643 base = (mask & 3) * nelt2;
22644 for (i = 0; i < nelt2; ++i)
22645 perm[i] = GEN_INT (base + i);
22647 base = ((mask >> 4) & 3) * nelt2;
22648 for (i = 0; i < nelt2; ++i)
22649 perm[i + nelt2] = GEN_INT (base + i);
22651 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
22652 operands[1], operands[2]);
22653 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
22654 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
22655 t2 = gen_rtx_SET (operands[0], t2);
22661 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
22662 ;; means that in order to represent this properly in rtl we'd have to
22663 ;; nest *another* vec_concat with a zero operand and do the select from
22664 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec form of vperm2f128 kept for imm8 values with zeroing bits
;; (see the comment above: representing lane-zeroing in RTL would
;; require a vec_concat with zero and a 4x-wide select).
22665 (define_insn "*avx_vperm2f128<mode>_full"
22666 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
22667 (unspec:AVX256MODE2P
22668 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
22669 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
22670 (match_operand:SI 3 "const_0_to_255_operand" "n")]
22671 UNSPEC_VPERMIL2F128))]
22673 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
22674 [(set_attr "type" "sselog")
22675 (set_attr "prefix_extra" "1")
22676 (set_attr "length_immediate" "1")
22677 (set_attr "prefix" "vex")
22678 (set_attr "mode" "<sseinsnmode>")])
;; vec_select form of vperm2f128 (no lane zeroing).
;; avx_vperm2f128_parallel returns the imm8 encoding plus one; certain
;; encodings degrade to vinsertf128 of the low half of operand 2 into
;; the low or high lane, otherwise vperm2f128 with the decoded imm8.
22680 (define_insn "*avx_vperm2f128<mode>_nozero"
22681 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
22682 (vec_select:AVX256MODE2P
22683 (vec_concat:<ssedoublevecmode>
22684 (match_operand:AVX256MODE2P 1 "register_operand" "x")
22685 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
22686 (match_parallel 3 ""
22687 [(match_operand 4 "const_int_operand")])))]
22689 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
22691 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
22693 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
22695 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
22696 operands[3] = GEN_INT (mask);
22697 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
22699 [(set_attr "type" "sselog")
22700 (set_attr "prefix_extra" "1")
22701 (set_attr "length_immediate" "1")
22702 (set_attr "prefix" "vex")
22703 (set_attr "mode" "<sseinsnmode>")])
;; Recognize a rotate-like constant vec_select as palignr.  The byte
;; shift count is the first selected element index times the element
;; size.  Alternative 0: destructive SSSE3 palignr; alternative 1:
;; 3-operand AVX vpalignr with operand 1 used for both sources.
22705 (define_insn "*ssse3_palignr<mode>_perm"
22706 [(set (match_operand:V_128 0 "register_operand" "=x,Yw")
22708 (match_operand:V_128 1 "register_operand" "0,Yw")
22709 (match_parallel 2 "palignr_operand"
22710 [(match_operand 3 "const_int_operand" "n,n")])))]
22713 operands[2] = (GEN_INT (INTVAL (operands[3])
22714 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
22716 switch (which_alternative)
22719 return "palignr\t{%2, %1, %0|%0, %1, %2}";
22721 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
22723 gcc_unreachable ();
22726 [(set_attr "isa" "noavx,avx")
22727 (set_attr "type" "sseishft")
22728 (set_attr "atom_unit" "sishuf")
22729 (set_attr "prefix_data16" "1,*")
22730 (set_attr "prefix_extra" "1")
22731 (set_attr "length_immediate" "1")
22732 (set_attr "prefix" "orig,maybe_evex")])
;; Masked 128-bit insert into a 256-bit vector: dispatch on the lane
;; selector (operand 3) to the vec_set_lo/vec_set_hi masked patterns,
;; passing the merge operand (4) and write-mask (5).
22734 (define_expand "avx512vl_vinsert<mode>"
22735 [(match_operand:VI48F_256 0 "register_operand")
22736 (match_operand:VI48F_256 1 "register_operand")
22737 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
22738 (match_operand:SI 3 "const_0_to_1_operand")
22739 (match_operand:VI48F_256 4 "register_operand")
22740 (match_operand:<avx512fmaskmode> 5 "register_operand")]
22743 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22745 switch (INTVAL (operands[3]))
22748 insn = gen_vec_set_lo_<mode>_mask;
22751 insn = gen_vec_set_hi_<mode>_mask;
22754 gcc_unreachable ();
22757 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
22762 (define_expand "avx_vinsertf128<mode>"
22763 [(match_operand:V_256 0 "register_operand")
22764 (match_operand:V_256 1 "register_operand")
22765 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
22766 (match_operand:SI 3 "const_0_to_1_operand")]
22769 rtx (*insn)(rtx, rtx, rtx);
22771 switch (INTVAL (operands[3]))
22774 insn = gen_vec_set_lo_<mode>;
22777 insn = gen_vec_set_hi_<mode>;
22780 gcc_unreachable ();
22783 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replace the low 128-bit half of a 256-bit vector (64-bit elements),
;; keeping the high half (elements 2,3) of operand 1.  Prefers the
;; AVX512DQ vinsert*64x2 / AVX512VL vinsert*32x4 encodings (which allow
;; masking via <mask_operand3>), falling back to plain vinsert<i128>.
22787 (define_insn "vec_set_lo_<mode><mask_name>"
22788 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
22789 (vec_concat:VI8F_256
22790 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
22791 (vec_select:<ssehalfvecmode>
22792 (match_operand:VI8F_256 1 "register_operand" "v")
22793 (parallel [(const_int 2) (const_int 3)]))))]
22794 "TARGET_AVX && <mask_avx512dq_condition>"
22796 if (TARGET_AVX512DQ)
22797 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
22798 else if (TARGET_AVX512VL)
22799 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
22801 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
22803 [(set_attr "type" "sselog")
22804 (set_attr "prefix_extra" "1")
22805 (set_attr "length_immediate" "1")
22806 (set_attr "prefix" "vex")
22807 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high 128-bit half (64-bit elements); low half (elements
;; 0,1) of operand 1 is preserved.
22809 (define_insn "vec_set_hi_<mode><mask_name>"
22810 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
22811 (vec_concat:VI8F_256
22812 (vec_select:<ssehalfvecmode>
22813 (match_operand:VI8F_256 1 "register_operand" "v")
22814 (parallel [(const_int 0) (const_int 1)]))
22815 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
22816 "TARGET_AVX && <mask_avx512dq_condition>"
22818 if (TARGET_AVX512DQ)
22819 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
22820 else if (TARGET_AVX512VL)
22821 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
22823 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
22825 [(set_attr "type" "sselog")
22826 (set_attr "prefix_extra" "1")
22827 (set_attr "length_immediate" "1")
22828 (set_attr "prefix" "vex")
22829 (set_attr "mode" "<sseinsnmode>")])
;; Same insert-low operation for 32-bit-element 256-bit vectors; the high
;; half of operand 1 is elements 4..7.
22831 (define_insn "vec_set_lo_<mode><mask_name>"
22832 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
22833 (vec_concat:VI4F_256
22834 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
22835 (vec_select:<ssehalfvecmode>
22836 (match_operand:VI4F_256 1 "register_operand" "v")
22837 (parallel [(const_int 4) (const_int 5)
22838 (const_int 6) (const_int 7)]))))]
22841 if (TARGET_AVX512VL)
22842 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
22844 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
22846 [(set_attr "type" "sselog")
22847 (set_attr "prefix_extra" "1")
22848 (set_attr "length_immediate" "1")
22849 (set_attr "prefix" "vex")
22850 (set_attr "mode" "<sseinsnmode>")])
;; Insert-high counterpart for 32-bit-element 256-bit vectors.
22852 (define_insn "vec_set_hi_<mode><mask_name>"
22853 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
22854 (vec_concat:VI4F_256
22855 (vec_select:<ssehalfvecmode>
22856 (match_operand:VI4F_256 1 "register_operand" "v")
22857 (parallel [(const_int 0) (const_int 1)
22858 (const_int 2) (const_int 3)]))
22859 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
22862 if (TARGET_AVX512VL)
22863 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
22865 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
22867 [(set_attr "type" "sselog")
22868 (set_attr "prefix_extra" "1")
22869 (set_attr "length_immediate" "1")
22870 (set_attr "prefix" "vex")
22871 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit half inserts for V16HI: first alternative uses AVX
;; vinsert[if]128 (%~ picks f/i by target), second uses the EVEX-encoded
;; vinserti32x4.  The vec_select parallel keeps the untouched half.
22873 (define_insn "vec_set_lo_v16hi"
22874 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
22876 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
22878 (match_operand:V16HI 1 "register_operand" "x,v")
22879 (parallel [(const_int 8) (const_int 9)
22880 (const_int 10) (const_int 11)
22881 (const_int 12) (const_int 13)
22882 (const_int 14) (const_int 15)]))))]
22885 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
22886 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
22887 [(set_attr "type" "sselog")
22888 (set_attr "prefix_extra" "1")
22889 (set_attr "length_immediate" "1")
22890 (set_attr "prefix" "vex,evex")
22891 (set_attr "mode" "OI")])
;; Insert into the high half of a V16HI; elements 0..7 of operand 1 are
;; preserved.
22893 (define_insn "vec_set_hi_v16hi"
22894 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
22897 (match_operand:V16HI 1 "register_operand" "x,v")
22898 (parallel [(const_int 0) (const_int 1)
22899 (const_int 2) (const_int 3)
22900 (const_int 4) (const_int 5)
22901 (const_int 6) (const_int 7)]))
22902 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
22905 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
22906 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
22907 [(set_attr "type" "sselog")
22908 (set_attr "prefix_extra" "1")
22909 (set_attr "length_immediate" "1")
22910 (set_attr "prefix" "vex,evex")
22911 (set_attr "mode" "OI")])
;; V32QI low-half insert; elements 16..31 of operand 1 are preserved.
22913 (define_insn "vec_set_lo_v32qi"
22914 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
22916 (match_operand:V16QI 2 "nonimmediate_operand" "xm,v")
22918 (match_operand:V32QI 1 "register_operand" "x,v")
22919 (parallel [(const_int 16) (const_int 17)
22920 (const_int 18) (const_int 19)
22921 (const_int 20) (const_int 21)
22922 (const_int 22) (const_int 23)
22923 (const_int 24) (const_int 25)
22924 (const_int 26) (const_int 27)
22925 (const_int 28) (const_int 29)
22926 (const_int 30) (const_int 31)]))))]
22929 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
22930 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
22931 [(set_attr "type" "sselog")
22932 (set_attr "prefix_extra" "1")
22933 (set_attr "length_immediate" "1")
22934 (set_attr "prefix" "vex,evex")
22935 (set_attr "mode" "OI")])
;; V32QI high-half insert; elements 0..15 of operand 1 are preserved.
22937 (define_insn "vec_set_hi_v32qi"
22938 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
22941 (match_operand:V32QI 1 "register_operand" "x,v")
22942 (parallel [(const_int 0) (const_int 1)
22943 (const_int 2) (const_int 3)
22944 (const_int 4) (const_int 5)
22945 (const_int 6) (const_int 7)
22946 (const_int 8) (const_int 9)
22947 (const_int 10) (const_int 11)
22948 (const_int 12) (const_int 13)
22949 (const_int 14) (const_int 15)]))
22950 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
22953 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
22954 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
22955 [(set_attr "type" "sselog")
22956 (set_attr "prefix_extra" "1")
22957 (set_attr "length_immediate" "1")
22958 (set_attr "prefix" "vex,evex")
22959 (set_attr "mode" "OI")])
;; AVX/AVX2 conditional (masked) vector load: vmaskmovps/pd or
;; vpmaskmov[dq], selected via <sseintprefix>/<ssemodesuffix>.  The mask
;; is the per-element sign bits of integer operand 2.
22961 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
22962 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
22964 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
22965 (match_operand:V48_AVX2 1 "memory_operand" "m")]
22968 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
22969 [(set_attr "type" "sselog1")
22970 (set_attr "prefix_extra" "1")
22971 (set_attr "prefix" "vex")
22972 (set_attr "btver2_decode" "vector")
22973 (set_attr "mode" "<sseinsnmode>")])
;; AVX/AVX2 conditional (masked) vector store, the store counterpart of
;; the maskload pattern above.
22975 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
22976 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
22978 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
22979 (match_operand:V48_AVX2 2 "register_operand" "x")
22983 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
22984 [(set_attr "type" "sselog1")
22985 (set_attr "prefix_extra" "1")
22986 (set_attr "prefix" "vex")
22987 (set_attr "btver2_decode" "vector")
22988 (set_attr "mode" "<sseinsnmode>")])
;; Standard-named maskload/maskstore expanders used by the vectorizer:
;; AVX2 form takes an integer-vector mask ...
22990 (define_expand "maskload<mode><sseintvecmodelower>"
22991 [(set (match_operand:V48_AVX2 0 "register_operand")
22993 [(match_operand:<sseintvecmode> 2 "register_operand")
22994 (match_operand:V48_AVX2 1 "memory_operand")]
;; ... while the AVX512 forms take a k-register mask and are expressed
;; as a vec_merge with the memory source.
22998 (define_expand "maskload<mode><avx512fmaskmodelower>"
22999 [(set (match_operand:V48_AVX512VL 0 "register_operand")
23000 (vec_merge:V48_AVX512VL
23001 (match_operand:V48_AVX512VL 1 "memory_operand")
23003 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Byte/word-element masked load (AVX512BW/VL modes).
23006 (define_expand "maskload<mode><avx512fmaskmodelower>"
23007 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
23008 (vec_merge:VI12_AVX512VL
23009 (match_operand:VI12_AVX512VL 1 "memory_operand")
23011 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Corresponding maskstore expanders, mirroring the three load variants.
23014 (define_expand "maskstore<mode><sseintvecmodelower>"
23015 [(set (match_operand:V48_AVX2 0 "memory_operand")
23017 [(match_operand:<sseintvecmode> 2 "register_operand")
23018 (match_operand:V48_AVX2 1 "register_operand")
23023 (define_expand "maskstore<mode><avx512fmaskmodelower>"
23024 [(set (match_operand:V48_AVX512VL 0 "memory_operand")
23025 (vec_merge:V48_AVX512VL
23026 (match_operand:V48_AVX512VL 1 "register_operand")
23028 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
23031 (define_expand "maskstore<mode><avx512fmaskmodelower>"
23032 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
23033 (vec_merge:VI12_AVX512VL
23034 (match_operand:VI12_AVX512VL 1 "register_operand")
23036 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Compare-and-branch on whole-vector (in)equality; lowered through
;; ix86_expand_branch on the CC flags comparison of the two vectors.
23039 (define_expand "cbranch<mode>4"
23040 [(set (reg:CC FLAGS_REG)
23041 (compare:CC (match_operand:VI48_AVX 1 "register_operand")
23042 (match_operand:VI48_AVX 2 "nonimmediate_operand")))
23043 (set (pc) (if_then_else
23044 (match_operator 0 "bt_comparison_operator"
23045 [(reg:CC FLAGS_REG) (const_int 0)])
23046 (label_ref (match_operand 3))
23050 ix86_expand_branch (GET_CODE (operands[0]),
23051 operands[1], operands[2], operands[3]);
;; Cast a 128-bit vector to 256-bit with undefined upper half
;; (UNSPEC_CAST placeholder).  After reload this splits to a plain move
;; of the low half, taking lowparts so the modes line up on either the
;; register or the memory side.
23056 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
23057 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
23058 (vec_concat:AVX256MODE2P
23059 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
23060 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
23061 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
23063 "&& reload_completed"
23064 [(set (match_dup 0) (match_dup 1))]
23066 if (REG_P (operands[0]))
23067 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
23069 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
23070 <ssehalfvecmode>mode);
23073 ;; Modes handled by vec_init expanders.
23074 (define_mode_iterator VEC_INIT_MODE
23075 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
23076 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
23077 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
23078 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
23079 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
23080 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
23081 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
23083 ;; Likewise, but for initialization from half sized vectors.
23084 ;; Thus, these are all VEC_INIT_MODE modes except V2??.
23085 (define_mode_iterator VEC_INIT_HALF_MODE
23086 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
23087 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
23088 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
23089 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
23090 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
23091 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
23092 (V4TI "TARGET_AVX512F")])
;; Standard vec_init expander: build a vector from scalar elements via
;; ix86_expand_vector_init.
23094 (define_expand "vec_init<mode><ssescalarmodelower>"
23095 [(match_operand:VEC_INIT_MODE 0 "register_operand")
23099 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Same entry point, but initializing from two half-width vectors.
23103 (define_expand "vec_init<mode><ssehalfvecmodelower>"
23104 [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
23108 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Conditional (masked) vector shift expander.  A constant-duplicate
;; shift count is unwrapped to a DImode scalar and routed to the
;; immediate/scalar-count masked shift pattern; otherwise the
;; variable-per-element masked shift pattern is used.
23112 (define_expand "cond_<insn><mode>"
23113 [(set (match_operand:VI248_AVX512VLBW 0 "register_operand")
23114 (vec_merge:VI248_AVX512VLBW
23115 (any_shift:VI248_AVX512VLBW
23116 (match_operand:VI248_AVX512VLBW 2 "register_operand")
23117 (match_operand:VI248_AVX512VLBW 3 "nonimmediate_or_const_vec_dup_operand"))
23118 (match_operand:VI248_AVX512VLBW 4 "nonimm_or_0_operand")
23119 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
23122 if (const_vec_duplicate_p (operands[3]))
23124 operands[3] = unwrap_const_vec_duplicate (operands[3]);
23125 operands[3] = lowpart_subreg (DImode, operands[3], <ssescalarmode>mode);
23126 emit_insn (gen_<insn><mode>3_mask (operands[0],
23133 emit_insn (gen_<avx2_avx512>_<insn>v<mode>_mask (operands[0],
;; Variable-count arithmetic right shift, per-element count in operand 2
;; (vpsravd/vpsravq; optionally masked via <mask_name>).
23141 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
23142 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
23143 (ashiftrt:VI48_AVX512F_AVX512VL
23144 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
23145 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
23146 "TARGET_AVX2 && <mask_mode512bit_condition>"
23147 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
23148 [(set_attr "type" "sseishft")
23149 (set_attr "prefix" "maybe_evex")
23150 (set_attr "mode" "<sseinsnmode>")])
;; Word-element variable arithmetic right shift (vpsravw).
23152 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
23153 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
23154 (ashiftrt:VI2_AVX512VL
23155 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
23156 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
23158 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
23159 [(set_attr "type" "sseishft")
23160 (set_attr "prefix" "maybe_evex")
23161 (set_attr "mode" "<sseinsnmode>")])
;; Variable-count left / logical-right shifts (vpsllv*/vpsrlv*) for
;; dword/qword elements.
23163 (define_insn "<avx2_avx512>_<insn>v<mode><mask_name>"
23164 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
23165 (any_lshift:VI48_AVX512F
23166 (match_operand:VI48_AVX512F 1 "register_operand" "v")
23167 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
23168 "TARGET_AVX2 && <mask_mode512bit_condition>"
23169 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
23170 [(set_attr "type" "sseishft")
23171 (set_attr "prefix" "maybe_evex")
23172 (set_attr "mode" "<sseinsnmode>")])
;; Word-element counterpart of the above.
23174 (define_insn "<avx2_avx512>_<insn>v<mode><mask_name>"
23175 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
23176 (any_lshift:VI2_AVX512VL
23177 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
23178 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
23180 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
23181 [(set_attr "type" "sseishft")
23182 (set_attr "prefix" "maybe_evex")
23183 (set_attr "mode" "<sseinsnmode>")])
;; Concatenate two half-width vectors into a 256/512-bit vector.
;; Alternatives 0/1 insert operand 2 into the high half with the
;; narrowest available vinsert encoding (DQ 32x8/64x2 forms when legal);
;; alternatives 2/3 (operand 2 == 0) degenerate to a zero-extending move
;; of operand 1, choosing the move mnemonic from the insn's mode
;; attribute and the operand's alignment (%t0 = 256-bit, %x0 = 128-bit
;; lowpart of the destination).
23185 (define_insn "avx_vec_concat<mode>"
23186 [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
23187 (vec_concat:V_256_512
23188 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "x,v,xm,vm")
23189 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xm,vm,C,C")))]
23191 && (operands[2] == CONST0_RTX (<ssehalfvecmode>mode)
23192 || !MEM_P (operands[1]))"
23194 switch (which_alternative)
23197 return "vinsert<i128>\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
23199 if (<MODE_SIZE> == 64)
23201 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
23202 return "vinsert<shuffletype>32x8\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
23204 return "vinsert<shuffletype>64x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
23208 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
23209 return "vinsert<shuffletype>64x2\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
23211 return "vinsert<shuffletype>32x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
23215 switch (get_attr_mode (insn))
23218 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
23219 return "vmovups\t{%1, %t0|%t0, %1}";
23221 return "vmovaps\t{%1, %t0|%t0, %1}";
23223 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
23224 return "vmovupd\t{%1, %t0|%t0, %1}";
23226 return "vmovapd\t{%1, %t0|%t0, %1}";
23228 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
23229 return "vmovups\t{%1, %x0|%x0, %1}";
23231 return "vmovaps\t{%1, %x0|%x0, %1}";
23233 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
23234 return "vmovupd\t{%1, %x0|%x0, %1}";
23236 return "vmovapd\t{%1, %x0|%x0, %1}";
23238 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
23240 if (which_alternative == 2)
23241 return "vmovdqu\t{%1, %t0|%t0, %1}";
23242 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
23243 return "vmovdqu64\t{%1, %t0|%t0, %1}";
23245 return "vmovdqu32\t{%1, %t0|%t0, %1}";
23249 if (which_alternative == 2)
23250 return "vmovdqa\t{%1, %t0|%t0, %1}";
23251 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
23252 return "vmovdqa64\t{%1, %t0|%t0, %1}";
23254 return "vmovdqa32\t{%1, %t0|%t0, %1}";
23257 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
23259 if (which_alternative == 2)
23260 return "vmovdqu\t{%1, %x0|%x0, %1}";
23261 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
23262 return "vmovdqu64\t{%1, %x0|%x0, %1}";
23264 return "vmovdqu32\t{%1, %x0|%x0, %1}";
23268 if (which_alternative == 2)
23269 return "vmovdqa\t{%1, %x0|%x0, %1}";
23270 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
23271 return "vmovdqa64\t{%1, %x0|%x0, %1}";
23273 return "vmovdqa32\t{%1, %x0|%x0, %1}";
23276 gcc_unreachable ();
23279 gcc_unreachable ();
23282 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
23283 (set_attr "prefix_extra" "1,1,*,*")
23284 (set_attr "length_immediate" "1,1,*,*")
23285 (set_attr "prefix" "maybe_evex")
23286 (set_attr "mode" "<sseinsnmode>")])
;; A movq-style selection (matched by movq_parallel) from the
;; concatenation of a vector with zero is split, before reload, into a
;; simple vec_concat of the operand's low half with a zero half.
23288 (define_insn_and_split "*vec_concat<mode>_0_1"
23289 [(set (match_operand:V 0 "register_operand")
23291 (vec_concat:<ssedoublevecmode>
23292 (match_operand:V 1 "nonimmediate_operand")
23293 (match_operand:V 2 "const0_operand"))
23294 (match_parallel 3 "movq_parallel"
23295 [(match_operand 4 "const_int_operand")])))]
23296 "TARGET_SSE2 && ix86_pre_reload_split ()"
23299 [(set (match_dup 0)
23300 (vec_concat:V (match_dup 1) (match_dup 5)))]
23302 operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
23303 operands[5] = CONST0_RTX (<ssehalfvecmode>mode);
;; Half-precision to single-precision conversions (F16C/AVX512VL
;; vcvtph2ps).  The 128-bit register form converts the low four HImode
;; elements of a V8HI source.
23306 (define_insn "vcvtph2ps<mask_name>"
23307 [(set (match_operand:V4SF 0 "register_operand" "=v")
23309 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
23311 (parallel [(const_int 0) (const_int 1)
23312 (const_int 2) (const_int 3)])))]
23313 "TARGET_F16C || TARGET_AVX512VL"
23314 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
23315 [(set_attr "type" "ssecvt")
23316 (set_attr "prefix" "maybe_evex")
23317 (set_attr "mode" "V4SF")])
;; Memory-source variant: loads four half floats directly.
23319 (define_insn "*vcvtph2ps_load<mask_name>"
23320 [(set (match_operand:V4SF 0 "register_operand" "=v")
23321 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
23322 UNSPEC_VCVTPH2PS))]
23323 "TARGET_F16C || TARGET_AVX512VL"
23324 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
23325 [(set_attr "type" "ssecvt")
23326 (set_attr "prefix" "vex")
23327 (set_attr "mode" "V8SF")])
;; 256-bit form: eight half floats -> eight single floats.
23329 (define_insn "vcvtph2ps256<mask_name>"
23330 [(set (match_operand:V8SF 0 "register_operand" "=v")
23331 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
23332 UNSPEC_VCVTPH2PS))]
23333 "TARGET_F16C || TARGET_AVX512VL"
23334 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
23335 [(set_attr "type" "ssecvt")
23336 (set_attr "prefix" "vex")
23337 (set_attr "btver2_decode" "double")
23338 (set_attr "mode" "V8SF")])
;; 512-bit AVX512F form with optional masking and SAE (round_saeonly)
;; support.
23340 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
23341 [(set (match_operand:V16SF 0 "register_operand" "=v")
23343 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
23344 UNSPEC_VCVTPH2PS))]
23346 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
23347 [(set_attr "type" "ssecvt")
23348 (set_attr "prefix" "evex")
23349 (set_attr "mode" "V16SF")])
;; Single-precision to half-precision conversions (vcvtps2ph).  The
;; result of the 128-bit form occupies the low V4HI half of a V8HI
;; destination; the upper half is zeroed (operand initialized to
;; CONST0_RTX below).  Operand 2 is the rounding-control immediate.
;; Masked builtin expander.
23351 (define_expand "vcvtps2ph_mask"
23352 [(set (match_operand:V8HI 0 "register_operand")
23355 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
23356 (match_operand:SI 2 "const_0_to_255_operand")]
23359 (match_operand:V8HI 3 "nonimm_or_0_operand")
23360 (match_operand:QI 4 "register_operand")))]
23362 "operands[5] = CONST0_RTX (V4HImode);")
;; Unmasked builtin expander.
23364 (define_expand "vcvtps2ph"
23365 [(set (match_operand:V8HI 0 "register_operand")
23367 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
23368 (match_operand:SI 2 "const_0_to_255_operand")]
23372 "operands[3] = CONST0_RTX (V4HImode);")
;; Register-destination insn matching the expanders above.
23374 (define_insn "*vcvtps2ph<mask_name>"
23375 [(set (match_operand:V8HI 0 "register_operand" "=v")
23377 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
23378 (match_operand:SI 2 "const_0_to_255_operand" "N")]
23380 (match_operand:V4HI 3 "const0_operand")))]
23381 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
23382 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
23383 [(set_attr "type" "ssecvt")
23384 (set_attr "prefix" "maybe_evex")
23385 (set_attr "mode" "V4SF")])
;; Store form: converts and writes four half floats to memory
;; (optionally merge-masked).
23387 (define_insn "*vcvtps2ph_store<merge_mask_name>"
23388 [(set (match_operand:V4HI 0 "memory_operand" "=m")
23389 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
23390 (match_operand:SI 2 "const_0_to_255_operand" "N")]
23391 UNSPEC_VCVTPS2PH))]
23392 "TARGET_F16C || TARGET_AVX512VL"
23393 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
23394 [(set_attr "type" "ssecvt")
23395 (set_attr "prefix" "maybe_evex")
23396 (set_attr "mode" "V4SF")])
;; 256-bit source, register destination.
23398 (define_insn "vcvtps2ph256<mask_name>"
23399 [(set (match_operand:V8HI 0 "register_operand" "=v")
23400 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
23401 (match_operand:SI 2 "const_0_to_255_operand" "N")]
23402 UNSPEC_VCVTPS2PH))]
23403 "TARGET_F16C || TARGET_AVX512VL"
23404 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
23405 [(set_attr "type" "ssecvt")
23406 (set_attr "prefix" "maybe_evex")
23407 (set_attr "btver2_decode" "vector")
23408 (set_attr "mode" "V8SF")])
;; 256-bit source, memory destination (merge-masked store).
23410 (define_insn "*vcvtps2ph256<merge_mask_name>"
23411 [(set (match_operand:V8HI 0 "memory_operand" "=m")
23412 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
23413 (match_operand:SI 2 "const_0_to_255_operand" "N")]
23414 UNSPEC_VCVTPS2PH))]
23415 "TARGET_F16C || TARGET_AVX512VL"
23416 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
23417 [(set_attr "type" "ssecvt")
23418 (set_attr "prefix" "maybe_evex")
23419 (set_attr "btver2_decode" "vector")
23420 (set_attr "mode" "V8SF")])
;; 512-bit AVX512F forms: register destination ...
23422 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
23423 [(set (match_operand:V16HI 0 "register_operand" "=v")
23425 [(match_operand:V16SF 1 "register_operand" "v")
23426 (match_operand:SI 2 "const_0_to_255_operand" "N")]
23427 UNSPEC_VCVTPS2PH))]
23429 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
23430 [(set_attr "type" "ssecvt")
23431 (set_attr "prefix" "evex")
23432 (set_attr "mode" "V16SF")])
;; ... and merge-masked memory destination.
23434 (define_insn "*avx512f_vcvtps2ph512<merge_mask_name>"
23435 [(set (match_operand:V16HI 0 "memory_operand" "=m")
23437 [(match_operand:V16SF 1 "register_operand" "v")
23438 (match_operand:SI 2 "const_0_to_255_operand" "N")]
23439 UNSPEC_VCVTPS2PH))]
23441 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
23442 [(set_attr "type" "ssecvt")
23443 (set_attr "prefix" "evex")
23444 (set_attr "mode" "V16SF")])
23446 ;; For gather* insn patterns
23447 (define_mode_iterator VEC_GATHER_MODE
23448 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Index-vector mode when the index elements are 32-bit (SI).
23449 (define_mode_attr VEC_GATHER_IDXSI
23450 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
23451 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
23452 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
23453 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; Index-vector mode when the index elements are 64-bit (DI).
23455 (define_mode_attr VEC_GATHER_IDXDI
23456 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
23457 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
23458 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
23459 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; Source/merge operand mode for DI-indexed gathers: narrower than the
;; full vector when a 64-bit index addresses fewer 32-bit elements.
23461 (define_mode_attr VEC_GATHER_SRCDI
23462 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
23463 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
23464 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
23465 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; AVX2 gather with 32-bit indices.  The expander wraps base, index and
;; scale into an UNSPEC_VSIBADDR address for VSIB addressing; operand 4
;; is the merge source and the mask register is clobbered (consumed) by
;; the instruction.
23467 (define_expand "avx2_gathersi<mode>"
23468 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
23469 (unspec:VEC_GATHER_MODE
23470 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
23471 (mem:<ssescalarmode>
23473 [(match_operand 2 "vsib_address_operand")
23474 (match_operand:<VEC_GATHER_IDXSI>
23475 3 "register_operand")
23476 (match_operand:SI 5 "const1248_operand ")]))
23477 (mem:BLK (scratch))
23478 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
23480 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
23484 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
23485 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn; %M3 prints a memory-operand segment/address prefix as
;; needed for the VSIB operand.
23488 (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
23489 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
23490 (unspec:VEC_GATHER_MODE
23491 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
23492 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
23494 [(match_operand:P 3 "vsib_address_operand" "Tv")
23495 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
23496 (match_operand:SI 6 "const1248_operand" "n")]
23498 (mem:BLK (scratch))
23499 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
23501 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
23503 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
23504 [(set_attr "type" "ssemov")
23505 (set_attr "prefix" "vex")
23506 (set_attr "mode" "<sseinsnmode>")])
;; Variant matched when the destination is not also a source merge
;; operand (operand numbering shifted down by one).
23508 (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>_2"
23509 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
23510 (unspec:VEC_GATHER_MODE
23512 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
23514 [(match_operand:P 2 "vsib_address_operand" "Tv")
23515 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
23516 (match_operand:SI 5 "const1248_operand" "n")]
23518 (mem:BLK (scratch))
23519 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
23521 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
23523 "%M2v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
23524 [(set_attr "type" "ssemov")
23525 (set_attr "prefix" "vex")
23526 (set_attr "mode" "<sseinsnmode>")])
;; AVX2 gather with 64-bit indices; source/merge operands use the
;; possibly-narrower <VEC_GATHER_SRCDI> mode.
23528 (define_expand "avx2_gatherdi<mode>"
23529 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
23530 (unspec:VEC_GATHER_MODE
23531 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
23532 (mem:<ssescalarmode>
23534 [(match_operand 2 "vsib_address_operand")
23535 (match_operand:<VEC_GATHER_IDXDI>
23536 3 "register_operand")
23537 (match_operand:SI 5 "const1248_operand ")]))
23538 (mem:BLK (scratch))
23539 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
23541 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
23545 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
23546 operands[5]), UNSPEC_VSIBADDR);
23549 (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
23550 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
23551 (unspec:VEC_GATHER_MODE
23552 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
23553 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
23555 [(match_operand:P 3 "vsib_address_operand" "Tv")
23556 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
23557 (match_operand:SI 6 "const1248_operand" "n")]
23559 (mem:BLK (scratch))
23560 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
23562 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
23564 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
23565 [(set_attr "type" "ssemov")
23566 (set_attr "prefix" "vex")
23567 (set_attr "mode" "<sseinsnmode>")])
;; _2 variant; prints the destination as its 128-bit lowpart (%x0) when
;; the gather mode is wider than the source/merge mode.
23569 (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>_2"
23570 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
23571 (unspec:VEC_GATHER_MODE
23573 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
23575 [(match_operand:P 2 "vsib_address_operand" "Tv")
23576 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
23577 (match_operand:SI 5 "const1248_operand" "n")]
23579 (mem:BLK (scratch))
23580 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
23582 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
23585 if (<VEC_GATHER_MODE:MODE>mode != <VEC_GATHER_SRCDI>mode)
23586 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
23587 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
23589 [(set_attr "type" "ssemov")
23590 (set_attr "prefix" "vex")
23591 (set_attr "mode" "<sseinsnmode>")])
;; DI-indexed gather whose result is only the low half of a 256-bit
;; vector (vec_select of elements 0..3); matches the narrowed-result
;; forms the combiner produces for V8SI/V8SF gathers with V4DI indices.
23593 (define_insn "*avx2_gatherdi<VI4F_256:mode>_3"
23594 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
23595 (vec_select:<VEC_GATHER_SRCDI>
23597 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
23598 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
23600 [(match_operand:P 3 "vsib_address_operand" "Tv")
23601 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
23602 (match_operand:SI 6 "const1248_operand" "n")]
23604 (mem:BLK (scratch))
23605 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
23607 (parallel [(const_int 0) (const_int 1)
23608 (const_int 2) (const_int 3)])))
23609 (clobber (match_scratch:VI4F_256 1 "=&x"))]
23611 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
23612 [(set_attr "type" "ssemov")
23613 (set_attr "prefix" "vex")
23614 (set_attr "mode" "<sseinsnmode>")])
;; Same, _2-style operand numbering (no separate merge-source tie).
23616 (define_insn "*avx2_gatherdi<VI4F_256:mode>_4"
23617 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
23618 (vec_select:<VEC_GATHER_SRCDI>
23621 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
23623 [(match_operand:P 2 "vsib_address_operand" "Tv")
23624 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
23625 (match_operand:SI 5 "const1248_operand" "n")]
23627 (mem:BLK (scratch))
23628 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
23630 (parallel [(const_int 0) (const_int 1)
23631 (const_int 2) (const_int 3)])))
23632 (clobber (match_scratch:VI4F_256 1 "=&x"))]
23634 "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
23635 [(set_attr "type" "ssemov")
23636 (set_attr "prefix" "vex")
23637 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 gather with 32-bit indices: the mask lives in a k register
;; (operand 4) and is clobbered/zeroed by the instruction, hence the
;; match_scratch clobber.  The expander builds the UNSPEC_VSIBADDR
;; address from base/index/scale as in the AVX2 gathers.
23639 (define_expand "<avx512>_gathersi<mode>"
23640 [(parallel [(set (match_operand:VI48F 0 "register_operand")
23642 [(match_operand:VI48F 1 "register_operand")
23643 (match_operand:<avx512fmaskmode> 4 "register_operand")
23644 (mem:<ssescalarmode>
23646 [(match_operand 2 "vsib_address_operand")
23647 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
23648 (match_operand:SI 5 "const1248_operand")]))]
23650 (clobber (match_scratch:<avx512fmaskmode> 7))])]
23654 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
23655 operands[5]), UNSPEC_VSIBADDR);
;; EVEX gather insn; the k-mask (%{%2%}) selects active elements and is
;; consumed (clobbered) by the instruction.
23658 (define_insn "*avx512f_gathersi<VI48F:mode>"
23659 [(set (match_operand:VI48F 0 "register_operand" "=&v")
23661 [(match_operand:VI48F 1 "register_operand" "0")
23662 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
23663 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
23665 [(match_operand:P 4 "vsib_address_operand" "Tv")
23666 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
23667 (match_operand:SI 5 "const1248_operand" "n")]
23668 UNSPEC_VSIBADDR)])]
23670 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
23672 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
23673 ;; gas changed what it requires incompatibly.
23674 "%M4v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
23675 [(set_attr "type" "ssemov")
23676 (set_attr "prefix" "evex")
23677 (set_attr "mode" "<sseinsnmode>")])
;; _2 variant with shifted operand numbering (no separate merge tie).
23679 (define_insn "*avx512f_gathersi<VI48F:mode>_2"
23680 [(set (match_operand:VI48F 0 "register_operand" "=&v")
23683 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
23684 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
23686 [(match_operand:P 3 "vsib_address_operand" "Tv")
23687 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
23688 (match_operand:SI 4 "const1248_operand" "n")]
23689 UNSPEC_VSIBADDR)])]
23691 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
23693 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
23694 ;; gas changed what it requires incompatibly.
23695 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
23696 [(set_attr "type" "ssemov")
23697 (set_attr "prefix" "evex")
23698 (set_attr "mode" "<sseinsnmode>")])
;; AVX512 gather with 64-bit indices; mask is a QImode k register.
23701 (define_expand "<avx512>_gatherdi<mode>"
23702 [(parallel [(set (match_operand:VI48F 0 "register_operand")
23704 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
23705 (match_operand:QI 4 "register_operand")
23706 (mem:<ssescalarmode>
23708 [(match_operand 2 "vsib_address_operand")
23709 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
23710 (match_operand:SI 5 "const1248_operand")]))]
23712 (clobber (match_scratch:QI 7))])]
23716 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
23717 operands[5]), UNSPEC_VSIBADDR);
23720 (define_insn "*avx512f_gatherdi<VI48F:mode>"
23721 [(set (match_operand:VI48F 0 "register_operand" "=&v")
23723 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
23724 (match_operand:QI 7 "register_operand" "2")
23725 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
23727 [(match_operand:P 4 "vsib_address_operand" "Tv")
23728 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
23729 (match_operand:SI 5 "const1248_operand" "n")]
23730 UNSPEC_VSIBADDR)])]
23732 (clobber (match_scratch:QI 2 "=&Yk"))]
23734 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
23735 ;; gas changed what it requires incompatibly.
23736 "%M4v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
23737 [(set_attr "type" "ssemov")
23738 (set_attr "prefix" "evex")
23739 (set_attr "mode" "<sseinsnmode>")])
23741 (define_insn "*avx512f_gatherdi<VI48F:mode>_2"
23742 [(set (match_operand:VI48F 0 "register_operand" "=&v")
23745 (match_operand:QI 6 "register_operand" "1")
23746 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
23748 [(match_operand:P 3 "vsib_address_operand" "Tv")
23749 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
23750 (match_operand:SI 4 "const1248_operand" "n")]
23751 UNSPEC_VSIBADDR)])]
23753 (clobber (match_scratch:QI 1 "=&Yk"))]
23756 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
23757 gas changed what it requires incompatibly. */
23758 if (<VI48F:MODE>mode != <VEC_GATHER_SRCDI>mode)
23760 if (<VI48F:MODE_SIZE> != 64)
23761 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
23763 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
23765 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
23767 [(set_attr "type" "ssemov")
23768 (set_attr "prefix" "evex")
23769 (set_attr "mode" "<sseinsnmode>")])
23771 (define_expand "<avx512>_scattersi<mode>"
23772 [(parallel [(set (mem:VI48F
23774 [(match_operand 0 "vsib_address_operand")
23775 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
23776 (match_operand:SI 4 "const1248_operand")]))
23778 [(match_operand:<avx512fmaskmode> 1 "register_operand")
23779 (match_operand:VI48F 3 "register_operand")]
23781 (clobber (match_scratch:<avx512fmaskmode> 6))])]
23785 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
23786 operands[4]), UNSPEC_VSIBADDR);
23789 (define_insn "*avx512f_scattersi<VI48F:mode>"
23790 [(set (match_operator:VI48F 5 "vsib_mem_operator"
23792 [(match_operand:P 0 "vsib_address_operand" "Tv")
23793 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
23794 (match_operand:SI 4 "const1248_operand" "n")]
23797 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
23798 (match_operand:VI48F 3 "register_operand" "v")]
23800 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
23802 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
23803 ;; gas changed what it requires incompatibly.
23804 "%M0v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
23805 [(set_attr "type" "ssemov")
23806 (set_attr "prefix" "evex")
23807 (set_attr "mode" "<sseinsnmode>")])
23809 (define_expand "<avx512>_scatterdi<mode>"
23810 [(parallel [(set (mem:VI48F
23812 [(match_operand 0 "vsib_address_operand")
23813 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
23814 (match_operand:SI 4 "const1248_operand")]))
23816 [(match_operand:QI 1 "register_operand")
23817 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
23819 (clobber (match_scratch:QI 6))])]
23823 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
23824 operands[4]), UNSPEC_VSIBADDR);
23827 (define_insn "*avx512f_scatterdi<VI48F:mode>"
23828 [(set (match_operator:VI48F 5 "vsib_mem_operator"
23830 [(match_operand:P 0 "vsib_address_operand" "Tv")
23831 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
23832 (match_operand:SI 4 "const1248_operand" "n")]
23835 [(match_operand:QI 6 "register_operand" "1")
23836 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
23838 (clobber (match_scratch:QI 1 "=&Yk"))]
23840 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
23841 ;; gas changed what it requires incompatibly.
23842 "%M0v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
23843 [(set_attr "type" "ssemov")
23844 (set_attr "prefix" "evex")
23845 (set_attr "mode" "<sseinsnmode>")])
23847 (define_insn "<avx512>_compress<mode>_mask"
23848 [(set (match_operand:VI48F 0 "register_operand" "=v")
23850 [(match_operand:VI48F 1 "register_operand" "v")
23851 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C")
23852 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
23855 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
23856 [(set_attr "type" "ssemov")
23857 (set_attr "prefix" "evex")
23858 (set_attr "mode" "<sseinsnmode>")])
23860 (define_insn "compress<mode>_mask"
23861 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v")
23862 (unspec:VI12_AVX512VLBW
23863 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v")
23864 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C")
23865 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
23867 "TARGET_AVX512VBMI2"
23868 "vpcompress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
23869 [(set_attr "type" "ssemov")
23870 (set_attr "prefix" "evex")
23871 (set_attr "mode" "<sseinsnmode>")])
23873 (define_insn "<avx512>_compressstore<mode>_mask"
23874 [(set (match_operand:VI48F 0 "memory_operand" "=m")
23876 [(match_operand:VI48F 1 "register_operand" "x")
23878 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
23879 UNSPEC_COMPRESS_STORE))]
23881 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
23882 [(set_attr "type" "ssemov")
23883 (set_attr "prefix" "evex")
23884 (set_attr "memory" "store")
23885 (set_attr "mode" "<sseinsnmode>")])
23887 (define_insn "compressstore<mode>_mask"
23888 [(set (match_operand:VI12_AVX512VLBW 0 "memory_operand" "=m")
23889 (unspec:VI12_AVX512VLBW
23890 [(match_operand:VI12_AVX512VLBW 1 "register_operand" "x")
23892 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
23893 UNSPEC_COMPRESS_STORE))]
23894 "TARGET_AVX512VBMI2"
23895 "vpcompress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
23896 [(set_attr "type" "ssemov")
23897 (set_attr "prefix" "evex")
23898 (set_attr "memory" "store")
23899 (set_attr "mode" "<sseinsnmode>")])
23901 (define_expand "<avx512>_expand<mode>_maskz"
23902 [(set (match_operand:VI48F 0 "register_operand")
23904 [(match_operand:VI48F 1 "nonimmediate_operand")
23905 (match_operand:VI48F 2 "nonimm_or_0_operand")
23906 (match_operand:<avx512fmaskmode> 3 "register_operand")]
23909 "operands[2] = CONST0_RTX (<MODE>mode);")
23911 (define_insn "expand<mode>_mask"
23912 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
23914 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
23915 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C,0C")
23916 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
23919 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
23920 [(set_attr "type" "ssemov")
23921 (set_attr "prefix" "evex")
23922 (set_attr "memory" "none,load")
23923 (set_attr "mode" "<sseinsnmode>")])
23925 (define_insn "expand<mode>_mask"
23926 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v,v")
23927 (unspec:VI12_AVX512VLBW
23928 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand" "v,m")
23929 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C,0C")
23930 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
23932 "TARGET_AVX512VBMI2"
23933 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
23934 [(set_attr "type" "ssemov")
23935 (set_attr "prefix" "evex")
23936 (set_attr "memory" "none,load")
23937 (set_attr "mode" "<sseinsnmode>")])
23939 (define_insn_and_split "*expand<mode>_mask"
23940 [(set (match_operand:VI12_VI48F_AVX512VLBW 0 "register_operand")
23941 (unspec:VI12_VI48F_AVX512VLBW
23942 [(match_operand:VI12_VI48F_AVX512VLBW 1 "nonimmediate_operand")
23943 (match_operand:VI12_VI48F_AVX512VLBW 2 "nonimm_or_0_operand")
23944 (match_operand 3 "const_int_operand")]
23946 "ix86_pre_reload_split ()
23947 && (TARGET_AVX512VBMI2 || GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) >= 4)"
23952 unsigned HOST_WIDE_INT mask = INTVAL (operands[3]);
23953 bool has_zero = false;
23954 unsigned n = GET_MODE_NUNITS (<MODE>mode), i;
23957 /* If all ones bits is in mask's lower part,
23958 get number of ones and assign it to ONES. */
23959 for (i = 0; i != n; i++)
23961 if ((mask & HOST_WIDE_INT_1U << i) && has_zero)
23964 /* Record first zero bit. */
23965 if (!(mask & HOST_WIDE_INT_1U << i) && !has_zero)
23975 if (i != n || (ones != 0 && ones != n))
23977 rtx reg = gen_reg_rtx (<avx512fmaskmode>mode);
23978 emit_move_insn (reg, operands[3]);
23979 enum insn_code icode;
23981 /* For masks with all one bits in it's lower part,
23982 we can transform v{,p}expand* to vmovdq* with
23984 icode = CODE_FOR_<avx512>_load<mode>_mask;
23986 icode = CODE_FOR_expand<mode>_mask;
23987 emit_insn (GEN_FCN (icode) (operands[0], operands[1], operands[2], reg));
23990 /* For ALL_MASK_ONES or CONST0_RTX mask, transform it to simple mov. */
23991 emit_move_insn (operands[0], ones ? operands[1] : operands[2]);
23995 (define_expand "expand<mode>_maskz"
23996 [(set (match_operand:VI12_AVX512VLBW 0 "register_operand")
23997 (unspec:VI12_AVX512VLBW
23998 [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand")
23999 (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand")
24000 (match_operand:<avx512fmaskmode> 3 "register_operand")]
24002 "TARGET_AVX512VBMI2"
24003 "operands[2] = CONST0_RTX (<MODE>mode);")
24005 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
24006 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
24007 (unspec:VF_AVX512VL
24008 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
24009 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
24010 (match_operand:SI 3 "const_0_to_15_operand")]
24012 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
24013 "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
24014 [(set_attr "type" "sse")
24015 (set_attr "prefix" "evex")
24016 (set_attr "mode" "<MODE>")])
24018 (define_insn "avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>"
24019 [(set (match_operand:VF_128 0 "register_operand" "=v")
24022 [(match_operand:VF_128 1 "register_operand" "v")
24023 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
24024 (match_operand:SI 3 "const_0_to_15_operand")]
24029 "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
24030 [(set_attr "type" "sse")
24031 (set_attr "prefix" "evex")
24032 (set_attr "mode" "<MODE>")])
24034 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
24035 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
24036 (unspec:<avx512fmaskmode>
24037 [(match_operand:VF_AVX512VL 1 "vector_operand" "vm")
24038 (match_operand 2 "const_0_to_255_operand" "n")]
24041 "vfpclass<ssemodesuffix><vecmemsuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
24042 [(set_attr "type" "sse")
24043 (set_attr "length_immediate" "1")
24044 (set_attr "prefix" "evex")
24045 (set_attr "mode" "<MODE>")])
24047 (define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>"
24048 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
24049 (and:<avx512fmaskmode>
24050 (unspec:<avx512fmaskmode>
24051 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")
24052 (match_operand 2 "const_0_to_255_operand" "n")]
24056 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
24057 [(set_attr "type" "sse")
24058 (set_attr "length_immediate" "1")
24059 (set_attr "prefix" "evex")
24060 (set_attr "mode" "<MODE>")])
24062 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
24063 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
24064 (unspec:VF_AVX512VL
24065 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
24066 (match_operand:SI 2 "const_0_to_15_operand")]
24069 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
24070 [(set_attr "prefix" "evex")
24071 (set_attr "mode" "<MODE>")])
24073 (define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
24074 [(set (match_operand:VF_128 0 "register_operand" "=v")
24077 [(match_operand:VF_128 1 "register_operand" "v")
24078 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
24079 (match_operand:SI 3 "const_0_to_15_operand")]
24084 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
24085 [(set_attr "prefix" "evex")
24086 (set_attr "mode" "<ssescalarmode>")])
24088 ;; The correct representation for this is absolutely enormous, and
24089 ;; surely not generally useful.
24090 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
24091 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
24092 (unspec:VI2_AVX512VL
24093 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
24094 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
24095 (match_operand:SI 3 "const_0_to_255_operand")]
24098 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
24099 [(set_attr "type" "sselog1")
24100 (set_attr "length_immediate" "1")
24101 (set_attr "prefix" "evex")
24102 (set_attr "mode" "<sseinsnmode>")])
24104 (define_insn "clz<mode>2<mask_name>"
24105 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
24107 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
24109 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
24110 [(set_attr "type" "sse")
24111 (set_attr "prefix" "evex")
24112 (set_attr "mode" "<sseinsnmode>")])
24114 (define_insn "<mask_codefor>conflict<mode><mask_name>"
24115 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
24116 (unspec:VI48_AVX512VL
24117 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
24120 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
24121 [(set_attr "type" "sse")
24122 (set_attr "prefix" "evex")
24123 (set_attr "mode" "<sseinsnmode>")])
24125 (define_insn "sha1msg1"
24126 [(set (match_operand:V4SI 0 "register_operand" "=x")
24128 [(match_operand:V4SI 1 "register_operand" "0")
24129 (match_operand:V4SI 2 "vector_operand" "xBm")]
24132 "sha1msg1\t{%2, %0|%0, %2}"
24133 [(set_attr "type" "sselog1")
24134 (set_attr "mode" "TI")])
24136 (define_insn "sha1msg2"
24137 [(set (match_operand:V4SI 0 "register_operand" "=x")
24139 [(match_operand:V4SI 1 "register_operand" "0")
24140 (match_operand:V4SI 2 "vector_operand" "xBm")]
24143 "sha1msg2\t{%2, %0|%0, %2}"
24144 [(set_attr "type" "sselog1")
24145 (set_attr "mode" "TI")])
24147 (define_insn "sha1nexte"
24148 [(set (match_operand:V4SI 0 "register_operand" "=x")
24150 [(match_operand:V4SI 1 "register_operand" "0")
24151 (match_operand:V4SI 2 "vector_operand" "xBm")]
24152 UNSPEC_SHA1NEXTE))]
24154 "sha1nexte\t{%2, %0|%0, %2}"
24155 [(set_attr "type" "sselog1")
24156 (set_attr "mode" "TI")])
24158 (define_insn "sha1rnds4"
24159 [(set (match_operand:V4SI 0 "register_operand" "=x")
24161 [(match_operand:V4SI 1 "register_operand" "0")
24162 (match_operand:V4SI 2 "vector_operand" "xBm")
24163 (match_operand:SI 3 "const_0_to_3_operand" "n")]
24164 UNSPEC_SHA1RNDS4))]
24166 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
24167 [(set_attr "type" "sselog1")
24168 (set_attr "length_immediate" "1")
24169 (set_attr "mode" "TI")])
24171 (define_insn "sha256msg1"
24172 [(set (match_operand:V4SI 0 "register_operand" "=x")
24174 [(match_operand:V4SI 1 "register_operand" "0")
24175 (match_operand:V4SI 2 "vector_operand" "xBm")]
24176 UNSPEC_SHA256MSG1))]
24178 "sha256msg1\t{%2, %0|%0, %2}"
24179 [(set_attr "type" "sselog1")
24180 (set_attr "mode" "TI")])
24182 (define_insn "sha256msg2"
24183 [(set (match_operand:V4SI 0 "register_operand" "=x")
24185 [(match_operand:V4SI 1 "register_operand" "0")
24186 (match_operand:V4SI 2 "vector_operand" "xBm")]
24187 UNSPEC_SHA256MSG2))]
24189 "sha256msg2\t{%2, %0|%0, %2}"
24190 [(set_attr "type" "sselog1")
24191 (set_attr "mode" "TI")])
24193 (define_insn "sha256rnds2"
24194 [(set (match_operand:V4SI 0 "register_operand" "=x")
24196 [(match_operand:V4SI 1 "register_operand" "0")
24197 (match_operand:V4SI 2 "vector_operand" "xBm")
24198 (match_operand:V4SI 3 "register_operand" "Yz")]
24199 UNSPEC_SHA256RNDS2))]
24201 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
24202 [(set_attr "type" "sselog1")
24203 (set_attr "length_immediate" "1")
24204 (set_attr "mode" "TI")])
24206 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
24207 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
24208 (vec_concat:AVX512MODE2P
24209 (vec_concat:<ssehalfvecmode>
24210 (match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")
24211 (unspec:<ssequartermode> [(const_int 0)] UNSPEC_CAST))
24212 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
24213 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
24215 "&& reload_completed"
24216 [(set (match_dup 0) (match_dup 1))]
24218 if (REG_P (operands[0]))
24219 operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
24221 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
24222 <ssequartermode>mode);
24225 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
24226 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
24227 (vec_concat:AVX512MODE2P
24228 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
24229 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
24230 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
24232 "&& reload_completed"
24233 [(set (match_dup 0) (match_dup 1))]
24235 if (REG_P (operands[0]))
24236 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
24238 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
24239 <ssehalfvecmode>mode);
;; Integer iterator over the two unspecs implementing the AVX512IFMA
;; 52-bit multiply-accumulate instructions: the low-half (VPMADD52LUQ)
;; and high-half (VPMADD52HUQ) of the 104-bit product.
24242 (define_int_iterator VPMADD52
24243 [UNSPEC_VPMADD52LUQ
24244 UNSPEC_VPMADD52HUQ])
;; Maps each VPMADD52 unspec to the mnemonic suffix ("luq"/"huq")
;; substituted into the instruction templates below.
24246 (define_int_attr vpmadd52type
24247 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
24249 (define_expand "vpamdd52huq<mode>_maskz"
24250 [(match_operand:VI8_AVX512VL 0 "register_operand")
24251 (match_operand:VI8_AVX512VL 1 "register_operand")
24252 (match_operand:VI8_AVX512VL 2 "register_operand")
24253 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
24254 (match_operand:<avx512fmaskmode> 4 "register_operand")]
24255 "TARGET_AVX512IFMA"
24257 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
24258 operands[0], operands[1], operands[2], operands[3],
24259 CONST0_RTX (<MODE>mode), operands[4]));
24263 (define_expand "vpamdd52luq<mode>_maskz"
24264 [(match_operand:VI8_AVX512VL 0 "register_operand")
24265 (match_operand:VI8_AVX512VL 1 "register_operand")
24266 (match_operand:VI8_AVX512VL 2 "register_operand")
24267 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
24268 (match_operand:<avx512fmaskmode> 4 "register_operand")]
24269 "TARGET_AVX512IFMA"
24271 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
24272 operands[0], operands[1], operands[2], operands[3],
24273 CONST0_RTX (<MODE>mode), operands[4]));
24277 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
24278 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
24279 (unspec:VI8_AVX512VL
24280 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
24281 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
24282 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
24284 "TARGET_AVX512IFMA"
24285 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
24286 [(set_attr "type" "ssemuladd")
24287 (set_attr "prefix" "evex")
24288 (set_attr "mode" "<sseinsnmode>")])
24290 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
24291 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
24292 (vec_merge:VI8_AVX512VL
24293 (unspec:VI8_AVX512VL
24294 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
24295 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
24296 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
24299 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
24300 "TARGET_AVX512IFMA"
24301 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
24302 [(set_attr "type" "ssemuladd")
24303 (set_attr "prefix" "evex")
24304 (set_attr "mode" "<sseinsnmode>")])
;; AVX512VBMI VPMULTISHIFTQB, modeled as an opaque unspec: each
;; destination byte is an 8-bit field extracted from the corresponding
;; qword of operand 2 at a bit offset selected by the matching control
;; byte in operand 1 (semantics per the Intel SDM; the RTL does not
;; expose them).  <mask_name> adds the merge-/zero-masking variants,
;; whose mask becomes operand 3 (<mask_operand3>).
24306 (define_insn "vpmultishiftqb<mode><mask_name>"
24307 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
24308 (unspec:VI1_AVX512VL
24309 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
24310 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
24311 UNSPEC_VPMULTISHIFT))]
24312 "TARGET_AVX512VBMI"
24313 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
24314 [(set_attr "type" "sselog")
24315 (set_attr "prefix" "evex")
24316 (set_attr "mode" "<sseinsnmode>")])
;; Quadruple-width modes used by the AVX5124FMAPS/AVX5124VNNIW
;; multi-register instructions: each IMOD4 mode spans four consecutive
;; 512-bit vectors treated as a single operand (split back into four
;; <imod4_narrow> moves by the *mov<mode>_internal splitter below).
24318 (define_mode_iterator IMOD4
24319 [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])
;; The 512-bit component mode of each IMOD4 group (one quarter of it).
24321 (define_mode_attr imod4_narrow
24322 [(V64SF "V16SF") (V64SI "V16SI")])
24324 (define_expand "mov<mode>"
24325 [(set (match_operand:IMOD4 0 "nonimmediate_operand")
24326 (match_operand:IMOD4 1 "nonimm_or_0_operand"))]
24329 ix86_expand_vector_move (<MODE>mode, operands);
24333 (define_insn_and_split "*mov<mode>_internal"
24334 [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
24335 (match_operand:IMOD4 1 "nonimm_or_0_operand" " C,vm,v"))]
24337 && (register_operand (operands[0], <MODE>mode)
24338 || register_operand (operands[1], <MODE>mode))"
24340 "&& reload_completed"
24346 for (i = 0; i < 4; i++)
24348 op0 = simplify_subreg
24349 (<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
24350 op1 = simplify_subreg
24351 (<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
24352 emit_move_insn (op0, op1);
24357 (define_insn "avx5124fmaddps_4fmaddps"
24358 [(set (match_operand:V16SF 0 "register_operand" "=v")
24360 [(match_operand:V16SF 1 "register_operand" "0")
24361 (match_operand:V64SF 2 "register_operand" "v")
24362 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
24363 "TARGET_AVX5124FMAPS"
24364 "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
24365 [(set_attr ("type") ("ssemuladd"))
24366 (set_attr ("prefix") ("evex"))
24367 (set_attr ("mode") ("V16SF"))])
24369 (define_insn "avx5124fmaddps_4fmaddps_mask"
24370 [(set (match_operand:V16SF 0 "register_operand" "=v")
24373 [(match_operand:V64SF 1 "register_operand" "v")
24374 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
24375 (match_operand:V16SF 3 "register_operand" "0")
24376 (match_operand:HI 4 "register_operand" "Yk")))]
24377 "TARGET_AVX5124FMAPS"
24378 "v4fmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
24379 [(set_attr ("type") ("ssemuladd"))
24380 (set_attr ("prefix") ("evex"))
24381 (set_attr ("mode") ("V16SF"))])
24383 (define_insn "avx5124fmaddps_4fmaddps_maskz"
24384 [(set (match_operand:V16SF 0 "register_operand" "=v")
24387 [(match_operand:V16SF 1 "register_operand" "0")
24388 (match_operand:V64SF 2 "register_operand" "v")
24389 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
24390 (match_operand:V16SF 4 "const0_operand" "C")
24391 (match_operand:HI 5 "register_operand" "Yk")))]
24392 "TARGET_AVX5124FMAPS"
24393 "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
24394 [(set_attr ("type") ("ssemuladd"))
24395 (set_attr ("prefix") ("evex"))
24396 (set_attr ("mode") ("V16SF"))])
24398 (define_insn "avx5124fmaddps_4fmaddss"
24399 [(set (match_operand:V4SF 0 "register_operand" "=v")
24401 [(match_operand:V4SF 1 "register_operand" "0")
24402 (match_operand:V64SF 2 "register_operand" "v")
24403 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
24404 "TARGET_AVX5124FMAPS"
24405 "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
24406 [(set_attr ("type") ("ssemuladd"))
24407 (set_attr ("prefix") ("evex"))
24408 (set_attr ("mode") ("SF"))])
24410 (define_insn "avx5124fmaddps_4fmaddss_mask"
24411 [(set (match_operand:V4SF 0 "register_operand" "=v")
24414 [(match_operand:V64SF 1 "register_operand" "v")
24415 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
24416 (match_operand:V4SF 3 "register_operand" "0")
24417 (match_operand:QI 4 "register_operand" "Yk")))]
24418 "TARGET_AVX5124FMAPS"
24419 "v4fmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
24420 [(set_attr ("type") ("ssemuladd"))
24421 (set_attr ("prefix") ("evex"))
24422 (set_attr ("mode") ("SF"))])
24424 (define_insn "avx5124fmaddps_4fmaddss_maskz"
24425 [(set (match_operand:V4SF 0 "register_operand" "=v")
24428 [(match_operand:V4SF 1 "register_operand" "0")
24429 (match_operand:V64SF 2 "register_operand" "v")
24430 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
24431 (match_operand:V4SF 4 "const0_operand" "C")
24432 (match_operand:QI 5 "register_operand" "Yk")))]
24433 "TARGET_AVX5124FMAPS"
24434 "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
24435 [(set_attr ("type") ("ssemuladd"))
24436 (set_attr ("prefix") ("evex"))
24437 (set_attr ("mode") ("SF"))])
24439 (define_insn "avx5124fmaddps_4fnmaddps"
24440 [(set (match_operand:V16SF 0 "register_operand" "=v")
24442 [(match_operand:V16SF 1 "register_operand" "0")
24443 (match_operand:V64SF 2 "register_operand" "v")
24444 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
24445 "TARGET_AVX5124FMAPS"
24446 "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
24447 [(set_attr ("type") ("ssemuladd"))
24448 (set_attr ("prefix") ("evex"))
24449 (set_attr ("mode") ("V16SF"))])
24451 (define_insn "avx5124fmaddps_4fnmaddps_mask"
24452 [(set (match_operand:V16SF 0 "register_operand" "=v")
24455 [(match_operand:V64SF 1 "register_operand" "v")
24456 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
24457 (match_operand:V16SF 3 "register_operand" "0")
24458 (match_operand:HI 4 "register_operand" "Yk")))]
24459 "TARGET_AVX5124FMAPS"
24460 "v4fnmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
24461 [(set_attr ("type") ("ssemuladd"))
24462 (set_attr ("prefix") ("evex"))
24463 (set_attr ("mode") ("V16SF"))])
24465 (define_insn "avx5124fmaddps_4fnmaddps_maskz"
24466 [(set (match_operand:V16SF 0 "register_operand" "=v")
24469 [(match_operand:V16SF 1 "register_operand" "0")
24470 (match_operand:V64SF 2 "register_operand" "v")
24471 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
24472 (match_operand:V16SF 4 "const0_operand" "C")
24473 (match_operand:HI 5 "register_operand" "Yk")))]
24474 "TARGET_AVX5124FMAPS"
24475 "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
24476 [(set_attr ("type") ("ssemuladd"))
24477 (set_attr ("prefix") ("evex"))
24478 (set_attr ("mode") ("V16SF"))])
24480 (define_insn "avx5124fmaddps_4fnmaddss"
24481 [(set (match_operand:V4SF 0 "register_operand" "=v")
24483 [(match_operand:V4SF 1 "register_operand" "0")
24484 (match_operand:V64SF 2 "register_operand" "v")
24485 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
24486 "TARGET_AVX5124FMAPS"
24487 "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
24488 [(set_attr ("type") ("ssemuladd"))
24489 (set_attr ("prefix") ("evex"))
24490 (set_attr ("mode") ("SF"))])
24492 (define_insn "avx5124fmaddps_4fnmaddss_mask"
24493 [(set (match_operand:V4SF 0 "register_operand" "=v")
24496 [(match_operand:V64SF 1 "register_operand" "v")
24497 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
24498 (match_operand:V4SF 3 "register_operand" "0")
24499 (match_operand:QI 4 "register_operand" "Yk")))]
24500 "TARGET_AVX5124FMAPS"
24501 "v4fnmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
24502 [(set_attr ("type") ("ssemuladd"))
24503 (set_attr ("prefix") ("evex"))
24504 (set_attr ("mode") ("SF"))])
24506 (define_insn "avx5124fmaddps_4fnmaddss_maskz"
24507 [(set (match_operand:V4SF 0 "register_operand" "=v")
24510 [(match_operand:V4SF 1 "register_operand" "0")
24511 (match_operand:V64SF 2 "register_operand" "v")
24512 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
24513 (match_operand:V4SF 4 "const0_operand" "C")
24514 (match_operand:QI 5 "register_operand" "Yk")))]
24515 "TARGET_AVX5124FMAPS"
24516 "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
24517 [(set_attr ("type") ("ssemuladd"))
24518 (set_attr ("prefix") ("evex"))
24519 (set_attr ("mode") ("SF"))])
24521 (define_insn "avx5124vnniw_vp4dpwssd"
24522 [(set (match_operand:V16SI 0 "register_operand" "=v")
24524 [(match_operand:V16SI 1 "register_operand" "0")
24525 (match_operand:V64SI 2 "register_operand" "v")
24526 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
24527 "TARGET_AVX5124VNNIW"
24528 "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
24529 [(set_attr ("type") ("ssemuladd"))
24530 (set_attr ("prefix") ("evex"))
24531 (set_attr ("mode") ("TI"))])
24533 (define_insn "avx5124vnniw_vp4dpwssd_mask"
24534 [(set (match_operand:V16SI 0 "register_operand" "=v")
24537 [(match_operand:V64SI 1 "register_operand" "v")
24538 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
24539 (match_operand:V16SI 3 "register_operand" "0")
24540 (match_operand:HI 4 "register_operand" "Yk")))]
24541 "TARGET_AVX5124VNNIW"
24542 "vp4dpwssd\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
24543 [(set_attr ("type") ("ssemuladd"))
24544 (set_attr ("prefix") ("evex"))
24545 (set_attr ("mode") ("TI"))])
24547 (define_insn "avx5124vnniw_vp4dpwssd_maskz"
24548 [(set (match_operand:V16SI 0 "register_operand" "=v")
24551 [(match_operand:V16SI 1 "register_operand" "0")
24552 (match_operand:V64SI 2 "register_operand" "v")
24553 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
24554 (match_operand:V16SI 4 "const0_operand" "C")
24555 (match_operand:HI 5 "register_operand" "Yk")))]
24556 "TARGET_AVX5124VNNIW"
24557 "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
24558 [(set_attr ("type") ("ssemuladd"))
24559 (set_attr ("prefix") ("evex"))
24560 (set_attr ("mode") ("TI"))])
24562 (define_insn "avx5124vnniw_vp4dpwssds"
24563 [(set (match_operand:V16SI 0 "register_operand" "=v")
24565 [(match_operand:V16SI 1 "register_operand" "0")
24566 (match_operand:V64SI 2 "register_operand" "v")
24567 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
24568 "TARGET_AVX5124VNNIW"
24569 "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
24570 [(set_attr ("type") ("ssemuladd"))
24571 (set_attr ("prefix") ("evex"))
24572 (set_attr ("mode") ("TI"))])
24574 (define_insn "avx5124vnniw_vp4dpwssds_mask"
24575 [(set (match_operand:V16SI 0 "register_operand" "=v")
24578 [(match_operand:V64SI 1 "register_operand" "v")
24579 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
24580 (match_operand:V16SI 3 "register_operand" "0")
24581 (match_operand:HI 4 "register_operand" "Yk")))]
24582 "TARGET_AVX5124VNNIW"
24583 "vp4dpwssds\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
24584 [(set_attr ("type") ("ssemuladd"))
24585 (set_attr ("prefix") ("evex"))
24586 (set_attr ("mode") ("TI"))])
24588 (define_insn "avx5124vnniw_vp4dpwssds_maskz"
24589 [(set (match_operand:V16SI 0 "register_operand" "=v")
24592 [(match_operand:V16SI 1 "register_operand" "0")
24593 (match_operand:V64SI 2 "register_operand" "v")
24594 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
24595 (match_operand:V16SI 4 "const0_operand" "C")
24596 (match_operand:HI 5 "register_operand" "Yk")))]
24597 "TARGET_AVX5124VNNIW"
24598 "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
24599 [(set_attr ("type") ("ssemuladd"))
24600 (set_attr ("prefix") ("evex"))
24601 (set_attr ("mode") ("TI"))])
;; Per-element population count for D/Q element vectors.  This expander
;; has no C body: it matches the vpopcount insn below directly, so the
;; generic popcount<mode>2 optab maps straight onto VPOPCNTD/VPOPCNTQ
;; when AVX512VPOPCNTDQ is available.
(define_expand "popcount<mode>2"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
	(popcount:VI48_AVX512VL
	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")))]
  "TARGET_AVX512VPOPCNTDQ")
;; VPOPCNT{D,Q}: per-element population count.  The <mask_name> subst
;; also generates the merge- and zero-masked forms; <mask_operand2>
;; expands to the mask suffix in the output template when present.
(define_insn "vpopcount<mode><mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
	(popcount:VI48_AVX512VL
	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512VPOPCNTDQ"
  "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
24616 ;; Save multiple registers out-of-line.
24617 (define_insn "*save_multiple<mode>"
24618 [(match_parallel 0 "save_multiple"
24619 [(use (match_operand:P 1 "symbol_operand"))])]
24620 "TARGET_SSE && TARGET_64BIT"
24623 ;; Restore multiple registers out-of-line.
24624 (define_insn "*restore_multiple<mode>"
24625 [(match_parallel 0 "restore_multiple"
24626 [(use (match_operand:P 1 "symbol_operand"))])]
24627 "TARGET_SSE && TARGET_64BIT"
24630 ;; Restore multiple registers out-of-line and return.
24631 (define_insn "*restore_multiple_and_return<mode>"
24632 [(match_parallel 0 "restore_multiple"
24634 (use (match_operand:P 1 "symbol_operand"))
24635 (set (reg:DI SP_REG) (reg:DI R10_REG))
24637 "TARGET_SSE && TARGET_64BIT"
24640 ;; Restore multiple registers out-of-line when hard frame pointer is used,
24641 ;; perform the leave operation prior to returning (from the function).
24642 (define_insn "*restore_multiple_leave_return<mode>"
24643 [(match_parallel 0 "restore_multiple"
24645 (use (match_operand:P 1 "symbol_operand"))
24646 (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
24647 (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
24648 (clobber (mem:BLK (scratch)))
24650 "TARGET_SSE && TARGET_64BIT"
;; Per-element population count for B/W element vectors (VPOPCNTB/VPOPCNTW,
;; AVX512BITALG).  Like the VI48_AVX512VL expander above, this matches the
;; vpopcount insn directly and needs no C body.  Constraint strings are
;; meaningless in a define_expand, so none are given here (the previous
;; "=v"/"vm" constraints were ignored by genemit anyway).
(define_expand "popcount<mode>2"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
	(popcount:VI12_AVX512VL
	  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")))]
  "TARGET_AVX512BITALG")
;; VPOPCNT{B,W}: per-element population count for byte/word vectors,
;; with optional merge/zero masking via the <mask_name> subst.
(define_insn "vpopcount<mode><mask_name>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
	(popcount:VI12_AVX512VL
	  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512BITALG"
  "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
24666 (define_insn "vgf2p8affineinvqb_<mode><mask_name>"
24667 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
24668 (unspec:VI1_AVX512F
24669 [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
24670 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
24671 (match_operand 3 "const_0_to_255_operand" "n,n")]
24672 UNSPEC_GF2P8AFFINEINV))]
24675 gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3}
24676 vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
24677 [(set_attr "isa" "noavx,avx")
24678 (set_attr "prefix_data16" "1,*")
24679 (set_attr "prefix_extra" "1")
24680 (set_attr "prefix" "orig,maybe_evex")
24681 (set_attr "mode" "<sseinsnmode>")])
24683 (define_insn "vgf2p8affineqb_<mode><mask_name>"
24684 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
24685 (unspec:VI1_AVX512F
24686 [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
24687 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
24688 (match_operand 3 "const_0_to_255_operand" "n,n")]
24689 UNSPEC_GF2P8AFFINE))]
24692 gf2p8affineqb\t{%3, %2, %0| %0, %2, %3}
24693 vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
24694 [(set_attr "isa" "noavx,avx")
24695 (set_attr "prefix_data16" "1,*")
24696 (set_attr "prefix_extra" "1")
24697 (set_attr "prefix" "orig,maybe_evex")
24698 (set_attr "mode" "<sseinsnmode>")])
24700 (define_insn "vgf2p8mulb_<mode><mask_name>"
24701 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
24702 (unspec:VI1_AVX512F
24703 [(match_operand:VI1_AVX512F 1 "register_operand" "%0,v")
24704 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")]
24708 gf2p8mulb\t{%2, %0| %0, %2}
24709 vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}"
24710 [(set_attr "isa" "noavx,avx")
24711 (set_attr "prefix_data16" "1,*")
24712 (set_attr "prefix_extra" "1")
24713 (set_attr "prefix" "orig,maybe_evex")
24714 (set_attr "mode" "<sseinsnmode>")])
24716 (define_insn "vpshrd_<mode><mask_name>"
24717 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
24718 (unspec:VI248_AVX512VL
24719 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
24720 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
24721 (match_operand:SI 3 "const_0_to_255_operand" "n")]
24723 "TARGET_AVX512VBMI2"
24724 "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
24725 [(set_attr ("prefix") ("evex"))])
24727 (define_insn "vpshld_<mode><mask_name>"
24728 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
24729 (unspec:VI248_AVX512VL
24730 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
24731 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
24732 (match_operand:SI 3 "const_0_to_255_operand" "n")]
24734 "TARGET_AVX512VBMI2"
24735 "vpshld<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
24736 [(set_attr ("prefix") ("evex"))])
24738 (define_insn "vpshrdv_<mode>"
24739 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
24740 (unspec:VI248_AVX512VL
24741 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
24742 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
24743 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
24745 "TARGET_AVX512VBMI2"
24746 "vpshrdv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
24747 [(set_attr ("prefix") ("evex"))
24748 (set_attr "mode" "<sseinsnmode>")])
24750 (define_insn "vpshrdv_<mode>_mask"
24751 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
24752 (vec_merge:VI248_AVX512VL
24753 (unspec:VI248_AVX512VL
24754 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
24755 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
24756 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
24759 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
24760 "TARGET_AVX512VBMI2"
24761 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
24762 [(set_attr ("prefix") ("evex"))
24763 (set_attr "mode" "<sseinsnmode>")])
24765 (define_expand "vpshrdv_<mode>_maskz"
24766 [(match_operand:VI248_AVX512VL 0 "register_operand")
24767 (match_operand:VI248_AVX512VL 1 "register_operand")
24768 (match_operand:VI248_AVX512VL 2 "register_operand")
24769 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
24770 (match_operand:<avx512fmaskmode> 4 "register_operand")]
24771 "TARGET_AVX512VBMI2"
24773 emit_insn (gen_vpshrdv_<mode>_maskz_1 (operands[0], operands[1],
24774 operands[2], operands[3],
24775 CONST0_RTX (<MODE>mode),
24780 (define_insn "vpshrdv_<mode>_maskz_1"
24781 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
24782 (vec_merge:VI248_AVX512VL
24783 (unspec:VI248_AVX512VL
24784 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
24785 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
24786 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
24788 (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
24789 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
24790 "TARGET_AVX512VBMI2"
24791 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
24792 [(set_attr ("prefix") ("evex"))
24793 (set_attr "mode" "<sseinsnmode>")])
24795 (define_insn "vpshldv_<mode>"
24796 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
24797 (unspec:VI248_AVX512VL
24798 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
24799 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
24800 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
24802 "TARGET_AVX512VBMI2"
24803 "vpshldv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
24804 [(set_attr ("prefix") ("evex"))
24805 (set_attr "mode" "<sseinsnmode>")])
24807 (define_insn "vpshldv_<mode>_mask"
24808 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
24809 (vec_merge:VI248_AVX512VL
24810 (unspec:VI248_AVX512VL
24811 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
24812 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
24813 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
24816 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
24817 "TARGET_AVX512VBMI2"
24818 "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
24819 [(set_attr ("prefix") ("evex"))
24820 (set_attr "mode" "<sseinsnmode>")])
24822 (define_expand "vpshldv_<mode>_maskz"
24823 [(match_operand:VI248_AVX512VL 0 "register_operand")
24824 (match_operand:VI248_AVX512VL 1 "register_operand")
24825 (match_operand:VI248_AVX512VL 2 "register_operand")
24826 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
24827 (match_operand:<avx512fmaskmode> 4 "register_operand")]
24828 "TARGET_AVX512VBMI2"
24830 emit_insn (gen_vpshldv_<mode>_maskz_1 (operands[0], operands[1],
24831 operands[2], operands[3],
24832 CONST0_RTX (<MODE>mode),
24837 (define_insn "vpshldv_<mode>_maskz_1"
24838 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
24839 (vec_merge:VI248_AVX512VL
24840 (unspec:VI248_AVX512VL
24841 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
24842 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
24843 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
24845 (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
24846 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
24847 "TARGET_AVX512VBMI2"
24848 "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
24849 [(set_attr ("prefix") ("evex"))
24850 (set_attr "mode" "<sseinsnmode>")])
24852 (define_insn "vpdpbusd_v16si"
24853 [(set (match_operand:V16SI 0 "register_operand" "=v")
24855 [(match_operand:V16SI 1 "register_operand" "0")
24856 (match_operand:V16SI 2 "register_operand" "v")
24857 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
24858 UNSPEC_VPMADDUBSWACCD))]
24859 "TARGET_AVX512VNNI"
24860 "vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
24861 [(set_attr ("prefix") ("evex"))])
24863 (define_insn "vpdpbusd_<mode>"
24864 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
24866 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
24867 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
24868 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
24869 UNSPEC_VPMADDUBSWACCD))]
24870 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
24872 %{vex%} vpdpbusd\t{%3, %2, %0|%0, %2, %3}
24873 vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
24874 [(set_attr ("prefix") ("vex,evex"))
24875 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
24877 (define_insn "vpdpbusd_<mode>_mask"
24878 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
24879 (vec_merge:VI4_AVX512VL
24880 (unspec:VI4_AVX512VL
24881 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
24882 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
24883 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
24884 UNSPEC_VPMADDUBSWACCD)
24886 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
24887 "TARGET_AVX512VNNI"
24888 "vpdpbusd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
24889 [(set_attr ("prefix") ("evex"))])
24891 (define_expand "vpdpbusd_<mode>_maskz"
24892 [(match_operand:VI4_AVX512VL 0 "register_operand")
24893 (match_operand:VI4_AVX512VL 1 "register_operand")
24894 (match_operand:VI4_AVX512VL 2 "register_operand")
24895 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
24896 (match_operand:<avx512fmaskmode> 4 "register_operand")]
24897 "TARGET_AVX512VNNI"
24899 emit_insn (gen_vpdpbusd_<mode>_maskz_1 (operands[0], operands[1],
24900 operands[2], operands[3],
24901 CONST0_RTX (<MODE>mode),
;; Zero-masked VPDPBUSD: multiply-accumulate of unsigned byte by signed
;; byte pairs into dword accumulator operand 1 (tied to the destination);
;; elements not selected by mask operand 5 are zeroed (vec_merge with the
;; const0 vector in operand 4).
;; Note: set_attr takes bare strings, not parenthesized lists, and the
;; output template must not contain a space before the closing brace
;; (it would be emitted as trailing whitespace in the assembly).
(define_insn "vpdpbusd_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
24920 (define_insn "vpdpbusds_v16si"
24921 [(set (match_operand:V16SI 0 "register_operand" "=v")
24923 [(match_operand:V16SI 1 "register_operand" "0")
24924 (match_operand:V16SI 2 "register_operand" "v")
24925 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
24926 UNSPEC_VPMADDUBSWACCSSD))]
24927 "TARGET_AVX512VNNI"
24928 "vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
24929 [(set_attr ("prefix") ("evex"))])
24931 (define_insn "vpdpbusds_<mode>"
24932 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
24934 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
24935 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
24936 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
24937 UNSPEC_VPMADDUBSWACCSSD))]
24938 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
24940 %{vex%} vpdpbusds\t{%3, %2, %0|%0, %2, %3}
24941 vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
24942 [(set_attr ("prefix") ("vex,evex"))
24943 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
24945 (define_insn "vpdpbusds_<mode>_mask"
24946 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
24947 (vec_merge:VI4_AVX512VL
24948 (unspec:VI4_AVX512VL
24949 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
24950 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
24951 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
24952 UNSPEC_VPMADDUBSWACCSSD)
24954 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
24955 "TARGET_AVX512VNNI"
24956 "vpdpbusds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
24957 [(set_attr ("prefix") ("evex"))])
24959 (define_expand "vpdpbusds_<mode>_maskz"
24960 [(match_operand:VI4_AVX512VL 0 "register_operand")
24961 (match_operand:VI4_AVX512VL 1 "register_operand")
24962 (match_operand:VI4_AVX512VL 2 "register_operand")
24963 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
24964 (match_operand:<avx512fmaskmode> 4 "register_operand")]
24965 "TARGET_AVX512VNNI"
24967 emit_insn (gen_vpdpbusds_<mode>_maskz_1 (operands[0], operands[1],
24968 operands[2], operands[3],
24969 CONST0_RTX (<MODE>mode),
;; Zero-masked VPDPBUSDS: the saturating variant of vpdpbusd_<mode>_maskz_1
;; above; same operand layout (accumulator 1 tied to dest, mask in 5,
;; zeroing via vec_merge with const0 operand 4).
;; set_attr syntax fixed to bare strings; trailing space removed from the
;; output template so no trailing whitespace is emitted.
(define_insn "vpdpbusds_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDUBSWACCSSD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
24988 (define_insn "vpdpwssd_v16si"
24989 [(set (match_operand:V16SI 0 "register_operand" "=v")
24991 [(match_operand:V16SI 1 "register_operand" "0")
24992 (match_operand:V16SI 2 "register_operand" "v")
24993 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
24994 UNSPEC_VPMADDWDACCD))]
24995 "TARGET_AVX512VNNI"
24996 "vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
24997 [(set_attr ("prefix") ("evex"))])
24999 (define_insn "vpdpwssd_<mode>"
25000 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
25002 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
25003 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
25004 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
25005 UNSPEC_VPMADDWDACCD))]
25006 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
25008 %{vex%} vpdpwssd\t{%3, %2, %0|%0, %2, %3}
25009 vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
25010 [(set_attr ("prefix") ("vex,evex"))
25011 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
25013 (define_insn "vpdpwssd_<mode>_mask"
25014 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
25015 (vec_merge:VI4_AVX512VL
25016 (unspec:VI4_AVX512VL
25017 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
25018 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
25019 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
25020 UNSPEC_VPMADDWDACCD)
25022 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
25023 "TARGET_AVX512VNNI"
25024 "vpdpwssd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
25025 [(set_attr ("prefix") ("evex"))])
25027 (define_expand "vpdpwssd_<mode>_maskz"
25028 [(match_operand:VI4_AVX512VL 0 "register_operand")
25029 (match_operand:VI4_AVX512VL 1 "register_operand")
25030 (match_operand:VI4_AVX512VL 2 "register_operand")
25031 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
25032 (match_operand:<avx512fmaskmode> 4 "register_operand")]
25033 "TARGET_AVX512VNNI"
25035 emit_insn (gen_vpdpwssd_<mode>_maskz_1 (operands[0], operands[1],
25036 operands[2], operands[3],
25037 CONST0_RTX (<MODE>mode),
;; Zero-masked VPDPWSSD: word-pair multiply-accumulate into dword
;; accumulator operand 1 (tied to the destination); unselected elements
;; are zeroed via the vec_merge with const0 operand 4 under mask 5.
;; set_attr syntax fixed to bare strings; trailing space removed from the
;; output template so no trailing whitespace is emitted.
(define_insn "vpdpwssd_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
25056 (define_insn "vpdpwssds_v16si"
25057 [(set (match_operand:V16SI 0 "register_operand" "=v")
25059 [(match_operand:V16SI 1 "register_operand" "0")
25060 (match_operand:V16SI 2 "register_operand" "v")
25061 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
25062 UNSPEC_VPMADDWDACCSSD))]
25063 "TARGET_AVX512VNNI"
25064 "vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
25065 [(set_attr ("prefix") ("evex"))])
25067 (define_insn "vpdpwssds_<mode>"
25068 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
25070 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
25071 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
25072 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
25073 UNSPEC_VPMADDWDACCSSD))]
25074 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
25076 %{vex%} vpdpwssds\t{%3, %2, %0|%0, %2, %3}
25077 vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
25078 [(set_attr ("prefix") ("vex,evex"))
25079 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
25081 (define_insn "vpdpwssds_<mode>_mask"
25082 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
25083 (vec_merge:VI4_AVX512VL
25084 (unspec:VI4_AVX512VL
25085 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
25086 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
25087 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
25088 UNSPEC_VPMADDWDACCSSD)
25090 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
25091 "TARGET_AVX512VNNI"
25092 "vpdpwssds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
25093 [(set_attr ("prefix") ("evex"))])
25095 (define_expand "vpdpwssds_<mode>_maskz"
25096 [(match_operand:VI4_AVX512VL 0 "register_operand")
25097 (match_operand:VI4_AVX512VL 1 "register_operand")
25098 (match_operand:VI4_AVX512VL 2 "register_operand")
25099 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
25100 (match_operand:<avx512fmaskmode> 4 "register_operand")]
25101 "TARGET_AVX512VNNI"
25103 emit_insn (gen_vpdpwssds_<mode>_maskz_1 (operands[0], operands[1],
25104 operands[2], operands[3],
25105 CONST0_RTX (<MODE>mode),
;; Zero-masked VPDPWSSDS: saturating variant of vpdpwssd_<mode>_maskz_1;
;; same operand layout (accumulator 1 tied to dest, zeroing mask in 5).
;; set_attr syntax fixed to bare strings; trailing space removed from the
;; output template so no trailing whitespace is emitted.
(define_insn "vpdpwssds_<mode>_maskz_1"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI4_AVX512VL
	  (unspec:VI4_AVX512VL
	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPMADDWDACCSSD)
	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
25124 (define_insn "vaesdec_<mode>"
25125 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
25126 (unspec:VI1_AVX512VL_F
25127 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
25128 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
25131 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
25134 (define_insn "vaesdeclast_<mode>"
25135 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
25136 (unspec:VI1_AVX512VL_F
25137 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
25138 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
25139 UNSPEC_VAESDECLAST))]
25141 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
25144 (define_insn "vaesenc_<mode>"
25145 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
25146 (unspec:VI1_AVX512VL_F
25147 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
25148 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
25151 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
25154 (define_insn "vaesenclast_<mode>"
25155 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
25156 (unspec:VI1_AVX512VL_F
25157 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
25158 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
25159 UNSPEC_VAESENCLAST))]
25161 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
;; VPCLMULQDQ: carry-less multiplication of quadword lanes of operands
;; 1 and 2; operand 3 is the imm8 lane selector passed straight through
;; to the instruction.  Available for all VI8_FVL modes when the
;; VPCLMULQDQ extension is enabled.
(define_insn "vpclmulqdq_<mode>"
  [(set (match_operand:VI8_FVL 0 "register_operand" "=v")
	(unspec:VI8_FVL [(match_operand:VI8_FVL 1 "register_operand" "v")
			 (match_operand:VI8_FVL 2 "vector_operand" "vm")
			 (match_operand:SI 3 "const_0_to_255_operand" "n")]
			UNSPEC_VPCLMULQDQ))]
  "TARGET_VPCLMULQDQ"
  "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "mode" "DI")])
;; VPSHUFBITQMB: produces a mask register result (<avx512fmaskmode>)
;; from the byte vectors in operands 1 and 2; the
;; <mask_scalar_merge_name> subst adds the mask-merged variant.
(define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
	(unspec:<avx512fmaskmode>
	  [(match_operand:VI1_AVX512VLBW 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VLBW 2 "nonimmediate_operand" "vm")]
	  UNSPEC_VPSHUFBIT))]
  "TARGET_AVX512BITALG"
  "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
25185 (define_mode_iterator VI48_AVX512VP2VL
25187 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
25188 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
25190 (define_mode_iterator MASK_DWI [P2QI P2HI])
25192 (define_expand "mov<mode>"
25193 [(set (match_operand:MASK_DWI 0 "nonimmediate_operand")
25194 (match_operand:MASK_DWI 1 "nonimmediate_operand"))]
25195 "TARGET_AVX512VP2INTERSECT"
25197 if (MEM_P (operands[0]) && MEM_P (operands[1]))
25198 operands[1] = force_reg (<MODE>mode, operands[1]);
25201 (define_insn_and_split "*mov<mode>_internal"
25202 [(set (match_operand:MASK_DWI 0 "nonimmediate_operand" "=k,o")
25203 (match_operand:MASK_DWI 1 "nonimmediate_operand" "ko,k"))]
25204 "TARGET_AVX512VP2INTERSECT
25205 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
25207 "&& reload_completed"
25208 [(set (match_dup 0) (match_dup 1))
25209 (set (match_dup 2) (match_dup 3))]
25211 split_double_mode (<MODE>mode, &operands[0], 2, &operands[0], &operands[2]);
25214 (define_insn "avx512vp2intersect_2intersect<mode>"
25215 [(set (match_operand:P2QI 0 "register_operand" "=k")
25217 [(match_operand:VI48_AVX512VP2VL 1 "register_operand" "v")
25218 (match_operand:VI48_AVX512VP2VL 2 "vector_operand" "vm")]
25219 UNSPEC_VP2INTERSECT))]
25220 "TARGET_AVX512VP2INTERSECT"
25221 "vp2intersect<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
25222 [(set_attr ("prefix") ("evex"))])
;; VP2INTERSECTD on 512-bit vectors: computes intersection marks of the
;; dword elements of operands 1 and 2 into a pair of mask registers
;; (P2HI destination).  set_attr syntax fixed to bare strings.
(define_insn "avx512vp2intersect_2intersectv16si"
  [(set (match_operand:P2HI 0 "register_operand" "=k")
	(unspec:P2HI [(match_operand:V16SI 1 "register_operand" "v")
		      (match_operand:V16SI 2 "vector_operand" "vm")]
		UNSPEC_VP2INTERSECT))]
  "TARGET_AVX512VP2INTERSECT"
  "vp2intersectd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "prefix" "evex")])
25233 (define_mode_iterator BF16 [V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
25234 ;; Converting from BF to SF
25235 (define_mode_attr bf16_cvt_2sf
25236 [(V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")])
25237 ;; Converting from SF to BF
25238 (define_mode_attr sf_cvt_bf16
25239 [(V4SF "V8HI") (V8SF "V8HI") (V16SF "V16HI")])
;; Mapping from an SF vector mode to the BF16 vector mode of the same
;; total bit width (V4SF -> V8HI, V8SF -> V16HI, V16SF -> V32HI)
25241 (define_mode_attr sf_bf16
25242 [(V4SF "V8HI") (V8SF "V16HI") (V16SF "V32HI")])
25244 (define_expand "avx512f_cvtne2ps2bf16_<mode>_maskz"
25245 [(match_operand:BF16 0 "register_operand")
25246 (match_operand:<bf16_cvt_2sf> 1 "register_operand")
25247 (match_operand:<bf16_cvt_2sf> 2 "register_operand")
25248 (match_operand:<avx512fmaskmode> 3 "register_operand")]
25249 "TARGET_AVX512BF16"
25251 emit_insn (gen_avx512f_cvtne2ps2bf16_<mode>_mask(operands[0], operands[1],
25252 operands[2], CONST0_RTX(<MODE>mode), operands[3]));
25256 (define_insn "avx512f_cvtne2ps2bf16_<mode><mask_name>"
25257 [(set (match_operand:BF16 0 "register_operand" "=v")
25259 [(match_operand:<bf16_cvt_2sf> 1 "register_operand" "v")
25260 (match_operand:<bf16_cvt_2sf> 2 "register_operand" "v")]
25261 UNSPEC_VCVTNE2PS2BF16))]
25262 "TARGET_AVX512BF16"
25263 "vcvtne2ps2bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}")
25265 (define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
25266 [(match_operand:<sf_cvt_bf16> 0 "register_operand")
25267 (match_operand:VF1_AVX512VL 1 "register_operand")
25268 (match_operand:<avx512fmaskmode> 2 "register_operand")]
25269 "TARGET_AVX512BF16"
25271 emit_insn (gen_avx512f_cvtneps2bf16_<mode>_mask(operands[0], operands[1],
25272 CONST0_RTX(<sf_cvt_bf16>mode), operands[2]));
;; VCVTNEPS2BF16: convert an SF vector to the narrower BF16 vector mode
;; given by <sf_cvt_bf16>; the <mask_name> subst adds the masked forms
;; (the _maskz expander above routes through the _mask variant).
(define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
  [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
	(unspec:<sf_cvt_bf16>
	  [(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
	  UNSPEC_VCVTNEPS2BF16))]
  "TARGET_AVX512BF16"
  "vcvtneps2bf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
25284 (define_expand "avx512f_dpbf16ps_<mode>_maskz"
25285 [(match_operand:VF1_AVX512VL 0 "register_operand")
25286 (match_operand:VF1_AVX512VL 1 "register_operand")
25287 (match_operand:<sf_bf16> 2 "register_operand")
25288 (match_operand:<sf_bf16> 3 "register_operand")
25289 (match_operand:<avx512fmaskhalfmode> 4 "register_operand")]
25290 "TARGET_AVX512BF16"
25292 emit_insn (gen_avx512f_dpbf16ps_<mode>_maskz_1(operands[0], operands[1],
25293 operands[2], operands[3], CONST0_RTX(<MODE>mode), operands[4]));
;; VDPBF16PS: BF16 dot product accumulated into the SF vector in
;; operand 1, which is tied to the destination ("0" constraint).
;; Operands 2 and 3 are the BF16 sources (<sf_bf16> mode); the
;; <maskz_half_name> subst supplies the zero-masked variant used by the
;; _maskz expander above.
(define_insn "avx512f_dpbf16ps_<mode><maskz_half_name>"
  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
	(unspec:VF1_AVX512VL
	  [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
	   (match_operand:<sf_bf16> 2 "register_operand" "v")
	   (match_operand:<sf_bf16> 3 "register_operand" "v")]
	  UNSPEC_VDPBF16PS))]
  "TARGET_AVX512BF16"
  "vdpbf16ps\t{%3, %2, %0<maskz_half_operand4>|%0<maskz_half_operand4>, %2, %3}")
25307 (define_insn "avx512f_dpbf16ps_<mode>_mask"
25308 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
25309 (vec_merge:VF1_AVX512VL
25310 (unspec:VF1_AVX512VL
25311 [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
25312 (match_operand:<sf_bf16> 2 "register_operand" "v")
25313 (match_operand:<sf_bf16> 3 "register_operand" "v")]
25316 (match_operand:<avx512fmaskhalfmode> 4 "register_operand" "Yk")))]
25317 "TARGET_AVX512BF16"
25318 "vdpbf16ps\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}")
25321 (define_insn "loadiwkey"
25322 [(unspec_volatile:V2DI [(match_operand:V2DI 0 "register_operand" "v")
25323 (match_operand:V2DI 1 "register_operand" "v")
25324 (match_operand:V2DI 2 "register_operand" "Yz")
25325 (match_operand:SI 3 "register_operand" "a")]
25327 (clobber (reg:CC FLAGS_REG))]
25329 "loadiwkey\t{%0, %1|%1, %0}"
25330 [(set_attr "type" "other")])
25332 (define_expand "encodekey128u32"
25334 [(set (match_operand:SI 0 "register_operand")
25335 (unspec_volatile:SI
25336 [(match_operand:SI 1 "register_operand")
25337 (reg:V2DI XMM0_REG)]
25338 UNSPECV_ENCODEKEY128U32))])]
25345 /* parallel rtx for encodekey128 predicate */
25346 operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (8));
25348 for (i = 0; i < 7; i++)
25349 xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
25352 = gen_rtx_UNSPEC_VOLATILE (SImode,
25353 gen_rtvec (2, operands[1], xmm_regs[0]),
25354 UNSPECV_ENCODEKEY128U32);
25356 XVECEXP (operands[2], 0, 0)
25357 = gen_rtx_SET (operands[0], tmp_unspec);
25360 = gen_rtx_UNSPEC_VOLATILE (V2DImode,
25361 gen_rtvec (1, const0_rtx),
25362 UNSPECV_ENCODEKEY128U32);
25364 for (i = 0; i < 3; i++)
25365 XVECEXP (operands[2], 0, i + 1)
25366 = gen_rtx_SET (xmm_regs[i], tmp_unspec);
25368 for (i = 4; i < 7; i++)
25369 XVECEXP (operands[2], 0, i)
25370 = gen_rtx_SET (xmm_regs[i], CONST0_RTX (V2DImode));
25372 XVECEXP (operands[2], 0, 7)
25373 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
25376 (define_insn "*encodekey128u32"
25377 [(match_parallel 2 "encodekey128_operation"
25378 [(set (match_operand:SI 0 "register_operand" "=r")
25379 (unspec_volatile:SI
25380 [(match_operand:SI 1 "register_operand" "r")
25381 (reg:V2DI XMM0_REG)]
25382 UNSPECV_ENCODEKEY128U32))])]
25384 "encodekey128\t{%1, %0|%0, %1}"
25385 [(set_attr "type" "other")])
;; ENCODEKEY256: wrap the 256-bit AES key held in xmm0:xmm1 (hence the
;; two V2DI register inputs, unlike ENCODEKEY128), controlled by
;; operand 1, returning a status word in operand 0.  Builds a 9-element
;; PARALLEL for the "*encodekey256u32" insn below.
;; NOTE(review): the expander condition, opening brace and the local C
;; declarations (xmm_regs[], tmp_unspec, i) are on lines not visible in
;; this extract.
25387 (define_expand "encodekey256u32"
25389 [(set (match_operand:SI 0 "register_operand")
25390 (unspec_volatile:SI
25391 [(match_operand:SI 1 "register_operand")
25392 (reg:V2DI XMM0_REG)
25393 (reg:V2DI XMM1_REG)]
25394 UNSPECV_ENCODEKEY256U32))])]
25401 /* parallel rtx for encodekey256 predicate */
25402 operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));
/* Cache xmm0..xmm6 for the element SETs below.  */
25404 for (i = 0; i < 7; i++)
25405 xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
/* Element 0: the SImode status result (operand 0), computed from the
   control word (operand 1) and the key input in xmm0/xmm1.  */
25408 = gen_rtx_UNSPEC_VOLATILE (SImode,
25409 gen_rtvec (3, operands[1],
25410 xmm_regs[0], xmm_regs[1]),
25411 UNSPECV_ENCODEKEY256U32);
25413 XVECEXP (operands[2], 0, 0)
25414 = gen_rtx_SET (operands[0], tmp_unspec);
/* Elements 1..4: xmm0..xmm3 are rewritten by the instruction (one
   register more than the 128-bit variant); modeled as an
   unspec_volatile of const0.  */
25417 = gen_rtx_UNSPEC_VOLATILE (V2DImode,
25418 gen_rtvec (1, const0_rtx),
25419 UNSPECV_ENCODEKEY256U32);
25421 for (i = 0; i < 4; i++)
25422 XVECEXP (operands[2], 0, i + 1)
25423 = gen_rtx_SET (xmm_regs[i], tmp_unspec);
/* Elements 5..7: xmm4..xmm6 are zeroed (element index i + 1 here,
   unlike the 128-bit expander which uses i).  */
25425 for (i = 4; i < 7; i++)
25426 XVECEXP (operands[2], 0, i + 1)
25427 = gen_rtx_SET (xmm_regs[i], CONST0_RTX (V2DImode));
/* Element 8: the instruction clobbers the flags.  */
25429 XVECEXP (operands[2], 0, 8)
25430 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
;; Recognizer for the PARALLEL built by the "encodekey256u32" expander.
;; Only element 0 (the status SET) is matched literally; the
;; "encodekey256_operation" predicate validates the remaining elements.
;; NOTE(review): the insn condition line is not visible in this extract.
25433 (define_insn "*encodekey256u32"
25434 [(match_parallel 2 "encodekey256_operation"
25435 [(set (match_operand:SI 0 "register_operand" "=r")
25436 (unspec_volatile:SI
25437 [(match_operand:SI 1 "register_operand" "r")
25438 (reg:V2DI XMM0_REG)
25439 (reg:V2DI XMM1_REG)]
25440 UNSPECV_ENCODEKEY256U32))])]
25442 "encodekey256\t{%1, %0|%0, %1}"
25443 [(set_attr "type" "other")])
;; Iterator over the four single-block Key Locker AES unspecs
;; (dec/enc x 128/256-bit key handles).
25445 (define_int_iterator AESDECENCKL
25446 [UNSPECV_AESDEC128KLU8 UNSPECV_AESDEC256KLU8
25447 UNSPECV_AESENC128KLU8 UNSPECV_AESENC256KLU8])
;; Lower-case suffix used to form both the pattern name
;; ("aes<aesklvariant>u8") and the emitted mnemonic ("aes<aesklvariant>").
25449 (define_int_attr aesklvariant
25450 [(UNSPECV_AESDEC128KLU8 "dec128kl")
25451 (UNSPECV_AESDEC256KLU8 "dec256kl")
25452 (UNSPECV_AESENC128KLU8 "enc128kl")
25453 (UNSPECV_AESENC256KLU8 "enc256kl")])
;; AESDEC128KL/AESDEC256KL/AESENC128KL/AESENC256KL: transform one AES
;; block in-place (operand 1 must match the "=v" output, constraint "0")
;; using the key handle at memory operand 2 (BLK: size is
;; variant-dependent).  The instruction also sets ZF, modeled as a
;; second, CCZ-mode unspec_volatile over the same inputs.
;; NOTE(review): the iterator reference closing the first unspec and the
;; insn condition are on lines not visible in this extract.
25455 (define_insn "aes<aesklvariant>u8"
25456 [(set (match_operand:V2DI 0 "register_operand" "=v")
25457 (unspec_volatile:V2DI [(match_operand:V2DI 1 "register_operand" "0")
25458 (match_operand:BLK 2 "memory_operand" "m")]
25460 (set (reg:CCZ FLAGS_REG)
25461 (unspec_volatile:CCZ [(match_dup 1) (match_dup 2)] AESDECENCKL))]
25463 "aes<aesklvariant>\t{%2, %0|%0, %2}"
25464 [(set_attr "type" "other")])
;; Iterator over the four wide (8-block) Key Locker AES unspecs.
25466 (define_int_iterator AESDECENCWIDEKL
25467 [UNSPECV_AESDECWIDE128KLU8 UNSPECV_AESDECWIDE256KLU8
25468 UNSPECV_AESENCWIDE128KLU8 UNSPECV_AESENCWIDE256KLU8])
;; Lower-case suffix for the wide pattern names and mnemonics.
25470 (define_int_attr aeswideklvariant
25471 [(UNSPECV_AESDECWIDE128KLU8 "decwide128kl")
25472 (UNSPECV_AESDECWIDE256KLU8 "decwide256kl")
25473 (UNSPECV_AESENCWIDE128KLU8 "encwide128kl")
25474 (UNSPECV_AESENCWIDE256KLU8 "encwide256kl")])
;; Upper-case stem used to splice the unspec name
;; (UNSPECV_<AESWIDEKLVARIANT>) into the expander's C code, where the
;; lower-case attr cannot be used.
25476 (define_int_attr AESWIDEKLVARIANT
25477 [(UNSPECV_AESDECWIDE128KLU8 "AESDECWIDE128KLU8")
25478 (UNSPECV_AESDECWIDE256KLU8 "AESDECWIDE256KLU8")
25479 (UNSPECV_AESENCWIDE128KLU8 "AESENCWIDE128KLU8")
25480 (UNSPECV_AESENCWIDE256KLU8 "AESENCWIDE256KLU8")])
;; AES{DEC,ENC}WIDE{128,256}KL expanders: each builds a 9-element
;; PARALLEL -- element 0 sets the flags (CCZ) from the key-handle memory
;; operand, elements 1..8 record that xmm0..xmm7 are rewritten in place
;; by the instruction.  Matched by "*aes<aeswideklvariant>u8" below.
;; NOTE(review): the expander condition, opening brace and local C
;; declarations (tmp_unspec, i) are on lines not visible in this
;; extract.
25482 (define_expand "aes<aeswideklvariant>u8"
25484 [(set (reg:CCZ FLAGS_REG)
25485 (unspec_volatile:CCZ
25486 [(match_operand:BLK 0 "memory_operand")]
25487 AESDECENCWIDEKL))])]
25493 /* parallel rtx for widekl predicate */
25494 operands[1] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));
/* Element 0: ZF result from the key handle in memory.  */
25497 = gen_rtx_UNSPEC_VOLATILE (CCZmode,
25498 gen_rtvec (1, operands[0]),
25499 UNSPECV_<AESWIDEKLVARIANT>);
25501 XVECEXP (operands[1], 0, 0)
25502 = gen_rtx_SET (gen_rtx_REG (CCZmode, FLAGS_REG),
/* Elements 1..8: each of xmm0..xmm7 is replaced by an opaque function
   of its own previous contents (unspec of the register itself).  */
25505 for (i = 0; i < 8; i++)
25507 rtx xmm_reg = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
25510 = gen_rtx_UNSPEC_VOLATILE (V2DImode,
25511 gen_rtvec (1, xmm_reg),
25512 UNSPECV_<AESWIDEKLVARIANT>);
25513 XVECEXP (operands[1], 0, i + 1)
25514 = gen_rtx_SET (xmm_reg, tmp_unspec);
;; Recognizer for the wide-KL PARALLEL; only the flags SET is matched
;; literally, the "aeswidekl_operation" predicate checks the eight xmm
;; element SETs.  The template takes a single memory operand (the key
;; handle); the eight data blocks are implicitly xmm0..xmm7.
;; NOTE(review): the insn condition line is not visible in this extract.
25518 (define_insn "*aes<aeswideklvariant>u8"
25519 [(match_parallel 1 "aeswidekl_operation"
25520 [(set (reg:CCZ FLAGS_REG)
25521 (unspec_volatile:CCZ
25522 [(match_operand:BLK 0 "memory_operand" "m")]
25523 AESDECENCWIDEKL))])]
25525 "aes<aeswideklvariant>\t{%0}"
25526 [(set_attr "type" "other")])
25528 ;; Modes handled by broadcast patterns. NB: Allow V64QI and V32HI with
25529 ;; TARGET_AVX512F since ix86_expand_vector_init_duplicate can expand
25530 ;; without TARGET_AVX512BW which is used by memset vector broadcast
25531 ;; expander to XI with:
25532 ;; vmovd %edi, %xmm15
25533 ;; vpbroadcastb %xmm15, %ymm15
25534 ;; vinserti64x4 $0x1, %ymm15, %zmm15, %zmm15
;; Integer-element vector modes for the vec_duplicate broadcast
;; expander.  Note the DImode-element rows additionally require
;; TARGET_64BIT, since the broadcast source is a 64-bit scalar.
25536 (define_mode_iterator INT_BROADCAST_MODE
25537 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
25538 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
25539 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
25540 (V8DI "TARGET_AVX512F && TARGET_64BIT")
25541 (V4DI "TARGET_AVX && TARGET_64BIT") (V2DI "TARGET_64BIT")])
25543 ;; Broadcast from an integer. NB: Enable broadcast only if we can move
25544 ;; from GPR to SSE register directly.
25545 (define_expand "vec_duplicate<mode>"
25546 [(set (match_operand:INT_BROADCAST_MODE 0 "register_operand")
25547 (vec_duplicate:INT_BROADCAST_MODE
25548 (match_operand:<ssescalarmode> 1 "nonimmediate_operand")))]
25549 "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
25551 if (!ix86_expand_vector_init_duplicate (false,
25552 GET_MODE (operands[0]),
25555 gcc_unreachable ();