1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2023 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
53 UNSPEC_XOP_UNSIGNED_CMP
63 UNSPEC_AESKEYGENASSIST
83 ;; For AVX512F support
85 UNSPEC_UNSIGNED_FIX_NOTRUNC
109 ;; For embed. rounding feature
110 UNSPEC_EMBEDDED_ROUNDING
112 ;; For AVX512PF support
113 UNSPEC_GATHER_PREFETCH
114 UNSPEC_SCATTER_PREFETCH
116 ;; For AVX512ER support
130 ;; For AVX512BW support
138 ;; For AVX512DQ support
143 ;; For AVX512IFMA support
147 ;; For AVX512VBMI support
150 ;; For AVX5124FMAPS/AVX5124VNNIW support
157 UNSPEC_GF2P8AFFINEINV
161 ;; For AVX512VBMI2 support
167 ;; For AVX512VNNI support
179 ;; For VPCLMULQDQ support
182 ;; For AVX512BITALG support
185 ;; For VP2INTERSECT support
188 ;; For AVX512BF16 support
191 ;; For AVX512FP16 support
193 UNSPEC_COMPLEX_FMA_PAIR
195 UNSPEC_COMPLEX_FCMA_PAIR
200 ;; For AVX-VNNI-INT8 support
209 (define_c_enum "unspecv" [
219 UNSPECV_AESDEC128KLU8
220 UNSPECV_AESENC128KLU8
221 UNSPECV_AESDEC256KLU8
222 UNSPECV_AESENC256KLU8
223 UNSPECV_AESDECWIDE128KLU8
224 UNSPECV_AESENCWIDE128KLU8
225 UNSPECV_AESDECWIDE256KLU8
226 UNSPECV_AESENCWIDE256KLU8
227 UNSPECV_ENCODEKEY128U32
228 UNSPECV_ENCODEKEY256U32
231 ;; All vector modes including V?TImode, used in move patterns.
;; 512-bit modes are gated on AVX512F, 256-bit on AVX, 128-bit are baseline SSE.
232 (define_mode_iterator VMOVE
233 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
234 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
235 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
236 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
237 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
238 (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
239 (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
240 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
241 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
243 ;; All AVX-512{F,VL} vector modes without HF. Supposes a TARGET_AVX512F baseline.
244 (define_mode_iterator V48_AVX512VL
245 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
246 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
247 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
248 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; Like V48_AVX512VL, but without the 128-bit modes.
250 (define_mode_iterator V48_256_512_AVX512VL
251 [V16SI (V8SI "TARGET_AVX512VL")
252 V8DI (V4DI "TARGET_AVX512VL")
253 V16SF (V8SF "TARGET_AVX512VL")
254 V8DF (V4DF "TARGET_AVX512VL")])
256 ;; All AVX-512{F,VL} vector modes, plus HF modes with AVX512FP16.
;; Supposes a TARGET_AVX512F baseline.
257 (define_mode_iterator V48H_AVX512VL
258 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
259 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
260 (V32HF "TARGET_AVX512FP16")
261 (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
262 (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
263 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
264 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
266 ;; 1,2 byte AVX-512{BW,VL} vector modes. Supposes a TARGET_AVX512BW baseline.
267 (define_mode_iterator VI12_AVX512VL
268 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
269 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; Like VI12_AVX512VL, with the HF and BF vector modes added.
271 (define_mode_iterator VI12HFBF_AVX512VL
272 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
273 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
274 V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
275 V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
;; 1-byte AVX-512 vector modes; 128/256-bit forms need AVX512VL.
277 (define_mode_iterator VI1_AVX512VL
278 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
;; All vector modes (512-bit with AVX512F, 256-bit with AVX, 128-bit baseline).
281 (define_mode_iterator V
282 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
283 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
284 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
285 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
286 (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
287 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
288 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
290 ;; All 128bit vector modes
291 (define_mode_iterator V_128
292 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
294 ;; All 256bit vector modes
295 (define_mode_iterator V_256
296 [V32QI V16HI V8SI V4DI V8SF V4DF])
298 ;; All 256bit vector modes including HF/BF vector modes
299 (define_mode_iterator V_256H
300 [V32QI V16HI V8SI V4DI V8SF V4DF V16HF V16BF])
302 ;; All 128bit and 256bit vector modes
303 (define_mode_iterator V_128_256
304 [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V2TI V1TI
305 V16HF V8HF V8SF V4SF V4DF V2DF])
307 ;; All 512bit vector modes
308 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
310 ;; All 256bit and 512bit vector modes
311 (define_mode_iterator V_256_512
312 [V32QI V16HI V16HF V16BF V8SI V4DI V8SF V4DF
313 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V32HF "TARGET_AVX512F")
314 (V32BF "TARGET_AVX512F") (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
315 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
317 ;; All vector float modes
318 (define_mode_iterator VF
319 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
320 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; All SF vector modes, plus DF vector modes requiring AVX512DQ
;; (AVX512VL additionally for the 128/256-bit DF forms).
322 (define_mode_iterator VF1_VF2_AVX512DQ
323 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
324 (V8DF "TARGET_AVX512DQ") (V4DF "TARGET_AVX512DQ && TARGET_AVX512VL")
325 (V2DF "TARGET_AVX512DQ && TARGET_AVX512VL")])
;; Like VF, but with the HF vector modes added when AVX512FP16 is enabled.
327 (define_mode_iterator VFH
328 [(V32HF "TARGET_AVX512FP16")
329 (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
330 (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
331 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
332 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
334 ;; 128-, 256- and 512-bit float vector modes for bitwise operations
335 (define_mode_iterator VFB
336 [(V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") (V8HF "TARGET_SSE2")
337 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
338 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
340 ;; 128- and 256-bit float vector modes
341 (define_mode_iterator VF_128_256
342 [(V8SF "TARGET_AVX") V4SF
343 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
345 ;; 128- and 256-bit float vector modes for bitwise operations
346 (define_mode_iterator VFB_128_256
347 [(V16HF "TARGET_AVX") (V8HF "TARGET_SSE2")
348 (V8SF "TARGET_AVX") V4SF
349 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
351 ;; All SFmode vector float modes
352 (define_mode_iterator VF1
353 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
;; Like VF1, but the 256-bit form requires AVX2 rather than AVX.
355 (define_mode_iterator VF1_AVX2
356 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX2") V4SF])
358 ;; 128- and 256-bit SF vector modes
359 (define_mode_iterator VF1_128_256
360 [(V8SF "TARGET_AVX") V4SF])
;; V8SF unconditionally, V4SF only with AVX512VL.
362 (define_mode_iterator VF1_128_256VL
363 [V8SF (V4SF "TARGET_AVX512VL")])
365 ;; All DFmode vector float modes
366 (define_mode_iterator VF2
367 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
369 ;; All DFmode & HFmode vector float modes
370 (define_mode_iterator VF2H
371 [(V32HF "TARGET_AVX512FP16")
372 (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
373 (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
374 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
376 ;; 128- and 256-bit DF vector modes
377 (define_mode_iterator VF2_128_256
378 [(V4DF "TARGET_AVX") V2DF])
;; 256- and 512-bit DF vector modes (512-bit with AVX512F).
380 (define_mode_iterator VF2_512_256
381 [(V8DF "TARGET_AVX512F") V4DF])
;; V8DF unconditionally, V4DF only with AVX512VL.
383 (define_mode_iterator VF2_512_256VL
384 [V8DF (V4DF "TARGET_AVX512VL")])
386 ;; All 128bit vector SF/DF modes
387 (define_mode_iterator VF_128
388 [V4SF (V2DF "TARGET_SSE2")])
390 ;; All 128bit vector HF/SF/DF modes
391 (define_mode_iterator VFH_128
392 [(V8HF "TARGET_AVX512FP16")
393 V4SF (V2DF "TARGET_SSE2")])
395 ;; All 256bit vector float modes
396 (define_mode_iterator VF_256
399 ;; All 512bit vector float modes
400 (define_mode_iterator VF_512
403 ;; All 512bit vector float modes for bitwise operations
404 (define_mode_iterator VFB_512
;; 4- and 8-byte integer AVX-512 vector modes; 128/256-bit forms need AVX512VL.
407 (define_mode_iterator VI48_AVX512VL
408 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
409 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; All integer vector modes; 1/2-byte elements also need AVX512BW.
411 (define_mode_iterator VI1248_AVX512VLBW
412 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
413 (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
414 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
415 (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
416 V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
417 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; SF/DF AVX-512 vector modes; 128/256-bit forms need AVX512VL.
419 (define_mode_iterator VF_AVX512VL
420 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
421 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
423 ;; AVX512ER SF plus 128- and 256-bit SF vector modes
424 (define_mode_iterator VF1_AVX512ER_128_256
425 [(V16SF "TARGET_AVX512ER") (V8SF "TARGET_AVX") V4SF])
;; Like VF_AVX512VL, plus the HF modes when AVX512FP16 is enabled.
427 (define_mode_iterator VFH_AVX512VL
428 [(V32HF "TARGET_AVX512FP16")
429 (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
430 (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
431 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
432 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; DF-only subset of VF_AVX512VL.
434 (define_mode_iterator VF2_AVX512VL
435 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; SF-only subset of VF_AVX512VL.
437 (define_mode_iterator VF1_AVX512VL
438 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
440 (define_mode_iterator VF_AVX512FP16
;; HF and BF vector modes; the HF forms require AVX512FP16.
443 (define_mode_iterator VF_AVX512HFBF16
444 [(V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16")
445 (V8HF "TARGET_AVX512FP16") V32BF V16BF V8BF])
;; HF and BF vector modes; 128/256-bit forms require AVX512VL.
447 (define_mode_iterator VF_AVX512HFBFVL
448 [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
449 V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
;; HF vector modes; 128/256-bit forms require AVX512VL.
451 (define_mode_iterator VF_AVX512FP16VL
452 [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")])
454 ;; All vector integer modes
455 (define_mode_iterator VI
456 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
457 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
458 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
459 (V8SI "TARGET_AVX") V4SI
460 (V4DI "TARGET_AVX") V2DI])
462 ;; All vector integer, HF and BF modes
463 (define_mode_iterator VIHFBF
464 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
465 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
466 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
467 (V8SI "TARGET_AVX") V4SI
468 (V4DI "TARGET_AVX") V2DI
469 (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF
470 (V32BF "TARGET_AVX512BW") (V16BF "TARGET_AVX") V8BF])
;; Like VI, but the 256-bit forms require AVX2 rather than AVX.
472 (define_mode_iterator VI_AVX2
473 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
474 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
475 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
476 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
478 ;; All QImode vector integer modes
479 (define_mode_iterator VI1
480 [(V32QI "TARGET_AVX") V16QI])
482 ;; All 128-bit SSE vector int/float modes, plus the 256-bit modes
482 ;; available with AVX.  (The old "All DImode vector integer modes"
482 ;; comment described VI8, not this iterator.)
483 (define_mode_iterator V_AVX
484 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF
485 (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
486 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX") (V2TI "TARGET_AVX")
487 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")])
489 (define_mode_iterator VI48_AVX
491 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
;; All DImode vector integer modes.
493 (define_mode_iterator VI8
494 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; DImode vector modes; V4DI baseline, V2DI needs AVX512VL.
496 (define_mode_iterator VI8_FVL
497 [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])
;; DImode AVX-512 vector modes; 128/256-bit forms need AVX512VL.
499 (define_mode_iterator VI8_AVX512VL
500 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
502 (define_mode_iterator VI8_256_512
503 [V8DI (V4DI "TARGET_AVX512VL")])
;; QImode vector modes (256-bit with AVX2).
505 (define_mode_iterator VI1_AVX2
506 [(V32QI "TARGET_AVX2") V16QI])
508 (define_mode_iterator VI1_AVX512
509 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
510 
511 (define_mode_iterator VI1_AVX512F
512 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
514 (define_mode_iterator VI1_AVX512VNNI
515 [(V64QI "TARGET_AVX512VNNI") (V32QI "TARGET_AVX2") V16QI])
;; 1/2-byte 256/512-bit vector modes; 256-bit forms need AVX512VL.
517 (define_mode_iterator VI12_256_512_AVX512VL
518 [V64QI (V32QI "TARGET_AVX512VL")
519 V32HI (V16HI "TARGET_AVX512VL")])
;; HImode vector modes (256-bit with AVX2, 512-bit with AVX512BW).
521 (define_mode_iterator VI2_AVX2
522 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
524 (define_mode_iterator VI2_AVX512F
525 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
527 (define_mode_iterator VI2_AVX512VNNIBW
528 [(V32HI "TARGET_AVX512BW || TARGET_AVX512VNNI")
529 (V16HI "TARGET_AVX2") V8HI])
;; SImode vector modes.
531 (define_mode_iterator VI4_AVX
532 [(V8SI "TARGET_AVX") V4SI])
534 (define_mode_iterator VI4_AVX2
535 [(V8SI "TARGET_AVX2") V4SI])
537 (define_mode_iterator VI4_AVX512F
538 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
540 (define_mode_iterator VI4_AVX512VL
541 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
543 (define_mode_iterator VI48_AVX512F_AVX512VL
544 [V4SI V8SI (V16SI "TARGET_AVX512F")
545 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
;; HImode AVX-512 vector modes; 128/256-bit forms need AVX512VL.
547 (define_mode_iterator VI2_AVX512VL
548 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
550 (define_mode_iterator VI2H_AVX512VL
551 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI
552 (V8SI "TARGET_AVX512VL") V16SI
;; QImode vector modes; V32QI baseline, V16QI needs AVX512VL, V64QI AVX512F.
555 (define_mode_iterator VI1_AVX512VL_F
556 [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
;; DImode vector modes; 512-bit form requires AVX512BW (not just AVX512F).
558 (define_mode_iterator VI8_AVX2_AVX512BW
559 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
561 (define_mode_iterator VI8_AVX2
562 [(V4DI "TARGET_AVX2") V2DI])
564 (define_mode_iterator VI8_AVX2_AVX512F
565 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; 256/512-bit DImode vector modes only (no V2DI).
567 (define_mode_iterator VI8_AVX_AVX512F
568 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
570 (define_mode_iterator VI4_128_8_256
574 (define_mode_iterator V8FI
578 (define_mode_iterator V16FI
581 ;; ??? We should probably use TImode instead.
582 (define_mode_iterator VIMAX_AVX2_AVX512BW
583 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
585 ;; Supposes TARGET_AVX512BW as baseline.
586 (define_mode_iterator VIMAX_AVX512VL
587 [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
589 (define_mode_iterator VIMAX_AVX2
590 [(V2TI "TARGET_AVX2") V1TI])
;; 1/2-byte element vector modes (256-bit with AVX2).
592 (define_mode_iterator VI12_AVX2
593 [(V32QI "TARGET_AVX2") V16QI
594 (V16HI "TARGET_AVX2") V8HI])
596 (define_mode_iterator VI12_AVX2_AVX512BW
597 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
598 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
;; 2/4-byte element vector modes (256-bit with AVX2).
600 (define_mode_iterator VI24_AVX2
601 [(V16HI "TARGET_AVX2") V8HI
602 (V8SI "TARGET_AVX2") V4SI])
;; 1/2/4-byte element modes; 512-bit 2/4-byte need AVX512F, 1-byte AVX512BW.
604 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
605 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
606 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
607 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
609 (define_mode_iterator VI124_AVX2
610 [(V32QI "TARGET_AVX2") V16QI
611 (V16HI "TARGET_AVX2") V8HI
612 (V8SI "TARGET_AVX2") V4SI])
614 (define_mode_iterator VI2_AVX2_AVX512BW
615 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
617 (define_mode_iterator VI248_AVX512VL
619 (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
620 (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
621 (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; 2/4/8-byte element AVX-512 modes; HI elements also need AVX512BW,
;; 128/256-bit forms need AVX512VL.
623 (define_mode_iterator VI248_AVX512VLBW
624 [(V32HI "TARGET_AVX512BW")
625 (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
626 (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
627 V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
628 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; 4/8-byte element vector modes (256-bit with AVX2).
630 (define_mode_iterator VI48_AVX2
631 [(V8SI "TARGET_AVX2") V4SI
632 (V4DI "TARGET_AVX2") V2DI])
;; 2/4/8-byte element vector modes (256-bit with AVX2).
634 (define_mode_iterator VI248_AVX2
635 [(V16HI "TARGET_AVX2") V8HI
636 (V8SI "TARGET_AVX2") V4SI
637 (V4DI "TARGET_AVX2") V2DI])
;; 2/4/8-byte element modes; 512-bit 8-byte needs AVX512F,
;; 512-bit 2/4-byte need AVX512BW (cf. the iterator name).
639 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
640 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
641 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
642 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; 512-bit 2/4/8-byte element modes; the HI form needs AVX512BW.
644 (define_mode_iterator VI248_AVX512BW
645 [(V32HI "TARGET_AVX512BW") V16SI V8DI])
647 (define_mode_iterator VI248_AVX512BW_AVX512VL
648 [(V32HI "TARGET_AVX512BW")
649 (V4DI "TARGET_AVX512VL") V16SI V8DI])
651 ;; Suppose TARGET_AVX512VL as baseline
652 (define_mode_iterator VI248_AVX512BW_1
653 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
657 (define_mode_iterator VI248_AVX512BW_2
658 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
;; 4/8-byte element vector modes; only 512-bit forms are conditional.
662 (define_mode_iterator VI48_AVX512F
663 [(V16SI "TARGET_AVX512F") V8SI V4SI
664 (V8DI "TARGET_AVX512F") V4DI V2DI])
;; 4/8-byte element modes (256-bit with AVX, 512-bit with AVX512F).
666 (define_mode_iterator VI48_AVX_AVX512F
667 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
668 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; 1/2-byte element modes (256-bit with AVX, 512-bit with AVX512F).
;; Dropped the stray space after '[' for consistency with the other iterators.
670 (define_mode_iterator VI12_AVX_AVX512F
671 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
672 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
674 (define_mode_iterator V48_AVX2
677 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
678 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
680 (define_mode_iterator VF4_128_8_256
;; Mapping of each vector mode to the AVX-512 extension that provides it.
683 (define_mode_attr avx512
684 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
685 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
686 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
687 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
688 (V8HF "avx512fp16") (V16HF "avx512vl") (V32HF "avx512bw")
689 (V8BF "avx512vl") (V16BF "avx512vl") (V32BF "avx512bw")
690 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
691 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
;; Register constraint per mode: "Yw" for the narrow QI/HI/TI cases,
;; plain "v" otherwise.  NOTE(review): see the constraint definitions
;; for the exact meaning of "Yw".
693 (define_mode_attr v_Yw
694 [(V16QI "Yw") (V32QI "Yw") (V64QI "v")
695 (V8HI "Yw") (V16HI "Yw") (V32HI "v")
696 (V4SI "v") (V8SI "v") (V16SI "v")
697 (V2DI "v") (V4DI "v") (V8DI "v")
698 (V4SF "v") (V8SF "v") (V16SF "v")
699 (V2DF "v") (V4DF "v") (V8DF "v")
700 (TI "Yw") (V1TI "Yw") (V2TI "Yw") (V4TI "v")])
;; ISA name per mode, used when composing insn/attribute names.
702 (define_mode_attr sse2_avx_avx512f
703 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
704 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
705 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
706 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
707 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
708 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
710 (define_mode_attr sse2_avx2
711 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
712 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
713 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
714 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
715 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
717 (define_mode_attr ssse3_avx2
718 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
719 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
720 (V4SI "ssse3") (V8SI "avx2")
721 (V2DI "ssse3") (V4DI "avx2")
722 (V1TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
724 (define_mode_attr sse4_1_avx2
725 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
726 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
727 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
728 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
730 (define_mode_attr avx_avx2
731 [(V4SF "avx") (V2DF "avx")
732 (V8SF "avx") (V4DF "avx")
733 (V4SI "avx2") (V2DI "avx2")
734 (V8SI "avx2") (V4DI "avx2")])
736 (define_mode_attr vec_avx2
737 [(V16QI "vec") (V32QI "avx2")
738 (V8HI "vec") (V16HI "avx2")
739 (V4SI "vec") (V8SI "avx2")
740 (V2DI "vec") (V4DI "avx2")])
742 (define_mode_attr avx2_avx512
743 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
744 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
745 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
746 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
747 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
;; "f" for float shuffles, "i" for integer shuffles (HF/BF use integer forms).
749 (define_mode_attr shuffletype
750 [(V32HF "i") (V16HF "i") (V8HF "i")
751 (V32BF "i") (V16BF "i") (V8BF "i")
752 (V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
753 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
754 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
755 (V32HI "i") (V16HI "i") (V8HI "i")
756 (V64QI "i") (V32QI "i") (V16QI "i")
757 (V4TI "i") (V2TI "i") (V1TI "i")])
;; Quarter-length vector mode for 512-bit modes.
759 (define_mode_attr ssequartermode
760 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
762 (define_mode_attr ssequarterinsnmode
763 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "TI") (V8DI "TI")])
;; Memory-operand size suffix by vector width: {x}=128, {y}=256, {z}=512.
765 (define_mode_attr vecmemsuffix
766 [(V32HF "{z}") (V16HF "{y}") (V8HF "{x}")
767 (V16SF "{z}") (V8SF "{y}") (V4SF "{x}")
768 (V8DF "{z}") (V4DF "{y}") (V2DF "{x}")])
;; Same element count, double-width elements, in lower case.
770 (define_mode_attr ssedoublemodelower
771 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
772 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
773 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
;; Double-sized mode.  NB: semantics are mixed — QI/HI and V4SI double the
;; element width (same element count), while the SF/DF rows and
;; V8SI/V16SI/V4DI/V8DI double the vector length.
775 (define_mode_attr ssedoublemode
776 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
777 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
778 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
779 (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
780 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
781 (V4DI "V8DI") (V8DI "V16DI")])
;; QImode vector of the same total size.
783 (define_mode_attr ssebytemode
784 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")
785 (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")])
;; Element-size suffix used by the integer conversion insns.
787 (define_mode_attr sseintconvert
788 [(V32HI "w") (V16HI "w") (V8HI "w")
789 (V16SI "dq") (V8SI "dq") (V4SI "dq")
790 (V8DI "qq") (V4DI "qq") (V2DI "qq")])
792 ;; All 128bit vector integer modes
793 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
795 ;; All 256bit vector integer modes
796 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
798 ;; All 128 and 256bit vector integer modes
799 (define_mode_iterator VI_128_256 [V16QI V8HI V4SI V2DI V32QI V16HI V8SI V4DI])
800 ;; All 256bit vector integer, HF and BF modes
801 (define_mode_iterator VIHFBF_256 [V32QI V16HI V8SI V4DI V16HF V16BF])
803 ;; Various 128bit vector integer mode combinations
804 (define_mode_iterator VI12_128 [V16QI V8HI])
805 (define_mode_iterator VI14_128 [V16QI V4SI])
806 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
807 (define_mode_iterator VI24_128 [V8HI V4SI])
808 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
809 (define_mode_iterator VI248_256 [V16HI V8SI V4DI])
810 (define_mode_iterator VI248_512 [V32HI V16SI V8DI])
811 (define_mode_iterator VI48_128 [V4SI V2DI])
812 (define_mode_iterator VI148_512 [V64QI V16SI V8DI])
813 (define_mode_iterator VI148_256 [V32QI V8SI V4DI])
814 (define_mode_iterator VI148_128 [V16QI V4SI V2DI])
816 ;; Various 256bit and 512bit vector integer mode combinations
817 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
818 (define_mode_iterator VI124_256_AVX512F_AVX512BW
820 (V64QI "TARGET_AVX512BW")
821 (V32HI "TARGET_AVX512BW")
822 (V16SI "TARGET_AVX512F")])
823 (define_mode_iterator VI48_256 [V8SI V4DI])
824 (define_mode_iterator VI48_512 [V16SI V8DI])
825 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
;; 512-bit integer modes; 1/2-byte element forms need AVX512BW.
826 (define_mode_iterator VI_AVX512BW
827 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
;; Like VI_AVX512BW, plus the 512-bit HF/BF modes.
828 (define_mode_iterator VIHFBF_AVX512BW
829 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")
830 (V32HF "TARGET_AVX512BW") (V32BF "TARGET_AVX512BW")])
832 ;; Int-float size matches
833 (define_mode_iterator VI2F_256_512 [V16HI V32HI V16HF V32HF V16BF V32BF])
834 (define_mode_iterator VI4F_128 [V4SI V4SF])
835 (define_mode_iterator VI8F_128 [V2DI V2DF])
836 (define_mode_iterator VI4F_256 [V8SI V8SF])
837 (define_mode_iterator VI8F_256 [V4DI V4DF])
838 (define_mode_iterator VI4F_256_512
840 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
841 (define_mode_iterator VI48F_256_512
843 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
844 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
845 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
;; 512-bit int and float modes with 1/2/4/8-byte elements.
846 (define_mode_iterator VF48_I1248
847 [V16SI V16SF V8DI V8DF V32HI V64QI])
;; 512-bit SF/DF modes, plus V8SF with AVX512VL.
848 (define_mode_iterator VF48H_AVX512VL
849 [V8DF V16SF (V8SF "TARGET_AVX512VL")])
851 (define_mode_iterator VF48_128
;; 4/8-byte element int and float AVX-512 modes;
;; 128/256-bit forms need AVX512VL.
854 (define_mode_iterator VI48F
855 [V16SI V16SF V8DI V8DF
856 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
857 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
858 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
859 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; Like VI48F, plus the 1/2-byte integer vector modes.
860 (define_mode_iterator VI12_VI48F_AVX512VL
861 [(V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
862 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
863 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
864 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
865 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
866 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
867 V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
868 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
;; 256-bit 4/8-byte element int and float modes.
870 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
872 (define_mode_iterator VF_AVX512
873 [(V4SF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
874 (V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
;; 16-bit element (HI/HF/BF) vector modes, grouped by vector width.
877 (define_mode_iterator V8_128 [V8HI V8HF V8BF])
878 (define_mode_iterator V16_256 [V16HI V16HF V16BF])
879 (define_mode_iterator V32_512 [V32HI V32HF V32BF])
881 ;; Mapping from float mode to required SSE level
882 (define_mode_attr sse
883 [(SF "sse") (DF "sse2") (HF "avx512fp16")
884 (V4SF "sse") (V2DF "sse2")
885 (V32HF "avx512fp16") (V16HF "avx512fp16")
887 (V16SF "avx512f") (V8SF "avx")
888 (V8DF "avx512f") (V4DF "avx")])
;; ISA name per QI/DI vector mode, used when composing insn names.
890 (define_mode_attr sse2
891 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
892 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
894 (define_mode_attr sse3
895 [(V16QI "sse3") (V32QI "avx")])
897 (define_mode_attr sse4_1
898 [(V4SF "sse4_1") (V2DF "sse4_1")
899 (V8SF "avx") (V4DF "avx")
901 (V2TI "avx") (V1TI "sse4_1")
902 (V4DI "avx") (V2DI "sse4_1")
903 (V8SI "avx") (V4SI "sse4_1")
904 (V16QI "sse4_1") (V32QI "avx")
905 (V8HI "sse4_1") (V16HI "avx")])
;; Vector-length suffix for insn names: "512"/"256", empty for 128-bit modes.
907 (define_mode_attr avxsizesuffix
908 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512") (V4TI "512")
909 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256") (V2TI "256")
910 (V16QI "") (V8HI "") (V4SI "") (V2DI "") (V1TI "")
911 (V32HF "512") (V16SF "512") (V8DF "512")
912 (V16HF "256") (V8SF "256") (V4DF "256")
913 (V8HF "") (V4SF "") (V2DF "")])
915 ;; SSE instruction mode
916 (define_mode_attr sseinsnmode
917 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
918 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
919 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
920 (V16SF "V16SF") (V8DF "V8DF")
921 (V8SF "V8SF") (V4DF "V4DF")
922 (V4SF "V4SF") (V2DF "V2DF")
923 (V8HF "TI") (V16HF "OI") (V32HF "XI")
924 (V8BF "TI") (V16BF "OI") (V32BF "XI")
927 (define_mode_attr sseintvecinsnmode
928 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
929 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
930 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
931 (V16SF "XI") (V8DF "XI")
932 (V8SF "OI") (V4DF "OI")
933 (V4SF "TI") (V2DF "TI")
936 ;; SSE constant -1 constraint
937 (define_mode_attr sseconstm1
938 [(V64QI "BC") (V32HI "BC") (V16SI "BC") (V8DI "BC") (V4TI "BC")
939 (V32QI "BC") (V16HI "BC") (V8SI "BC") (V4DI "BC") (V2TI "BC")
940 (V16QI "BC") (V8HI "BC") (V4SI "BC") (V2DI "BC") (V1TI "BC")
941 (V32HF "BF") (V32BF "BF") (V16SF "BF") (V8DF "BF")
942 (V16HF "BF") (V16BF "BF") (V8SF "BF") (V4DF "BF")
943 (V8HF "BF") (V8BF "BF") (V4SF "BF") (V2DF "BF")])
945 ;; SSE integer instruction suffix for various modes
946 (define_mode_attr sseintmodesuffix
947 [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")
948 (V32QI "b") (V16HI "w") (V8SI "d") (V4DI "q")
949 (V64QI "b") (V32HI "w") (V16SI "d") (V8DI "q")
950 (V8HF "w") (V16HF "w") (V32HF "w")
951 (V8BF "w") (V16BF "w") (V32BF "w")])
953 ;; Mapping of vector modes to corresponding mask size
954 (define_mode_attr avx512fmaskmode
955 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
956 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
957 (V16SI "HI") (V8SI "QI") (V4SI "QI")
958 (V8DI "QI") (V4DI "QI") (V2DI "QI")
959 (V32HF "SI") (V16HF "HI") (V8HF "QI")
960 (V32BF "SI") (V16BF "HI") (V8BF "QI")
961 (V16SF "HI") (V8SF "QI") (V4SF "QI")
962 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
964 ;; Mapping of vector modes to corresponding complex mask size
965 (define_mode_attr avx512fmaskcmode
966 [(V32HF "HI") (V16HF "QI") (V8HF "QI")])
968 ;; Mapping of vector modes to corresponding mask mode, in lower case.
969 (define_mode_attr avx512fmaskmodelower
970 [(V64QI "di") (V32QI "si") (V16QI "hi")
971 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
972 (V16SI "hi") (V8SI "qi") (V4SI "qi")
973 (V8DI "qi") (V4DI "qi") (V2DI "qi")
974 (V32HF "si") (V16HF "hi") (V8HF "qi")
975 (V32BF "si") (V16BF "hi") (V8BF "qi")
976 (V16SF "hi") (V8SF "qi") (V4SF "qi")
977 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
979 ;; Mapping of vector modes to corresponding mask half size
980 (define_mode_attr avx512fmaskhalfmode
981 [(V64QI "SI") (V32QI "HI") (V16QI "QI")
982 (V32HI "HI") (V16HI "QI") (V8HI "QI") (V4HI "QI")
983 (V16SI "QI") (V8SI "QI") (V4SI "QI")
984 (V8DI "QI") (V4DI "QI") (V2DI "QI")
985 (V32HF "HI") (V16HF "QI") (V8HF "QI")
986 (V32BF "HI") (V16BF "QI") (V8BF "QI")
987 (V16SF "QI") (V8SF "QI") (V4SF "QI")
988 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
990 ;; Mapping of vector float modes to an integer mode of the same size
;; (integer vector modes map to themselves).
991 (define_mode_attr sseintvecmode
992 [(V32HF "V32HI") (V32BF "V32HI") (V16SF "V16SI") (V8DF "V8DI")
993 (V16HF "V16HI") (V16BF "V16HI") (V8SF "V8SI") (V4DF "V4DI")
994 (V8HF "V8HI") (V8BF "V8HI") (V4SF "V4SI") (V2DF "V2DI")
995 (V16SI "V16SI") (V8DI "V8DI")
996 (V8SI "V8SI") (V4DI "V4DI")
997 (V4SI "V4SI") (V2DI "V2DI")
998 (V16HI "V16HI") (V8HI "V8HI")
999 (V32HI "V32HI") (V64QI "V64QI")
1000 (V32QI "V32QI") (V16QI "V16QI")])
1002 ;; Mapping of vector modes to a V*HImode of the same size
1003 (define_mode_attr ssewvecmode
1004 [(V8DI "V32HI") (V4DI "V16HI") (V2DI "V8HI")
1005 (V16SI "V32HI") (V8SI "V16HI") (V4SI "V8HI")])
1007 (define_mode_attr ssewvecmodelower
1008 [(V8DI "v32hi") (V4DI "v16hi") (V2DI "v8hi")
1009 (V16SI "v32hi") (V8SI "v16hi") (V4SI "v8hi")])
;; Integer insn mode (TI/OI/XI) of the same size as a float vector mode.
1011 (define_mode_attr sseintvecmode2
1012 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
1013 (V8SF "OI") (V4SF "TI")
1014 (V16HF "OI") (V8HF "TI")])
;; Lower-case variant of sseintvecmode.
1016 (define_mode_attr sseintvecmodelower
1017 [(V32HF "v32hi") (V32BF "v32hi") (V16SF "v16si") (V8DF "v8di")
1018 (V16HF "v16hi") (V16BF "v16hi") (V8SF "v8si") (V4DF "v4di")
1019 (V8HF "v8hi") (V8BF "v8hi") (V4SF "v4si") (V2DF "v2di")
1020 (V8SI "v8si") (V4DI "v4di")
1021 (V4SI "v4si") (V2DI "v2di")
1022 (V16HI "v16hi") (V8HI "v8hi")
1023 (V32QI "v32qi") (V16QI "v16qi")])
1025 ;; Mapping of vector modes to a V*SImode of the same size
1026 (define_mode_attr ssedvecmode
1027 [(V64QI "V16SI") (V32QI "V8SI") (V16QI "V4SI")])
1029 (define_mode_attr ssedvecmodelower
1030 [(V64QI "v16si") (V32QI "v8si") (V16QI "v4si")])
1032 ;; Mapping of vector modes to a vector mode of double size
1033 (define_mode_attr ssedoublevecmode
1034 [(V64QI "V128QI") (V32HI "V64HI") (V16SI "V32SI") (V8DI "V16DI")
1035 (V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
1036 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
1037 (V16SF "V32SF") (V8DF "V16DF")
1038 (V8SF "V16SF") (V4DF "V8DF")
1039 (V4SF "V8SF") (V2DF "V4DF")
1040 (V32HF "V64HF") (V16HF "V32HF") (V8HF "V16HF")
1041 (V32BF "V64BF") (V16BF "V32BF") (V8BF "V16BF")])
1043 ;; Mapping of vector modes to a vector mode of half size
1044 ;; instead of V1DI/V1DF, DI/DF are used for V2DI/V2DF although they are scalar.
1045 (define_mode_attr ssehalfvecmode
1046 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
1047 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
1048 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V2DI "DI")
1049 (V16SF "V8SF") (V8DF "V4DF")
1050 (V8SF "V4SF") (V4DF "V2DF")
1051 (V4SF "V2SF") (V2DF "DF")
1052 (V32HF "V16HF") (V16HF "V8HF") (V8HF "V4HF")
1053 (V32BF "V16BF") (V16BF "V8BF") (V8BF "V4BF")])
;; Lower-case names of the half-size vector modes.
;; NOTE(review): unlike ssehalfvecmode there are no V2DI/V2DF/V4SF
;; entries visible here -- confirm against the full file that this
;; is intended (scalar di/df names would not be vector modes).
(define_mode_attr ssehalfvecmodelower
  [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
   (V32QI "v16qi") (V16HI "v8hi") (V8SI "v4si") (V4DI "v2di")
   (V16QI "v8qi") (V8HI "v4hi") (V4SI "v2si")
   (V16SF "v8sf") (V8DF "v4df")
   (V8SF "v4sf") (V4DF "v2df")
   (V32HF "v16hf") (V16HF "v8hf") (V8HF "v4hf")
   (V32BF "v16bf") (V16BF "v8bf") (V8BF "v4bf")])
;; Mapping of vector modes to vector hf modes of conversion;
;; sources narrower than 128 bits still map to the 128-bit V8HF.
(define_mode_attr ssePHmode
  [(V32HI "V32HF") (V16HI "V16HF") (V8HI "V8HF")
   (V16SI "V16HF") (V8SI "V8HF") (V4SI "V8HF")
   (V8DI "V8HF") (V4DI "V8HF") (V2DI "V8HF")
   (V8DF "V8HF") (V16SF "V16HF") (V8SF "V8HF")])

;; Mapping of vector modes to lower-case hf vector mode names with the
;; same element count.
;; NOTE(review): V4SI/V4DI/V2DI map to v4hf/v4hf/v2hf here, not to v8hf
;; as in ssePHmode -- confirm the asymmetry is intended.
(define_mode_attr ssePHmodelower
  [(V32HI "v32hf") (V16HI "v16hf") (V8HI "v8hf")
   (V16SI "v16hf") (V8SI "v8hf") (V4SI "v4hf")
   (V8DI "v8hf") (V4DI "v4hf") (V2DI "v2hf")
   (V8DF "v8hf") (V16SF "v16hf") (V8SF "v8hf")])
;; Mapping of vector modes to packed single mode of the same size.
(define_mode_attr ssePSmode
  [(V16SI "V16SF") (V8DF "V16SF")
   (V16SF "V16SF") (V8DI "V16SF")
   (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
   (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
   (V8SI "V8SF") (V4SI "V4SF")
   (V4DI "V8SF") (V2DI "V4SF")
   (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
   (V8SF "V8SF") (V4SF "V4SF")
   (V4DF "V8SF") (V2DF "V4SF")
   (V32HF "V16SF") (V16HF "V8SF") (V8HF "V4SF")])

;; Lower-case names of the same-size packed single modes.
(define_mode_attr ssePSmodelower
  [(V16SI "v16sf") (V8DF "v16sf")
   (V16SF "v16sf") (V8DI "v16sf")
   (V64QI "v16sf") (V32QI "v8sf") (V16QI "v4sf")
   (V32HI "v16sf") (V16HI "v8sf") (V8HI "v4sf")
   (V8SI "v8sf") (V4SI "v4sf")
   (V4DI "v8sf") (V2DI "v4sf")
   (V4TI "v16sf") (V2TI "v8sf") (V1TI "v4sf")
   (V8SF "v8sf") (V4SF "v4sf")
   (V4DF "v8sf") (V2DF "v4sf")
   (V32HF "v16sf") (V16HF "v8sf") (V8HF "v4sf")])

;; Map 64-bit-element integer vector modes to the packed single mode
;; with the same element count (half the total size).
(define_mode_attr ssePSmode2
  [(V8DI "V8SF") (V4DI "V4SF")])
;; Mapping of vector modes back to the scalar modes.
(define_mode_attr ssescalarmode
  [(V64QI "QI") (V32QI "QI") (V16QI "QI")
   (V32HI "HI") (V16HI "HI") (V8HI "HI")
   (V16SI "SI") (V8SI "SI") (V4SI "SI")
   (V8DI "DI") (V4DI "DI") (V2DI "DI")
   (V32HF "HF") (V16HF "HF") (V8HF "HF")
   (V32BF "BF") (V16BF "BF") (V8BF "BF")
   (V16SF "SF") (V8SF "SF") (V4SF "SF")
   (V8DF "DF") (V4DF "DF") (V2DF "DF")
   (V4TI "TI") (V2TI "TI")])

;; Same mapping, but yielding the lower-case scalar mode names.
(define_mode_attr ssescalarmodelower
  [(V64QI "qi") (V32QI "qi") (V16QI "qi")
   (V32HI "hi") (V16HI "hi") (V8HI "hi")
   (V16SI "si") (V8SI "si") (V4SI "si")
   (V8DI "di") (V4DI "di") (V2DI "di")
   (V32HF "hf") (V16HF "hf") (V8HF "hf")
   (V32BF "bf") (V16BF "bf") (V8BF "bf")
   (V16SF "sf") (V8SF "sf") (V4SF "sf")
   (V8DF "df") (V4DF "df") (V2DF "df")
   (V4TI "ti") (V2TI "ti")])
;; Mapping of vector modes to the corresponding 128-bit (xmm) modes.
(define_mode_attr ssexmmmode
  [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
   (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
   (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
   (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
   (V32HF "V8HF") (V16HF "V8HF") (V8HF "V8HF")
   (V32BF "V8BF") (V16BF "V8BF") (V8BF "V8BF")
   (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
   (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
;; Pointer size override for scalar modes (Intel asm dialect);
;; b/w/k/q select byte/word/dword/qword operand size by element width.
(define_mode_attr iptr
  [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
   (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
   (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
   (V32HF "w") (V32BF "w") (V16SF "k") (V8DF "q")
   (V16HF "w") (V16BF "w") (V8SF "k") (V4DF "q")
   (V8HF "w") (V8BF "w") (V4SF "k") (V2DF "q")
   (HF "w") (BF "w") (SF "k") (DF "q")])
;; Mapping of vector modes to VPTERNLOG suffix: "q" for 64-bit
;; elements, "d" for everything narrower.
(define_mode_attr ternlogsuffix
  [(V8DI "q") (V4DI "q") (V2DI "q")
   (V8DF "q") (V4DF "q") (V2DF "q")
   (V16SI "d") (V8SI "d") (V4SI "d")
   (V16SF "d") (V8SF "d") (V4SF "d")
   (V32HI "d") (V16HI "d") (V8HI "d")
   (V32HF "d") (V16HF "d") (V8HF "d")
   (V32BF "d") (V16BF "d") (V8BF "d")
   (V64QI "d") (V32QI "d") (V16QI "d")])
;; Number of scalar elements in each vector type.
(define_mode_attr ssescalarnum
  [(V64QI "64") (V16SI "16") (V8DI "8")
   (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
   (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
   (V16SF "16") (V8DF "8")
   (V8SF "8") (V4DF "4")
   (V4SF "4") (V2DF "2")])

;; Mask of scalar elements in each vector type
;; (element count minus one, usable as a mask on element indices).
(define_mode_attr ssescalarnummask
  [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
   (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
   (V8SF "7") (V4DF "3")
   (V4SF "3") (V2DF "1")])
;; Bit width of a single element of each vector mode.
;; NOTE(review): the V*TI entries yield "64", not "128" -- apparently
;; deliberate; confirm against the users of this attribute.
(define_mode_attr ssescalarsize
  [(V4TI "64") (V2TI "64") (V1TI "64")
   (V8DI "64") (V4DI "64") (V2DI "64")
   (V64QI "8") (V32QI "8") (V16QI "8")
   (V32HI "16") (V16HI "16") (V8HI "16")
   (V16SI "32") (V8SI "32") (V4SI "32")
   (V32HF "16") (V16HF "16") (V8HF "16")
   (V32BF "16") (V16BF "16") (V8BF "16")
   (V16SF "32") (V8SF "32") (V4SF "32")
   (V8DF "64") (V4DF "64") (V2DF "64")])
;; SSE prefix for integer and HF vector modes:
;; "p" for integer/HF/BF element modes, empty for SF/DF modes.
(define_mode_attr sseintprefix
  [(V2DI "p") (V2DF "")
   (V4DI "p") (V4DF "")
   (V8DI "p") (V8DF "")
   (V4SI "p") (V4SF "")
   (V8SI "p") (V8SF "")
   (V16SI "p") (V16SF "")
   (V16QI "p") (V8HI "p") (V8HF "p") (V8BF "p")
   (V32QI "p") (V16HI "p") (V16HF "p") (V16BF "p")
   (V64QI "p") (V32HI "p") (V32HF "p") (V32BF "p")])

;; SSE prefix for integer and HF vector comparison; unlike
;; sseintprefix, the HF modes get the empty (FP) prefix here.
(define_mode_attr ssecmpintprefix
  [(V2DI "p") (V2DF "")
   (V4DI "p") (V4DF "")
   (V8DI "p") (V8DF "")
   (V4SI "p") (V4SF "")
   (V8SI "p") (V8SF "")
   (V16SI "p") (V16SF "")
   (V16QI "p") (V8HI "p") (V8HF "")
   (V32QI "p") (V16HI "p") (V16HF "")
   (V64QI "p") (V32HI "p") (V32HF "")])
;; SSE scalar suffix for vector modes
;; (sh/ss/sd for FP elements, d/q for 32/64-bit integer elements).
(define_mode_attr ssescalarmodesuffix
  [(HF "sh") (SF "ss") (DF "sd")
   (V32HF "sh") (V16SF "ss") (V8DF "sd")
   (V16HF "sh") (V8SF "ss") (V4DF "sd")
   (V8HF "sh") (V4SF "ss") (V2DF "sd")
   (V16SI "d") (V8DI "q")
   (V8SI "d") (V4DI "q")
   (V4SI "d") (V2DI "q")])
;; Pack/unpack vector modes: widen to the vector mode with half the
;; element count and double the element width.
(define_mode_attr sseunpackmode
  [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
   (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
   (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])

;; Inverse of sseunpackmode: narrow to double the element count.
(define_mode_attr ssepackmode
  [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
   (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
   (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])

;; Lower-case names of the ssepackmode mapping.
(define_mode_attr ssepackmodelower
  [(V8HI "v16qi") (V4SI "v8hi") (V2DI "v4si")
   (V16HI "v32qi") (V8SI "v16hi") (V4DI "v8si")
   (V32HI "v64qi") (V16SI "v32hi") (V8DI "v16si")])
;; Mapping of the max integer size for xop rotate immediate constraint
;; (element bit width minus one).
(define_mode_attr sserotatemax
  [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])

;; Mapping of mode to cast intrinsic name.
(define_mode_attr castmode
  [(V4SF "ps") (V2DF "pd")
   (V8SI "si") (V8SF "ps") (V4DF "pd")
   (V16SI "si") (V16SF "ps") (V8DF "pd")])
;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
;; i64x4 or f64x4 for 512bit modes.
(define_mode_attr i128
  [(V16HF "%~128") (V32HF "i64x4") (V16BF "%~128") (V32BF "i64x4")
   (V16SF "f64x4") (V8SF "f128")
   (V8DF "f64x4") (V4DF "f128")
   (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
   (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])

;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ:
;; i32x4, f32x4, i64x2 or f64x2 suffixes.
(define_mode_attr i128vldq
  [(V16HF "i32x4") (V16BF "i32x4") (V8SF "f32x4") (V4DF "f64x2")
   (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
;; 256-bit and 512-bit vector modes with SI/SF/DF elements.
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
(define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])

;; Mapping for dbpsadbw modes: an HI vector to the QI vector with
;; twice the element count.
(define_mode_attr dbpsadbwmode
  [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
;; Mapping of suffixes for broadcast.
(define_mode_attr bcstscalarsuff
  [(V64QI "b") (V32QI "b") (V16QI "b")
   (V32HI "w") (V16HI "w") (V8HI "w")
   (V16SI "d") (V8SI "d") (V4SI "d")
   (V8DI "q") (V4DI "q") (V2DI "q")
   (V32HF "w") (V16HF "w") (V8HF "w")
   (V32BF "w") (V16BF "w") (V8BF "w")
   (V16SF "ss") (V8SF "ss") (V4SF "ss")
   (V8DF "sd") (V4DF "sd") (V2DF "sd")])
;; Tie mode of assembler operand to mode iterator:
;; "x" for 128-bit, "t" for 256-bit, "g" for 512-bit vector modes.
(define_mode_attr xtg_mode
  [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x")
   (V8HF "x") (V8BF "x") (V4SF "x") (V2DF "x")
   (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t")
   (V16HF "t") (V16BF "t") (V8SF "t") (V4DF "t")
   (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g")
   (V32HF "g") (V32BF "g") (V16SF "g") (V8DF "g")])
;; Half mask mode for unpacks.
(define_mode_attr HALFMASKMODE
  [(DI "SI") (SI "HI")])

;; Double mask mode for packs.
(define_mode_attr DOUBLEMASKMODE
  [(HI "SI") (SI "DI")])
1302 ;; Include define_subst patterns for instructions with mask
1303 (include "subst.md")
1305 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
1307 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1311 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1313 ;; All of these patterns are enabled for SSE1 as well as SSE2.
1314 ;; This is essential for maintaining stable calling conventions.
1316 (define_expand "mov<mode>"
1317 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1318 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1321 ix86_expand_vector_move (<MODE>mode, operands);
1325 (define_insn "mov<mode>_internal"
1326 [(set (match_operand:VMOVE 0 "nonimmediate_operand"
1328 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
1329 " C,<sseconstm1>,BH,vm,v"))]
1331 && (register_operand (operands[0], <MODE>mode)
1332 || register_operand (operands[1], <MODE>mode))
1333 && ix86_hardreg_mov_ok (operands[0], operands[1])"
1335 switch (get_attr_type (insn))
1338 return standard_sse_constant_opcode (insn, operands);
1341 return ix86_output_ssemov (insn, operands);
1347 [(set_attr "type" "sselog1,sselog1,sselog1,ssemov,ssemov")
1348 (set_attr "prefix" "maybe_vex")
1350 (cond [(match_test "TARGET_AVX")
1351 (const_string "<sseinsnmode>")
1352 (ior (not (match_test "TARGET_SSE2"))
1353 (match_test "optimize_function_for_size_p (cfun)"))
1354 (const_string "V4SF")
1355 (and (match_test "<MODE>mode == V2DFmode")
1356 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1357 (const_string "V4SF")
1358 (and (eq_attr "alternative" "4")
1359 (match_test "TARGET_SSE_TYPELESS_STORES"))
1360 (const_string "V4SF")
1361 (and (eq_attr "alternative" "0")
1362 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
1365 (const_string "<sseinsnmode>")))
1366 (set (attr "enabled")
1367 (cond [(and (match_test "<MODE_SIZE> == 16")
1368 (eq_attr "alternative" "1"))
1369 (symbol_ref "TARGET_SSE2")
1370 (and (match_test "<MODE_SIZE> == 32")
1371 (eq_attr "alternative" "1"))
1372 (symbol_ref "TARGET_AVX2")
1374 (symbol_ref "true")))])
;; If mem_addr points to a memory region with less than whole vector size
;; bytes of accessible memory and k is a mask that would prevent reading
;; the inaccessible bytes from mem_addr, add UNSPEC_MASKLOAD to prevent
;; it from being transformed to vpblendd.
1380 (define_expand "<avx512>_load<mode>_mask"
1381 [(set (match_operand:V48_AVX512VL 0 "register_operand")
1382 (vec_merge:V48_AVX512VL
1383 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
1384 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
1385 (match_operand:<avx512fmaskmode> 3 "register_or_constm1_operand")))]
1388 if (CONST_INT_P (operands[3]))
1390 emit_insn (gen_rtx_SET (operands[0], operands[1]));
1393 else if (MEM_P (operands[1]))
1394 operands[1] = gen_rtx_UNSPEC (<MODE>mode,
1395 gen_rtvec(1, operands[1]),
1399 (define_insn "*<avx512>_load<mode>_mask"
1400 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1401 (vec_merge:V48_AVX512VL
1402 (unspec:V48_AVX512VL
1403 [(match_operand:V48_AVX512VL 1 "memory_operand" "m")]
1405 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C")
1406 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1409 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1411 if (misaligned_operand (operands[1], <MODE>mode))
1412 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1414 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1418 if (misaligned_operand (operands[1], <MODE>mode))
1419 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1421 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1424 [(set_attr "type" "ssemov")
1425 (set_attr "prefix" "evex")
1426 (set_attr "mode" "<sseinsnmode>")])
1428 (define_insn_and_split "*<avx512>_load<mode>"
1429 [(set (match_operand:V48_AVX512VL 0 "register_operand")
1430 (unspec:V48_AVX512VL
1431 [(match_operand:V48_AVX512VL 1 "memory_operand")]
1436 [(set (match_dup 0) (match_dup 1))])
1438 (define_expand "<avx512>_load<mode>_mask"
1439 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
1440 (vec_merge:VI12_AVX512VL
1441 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
1442 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
1443 (match_operand:<avx512fmaskmode> 3 "register_or_constm1_operand")))]
1446 if (CONST_INT_P (operands[3]))
1448 emit_insn (gen_rtx_SET (operands[0], operands[1]));
1451 else if (MEM_P (operands[1]))
1452 operands[1] = gen_rtx_UNSPEC (<MODE>mode,
1453 gen_rtvec(1, operands[1]),
1458 (define_insn "*<avx512>_load<mode>_mask"
1459 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1460 (vec_merge:VI12_AVX512VL
1461 (unspec:VI12_AVX512VL
1462 [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
1464 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
1465 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1467 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1468 [(set_attr "type" "ssemov")
1469 (set_attr "prefix" "evex")
1470 (set_attr "mode" "<sseinsnmode>")])
1472 (define_insn_and_split "*<avx512>_load<mode>"
1473 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1474 (unspec:VI12_AVX512VL
1475 [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
1480 [(set (match_dup 0) (match_dup 1))])
1482 (define_insn "avx512f_mov<ssescalarmodelower>_mask"
1483 [(set (match_operand:VFH_128 0 "register_operand" "=v")
1486 (match_operand:VFH_128 2 "register_operand" "v")
1487 (match_operand:VFH_128 3 "nonimm_or_0_operand" "0C")
1488 (match_operand:QI 4 "register_operand" "Yk"))
1489 (match_operand:VFH_128 1 "register_operand" "v")
1492 "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
1493 [(set_attr "type" "ssemov")
1494 (set_attr "prefix" "evex")
1495 (set_attr "mode" "<ssescalarmode>")])
1497 (define_expand "avx512f_load<mode>_mask"
1498 [(set (match_operand:<ssevecmode> 0 "register_operand")
1499 (vec_merge:<ssevecmode>
1500 (vec_merge:<ssevecmode>
1501 (vec_duplicate:<ssevecmode>
1502 (match_operand:MODEFH 1 "memory_operand"))
1503 (match_operand:<ssevecmode> 2 "nonimm_or_0_operand")
1504 (match_operand:QI 3 "register_operand"))
1508 "operands[4] = CONST0_RTX (<ssevecmode>mode);")
;; Masked scalar load: broadcast-load a scalar from memory and merge it
;; with operand 2 under mask bit operand 3 (upper elements zeroed).
;; Bug fix: the Intel-dialect half of the template printed "%{3%}"
;; (a literal "{3}") instead of "%{%3%}"; both dialects must print the
;; mask register operand 3, as the sibling masked move patterns do.
;; NOTE(review): the (const_int 1) selector and the TARGET_AVX512F
;; condition follow the matching avx512f_load<mode>_mask expander --
;; confirm against the full file.
(define_insn "*avx512f_load<mode>_mask"
  [(set (match_operand:<ssevecmode> 0 "register_operand" "=v")
	(vec_merge:<ssevecmode>
	  (vec_merge:<ssevecmode>
	    (vec_duplicate:<ssevecmode>
	      (match_operand:MODEFH 1 "memory_operand" "m"))
	    (match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
	    (match_operand:QI 3 "register_operand" "Yk"))
	  (match_operand:<ssevecmode> 4 "const0_operand")
	  (const_int 1)))]
  "TARGET_AVX512F"
  "vmov<ssescalarmodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "load")
   (set_attr "mode" "<MODE>")])
1527 (define_insn "avx512f_store<mode>_mask"
1528 [(set (match_operand:MODEFH 0 "memory_operand" "=m")
1529 (if_then_else:MODEFH
1530 (and:QI (match_operand:QI 2 "register_operand" "Yk")
1533 (match_operand:<ssevecmode> 1 "register_operand" "v")
1534 (parallel [(const_int 0)]))
1537 "vmov<ssescalarmodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1538 [(set_attr "type" "ssemov")
1539 (set_attr "prefix" "evex")
1540 (set_attr "memory" "store")
1541 (set_attr "mode" "<MODE>")])
1543 (define_insn "<avx512>_blendm<mode>"
1544 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1545 (vec_merge:V48_AVX512VL
1546 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm,vm")
1547 (match_operand:V48_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
1548 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1551 if (REG_P (operands[1])
1552 && REGNO (operands[1]) != REGNO (operands[0]))
1553 return "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}";
1555 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1557 if (misaligned_operand (operands[2], <MODE>mode))
1558 return "vmovu<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1560 return "vmova<ssemodesuffix>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1564 if (misaligned_operand (operands[2], <MODE>mode))
1565 return "vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1567 return "vmovdqa<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}";
1570 [(set_attr "type" "ssemov")
1571 (set_attr "prefix" "evex")
1572 (set_attr "mode" "<sseinsnmode>")])
1574 (define_insn "<avx512>_blendm<mode>"
1575 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1576 (vec_merge:VI12_AVX512VL
1577 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm,vm")
1578 (match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "0C,v")
1579 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1582 vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}
1583 vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1584 [(set_attr "type" "ssemov")
1585 (set_attr "prefix" "evex")
1586 (set_attr "mode" "<sseinsnmode>")])
1588 (define_insn "<avx512>_blendm<mode>"
1589 [(set (match_operand:VF_AVX512HFBFVL 0 "register_operand" "=v,v")
1590 (vec_merge:VF_AVX512HFBFVL
1591 (match_operand:VF_AVX512HFBFVL 2 "nonimmediate_operand" "vm,vm")
1592 (match_operand:VF_AVX512HFBFVL 1 "nonimm_or_0_operand" "0C,v")
1593 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1596 vmovdqu<ssescalarsize>\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2}
1597 vpblendmw\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1598 [(set_attr "type" "ssemov")
1599 (set_attr "prefix" "evex")
1600 (set_attr "mode" "<sseinsnmode>")])
1602 (define_insn "<avx512>_store<mode>_mask"
1603 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1604 (vec_merge:V48_AVX512VL
1605 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1607 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1610 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1612 if (misaligned_operand (operands[0], <MODE>mode))
1613 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1615 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1619 if (misaligned_operand (operands[0], <MODE>mode))
1620 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1622 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1625 [(set_attr "type" "ssemov")
1626 (set_attr "prefix" "evex")
1627 (set_attr "memory" "store")
1628 (set_attr "mode" "<sseinsnmode>")])
1630 (define_insn "<avx512>_store<mode>_mask"
1631 [(set (match_operand:VI12HFBF_AVX512VL 0 "memory_operand" "=m")
1632 (vec_merge:VI12HFBF_AVX512VL
1633 (match_operand:VI12HFBF_AVX512VL 1 "register_operand" "v")
1635 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1637 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1638 [(set_attr "type" "ssemov")
1639 (set_attr "prefix" "evex")
1640 (set_attr "memory" "store")
1641 (set_attr "mode" "<sseinsnmode>")])
1643 (define_expand "sse2_movq128"
1644 [(set (match_operand:V2DI 0 "register_operand")
1647 (match_operand:V2DI 1 "nonimmediate_operand")
1648 (parallel [(const_int 0)]))
1652 (define_insn "*sse2_movq128_<mode>"
1653 [(set (match_operand:VI8F_128 0 "register_operand" "=v")
1654 (vec_concat:VI8F_128
1655 (vec_select:<ssescalarmode>
1656 (match_operand:VI8F_128 1 "nonimmediate_operand" "vm")
1657 (parallel [(const_int 0)]))
1658 (match_operand:<ssescalarmode> 2 "const0_operand")))]
1660 "%vmovq\t{%1, %0|%0, %q1}"
1661 [(set_attr "type" "ssemov")
1662 (set_attr "prefix" "maybe_vex")
1663 (set_attr "mode" "TI")])
1665 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1666 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1667 ;; from memory, we'd prefer to load the memory directly into the %xmm
1668 ;; register. To facilitate this happy circumstance, this pattern won't
1669 ;; split until after register allocation. If the 64-bit value didn't
1670 ;; come from memory, this is the best we can do. This is much better
1671 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1674 (define_insn_and_split "movdi_to_sse"
1675 [(set (match_operand:V4SI 0 "register_operand" "=x,x,?x")
1676 (unspec:V4SI [(match_operand:DI 1 "nonimmediate_operand" "r,m,r")]
1677 UNSPEC_MOVDI_TO_SSE))
1678 (clobber (match_scratch:V4SI 2 "=X,X,&x"))]
1679 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1681 "&& reload_completed"
1684 if (register_operand (operands[1], DImode))
1686 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1687 Assemble the 64-bit DImode value in an xmm register. */
1688 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1689 gen_lowpart (SImode, operands[1])));
1691 emit_insn (gen_sse4_1_pinsrd (operands[0], operands[0],
1692 gen_highpart (SImode, operands[1]),
1696 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1697 gen_highpart (SImode, operands[1])));
1698 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1702 else if (memory_operand (operands[1], DImode))
1703 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
1704 operands[1], const0_rtx));
1709 [(set_attr "isa" "sse4,*,*")])
1712 [(set (match_operand:V4SF 0 "register_operand")
1713 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1714 "TARGET_SSE && reload_completed"
1717 (vec_duplicate:V4SF (match_dup 1))
1721 operands[1] = gen_lowpart (SFmode, operands[1]);
1722 operands[2] = CONST0_RTX (V4SFmode);
1726 [(set (match_operand:V2DF 0 "register_operand")
1727 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1728 "TARGET_SSE2 && reload_completed"
1729 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1731 operands[1] = gen_lowpart (DFmode, operands[1]);
1732 operands[2] = CONST0_RTX (DFmode);
1735 (define_expand "movmisalign<mode>"
1736 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1737 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1740 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1744 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
1746 [(set (match_operand:V2DF 0 "sse_reg_operand")
1747 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1748 (match_operand:DF 4 "const0_operand")))
1749 (set (match_operand:V2DF 2 "sse_reg_operand")
1750 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1751 (parallel [(const_int 0)]))
1752 (match_operand:DF 3 "memory_operand")))]
1753 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1754 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1755 [(set (match_dup 2) (match_dup 5))]
1756 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1759 [(set (match_operand:DF 0 "sse_reg_operand")
1760 (match_operand:DF 1 "memory_operand"))
1761 (set (match_operand:V2DF 2 "sse_reg_operand")
1762 (vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1763 (match_operand:DF 3 "memory_operand")))]
1764 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1765 && REGNO (operands[4]) == REGNO (operands[2])
1766 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1767 [(set (match_dup 2) (match_dup 5))]
1768 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1770 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
1772 [(set (match_operand:DF 0 "memory_operand")
1773 (vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1774 (parallel [(const_int 0)])))
1775 (set (match_operand:DF 2 "memory_operand")
1776 (vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1777 (parallel [(const_int 1)])))]
1778 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1779 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1780 [(set (match_dup 4) (match_dup 1))]
1781 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1783 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1784 [(set (match_operand:VI1 0 "register_operand" "=x")
1785 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1788 "%vlddqu\t{%1, %0|%0, %1}"
1789 [(set_attr "type" "ssemov")
1790 (set_attr "movu" "1")
1791 (set (attr "prefix_data16")
1793 (match_test "TARGET_AVX")
1795 (const_string "0")))
1796 (set (attr "prefix_rep")
1798 (match_test "TARGET_AVX")
1800 (const_string "1")))
1801 (set_attr "prefix" "maybe_vex")
1802 (set_attr "mode" "<sseinsnmode>")])
1804 (define_insn "sse2_movnti<mode>"
1805 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1806 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1809 "movnti\t{%1, %0|%0, %1}"
1810 [(set_attr "type" "ssemov")
1811 (set_attr "prefix_data16" "0")
1812 (set_attr "mode" "<MODE>")])
1814 (define_insn "<sse>_movnt<mode>"
1815 [(set (match_operand:VF 0 "memory_operand" "=m")
1817 [(match_operand:VF 1 "register_operand" "v")]
1820 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1821 [(set_attr "type" "ssemov")
1822 (set_attr "prefix" "maybe_vex")
1823 (set_attr "mode" "<MODE>")])
1825 (define_insn "<sse2>_movnt<mode>"
1826 [(set (match_operand:VI8 0 "memory_operand" "=m")
1827 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1830 "%vmovntdq\t{%1, %0|%0, %1}"
1831 [(set_attr "type" "ssecvt")
1832 (set (attr "prefix_data16")
1834 (match_test "TARGET_AVX")
1836 (const_string "1")))
1837 (set_attr "prefix" "maybe_vex")
1838 (set_attr "mode" "<sseinsnmode>")])
1840 ; Expand patterns for non-temporal stores. At the moment, only those
1841 ; that directly map to insns are defined; it would be possible to
1842 ; define patterns for other modes that would expand to several insns.
1844 ;; Modes handled by storent patterns.
1845 (define_mode_iterator STORENT_MODE
1846 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1847 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1848 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1849 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1850 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1852 (define_expand "storent<mode>"
1853 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1854 (unspec:STORENT_MODE
1855 [(match_operand:STORENT_MODE 1 "register_operand")]
1859 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1863 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1865 ;; All integer modes with AVX512BW/DQ.
1866 (define_mode_iterator SWI1248_AVX512BWDQ
1867 [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1869 ;; All integer modes with AVX512BW, where HImode operation
1870 ;; can be used instead of QImode.
1871 (define_mode_iterator SWI1248_AVX512BW
1872 [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1874 ;; All integer modes with AVX512BW/DQ, even HImode requires DQ.
1875 (define_mode_iterator SWI1248_AVX512BWDQ2
1876 [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1877 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1879 (define_expand "kmov<mskmodesuffix>"
1880 [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1881 (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1883 && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
1885 (define_insn "k<code><mode>"
1886 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1887 (any_logic:SWI1248_AVX512BW
1888 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1889 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1890 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1893 if (get_attr_mode (insn) == MODE_HI)
1894 return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1896 return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1898 [(set_attr "type" "msklog")
1899 (set_attr "prefix" "vex")
1901 (cond [(and (match_test "<MODE>mode == QImode")
1902 (not (match_test "TARGET_AVX512DQ")))
1905 (const_string "<MODE>")))])
1908 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1909 (any_logic:SWI1248_AVX512BW
1910 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
1911 (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))
1912 (clobber (reg:CC FLAGS_REG))]
1913 "TARGET_AVX512F && reload_completed"
1916 (any_logic:SWI1248_AVX512BW (match_dup 1) (match_dup 2)))
1917 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
1919 (define_insn "kandn<mode>"
1920 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1921 (and:SWI1248_AVX512BW
1922 (not:SWI1248_AVX512BW
1923 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1924 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1925 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1928 if (get_attr_mode (insn) == MODE_HI)
1929 return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1931 return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1933 [(set_attr "type" "msklog")
1934 (set_attr "prefix" "vex")
1936 (cond [(and (match_test "<MODE>mode == QImode")
1937 (not (match_test "TARGET_AVX512DQ")))
1940 (const_string "<MODE>")))])
1943 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
1944 (and:SWI1248_AVX512BW
1945 (not:SWI1248_AVX512BW
1946 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand"))
1947 (match_operand:SWI1248_AVX512BW 2 "mask_reg_operand")))
1948 (clobber (reg:CC FLAGS_REG))]
1949 "TARGET_AVX512F && reload_completed"
1952 (and:SWI1248_AVX512BW
1953 (not:SWI1248_AVX512BW (match_dup 1))
1955 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
1957 (define_insn "kxnor<mode>"
1958 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1959 (not:SWI1248_AVX512BW
1960 (xor:SWI1248_AVX512BW
1961 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1962 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1963 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1966 if (get_attr_mode (insn) == MODE_HI)
1967 return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1969 return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1971 [(set_attr "type" "msklog")
1972 (set_attr "prefix" "vex")
1974 (cond [(and (match_test "<MODE>mode == QImode")
1975 (not (match_test "TARGET_AVX512DQ")))
1978 (const_string "<MODE>")))])
1980 (define_insn "knot<mode>"
1981 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1982 (not:SWI1248_AVX512BW
1983 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1984 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1987 if (get_attr_mode (insn) == MODE_HI)
1988 return "knotw\t{%1, %0|%0, %1}";
1990 return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1992 [(set_attr "type" "msklog")
1993 (set_attr "prefix" "vex")
1995 (cond [(and (match_test "<MODE>mode == QImode")
1996 (not (match_test "TARGET_AVX512DQ")))
1999 (const_string "<MODE>")))])
2002 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
2003 (not:SWI1248_AVX512BW
2004 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")))]
2005 "TARGET_AVX512F && reload_completed"
2008 (not:SWI1248_AVX512BW (match_dup 1)))
2009 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
;; *knotsi_1_zext: SImode mask NOT whose result is (zero_)extended into a
;; DImode mask register; emits knotd.  NOTE(review): the extend RTX line
;; itself is not visible in this view — presumably zero_extend:DI.
2011 (define_insn "*knotsi_1_zext"
2012 [(set (match_operand:DI 0 "register_operand" "=k")
2014 (not:SI (match_operand:SI 1 "register_operand" "k"))))
2015 (unspec [(const_int 0)] UNSPEC_MASKOP)]
2017 "knotd\t{%1, %0|%0, %1}";
2018 [(set_attr "type" "msklog")
2019 (set_attr "prefix" "vex")
2020 (set_attr "mode" "SI")])

;; Post-reload splitter feeding the insn above: requires AVX512BW (DImode
;; mask registers), retags the extended SImode NOT with UNSPEC_MASKOP.
2023 [(set (match_operand:DI 0 "mask_reg_operand")
2025 (not:SI (match_operand:SI 1 "mask_reg_operand"))))]
2026 "TARGET_AVX512BW && reload_completed"
2030 (not:SI (match_dup 1))))
2031 (unspec [(const_int 0)] UNSPEC_MASKOP)])])

;; kadd<mode>: addition of two mask registers (kaddb/kaddw/kaddd/kaddq
;; via <mskmodesuffix>); iterator restricts to modes where kadd exists.
2033 (define_insn "kadd<mode>"
2034 [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
2035 (plus:SWI1248_AVX512BWDQ2
2036 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
2037 (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
2038 (unspec [(const_int 0)] UNSPEC_MASKOP)]
2040 "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2041 [(set_attr "type" "msklog")
2042 (set_attr "prefix" "vex")
2043 (set_attr "mode" "<MODE>")])
2045 ;; Mask variant shift mnemonics
;; Maps the RTL shift codes onto the kshift mnemonic stems.
2046 (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])

;; k<code><mode>: kshiftl/kshiftr of a mask register by an immediate
;; in [0, 255] (the hardware encoding takes an 8-bit immediate).
2048 (define_insn "k<code><mode>"
2049 [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
2050 (any_lshift:SWI1248_AVX512BWDQ
2051 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
2052 (match_operand 2 "const_0_to_255_operand")))
2053 (unspec [(const_int 0)] UNSPEC_MASKOP)]
2055 "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2056 [(set_attr "type" "msklog")
2057 (set_attr "prefix" "vex")
2058 (set_attr "mode" "<MODE>")])

;; Post-reload splitter: constant shift of a mask register (with FLAGS
;; clobber) becomes the UNSPEC_MASKOP-tagged form matched above.
2061 [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
2062 (any_lshift:SWI1248_AVX512BW
2063 (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
2064 (match_operand 2 "const_int_operand")))
2065 (clobber (reg:CC FLAGS_REG))]
2066 "TARGET_AVX512F && reload_completed"
2069 (any_lshift:SWI1248_AVX512BW
2072 (unspec [(const_int 0)] UNSPEC_MASKOP)])])
;; ktest<mode>: sets the FLAGS register from two mask registers
;; (ktestb/w/d/q); the comparison semantics live in the unspec whose
;; tag line is not visible here.
2074 (define_insn "ktest<mode>"
2075 [(set (reg:CC FLAGS_REG)
2077 [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
2078 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
2081 "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
2082 [(set_attr "mode" "<MODE>")
2083 (set_attr "type" "msklog")
2084 (set_attr "prefix" "vex")])

;; kortest<mode>: likewise sets FLAGS from the OR of two mask registers.
2086 (define_insn "kortest<mode>"
2087 [(set (reg:CC FLAGS_REG)
2089 [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
2090 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
2093 "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
2094 [(set_attr "mode" "<MODE>")
2095 (set_attr "type" "msklog")
2096 (set_attr "prefix" "vex")])

;; kunpckhi: combine two QImode masks into one HImode mask (kunpckbw).
;; The combining operation between the two zero_extends is on elided
;; lines — presumably (ior (ashift ...)) placing %1 in the high byte.
2098 (define_insn "kunpckhi"
2099 [(set (match_operand:HI 0 "register_operand" "=k")
2102 (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
2104 (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))
2105 (unspec [(const_int 0)] UNSPEC_MASKOP)]
2107 "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
2108 [(set_attr "mode" "HI")
2109 (set_attr "type" "msklog")
2110 (set_attr "prefix" "vex")])

;; kunpcksi: combine two HImode masks into one SImode mask (kunpckwd).
2112 (define_insn "kunpcksi"
2113 [(set (match_operand:SI 0 "register_operand" "=k")
2116 (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
2118 (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))
2119 (unspec [(const_int 0)] UNSPEC_MASKOP)]
2121 "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
2122 [(set_attr "mode" "SI")])

;; kunpckdi: combine two SImode masks into one DImode mask (kunpckdq).
2124 (define_insn "kunpckdi"
2125 [(set (match_operand:DI 0 "register_operand" "=k")
2128 (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
2130 (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))
2131 (unspec [(const_int 0)] UNSPEC_MASKOP)]
2133 "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
2134 [(set_attr "mode" "DI")])
2137 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2139 ;; Parallel floating point arithmetic
2141 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; abs/neg expander: defers entirely to ix86_expand_fp_absneg_operator,
;; which materializes the sign-bit mask constant.
2143 (define_expand "<code><mode>2"
2144 [(set (match_operand:VFB 0 "register_operand")
2146 (match_operand:VFB 1 "register_operand")))]
2148 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")

;; abs/neg insn-and-split: operand 2 (a "use") carries the sign-bit mask;
;; after reload this splits to a plain logic op <absneg_op> on 1 and 2.
;; The C body canonicalizes operand order: memory goes in operand 2, and
;; for the matching-constraint alternatives the operand matching the
;; destination must be operand 1.
2150 (define_insn_and_split "*<code><mode>2"
2151 [(set (match_operand:VFB 0 "register_operand" "=x,x,v,v")
2153 (match_operand:VFB 1 "vector_operand" "0,xBm,v,m")))
2154 (use (match_operand:VFB 2 "vector_operand" "xBm,0,vm,v"))]
2157 "&& reload_completed"
2159 (<absneg_op>:VFB (match_dup 1) (match_dup 2)))]
2163 if (MEM_P (operands[1]))
2164 std::swap (operands[1], operands[2]);
2168 if (operands_match_p (operands[0], operands[2]))
2169 std::swap (operands[1], operands[2]);
2172 [(set_attr "isa" "noavx,noavx,avx,avx")])

;; nabs (negated absolute value): same shape as above but the split is a
;; fixed IOR with the sign-bit mask in operand 2 (forces the sign bit on).
2174 (define_insn_and_split "*nabs<mode>2"
2175 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
2178 (match_operand:VF 1 "vector_operand" "0,xBm,v,m"))))
2179 (use (match_operand:VF 2 "vector_operand" "xBm,0,vm,v"))]
2182 "&& reload_completed"
2184 (ior:VF (match_dup 1) (match_dup 2)))]
2188 if (MEM_P (operands[1]))
2189 std::swap (operands[1], operands[2]);
2193 if (operands_match_p (operands[0], operands[2]))
2194 std::swap (operands[1], operands[2]);
2197 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; cond_<insn> (conditional add/sub): expands the vec_merge-of-plusminus
;; form into the _mask variant of the binary insn.  AVX512VL is needed
;; for narrower-than-512-bit vectors.
2199 (define_expand "cond_<insn><mode>"
2200 [(set (match_operand:VFH 0 "register_operand")
2203 (match_operand:VFH 2 "vector_operand")
2204 (match_operand:VFH 3 "vector_operand"))
2205 (match_operand:VFH 4 "nonimm_or_0_operand")
2206 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
2207 "<MODE_SIZE> == 64 || TARGET_AVX512VL"
2209 emit_insn (gen_<insn><mode>3_mask (operands[0],

;; add/sub expander with optional masking and embedded rounding; only
;; fixes up the operands (e.g. forcing at most one memory operand).
2217 (define_expand "<insn><mode>3<mask_name><round_name>"
2218 [(set (match_operand:VFH 0 "register_operand")
2220 (match_operand:VFH 1 "<round_nimm_predicate>")
2221 (match_operand:VFH 2 "<round_nimm_predicate>")))]
2222 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2223 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; Matching insn: alternative 0 is legacy two-operand SSE (dest matches
;; operand 1), alternative 1 the VEX/EVEX three-operand form with
;; optional mask and rounding decoration.
2225 (define_insn "*<insn><mode>3<mask_name><round_name>"
2226 [(set (match_operand:VFH 0 "register_operand" "=x,v")
2228 (match_operand:VFH 1 "<bcst_round_nimm_predicate>" "<comm>0,v")
2229 (match_operand:VFH 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
2230 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
2231 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2233 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
2234 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
2235 [(set_attr "isa" "noavx,avx")
2236 (set_attr "type" "sseadd")
2237 (set_attr "prefix" "<bcst_mask_prefix3>")
2238 (set_attr "mode" "<MODE>")])

2240 ;; Standard scalar operation patterns which preserve the rest of the
2241 ;; vector for combiner.
;; Scalar add/sub on element 0 only (vec_select lane 0, result merged
;; back), so combine can match C scalar code onto addss/addsd etc.
2242 (define_insn "*<sse>_vm<insn><mode>3"
2243 [(set (match_operand:VFH_128 0 "register_operand" "=x,v")
2245 (vec_duplicate:VFH_128
2246 (plusminus:<ssescalarmode>
2247 (vec_select:<ssescalarmode>
2248 (match_operand:VFH_128 1 "register_operand" "0,v")
2249 (parallel [(const_int 0)]))
2250 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
2255 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
2256 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2257 [(set_attr "isa" "noavx,avx")
2258 (set_attr "type" "sseadd")
2259 (set (attr "prefix")
2260 (cond [(eq_attr "alternative" "0")
2261 (const_string "orig")
2262 (eq_attr "alternative" "1")
;; V8HF scalar ops are EVEX-only (AVX512FP16); other modes use VEX.
2264 (match_test "<MODE>mode == V8HFmode")
2265 (const_string "evex")
2266 (const_string "vex"))
2268 (const_string "*")))
2269 (set_attr "mode" "<ssescalarmode>")])

;; Builtin-facing scalar add/sub with optional scalar masking and
;; embedded rounding (vaddss/vaddsd/vaddsh etc.).
2271 (define_insn "<sse>_vm<insn><mode>3<mask_scalar_name><round_scalar_name>"
2272 [(set (match_operand:VFH_128 0 "register_operand" "=x,v")
2275 (match_operand:VFH_128 1 "register_operand" "0,v")
2276 (match_operand:VFH_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
2281 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2282 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
2283 [(set_attr "isa" "noavx,avx")
2284 (set_attr "type" "sseadd")
2285 (set_attr "prefix" "<round_scalar_prefix>")
2286 (set_attr "mode" "<ssescalarmode>")])
;; cond_mul: conditional multiply, lowered to mul<mode>3_mask; parallels
;; cond_<insn> above.
2288 (define_expand "cond_mul<mode>"
2289 [(set (match_operand:VFH 0 "register_operand")
2292 (match_operand:VFH 2 "vector_operand")
2293 (match_operand:VFH 3 "vector_operand"))
2294 (match_operand:VFH 4 "nonimm_or_0_operand")
2295 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
2296 "<MODE_SIZE> == 64 || TARGET_AVX512VL"
2298 emit_insn (gen_mul<mode>3_mask (operands[0],

;; mul expander with optional masking/rounding; operand fixup only.
2306 (define_expand "mul<mode>3<mask_name><round_name>"
2307 [(set (match_operand:VFH 0 "register_operand")
2309 (match_operand:VFH 1 "<round_nimm_predicate>")
2310 (match_operand:VFH 2 "<round_nimm_predicate>")))]
2311 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2312 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")

;; Matching vector multiply insn; "%0" marks operand 1 commutative with
;; operand 2 in the legacy SSE alternative.
2314 (define_insn "*mul<mode>3<mask_name><round_name>"
2315 [(set (match_operand:VFH 0 "register_operand" "=x,v")
2317 (match_operand:VFH 1 "<bcst_round_nimm_predicate>" "%0,v")
2318 (match_operand:VFH 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
2319 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
2320 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2322 mul<ssemodesuffix>\t{%2, %0|%0, %2}
2323 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
2324 [(set_attr "isa" "noavx,avx")
2325 (set_attr "type" "ssemul")
2326 (set_attr "prefix" "<bcst_mask_prefix3>")
2327 (set_attr "btver2_decode" "direct,double")
2328 (set_attr "mode" "<MODE>")])

2330 ;; Standard scalar operation patterns which preserve the rest of the
2331 ;; vector for combiner.
;; Scalar mul/div on element 0 only (mulss/divsd etc.), rest of the
;; vector taken from operand 1 — for combiner matching of scalar code.
2332 (define_insn "*<sse>_vm<multdiv_mnemonic><mode>3"
2333 [(set (match_operand:VFH_128 0 "register_operand" "=x,v")
2335 (vec_duplicate:VFH_128
2336 (multdiv:<ssescalarmode>
2337 (vec_select:<ssescalarmode>
2338 (match_operand:VFH_128 1 "register_operand" "0,v")
2339 (parallel [(const_int 0)]))
2340 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
2345 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
2346 v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2347 [(set_attr "isa" "noavx,avx")
2348 (set_attr "type" "sse<multdiv_mnemonic>")
2349 (set (attr "prefix")
2350 (cond [(eq_attr "alternative" "0")
2351 (const_string "orig")
2352 (eq_attr "alternative" "1")
;; V8HF scalar ops are EVEX-only (AVX512FP16); other modes use VEX.
2354 (match_test "<MODE>mode == V8HFmode")
2355 (const_string "evex")
2356 (const_string "vex"))
2358 (const_string "*")))
2359 (set_attr "btver2_decode" "direct,double")
2360 (set_attr "mode" "<ssescalarmode>")])

;; Builtin-facing scalar mul/div with optional scalar mask and rounding.
2362 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
2363 [(set (match_operand:VFH_128 0 "register_operand" "=x,v")
2366 (match_operand:VFH_128 1 "register_operand" "0,v")
2367 (match_operand:VFH_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
2372 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2373 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
2374 [(set_attr "isa" "noavx,avx")
2375 (set_attr "type" "sse<multdiv_mnemonic>")
2376 (set_attr "prefix" "<round_scalar_prefix>")
2377 (set_attr "btver2_decode" "direct,double")
2378 (set_attr "mode" "<ssescalarmode>")])
2380 (define_expand "div<mode>3"
2381 [(set (match_operand:VF2 0 "register_operand")
2382 (div:VF2 (match_operand:VF2 1 "register_operand")
2383 (match_operand:VF2 2 "vector_operand")))]
2386 (define_expand "div<mode>3"
2387 [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
2388 (div:VF_AVX512FP16VL
2389 (match_operand:VF_AVX512FP16VL 1 "register_operand")
2390 (match_operand:VF_AVX512FP16VL 2 "vector_operand")))]
2393 /* Transform HF vector div to vector mul/rcp. */
2394 if (GET_MODE_INNER (<MODE>mode) == HFmode
2395 && TARGET_RECIP_VEC_DIV
2396 && optimize_insn_for_speed_p ()
2397 && flag_finite_math_only && !flag_trapping_math
2398 && flag_unsafe_math_optimizations)
2400 rtx op = gen_reg_rtx (<MODE>mode);
2401 operands[2] = force_reg (<MODE>mode, operands[2]);
2402 emit_insn (gen_avx512fp16_rcp<mode>2 (op, operands[2]));
2403 emit_insn (gen_mul<mode>3 (operands[0], operands[1], op));
2408 (define_expand "div<mode>3"
2409 [(set (match_operand:VF1 0 "register_operand")
2410 (div:VF1 (match_operand:VF1 1 "register_operand")
2411 (match_operand:VF1 2 "vector_operand")))]
2415 && TARGET_RECIP_VEC_DIV
2416 && !optimize_insn_for_size_p ()
2417 && flag_finite_math_only && !flag_trapping_math
2418 && flag_unsafe_math_optimizations)
2420 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
2425 (define_expand "cond_div<mode>"
2426 [(set (match_operand:VFH 0 "register_operand")
2429 (match_operand:VFH 2 "register_operand")
2430 (match_operand:VFH 3 "vector_operand"))
2431 (match_operand:VFH 4 "nonimm_or_0_operand")
2432 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
2433 "<MODE_SIZE> == 64 || TARGET_AVX512VL"
2435 emit_insn (gen_<sse>_div<mode>3_mask (operands[0],
2443 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
2444 [(set (match_operand:VFH 0 "register_operand" "=x,v")
2446 (match_operand:VFH 1 "register_operand" "0,v")
2447 (match_operand:VFH 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))]
2448 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2450 div<ssemodesuffix>\t{%2, %0|%0, %2}
2451 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
2452 [(set_attr "isa" "noavx,avx")
2453 (set_attr "type" "ssediv")
2454 (set_attr "prefix" "<bcst_mask_prefix3>")
2455 (set_attr "mode" "<MODE>")])
;; rcpps: approximate vector reciprocal (SSE/AVX, 128/256-bit SF only);
;; %v prefixes the AVX form when available.
2457 (define_insn "<sse>_rcp<mode>2"
2458 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2460 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
2462 "%vrcpps\t{%1, %0|%0, %1}"
2463 [(set_attr "type" "sse")
2464 (set_attr "atom_sse_attr" "rcp")
2465 (set_attr "btver2_sse_attr" "rcp")
2466 (set_attr "prefix" "maybe_vex")
2467 (set_attr "mode" "<MODE>")])

;; rcpss: scalar reciprocal; operand 1 is a whole-vector unspec so %k1
;; prints its low element; upper elements come from operand 2.
2469 (define_insn "sse_vmrcpv4sf2"
2470 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2472 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2474 (match_operand:V4SF 2 "register_operand" "0,x")
2478 rcpss\t{%1, %0|%0, %k1}
2479 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
2480 [(set_attr "isa" "noavx,avx")
2481 (set_attr "type" "sse")
2482 (set_attr "atom_sse_attr" "rcp")
2483 (set_attr "btver2_sse_attr" "rcp")
2484 (set_attr "prefix" "orig,vex")
2485 (set_attr "mode" "SF")])

;; Combiner variant of rcpss: operand 1 is already scalar SF.
2487 (define_insn "*sse_vmrcpv4sf2"
2488 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2491 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
2493 (match_operand:V4SF 2 "register_operand" "0,x")
2497 rcpss\t{%1, %0|%0, %1}
2498 vrcpss\t{%1, %2, %0|%0, %2, %1}"
2499 [(set_attr "isa" "noavx,avx")
2500 (set_attr "type" "sse")
2501 (set_attr "atom_sse_attr" "rcp")
2502 (set_attr "btver2_sse_attr" "rcp")
2503 (set_attr "prefix" "orig,vex")
2504 (set_attr "mode" "SF")])

;; vrcpph: AVX512FP16 vector reciprocal with optional masking.
2506 (define_insn "avx512fp16_rcp<mode>2<mask_name>"
2507 [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=v")
2508 (unspec:VF_AVX512FP16VL
2509 [(match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "vm")]
2512 "vrcpph\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2513 [(set_attr "type" "sse")
2514 (set_attr "prefix" "evex")
2515 (set_attr "mode" "<MODE>")])

;; vrcpsh: scalar HF reciprocal, maskable; %w1 prints the low HF element.
2517 (define_insn "avx512fp16_vmrcpv8hf2<mask_scalar_name>"
2518 [(set (match_operand:V8HF 0 "register_operand" "=v")
2520 (unspec:V8HF [(match_operand:V8HF 1 "nonimmediate_operand" "vm")]
2522 (match_operand:V8HF 2 "register_operand" "v")
2525 "vrcpsh\t{%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %w1}"
2526 [(set_attr "type" "sse")
2527 (set_attr "prefix" "evex")
2528 (set_attr "mode" "HF")])

;; Combiner variant of vrcpsh with a scalar HF operand 1.
2530 (define_insn "*avx512fp16_vmrcpv8hf2"
2531 [(set (match_operand:V8HF 0 "register_operand" "=v")
2534 (unspec:HF [(match_operand:HF 1 "nonimmediate_operand" "vm")]
2536 (match_operand:V8HF 2 "register_operand" "v")
2539 "vrcpsh\t{%1, %2, %0|%0, %2, %w1}"
2540 [(set_attr "type" "sse")
2541 (set_attr "prefix" "evex")
2542 (set_attr "mode" "HF")])

;; vrcp14: AVX512 14-bit-precision vector reciprocal, maskable.
2544 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
2545 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2547 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2550 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2551 [(set_attr "type" "sse")
2552 (set_attr "prefix" "evex")
2553 (set_attr "mode" "<MODE>")])

;; vrcp14ss/sd: scalar 14-bit reciprocal; upper elements from operand 2.
2555 (define_insn "srcp14<mode>"
2556 [(set (match_operand:VF_128 0 "register_operand" "=v")
2559 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2561 (match_operand:VF_128 2 "register_operand" "v")
2564 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2565 [(set_attr "type" "sse")
2566 (set_attr "prefix" "evex")
2567 (set_attr "mode" "<MODE>")])

;; Masked scalar vrcp14: operand 3 is the merge source (or zero, "0C"),
;; operand 4 the mask register; %{%4%}%N3 prints the {k}/{z} decoration.
2569 (define_insn "srcp14<mode>_mask"
2570 [(set (match_operand:VF_128 0 "register_operand" "=v")
2574 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2576 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2577 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2578 (match_operand:VF_128 2 "register_operand" "v")
2581 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2582 [(set_attr "type" "sse")
2583 (set_attr "prefix" "evex")
2584 (set_attr "mode" "<MODE>")])
;; sqrt for DF/HF vectors: straight expansion.
2586 (define_expand "sqrt<mode>2"
2587 [(set (match_operand:VF2H 0 "register_operand")
2588 (sqrt:VF2H (match_operand:VF2H 1 "vector_operand")))]

;; sqrt for SF vectors: under -mrecip + finite/unsafe math, expand to
;; the rsqrt-based Newton-Raphson sequence instead of sqrtps.
2591 (define_expand "sqrt<mode>2"
2592 [(set (match_operand:VF1 0 "register_operand")
2593 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
2597 && TARGET_RECIP_VEC_SQRT
2598 && !optimize_insn_for_size_p ()
2599 && flag_finite_math_only && !flag_trapping_math
2600 && flag_unsafe_math_optimizations)
2602 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);

;; Vector sqrt insn with optional masking and embedded rounding.
2607 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
2608 [(set (match_operand:VFH 0 "register_operand" "=x,v")
2609 (sqrt:VFH (match_operand:VFH 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
2610 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2612 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
2613 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
2614 [(set_attr "isa" "noavx,avx")
2615 (set_attr "type" "sse")
2616 (set_attr "atom_sse_attr" "sqrt")
2617 (set_attr "btver2_sse_attr" "sqrt")
2618 (set_attr "prefix" "maybe_vex")
2619 (set_attr "mode" "<MODE>")])

;; Scalar sqrt (sqrtss/sqrtsd/vsqrtsh): operand 1 is the full source
;; vector; upper elements are taken from operand 2.
2621 (define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2622 [(set (match_operand:VFH_128 0 "register_operand" "=x,v")
2625 (match_operand:VFH_128 1 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
2626 (match_operand:VFH_128 2 "register_operand" "0,v")
2630 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
2631 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_scalar_mask_op3>}"
2632 [(set_attr "isa" "noavx,avx")
2633 (set_attr "type" "sse")
2634 (set_attr "atom_sse_attr" "sqrt")
2635 (set_attr "prefix" "<round_scalar_prefix>")
2636 (set_attr "btver2_sse_attr" "sqrt")
2637 (set_attr "mode" "<ssescalarmode>")])

;; Combiner variant of scalar sqrt: operand 1 is already a scalar, and
;; the result is vec_duplicate'd then merged with operand 2.
2639 (define_insn "*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2640 [(set (match_operand:VFH_128 0 "register_operand" "=x,v")
2642 (vec_duplicate:VFH_128
2643 (sqrt:<ssescalarmode>
2644 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "xm,<round_scalar_constraint>")))
2645 (match_operand:VFH_128 2 "register_operand" "0,v")
2649 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
2650 vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_scalar_mask_op3>}"
2651 [(set_attr "isa" "noavx,avx")
2652 (set_attr "type" "sse")
2653 (set_attr "atom_sse_attr" "sqrt")
2654 (set_attr "prefix" "<round_scalar_prefix>")
2655 (set_attr "btver2_sse_attr" "sqrt")
2656 (set_attr "mode" "<ssescalarmode>")])
;; rsqrt expander (SF vectors): lowers to the software Newton-Raphson
;; reciprocal-sqrt sequence (last arg true = rsqrt).
2658 (define_expand "rsqrt<mode>2"
2659 [(set (match_operand:VF1_AVX512ER_128_256 0 "register_operand")
2660 (unspec:VF1_AVX512ER_128_256
2661 [(match_operand:VF1_AVX512ER_128_256 1 "vector_operand")]
2663 "TARGET_SSE && TARGET_SSE_MATH"
2665 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);

;; rsqrt expander for AVX512FP16 vectors: maps directly onto the insn.
2669 (define_expand "rsqrt<mode>2"
2670 [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
2671 (unspec:VF_AVX512FP16VL
2672 [(match_operand:VF_AVX512FP16VL 1 "vector_operand")]
2674 "TARGET_AVX512FP16")

;; rsqrtps: approximate vector reciprocal square root (SSE/AVX SF).
2676 (define_insn "<sse>_rsqrt<mode>2"
2677 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2679 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
2681 "%vrsqrtps\t{%1, %0|%0, %1}"
2682 [(set_attr "type" "sse")
2683 (set_attr "prefix" "maybe_vex")
2684 (set_attr "mode" "<MODE>")])

;; vrsqrtph: AVX512FP16 vector reciprocal sqrt with optional masking.
2686 (define_insn "<sse>_rsqrt<mode>2<mask_name>"
2687 [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=v")
2688 (unspec:VF_AVX512FP16VL
2689 [(match_operand:VF_AVX512FP16VL 1 "vector_operand" "vBm")] UNSPEC_RSQRT))]
2691 "vrsqrtph\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2692 [(set_attr "type" "sse")
2693 (set_attr "prefix" "evex")
2694 (set_attr "mode" "<MODE>")])

;; vrsqrt14: AVX512 14-bit-precision vector reciprocal sqrt, maskable.
2696 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
2697 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2699 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2702 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2703 [(set_attr "type" "sse")
2704 (set_attr "prefix" "evex")
2705 (set_attr "mode" "<MODE>")])

;; vrsqrt14ss/sd: scalar form; upper elements from operand 2.
2707 (define_insn "rsqrt14<mode>"
2708 [(set (match_operand:VF_128 0 "register_operand" "=v")
2711 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2713 (match_operand:VF_128 2 "register_operand" "v")
2716 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2717 [(set_attr "type" "sse")
2718 (set_attr "prefix" "evex")
2719 (set_attr "mode" "<MODE>")])

;; Masked scalar vrsqrt14: operand 3 merge source ("0C" = match dest or
;; zero), operand 4 mask register; %{%4%}%N3 prints {k}/{z} suffixes.
2721 (define_insn "rsqrt14_<mode>_mask"
2722 [(set (match_operand:VF_128 0 "register_operand" "=v")
2726 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2728 (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
2729 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2730 (match_operand:VF_128 2 "register_operand" "v")
2733 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2734 [(set_attr "type" "sse")
2735 (set_attr "prefix" "evex")
2736 (set_attr "mode" "<MODE>")])

;; rsqrtss with a whole-vector operand 1 (%k1 prints its low element).
2738 (define_insn "sse_vmrsqrtv4sf2"
2739 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2741 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2743 (match_operand:V4SF 2 "register_operand" "0,x")
2747 rsqrtss\t{%1, %0|%0, %k1}
2748 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
2749 [(set_attr "isa" "noavx,avx")
2750 (set_attr "type" "sse")
2751 (set_attr "prefix" "orig,vex")
2752 (set_attr "mode" "SF")])

;; Combiner variant of rsqrtss with a scalar SF operand 1.
2754 (define_insn "*sse_vmrsqrtv4sf2"
2755 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2758 (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
2760 (match_operand:V4SF 2 "register_operand" "0,x")
2764 rsqrtss\t{%1, %0|%0, %1}
2765 vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
2766 [(set_attr "isa" "noavx,avx")
2767 (set_attr "type" "sse")
2768 (set_attr "prefix" "orig,vex")
2769 (set_attr "mode" "SF")])

;; vrsqrtsh: scalar HF reciprocal sqrt, maskable; %w1 = low HF element.
2771 (define_insn "avx512fp16_vmrsqrtv8hf2<mask_scalar_name>"
2772 [(set (match_operand:V8HF 0 "register_operand" "=v")
2774 (unspec:V8HF [(match_operand:V8HF 1 "nonimmediate_operand" "vm")]
2776 (match_operand:V8HF 2 "register_operand" "v")
2779 "vrsqrtsh\t{%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %w1}"
2780 [(set_attr "type" "sse")
2781 (set_attr "prefix" "evex")
2782 (set_attr "mode" "HF")])

;; Combiner variant of vrsqrtsh with a scalar HF operand 1.
2784 (define_insn "*avx512fp16_vmrsqrtv8hf2"
2785 [(set (match_operand:V8HF 0 "register_operand" "=v")
2788 (unspec:HF [(match_operand:HF 1 "nonimmediate_operand" "vm")]
2790 (match_operand:V8HF 2 "register_operand" "v")
2793 "vrsqrtsh\t{%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %w1}"
2794 [(set_attr "type" "sse")
2795 (set_attr "prefix" "evex")
2796 (set_attr "mode" "HF")])
;; cond_<code> (conditional min/max): lowered to the _mask variant.
2798 (define_expand "cond_<code><mode>"
2799 [(set (match_operand:VFH 0 "register_operand")
2802 (match_operand:VFH 2 "vector_operand")
2803 (match_operand:VFH 3 "vector_operand"))
2804 (match_operand:VFH 4 "nonimm_or_0_operand")
2805 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
2806 "<MODE_SIZE> == 64 || TARGET_AVX512VL"
2808 emit_insn (gen_<code><mode>3_mask (operands[0],

;; min/max expander.  When -0.0 or NaN must be honored (no
;; finite-math-only, or signed zeros), route through the IEEE variant
;; whose operand order is fixed; otherwise use the commutative pattern.
2816 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
2817 [(set (match_operand:VFH 0 "register_operand")
2819 (match_operand:VFH 1 "<round_saeonly_nimm_predicate>")
2820 (match_operand:VFH 2 "<round_saeonly_nimm_predicate>")))]
2821 "TARGET_SSE && <mask_mode512bit_condition>
2822 && <round_saeonly_mode512bit_condition>"
2824 if (!flag_finite_math_only || flag_signed_zeros)
2826 operands[1] = force_reg (<MODE>mode, operands[1]);
2827 emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
2828 (operands[0], operands[1], operands[2]
2829 <mask_operand_arg34>
2830 <round_saeonly_mask_arg3>));
2834 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);

2837 ;; These versions of the min/max patterns are intentionally ignorant of
2838 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
2839 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
2840 ;; are undefined in this condition, we're certain this is correct.
2842 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
2843 [(set (match_operand:VFH 0 "register_operand" "=x,v")
2845 (match_operand:VFH 1 "<round_saeonly_nimm_predicate>" "%0,v")
2846 (match_operand:VFH 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
2848 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
2849 && <mask_mode512bit_condition>
2850 && <round_saeonly_mode512bit_condition>"
2852 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
2853 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2854 [(set_attr "isa" "noavx,avx")
2855 (set_attr "type" "sseadd")
2856 (set_attr "btver2_sse_attr" "maxmin")
2857 (set_attr "prefix" "<mask_prefix3>")
2858 (set_attr "mode" "<MODE>")])

2860 ;; These versions of the min/max patterns implement exactly the operations
2861 ;;   min = (op1 < op2 ? op1 : op2)
2862 ;;   max = (!(op1 < op2) ? op1 : op2)
2863 ;; Their operands are not commutative, and thus they may be used in the
2864 ;; presence of -0.0 and NaN.
2866 (define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
2867 [(set (match_operand:VFH 0 "register_operand" "=x,v")
2869 [(match_operand:VFH 1 "register_operand" "0,v")
2870 (match_operand:VFH 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
2873 && <mask_mode512bit_condition>
2874 && <round_saeonly_mode512bit_condition>"
2876 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
2877 v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2878 [(set_attr "isa" "noavx,avx")
2879 (set_attr "type" "sseadd")
2880 (set_attr "btver2_sse_attr" "maxmin")
2881 (set_attr "prefix" "<mask_prefix3>")
2882 (set_attr "mode" "<MODE>")])

2884 ;; Standard scalar operation patterns which preserve the rest of the
2885 ;; vector for combiner.
;; Scalar IEEE min/max on element 0 only; rest of the vector comes from
;; operand 1 — lets combine match scalar C code onto minss/maxsd etc.
2886 (define_insn "*ieee_<ieee_maxmin><mode>3"
2887 [(set (match_operand:VFH_128 0 "register_operand" "=x,v")
2889 (vec_duplicate:VFH_128
2890 (unspec:<ssescalarmode>
2891 [(vec_select:<ssescalarmode>
2892 (match_operand:VFH_128 1 "register_operand" "0,v")
2893 (parallel [(const_int 0)]))
2894 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")]
2900 <ieee_maxmin><ssescalarmodesuffix>\t{%2, %0|%0, %2}
2901 v<ieee_maxmin><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2902 [(set_attr "isa" "noavx,avx")
2903 (set_attr "type" "sseadd")
2904 (set_attr "btver2_sse_attr" "maxmin")
2905 (set (attr "prefix")
2906 (cond [(eq_attr "alternative" "0")
2907 (const_string "orig")
2908 (eq_attr "alternative" "1")
;; V8HF scalar ops are EVEX-only (AVX512FP16); other modes use VEX.
2910 (match_test "<MODE>mode == V8HFmode")
2911 (const_string "evex")
2912 (const_string "vex"))
2914 (const_string "*")))
2915 (set_attr "mode" "<ssescalarmode>")])

;; Builtin-facing scalar min/max with optional scalar mask and SAE.
2917 (define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
2918 [(set (match_operand:VFH_128 0 "register_operand" "=x,v")
2921 (match_operand:VFH_128 1 "register_operand" "0,v")
2922 (match_operand:VFH_128 2 "nonimmediate_operand" "xm,<round_saeonly_scalar_constraint>"))
2927 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2928 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
2929 [(set_attr "isa" "noavx,avx")
2930 (set_attr "type" "sse")
2931 (set_attr "btver2_sse_attr" "maxmin")
2932 (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2933 (set_attr "mode" "<ssescalarmode>")])
2935 (define_mode_attr addsub_cst [(V4DF "5") (V2DF "1")
2936 (V4SF "5") (V8SF "85")])
2938 (define_insn "vec_addsub<mode>3"
2939 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2940 (vec_merge:VF_128_256
2942 (match_operand:VF_128_256 1 "register_operand" "0,x")
2943 (match_operand:VF_128_256 2 "vector_operand" "xBm, xm"))
2944 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2945 (const_int <addsub_cst>)))]
2948 addsub<ssemodesuffix>\t{%2, %0|%0, %2}
2949 vaddsub<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2950 [(set_attr "isa" "noavx,avx")
2951 (set_attr "type" "sseadd")
2952 (set (attr "atom_unit")
2954 (match_test "<MODE>mode == V2DFmode")
2955 (const_string "complex")
2956 (const_string "*")))
2957 (set_attr "prefix" "orig,vex")
2958 (set (attr "prefix_rep")
2960 (and (match_test "<MODE>mode == V4SFmode")
2961 (eq_attr "alternative" "0"))
2963 (const_string "*")))
2964 (set_attr "mode" "<MODE>")])
;; NOTE(review): pre-reload splitters recognizing a masked plus/minus
;; combination (addsub_vm_operator) and rewriting it as a vec_merge of
;; MINUS and PLUS (the addsub shape).  The embedded line numbering jumps,
;; so interior lines of these patterns were elided by extraction —
;; comments below describe only what is visible here.
2967 [(set (match_operand:VF_128_256 0 "register_operand")
2968 (match_operator:VF_128_256 6 "addsub_vm_operator"
2970 (match_operand:VF_128_256 1 "register_operand")
2971 (match_operand:VF_128_256 2 "vector_operand"))
2973 (match_operand:VF_128_256 3 "vector_operand")
2974 (match_operand:VF_128_256 4 "vector_operand"))
2975 (match_operand 5 "const_int_operand")]))]
;; Operands 3/4 must pair up with operands 1/2 (in either order) for the
;; addsub rewrite to be valid.
2977 && can_create_pseudo_p ()
2978 && ((rtx_equal_p (operands[1], operands[3])
2979 && rtx_equal_p (operands[2], operands[4]))
2980 || (rtx_equal_p (operands[1], operands[4])
2981 && rtx_equal_p (operands[2], operands[3])))"
2983 (vec_merge:VF_128_256
2984 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2985 (plus:VF_128_256 (match_dup 1) (match_dup 2))
;; Second variant: operand 3 is the register; the merge arms use
;; operands 3/4, so the mask constant must be complemented (see below).
2989 [(set (match_operand:VF_128_256 0 "register_operand")
2990 (match_operator:VF_128_256 6 "addsub_vm_operator"
2992 (match_operand:VF_128_256 1 "vector_operand")
2993 (match_operand:VF_128_256 2 "vector_operand"))
2995 (match_operand:VF_128_256 3 "register_operand")
2996 (match_operand:VF_128_256 4 "vector_operand"))
2997 (match_operand 5 "const_int_operand")]))]
2999 && can_create_pseudo_p ()
3000 && ((rtx_equal_p (operands[1], operands[3])
3001 && rtx_equal_p (operands[2], operands[4]))
3002 || (rtx_equal_p (operands[1], operands[4])
3003 && rtx_equal_p (operands[2], operands[3])))"
3005 (vec_merge:VF_128_256
3006 (minus:VF_128_256 (match_dup 3) (match_dup 4))
3007 (plus:VF_128_256 (match_dup 3) (match_dup 4))
3010 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. */
3012 = GEN_INT (~INTVAL (operands[5])
3013 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
;; NOTE(review): pre-reload splitters for vec_select-based add/sub
;; combinations (addsub_vs_operator over a vec_concat of the two halves),
;; likewise rewritten as a vec_merge of MINUS and PLUS.  Interior lines
;; were elided by extraction (embedded numbering jumps); comments cover
;; only what is shown.
3017 [(set (match_operand:VF_128_256 0 "register_operand")
3018 (match_operator:VF_128_256 7 "addsub_vs_operator"
3019 [(vec_concat:<ssedoublemode>
3021 (match_operand:VF_128_256 1 "register_operand")
3022 (match_operand:VF_128_256 2 "vector_operand"))
3024 (match_operand:VF_128_256 3 "vector_operand")
3025 (match_operand:VF_128_256 4 "vector_operand")))
3026 (match_parallel 5 "addsub_vs_parallel"
3027 [(match_operand 6 "const_int_operand")])]))]
3029 && can_create_pseudo_p ()
3030 && ((rtx_equal_p (operands[1], operands[3])
3031 && rtx_equal_p (operands[2], operands[4]))
3032 || (rtx_equal_p (operands[1], operands[4])
3033 && rtx_equal_p (operands[2], operands[3])))"
3035 (vec_merge:VF_128_256
3036 (minus:VF_128_256 (match_dup 1) (match_dup 2))
3037 (plus:VF_128_256 (match_dup 1) (match_dup 2))
;; Build the vec_merge mask: selection indices below NUNITS pick from the
;; MINUS half of the double-width concat.
3040 int i, nelt = XVECLEN (operands[5], 0);
3041 HOST_WIDE_INT ival = 0;
3043 for (i = 0; i < nelt; i++)
3044 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
3045 ival |= HOST_WIDE_INT_1 << i;
3047 operands[5] = GEN_INT (ival);
;; Second variant: merge arms use operands 3/4; mask bits are set for
;; indices >= NUNITS instead (swapped halves of the concat).
3051 [(set (match_operand:VF_128_256 0 "register_operand")
3052 (match_operator:VF_128_256 7 "addsub_vs_operator"
3053 [(vec_concat:<ssedoublemode>
3055 (match_operand:VF_128_256 1 "vector_operand")
3056 (match_operand:VF_128_256 2 "vector_operand"))
3058 (match_operand:VF_128_256 3 "register_operand")
3059 (match_operand:VF_128_256 4 "vector_operand")))
3060 (match_parallel 5 "addsub_vs_parallel"
3061 [(match_operand 6 "const_int_operand")])]))]
3063 && can_create_pseudo_p ()
3064 && ((rtx_equal_p (operands[1], operands[3])
3065 && rtx_equal_p (operands[2], operands[4]))
3066 || (rtx_equal_p (operands[1], operands[4])
3067 && rtx_equal_p (operands[2], operands[3])))"
3069 (vec_merge:VF_128_256
3070 (minus:VF_128_256 (match_dup 3) (match_dup 4))
3071 (plus:VF_128_256 (match_dup 3) (match_dup 4))
3074 int i, nelt = XVECLEN (operands[5], 0);
3075 HOST_WIDE_INT ival = 0;
3077 for (i = 0; i < nelt; i++)
3078 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
3079 ival |= HOST_WIDE_INT_1 << i;
3081 operands[5] = GEN_INT (ival);
;; avx_h<insn>v4df3: AVX 256-bit horizontal add/sub of V4DF
;; (vhaddpd/vhsubpd).  Results are built from element pairs within each
;; 128-bit lane of operands 1 and 2.
;; NOTE(review): some interior lines of this pattern were elided by
;; extraction (embedded numbering jumps).
3084 (define_insn "avx_h<insn>v4df3"
3085 [(set (match_operand:V4DF 0 "register_operand" "=x")
3090 (match_operand:V4DF 1 "register_operand" "x")
3091 (parallel [(const_int 0)]))
3092 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
3095 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
3096 (parallel [(const_int 0)]))
3097 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
3100 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
3101 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
3103 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
3104 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
3106 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
3107 [(set_attr "type" "sseadd")
3108 (set_attr "prefix" "vex")
3109 (set_attr "mode" "V4DF")])
;; sse3_haddv2df3 expander and its insn: SSE3 haddpd — the result elements
;; are pairwise sums of operand 1 and operand 2.  The insn variant accepts
;; any 0/1 index pair per operand as long as the two indices differ.
;; NOTE(review): some interior lines were elided by extraction.
3111 (define_expand "sse3_haddv2df3"
3112 [(set (match_operand:V2DF 0 "register_operand")
3116 (match_operand:V2DF 1 "register_operand")
3117 (parallel [(const_int 0)]))
3118 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
3121 (match_operand:V2DF 2 "vector_operand")
3122 (parallel [(const_int 0)]))
3123 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
3126 (define_insn "*sse3_haddv2df3"
3127 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3131 (match_operand:V2DF 1 "register_operand" "0,x")
3132 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
3135 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
3138 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
3139 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
3142 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
;; Addition is commutative, so either {0,1} or {1,0} per operand is haddpd.
3144 && INTVAL (operands[3]) != INTVAL (operands[4])
3145 && INTVAL (operands[5]) != INTVAL (operands[6])"
3147 haddpd\t{%2, %0|%0, %2}
3148 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
3149 [(set_attr "isa" "noavx,avx")
3150 (set_attr "type" "sseadd")
3151 (set_attr "prefix" "orig,vex")
3152 (set_attr "mode" "V2DF")])
;; sse3_hsubv2df3: SSE3 hsubpd — subtraction is not commutative, so the
;; element order is fixed (element 0 minus element 1 per operand).
;; NOTE(review): some interior lines were elided by extraction.
3154 (define_insn "sse3_hsubv2df3"
3155 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3159 (match_operand:V2DF 1 "register_operand" "0,x")
3160 (parallel [(const_int 0)]))
3161 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
3164 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
3165 (parallel [(const_int 0)]))
3166 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
3169 hsubpd\t{%2, %0|%0, %2}
3170 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
3171 [(set_attr "isa" "noavx,avx")
3172 (set_attr "type" "sseadd")
3173 (set_attr "prefix" "orig,vex")
3174 (set_attr "mode" "V2DF")])
;; *_low variants: produce only the scalar DF result of a horizontal
;; add/sub of operand 1 with itself; used for V2DF reductions when
;; TARGET_V2DF_REDUCTION_PREFER_HADDPD holds.
;; NOTE(review): some interior lines were elided by extraction.
3176 (define_insn "*sse3_haddv2df3_low"
3177 [(set (match_operand:DF 0 "register_operand" "=x,x")
3180 (match_operand:V2DF 1 "register_operand" "0,x")
3181 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
3184 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
3185 "TARGET_SSE3 && TARGET_V2DF_REDUCTION_PREFER_HADDPD
3186 && INTVAL (operands[2]) != INTVAL (operands[3])"
3188 haddpd\t{%0, %0|%0, %0}
3189 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
3190 [(set_attr "isa" "noavx,avx")
3191 (set_attr "type" "sseadd1")
3192 (set_attr "prefix" "orig,vex")
3193 (set_attr "mode" "V2DF")])
;; Subtraction variant: fixed element order (element 0 minus element 1).
3195 (define_insn "*sse3_hsubv2df3_low"
3196 [(set (match_operand:DF 0 "register_operand" "=x,x")
3199 (match_operand:V2DF 1 "register_operand" "0,x")
3200 (parallel [(const_int 0)]))
3203 (parallel [(const_int 1)]))))]
3204 "TARGET_SSE3 && TARGET_V2DF_REDUCTION_PREFER_HADDPD"
3206 hsubpd\t{%0, %0|%0, %0}
3207 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
3208 [(set_attr "isa" "noavx,avx")
3209 (set_attr "type" "sseadd1")
3210 (set_attr "prefix" "orig,vex")
3211 (set_attr "mode" "V2DF")])
;; avx_h<insn>v8sf3: AVX 256-bit horizontal add/sub of V8SF
;; (vhaddps/vhsubps); element pairs are combined lane-locally, interleaving
;; operand 1 pairs and operand 2 pairs within each 128-bit lane.
;; NOTE(review): some interior lines were elided by extraction.
3213 (define_insn "avx_h<insn>v8sf3"
3214 [(set (match_operand:V8SF 0 "register_operand" "=x")
3220 (match_operand:V8SF 1 "register_operand" "x")
3221 (parallel [(const_int 0)]))
3222 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
3224 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
3225 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
3229 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
3230 (parallel [(const_int 0)]))
3231 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
3233 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
3234 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
3238 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
3239 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
3241 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
3242 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
3245 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
3246 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
3248 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
3249 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
3251 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
3252 [(set_attr "type" "sseadd")
3253 (set_attr "prefix" "vex")
3254 (set_attr "mode" "V8SF")])
;; sse3_h<insn>v4sf3: SSE3 haddps/hsubps on V4SF; low result half from
;; operand 1 pairs, high half from operand 2 pairs.
;; NOTE(review): some interior lines were elided by extraction.
3256 (define_insn "sse3_h<insn>v4sf3"
3257 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3262 (match_operand:V4SF 1 "register_operand" "0,x")
3263 (parallel [(const_int 0)]))
3264 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
3266 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
3267 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
3271 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
3272 (parallel [(const_int 0)]))
3273 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
3275 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
3276 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
3279 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
3280 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
3281 [(set_attr "isa" "noavx,avx")
3282 (set_attr "type" "sseadd")
3283 (set_attr "atom_unit" "complex")
3284 (set_attr "prefix" "orig,vex")
3285 (set_attr "prefix_rep" "1,*")
3286 (set_attr "mode" "V4SF")])
;; Scalar "reduce to one element" expanders for 128-bit plus-reductions.
;; NOTE(review): some interior lines were elided by extraction.
3288 (define_mode_iterator REDUC_SSE_PLUS_MODE
3289 [(V2DF "TARGET_SSE") (V4SF "TARGET_SSE")
3290 (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")])
;; Generic path: tree-reduce into a vector temp, then extract element 0.
3292 (define_expand "reduc_plus_scal_<mode>"
3293 [(plus:REDUC_SSE_PLUS_MODE
3294 (match_operand:<ssescalarmode> 0 "register_operand")
3295 (match_operand:REDUC_SSE_PLUS_MODE 1 "register_operand"))]
3298 rtx tmp = gen_reg_rtx (<MODE>mode);
3299 ix86_expand_reduc (gen_add<mode>3, tmp, operands[1]);
3300 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
;; V16QI special case: fold halves with a V1TI shift + byte add, then use
;; psadbw against zero to sum the remaining bytes into one lane.
3305 (define_expand "reduc_plus_scal_v16qi"
3307 (match_operand:QI 0 "register_operand")
3308 (match_operand:V16QI 1 "register_operand"))]
3311 rtx tmp = gen_reg_rtx (V1TImode);
3312 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
3314 rtx tmp2 = gen_reg_rtx (V16QImode);
3315 emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp)));
3316 rtx tmp3 = gen_reg_rtx (V16QImode);
3317 emit_move_insn (tmp3, CONST0_RTX (V16QImode));
3318 rtx tmp4 = gen_reg_rtx (V2DImode);
3319 emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3));
3320 tmp4 = gen_lowpart (V16QImode, tmp4);
3321 emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx));
;; Wider plus-reductions: extract the high half, add it to the low half,
;; and recurse via the half-width reduc_plus_scal pattern.
;; NOTE(review): some interior lines were elided by extraction.
3325 (define_mode_iterator REDUC_PLUS_MODE
3326 [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
3327 (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
3328 (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
3329 (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
3330 (V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
3332 (define_expand "reduc_plus_scal_<mode>"
3333 [(plus:REDUC_PLUS_MODE
3334 (match_operand:<ssescalarmode> 0 "register_operand")
3335 (match_operand:REDUC_PLUS_MODE 1 "register_operand"))]
3338 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
3339 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
3340 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
3341 rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
3342 emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
3343 emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
3347 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; NOTE(review): some interior lines were elided by extraction.
3348 (define_mode_iterator REDUC_SSE_SMINMAX_MODE
3349 [(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
3350 (V4SF "TARGET_SSE") (V2DF "TARGET_SSE")
3351 (V4SI "TARGET_SSE2") (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
3352 (V2DI "TARGET_SSE4_2")])
;; Signed min/max reduction for 128-bit modes: tree-reduce into a temp,
;; then extract element 0.
3354 (define_expand "reduc_<code>_scal_<mode>"
3355 [(smaxmin:REDUC_SSE_SMINMAX_MODE
3356 (match_operand:<ssescalarmode> 0 "register_operand")
3357 (match_operand:REDUC_SSE_SMINMAX_MODE 1 "register_operand"))]
3360 rtx tmp = gen_reg_rtx (<MODE>mode);
3361 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
3362 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
;; Signed min/max reduction for wider modes: halve, combine, recurse.
;; NOTE(review): some interior lines were elided by extraction.
3367 (define_mode_iterator REDUC_SMINMAX_MODE
3368 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
3369 (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
3370 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
3371 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
3372 (V64QI "TARGET_AVX512BW")
3373 (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
3374 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
3375 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
3376 (V8DF "TARGET_AVX512F")])
3378 (define_expand "reduc_<code>_scal_<mode>"
3379 [(smaxmin:REDUC_SMINMAX_MODE
3380 (match_operand:<ssescalarmode> 0 "register_operand")
3381 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
3384 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
3385 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
3386 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
3387 emit_insn (gen_<code><ssehalfvecmodelower>3
3388 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
3389 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
;; Unsigned min/max reductions: AVX512BW modes halve and recurse; 256-bit
;; integer modes halve, combine, then tree-reduce the 128-bit half and
;; extract element 0.
;; NOTE(review): some interior lines were elided by extraction.
3393 (define_expand "reduc_<code>_scal_<mode>"
3394 [(umaxmin:VI_AVX512BW
3395 (match_operand:<ssescalarmode> 0 "register_operand")
3396 (match_operand:VI_AVX512BW 1 "register_operand"))]
3399 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
3400 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
3401 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
3402 emit_insn (gen_<code><ssehalfvecmodelower>3
3403 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
3404 emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
3408 (define_expand "reduc_<code>_scal_<mode>"
3410 (match_operand:<ssescalarmode> 0 "register_operand")
3411 (match_operand:VI_256 1 "register_operand"))]
3414 rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
3415 emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
3416 rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
3417 emit_insn (gen_<code><ssehalfvecmodelower>3
3418 (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
3419 rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
3420 ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp3, tmp2);
3421 emit_insn (gen_vec_extract<ssehalfvecmodelower><ssescalarmodelower>
3422 (operands[0], tmp3, const0_rtx));
;; reduc_umin_scal_v8hi: V8HI unsigned-min reduction via tree reduction,
;; then extract element 0.
;; NOTE(review): some interior lines were elided by extraction.
3426 (define_expand "reduc_umin_scal_v8hi"
3428 (match_operand:HI 0 "register_operand")
3429 (match_operand:V8HI 1 "register_operand"))]
3432 rtx tmp = gen_reg_rtx (V8HImode);
3433 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
3434 emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
;; vreduceps/pd/ph (AVX512DQ / AVX512FP16): packed and scalar
;; range-reduction with an 8-bit immediate control (operand 2 resp. 3).
;; NOTE(review): some interior lines were elided by extraction.
3438 (define_insn "<mask_codefor>reducep<mode><mask_name><round_saeonly_name>"
3439 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
3440 (unspec:VFH_AVX512VL
3441 [(match_operand:VFH_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3442 (match_operand:SI 2 "const_0_to_255_operand")]
3444 "TARGET_AVX512DQ || (VALID_AVX512FP16_REG_MODE (<MODE>mode))"
3445 "vreduce<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
3446 [(set_attr "type" "sse")
3447 (set_attr "prefix" "evex")
3448 (set_attr "mode" "<MODE>")])
;; Scalar form: operands 1/2 feed the compare; mask/rounding handled via
;; the subst attributes in the name.
3450 (define_insn "reduces<mode><mask_scalar_name><round_saeonly_scalar_name>"
3451 [(set (match_operand:VFH_128 0 "register_operand" "=v")
3454 [(match_operand:VFH_128 1 "register_operand" "v")
3455 (match_operand:VFH_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
3456 (match_operand:SI 3 "const_0_to_255_operand")]
3460 "TARGET_AVX512DQ || (VALID_AVX512FP16_REG_MODE (<MODE>mode))"
3461 "vreduce<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
3462 [(set_attr "type" "sse")
3463 (set_attr "prefix" "evex")
3464 (set_attr "mode" "<MODE>")])
3466 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3468 ;; Parallel floating point comparisons
3470 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; avx_cmp<mode>3: AVX vcmpps/vcmppd with a 5-bit comparison-predicate
;; immediate (operand 3).
;; NOTE(review): some interior lines were elided by extraction.
3472 (define_insn "avx_cmp<mode>3"
3473 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
3475 [(match_operand:VF_128_256 1 "register_operand" "x")
3476 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
3477 (match_operand:SI 3 "const_0_to_31_operand")]
3480 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3481 [(set_attr "type" "ssecmp")
3482 (set_attr "length_immediate" "1")
3483 (set_attr "prefix" "vex")
3484 (set_attr "mode" "<MODE>")])
;; Pre-reload splitters turning a vec_merge of all-ones/zero controlled by
;; an AVX512 mask compare back into an AVX vcmp producing a full-width
;; vector mask.  The _2 variant handles the negated mask by inverting the
;; comparison predicate (imm ^ 4).
;; NOTE(review): some interior lines were elided by extraction.
3486 (define_insn_and_split "*avx_cmp<mode>3_1"
3487 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3488 (vec_merge:<sseintvecmode>
3489 (match_operand:<sseintvecmode> 1 "vector_all_ones_operand")
3490 (match_operand:<sseintvecmode> 2 "const0_operand")
3491 (unspec:<avx512fmaskmode>
3492 [(match_operand:VF_128_256 3 "register_operand")
3493 (match_operand:VF_128_256 4 "nonimmediate_operand")
3494 (match_operand:SI 5 "const_0_to_31_operand")]
3496 "TARGET_AVX512VL && ix86_pre_reload_split ()"
3505 (set (match_dup 0) (match_dup 7))]
3507 operands[6] = gen_reg_rtx (<MODE>mode);
3509 = lowpart_subreg (GET_MODE (operands[0]), operands[6], <MODE>mode);
3512 (define_insn_and_split "*avx_cmp<mode>3_2"
3513 [(set (match_operand:<sseintvecmode> 0 "register_operand")
3514 (vec_merge:<sseintvecmode>
3515 (match_operand:<sseintvecmode> 1 "vector_all_ones_operand")
3516 (match_operand:<sseintvecmode> 2 "const0_operand")
3517 (not:<avx512fmaskmode>
3518 (unspec:<avx512fmaskmode>
3519 [(match_operand:VF_128_256 3 "register_operand")
3520 (match_operand:VF_128_256 4 "nonimmediate_operand")
3521 (match_operand:SI 5 "const_0_to_31_operand")]
3523 "TARGET_AVX512VL && ix86_pre_reload_split ()"
3532 (set (match_dup 0) (match_dup 7))]
;; XOR with 4 flips the vcmp predicate to its negated counterpart.
3534 operands[5] = GEN_INT (INTVAL (operands[5]) ^ 4);
3535 operands[6] = gen_reg_rtx (<MODE>mode);
3537 = lowpart_subreg (GET_MODE (operands[0]), operands[6], <MODE>mode);
;; Same idea with float all-ones constants as the vec_merge arms; the _4
;; variant again compensates the mask negation by inverting the predicate
;; (imm ^ 4).
;; NOTE(review): some interior lines were elided by extraction.
3540 (define_insn_and_split "*avx_cmp<mode>3_3"
3541 [(set (match_operand:VF_128_256 0 "register_operand")
3542 (vec_merge:VF_128_256
3543 (match_operand:VF_128_256 1 "float_vector_all_ones_operand")
3544 (match_operand:VF_128_256 2 "const0_operand")
3545 (unspec:<avx512fmaskmode>
3546 [(match_operand:VF_128_256 3 "register_operand")
3547 (match_operand:VF_128_256 4 "nonimmediate_operand")
3548 (match_operand:SI 5 "const_0_to_31_operand")]
3550 "TARGET_AVX512VL && ix86_pre_reload_split ()"
3560 (define_insn_and_split "*avx_cmp<mode>3_4"
3561 [(set (match_operand:VF_128_256 0 "register_operand")
3562 (vec_merge:VF_128_256
3563 (match_operand:VF_128_256 1 "float_vector_all_ones_operand")
3564 (match_operand:VF_128_256 2 "const0_operand")
3565 (not:<avx512fmaskmode>
3566 (unspec:<avx512fmaskmode>
3567 [(match_operand:VF_128_256 3 "register_operand")
3568 (match_operand:VF_128_256 4 "nonimmediate_operand")
3569 (match_operand:SI 5 "const_0_to_31_operand")]
3571 "TARGET_AVX512VL && ix86_pre_reload_split ()"
3580 "operands[5] = GEN_INT (INTVAL (operands[5]) ^ 4);")
;; Selects controlled by a compare of an integer vector against zero
;; (predicate 1 = LT, 5 = NLT/GE): predicate 5 swaps the select arms
;; first, then the pattern can use the sign-bit-driven blend forms.
;; NOTE(review): some interior lines were elided by extraction.
3582 (define_insn_and_split "*avx_cmp<mode>3_lt"
3583 [(set (match_operand:VF_128_256 0 "register_operand")
3584 (vec_merge:VF_128_256
3585 (match_operand:VF_128_256 1 "vector_operand")
3586 (match_operand:VF_128_256 2 "vector_operand")
3587 (unspec:<avx512fmaskmode>
3588 [(match_operand:<sseintvecmode> 3 "register_operand")
3589 (match_operand:<sseintvecmode> 4 "const0_operand")
3590 (match_operand:SI 5 "const_0_to_7_operand")]
3592 "TARGET_AVX512VL && ix86_pre_reload_split ()
3594 && ((INTVAL (operands[5]) == 1) || (INTVAL (operands[5]) == 5))"
3606 if (INTVAL (operands[5]) == 5)
3607 std::swap (operands[1], operands[2]);
3608 operands[2] = force_reg (<MODE>mode, operands[2]);
;; Integer-destination variant: rewrites through the byte-mode blend,
;; lowparting the operands into <ssebytemode>.
3611 (define_insn_and_split "*avx_cmp<mode>3_ltint"
3612 [(set (match_operand:VI48_AVX 0 "register_operand")
3614 (match_operand:VI48_AVX 1 "vector_operand")
3615 (match_operand:VI48_AVX 2 "vector_operand")
3616 (unspec:<avx512fmaskmode>
3617 [(match_operand:VI48_AVX 3 "register_operand")
3618 (match_operand:VI48_AVX 4 "const0_operand")
3619 (match_operand:SI 5 "const_0_to_7_operand")]
3621 "TARGET_AVX512VL && ix86_pre_reload_split ()
3623 && ((INTVAL (operands[5]) == 1) || (INTVAL (operands[5]) == 5))"
3627 (unspec:<ssebytemode>
3630 (subreg:<ssebytemode>
3636 if (INTVAL (operands[5]) == 5)
3637 std::swap (operands[1], operands[2]);
3638 operands[0] = gen_lowpart (<ssebytemode>mode, operands[0]);
3639 operands[1] = gen_lowpart (<ssebytemode>mode, operands[1]);
3640 operands[2] = force_reg (<ssebytemode>mode,
3641 gen_lowpart (<ssebytemode>mode, operands[2]));
;; As *avx_cmp<mode>3_ltint, but the compared value is a subreg of a
;; byte-mode operand (operand 3); the original "/* not LT or GE 0 */"
;; comment documents the accepted predicates (1 and 5).
;; NOTE(review): some interior lines were elided by extraction.
3644 (define_insn_and_split "*avx_cmp<mode>3_ltint_not"
3645 [(set (match_operand:VI48_AVX 0 "register_operand")
3647 (match_operand:VI48_AVX 1 "vector_operand")
3648 (match_operand:VI48_AVX 2 "vector_operand")
3649 (unspec:<avx512fmaskmode>
3652 (match_operand:<ssebytemode> 3 "vector_operand")) 0)
3653 (match_operand:VI48_AVX 4 "const0_operand")
3654 (match_operand:SI 5 "const_0_to_7_operand")]
3656 "TARGET_AVX512VL && ix86_pre_reload_split ()
3657 /* not LT or GE 0 */
3658 && ((INTVAL (operands[5]) == 1) || (INTVAL (operands[5]) == 5))"
3662 (unspec:<ssebytemode>
3665 (subreg:<ssebytemode>
3671 if (INTVAL (operands[5]) == 5)
3672 std::swap (operands[1], operands[2]);
3673 operands[0] = gen_lowpart (<ssebytemode>mode, operands[0]);
3674 operands[1] = force_reg (<ssebytemode>mode,
3675 gen_lowpart (<ssebytemode>mode, operands[1]));
3676 operands[2] = gen_lowpart (<ssebytemode>mode, operands[2]);
3678 operands[3] = force_reg (<ssebytemode>mode, operands[3]);
3679 operands[3] = lowpart_subreg (<MODE>mode, operands[3], <ssebytemode>mode);
;; avx_vmcmp<mode>3: scalar AVX compare (vcmpss/vcmpsd) with predicate
;; immediate in operand 3; only the low element is compared.
;; NOTE(review): some interior lines were elided by extraction.
3682 (define_insn "avx_vmcmp<mode>3"
3683 [(set (match_operand:VF_128 0 "register_operand" "=x")
3686 [(match_operand:VF_128 1 "register_operand" "x")
3687 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
3688 (match_operand:SI 3 "const_0_to_31_operand")]
3693 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
3694 [(set_attr "type" "ssecmp")
3695 (set_attr "length_immediate" "1")
3696 (set_attr "prefix" "vex")
3697 (set_attr "mode" "<ssescalarmode>")])
;; SSE/AVX packed compares via match_operator (cmpps/cmppd; %D3 prints the
;; predicate suffix).  The _comm variant additionally allows operand
;; commutation ("%0") but only for commutative comparison codes.
;; NOTE(review): some interior lines were elided by extraction.
3699 (define_insn "*<sse>_maskcmp<mode>3_comm"
3700 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
3701 (match_operator:VF_128_256 3 "sse_comparison_operator"
3702 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
3703 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
3705 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
3707 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
3708 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3709 [(set_attr "isa" "noavx,avx")
3710 (set_attr "type" "ssecmp")
3711 (set_attr "length_immediate" "1")
3712 (set_attr "prefix" "orig,vex")
3713 (set_attr "mode" "<MODE>")])
;; Non-commutative form: operand order is fixed.
3715 (define_insn "<sse>_maskcmp<mode>3"
3716 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
3717 (match_operator:VF_128_256 3 "sse_comparison_operator"
3718 [(match_operand:VF_128_256 1 "register_operand" "0,x")
3719 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
3722 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
3723 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
3724 [(set_attr "isa" "noavx,avx")
3725 (set_attr "type" "ssecmp")
3726 (set_attr "length_immediate" "1")
3727 (set_attr "prefix" "orig,vex")
3728 (set_attr "mode" "<MODE>")])
;; Scalar masked compare (cmpss/cmpsd): only the low element holds the
;; compare result mask.
;; NOTE(review): some interior lines were elided by extraction.
3730 (define_insn "<sse>_vmmaskcmp<mode>3"
3731 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3733 (match_operator:VF_128 3 "sse_comparison_operator"
3734 [(match_operand:VF_128 1 "register_operand" "0,x")
3735 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
3740 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
3741 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
3742 [(set_attr "isa" "noavx,avx")
3743 (set_attr "type" "ssecmp")
3744 (set_attr "length_immediate" "1,*")
3745 (set_attr "prefix" "orig,vex")
3746 (set_attr "mode" "<ssescalarmode>")])
;; Immediate-predicate range per mode: FP compares take 0..31 (vcmp),
;; integer compares take 0..7 (vpcmp).
3748 (define_mode_attr cmp_imm_predicate
3749 [(V32HF "const_0_to_31_operand") (V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
3750 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
3751 (V16HF "const_0_to_31_operand") (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
3752 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
3753 (V8HF "const_0_to_31_operand") (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
3754 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
3755 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
3756 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
3757 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
;; AVX512 compare writing a mask register (=k).
;; NOTE(review): some interior lines were elided by extraction.
3759 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
3760 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3761 (unspec:<avx512fmaskmode>
3762 [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
3763 (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
3764 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3766 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
3767 "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
3768 [(set_attr "type" "ssecmp")
3769 (set_attr "length_immediate" "1")
3770 (set_attr "prefix" "evex")
3771 (set_attr "mode" "<sseinsnmode>")])
3773 ;; Since vpcmpd implicitly clear the upper bits of dest, transform
3774 ;; vpcmpd + zero_extend to vpcmpd since the instruction
;; NOTE(review): these splitters drop a zero_extend of the k-register
;; compare result (the mask instruction already zeroes the upper bits) by
;; retargeting the compare at a lowpart mask subreg of operand 0; the _2
;; variant additionally keeps a separate mask-register copy (operand 4).
;; Some interior lines were elided by extraction.
3775 (define_insn_and_split "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
3776 [(set (match_operand:SWI248x 0 "register_operand")
3777 (zero_extend:SWI248x
3778 (unspec:<V48H_AVX512VL:avx512fmaskmode>
3779 [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand")
3780 (match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
3781 (match_operand:SI 3 "const_0_to_7_operand")]
3784 && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
3785 && ix86_pre_reload_split ()
3786 && (GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode)
3787 < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
3791 (unspec:<V48H_AVX512VL:avx512fmaskmode>
3797 operands[1] = force_reg (<V48H_AVX512VL:MODE>mode, operands[1]);
3798 operands[0] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode,
3799 operands[0], <SWI248x:MODE>mode);
3801 [(set_attr "type" "ssecmp")
3802 (set_attr "length_immediate" "1")
3803 (set_attr "prefix" "evex")
3804 (set_attr "mode" "<V48H_AVX512VL:sseinsnmode>")])
3806 (define_insn_and_split "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2"
3807 [(set (match_operand:SWI248x 0 "register_operand")
3808 (zero_extend:SWI248x
3809 (unspec:<V48H_AVX512VL:avx512fmaskmode>
3810 [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand")
3811 (match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
3812 (match_operand:SI 3 "const_0_to_7_operand")]
3814 (set (match_operand:<V48H_AVX512VL:avx512fmaskmode> 4 "register_operand")
3815 (unspec:<V48H_AVX512VL:avx512fmaskmode>
3821 && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
3822 && (GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode)
3823 < GET_MODE_PRECISION (<SWI248x:MODE>mode))
3824 && ix86_pre_reload_split ()"
3828 (unspec:<V48H_AVX512VL:avx512fmaskmode>
3833 (set (match_dup 4) (match_dup 0))]
3835 operands[1] = force_reg (<V48H_AVX512VL:MODE>mode, operands[1]);
3836 operands[0] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode,
3837 operands[0], <SWI248x:MODE>mode);
3839 [(set_attr "type" "ssecmp")
3840 (set_attr "length_immediate" "1")
3841 (set_attr "prefix" "evex")
3842 (set_attr "mode" "<V48H_AVX512VL:sseinsnmode>")])
;; NOT of a mask compare: fold the negation into the predicate (imm ^ 4),
;; followed by the byte/word element compare (vpcmpb/vpcmpw).
;; NOTE(review): some interior lines were elided by extraction.
3844 (define_insn_and_split "*<avx512>_cmp<mode>3"
3845 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3846 (not:<avx512fmaskmode>
3847 (unspec:<avx512fmaskmode>
3848 [(match_operand:V48_AVX512VL 1 "register_operand")
3849 (match_operand:V48_AVX512VL 2 "nonimmediate_operand")
3850 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3852 "TARGET_AVX512F && ix86_pre_reload_split ()"
3856 (unspec:<avx512fmaskmode>
3861 "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
3863 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
3864 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3865 (unspec:<avx512fmaskmode>
3866 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
3867 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
3868 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
3871 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3872 [(set_attr "type" "ssecmp")
3873 (set_attr "length_immediate" "1")
3874 (set_attr "prefix" "evex")
3875 (set_attr "mode" "<sseinsnmode>")])
;; Zero-extend elimination for VI12 (byte/word) mask compares, mirroring
;; the V48H patterns above; the _2 variant also keeps the value in a mask
;; register (operand 4).
;; NOTE(review): some interior lines were elided by extraction.
3877 (define_insn_and_split "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
3878 [(set (match_operand:SWI248x 0 "register_operand")
3879 (zero_extend:SWI248x
3880 (unspec:<VI12_AVX512VL:avx512fmaskmode>
3881 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3882 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
3883 (match_operand:SI 3 "const_0_to_7_operand")]
3886 && ix86_pre_reload_split ()
3887 && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
3888 < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
3892 (unspec:<VI12_AVX512VL:avx512fmaskmode>
3898 operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
3899 operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
3900 operands[0], <SWI248x:MODE>mode);
3902 [(set_attr "type" "ssecmp")
3903 (set_attr "length_immediate" "1")
3904 (set_attr "prefix" "evex")
3905 (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
3907 (define_insn_and_split "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2"
3908 [(set (match_operand:SWI248x 0 "register_operand")
3909 (zero_extend:SWI248x
3910 (unspec:<VI12_AVX512VL:avx512fmaskmode>
3911 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3912 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
3913 (match_operand:SI 3 "const_0_to_7_operand")]
3915 (set (match_operand:<VI12_AVX512VL:avx512fmaskmode> 4 "register_operand")
3916 (unspec:<VI12_AVX512VL:avx512fmaskmode>
3922 && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
3923 < GET_MODE_PRECISION (<SWI248x:MODE>mode))
3924 && ix86_pre_reload_split ()"
3928 (unspec:<VI12_AVX512VL:avx512fmaskmode>
3933 (set (match_dup 4) (match_dup 0))]
3935 operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
3936 operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
3937 operands[0], <SWI248x:MODE>mode);
3939 [(set_attr "type" "ssecmp")
3940 (set_attr "length_immediate" "1")
3941 (set_attr "prefix" "evex")
3942 (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
;; Iterator covering signed and unsigned AVX512 integer compare unspecs,
;; plus the NOT-folding splitter for byte/word compares (predicate ^= 4).
;; NOTE(review): some interior lines were elided by extraction.
3944 (define_int_iterator UNSPEC_PCMP_ITER
3945 [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP])
3947 (define_insn_and_split "*<avx512>_cmp<mode>3"
3948 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3949 (not:<avx512fmaskmode>
3950 (unspec:<avx512fmaskmode>
3951 [(match_operand:VI12_AVX512VL 1 "register_operand")
3952 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
3953 (match_operand:SI 3 "<cmp_imm_predicate>")]
3954 UNSPEC_PCMP_ITER)))]
3955 "TARGET_AVX512BW && ix86_pre_reload_split ()"
3959 (unspec:<avx512fmaskmode>
3964 "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
;; Equality compare with commutative operands ("%v"); the all-zero second
;; operand alternative ("C") emits vptestnm instead of vpcmpeq.  Followed
;; by the unsigned compare insn (vpcmpub/vpcmpuw).
;; NOTE(review): some interior lines were elided by extraction.
3966 (define_insn "*<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
3967 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
3968 (unspec:<avx512fmaskmode>
3969 [(match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
3970 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "vm,C")
3973 "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3975 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
3976 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
3977 [(set_attr "type" "ssecmp")
3978 (set_attr "prefix_extra" "1")
3979 (set_attr "prefix" "evex")
3980 (set_attr "mode" "<sseinsnmode>")])
3982 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
3983 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3984 (unspec:<avx512fmaskmode>
3985 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
3986 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
3987 (match_operand:SI 3 "const_0_to_7_operand")]
3988 UNSPEC_UNSIGNED_PCMP))]
3990 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
3991 [(set_attr "type" "ssecmp")
3992 (set_attr "length_immediate" "1")
3993 (set_attr "prefix" "evex")
3994 (set_attr "mode" "<sseinsnmode>")])
;; Zero-extend elimination for unsigned VI12 mask compares (vpcmpu): the
;; compare is retargeted at a lowpart mask subreg of operand 0.
;; NOTE(review): some interior lines were elided by extraction.
3996 (define_insn_and_split "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
3997 [(set (match_operand:SWI248x 0 "register_operand")
3998 (zero_extend:SWI248x
3999 (unspec:<VI12_AVX512VL:avx512fmaskmode>
4000 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
4001 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
4002 (match_operand:SI 3 "const_0_to_7_operand")]
4003 UNSPEC_UNSIGNED_PCMP)))]
4005 && ix86_pre_reload_split ()
4006 && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
4007 < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
4011 (unspec:<VI12_AVX512VL:avx512fmaskmode>
4015 UNSPEC_UNSIGNED_PCMP))]
4017 operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
4018 operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
4019 operands[0], <SWI248x:MODE>mode);
4021 [(set_attr "type" "ssecmp")
4022 (set_attr "length_immediate" "1")
4023 (set_attr "prefix" "evex")
4024 (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
;; Variant of the pattern above where the same compare result is also
;; needed in a mask register (operand 4); the split keeps a single
;; compare and copies the mask with a second set.
;; NOTE(review): ix86_pre_reload_split () appears twice in the insn
;; condition below (lines 4041 and 4044) — redundant but harmless;
;; confirm against upstream sse.md.
4026 (define_insn_and_split "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2"
4027 [(set (match_operand:SWI248x 0 "register_operand")
4028 (zero_extend:SWI248x
4029 (unspec:<VI12_AVX512VL:avx512fmaskmode>
4030 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
4031 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
4032 (match_operand:SI 3 "const_0_to_7_operand")]
4033 UNSPEC_UNSIGNED_PCMP)))
4034 (set (match_operand:<VI12_AVX512VL:avx512fmaskmode> 4 "register_operand")
4035 (unspec:<VI12_AVX512VL:avx512fmaskmode>
4039 UNSPEC_UNSIGNED_PCMP))]
4041 && ix86_pre_reload_split ()
4042 && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
4043 < GET_MODE_PRECISION (<SWI248x:MODE>mode))
4044 && ix86_pre_reload_split ()"
4048 (unspec:<VI12_AVX512VL:avx512fmaskmode>
4052 UNSPEC_UNSIGNED_PCMP))
4053 (set (match_dup 4) (match_dup 0))]
4055 operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
4056 operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
4057 operands[0], <SWI248x:MODE>mode);
4059 [(set_attr "type" "ssecmp")
4060 (set_attr "length_immediate" "1")
4061 (set_attr "prefix" "evex")
4062 (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")])
;; Dword/qword counterpart of the VI12 equality pattern above: mask
;; equality compare, with the compare-against-zero alternative
;; (constraint "C") emitting VPTESTNM on the register operand itself.
4064 (define_insn "*<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
4065 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
4066 (unspec:<avx512fmaskmode>
4067 [(match_operand:VI48_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
4068 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "vm,C")
4071 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4073 vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
4074 vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
4075 [(set_attr "type" "ssecmp")
4076 (set_attr "prefix_extra" "1")
4077 (set_attr "prefix" "evex")
4078 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned dword/qword compare into a mask register (VPCMPU[DQ]).
;; Operand 3 is the immediate comparison predicate (0..7).
4080 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
4081 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
4082 (unspec:<avx512fmaskmode>
4083 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
4084 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
4085 (match_operand:SI 3 "const_0_to_7_operand")]
4086 UNSPEC_UNSIGNED_PCMP))]
4088 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
4089 [(set_attr "type" "ssecmp")
4090 (set_attr "length_immediate" "1")
4091 (set_attr "prefix" "evex")
4092 (set_attr "mode" "<sseinsnmode>")])
;; Fold a zero_extend of a dword/qword unsigned-compare mask into the
;; compare, mirroring the VI12 pattern above.  The extra
;; VALID_MASK_AVX512BW_MODE check restricts wide mask modes to
;; TARGET_AVX512BW.
4094 (define_insn_and_split "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
4095 [(set (match_operand:SWI248x 0 "register_operand")
4096 (zero_extend:SWI248x
4097 (unspec:<VI48_AVX512VL:avx512fmaskmode>
4098 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
4099 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
4100 (match_operand:SI 3 "const_0_to_7_operand")]
4101 UNSPEC_UNSIGNED_PCMP)))]
4103 && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
4104 && ix86_pre_reload_split ()
4105 && (GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode)
4106 < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
4110 (unspec:<VI48_AVX512VL:avx512fmaskmode>
4114 UNSPEC_UNSIGNED_PCMP))]
4116 operands[1] = force_reg (<VI48_AVX512VL:MODE>mode, operands[1]);
4117 operands[0] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode,
4118 operands[0], <SWI248x:MODE>mode);
4120 [(set_attr "type" "ssecmp")
4121 (set_attr "length_immediate" "1")
4122 (set_attr "prefix" "evex")
4123 (set_attr "mode" "<VI48_AVX512VL:sseinsnmode>")])
;; As above, but the compare result is also needed in a mask register
;; (operand 4); the split performs one compare and copies the mask.
4125 (define_insn_and_split "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2"
4126 [(set (match_operand:SWI248x 0 "register_operand")
4127 (zero_extend:SWI248x
4128 (unspec:<VI48_AVX512VL:avx512fmaskmode>
4129 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
4130 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
4131 (match_operand:SI 3 "const_0_to_7_operand")]
4132 UNSPEC_UNSIGNED_PCMP)))
4133 (set (match_operand:<VI48_AVX512VL:avx512fmaskmode> 4 "register_operand")
4134 (unspec:<VI48_AVX512VL:avx512fmaskmode>
4138 UNSPEC_UNSIGNED_PCMP))]
4140 && (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
4141 && (GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode)
4142 < GET_MODE_PRECISION (<SWI248x:MODE>mode))
4143 && ix86_pre_reload_split ()"
4147 (unspec:<VI48_AVX512VL:avx512fmaskmode>
4151 UNSPEC_UNSIGNED_PCMP))
4152 (set (match_dup 4) (match_dup 0))]
4154 operands[1] = force_reg (<VI48_AVX512VL:MODE>mode, operands[1]);
4155 operands[0] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode,
4156 operands[0], <SWI248x:MODE>mode);
4158 [(set_attr "type" "ssecmp")
4159 (set_attr "length_immediate" "1")
4160 (set_attr "prefix" "evex")
4161 (set_attr "mode" "<VI48_AVX512VL:sseinsnmode>")])
;; Fold a NOT of an unsigned-compare mask into the compare itself by
;; inverting the predicate immediate: XORing bit 2 (value 4) of a VPCMP
;; predicate yields its complement (e.g. LT <-> NLT, EQ <-> NEQ).
4163 (define_insn_and_split "*<avx512>_ucmp<mode>3"
4164 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
4165 (not:<avx512fmaskmode>
4166 (unspec:<avx512fmaskmode>
4167 [(match_operand:VI48_AVX512VL 1 "register_operand")
4168 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
4169 (match_operand:SI 3 "const_0_to_7_operand")]
4170 UNSPEC_UNSIGNED_PCMP)))]
4171 "TARGET_AVX512F && ix86_pre_reload_split ()"
4175 (unspec:<avx512fmaskmode>
4179 UNSPEC_UNSIGNED_PCMP))]
4180 "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")

;; Mask of predicate bits that must be clear for the PR96906 splitter
;; below: signed PCMP only allows EQ/NEQ, unsigned also allows LE/NLE.
4182 (define_int_attr pcmp_signed_mask
4183 [(UNSPEC_PCMP "3") (UNSPEC_UNSIGNED_PCMP "1")])
4185 ;; PR96906 - optimize vpsubusw compared to 0 into vpcmpleuw or vpcmpnltuw.
4186 ;; For signed comparison, handle EQ (predicate 0) and NEQ (predicate 4);
4187 ;; for unsigned comparison, additionally handle LE (2) and NLE (6), which are equivalent to EQ and NEQ.
;; PR96906 splitter: (x -us y) ==/!= 0 becomes an unsigned compare of
;; x and y directly.  Start from LE; swap operands (and use NLT) if
;; operand 1 is a memory; XOR with 4 to invert when the original
;; predicate was a not-equal form.
4189 (define_insn_and_split "*<avx512>_ucmp<mode>3_1"
4190 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
4191 (unspec:<avx512fmaskmode>
4192 [(us_minus:VI12_AVX512VL
4193 (match_operand:VI12_AVX512VL 1 "vector_operand")
4194 (match_operand:VI12_AVX512VL 2 "vector_operand"))
4195 (match_operand:VI12_AVX512VL 3 "const0_operand")
4196 (match_operand:SI 4 "const_0_to_7_operand")]
4198 "TARGET_AVX512BW && ix86_pre_reload_split ()
4199 && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)
4200 && (INTVAL (operands[4]) & <pcmp_signed_mask>) == 0"
4205 /* LE: 2, NLT: 5, NLE: 6, LT: 1 */
4206 int cmp_predicate = 2; /* LE */
4207 if (MEM_P (operands[1]))
4209 std::swap (operands[1], operands[2]);
4210 cmp_predicate = 5; /* NLT (GE) */
4212 if ((INTVAL (operands[4]) & 4) != 0)
4213 cmp_predicate ^= 4; /* Invert the comparison to NLE (GT) or LT. */
4214 emit_insn (gen_<avx512>_ucmp<mode>3 (operands[0], operands[1],operands[2],
4215 GEN_INT (cmp_predicate)));
;; Scalar FP compare into a mask register (VCMPS[SDH]); operand 3 is the
;; immediate comparison predicate (0..31).  The result mask is ANDed so
;; that only the low element's bit survives.
4219 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
4220 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
4221 (and:<avx512fmaskmode>
4222 (unspec:<avx512fmaskmode>
4223 [(match_operand:VFH_128 1 "register_operand" "v")
4224 (match_operand:VFH_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4225 (match_operand:SI 3 "const_0_to_31_operand")]
4229 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
4230 [(set_attr "type" "ssecmp")
4231 (set_attr "length_immediate" "1")
4232 (set_attr "prefix" "evex")
4233 (set_attr "mode" "<ssescalarmode>")])

;; Masked variant of the scalar compare: operand 4 is an input mask
;; ("Yk") applied as a zeroing write-mask (%{%4%}) on the destination.
4235 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
4236 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
4237 (and:<avx512fmaskmode>
4238 (unspec:<avx512fmaskmode>
4239 [(match_operand:VFH_128 1 "register_operand" "v")
4240 (match_operand:VFH_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4241 (match_operand:SI 3 "const_0_to_31_operand")]
4243 (and:<avx512fmaskmode>
4244 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
4247 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}"
4248 [(set_attr "type" "ssecmp")
4249 (set_attr "length_immediate" "1")
4250 (set_attr "prefix" "evex")
4251 (set_attr "mode" "<ssescalarmode>")])
;; Scalar ordered/unordered compare setting the FLAGS register
;; ((U)COMIS[SD], VEX-encoded when AVX is enabled via %v).  Compares the
;; low element of each vector operand.
4253 (define_insn "<sse>_<unord>comi<round_saeonly_name>"
4254 [(set (reg:CCFP FLAGS_REG)
4257 (match_operand:<ssevecmode> 0 "register_operand" "v")
4258 (parallel [(const_int 0)]))
4260 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4261 (parallel [(const_int 0)]))))]
4262 "SSE_FLOAT_MODE_P (<MODE>mode)"
4263 "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
4264 [(set_attr "type" "ssecomi")
4265 (set_attr "prefix" "maybe_vex")
4266 (set_attr "prefix_rep" "0")
4267 (set (attr "prefix_data16")
4268 (if_then_else (eq_attr "mode" "DF")
4270 (const_string "0")))
4271 (set_attr "mode" "<MODE>")])
;; Signed vector-compare expanders.  Each one simply dispatches to the
;; appropriate C expander (ix86_expand_mask_vec_cmp for AVX512 mask
;; results, ix86_expand_int_vec_cmp / ix86_expand_fp_vec_cmp for vector
;; results) with operand 1 carrying the comparison operator.

;; AVX512 float/int modes producing a mask register.
4273 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
4274 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
4275 (match_operator:<avx512fmaskmode> 1 ""
4276 [(match_operand:V48H_AVX512VL 2 "register_operand")
4277 (match_operand:V48H_AVX512VL 3 "nonimmediate_operand")]))]
4280 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
4281 operands[2], operands[3]);

;; AVX512BW byte/word modes producing a mask register.
4286 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
4287 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
4288 (match_operator:<avx512fmaskmode> 1 ""
4289 [(match_operand:VI12_AVX512VL 2 "register_operand")
4290 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
4293 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
4294 operands[2], operands[3]);

;; 256-bit integer compare producing a vector result.
4299 (define_expand "vec_cmp<mode><sseintvecmodelower>"
4300 [(set (match_operand:<sseintvecmode> 0 "register_operand")
4301 (match_operator:<sseintvecmode> 1 ""
4302 [(match_operand:VI_256 2 "register_operand")
4303 (match_operand:VI_256 3 "nonimmediate_or_const_vector_operand")]))]
4306 bool ok = ix86_expand_int_vec_cmp (operands);

;; 128-bit integer compare producing a vector result.
4311 (define_expand "vec_cmp<mode><sseintvecmodelower>"
4312 [(set (match_operand:<sseintvecmode> 0 "register_operand")
4313 (match_operator:<sseintvecmode> 1 ""
4314 [(match_operand:VI124_128 2 "register_operand")
4315 (match_operand:VI124_128 3 "vector_or_const_vector_operand")]))]
4318 bool ok = ix86_expand_int_vec_cmp (operands);

;; V2DI compare (separate because pre-SSE4.2 has no 64-bit compare insn).
4323 (define_expand "vec_cmpv2div2di"
4324 [(set (match_operand:V2DI 0 "register_operand")
4325 (match_operator:V2DI 1 ""
4326 [(match_operand:V2DI 2 "register_operand")
4327 (match_operand:V2DI 3 "vector_or_const_vector_operand")]))]
4330 bool ok = ix86_expand_int_vec_cmp (operands);

;; 256-bit FP compare producing an integer vector result.
4335 (define_expand "vec_cmp<mode><sseintvecmodelower>"
4336 [(set (match_operand:<sseintvecmode> 0 "register_operand")
4337 (match_operator:<sseintvecmode> 1 ""
4338 [(match_operand:VF_256 2 "register_operand")
4339 (match_operand:VF_256 3 "nonimmediate_operand")]))]
4342 bool ok = ix86_expand_fp_vec_cmp (operands);

;; 128-bit FP compare producing an integer vector result.
4347 (define_expand "vec_cmp<mode><sseintvecmodelower>"
4348 [(set (match_operand:<sseintvecmode> 0 "register_operand")
4349 (match_operator:<sseintvecmode> 1 ""
4350 [(match_operand:VF_128 2 "register_operand")
4351 (match_operand:VF_128 3 "vector_operand")]))]
4354 bool ok = ix86_expand_fp_vec_cmp (operands);
;; Unsigned vector-compare expanders; same dispatch scheme as the
;; signed vec_cmp expanders above.

;; AVX512 dword/qword modes producing a mask register.
4359 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
4360 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
4361 (match_operator:<avx512fmaskmode> 1 ""
4362 [(match_operand:VI48_AVX512VL 2 "register_operand")
4363 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
4366 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
4367 operands[2], operands[3]);

;; AVX512BW byte/word modes producing a mask register.
4372 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
4373 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
4374 (match_operator:<avx512fmaskmode> 1 ""
4375 [(match_operand:VI12_AVX512VL 2 "register_operand")
4376 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
4379 bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
4380 operands[2], operands[3]);

;; 256-bit unsigned integer compare producing a vector result.
4385 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
4386 [(set (match_operand:<sseintvecmode> 0 "register_operand")
4387 (match_operator:<sseintvecmode> 1 ""
4388 [(match_operand:VI_256 2 "register_operand")
4389 (match_operand:VI_256 3 "nonimmediate_or_const_vector_operand")]))]
4392 bool ok = ix86_expand_int_vec_cmp (operands);

;; 128-bit unsigned integer compare producing a vector result.
4397 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
4398 [(set (match_operand:<sseintvecmode> 0 "register_operand")
4399 (match_operator:<sseintvecmode> 1 ""
4400 [(match_operand:VI124_128 2 "register_operand")
4401 (match_operand:VI124_128 3 "vector_or_const_vector_operand")]))]
4404 bool ok = ix86_expand_int_vec_cmp (operands);

;; V2DI unsigned compare.
4409 (define_expand "vec_cmpuv2div2di"
4410 [(set (match_operand:V2DI 0 "register_operand")
4411 (match_operator:V2DI 1 ""
4412 [(match_operand:V2DI 2 "register_operand")
4413 (match_operand:V2DI 3 "vector_or_const_vector_operand")]))]
4416 bool ok = ix86_expand_int_vec_cmp (operands);
;; EQ/NE of V2DI without a 64-bit compare instruction: compare as V4SI,
;; then combine each dword result with its lane-swapped neighbour
;; (PSHUFD 0xb1 swaps within each qword) — AND for EQ, OR for NE —
;; so a qword is "equal" only if both of its dwords compared equal.
4421 (define_expand "vec_cmpeqv2div2di"
4422 [(set (match_operand:V2DI 0 "register_operand")
4423 (match_operator:V2DI 1 ""
4424 [(match_operand:V2DI 2 "register_operand")
4425 (match_operand:V2DI 3 "vector_operand")]))]
4432 ops[0] = gen_reg_rtx (V4SImode);
4433 ops[2] = gen_lowpart (V4SImode, force_reg (V2DImode, operands[2]));
4434 ops[3] = gen_lowpart (V4SImode, force_reg (V2DImode, operands[3]));
4435 ops[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), V4SImode,
4437 ok = ix86_expand_int_vec_cmp (ops);
4439 rtx tmp1 = gen_reg_rtx (V4SImode);
4440 emit_insn (gen_sse2_pshufd (tmp1, ops[0], GEN_INT (0xb1)));
4442 rtx tmp2 = gen_reg_rtx (V4SImode);
4443 if (GET_CODE (operands[1]) == EQ)
4444 emit_insn (gen_andv4si3 (tmp2, tmp1, ops[0]));
4446 emit_insn (gen_iorv4si3 (tmp2, tmp1, ops[0]));
4448 emit_move_insn (operands[0], gen_lowpart (V2DImode, tmp2));
4451 ok = ix86_expand_int_vec_cmp (operands);

;; EQ/NE of V1TI: compare as V2DI using the expander above, then fold
;; the two qword results together (PSHUFD 0x4e swaps the qword halves),
;; AND for EQ / OR for NE, so the whole 128-bit value compares as one.
4456 (define_expand "vec_cmpeqv1tiv1ti"
4457 [(set (match_operand:V1TI 0 "register_operand")
4458 (match_operator:V1TI 1 ""
4459 [(match_operand:V1TI 2 "register_operand")
4460 (match_operand:V1TI 3 "vector_operand")]))]
4463 rtx dst = gen_reg_rtx (V2DImode);
4464 rtx op1 = gen_lowpart (V2DImode, force_reg (V1TImode, operands[2]));
4465 rtx op2 = gen_lowpart (V2DImode, force_reg (V1TImode, operands[3]));
4466 rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), V2DImode, op1, op2);
4467 emit_insn (gen_vec_cmpeqv2div2di (dst, cmp, op1, op2));
4469 rtx tmp1 = gen_reg_rtx (V4SImode);
4470 rtx tmp2 = gen_lowpart (V4SImode, dst);
4471 emit_insn (gen_sse2_pshufd (tmp1, tmp2, GEN_INT (0x4e)));
4473 rtx tmp3 = gen_reg_rtx (V4SImode);
4474 if (GET_CODE (operands[1]) == EQ)
4475 emit_insn (gen_andv4si3 (tmp3, tmp2, tmp1))
4477 emit_insn (gen_iorv4si3 (tmp3, tmp2, tmp1));
4479 emit_move_insn (operands[0], gen_lowpart (V1TImode, tmp3));
;; vcond expanders: if_then_else of a vector comparison, dispatching to
;; ix86_expand_fp_vcond / ix86_expand_int_vcond.  The mode-pair names
;; pair a data mode with the comparison-operand mode; the NUNITS checks
;; require matching element counts.

;; 512-bit data, FP comparison.
4483 (define_expand "vcond<V_512:mode><VF_512:mode>"
4484 [(set (match_operand:V_512 0 "register_operand")
4486 (match_operator 3 ""
4487 [(match_operand:VF_512 4 "nonimmediate_operand")
4488 (match_operand:VF_512 5 "nonimmediate_operand")])
4489 (match_operand:V_512 1 "general_operand")
4490 (match_operand:V_512 2 "general_operand")))]
4492 && (GET_MODE_NUNITS (<V_512:MODE>mode)
4493 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
4495 bool ok = ix86_expand_fp_vcond (operands);

;; 256-bit data, FP comparison.
4500 (define_expand "vcond<V_256:mode><VF_256:mode>"
4501 [(set (match_operand:V_256 0 "register_operand")
4503 (match_operator 3 ""
4504 [(match_operand:VF_256 4 "nonimmediate_operand")
4505 (match_operand:VF_256 5 "nonimmediate_operand")])
4506 (match_operand:V_256 1 "general_operand")
4507 (match_operand:V_256 2 "general_operand")))]
4509 && (GET_MODE_NUNITS (<V_256:MODE>mode)
4510 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
4512 bool ok = ix86_expand_fp_vcond (operands);

;; 128-bit data, FP comparison.
4517 (define_expand "vcond<V_128:mode><VF_128:mode>"
4518 [(set (match_operand:V_128 0 "register_operand")
4520 (match_operator 3 ""
4521 [(match_operand:VF_128 4 "vector_operand")
4522 (match_operand:VF_128 5 "vector_operand")])
4523 (match_operand:V_128 1 "general_operand")
4524 (match_operand:V_128 2 "general_operand")))]
4526 && (GET_MODE_NUNITS (<V_128:MODE>mode)
4527 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
4529 bool ok = ix86_expand_fp_vcond (operands);

;; FP16 data selected by an FP16 comparison.
4534 (define_expand "vcond<mode><mode>"
4535 [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
4536 (if_then_else:VF_AVX512FP16VL
4537 (match_operator 3 ""
4538 [(match_operand:VF_AVX512FP16VL 4 "vector_operand")
4539 (match_operand:VF_AVX512FP16VL 5 "vector_operand")])
4540 (match_operand:VF_AVX512FP16VL 1 "general_operand")
4541 (match_operand:VF_AVX512FP16VL 2 "general_operand")))]
4544 bool ok = ix86_expand_fp_vcond (operands);

;; FP16/BF16 data selected by an integer comparison.
4549 (define_expand "vcond<mode><sseintvecmodelower>"
4550 [(set (match_operand:VF_AVX512HFBFVL 0 "register_operand")
4551 (if_then_else:VF_AVX512HFBFVL
4552 (match_operator 3 ""
4553 [(match_operand:<sseintvecmode> 4 "vector_operand")
4554 (match_operand:<sseintvecmode> 5 "vector_operand")])
4555 (match_operand:VF_AVX512HFBFVL 1 "general_operand")
4556 (match_operand:VF_AVX512HFBFVL 2 "general_operand")))]
4559 bool ok = ix86_expand_int_vcond (operands);

;; Integer data selected by an FP16 comparison.
4564 (define_expand "vcond<sseintvecmodelower><mode>"
4565 [(set (match_operand:<sseintvecmode> 0 "register_operand")
4566 (if_then_else:<sseintvecmode>
4567 (match_operator 3 ""
4568 [(match_operand:VF_AVX512FP16VL 4 "vector_operand")
4569 (match_operand:VF_AVX512FP16VL 5 "vector_operand")])
4570 (match_operand:<sseintvecmode> 1 "general_operand")
4571 (match_operand:<sseintvecmode> 2 "general_operand")))]
4574 bool ok = ix86_expand_fp_vcond (operands);
;; AVX512 vcond_mask expanders: a vec_merge under a mask register maps
;; directly onto masked-move patterns, so the RTL template is emitted
;; as-is (no C expansion code visible here).

;; Dword/qword/FP modes.
4579 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
4580 [(set (match_operand:V48_AVX512VL 0 "register_operand")
4581 (vec_merge:V48_AVX512VL
4582 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
4583 (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
4584 (match_operand:<avx512fmaskmode> 3 "register_operand")))]

;; Byte/word/HF/BF modes.
4587 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
4588 [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand")
4589 (vec_merge:VI12HFBF_AVX512VL
4590 (match_operand:VI12HFBF_AVX512VL 1 "nonimmediate_operand")
4591 (match_operand:VI12HFBF_AVX512VL 2 "nonimm_or_0_operand")
4592 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
4595 ;; As vcondv4div4df and vcondv8siv8sf are enabled already with TARGET_AVX,
4596 ;; and their condition can be folded late into a constant, we need to
4597 ;; support vcond_mask_v4div4di and vcond_mask_v8siv8si for TARGET_AVX.
4598 (define_mode_iterator VI_256_AVX2 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")

;; Non-AVX512 vcond_mask expanders: all delegate to
;; ix86_expand_sse_movcc (dest, mask, true-op, false-op), which emits
;; blend/and-andnot-or sequences.

;; 256-bit integer modes (byte/word gated by AVX2 via the iterator above).
4601 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
4602 [(set (match_operand:VI_256_AVX2 0 "register_operand")
4603 (vec_merge:VI_256_AVX2
4604 (match_operand:VI_256_AVX2 1 "nonimmediate_operand")
4605 (match_operand:VI_256_AVX2 2 "nonimm_or_0_operand")
4606 (match_operand:<sseintvecmode> 3 "register_operand")))]
4609 ix86_expand_sse_movcc (operands[0], operands[3],
4610 operands[1], operands[2]);

;; 128-bit integer modes.
4614 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
4615 [(set (match_operand:VI_128 0 "register_operand")
4617 (match_operand:VI_128 1 "vector_operand")
4618 (match_operand:VI_128 2 "nonimm_or_0_operand")
4619 (match_operand:<sseintvecmode> 3 "register_operand")))]
4622 ix86_expand_sse_movcc (operands[0], operands[3],
4623 operands[1], operands[2]);

;; V1TI (whole 128-bit value selected as one element).
4627 (define_expand "vcond_mask_v1tiv1ti"
4628 [(set (match_operand:V1TI 0 "register_operand")
4630 (match_operand:V1TI 1 "vector_operand")
4631 (match_operand:V1TI 2 "nonimm_or_0_operand")
4632 (match_operand:V1TI 3 "register_operand")))]
4635 ix86_expand_sse_movcc (operands[0], operands[3],
4636 operands[1], operands[2]);

;; 256-bit FP modes.
4640 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
4641 [(set (match_operand:VF_256 0 "register_operand")
4643 (match_operand:VF_256 1 "nonimmediate_operand")
4644 (match_operand:VF_256 2 "nonimm_or_0_operand")
4645 (match_operand:<sseintvecmode> 3 "register_operand")))]
4648 ix86_expand_sse_movcc (operands[0], operands[3],
4649 operands[1], operands[2]);

;; 128-bit FP modes.
4653 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
4654 [(set (match_operand:VF_128 0 "register_operand")
4656 (match_operand:VF_128 1 "vector_operand")
4657 (match_operand:VF_128 2 "nonimm_or_0_operand")
4658 (match_operand:<sseintvecmode> 3 "register_operand")))]
4661 ix86_expand_sse_movcc (operands[0], operands[3],
4662 operands[1], operands[2]);
4666 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4668 ;; Parallel floating point logical operations
4670 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; FP ANDNOT, 128/256-bit: (~op1) & op2.  The output routine picks the
;; mnemonic suffix at run time — plain ANDNP[SD] for SSE/AVX, but the
;; integer VPANDN[DQ] form when only AVX512F (no AVX512DQ) is available,
;; since vandnp[sd] with masking needs AVX512DQ.
4672 (define_insn "<sse>_andnot<mode>3<mask_name>"
4673 [(set (match_operand:VFB_128_256 0 "register_operand" "=x,x,v,v")
4676 (match_operand:VFB_128_256 1 "register_operand" "0,x,v,v"))
4677 (match_operand:VFB_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
4678 "TARGET_SSE && <mask_avx512vl_condition>
4679 && (!<mask_applied> || <ssescalarmode>mode != HFmode)"
4685 switch (which_alternative)
4688 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
4693 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
4699 switch (get_attr_mode (insn))
4709 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
4710 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
4711 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
4714 suffix = "<ssemodesuffix>";
4717 snprintf (buf, sizeof (buf), ops, suffix);
4718 output_asm_insn (buf, operands);
4721 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
4722 (set_attr "type" "sselog")
4723 (set_attr "prefix" "orig,maybe_vex,evex,evex")
4725 (cond [(and (match_test "<mask_applied>")
4726 (and (eq_attr "alternative" "1")
4727 (match_test "!TARGET_AVX512DQ")))
4728 (const_string "<sseintvecmode2>")
4729 (eq_attr "alternative" "3")
4730 (const_string "<sseintvecmode2>")
4731 (match_test "TARGET_AVX")
4732 (const_string "<MODE>")
4733 (match_test "optimize_function_for_size_p (cfun)")
4734 (const_string "V4SF")
4735 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4736 (const_string "V4SF")
4738 (const_string "<MODE>")))])
;; FP ANDNOT, 512-bit: same suffix-selection trick as the 128/256-bit
;; pattern — without AVX512DQ (or for V32HF, which has no vandnph) the
;; integer VPANDN[DQ] form is used instead of vandnp[sd].
4740 (define_insn "<sse>_andnot<mode>3<mask_name>"
4741 [(set (match_operand:VFB_512 0 "register_operand" "=v")
4744 (match_operand:VFB_512 1 "register_operand" "v"))
4745 (match_operand:VFB_512 2 "nonimmediate_operand" "vm")))]
4746 "TARGET_AVX512F && (!<mask_applied> || <ssescalarmode>mode != HFmode)"
4752 suffix = "<ssemodesuffix>";
4755 /* Since there are no vandnp[sd] without AVX512DQ nor vandnph,
4756 use vp<logic>[dq]. */
4757 if (!TARGET_AVX512DQ || <MODE>mode == V32HFmode)
4759 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
4763 snprintf (buf, sizeof (buf),
4764 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
4766 output_asm_insn (buf, operands);
4769 [(set_attr "type" "sselog")
4770 (set_attr "prefix" "evex")
4772 (if_then_else (match_test "TARGET_AVX512DQ")
4773 (const_string "<sseinsnmode>")
4774 (const_string "XI")))])
;; FP logic (AND/IOR/XOR) expanders; ix86_fixup_binary_operands_no_copy
;; canonicalizes the operands (e.g. forces at most one memory operand).

;; 128/256-bit.
4776 (define_expand "<code><mode>3<mask_name>"
4777 [(set (match_operand:VFB_128_256 0 "register_operand")
4778 (any_logic:VFB_128_256
4779 (match_operand:VFB_128_256 1 "vector_operand")
4780 (match_operand:VFB_128_256 2 "vector_operand")))]
4781 "TARGET_SSE && <mask_avx512vl_condition>
4782 && (!<mask_applied> || <ssescalarmode>mode != HFmode)"
4783 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; 512-bit.
4785 (define_expand "<code><mode>3<mask_name>"
4786 [(set (match_operand:VFB_512 0 "register_operand")
4788 (match_operand:VFB_512 1 "nonimmediate_operand")
4789 (match_operand:VFB_512 2 "nonimmediate_operand")))]
4790 "TARGET_AVX512F && (!<mask_applied> || <ssescalarmode>mode != HFmode)"
4791 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; FP logic insn, 128/256-bit.  Like the andnot pattern above, the
;; output routine substitutes the integer VP<logic>[QD] form when only
;; AVX512F (no AVX512DQ) is available.
4793 (define_insn "*<code><mode>3<mask_name>"
4794 [(set (match_operand:VFB_128_256 0 "register_operand" "=x,x,v,v")
4795 (any_logic:VFB_128_256
4796 (match_operand:VFB_128_256 1 "vector_operand" "%0,x,v,v")
4797 (match_operand:VFB_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
4798 "TARGET_SSE && <mask_avx512vl_condition>
4799 && (!<mask_applied> || <ssescalarmode>mode != HFmode)
4800 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4806 switch (which_alternative)
4809 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
4814 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
4820 switch (get_attr_mode (insn))
4830 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
4831 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
4832 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
4835 suffix = "<ssemodesuffix>";
4838 snprintf (buf, sizeof (buf), ops, suffix);
4839 output_asm_insn (buf, operands);
4842 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
4843 (set_attr "type" "sselog")
4844 (set_attr "prefix" "orig,maybe_evex,evex,evex")
4846 (cond [(and (match_test "<mask_applied>")
4847 (and (eq_attr "alternative" "1")
4848 (match_test "!TARGET_AVX512DQ")))
4849 (const_string "<sseintvecmode2>")
4850 (eq_attr "alternative" "3")
4851 (const_string "<sseintvecmode2>")
4852 (match_test "TARGET_AVX")
4853 (const_string "<MODE>")
4854 (match_test "optimize_function_for_size_p (cfun)")
4855 (const_string "V4SF")
4856 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
4857 (const_string "V4SF")
4859 (const_string "<MODE>")))])
;; FP logic insn, 512-bit: VP<logic>[DQ] is used when AVX512DQ is
;; unavailable or the mode is V32HF (no v<logic>ph exists).
4861 (define_insn "*<code><mode>3<mask_name>"
4862 [(set (match_operand:VFB_512 0 "register_operand" "=v")
4864 (match_operand:VFB_512 1 "nonimmediate_operand" "%v")
4865 (match_operand:VFB_512 2 "nonimmediate_operand" "vm")))]
4866 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))
4867 && (!<mask_applied> || <ssescalarmode>mode != HFmode)"
4873 suffix = "<ssemodesuffix>";
4876 /* Since there are no v<logic>p[sd] without AVX512DQ nor v<logic>ph,
4877 use vp<logic>[dq]. */
4878 if (!TARGET_AVX512DQ || <MODE>mode == V32HFmode)
4880 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
4884 snprintf (buf, sizeof (buf),
4885 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
4887 output_asm_insn (buf, operands);
4890 [(set_attr "type" "sselog")
4891 (set_attr "prefix" "evex")
4893 (if_then_else (match_test "TARGET_AVX512DQ")
4894 (const_string "<sseinsnmode>")
4895 (const_string "XI")))])
4897 ;; The generic part doesn't support the simplification of a logic operation
4898 ;; with float_vector_all_ones_operand since that's not valid rtl. Add a combine
4899 ;; splitter for them; it should be safe since there are no SIMD Floating-Point Exceptions.
;; Combine splitter: x & <all-ones FP vector> simplifies to a plain move
;; of x (split before reload).
4900 (define_insn_and_split "*bit_and_float_vector_all_ones"
4901 [(set (match_operand:VFB 0 "nonimmediate_operand")
4902 (and:VFB (match_operand:VFB 1 "nonimmediate_operand")
4903 (match_operand:VFB 2 "float_vector_all_ones_operand")))]
4904 "TARGET_SSE && ix86_pre_reload_split ()"
4907 [(set (match_dup 0) (match_dup 1))]
4908 "operands[1] = force_reg (<MODE>mode, operands[1]);")
;; copysign: result = (op1 & ~signmask) | (op2 & signmask), where
;; operand 3 is the sign-bit mask built by ix86_build_signbit_mask.
4910 (define_expand "copysign<mode>3"
4913 (not:VFB (match_dup 3))
4914 (match_operand:VFB 1 "vector_operand")))
4916 (and:VFB (match_dup 3)
4917 (match_operand:VFB 2 "vector_operand")))
4918 (set (match_operand:VFB 0 "register_operand")
4919 (ior:VFB (match_dup 4) (match_dup 5)))]
4922 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
4924 operands[4] = gen_reg_rtx (<MODE>mode);
4925 operands[5] = gen_reg_rtx (<MODE>mode);

;; xorsign: result = op1 ^ (op2 & signmask) — flips op1's sign by
;; op2's sign bit.
4928 (define_expand "xorsign<mode>3"
4930 (and:VFB (match_dup 3)
4931 (match_operand:VFB 2 "vector_operand")))
4932 (set (match_operand:VFB 0 "register_operand")
4933 (xor:VFB (match_dup 4)
4934 (match_operand:VFB 1 "vector_operand")))]
4937 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
4939 operands[4] = gen_reg_rtx (<MODE>mode);

;; signbit: logical shift right of the bit-pattern by (unit bits - 1)
;; leaves just the sign bit of each element.
4942 (define_expand "signbit<mode>2"
4943 [(set (match_operand:<sseintvecmode> 0 "register_operand")
4944 (lshiftrt:<sseintvecmode>
4945 (subreg:<sseintvecmode>
4946 (match_operand:VF1_AVX2 1 "register_operand") 0)
4949 "operands[2] = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)-1);")
4951 ;; Also define scalar versions. These are used for abs, neg, and
4952 ;; conditional move. Using subregs into vector modes causes register
4953 ;; allocation lossage. These patterns do not allow memory operands
4954 ;; because the native instructions read the full 128-bits.
;; Scalar-float ANDNOT done on the full vector register (register
;; operands only — see the comment above: the insns read all 128 bits).
;; EVEX alternatives fall back to VPANDN[QD] without AVX512DQ, and
;; alternative 3 operates on the zmm view via %g operand modifiers.
4956 (define_insn "*andnot<mode>3"
4957 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
4960 (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
4961 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
4962 "SSE_FLOAT_MODE_P (<MODE>mode)"
4967 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
4969 switch (which_alternative)
4972 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
4975 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4978 if (TARGET_AVX512DQ)
4979 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4982 suffix = <MODE>mode == DFmode ? "q" : "d";
4983 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
4987 if (TARGET_AVX512DQ)
4988 ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4991 suffix = <MODE>mode == DFmode ? "q" : "d";
4992 ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
4999 snprintf (buf, sizeof (buf), ops, suffix);
5000 output_asm_insn (buf, operands);
5003 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
5004 (set_attr "type" "sselog")
5005 (set_attr "prefix" "orig,vex,evex,evex")
5007 (cond [(eq_attr "alternative" "2")
5008 (if_then_else (match_test "TARGET_AVX512DQ")
5009 (const_string "<ssevecmode>")
5010 (const_string "TI"))
5011 (eq_attr "alternative" "3")
5012 (if_then_else (match_test "TARGET_AVX512DQ")
5013 (const_string "<avx512fvecmode>")
5014 (const_string "XI"))
5015 (match_test "TARGET_AVX")
5016 (const_string "<ssevecmode>")
5017 (match_test "optimize_function_for_size_p (cfun)")
5018 (const_string "V4SF")
5019 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
5020 (const_string "V4SF")
5022 (const_string "<ssevecmode>")))])
5024 ;; Modes for andnot3 not covered by VI and MODEF.
5025 (define_mode_iterator ANDNOT_MODE [TF V1TI])

;; ANDNOT for TF/V1TI: PANDN / ANDNPS for legacy SSE, VPANDNQ for the
;; EVEX alternatives (alternative 3 uses the zmm view via %g modifiers).
5027 (define_insn "*andnot<mode>3"
5028 [(set (match_operand:ANDNOT_MODE 0 "register_operand" "=x,x,v,v")
5030 (not:ANDNOT_MODE (match_operand:ANDNOT_MODE 1 "register_operand" "0,x,v,v"))
5031 (match_operand:ANDNOT_MODE 2 "vector_operand" "xBm,xm,vm,v")))]
5037 = (which_alternative >= 2 ? "pandnq"
5038 : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
5040 switch (which_alternative)
5043 ops = "%s\t{%%2, %%0|%%0, %%2}";
5047 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
5050 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
5056 snprintf (buf, sizeof (buf), ops, tmp);
5057 output_asm_insn (buf, operands);
5060 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
5061 (set_attr "type" "sselog")
5062 (set (attr "prefix_data16")
5064 (and (eq_attr "alternative" "0")
5065 (eq_attr "mode" "TI"))
5067 (const_string "*")))
5068 (set_attr "prefix" "orig,vex,evex,evex")
5070 (cond [(eq_attr "alternative" "2")
5072 (eq_attr "alternative" "3")
5074 (match_test "TARGET_AVX")
5076 (ior (not (match_test "TARGET_SSE2"))
5077 (match_test "optimize_function_for_size_p (cfun)"))
5078 (const_string "V4SF")
5079 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
5080 (const_string "V4SF")
5082 (const_string "TI")))])
;; Scalar-float AND/IOR/XOR on full vector registers (register operands
;; only).  Same AVX512DQ fallback scheme as *andnot<mode>3 above:
;; VP<logic>[QD] without DQ, and zmm views (%g) for alternative 3.
5084 (define_insn "<code><mode>3"
5085 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
5087 (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
5088 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
5089 "SSE_FLOAT_MODE_P (<MODE>mode)"
5094 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
5096 switch (which_alternative)
5099 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
5102 if (!TARGET_AVX512DQ)
5104 suffix = <MODE>mode == DFmode ? "q" : "d";
5105 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
5110 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
5113 if (TARGET_AVX512DQ)
5114 ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
5117 suffix = <MODE>mode == DFmode ? "q" : "d";
5118 ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
5125 snprintf (buf, sizeof (buf), ops, suffix);
5126 output_asm_insn (buf, operands);
5129 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
5130 (set_attr "type" "sselog")
5131 (set_attr "prefix" "orig,vex,evex,evex")
5133 (cond [(eq_attr "alternative" "2")
5134 (if_then_else (match_test "TARGET_AVX512DQ")
5135 (const_string "<ssevecmode>")
5136 (const_string "TI"))
5137 (eq_attr "alternative" "3")
5138 (if_then_else (match_test "TARGET_AVX512DQ")
5139 (const_string "<avx512fvecmode>")
5140 (const_string "XI"))
5141 (match_test "TARGET_AVX")
5142 (const_string "<ssevecmode>")
5143 (match_test "optimize_function_for_size_p (cfun)")
5144 (const_string "V4SF")
5145 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
5146 (const_string "V4SF")
5148 (const_string "<ssevecmode>")))])
;; TFmode bitwise logic.  The expander legitimizes the operands (at most
;; one memory operand; commutative fixup) and the insn below does the work.
5150 (define_expand "<code>tf3"
5151 [(set (match_operand:TF 0 "register_operand")
5153 (match_operand:TF 1 "vector_operand")
5154 (match_operand:TF 2 "vector_operand")))]
5156 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")

;; TFmode logic insn.  Same alternative layout as *andnot<mode>3 above:
;; legacy SSE, AVX, AVX512VL, AVX512F, with p<logic>q for the EVEX forms
;; and <logic>ps / p<logic> otherwise.
5158 (define_insn "*<code>tf3"
5159 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
5161 (match_operand:TF 1 "vector_operand" "%0,x,v,v")
5162 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
5163 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5168 = (which_alternative >= 2 ? "p<logic>q"
5169 : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>")
5171 switch (which_alternative)
5174 ops = "%s\t{%%2, %%0|%%0, %%2}";
5178 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
5181 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
5187 snprintf (buf, sizeof (buf), ops, tmp);
5188 output_asm_insn (buf, operands);
5191 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
5192 (set_attr "type" "sselog")
5193 (set (attr "prefix_data16")
5195 (and (eq_attr "alternative" "0")
5196 (eq_attr "mode" "TI"))
5198 (const_string "*")))
5199 (set_attr "prefix" "orig,vex,evex,evex")
5201 (cond [(eq_attr "alternative" "2")
5203 (eq_attr "alternative" "3")
5205 (match_test "TARGET_AVX")
5207 (ior (not (match_test "TARGET_SSE2"))
5208 (match_test "optimize_function_for_size_p (cfun)"))
5209 (const_string "V4SF")
5210 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
5211 (const_string "V4SF")
5213 (const_string "TI")))])
5215 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5217 ;; FMA floating point multiply/accumulate instructions. These include
5218 ;; scalar versions of the instructions as well as vector versions.
5220 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5222 ;; The standard names for scalar FMA are only available with SSE math enabled.
5223 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
5224 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
5225 ;; and TARGET_FMA4 are both false.
5226 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
5227 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
5228 ;; GAS to allow proper prefix selection. However, for the moment all hardware
5229 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-named fma/fms/fnma/fnms expanders.  Scalar SF/DF
;; additionally require SSE math; 128/256-bit vectors need FMA, FMA4 or
;; AVX512VL; 512-bit and FP16 modes are gated on AVX512F / AVX512FP16.
5230 (define_mode_iterator FMAMODEM
5231 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
5232 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
5233 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
5234 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
5235 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
5236 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
5237 (V16SF "TARGET_AVX512F")
5238 (V8DF "TARGET_AVX512F")
5239 (HF "TARGET_AVX512FP16")
5240 (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
5241 (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
5242 (V32HF "TARGET_AVX512FP16")])

;; Standard names: fma = a*b+c.
5244 (define_expand "fma<mode>4"
5245 [(set (match_operand:FMAMODEM 0 "register_operand")
5247 (match_operand:FMAMODEM 1 "nonimmediate_operand")
5248 (match_operand:FMAMODEM 2 "nonimmediate_operand")
5249 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])

;; fms = a*b-c (addend negated).
5251 (define_expand "fms<mode>4"
5252 [(set (match_operand:FMAMODEM 0 "register_operand")
5254 (match_operand:FMAMODEM 1 "nonimmediate_operand")
5255 (match_operand:FMAMODEM 2 "nonimmediate_operand")
5256 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])

;; fnma = -(a*b)+c (multiplicand negated).
5258 (define_expand "fnma<mode>4"
5259 [(set (match_operand:FMAMODEM 0 "register_operand")
5261 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
5262 (match_operand:FMAMODEM 2 "nonimmediate_operand")
5263 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])

;; fnms = -(a*b)-c (both negated).
5265 (define_expand "fnms<mode>4"
5266 [(set (match_operand:FMAMODEM 0 "register_operand")
5268 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
5269 (match_operand:FMAMODEM 2 "nonimmediate_operand")
5270 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])

5272 ;; The builtins for intrinsics are not constrained by SSE math enabled.
5273 (define_mode_iterator FMAMODE_AVX512
5274 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
5275 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
5276 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
5277 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
5278 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
5279 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
5280 (V16SF "TARGET_AVX512F")
5281 (V8DF "TARGET_AVX512F")])

;; Unconditional FMA modes used by the *fma_* insn patterns below, whose
;; insn conditions supply the TARGET_FMA/FMA4 test themselves.
5283 (define_mode_iterator FMAMODE
5284 [SF DF V4SF V2DF V8SF V4DF])
;; Intrinsic-level expanders (fma4i_*): same four sign combinations as the
;; standard names above, but over FMAMODE_AVX512 (no SSE-math requirement).
5286 (define_expand "fma4i_fmadd_<mode>"
5287 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
5289 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
5290 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
5291 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])

5293 (define_expand "fma4i_fmsub_<mode>"
5294 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
5296 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
5297 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
5299 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])

5301 (define_expand "fma4i_fnmadd_<mode>"
5302 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
5305 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
5306 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
5307 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])

5309 (define_expand "fma4i_fnmsub_<mode>"
5310 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
5313 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
5314 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
5316 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])

;; Zero-masked fmadd: expands to the maskz_1 insn with a zero merge vector
;; and the mask in operand 4.
5318 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
5319 [(match_operand:VFH_AVX512VL 0 "register_operand")
5320 (match_operand:VFH_AVX512VL 1 "<round_expand_nimm_predicate>")
5321 (match_operand:VFH_AVX512VL 2 "<round_expand_nimm_predicate>")
5322 (match_operand:VFH_AVX512VL 3 "<round_expand_nimm_predicate>")
5323 (match_operand:<avx512fmaskmode> 4 "register_operand")]
5324 "TARGET_AVX512F && <round_mode512bit_condition>"
5326 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
5327 operands[0], operands[1], operands[2], operands[3],
5328 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; FMA3/FMA4 fused multiply-add, op0 = op1*op2 + op3.  Alternatives 0-2
;; are FMA3 (destructive; 132/213/231 pick which source shares the dest),
;; 3-4 are FMA4 (4-operand, non-destructive).
5332 (define_insn "*fma_fmadd_<mode>"
5333 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
5335 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
5336 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
5337 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
5338 "TARGET_FMA || TARGET_FMA4"
5340 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
5341 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
5342 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
5343 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
5344 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
5345 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
5346 (set_attr "type" "ssemuladd")
5347 (set_attr "mode" "<MODE>")])

5349 ;; Suppose AVX-512F as baseline
;; Scalar SF/DF plus all vector FP modes; sub-512-bit vectors need VL,
;; FP16 modes need AVX512FP16.
5350 (define_mode_iterator VFH_SF_AVX512VL
5351 [(V32HF "TARGET_AVX512FP16")
5352 (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
5353 (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
5354 (HF "TARGET_AVX512FP16")
5355 SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
5356 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])

;; EVEX fmadd with optional zero-masking (<sd_maskz_name>) and embedded
;; rounding (<round_name>) via the subst machinery.
5358 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
5359 [(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v")
5360 (fma:VFH_SF_AVX512VL
5361 (match_operand:VFH_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")
5362 (match_operand:VFH_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
5363 (match_operand:VFH_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))]
5364 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
5366 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
5367 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
5368 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
5369 [(set_attr "type" "ssemuladd")
5370 (set_attr "mode" "<MODE>")])

;; Conditional (mask-merged) fma for the vectorizer: computes into a temp
;; and merges with operand 5 under mask operand 1.
5372 (define_expand "cond_fma<mode>"
5373 [(set (match_operand:VFH_AVX512VL 0 "register_operand")
5374 (vec_merge:VFH_AVX512VL
5376 (match_operand:VFH_AVX512VL 2 "vector_operand")
5377 (match_operand:VFH_AVX512VL 3 "vector_operand")
5378 (match_operand:VFH_AVX512VL 4 "vector_operand"))
5379 (match_operand:VFH_AVX512VL 5 "nonimm_or_0_operand")
5380 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5383 rtx tmp = gen_reg_rtx (<MODE>mode);
5384 emit_insn (gen_fma<mode>4 (tmp,
5388 emit_move_insn (operands[0], gen_rtx_VEC_MERGE (<MODE>mode,
;; Merge-masked fmadd: result merged with operand 1 (which is also the
;; accumulator input) under mask operand 4.
5395 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
5396 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v")
5397 (vec_merge:VFH_AVX512VL
5399 (match_operand:VFH_AVX512VL 1 "register_operand" "0,0")
5400 (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
5401 (match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
5403 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
5404 "TARGET_AVX512F && <round_mode512bit_condition>"
5406 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
5407 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
5408 [(set_attr "type" "ssemuladd")
5409 (set_attr "mode" "<MODE>")])

;; mask3 variant: merges with the addend (operand 3, tied to dest), so the
;; 231 form is the natural encoding.
5411 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
5412 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
5413 (vec_merge:VFH_AVX512VL
5415 (match_operand:VFH_AVX512VL 1 "<round_nimm_predicate>" "%v")
5416 (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
5417 (match_operand:VFH_AVX512VL 3 "register_operand" "0"))
5419 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
5421 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
5422 [(set_attr "type" "ssemuladd")
5423 (set_attr "mode" "<MODE>")])
;; FMA3/FMA4 fused multiply-subtract, op0 = op1*op2 - op3 (negated addend).
5425 (define_insn "*fma_fmsub_<mode>"
5426 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
5428 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
5429 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
5431 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
5432 "TARGET_FMA || TARGET_FMA4"
5434 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
5435 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
5436 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
5437 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
5438 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
5439 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
5440 (set_attr "type" "ssemuladd")
5441 (set_attr "mode" "<MODE>")])

;; Zero-masked fmsub expander (see fmadd_maskz above for the scheme).
5443 (define_expand "<avx512>_fmsub_<mode>_maskz<round_expand_name>"
5444 [(match_operand:VFH_AVX512VL 0 "register_operand")
5445 (match_operand:VFH_AVX512VL 1 "<round_expand_nimm_predicate>")
5446 (match_operand:VFH_AVX512VL 2 "<round_expand_nimm_predicate>")
5447 (match_operand:VFH_AVX512VL 3 "<round_expand_nimm_predicate>")
5448 (match_operand:<avx512fmaskmode> 4 "register_operand")]
5449 "TARGET_AVX512F && <round_mode512bit_condition>"
5451 emit_insn (gen_fma_fmsub_<mode>_maskz_1<round_expand_name> (
5452 operands[0], operands[1], operands[2], operands[3],
5453 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));

;; EVEX fmsub with optional masking/rounding substs.
5457 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
5458 [(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v")
5459 (fma:VFH_SF_AVX512VL
5460 (match_operand:VFH_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")
5461 (match_operand:VFH_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
5462 (neg:VFH_SF_AVX512VL
5463 (match_operand:VFH_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))]
5464 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
5466 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
5467 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
5468 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
5469 [(set_attr "type" "ssemuladd")
5470 (set_attr "mode" "<MODE>")])

;; Conditional fms for the vectorizer (temp + vec_merge, as cond_fma).
5472 (define_expand "cond_fms<mode>"
5473 [(set (match_operand:VFH_AVX512VL 0 "register_operand")
5474 (vec_merge:VFH_AVX512VL
5476 (match_operand:VFH_AVX512VL 2 "vector_operand")
5477 (match_operand:VFH_AVX512VL 3 "vector_operand")
5479 (match_operand:VFH_AVX512VL 4 "vector_operand")))
5480 (match_operand:VFH_AVX512VL 5 "nonimm_or_0_operand")
5481 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5484 rtx tmp = gen_reg_rtx (<MODE>mode);
5485 emit_insn (gen_fms<mode>4 (tmp,
5489 emit_move_insn (operands[0], gen_rtx_VEC_MERGE (<MODE>mode,
;; Merge-masked fmsub (merge with operand 1 under mask operand 4).
5496 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
5497 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v")
5498 (vec_merge:VFH_AVX512VL
5500 (match_operand:VFH_AVX512VL 1 "register_operand" "0,0")
5501 (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
5503 (match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
5505 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
5508 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
5509 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
5510 [(set_attr "type" "ssemuladd")
5511 (set_attr "mode" "<MODE>")])

;; mask3 variant (merge with the subtrahend, operand 3 tied to dest).
5513 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
5514 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
5515 (vec_merge:VFH_AVX512VL
5517 (match_operand:VFH_AVX512VL 1 "<round_nimm_predicate>" "%v")
5518 (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
5520 (match_operand:VFH_AVX512VL 3 "register_operand" "0")))
5522 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
5523 "TARGET_AVX512F && <round_mode512bit_condition>"
5524 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
5525 [(set_attr "type" "ssemuladd")
5526 (set_attr "mode" "<MODE>")])
;; FMA3/FMA4 negated-multiply add, op0 = -(op1*op2) + op3.
5528 (define_insn "*fma_fnmadd_<mode>"
5529 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
5532 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
5533 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
5534 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
5535 "TARGET_FMA || TARGET_FMA4"
5537 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
5538 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
5539 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
5540 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
5541 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
5542 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
5543 (set_attr "type" "ssemuladd")
5544 (set_attr "mode" "<MODE>")])

;; Zero-masked fnmadd expander.
5546 (define_expand "<avx512>_fnmadd_<mode>_maskz<round_expand_name>"
5547 [(match_operand:VFH_AVX512VL 0 "register_operand")
5548 (match_operand:VFH_AVX512VL 1 "<round_expand_nimm_predicate>")
5549 (match_operand:VFH_AVX512VL 2 "<round_expand_nimm_predicate>")
5550 (match_operand:VFH_AVX512VL 3 "<round_expand_nimm_predicate>")
5551 (match_operand:<avx512fmaskmode> 4 "register_operand")]
5552 "TARGET_AVX512F && <round_mode512bit_condition>"
5554 emit_insn (gen_fma_fnmadd_<mode>_maskz_1<round_expand_name> (
5555 operands[0], operands[1], operands[2], operands[3],
5556 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));

;; EVEX fnmadd with optional masking/rounding substs.
5560 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
5561 [(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v")
5562 (fma:VFH_SF_AVX512VL
5563 (neg:VFH_SF_AVX512VL
5564 (match_operand:VFH_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v"))
5565 (match_operand:VFH_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
5566 (match_operand:VFH_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))]
5567 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
5569 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
5570 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
5571 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
5572 [(set_attr "type" "ssemuladd")
5573 (set_attr "mode" "<MODE>")])

;; Conditional fnma for the vectorizer (temp + vec_merge).
5575 (define_expand "cond_fnma<mode>"
5576 [(set (match_operand:VFH_AVX512VL 0 "register_operand")
5577 (vec_merge:VFH_AVX512VL
5580 (match_operand:VFH_AVX512VL 2 "vector_operand"))
5581 (match_operand:VFH_AVX512VL 3 "vector_operand")
5582 (match_operand:VFH_AVX512VL 4 "vector_operand"))
5583 (match_operand:VFH_AVX512VL 5 "nonimm_or_0_operand")
5584 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5587 rtx tmp = gen_reg_rtx (<MODE>mode);
5588 emit_insn (gen_fnma<mode>4 (tmp,
5592 emit_move_insn (operands[0], gen_rtx_VEC_MERGE (<MODE>mode,
;; Merge-masked fnmadd.
5599 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
5600 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v")
5601 (vec_merge:VFH_AVX512VL
5604 (match_operand:VFH_AVX512VL 1 "register_operand" "0,0"))
5605 (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
5606 (match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
5608 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
5609 "TARGET_AVX512F && <round_mode512bit_condition>"
5611 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
5612 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
5613 [(set_attr "type" "ssemuladd")
5614 (set_attr "mode" "<MODE>")])

;; mask3 variant of fnmadd.
5616 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
5617 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
5618 (vec_merge:VFH_AVX512VL
5621 (match_operand:VFH_AVX512VL 1 "<round_nimm_predicate>" "%v"))
5622 (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
5623 (match_operand:VFH_AVX512VL 3 "register_operand" "0"))
5625 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
5626 "TARGET_AVX512F && <round_mode512bit_condition>"
5627 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
5628 [(set_attr "type" "ssemuladd")
5629 (set_attr "mode" "<MODE>")])
;; FMA3/FMA4 negated multiply-subtract, op0 = -(op1*op2) - op3.
;; Alternatives 0-2 are FMA3 (132/213/231 operand orders, destructive
;; destination); 3-4 are FMA4 (4-operand).  This is the plain
;; TARGET_FMA/TARGET_FMA4 pattern: its name carries no <sd_maskz_name>
;; or <round_name> subst tags, so the templates must not reference the
;; <round_sd_mask_op4>/<sd_mask_op4> substitutions -- they use plain
;; operands exactly like the sibling *fma_fmadd/*fma_fmsub/*fma_fnmadd
;; patterns.
5631 (define_insn "*fma_fnmsub_<mode>"
5632 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
5635 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
5636 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
5638 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
5639 "TARGET_FMA || TARGET_FMA4"
5641 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
5642 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
5643 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
5644 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
5645 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
5646 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
5647 (set_attr "type" "ssemuladd")
5648 (set_attr "mode" "<MODE>")])
;; Zero-masked fnmsub expander.
5650 (define_expand "<avx512>_fnmsub_<mode>_maskz<round_expand_name>"
5651 [(match_operand:VFH_AVX512VL 0 "register_operand")
5652 (match_operand:VFH_AVX512VL 1 "<round_expand_nimm_predicate>")
5653 (match_operand:VFH_AVX512VL 2 "<round_expand_nimm_predicate>")
5654 (match_operand:VFH_AVX512VL 3 "<round_expand_nimm_predicate>")
5655 (match_operand:<avx512fmaskmode> 4 "register_operand")]
5656 "TARGET_AVX512F && <round_mode512bit_condition>"
5658 emit_insn (gen_fma_fnmsub_<mode>_maskz_1<round_expand_name> (
5659 operands[0], operands[1], operands[2], operands[3],
5660 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));

;; EVEX fnmsub, -(op1*op2) - op3, with optional masking/rounding substs.
5664 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
5665 [(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v")
5666 (fma:VFH_SF_AVX512VL
5667 (neg:VFH_SF_AVX512VL
5668 (match_operand:VFH_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v"))
5669 (match_operand:VFH_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>")
5670 (neg:VFH_SF_AVX512VL
5671 (match_operand:VFH_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))]
5672 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
5674 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
5675 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
5676 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
5677 [(set_attr "type" "ssemuladd")
5678 (set_attr "mode" "<MODE>")])

;; Conditional fnms for the vectorizer (temp + vec_merge).
5680 (define_expand "cond_fnms<mode>"
5681 [(set (match_operand:VFH_AVX512VL 0 "register_operand")
5682 (vec_merge:VFH_AVX512VL
5685 (match_operand:VFH_AVX512VL 2 "vector_operand"))
5686 (match_operand:VFH_AVX512VL 3 "vector_operand")
5688 (match_operand:VFH_AVX512VL 4 "vector_operand")))
5689 (match_operand:VFH_AVX512VL 5 "nonimm_or_0_operand")
5690 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5693 rtx tmp = gen_reg_rtx (<MODE>mode);
5694 emit_insn (gen_fnms<mode>4 (tmp,
5698 emit_move_insn (operands[0], gen_rtx_VEC_MERGE (<MODE>mode,
;; Merge-masked fnmsub.
5705 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
5706 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v")
5707 (vec_merge:VFH_AVX512VL
5710 (match_operand:VFH_AVX512VL 1 "register_operand" "0,0"))
5711 (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
5713 (match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
5715 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
5716 "TARGET_AVX512F && <round_mode512bit_condition>"
5718 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
5719 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
5720 [(set_attr "type" "ssemuladd")
5721 (set_attr "mode" "<MODE>")])

;; mask3 variant of fnmsub.
5723 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
5724 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
5725 (vec_merge:VFH_AVX512VL
5728 (match_operand:VFH_AVX512VL 1 "<round_nimm_predicate>" "%v"))
5729 (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
5731 (match_operand:VFH_AVX512VL 3 "register_operand" "0")))
5733 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
5735 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
5736 [(set_attr "type" "ssemuladd")
5737 (set_attr "mode" "<MODE>")])
5739 ;; FMA parallel floating point multiply addsub and subadd operations.
5741 ;; It would be possible to represent these without the UNSPEC as
5744 ;; (fma op1 op2 op3)
5745 ;; (fma op1 op2 (neg op3))
5748 ;; But this doesn't seem useful in practice.
;; Standard-named addsub expander (odd lanes add, even lanes subtract --
;; kept as an UNSPEC per the comment above).
5750 (define_expand "vec_fmaddsub<mode>4"
5751 [(set (match_operand:VF 0 "register_operand")
5753 [(match_operand:VF 1 "nonimmediate_operand")
5754 (match_operand:VF 2 "nonimmediate_operand")
5755 (match_operand:VF 3 "nonimmediate_operand")]
5757 "TARGET_FMA || TARGET_FMA4 || (<MODE_SIZE> == 64 || TARGET_AVX512VL)")

;; Standard-named subadd expander (addend negated relative to fmaddsub).
5759 (define_expand "vec_fmsubadd<mode>4"
5760 [(set (match_operand:VF 0 "register_operand")
5762 [(match_operand:VF 1 "nonimmediate_operand")
5763 (match_operand:VF 2 "nonimmediate_operand")
5765 (match_operand:VF 3 "nonimmediate_operand"))]
5767 "TARGET_FMA || TARGET_FMA4 || (<MODE_SIZE> == 64 || TARGET_AVX512VL)")

;; Builtin-level fmaddsub expander (intrinsics; no SSE-math constraint).
5769 (define_expand "fmaddsub_<mode>"
5770 [(set (match_operand:VF 0 "register_operand")
5772 [(match_operand:VF 1 "nonimmediate_operand")
5773 (match_operand:VF 2 "nonimmediate_operand")
5774 (match_operand:VF 3 "nonimmediate_operand")]
5776 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")

;; Zero-masked fmaddsub expander (zero merge vector, mask in operand 4).
5778 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
5779 [(match_operand:VFH_AVX512VL 0 "register_operand")
5780 (match_operand:VFH_AVX512VL 1 "<round_expand_nimm_predicate>")
5781 (match_operand:VFH_AVX512VL 2 "<round_expand_nimm_predicate>")
5782 (match_operand:VFH_AVX512VL 3 "<round_expand_nimm_predicate>")
5783 (match_operand:<avx512fmaskmode> 4 "register_operand")]
5786 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
5787 operands[0], operands[1], operands[2], operands[3],
5788 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));

;; Zero-masked fmsubadd expander.
5792 (define_expand "<avx512>_fmsubadd_<mode>_maskz<round_expand_name>"
5793 [(match_operand:VFH_AVX512VL 0 "register_operand")
5794 (match_operand:VFH_AVX512VL 1 "<round_expand_nimm_predicate>")
5795 (match_operand:VFH_AVX512VL 2 "<round_expand_nimm_predicate>")
5796 (match_operand:VFH_AVX512VL 3 "<round_expand_nimm_predicate>")
5797 (match_operand:<avx512fmaskmode> 4 "register_operand")]
5800 emit_insn (gen_fma_fmsubadd_<mode>_maskz_1<round_expand_name> (
5801 operands[0], operands[1], operands[2], operands[3],
5802 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; FMA3/FMA4 fmaddsub (128/256-bit only; 512-bit goes through the EVEX
;; pattern below).  Alternative layout matches *fma_fmadd.
5806 (define_insn "*fma_fmaddsub_<mode>"
5807 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
5809 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
5810 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
5811 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
5813 "TARGET_FMA || TARGET_FMA4"
5815 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
5816 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
5817 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
5818 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
5819 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
5820 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
5821 (set_attr "type" "ssemuladd")
5822 (set_attr "mode" "<MODE>")])

;; EVEX fmaddsub with optional masking/rounding substs.
5824 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
5825 [(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v")
5826 (unspec:VFH_SF_AVX512VL
5827 [(match_operand:VFH_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
5828 (match_operand:VFH_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
5829 (match_operand:VFH_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
5831 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
5833 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
5834 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
5835 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
5836 [(set_attr "type" "ssemuladd")
5837 (set_attr "mode" "<MODE>")])

;; Merge-masked fmaddsub (merge with operand 1 under mask operand 4).
5839 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
5840 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v")
5841 (vec_merge:VFH_AVX512VL
5842 (unspec:VFH_AVX512VL
5843 [(match_operand:VFH_AVX512VL 1 "register_operand" "0,0")
5844 (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
5845 (match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")]
5848 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
5851 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
5852 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
5853 [(set_attr "type" "ssemuladd")
5854 (set_attr "mode" "<MODE>")])

;; mask3 variant (merge with operand 3, tied to dest; 231 form).
5856 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
5857 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
5858 (vec_merge:VFH_AVX512VL
5859 (unspec:VFH_AVX512VL
5860 [(match_operand:VFH_AVX512VL 1 "register_operand" "v")
5861 (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
5862 (match_operand:VFH_AVX512VL 3 "register_operand" "0")]
5865 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
5867 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
5868 [(set_attr "type" "ssemuladd")
5869 (set_attr "mode" "<MODE>")])
;; FMA3/FMA4 fmsubadd (128/256-bit; addend negated relative to fmaddsub).
5871 (define_insn "*fma_fmsubadd_<mode>"
5872 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
5874 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
5875 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
5877 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
5879 "TARGET_FMA || TARGET_FMA4"
5881 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
5882 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
5883 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
5884 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
5885 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
5886 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
5887 (set_attr "type" "ssemuladd")
5888 (set_attr "mode" "<MODE>")])

;; EVEX fmsubadd with optional masking/rounding substs.
5890 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
5891 [(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v")
5892 (unspec:VFH_SF_AVX512VL
5893 [(match_operand:VFH_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
5894 (match_operand:VFH_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
5895 (neg:VFH_SF_AVX512VL
5896 (match_operand:VFH_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
5898 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
5900 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
5901 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
5902 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
5903 [(set_attr "type" "ssemuladd")
5904 (set_attr "mode" "<MODE>")])

;; Merge-masked fmsubadd.
5906 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
5907 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v")
5908 (vec_merge:VFH_AVX512VL
5909 (unspec:VFH_AVX512VL
5910 [(match_operand:VFH_AVX512VL 1 "register_operand" "0,0")
5911 (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
5913 (match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
5916 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
5919 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
5920 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
5921 [(set_attr "type" "ssemuladd")
5922 (set_attr "mode" "<MODE>")])

;; mask3 variant of fmsubadd.
5924 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
5925 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
5926 (vec_merge:VFH_AVX512VL
5927 (unspec:VFH_AVX512VL
5928 [(match_operand:VFH_AVX512VL 1 "register_operand" "v")
5929 (match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
5931 (match_operand:VFH_AVX512VL 3 "register_operand" "0"))]
5934 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
5936 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
5937 [(set_attr "type" "ssemuladd")
5938 (set_attr "mode" "<MODE>")])
5940 ;; FMA3 floating point scalar intrinsics. These merge result with
5941 ;; high-order elements from the destination register.
;; Named expanders for the scalar FMA3 intrinsics (fma/fms/fnma/fnms).
;; Operand 1 supplies the high-order elements of the result (see section
;; comment above); operands 2/3 may be memory per the scalar predicate.
;; NOTE(review): the excerpt is missing the closing/merge lines of each
;; expander body.
5943 (define_expand "fmai_vmfmadd_<mode><round_name>"
5944 [(set (match_operand:VFH_128 0 "register_operand")
5947 (match_operand:VFH_128 1 "register_operand")
5948 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>")
5949 (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>"))
;; Scalar fused multiply-subtract intrinsic expander (operand 3 negated).
5954 (define_expand "fmai_vmfmsub_<mode><round_name>"
5955 [(set (match_operand:VFH_128 0 "register_operand")
5958 (match_operand:VFH_128 1 "register_operand")
5959 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>")
5961 (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>")))
;; Scalar fused negate-multiply-add intrinsic expander (product negated).
5966 (define_expand "fmai_vmfnmadd_<mode><round_name>"
5967 [(set (match_operand:VFH_128 0 "register_operand")
5971 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>"))
5972 (match_operand:VFH_128 1 "register_operand")
5973 (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>"))
;; Scalar fused negate-multiply-subtract intrinsic expander.
5978 (define_expand "fmai_vmfnmsub_<mode><round_name>"
5979 [(set (match_operand:VFH_128 0 "register_operand")
5983 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>"))
5984 (match_operand:VFH_128 1 "register_operand")
5986 (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>")))
;; Scalar FMA3 insns backing the intrinsic expanders above.  Operand 1 is
;; tied to %0 so its high elements pass through; two alternatives select
;; the 132 vs 213 encoding by which of operands 2/3 may be in memory.
5991 (define_insn "*fmai_fmadd_<mode>"
5992 [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
5995 (match_operand:VFH_128 1 "register_operand" "0,0")
5996 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>, v")
5997 (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
6000 "TARGET_FMA || TARGET_AVX512F"
6002 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
6003 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
6004 [(set_attr "type" "ssemuladd")
6005 (set_attr "mode" "<MODE>")])
;; Scalar fused multiply-subtract (accumulator negated).
6007 (define_insn "*fmai_fmsub_<mode>"
6008 [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
6011 (match_operand:VFH_128 1 "register_operand" "0,0")
6012 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
6014 (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
6017 "TARGET_FMA || TARGET_AVX512F"
6019 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
6020 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
6021 [(set_attr "type" "ssemuladd")
6022 (set_attr "mode" "<MODE>")])
;; Scalar fused negate-multiply-add (product negated via neg of operand 2).
6024 (define_insn "*fmai_fnmadd_<mode><round_name>"
6025 [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
6029 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
6030 (match_operand:VFH_128 1 "register_operand" "0,0")
6031 (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
6034 "TARGET_FMA || TARGET_AVX512F"
6036 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
6037 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
6038 [(set_attr "type" "ssemuladd")
6039 (set_attr "mode" "<MODE>")])
;; Scalar fused negate-multiply-subtract.
6041 (define_insn "*fmai_fnmsub_<mode><round_name>"
6042 [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
6046 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
6047 (match_operand:VFH_128 1 "register_operand" "0,0")
6049 (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
6052 "TARGET_FMA || TARGET_AVX512F"
6054 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
6055 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
6056 [(set_attr "type" "ssemuladd")
6057 (set_attr "mode" "<MODE>")])
;; AVX512 masked scalar fmadd.  "mask" form: masked-off element keeps
;; operand 1 (tied to %0); mask is operand 4 (QImode, 'Yk').
6059 (define_insn "avx512f_vmfmadd_<mode>_mask<round_name>"
6060 [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
6064 (match_operand:VFH_128 1 "register_operand" "0,0")
6065 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
6066 (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
6068 (match_operand:QI 4 "register_operand" "Yk,Yk"))
6073 vfmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
6074 vfmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
6075 [(set_attr "type" "ssemuladd")
6076 (set_attr "mode" "<MODE>")])
;; "mask3" form: the accumulator (operand 3) is tied to %0; vfmadd231.
6078 (define_insn "avx512f_vmfmadd_<mode>_mask3<round_name>"
6079 [(set (match_operand:VFH_128 0 "register_operand" "=v")
6083 (match_operand:VFH_128 1 "<round_nimm_scalar_predicate>" "%v")
6084 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
6085 (match_operand:VFH_128 3 "register_operand" "0"))
6087 (match_operand:QI 4 "register_operand" "Yk"))
6091 "vfmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
6092 [(set_attr "type" "ssemuladd")
6093 (set_attr "mode" "<MODE>")])
;; Zero-masking expander: forwards to the _maskz_1 insn with a zero
;; vector as the merge source.
6095 (define_expand "avx512f_vmfmadd_<mode>_maskz<round_expand_name>"
6096 [(match_operand:VFH_128 0 "register_operand")
6097 (match_operand:VFH_128 1 "<round_expand_nimm_predicate>")
6098 (match_operand:VFH_128 2 "<round_expand_nimm_predicate>")
6099 (match_operand:VFH_128 3 "<round_expand_nimm_predicate>")
6100 (match_operand:QI 4 "register_operand")]
6103 emit_insn (gen_avx512f_vmfmadd_<mode>_maskz_1<round_expand_name> (
6104 operands[0], operands[1], operands[2], operands[3],
6105 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Zero-masked scalar fmadd: operand 4 must be const0; mask is operand 5.
6109 (define_insn "avx512f_vmfmadd_<mode>_maskz_1<round_name>"
6110 [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
6114 (match_operand:VFH_128 1 "register_operand" "0,0")
6115 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
6116 (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
6117 (match_operand:VFH_128 4 "const0_operand")
6118 (match_operand:QI 5 "register_operand" "Yk,Yk"))
6123 vfmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
6124 vfmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
6125 [(set_attr "type" "ssemuladd")
6126 (set_attr "mode" "<MODE>")])
;; AVX512 masked scalar fmsub (accumulator negated).  "mask" form keeps
;; operand 1 (tied to %0) in the masked-off element.
6128 (define_insn "*avx512f_vmfmsub_<mode>_mask<round_name>"
6129 [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
6133 (match_operand:VFH_128 1 "register_operand" "0,0")
6134 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
6136 (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
6138 (match_operand:QI 4 "register_operand" "Yk,Yk"))
6143 vfmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
6144 vfmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
6145 [(set_attr "type" "ssemuladd")
6146 (set_attr "mode" "<MODE>")])
;; "mask3" form: negated accumulator (operand 3) tied to %0; vfmsub231.
6148 (define_insn "avx512f_vmfmsub_<mode>_mask3<round_name>"
6149 [(set (match_operand:VFH_128 0 "register_operand" "=v")
6153 (match_operand:VFH_128 1 "<round_nimm_scalar_predicate>" "%v")
6154 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
6156 (match_operand:VFH_128 3 "register_operand" "0")))
6158 (match_operand:QI 4 "register_operand" "Yk"))
6162 "vfmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
6163 [(set_attr "type" "ssemuladd")
6164 (set_attr "mode" "<MODE>")])
;; Zero-masked scalar fmsub: merge source operand 4 must be const0.
6166 (define_insn "*avx512f_vmfmsub_<mode>_maskz_1<round_name>"
6167 [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
6171 (match_operand:VFH_128 1 "register_operand" "0,0")
6172 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
6174 (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
6175 (match_operand:VFH_128 4 "const0_operand")
6176 (match_operand:QI 5 "register_operand" "Yk,Yk"))
6181 vfmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
6182 vfmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
6183 [(set_attr "type" "ssemuladd")
6184 (set_attr "mode" "<MODE>")])
;; AVX512 masked scalar fnmadd (product negated).  "mask" form keeps
;; operand 1 (tied to %0) in the masked-off element.
6186 (define_insn "avx512f_vmfnmadd_<mode>_mask<round_name>"
6187 [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
6192 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
6193 (match_operand:VFH_128 1 "register_operand" "0,0")
6194 (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
6196 (match_operand:QI 4 "register_operand" "Yk,Yk"))
6201 vfnmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
6202 vfnmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
6203 [(set_attr "type" "ssemuladd")
6204 (set_attr "mode" "<MODE>")])
;; "mask3" form: accumulator (operand 3) tied to %0; vfnmadd231.
6206 (define_insn "avx512f_vmfnmadd_<mode>_mask3<round_name>"
6207 [(set (match_operand:VFH_128 0 "register_operand" "=v")
6212 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
6213 (match_operand:VFH_128 1 "<round_nimm_scalar_predicate>" "%v")
6214 (match_operand:VFH_128 3 "register_operand" "0"))
6216 (match_operand:QI 4 "register_operand" "Yk"))
6220 "vfnmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
6221 [(set_attr "type" "ssemuladd")
6222 (set_attr "mode" "<MODE>")])
;; Zero-masking expander: forwards to _maskz_1 with a zero merge source.
6224 (define_expand "avx512f_vmfnmadd_<mode>_maskz<round_expand_name>"
6225 [(match_operand:VFH_128 0 "register_operand")
6226 (match_operand:VFH_128 1 "<round_expand_nimm_predicate>")
6227 (match_operand:VFH_128 2 "<round_expand_nimm_predicate>")
6228 (match_operand:VFH_128 3 "<round_expand_nimm_predicate>")
6229 (match_operand:QI 4 "register_operand")]
6232 emit_insn (gen_avx512f_vmfnmadd_<mode>_maskz_1<round_expand_name> (
6233 operands[0], operands[1], operands[2], operands[3],
6234 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Zero-masked scalar fnmadd insn (operand 4 const0, mask operand 5).
6238 (define_insn "avx512f_vmfnmadd_<mode>_maskz_1<round_name>"
6239 [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
6244 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
6245 (match_operand:VFH_128 1 "register_operand" "0,0")
6246 (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
6247 (match_operand:VFH_128 4 "const0_operand")
6248 (match_operand:QI 5 "register_operand" "Yk,Yk"))
6253 vfnmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
6254 vfnmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
6255 [(set_attr "type" "ssemuladd")
6256 (set_attr "mode" "<MODE>")])
;; AVX512 masked scalar fnmsub (both product and accumulator negated).
;; "mask" form keeps operand 1 (tied to %0) in the masked-off element.
6258 (define_insn "*avx512f_vmfnmsub_<mode>_mask<round_name>"
6259 [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
6264 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
6265 (match_operand:VFH_128 1 "register_operand" "0,0")
6267 (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
6269 (match_operand:QI 4 "register_operand" "Yk,Yk"))
6274 vfnmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
6275 vfnmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
6276 [(set_attr "type" "ssemuladd")
6277 (set_attr "mode" "<MODE>")])
;; "mask3" form: negated accumulator (operand 3) tied to %0; vfnmsub231.
6279 (define_insn "*avx512f_vmfnmsub_<mode>_mask3<round_name>"
6280 [(set (match_operand:VFH_128 0 "register_operand" "=v")
6285 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
6286 (match_operand:VFH_128 1 "<round_nimm_scalar_predicate>" "%v")
6288 (match_operand:VFH_128 3 "register_operand" "0")))
6290 (match_operand:QI 4 "register_operand" "Yk"))
6294 "vfnmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}"
6295 [(set_attr "type" "ssemuladd")
6296 (set_attr "mode" "<MODE>")])
;; Zero-masked scalar fnmsub insn (operand 4 const0, mask operand 5).
6298 (define_insn "*avx512f_vmfnmsub_<mode>_maskz_1<round_name>"
6299 [(set (match_operand:VFH_128 0 "register_operand" "=v,v")
6304 (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
6305 (match_operand:VFH_128 1 "register_operand" "0,0")
6307 (match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
6308 (match_operand:VFH_128 4 "const0_operand")
6309 (match_operand:QI 5 "register_operand" "Yk,Yk"))
6314 vfnmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
6315 vfnmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
6316 [(set_attr "type" "ssemuladd")
6317 (set_attr "mode" "<MODE>")])
6319 ;; FMA4 floating point scalar intrinsics. These write the
6320 ;; entire destination register, with the high-order elements zeroed.
;; FMA4 scalar intrinsic expander: operand 4 is set to a zero vector so
;; the high-order elements of the destination are zeroed (see section
;; comment above).
6322 (define_expand "fma4i_vmfmadd_<mode>"
6323 [(set (match_operand:VF_128 0 "register_operand")
6326 (match_operand:VF_128 1 "nonimmediate_operand")
6327 (match_operand:VF_128 2 "nonimmediate_operand")
6328 (match_operand:VF_128 3 "nonimmediate_operand"))
6332 "operands[4] = CONST0_RTX (<MODE>mode);")
;; FMA4 scalar fmadd insn; 4-operand form, high elements zeroed
;; (operand 4 is const0).
6334 (define_insn "*fma4i_vmfmadd_<mode>"
6335 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
6338 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
6339 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
6340 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
6341 (match_operand:VF_128 4 "const0_operand")
6344 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
6345 [(set_attr "type" "ssemuladd")
6346 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fmsub (accumulator negated).
6348 (define_insn "*fma4i_vmfmsub_<mode>"
6349 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
6352 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
6353 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
6355 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
6356 (match_operand:VF_128 4 "const0_operand")
6359 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
6360 [(set_attr "type" "ssemuladd")
6361 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fnmadd (operand 1 negated).
6363 (define_insn "*fma4i_vmfnmadd_<mode>"
6364 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
6368 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
6369 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
6370 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
6371 (match_operand:VF_128 4 "const0_operand")
6374 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
6375 [(set_attr "type" "ssemuladd")
6376 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fnmsub (operand 1 negated, accumulator subtracted).
6378 (define_insn "*fma4i_vmfnmsub_<mode>"
6379 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
6383 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
6384 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
6386 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
6387 (match_operand:VF_128 4 "const0_operand")
6390 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
6391 [(set_attr "type" "ssemuladd")
6392 (set_attr "mode" "<MODE>")])
6394 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6396 ;; Complex type operations
6398 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Iterator over the complex FMA unspecs: plain fmaddc and conjugated
;; fcmaddc.
6400 (define_int_iterator UNSPEC_COMPLEX_F_C_MA
6401 [UNSPEC_COMPLEX_FMA UNSPEC_COMPLEX_FCMA])
;; Same pair, but for the "_pair" patterns operating on SF vectors.
6403 (define_int_iterator UNSPEC_COMPLEX_F_C_MA_PAIR
6404 [UNSPEC_COMPLEX_FMA_PAIR UNSPEC_COMPLEX_FCMA_PAIR])
;; Complex multiply unspecs: plain fmulc and conjugated fcmulc.
6406 (define_int_iterator UNSPEC_COMPLEX_F_C_MUL
6407 [UNSPEC_COMPLEX_FMUL UNSPEC_COMPLEX_FCMUL])
;; Mnemonic stem for each complex unspec (used in pattern names and
;; instruction templates).
6409 (define_int_attr complexopname
6410 [(UNSPEC_COMPLEX_FMA "fmaddc")
6411 (UNSPEC_COMPLEX_FCMA "fcmaddc")
6412 (UNSPEC_COMPLEX_FMUL "fmulc")
6413 (UNSPEC_COMPLEX_FCMUL "fcmulc")])
;; Mnemonic stem for the pair unspecs.
6415 (define_int_attr complexpairopname
6416 [(UNSPEC_COMPLEX_FMA_PAIR "fmaddc")
6417 (UNSPEC_COMPLEX_FCMA_PAIR "fcmaddc")])
;; "_conj" suffix for the conjugated variants of the cmla/cmul expanders.
6419 (define_int_attr conj_op
6420 [(UNSPEC_COMPLEX_FMA "")
6421 (UNSPEC_COMPLEX_FCMA "_conj")
6422 (UNSPEC_COMPLEX_FMUL "")
6423 (UNSPEC_COMPLEX_FCMUL "_conj")])
;; Maps an HF vector mode to the masked SF-mode load pattern used when
;; blending complex results by pair mask.
6425 (define_mode_attr complexmove
6426 [(V32HF "avx512f_loadv16sf")
6427 (V16HF "avx512vl_loadv8sf")
6428 (V8HF "avx512vl_loadv4sf")])
;; Masked complex fmaddc, "mask1" semantics: emit the merge-masked insn,
;; then blend the destination with operand 1 under mask operand 4 using
;; the SF-mode masked load (<complexmove>), going through PS-mode subregs
;; because the mask selects real/imag pairs.
;; NOTE(review): the excerpt omits some control-flow lines of the C body.
6430 (define_expand "<avx512>_fmaddc_<mode>_mask1<round_expand_name>"
6431 [(match_operand:VF_AVX512FP16VL 0 "register_operand")
6432 (match_operand:VF_AVX512FP16VL 1 "<round_expand_nimm_predicate>")
6433 (match_operand:VF_AVX512FP16VL 2 "<round_expand_nimm_predicate>")
6434 (match_operand:VF_AVX512FP16VL 3 "<round_expand_nimm_predicate>")
6435 (match_operand:<avx512fmaskcmode> 4 "register_operand")]
6436 "TARGET_AVX512FP16 && <round_mode512bit_condition>"
6439 if (<round_embedded_complex>)
6440 emit_insn (gen_<avx512>_fmaddc_<mode>_mask<round_expand_name> (
6441 operands[0], operands[1], operands[2], operands[3],
6442 operands[4]<round_expand_operand>));
6444 emit_insn (gen_<avx512>_fmaddc_<mode>_mask (operands[0],
6445 operands[1], operands[2], operands[3], operands[4]));
6447 op0 = lowpart_subreg (<ssePSmode>mode,
6448 force_reg (<MODE>mode, operands[0]),
6450 dest = gen_reg_rtx (<ssePSmode>mode);
6451 if (!MEM_P (operands[1]))
6452 operands[1] = force_reg (<MODE>mode, operands[1]);
6453 op1 = lowpart_subreg (<ssePSmode>mode, operands[1], <MODE>mode);
6454 emit_insn (gen_<complexmove>_mask (dest, op0, op1, operands[4]));
6455 emit_move_insn (operands[0],
6456 lowpart_subreg (<MODE>mode, dest, <ssePSmode>mode));
;; Zero-masked complex fmaddc: forwards to the fma_..._maskz_1 insn with
;; a zero merge source.
6460 (define_expand "<avx512>_fmaddc_<mode>_maskz<round_expand_name>"
6461 [(match_operand:VF_AVX512FP16VL 0 "register_operand")
6462 (match_operand:VF_AVX512FP16VL 1 "<round_expand_nimm_predicate>")
6463 (match_operand:VF_AVX512FP16VL 2 "<round_expand_nimm_predicate>")
6464 (match_operand:VF_AVX512FP16VL 3 "<round_expand_nimm_predicate>")
6465 (match_operand:<avx512fmaskcmode> 4 "register_operand")]
6466 "TARGET_AVX512FP16 && <round_mode512bit_condition>"
6468 emit_insn (gen_fma_fmaddc_<mode>_maskz_1<round_expand_name> (
6469 operands[0], operands[1], operands[2], operands[3],
6470 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Conjugated variant of the mask1 expander above (fcmaddc); same
;; pair-mask blend through PS-mode subregs.
6474 (define_expand "<avx512>_fcmaddc_<mode>_mask1<round_expand_name>"
6475 [(match_operand:VF_AVX512FP16VL 0 "register_operand")
6476 (match_operand:VF_AVX512FP16VL 1 "<round_expand_nimm_predicate>")
6477 (match_operand:VF_AVX512FP16VL 2 "<round_expand_nimm_predicate>")
6478 (match_operand:VF_AVX512FP16VL 3 "<round_expand_nimm_predicate>")
6479 (match_operand:<avx512fmaskcmode> 4 "register_operand")]
6480 "TARGET_AVX512FP16 && <round_mode512bit_condition>"
6483 if (<round_embedded_complex>)
6484 emit_insn (gen_<avx512>_fcmaddc_<mode>_mask<round_expand_name> (
6485 operands[0], operands[1], operands[2], operands[3],
6486 operands[4]<round_expand_operand>));
6489 emit_insn (gen_<avx512>_fcmaddc_<mode>_mask (operands[0],
6490 operands[1], operands[2], operands[3], operands[4]));
6493 op0 = lowpart_subreg (<ssePSmode>mode,
6494 force_reg (<MODE>mode, operands[0]),
6496 dest = gen_reg_rtx (<ssePSmode>mode);
6497 if (!MEM_P (operands[1]))
6498 operands[1] = force_reg (<MODE>mode, operands[1]);
6499 op1 = lowpart_subreg (<ssePSmode>mode, operands[1], <MODE>mode);
6500 emit_insn (gen_<complexmove>_mask (dest, op0, op1, operands[4]));
6501 emit_move_insn (operands[0],
6502 lowpart_subreg (<MODE>mode, dest, <ssePSmode>mode));
;; Zero-masked conjugated complex FMA (fcmaddc).
6506 (define_expand "<avx512>_fcmaddc_<mode>_maskz<round_expand_name>"
6507 [(match_operand:VF_AVX512FP16VL 0 "register_operand")
6508 (match_operand:VF_AVX512FP16VL 1 "<round_expand_nimm_predicate>")
6509 (match_operand:VF_AVX512FP16VL 2 "<round_expand_nimm_predicate>")
6510 (match_operand:VF_AVX512FP16VL 3 "<round_expand_nimm_predicate>")
6511 (match_operand:<avx512fmaskcmode> 4 "register_operand")]
6512 "TARGET_AVX512FP16 && <round_mode512bit_condition>"
6514 emit_insn (gen_fma_fcmaddc_<mode>_maskz_1<round_expand_name> (
6515 operands[0], operands[1], operands[2], operands[3],
6516 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; Named expander "cmla<conj_op><mode>4": complex multiply-accumulate
;; (and its _conj variant) mapped onto the UNSPEC_COMPLEX_F_C_MA unspec.
6520 (define_expand "cmla<conj_op><mode>4"
6521 [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
6522 (unspec:VF_AVX512FP16VL
6523 [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
6524 (match_operand:VF_AVX512FP16VL 2 "vector_operand")
6525 (match_operand:VF_AVX512FP16VL 3 "vector_operand")]
6526 UNSPEC_COMPLEX_F_C_MA))]
6527 "TARGET_AVX512FP16")
;; Complex (possibly conjugated) FMA insn: accumulator is operand 3 tied
;; to %0; destination is earlyclobber ("=&v") so it cannot alias the
;; commutative multiplicands.  Supports zero-masking and embedded rounding.
6529 (define_insn "fma_<complexopname>_<mode><sdc_maskz_name><round_name>"
6530 [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
6531 (unspec:VF_AVX512FP16VL
6532 [(match_operand:VF_AVX512FP16VL 1 "<round_nimm_predicate>" "%v")
6533 (match_operand:VF_AVX512FP16VL 2 "<round_nimm_predicate>" "<round_constraint>")
6534 (match_operand:VF_AVX512FP16VL 3 "<round_nimm_predicate>" "0")]
6535 UNSPEC_COMPLEX_F_C_MA))]
6536 "TARGET_AVX512FP16 && <sdc_mask_mode512bit_condition> && <round_mode512bit_condition>"
6537 "v<complexopname><ssemodesuffix>\t{<round_sdc_mask_op4>%2, %1, %0<sdc_mask_op4>|%0<sdc_mask_op4>, %1, %2<round_sdc_mask_op4>}"
6538 [(set_attr "type" "ssemuladd")
6539 (set_attr "mode" "<MODE>")])
;; Fuse a complex multiply (fmulc) followed by an add into a single
;; complex FMA (fmaddc).  Requires -funsafe-math-optimizations and splits
;; before reload.
6541 (define_insn_and_split "fma_<mode>_fadd_fmul"
6542 [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
6543 (plus:VF_AVX512FP16VL
6544 (unspec:VF_AVX512FP16VL
6545 [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
6546 (match_operand:VF_AVX512FP16VL 2 "vector_operand")]
6547 UNSPEC_COMPLEX_FMUL)
6548 (match_operand:VF_AVX512FP16VL 3 "vector_operand")))]
6549 "TARGET_AVX512FP16 && flag_unsafe_math_optimizations
6550 && ix86_pre_reload_split ()"
6554 (unspec:VF_AVX512FP16VL
6555 [(match_dup 1) (match_dup 2) (match_dup 3)]
6556 UNSPEC_COMPLEX_FMA))])
;; Same fusion for the conjugated multiply: fcmulc + add -> fcmaddc.
6558 (define_insn_and_split "fma_<mode>_fadd_fcmul"
6559 [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
6560 (plus:VF_AVX512FP16VL
6561 (unspec:VF_AVX512FP16VL
6562 [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
6563 (match_operand:VF_AVX512FP16VL 2 "vector_operand")]
6564 UNSPEC_COMPLEX_FCMUL)
6565 (match_operand:VF_AVX512FP16VL 3 "vector_operand")))]
6566 "TARGET_AVX512FP16 && flag_unsafe_math_optimizations
6567 && ix86_pre_reload_split ()"
6571 (unspec:VF_AVX512FP16VL
6572 [(match_dup 1) (match_dup 2) (match_dup 3)]
6573 UNSPEC_COMPLEX_FCMA))])
;; Collapse (fmaddc a b 0) + c into fmaddc a b c: a complex FMA whose
;; accumulator is the zero vector followed by an add is just a complex
;; FMA with the addend as accumulator.
6575 (define_insn_and_split "fma_<complexopname>_<mode>_fma_zero"
6576 [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
6577 (plus:VF_AVX512FP16VL
6578 (unspec:VF_AVX512FP16VL
6579 [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
6580 (match_operand:VF_AVX512FP16VL 2 "vector_operand")
6581 (match_operand:VF_AVX512FP16VL 3 "const0_operand")]
6582 UNSPEC_COMPLEX_F_C_MA)
6583 (match_operand:VF_AVX512FP16VL 4 "vector_operand")))]
6584 "TARGET_AVX512FP16 && flag_unsafe_math_optimizations
6585 && ix86_pre_reload_split ()"
6589 (unspec:VF_AVX512FP16VL
6590 [(match_dup 1) (match_dup 2) (match_dup 4)]
6591 UNSPEC_COMPLEX_F_C_MA))])
;; Complex FMA "_pair" insn on SF vector modes: emits the *ph instruction
;; (v<fmaddc|fcmaddc>ph) on data viewed as pairs of floats.  Operand 2
;; allows broadcast memory ("vmBr"); destination is earlyclobber.
6593 (define_insn "fma_<complexpairopname>_<mode>_pair"
6594 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=&v")
6595 (unspec:VF1_AVX512VL
6596 [(match_operand:VF1_AVX512VL 1 "vector_operand" "%v")
6597 (match_operand:VF1_AVX512VL 2 "bcst_vector_operand" "vmBr")
6598 (match_operand:VF1_AVX512VL 3 "vector_operand" "0")]
6599 UNSPEC_COMPLEX_F_C_MA_PAIR))]
6601 "v<complexpairopname>ph\t{%2, %1, %0|%0, %1, %2}"
6602 [(set_attr "type" "ssemuladd")])
;; Rewrite a complex fmaddc whose operand 2 is a PS-mode broadcast viewed
;; through a subreg: split (pre-reload) into the _pair pattern operating
;; directly in <ssePSmode>, converting operands 1/3 via lowpart subregs.
6604 (define_insn_and_split "fma_<mode>_fmaddc_bcst"
6605 [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
6606 (unspec:VF_AVX512FP16VL
6607 [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
6608 (subreg:VF_AVX512FP16VL
6609 (match_operand:<ssePSmode> 2 "bcst_vector_operand") 0)
6610 (match_operand:VF_AVX512FP16VL 3 "vector_operand")]
6611 UNSPEC_COMPLEX_FMA))]
6612 "TARGET_AVX512FP16 && ix86_pre_reload_split ()"
6617 if (!MEM_P (operands[1]))
6618 operands[1] = force_reg (<MODE>mode, operands[1]);
6619 if (!MEM_P (operands[3]))
6620 operands[3] = force_reg (<MODE>mode, operands[3]);
6621 operands[1] = lowpart_subreg (<ssePSmode>mode, operands[1], <MODE>mode);
6622 operands[3] = lowpart_subreg (<ssePSmode>mode, operands[3], <MODE>mode);
6623 rtx dest = gen_reg_rtx (<ssePSmode>mode);
6624 emit_insn (gen_fma_fmaddc_<ssePSmodelower>_pair (dest, operands[1],
6625 operands[2], operands[3]));
6626 emit_move_insn (operands[0],
6627 lowpart_subreg (<MODE>mode, dest, <ssePSmode>mode));
;; Conjugated variant of the broadcast split above (fcmaddc).
6631 (define_insn_and_split "fma_<mode>_fcmaddc_bcst"
6632 [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
6633 (unspec:VF_AVX512FP16VL
6634 [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
6635 (subreg:VF_AVX512FP16VL
6636 (match_operand:<ssePSmode> 2 "bcst_vector_operand") 0)
6637 (match_operand:VF_AVX512FP16VL 3 "vector_operand")]
6638 UNSPEC_COMPLEX_FCMA))]
6639 "TARGET_AVX512FP16 && ix86_pre_reload_split ()"
6644 if (!MEM_P (operands[1]))
6645 operands[1] = force_reg (<MODE>mode, operands[1]);
6646 if (!MEM_P (operands[3]))
6647 operands[3] = force_reg (<MODE>mode, operands[3]);
6648 operands[1] = lowpart_subreg (<ssePSmode>mode, operands[1], <MODE>mode);
6649 operands[3] = lowpart_subreg (<ssePSmode>mode, operands[3], <MODE>mode);
6650 rtx dest = gen_reg_rtx (<ssePSmode>mode);
6651 emit_insn (gen_fma_fcmaddc_<ssePSmodelower>_pair (dest, operands[1],
6654 emit_move_insn (operands[0],
6655 lowpart_subreg (<MODE>mode, dest, <ssePSmode>mode));
;; Merge-masked complex FMA: the per-pair mask (operand 4) is widened to
;; the element mask mode via UNSPEC_COMPLEX_MASK before the vec_merge.
;; Accumulator operand 3 tied to %0; destination earlyclobber.
6659 (define_insn "<avx512>_<complexopname>_<mode>_mask<round_name>"
6660 [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
6661 (vec_merge:VF_AVX512FP16VL
6662 (unspec:VF_AVX512FP16VL
6663 [(match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "%v")
6664 (match_operand:VF_AVX512FP16VL 2 "nonimmediate_operand" "<round_constraint>")
6665 (match_operand:VF_AVX512FP16VL 3 "register_operand" "0")]
6666 UNSPEC_COMPLEX_F_C_MA)
6668 (unspec:<avx512fmaskmode>
6669 [(match_operand:<avx512fmaskcmode> 4 "register_operand" "Yk")]
6670 UNSPEC_COMPLEX_MASK)))]
6671 "TARGET_AVX512FP16 && <round_mode512bit_condition>"
6672 "v<complexopname><ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
6673 [(set_attr "type" "ssemuladd")
6674 (set_attr "mode" "<MODE>")])
;; Named expander "cmul<conj_op><mode>3": complex multiply (and _conj
;; variant) via the UNSPEC_COMPLEX_F_C_MUL unspec.
6676 (define_expand "cmul<conj_op><mode>3"
6677 [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
6678 (unspec:VF_AVX512FP16VL
6679 [(match_operand:VF_AVX512FP16VL 1 "vector_operand")
6680 (match_operand:VF_AVX512FP16VL 2 "vector_operand")]
6681 UNSPEC_COMPLEX_F_C_MUL))]
6682 "TARGET_AVX512FP16")
;; Complex multiply insn (optionally masked/rounded).  When
;; TARGET_DEST_FALSE_DEP_FOR_GLC applies, clears the destination with
;; vxorps first to break a false output dependency.
6684 (define_insn "<avx512>_<complexopname>_<mode><maskc_name><round_name>"
6685 [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v")
6686 (unspec:VF_AVX512FP16VL
6687 [(match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "%v")
6688 (match_operand:VF_AVX512FP16VL 2 "nonimmediate_operand" "<round_constraint>")]
6689 UNSPEC_COMPLEX_F_C_MUL))]
6690 "TARGET_AVX512FP16 && <round_mode512bit_condition>"
6692 if (TARGET_DEST_FALSE_DEP_FOR_GLC
6693 && <maskc_dest_false_dep_for_glc_cond>)
6694 output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
6695 return "v<complexopname><ssemodesuffix>\t{<round_maskc_op3>%2, %1, %0<maskc_operand3>|%0<maskc_operand3>, %1, %2<round_maskc_op3>}";
6697 [(set_attr "type" "ssemul")
6698 (set_attr "mode" "<MODE>")])
;; Scalar complex FMA intrinsic expanders (V8HF, element 0 only).
;; Zero-masked fmaddcsh: forwards to the fma_..._maskz insn with a zero
;; merge source.
6700 (define_expand "avx512fp16_fmaddcsh_v8hf_maskz<round_expand_name>"
6701 [(match_operand:V8HF 0 "register_operand")
6702 (match_operand:V8HF 1 "<round_expand_nimm_predicate>")
6703 (match_operand:V8HF 2 "<round_expand_nimm_predicate>")
6704 (match_operand:V8HF 3 "<round_expand_nimm_predicate>")
6705 (match_operand:QI 4 "register_operand")]
6706 "TARGET_AVX512FP16 && <round_mode512bit_condition>"
6708 emit_insn (gen_avx512fp16_fma_fmaddcsh_v8hf_maskz<round_expand_name> (
6709 operands[0], operands[1], operands[2], operands[3],
6710 CONST0_RTX (V8HFmode), operands[4]<round_expand_operand>));
;; "mask1" fmaddcsh: emit the masked insn, then blend element 0 of the
;; result with operand 1 under mask 4 via avx512f_movsf_mask, working
;; through V4SF subregs of the V8HF values.
;; NOTE(review): the excerpt omits some control-flow lines of the C body.
6714 (define_expand "avx512fp16_fmaddcsh_v8hf_mask1<round_expand_name>"
6715 [(match_operand:V8HF 0 "register_operand")
6716 (match_operand:V8HF 1 "<round_expand_nimm_predicate>")
6717 (match_operand:V8HF 2 "<round_expand_nimm_predicate>")
6718 (match_operand:V8HF 3 "<round_expand_nimm_predicate>")
6719 (match_operand:QI 4 "register_operand")]
6720 "TARGET_AVX512FP16 && <round_mode512bit_condition>"
6724 if (<round_embedded_complex>)
6725 emit_insn (gen_avx512fp16_fmaddcsh_v8hf_mask<round_expand_name> (
6726 operands[0], operands[1], operands[2], operands[3],
6727 operands[4]<round_expand_operand>));
6729 emit_insn (gen_avx512fp16_fmaddcsh_v8hf_mask (operands[0],
6730 operands[1], operands[2], operands[3], operands[4]));
6732 op0 = lowpart_subreg (V4SFmode, force_reg (V8HFmode, operands[0]),
6734 if (!MEM_P (operands[1]))
6735 operands[1] = force_reg (V8HFmode, operands[1]);
6736 op1 = lowpart_subreg (V4SFmode, operands[1], V8HFmode);
6737 dest = gen_reg_rtx (V4SFmode);
6738 emit_insn (gen_avx512f_movsf_mask (dest, op1, op0, op1, operands[4]));
6739 emit_move_insn (operands[0], lowpart_subreg (V8HFmode, dest,
;; Zero-masked conjugated scalar complex FMA (fcmaddcsh).
6744 (define_expand "avx512fp16_fcmaddcsh_v8hf_maskz<round_expand_name>"
6745 [(match_operand:V8HF 0 "register_operand")
6746 (match_operand:V8HF 1 "<round_expand_nimm_predicate>")
6747 (match_operand:V8HF 2 "<round_expand_nimm_predicate>")
6748 (match_operand:V8HF 3 "<round_expand_nimm_predicate>")
6749 (match_operand:QI 4 "register_operand")]
6750 "TARGET_AVX512FP16 && <round_mode512bit_condition>"
6752 emit_insn (gen_avx512fp16_fma_fcmaddcsh_v8hf_maskz<round_expand_name> (
6753 operands[0], operands[1], operands[2], operands[3],
6754 CONST0_RTX (V8HFmode), operands[4]<round_expand_operand>));
;; "mask1" fcmaddcsh: like the fmaddcsh mask1 expander — emit the masked
;; insn, then blend element 0 with operand 1 under mask 4 via
;; avx512f_movsf_mask on V4SF subregs.
;; NOTE(review): the excerpt omits some control-flow lines of the C body.
6758 (define_expand "avx512fp16_fcmaddcsh_v8hf_mask1<round_expand_name>"
6759 [(match_operand:V8HF 0 "register_operand")
6760 (match_operand:V8HF 1 "<round_expand_nimm_predicate>")
6761 (match_operand:V8HF 2 "<round_expand_nimm_predicate>")
6762 (match_operand:V8HF 3 "<round_expand_nimm_predicate>")
6763 (match_operand:QI 4 "register_operand")]
6764 "TARGET_AVX512FP16 && <round_mode512bit_condition>"
6768 if (<round_embedded_complex>)
6769 emit_insn (gen_avx512fp16_fcmaddcsh_v8hf_mask<round_expand_name> (
6770 operands[0], operands[1], operands[2], operands[3],
6771 operands[4]<round_expand_operand>));
6773 emit_insn (gen_avx512fp16_fcmaddcsh_v8hf_mask (operands[0],
6774 operands[1], operands[2], operands[3], operands[4]));
6776 op0 = lowpart_subreg (V4SFmode, force_reg (V8HFmode, operands[0]),
6778 if (!MEM_P (operands[1]))
6779 operands[1] = force_reg (V8HFmode, operands[1]);
6780 op1 = lowpart_subreg (V4SFmode, operands[1], V8HFmode);
6781 dest = gen_reg_rtx (V4SFmode);
6782 emit_insn (gen_avx512f_movsf_mask (dest, op1, op0, op1, operands[4]));
6783 emit_move_insn (operands[0], lowpart_subreg (V8HFmode, dest,
;; "mask3" fcmaddcsh: emit the masked insn, then copy the upper elements
;; of the accumulator (operand 3) into the result via sse_movss_v4sf on
;; V4SF subregs (element 0 comes from the computed value).
6788 (define_expand "avx512fp16_fcmaddcsh_v8hf_mask3<round_expand_name>"
6789 [(match_operand:V8HF 0 "register_operand")
6790 (match_operand:V8HF 1 "<round_expand_nimm_predicate>")
6791 (match_operand:V8HF 2 "<round_expand_nimm_predicate>")
6792 (match_operand:V8HF 3 "<round_expand_nimm_predicate>")
6793 (match_operand:QI 4 "register_operand")]
6794 "TARGET_AVX512FP16 && <round_mode512bit_condition>"
6798 if (<round_embedded_complex>)
6799 emit_insn (gen_avx512fp16_fcmaddcsh_v8hf_mask<round_expand_name> (
6800 operands[0], operands[1], operands[2], operands[3],
6801 operands[4]<round_expand_operand>));
6803 emit_insn (gen_avx512fp16_fcmaddcsh_v8hf_mask (operands[0],
6804 operands[1], operands[2], operands[3], operands[4]));
6806 dest = gen_reg_rtx (V4SFmode);
6807 op0 = lowpart_subreg (V4SFmode,
6808 force_reg (V8HFmode, operands[0]),
6810 if (!MEM_P (operands[3]))
6811 operands[3] = force_reg (V8HFmode, operands[3]);
6812 op1 = lowpart_subreg (V4SFmode, operands[3], V8HFmode);
6813 emit_insn (gen_sse_movss_v4sf (dest, op1, op0));
6814 emit_move_insn (operands[0], lowpart_subreg (V8HFmode, dest, V4SFmode));
6818 (define_expand "avx512fp16_fmaddcsh_v8hf_mask3<round_expand_name>"
6819 [(match_operand:V8HF 0 "register_operand")
6820 (match_operand:V8HF 1 "<round_expand_nimm_predicate>")
6821 (match_operand:V8HF 2 "<round_expand_nimm_predicate>")
6822 (match_operand:V8HF 3 "<round_expand_nimm_predicate>")
6823 (match_operand:QI 4 "register_operand")]
6824 "TARGET_AVX512FP16 && <round_mode512bit_condition>"
6828 if (<round_embedded_complex>)
6829 emit_insn (gen_avx512fp16_fmaddcsh_v8hf_mask<round_expand_name> (
6830 operands[0], operands[1], operands[2], operands[3],
6831 operands[4]<round_expand_operand>));
6833 emit_insn (gen_avx512fp16_fmaddcsh_v8hf_mask (operands[0],
6834 operands[1], operands[2], operands[3], operands[4]));
6836 dest = gen_reg_rtx (V4SFmode);
6837 op0 = lowpart_subreg (V4SFmode,
6838 force_reg (V8HFmode, operands[0]),
6840 if (!MEM_P (operands[3]))
6841 operands[3] = force_reg (V8HFmode, operands[3]);
6842 op1 = lowpart_subreg (V4SFmode, operands[3], V8HFmode);
6843 emit_insn (gen_sse_movss_v4sf (dest, op1, op0));
6844 emit_move_insn (operands[0], lowpart_subreg (V8HFmode, dest, V4SFmode));
;; Scalar complex FMA insn (vfmaddcsh/vfcmaddcsh via UNSPEC_COMPLEX_F_C_MA).
;; Destination uses early-clobber "=&v": it must not overlap the inputs.
;; Operand 3 is tied to the destination ("0" constraint) as the accumulator.
6848 (define_insn "avx512fp16_fma_<complexopname>sh_v8hf<mask_scalarcz_name><round_scalarcz_name>"
6849 [(set (match_operand:V8HF 0 "register_operand" "=&v")
6852 [(match_operand:V8HF 1 "<round_scalarcz_nimm_predicate>" "v")
6853 (match_operand:V8HF 2 "<round_scalarcz_nimm_predicate>" "<round_scalarcz_constraint>")
6854 (match_operand:V8HF 3 "<round_scalarcz_nimm_predicate>" "0")]
6855 UNSPEC_COMPLEX_F_C_MA)
6859 "v<complexopname>sh\t{<round_scalarcz_mask_op4>%2, %1, %0<mask_scalarcz_operand4>|%0<mask_scalarcz_operand4>, %1, %2<round_scalarcz_mask_op4>}"
6860 [(set_attr "type" "ssemuladd")
6861 (set_attr "mode" "V8HF")])
;; Masked form of the scalar complex FMA: the QI mask (operand 4, "Yk")
;; is wrapped in UNSPEC_COMPLEX_MASK and emitted as the {%4} predicate.
6863 (define_insn "avx512fp16_<complexopname>sh_v8hf_mask<round_name>"
6864 [(set (match_operand:V8HF 0 "register_operand" "=&v")
6868 [(match_operand:V8HF 1 "<round_nimm_predicate>" "v")
6869 (match_operand:V8HF 2 "<round_nimm_predicate>" "<round_constraint>")
6870 (match_operand:V8HF 3 "<round_nimm_predicate>" "0")]
6871 UNSPEC_COMPLEX_F_C_MA)
6873 (unspec:QI [(match_operand:QI 4 "register_operand" "Yk")]
6874 UNSPEC_COMPLEX_MASK))
6878 "v<complexopname>sh\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
6879 [(set_attr "type" "ssemuladd")
6880 (set_attr "mode" "V8HF")])
;; Scalar complex multiply (vfmulcsh/vfcmulcsh via UNSPEC_COMPLEX_F_C_MUL).
;; When TARGET_DEST_FALSE_DEP_FOR_GLC applies, a vxorps of the destination
;; is emitted first to break a false output dependence on GLC-class CPUs.
6882 (define_insn "avx512fp16_<complexopname>sh_v8hf<mask_scalarc_name><round_scalarcz_name>"
6883 [(set (match_operand:V8HF 0 "register_operand" "=&v")
6886 [(match_operand:V8HF 1 "nonimmediate_operand" "v")
6887 (match_operand:V8HF 2 "<round_scalarcz_nimm_predicate>" "<round_scalarcz_constraint>")]
6888 UNSPEC_COMPLEX_F_C_MUL)
6893 if (TARGET_DEST_FALSE_DEP_FOR_GLC
6894 && <mask_scalarc_dest_false_dep_for_glc_cond>)
6895 output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
6896 return "v<complexopname>sh\t{<round_scalarc_mask_op3>%2, %1, %0<mask_scalarc_operand3>|%0<mask_scalarc_operand3>, %1, %2<round_scalarc_mask_op3>}";
6898 [(set_attr "type" "ssemul")
6899 (set_attr "mode" "V8HF")])
6901 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6903 ;; Parallel half-precision floating point conversion operations
6905 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Iterator pairing the unsigned and signed non-truncating FP->int unspecs,
;; so one pattern template covers vcvtph2udq/vcvtph2dq style insn pairs.
6907 (define_int_iterator UNSPEC_US_FIX_NOTRUNC
6908 [UNSPEC_UNSIGNED_FIX_NOTRUNC UNSPEC_FIX_NOTRUNC])
;; Mnemonic prefix selected by the iterator: "u" for unsigned conversions.
6910 (define_int_attr sseintconvertsignprefix
6911 [(UNSPEC_UNSIGNED_FIX_NOTRUNC "u")
6912 (UNSPEC_FIX_NOTRUNC "")])
;; AT&T-syntax operand-size suffix ({x}/{y}/{z}) appended to narrowing
;; cvt*2ph mnemonics to disambiguate the source width; empty where the
;; destination width already determines it.
6914 (define_mode_attr qq2phsuff
6915 [(V32HI "") (V16HI "") (V8HI "")
6916 (V16SI "") (V8SI "{y}") (V4SI "{x}")
6917 (V8DI "{z}") (V4DI "{y}") (V2DI "{x}")
6918 (V16SF "") (V8SF "{y}") (V4SF "{x}")
6919 (V8DF "{z}") (V4DF "{y}") (V2DF "{x}")])
;; Non-truncating HF -> (un)signed 16/32/64-bit integer vector conversion
;; (vcvtph2[u]{w,dq,qq}), with optional masking and embedded rounding.
6921 (define_insn "avx512fp16_vcvtph2<sseintconvertsignprefix><sseintconvert>_<mode><mask_name><round_name>"
6922 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
6923 (unspec:VI248_AVX512VL
6924 [(match_operand:<ssePHmode> 1 "<round_nimm_predicate>" "<round_constraint>")]
6925 UNSPEC_US_FIX_NOTRUNC))]
6927 "vcvtph2<sseintconvertsignprefix><sseintconvert>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6928 [(set_attr "type" "ssecvt")
6929 (set_attr "prefix" "evex")
6930 (set_attr "mode" "<sseinsnmode>")])
;; Standard-named expander: (un)signed integer vector -> HF vector.
6932 (define_expand "float<floatunssuffix><mode><ssePHmodelower>2"
6933 [(set (match_operand:<ssePHmode> 0 "register_operand")
6934 (any_float:<ssePHmode>
6935 (match_operand:VI2H_AVX512VL 1 "nonimmediate_operand")))]
6936 "TARGET_AVX512FP16")
;; Insn for the expander above: vcvt[u]{w,dq,qq}2ph with mask/rounding.
6938 (define_insn "avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode><mask_name><round_name>"
6939 [(set (match_operand:<ssePHmode> 0 "register_operand" "=v")
6940 (any_float:<ssePHmode>
6941 (match_operand:VI2H_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")))]
6943 "vcvt<floatsuffix><sseintconvert>2ph<round_qq2phsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6944 [(set_attr "type" "ssecvt")
6945 (set_attr "prefix" "evex")
6946 (set_attr "mode" "<sseinsnmode>")])
;; Standard-named expander for 4-element int -> V4HF: the hardware result
;; is a full V8HF (upper half zeroed), so convert into a V8HF temporary
;; and extract the V4HF lowpart.
6948 (define_expand "float<floatunssuffix><mode>v4hf2"
6949 [(set (match_operand:V4HF 0 "register_operand")
6951 (match_operand:VI4_128_8_256 1 "vector_operand")))]
6952 "TARGET_AVX512FP16 && TARGET_AVX512VL"
6954 rtx dest = gen_reg_rtx (V8HFmode);
6955 emit_insn (gen_avx512fp16_float<floatunssuffix><mode>v4hf2 (dest,
6957 emit_move_insn (operands[0], lowpart_subreg (V4HFmode, dest, V8HFmode));
;; Helper expander producing the V8HF form: converted V4HF concatenated
;; with a V4HF zero vector (operand 2 initialized in the preparation code).
6961 (define_expand "avx512fp16_float<floatunssuffix><mode>v4hf2"
6962 [(set (match_operand:V8HF 0 "register_operand")
6964 (any_float:V4HF (match_operand:VI4_128_8_256 1 "vector_operand"))
6966 "TARGET_AVX512FP16 && TARGET_AVX512VL"
6967 "operands[2] = CONST0_RTX (V4HFmode);")
;; Matching insn: vcvt[u]dq2ph{x}/... on a 128/256-bit integer source
;; whose V4HF result is zero-extended into V8HF.
6969 (define_insn "*avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>"
6970 [(set (match_operand:V8HF 0 "register_operand" "=v")
6972 (any_float:V4HF (match_operand:VI4_128_8_256 1 "vector_operand" "vm"))
6973 (match_operand:V4HF 2 "const0_operand")))]
6974 "TARGET_AVX512FP16 && TARGET_AVX512VL"
6975 "vcvt<floatsuffix><sseintconvert>2ph<qq2phsuff>\t{%1, %0|%0, %1}"
6976 [(set_attr "type" "ssecvt")
6977 (set_attr "prefix" "evex")
6978 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked variant: low 4 HF elements of operand 2 provide the
;; merge values under QI mask operand 3; operand 4 is the zero upper half.
6980 (define_expand "avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>_mask"
6981 [(set (match_operand:V8HF 0 "register_operand" "=v")
6984 (any_float:V4HF (match_operand:VI4_128_8_256 1 "vector_operand" "vm"))
6986 (match_operand:V8HF 2 "nonimm_or_0_operand" "0C")
6987 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
6988 (match_operand:QI 3 "register_operand" "Yk"))
6990 "TARGET_AVX512FP16 && TARGET_AVX512VL"
6991 "operands[4] = CONST0_RTX (V4HFmode);")
;; Insn for the merge-masked form above; %N2 prints {z} for a zero merge.
6993 (define_insn "*avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>_mask"
6994 [(set (match_operand:V8HF 0 "register_operand" "=v")
6997 (any_float:V4HF (match_operand:VI4_128_8_256 1 "vector_operand" "vm"))
6999 (match_operand:V8HF 2 "nonimm_or_0_operand" "0C")
7000 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
7001 (match_operand:QI 3 "register_operand" "Yk"))
7002 (match_operand:V4HF 4 "const0_operand")))]
7003 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7004 "vcvt<floatsuffix><sseintconvert>2ph<qq2phsuff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7005 [(set_attr "type" "ssecvt")
7006 (set_attr "prefix" "evex")
7007 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masked variant: both merge vector and upper half are const0,
;; emitted with an explicit {z} modifier.
7009 (define_insn "*avx512fp16_vcvt<floatsuffix><sseintconvert>2ph_<mode>_mask_1"
7010 [(set (match_operand:V8HF 0 "register_operand" "=v")
7013 (any_float:V4HF (match_operand:VI4_128_8_256 1
7014 "vector_operand" "vm"))
7015 (match_operand:V4HF 3 "const0_operand")
7016 (match_operand:QI 2 "register_operand" "Yk"))
7017 (match_operand:V4HF 4 "const0_operand")))]
7018 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7019 "vcvt<floatsuffix><sseintconvert>2ph<qq2phsuff>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
7020 [(set_attr "type" "ssecvt")
7021 (set_attr "prefix" "evex")
7022 (set_attr "mode" "<sseinsnmode>")])
;; V2DI -> V2HF conversion (vcvt[u]qq2ph{x}): same zero-extended-result
;; scheme as the V4HF family, but the zero padding is V6HF (2 + 6 = 8 HF).
7024 (define_expand "float<floatunssuffix>v2div2hf2"
7025 [(set (match_operand:V2HF 0 "register_operand")
7027 (match_operand:V2DI 1 "vector_operand")))]
7028 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7030 rtx dest = gen_reg_rtx (V8HFmode);
7031 emit_insn (gen_avx512fp16_float<floatunssuffix>v2div2hf2 (dest,
7033 emit_move_insn (operands[0], lowpart_subreg (V2HFmode, dest, V8HFmode));
;; Helper expander: V2HF result concatenated with a V6HF zero vector.
7037 (define_expand "avx512fp16_float<floatunssuffix>v2div2hf2"
7038 [(set (match_operand:V8HF 0 "register_operand")
7040 (any_float:V2HF (match_operand:V2DI 1 "vector_operand"))
7042 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7043 "operands[2] = CONST0_RTX (V6HFmode);")
;; Matching insn for the unmasked form.
7045 (define_insn "*avx512fp16_vcvt<floatsuffix>qq2ph_v2di"
7046 [(set (match_operand:V8HF 0 "register_operand" "=v")
7048 (any_float:V2HF (match_operand:V2DI 1 "vector_operand" "vm"))
7049 (match_operand:V6HF 2 "const0_operand")))]
7050 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7051 "vcvt<floatsuffix>qq2ph{x}\t{%1, %0|%0, %1}"
7052 [(set_attr "type" "ssecvt")
7053 (set_attr "prefix" "evex")
7054 (set_attr "mode" "TI")])
;; Merge-masked expander: low 2 HF elements of operand 2 merge under
;; QI mask operand 3; operand 4 is the V6HF zero upper part.
7056 (define_expand "avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask"
7057 [(set (match_operand:V8HF 0 "register_operand" "=v")
7060 (any_float:V2HF (match_operand:V2DI 1 "vector_operand" "vm"))
7062 (match_operand:V8HF 2 "nonimm_or_0_operand" "0C")
7063 (parallel [(const_int 0) (const_int 1)]))
7064 (match_operand:QI 3 "register_operand" "Yk"))
7066 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7067 "operands[4] = CONST0_RTX (V6HFmode);")
;; Insn for the merge-masked form; %N2 prints {z} for a zero merge.
7069 (define_insn "*avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask"
7070 [(set (match_operand:V8HF 0 "register_operand" "=v")
7073 (any_float:V2HF (match_operand:V2DI 1 "vector_operand" "vm"))
7075 (match_operand:V8HF 2 "nonimm_or_0_operand" "0C")
7076 (parallel [(const_int 0) (const_int 1)]))
7077 (match_operand:QI 3 "register_operand" "Yk"))
7078 (match_operand:V6HF 4 "const0_operand")))]
7079 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7080 "vcvt<floatsuffix>qq2ph{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7081 [(set_attr "type" "ssecvt")
7082 (set_attr "prefix" "evex")
7083 (set_attr "mode" "TI")])
;; Zero-masked variant with an explicit {z} modifier.
7085 (define_insn "*avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask_1"
7086 [(set (match_operand:V8HF 0 "register_operand" "=v")
7089 (any_float:V2HF (match_operand:V2DI 1
7090 "vector_operand" "vm"))
7091 (match_operand:V2HF 3 "const0_operand")
7092 (match_operand:QI 2 "register_operand" "Yk"))
7093 (match_operand:V6HF 4 "const0_operand")))]
7094 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7095 "vcvt<floatsuffix>qq2ph{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
7096 [(set_attr "type" "ssecvt")
7097 (set_attr "prefix" "evex")
7098 (set_attr "mode" "TI")])
;; Scalar HF element 0 -> 32/64-bit (un)signed integer, non-truncating
;; (vcvtsh2[u]si), with optional embedded rounding.
7100 (define_insn "avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix><round_name>"
7101 [(set (match_operand:SWI48 0 "register_operand" "=r")
7104 (match_operand:V8HF 1 "register_operand" "v")
7105 (parallel [(const_int 0)]))]
7106 UNSPEC_US_FIX_NOTRUNC))]
7108 "vcvtsh2<sseintconvertsignprefix>si\t{<round_op2>%1, %0|%0, %1<round_op2>}"
7109 [(set_attr "type" "sseicvt")
7110 (set_attr "prefix" "evex")
7111 (set_attr "mode" "<MODE>")])
;; Same conversion taking a scalar HF operand directly (register or memory).
7113 (define_insn "avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix>_2"
7114 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
7116 [(match_operand:HF 1 "nonimmediate_operand" "v,m")]
7117 UNSPEC_US_FIX_NOTRUNC))]
7119 "vcvtsh2<sseintconvertsignprefix>si\t{%1, %0|%0, %1}"
7120 [(set_attr "type" "sseicvt")
7121 (set_attr "prefix" "evex")
7122 (set_attr "mode" "<MODE>")])
;; AT&T size suffix for scalar integer source operands: l = SI, q = DI.
7124 (define_mode_attr sseicvtsuffix
7125 [(SI "l") (DI "q")])
;; Scalar (un)signed 32/64-bit integer -> HF (vcvt[u]si2sh); upper HF
;; elements of the result are supplied by register operand 1.
7126 (define_insn "avx512fp16_vcvt<floatsuffix>si2sh<rex64namesuffix><round_name>"
7127 [(set (match_operand:V8HF 0 "register_operand" "=v")
7131 (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
7132 (match_operand:V8HF 1 "register_operand" "v")
7135 "vcvt<floatsuffix>si2sh{<sseicvtsuffix>}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
7136 [(set_attr "type" "sseicvt")
7137 (set_attr "prefix" "evex")
7138 (set_attr "mode" "HF")])
;; Standard-named expander: truncating HF vector -> (un)signed int vector.
7140 (define_expand "fix<fixunssuffix>_trunc<ssePHmodelower><mode>2"
7141 [(set (match_operand:VI2H_AVX512VL 0 "register_operand")
7142 (any_fix:VI2H_AVX512VL
7143 (match_operand:<ssePHmode> 1 "nonimmediate_operand")))]
7144 "TARGET_AVX512FP16")
;; vcvttph2[u]{w,dq,qq} insn with masking and SAE-only rounding control.
7146 (define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name><round_saeonly_name>"
7147 [(set (match_operand:VI2H_AVX512VL 0 "register_operand" "=v")
7148 (any_fix:VI2H_AVX512VL
7149 (match_operand:<ssePHmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
7151 "vcvttph2<fixsuffix><sseintconvert>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
7152 [(set_attr "type" "ssecvt")
7153 (set_attr "prefix" "evex")
7154 (set_attr "mode" "<sseinsnmode>")])
;; V4HF -> 4-element int: a register source is widened to a V8HF subreg
;; (the insn below selects its low 4 lanes); a memory source is handled
;; by the dedicated _load pattern.
7156 (define_expand "fix<fixunssuffix>_truncv4hf<mode>2"
7157 [(set (match_operand:VI4_128_8_256 0 "register_operand")
7158 (any_fix:VI4_128_8_256
7159 (match_operand:V4HF 1 "nonimmediate_operand")))]
7160 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7162 if (!MEM_P (operands[1]))
7164 operands[1] = lowpart_subreg (V8HFmode,
7165 force_reg (V4HFmode, operands[1]),
7167 emit_insn (gen_avx512fp16_fix<fixunssuffix>_trunc<mode>2 (operands[0],
;; Register-source insn: converts the low 4 HF lanes of a V8HF register.
7173 (define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name>"
7174 [(set (match_operand:VI4_128_8_256 0 "register_operand" "=v")
7175 (any_fix:VI4_128_8_256
7177 (match_operand:V8HF 1 "register_operand" "v")
7178 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
7179 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7180 "vcvttph2<fixsuffix><sseintconvert>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7181 [(set_attr "type" "ssecvt")
7182 (set_attr "prefix" "evex")
7183 (set_attr "mode" "<sseinsnmode>")])
;; Memory-source insn: loads 64 bits (%q1) of V4HF directly from memory.
7185 (define_insn "*avx512fp16_fix<fixunssuffix>_trunc<mode>2_load<mask_name>"
7186 [(set (match_operand:VI4_128_8_256 0 "register_operand" "=v")
7187 (any_fix:VI4_128_8_256
7188 (match_operand:V4HF 1 "memory_operand" "m")))]
7189 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7190 "vcvttph2<fixsuffix><sseintconvert>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
7191 [(set_attr "type" "ssecvt")
7192 (set_attr "prefix" "evex")
7193 (set_attr "mode" "<sseinsnmode>")])
;; V2HF -> V2DI truncating conversion; register sources are widened to
;; a V8HF subreg, memory sources go through the _load pattern below.
7195 (define_expand "fix<fixunssuffix>_truncv2hfv2di2"
7196 [(set (match_operand:V2DI 0 "register_operand")
7198 (match_operand:V2HF 1 "nonimmediate_operand")))]
7199 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7201 if (!MEM_P (operands[1]))
7203 operands[1] = lowpart_subreg (V8HFmode,
7204 force_reg (V2HFmode, operands[1]),
7206 emit_insn (gen_avx512fp16_fix<fixunssuffix>_truncv2di2 (operands[0],
;; Register-source insn: vcvttph2[u]qq on the low 2 HF lanes.
7212 (define_insn "avx512fp16_fix<fixunssuffix>_truncv2di2<mask_name>"
7213 [(set (match_operand:V2DI 0 "register_operand" "=v")
7216 (match_operand:V8HF 1 "nonimmediate_operand" "v")
7217 (parallel [(const_int 0) (const_int 1)]))))]
7218 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7219 "vcvttph2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7220 [(set_attr "type" "ssecvt")
7221 (set_attr "prefix" "evex")
7222 (set_attr "mode" "TI")])
;; Memory-source insn: loads 32 bits (%k1) of V2HF from memory.
7224 (define_insn "*avx512fp16_fix<fixunssuffix>_truncv2di2_load<mask_name>"
7225 [(set (match_operand:V2DI 0 "register_operand" "=v")
7227 (match_operand:V2HF 1 "memory_operand" "m")))]
7228 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7229 "vcvttph2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
7230 [(set_attr "type" "ssecvt")
7231 (set_attr "prefix" "evex")
7232 (set_attr "mode" "TI")])
;; Scalar truncating HF -> 32/64-bit integer (vcvttsh2[u]si) from element 0
;; of a V8HF register, with SAE-only rounding control.
7234 (define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<round_saeonly_name>"
7235 [(set (match_operand:SWI48 0 "register_operand" "=r")
7238 (match_operand:V8HF 1 "register_operand" "v")
7239 (parallel [(const_int 0)]))))]
7241 "%vcvttsh2<fixsuffix>si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
7242 [(set_attr "type" "sseicvt")
7243 (set_attr "prefix" "evex")
7244 (set_attr "mode" "<MODE>")])
;; Memory-operand form of the scalar truncating conversion.
7246 (define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2_mem"
7247 [(set (match_operand:SWI48 0 "register_operand" "=r")
7249 (match_operand:HF 1 "memory_operand" "vm")))]
7251 "%vcvttsh2<fixsuffix>si\t{%1, %0|%0, %1}"
7252 [(set_attr "type" "sseicvt")
7253 (set_attr "prefix" "evex")
7254 (set_attr "mode" "<MODE>")])
;; Mnemonic suffix for ph<->ps/pd conversions: "x" for single precision
;; (vcvtph2psx), empty for double (vcvtph2pd).
7256 (define_mode_attr ph2pssuffix
7257 [(V16SF "x") (V8SF "x") (V4SF "x")
7258 (V8DF "") (V4DF "") (V2DF "")])
;; Standard-named expander: widen an HF vector to SF/DF.
7260 (define_expand "extend<ssePHmodelower><mode>2"
7261 [(set (match_operand:VF48H_AVX512VL 0 "register_operand")
7262 (float_extend:VF48H_AVX512VL
7263 (match_operand:<ssePHmode> 1 "nonimmediate_operand")))]
7264 "TARGET_AVX512FP16")
;; vcvtph2psx / vcvtph2pd insn with masking and SAE-only rounding control.
7266 (define_insn "avx512fp16_float_extend_ph<mode>2<mask_name><round_saeonly_name>"
7267 [(set (match_operand:VF48H_AVX512VL 0 "register_operand" "=v")
7268 (float_extend:VF48H_AVX512VL
7269 (match_operand:<ssePHmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
7271 "vcvtph2<castmode><ph2pssuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
7272 [(set_attr "type" "ssecvt")
7273 (set_attr "prefix" "evex")
7274 (set_attr "mode" "<sseinsnmode>")])
;; V4HF -> 4-element SF/DF extension; register sources are widened to a
;; V8HF subreg (the insn below selects the low 4 lanes).
7276 (define_expand "extendv4hf<mode>2"
7277 [(set (match_operand:VF4_128_8_256 0 "register_operand")
7278 (float_extend:VF4_128_8_256
7279 (match_operand:V4HF 1 "nonimmediate_operand")))]
7280 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7282 if (!MEM_P (operands[1]))
7284 operands[1] = lowpart_subreg (V8HFmode,
7285 force_reg (V4HFmode, operands[1]),
7287 emit_insn (gen_avx512fp16_float_extend_ph<mode>2
7288 (operands[0], operands[1]));
;; Register-source insn: extends the low 4 HF lanes of a V8HF register.
7293 (define_insn "avx512fp16_float_extend_ph<mode>2<mask_name>"
7294 [(set (match_operand:VF4_128_8_256 0 "register_operand" "=v")
7295 (float_extend:VF4_128_8_256
7297 (match_operand:V8HF 1 "register_operand" "v")
7298 (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))))]
7299 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7300 "vcvtph2<castmode><ph2pssuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
7301 [(set_attr "type" "ssecvt")
7302 (set_attr "prefix" "evex")
7303 (set_attr "mode" "<sseinsnmode>")])
;; Memory-source insn: loads 64 bits (%q1) of V4HF from memory.
7305 (define_insn "*avx512fp16_float_extend_ph<mode>2_load<mask_name>"
7306 [(set (match_operand:VF4_128_8_256 0 "register_operand" "=v")
7307 (float_extend:VF4_128_8_256
7308 (match_operand:V4HF 1 "memory_operand" "m")))]
7309 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7310 "vcvtph2<castmode><ph2pssuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
7311 [(set_attr "type" "ssecvt")
7312 (set_attr "prefix" "evex")
7313 (set_attr "mode" "<sseinsnmode>")])
;; V2HF -> V2DF extension; register sources are widened to a V8HF
;; subreg, memory sources go through the _load pattern below.
7315 (define_expand "extendv2hfv2df2"
7316 [(set (match_operand:V2DF 0 "register_operand")
7318 (match_operand:V2HF 1 "nonimmediate_operand")))]
7319 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7321 if (!MEM_P (operands[1]))
7323 operands[1] = lowpart_subreg (V8HFmode,
7324 force_reg (V2HFmode, operands[1]),
7326 emit_insn (gen_avx512fp16_float_extend_phv2df2
7327 (operands[0], operands[1]));
;; Register-source insn: vcvtph2pd on the low 2 HF lanes.
7332 (define_insn "avx512fp16_float_extend_phv2df2<mask_name>"
7333 [(set (match_operand:V2DF 0 "register_operand" "=v")
7336 (match_operand:V8HF 1 "register_operand" "v")
7337 (parallel [(const_int 0) (const_int 1)]))))]
7338 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7339 "vcvtph2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7340 [(set_attr "type" "ssecvt")
7341 (set_attr "prefix" "evex")
7342 (set_attr "mode" "TI")])
;; Memory-source insn: loads 32 bits (%k1) of V2HF from memory.
7344 (define_insn "*avx512fp16_float_extend_phv2df2_load<mask_name>"
7345 [(set (match_operand:V2DF 0 "register_operand" "=v")
7347 (match_operand:V2HF 1 "memory_operand" "m")))]
7348 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7349 "vcvtph2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
7350 [(set_attr "type" "ssecvt")
7351 (set_attr "prefix" "evex")
7352 (set_attr "mode" "TI")])
;; Standard-named expander: narrow an SF/DF vector to HF.
7354 (define_expand "trunc<mode><ssePHmodelower>2"
7355 [(set (match_operand:<ssePHmode> 0 "register_operand")
7356 (float_truncate:<ssePHmode>
7357 (match_operand:VF48H_AVX512VL 1 "nonimmediate_operand")))]
7358 "TARGET_AVX512FP16")
;; vcvtps2phx / vcvtpd2ph insn with masking and embedded rounding.
7360 (define_insn "avx512fp16_vcvt<castmode>2ph_<mode><mask_name><round_name>"
7361 [(set (match_operand:<ssePHmode> 0 "register_operand" "=v")
7362 (float_truncate:<ssePHmode>
7363 (match_operand:VF48H_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")))]
7365 "vcvt<castmode>2ph<ph2pssuffix><round_qq2phsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
7366 [(set_attr "type" "ssecvt")
7367 (set_attr "prefix" "evex")
7368 (set_attr "mode" "<sseinsnmode>")])
;; 4-element SF/DF -> V4HF: hardware produces a V8HF with zeroed upper
;; half, so convert into a V8HF temporary and take the V4HF lowpart.
7370 (define_expand "trunc<mode>v4hf2"
7371 [(set (match_operand:V4HF 0 "register_operand")
7372 (float_truncate:V4HF (match_operand:VF4_128_8_256 1 "vector_operand")))]
7373 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7375 rtx dest = gen_reg_rtx (V8HFmode);
7377 emit_insn (gen_avx512fp16_trunc<mode>v4hf2 (dest, operands[1]));
7378 emit_move_insn (operands[0], lowpart_subreg (V4HFmode, dest, V8HFmode));
;; Helper expander: truncated V4HF concatenated with a V4HF zero vector.
7382 (define_expand "avx512fp16_trunc<mode>v4hf2"
7383 [(set (match_operand:V8HF 0 "register_operand")
7385 (float_truncate:V4HF
7386 (match_operand:VF4_128_8_256 1 "vector_operand"))
7388 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7389 "operands[2] = CONST0_RTX (V4HFmode);")
;; Matching insn for the unmasked narrowing form.
7391 (define_insn "*avx512fp16_vcvt<castmode>2ph_<mode>"
7392 [(set (match_operand:V8HF 0 "register_operand" "=v")
7394 (float_truncate:V4HF
7395 (match_operand:VF4_128_8_256 1 "vector_operand" "vm"))
7396 (match_operand:V4HF 2 "const0_operand")))]
7397 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7398 "vcvt<castmode>2ph<ph2pssuffix><qq2phsuff>\t{%1, %0|%0, %1}"
7399 [(set_attr "type" "ssecvt")
7400 (set_attr "prefix" "evex")
7401 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked expander: low 4 HF elements of operand 2 merge under
;; QI mask operand 3; operand 4 is the zero upper half.
7403 (define_expand "avx512fp16_vcvt<castmode>2ph_<mode>_mask"
7404 [(set (match_operand:V8HF 0 "register_operand" "=v")
7407 (float_truncate:V4HF
7408 (match_operand:VF4_128_8_256 1 "vector_operand" "vm"))
7410 (match_operand:V8HF 2 "nonimm_or_0_operand" "0C")
7411 (parallel [(const_int 0) (const_int 1)
7412 (const_int 2) (const_int 3)]))
7413 (match_operand:QI 3 "register_operand" "Yk"))
7415 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7416 "operands[4] = CONST0_RTX (V4HFmode);")
;; Insn for the merge-masked form; %N2 prints {z} for a zero merge.
7418 (define_insn "*avx512fp16_vcvt<castmode>2ph_<mode>_mask"
7419 [(set (match_operand:V8HF 0 "register_operand" "=v")
7422 (float_truncate:V4HF
7423 (match_operand:VF4_128_8_256 1 "vector_operand" "vm"))
7425 (match_operand:V8HF 2 "nonimm_or_0_operand" "0C")
7426 (parallel [(const_int 0) (const_int 1)
7427 (const_int 2) (const_int 3)]))
7428 (match_operand:QI 3 "register_operand" "Yk"))
7429 (match_operand:V4HF 4 "const0_operand")))]
7430 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7431 "vcvt<castmode>2ph<ph2pssuffix><qq2phsuff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7432 [(set_attr "type" "ssecvt")
7433 (set_attr "prefix" "evex")
7434 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masked variant with an explicit {z} modifier.
7436 (define_insn "*avx512fp16_vcvt<castmode>2ph_<mode>_mask_1"
7437 [(set (match_operand:V8HF 0 "register_operand" "=v")
7440 (float_truncate:V4HF
7441 (match_operand:VF4_128_8_256 1 "vector_operand" "vm"))
7442 (match_operand:V4HF 3 "const0_operand")
7443 (match_operand:QI 2 "register_operand" "Yk"))
7444 (match_operand:V4HF 4 "const0_operand")))]
7445 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7446 "vcvt<castmode>2ph<ph2pssuffix><qq2phsuff>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
7447 [(set_attr "type" "ssecvt")
7448 (set_attr "prefix" "evex")
7449 (set_attr "mode" "<sseinsnmode>")])
;; V2DF -> V2HF truncation (vcvtpd2ph{x}): converted V2HF is zero-extended
;; to V8HF by the insn, so go through a V8HF temporary and take the lowpart.
7451 (define_expand "truncv2dfv2hf2"
7452 [(set (match_operand:V2HF 0 "register_operand")
7453 (float_truncate:V2HF (match_operand:V2DF 1 "vector_operand")))]
7454 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7456 rtx dest = gen_reg_rtx (V8HFmode);
7457 emit_insn (gen_avx512fp16_truncv2dfv2hf2 (dest, operands[1]));
7458 emit_move_insn (operands[0], lowpart_subreg (V2HFmode, dest, V8HFmode));
;; Helper expander: V2HF result padded with a V6HF zero vector (2+6=8 HF).
7462 (define_expand "avx512fp16_truncv2dfv2hf2"
7463 [(set (match_operand:V8HF 0 "register_operand")
7465 (float_truncate:V2HF (match_operand:V2DF 1 "vector_operand"))
7467 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7468 "operands[2] = CONST0_RTX (V6HFmode);")
;; Matching insn for the unmasked form.
7470 (define_insn "*avx512fp16_vcvtpd2ph_v2df"
7471 [(set (match_operand:V8HF 0 "register_operand" "=v")
7473 (float_truncate:V2HF
7474 (match_operand:V2DF 1 "vector_operand" "vm"))
7475 (match_operand:V6HF 2 "const0_operand")))]
7476 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7477 "vcvtpd2ph{x}\t{%1, %0|%0, %1}"
7478 [(set_attr "type" "ssecvt")
7479 (set_attr "prefix" "evex")
7480 (set_attr "mode" "TI")])
;; Merge-masked expander: low 2 HF elements of operand 2 merge under
;; QI mask operand 3; operand 4 is the V6HF zero upper part.
7482 (define_expand "avx512fp16_vcvtpd2ph_v2df_mask"
7483 [(set (match_operand:V8HF 0 "register_operand" "=v")
7486 (float_truncate:V2HF
7487 (match_operand:V2DF 1 "vector_operand" "vm"))
7489 (match_operand:V8HF 2 "nonimm_or_0_operand" "0C")
7490 (parallel [(const_int 0) (const_int 1)]))
7491 (match_operand:QI 3 "register_operand" "Yk"))
7493 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7494 "operands[4] = CONST0_RTX (V6HFmode);")
;; Insn for the merge-masked form; %N2 prints {z} for a zero merge.
7496 (define_insn "*avx512fp16_vcvtpd2ph_v2df_mask"
7497 [(set (match_operand:V8HF 0 "register_operand" "=v")
7500 (float_truncate:V2HF (match_operand:V2DF 1 "vector_operand" "vm"))
7502 (match_operand:V8HF 2 "nonimm_or_0_operand" "0C")
7503 (parallel [(const_int 0) (const_int 1)]))
7504 (match_operand:QI 3 "register_operand" "Yk"))
7505 (match_operand:V6HF 4 "const0_operand")))]
7506 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7507 "vcvtpd2ph{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7508 [(set_attr "type" "ssecvt")
7509 (set_attr "prefix" "evex")
7510 (set_attr "mode" "TI")])
;; Zero-masked variant with an explicit {z} modifier.
7512 (define_insn "*avx512fp16_vcvtpd2ph_v2df_mask_1"
7513 [(set (match_operand:V8HF 0 "register_operand" "=v")
7516 (float_truncate:V2HF
7517 (match_operand:V2DF 1 "vector_operand" "vm"))
7518 (match_operand:V2HF 3 "const0_operand")
7519 (match_operand:QI 2 "register_operand" "Yk"))
7520 (match_operand:V6HF 4 "const0_operand")))]
7521 "TARGET_AVX512FP16 && TARGET_AVX512VL"
7522 "vcvtpd2ph{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
7523 [(set_attr "type" "ssecvt")
7524 (set_attr "prefix" "evex")
7525 (set_attr "mode" "TI")])
;; Scalar HF (element 0 of operand 1) -> SF/DF (vcvtsh2ss/vcvtsh2sd);
;; upper elements of the result come from register operand 2.
7527 (define_insn "avx512fp16_vcvtsh2<ssescalarmodesuffix><mask_scalar_name><round_saeonly_scalar_name>"
7528 [(set (match_operand:VF48_128 0 "register_operand" "=v")
7530 (vec_duplicate:VF48_128
7531 (float_extend:<ssescalarmode>
7533 (match_operand:V8HF 1 "register_operand" "v")
7534 (parallel [(const_int 0)]))))
7535 (match_operand:VF48_128 2 "register_operand" "v")
7538 "vcvtsh2<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_saeonly_scalar_mask_op3>}"
7539 [(set_attr "type" "ssecvt")
7540 (set_attr "prefix" "evex")
7541 (set_attr "mode" "TI")])
;; Memory-operand form: the scalar HF source is loaded directly.
7543 (define_insn "avx512fp16_vcvtsh2<ssescalarmodesuffix><mask_scalar_name>_mem"
7544 [(set (match_operand:VF48_128 0 "register_operand" "=v")
7546 (vec_duplicate:VF48_128
7547 (float_extend:<ssescalarmode>
7548 (match_operand:HF 1 "memory_operand" "m")))
7549 (match_operand:VF48_128 2 "register_operand" "v")
7552 "vcvtsh2<ssescalarmodesuffix>\t{%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1}"
7553 [(set_attr "type" "ssecvt")
7554 (set_attr "prefix" "evex")
7555 (set_attr "mode" "TI")])
;; Scalar SF/DF (element 0 of operand 1) -> HF (vcvtss2sh/vcvtsd2sh);
;; upper HF elements come from register operand 2.
7557 (define_insn "avx512fp16_vcvt<ssescalarmodesuffix>2sh<mask_scalar_name><round_scalar_name>"
7558 [(set (match_operand:V8HF 0 "register_operand" "=v")
7562 (vec_select:<ssescalarmode>
7563 (match_operand:VF48_128 1 "register_operand" "v")
7564 (parallel [(const_int 0)]))))
7565 (match_operand:V8HF 2 "register_operand" "v")
7568 "vcvt<ssescalarmodesuffix>2sh\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_scalar_mask_op3>}"
7569 [(set_attr "type" "ssecvt")
7570 (set_attr "prefix" "evex")
7571 (set_attr "mode" "TI")])
;; Memory-operand form of the SF/DF -> HF scalar conversion.
7573 (define_insn "avx512fp16_vcvt<ssescalarmodesuffix>2sh<mask_scalar_name>_mem"
7574 [(set (match_operand:V8HF 0 "register_operand" "=v")
7578 (match_operand:MODEF 1 "memory_operand" "m")))
7579 (match_operand:V8HF 2 "register_operand" "v")
7582 "vcvt<ssescalarmodesuffix>2sh\t{%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1}"
7583 [(set_attr "type" "ssecvt")
7584 (set_attr "prefix" "evex")
7585 (set_attr "mode" "TI")])
7587 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7589 ;; Parallel single-precision floating point conversion operations
7591 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: convert V2SI (MMX or SSE reg) to SF and merge into the low
;; two lanes of a V4SF.  When the input ends up in an SSE register after
;; reload, split into SSE2 cvtdq2ps (gen_floatv4siv4sf2) plus lane-merge
;; shuffles, avoiding MMX entirely.
7593 (define_insn_and_split "sse_cvtpi2ps"
7594 [(set (match_operand:V4SF 0 "register_operand" "=x,x,Yv")
7597 (float:V2SF (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")))
7598 (match_operand:V4SF 1 "register_operand" "0,0,Yv")
7600 (clobber (match_scratch:V4SF 3 "=X,x,Yv"))]
7601 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
7603 cvtpi2ps\t{%2, %0|%0, %2}
7606 "TARGET_SSE2 && reload_completed
7607 && SSE_REG_P (operands[2])"
7610 rtx op2 = lowpart_subreg (V4SImode, operands[2],
7611 GET_MODE (operands[2]));
7612 /* Generate SSE2 cvtdq2ps.  */
7613 emit_insn (gen_floatv4siv4sf2 (operands[3], op2));
7615 /* Merge operands[3] with operands[0].  */
7619 mask = gen_rtx_PARALLEL (VOIDmode,
7620 gen_rtvec (4, GEN_INT (0), GEN_INT (1),
7621 GEN_INT (6), GEN_INT (7)));
7622 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
7623 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
7624 emit_insn (gen_rtx_SET (operands[0], op2));
7628 /* NB: SSE can only concatenate OP0 and OP3 to OP0.  */
7629 mask = gen_rtx_PARALLEL (VOIDmode,
7630 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
7631 GEN_INT (4), GEN_INT (5)));
7632 op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
7633 op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
7634 emit_insn (gen_rtx_SET (operands[0], op2));
7636 /* Swap bits 0:63 with bits 64:127.  */
7637 mask = gen_rtx_PARALLEL (VOIDmode,
7638 gen_rtvec (4, GEN_INT (2), GEN_INT (3),
7639 GEN_INT (0), GEN_INT (1)));
7640 rtx dest = lowpart_subreg (V4SImode, operands[0],
7641 GET_MODE (operands[0]));
7642 op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
7643 emit_insn (gen_rtx_SET (dest, op1));
7647 [(set_attr "mmx_isa" "native,sse_noavx,avx")
7648 (set_attr "type" "ssecvt")
7649 (set_attr "mode" "V4SF")])
;; cvtps2pi: round-convert the low two SF lanes of a V4SF to V2SI.
;; When the destination is an SSE register after reload, split into:
;; zero-pad the V2SF source to V4SF, then SSE2 cvtps2dq
;; (gen_sse2_fix_notruncv4sfv4si) into a V4SI subreg of the destination.
7651 (define_insn_and_split "sse_cvtps2pi"
7652 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
7654 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm")]
7656 (parallel [(const_int 0) (const_int 1)])))]
7657 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
7659 cvtps2pi\t{%1, %0|%0, %q1}
7661 "TARGET_SSE2 && reload_completed
7662 && SSE_REG_P (operands[0])"
7665 rtx op1 = lowpart_subreg (V2SFmode, operands[1],
7666 GET_MODE (operands[1]));
7667 rtx tmp = lowpart_subreg (V4SFmode, operands[0],
7668 GET_MODE (operands[0]));
7670 op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
7671 emit_insn (gen_rtx_SET (tmp, op1));
7673 rtx dest = lowpart_subreg (V4SImode, operands[0],
7674 GET_MODE (operands[0]));
7675 emit_insn (gen_sse2_fix_notruncv4sfv4si (dest, tmp));
7678 [(set_attr "isa" "*,sse2")
7679 (set_attr "mmx_isa" "native,*")
7680 (set_attr "type" "ssecvt")
7681 (set_attr "unit" "mmx,*")
7682 (set_attr "mode" "DI")])
;; cvttps2pi: same as sse_cvtps2pi above but truncating (RTL "fix"
;; instead of the UNSPEC_FIX_NOTRUNC form).  The SSE-register split
;; uses gen_fix_truncv4sfv4si2 (cvttps2dq) accordingly.
7684 (define_insn_and_split "sse_cvttps2pi"
7685 [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
7687 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm"))
7688 (parallel [(const_int 0) (const_int 1)])))]
7689 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
7691 cvttps2pi\t{%1, %0|%0, %q1}
7693 "TARGET_SSE2 && reload_completed
7694 && SSE_REG_P (operands[0])"
7697 rtx op1 = lowpart_subreg (V2SFmode, operands[1],
7698 GET_MODE (operands[1]));
7699 rtx tmp = lowpart_subreg (V4SFmode, operands[0],
7700 GET_MODE (operands[0]));
7702 op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
7703 emit_insn (gen_rtx_SET (tmp, op1));
7705 rtx dest = lowpart_subreg (V4SImode, operands[0],
7706 GET_MODE (operands[0]));
7707 emit_insn (gen_fix_truncv4sfv4si2 (dest, tmp));
7710 [(set_attr "isa" "*,sse2")
7711 (set_attr "mmx_isa" "native,*")
7712 (set_attr "type" "ssecvt")
7713 (set_attr "unit" "mmx,*")
7714 (set_attr "prefix_rep" "0")
7715 (set_attr "mode" "SF")])
;; cvtsi2ss / vcvtsi2ss: convert a scalar SI/DI integer (SWI48 iterator)
;; to SF and insert it into the low element of a V4SF register, keeping
;; the other elements from operand 1.  Supports AVX512 embedded rounding
;; via the <round_*> subst attributes.  length_vex / prefix_rex are set
;; conditionally because the DImode form needs a REX.W prefix.
7717 (define_insn "sse_cvtsi2ss<rex64namesuffix><round_name>"
7718 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
7721 (float:SF (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
7722 (match_operand:V4SF 1 "register_operand" "0,0,v")
7726 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
7727 cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
7728 vcvtsi2ss<rex64suffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
7729 [(set_attr "isa" "noavx,noavx,avx")
7730 (set_attr "type" "sseicvt")
7731 (set_attr "athlon_decode" "vector,double,*")
7732 (set_attr "amdfam10_decode" "vector,double,*")
7733 (set_attr "bdver1_decode" "double,direct,*")
7734 (set_attr "btver2_decode" "double,double,double")
7735 (set_attr "znver1_decode" "double,double,double")
7736 (set (attr "length_vex")
7738 (and (match_test "<MODE>mode == DImode")
7739 (eq_attr "alternative" "2"))
7741 (const_string "*")))
7742 (set (attr "prefix_rex")
7744 (and (match_test "<MODE>mode == DImode")
7745 (eq_attr "alternative" "0,1"))
7747 (const_string "*")))
7748 (set_attr "prefix" "orig,orig,maybe_evex")
7749 (set_attr "mode" "SF")])
;; cvtss2si: low SF element of a V4SF operand to scalar SI/DI with
;; rounding (UNSPEC_FIX_NOTRUNC), with optional embedded rounding.
7751 (define_insn "sse_cvtss2si<rex64namesuffix><round_name>"
7752 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
7755 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
7756 (parallel [(const_int 0)]))]
7757 UNSPEC_FIX_NOTRUNC))]
7759 "%vcvtss2si<rex64suffix>\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
7760 [(set_attr "type" "sseicvt")
7761 (set_attr "athlon_decode" "double,vector")
7762 (set_attr "bdver1_decode" "double,double")
7763 (set_attr "prefix_rep" "1")
7764 (set_attr "prefix" "maybe_vex")
7765 (set_attr "mode" "<MODE>")])
;; Variant taking a plain scalar SF operand (may come from memory)
;; instead of the low element of a vector.
7767 (define_insn "sse_cvtss2si<rex64namesuffix>_2"
7768 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
7769 (unspec:SWI48 [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
7770 UNSPEC_FIX_NOTRUNC))]
7772 "%vcvtss2si<rex64suffix>\t{%1, %0|%0, %1}"
7773 [(set_attr "type" "sseicvt")
7774 (set_attr "athlon_decode" "double,vector")
7775 (set_attr "amdfam10_decode" "double,double")
7776 (set_attr "bdver1_decode" "double,double")
7777 (set_attr "prefix_rep" "1")
7778 (set_attr "prefix" "maybe_vex")
7779 (set_attr "mode" "<MODE>")])
;; cvttss2si: truncating conversion; supports SAE ("round toward zero
;; with suppress-all-exceptions") via <round_saeonly_*> substs.
7781 (define_insn "sse_cvttss2si<rex64namesuffix><round_saeonly_name>"
7782 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
7785 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
7786 (parallel [(const_int 0)]))))]
7788 "%vcvttss2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
7789 [(set_attr "type" "sseicvt")
7790 (set_attr "athlon_decode" "double,vector")
7791 (set_attr "amdfam10_decode" "double,double")
7792 (set_attr "bdver1_decode" "double,double")
7793 (set_attr "prefix_rep" "1")
7794 (set_attr "prefix" "maybe_vex")
7795 (set_attr "mode" "<MODE>")])
;; AVX512F vcvtusi2ss/sd: unsigned 32-bit integer to scalar float,
;; merged into the low element of operand 1.
7797 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
7798 [(set (match_operand:VF_128 0 "register_operand" "=v")
7800 (vec_duplicate:VF_128
7801 (unsigned_float:<ssescalarmode>
7802 (match_operand:SI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
7803 (match_operand:VF_128 1 "register_operand" "v")
7805 "TARGET_AVX512F && <round_modev4sf_condition>"
7806 "vcvtusi2<ssescalarmodesuffix>{l}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
7807 [(set_attr "type" "sseicvt")
7808 (set_attr "prefix" "evex")
7809 (set_attr "mode" "<ssescalarmode>")])
;; Same for an unsigned 64-bit integer source; only valid in 64-bit mode.
7811 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
7812 [(set (match_operand:VF_128 0 "register_operand" "=v")
7814 (vec_duplicate:VF_128
7815 (unsigned_float:<ssescalarmode>
7816 (match_operand:DI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
7817 (match_operand:VF_128 1 "register_operand" "v")
7819 "TARGET_AVX512F && TARGET_64BIT"
7820 "vcvtusi2<ssescalarmodesuffix>{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
7821 [(set_attr "type" "sseicvt")
7822 (set_attr "prefix" "evex")
7823 (set_attr "mode" "<ssescalarmode>")])
;; Packed signed int -> float: cvtdq2ps / vcvtdq2ps over the VF1
;; iterator, with optional AVX512 masking and embedded rounding.
7825 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
7826 [(set (match_operand:VF1 0 "register_operand" "=x,v")
7828 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
7829 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
7831 cvtdq2ps\t{%1, %0|%0, %1}
7832 vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
7833 [(set_attr "isa" "noavx,avx")
7834 (set_attr "type" "ssecvt")
7835 (set_attr "prefix" "maybe_vex")
7836 (set_attr "mode" "<sseinsnmode>")])
;; Packed unsigned int -> float: vcvtudq2ps (AVX512VL forms).
7838 (define_insn "<mask_codefor>floatuns<sseintvecmodelower><mode>2<mask_name><round_name>"
7839 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
7840 (unsigned_float:VF1_AVX512VL
7841 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
7843 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
7844 [(set_attr "type" "ssecvt")
7845 (set_attr "prefix" "evex")
7846 (set_attr "mode" "<MODE>")])
;; Expander: without AVX512VL (except V16SF) fall back to the manual
;; unsigned-to-float expansion helper.
7848 (define_expand "floatuns<sseintvecmodelower><mode>2"
7849 [(set (match_operand:VF1 0 "register_operand")
7851 (match_operand:<sseintvecmode> 1 "register_operand")))]
7852 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
7854 if (<MODE>mode != V16SFmode && !TARGET_AVX512VL)
7856 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
7862 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps an integer vector mode to the lowercase name of the matching
;; single-float vector mode.
7863 (define_mode_attr sf2simodelower
7864 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
;; cvtps2dq: packed float -> signed int with rounding (not truncation).
7866 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
7867 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
7869 [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
7870 UNSPEC_FIX_NOTRUNC))]
7871 "TARGET_SSE2 && <mask_mode512bit_condition>"
7872 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7873 [(set_attr "type" "ssecvt")
7874 (set (attr "prefix_data16")
7876 (match_test "TARGET_AVX")
7878 (const_string "1")))
7879 (set_attr "prefix" "maybe_vex")
7880 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit variant with embedded-rounding support.
7882 (define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
7883 [(set (match_operand:V16SI 0 "register_operand" "=v")
7885 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
7886 UNSPEC_FIX_NOTRUNC))]
7888 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
7889 [(set_attr "type" "ssecvt")
7890 (set_attr "prefix" "evex")
7891 (set_attr "mode" "XI")])
;; vcvtps2udq: packed float -> unsigned int with rounding.
7893 (define_insn "<mask_codefor><avx512>_fixuns_notrunc<sf2simodelower><mode><mask_name><round_name>"
7894 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
7895 (unspec:VI4_AVX512VL
7896 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
7897 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
7899 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
7900 [(set_attr "type" "ssecvt")
7901 (set_attr "prefix" "evex")
7902 (set_attr "mode" "<sseinsnmode>")])
;; AVX512DQ vcvtps2qq: packed float -> signed 64-bit int (256/512-bit).
7904 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
7905 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
7906 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
7907 UNSPEC_FIX_NOTRUNC))]
7908 "TARGET_AVX512DQ && <round_mode512bit_condition>"
7909 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
7910 [(set_attr "type" "ssecvt")
7911 (set_attr "prefix" "evex")
7912 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit vcvtps2qq: converts the low two SF elements of operand 1.
7914 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
7915 [(set (match_operand:V2DI 0 "register_operand" "=v")
7918 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
7919 (parallel [(const_int 0) (const_int 1)]))]
7920 UNSPEC_FIX_NOTRUNC))]
7921 "TARGET_AVX512DQ && TARGET_AVX512VL"
7922 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
7923 [(set_attr "type" "ssecvt")
7924 (set_attr "prefix" "evex")
7925 (set_attr "mode" "TI")])
;; Unsigned counterparts: vcvtps2uqq.
7927 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
7928 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
7929 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
7930 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
7931 "TARGET_AVX512DQ && <round_mode512bit_condition>"
7932 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
7933 [(set_attr "type" "ssecvt")
7934 (set_attr "prefix" "evex")
7935 (set_attr "mode" "<sseinsnmode>")])
7937 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
7938 [(set (match_operand:V2DI 0 "register_operand" "=v")
7941 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
7942 (parallel [(const_int 0) (const_int 1)]))]
7943 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
7944 "TARGET_AVX512DQ && TARGET_AVX512VL"
7945 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
7946 [(set_attr "type" "ssecvt")
7947 (set_attr "prefix" "evex")
7948 (set_attr "mode" "TI")])
;; vcvttps2dq / vcvttps2udq: truncating packed float -> int, 512-bit,
;; signed or unsigned via the fixunssuffix/fixsuffix attributes.
7950 (define_insn "fix<fixunssuffix>_truncv16sfv16si2<mask_name><round_saeonly_name>"
7951 [(set (match_operand:V16SI 0 "register_operand" "=v")
7953 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
7955 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
7956 [(set_attr "type" "ssecvt")
7957 (set_attr "prefix" "evex")
7958 (set_attr "mode" "XI")])
;; 256-bit truncating conversion.
7960 (define_insn "fix_truncv8sfv8si2<mask_name>"
7961 [(set (match_operand:V8SI 0 "register_operand" "=v")
7962 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
7963 "TARGET_AVX && <mask_avx512vl_condition>"
7964 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7965 [(set_attr "type" "ssecvt")
7966 (set_attr "prefix" "<mask_prefix>")
7967 (set_attr "mode" "OI")])
;; 128-bit truncating conversion (SSE2 cvttps2dq and VEX/EVEX forms).
7969 (define_insn "fix_truncv4sfv4si2<mask_name>"
7970 [(set (match_operand:V4SI 0 "register_operand" "=v")
7971 (fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
7972 "TARGET_SSE2 && <mask_avx512vl_condition>"
7973 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7974 [(set_attr "type" "ssecvt")
7975 (set (attr "prefix_rep")
7977 (match_test "TARGET_AVX")
7979 (const_string "1")))
7980 (set (attr "prefix_data16")
7982 (match_test "TARGET_AVX")
7984 (const_string "0")))
;; NOTE(review): prefix_data16 is set both conditionally above and
;; unconditionally to "0" here; the second set looks redundant —
;; confirm against attribute semantics before changing.
7985 (set_attr "prefix_data16" "0")
7986 (set_attr "prefix" "<mask_prefix2>")
7987 (set_attr "mode" "TI")])
;; Unsigned truncating float -> int for 128/256-bit vectors.  Without
;; AVX512VL there is no direct instruction, so expand to an
;; adjust-and-signed-convert sequence followed by an XOR fixup
;; (ix86_expand_adjust_ufix_to_sfix_si supplies the correction mask).
7989 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
7990 [(set (match_operand:<sseintvecmode> 0 "register_operand")
7991 (unsigned_fix:<sseintvecmode>
7992 (match_operand:VF1_128_256 1 "register_operand")))]
7995 /* AVX512 support vcvttps2udq for all 128/256/512-bit vectors.  */
7996 if (!TARGET_AVX512VL)
7999 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
8000 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
8001 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
8002 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
8007 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8009 ;; Parallel double-precision floating point conversion operations
8011 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd / cvtdq2pd: two packed SI values -> two DF values.  The
;; first alternative uses the SSE form, the second the native MMX one.
8013 (define_insn "sse2_cvtpi2pd"
8014 [(set (match_operand:V2DF 0 "register_operand" "=v,?!x")
8015 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "vBm,yBm")))]
8018 %vcvtdq2pd\t{%1, %0|%0, %1}
8019 cvtpi2pd\t{%1, %0|%0, %1}"
8020 [(set_attr "mmx_isa" "*,native")
8021 (set_attr "type" "ssecvt")
8022 (set_attr "unit" "*,mmx")
8023 (set_attr "prefix_data16" "*,1")
8024 (set_attr "prefix" "maybe_vex,*")
8025 (set_attr "mode" "V2DF")])
;; Standard-named expander when MMX is emulated with SSE.
8027 (define_expand "floatv2siv2df2"
8028 [(set (match_operand:V2DF 0 "register_operand")
8029 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand")))]
8030 "TARGET_MMX_WITH_SSE")
;; Unsigned variant: vcvtudq2pd, needs AVX512VL.
8032 (define_insn "floatunsv2siv2df2"
8033 [(set (match_operand:V2DF 0 "register_operand" "=v")
8034 (unsigned_float:V2DF
8035 (match_operand:V2SI 1 "nonimmediate_operand" "vm")))]
8036 "TARGET_MMX_WITH_SSE && TARGET_AVX512VL"
8037 "vcvtudq2pd\t{%1, %0|%0, %1}"
8038 [(set_attr "type" "ssecvt")
8039 (set_attr "prefix" "evex")
8040 (set_attr "mode" "V2DF")])
;; cvtpd2pi / cvtpd2dq: two DF values -> two SI values with rounding.
;; {x} suffix in the AVX template disambiguates the 128-bit operand size.
8042 (define_insn "sse2_cvtpd2pi"
8043 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
8044 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm,xBm")]
8045 UNSPEC_FIX_NOTRUNC))]
8048 * return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvtpd2dq\t{%1, %0|%0, %1}\";
8049 cvtpd2pi\t{%1, %0|%0, %1}"
8050 [(set_attr "mmx_isa" "*,native")
8051 (set_attr "type" "ssecvt")
8052 (set_attr "unit" "*,mmx")
8053 (set_attr "amdfam10_decode" "double")
8054 (set_attr "athlon_decode" "vector")
8055 (set_attr "bdver1_decode" "double")
8056 (set_attr "prefix_data16" "*,1")
8057 (set_attr "prefix" "maybe_vex,*")
8058 (set_attr "mode" "TI")])
;; Truncating counterpart: cvttpd2pi / cvttpd2dq.
8060 (define_insn "sse2_cvttpd2pi"
8061 [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
8062 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm,xBm")))]
8065 * return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvttpd2dq\t{%1, %0|%0, %1}\";
8066 cvttpd2pi\t{%1, %0|%0, %1}"
8067 [(set_attr "mmx_isa" "*,native")
8068 (set_attr "type" "ssecvt")
8069 (set_attr "unit" "*,mmx")
8070 (set_attr "amdfam10_decode" "double")
8071 (set_attr "athlon_decode" "vector")
8072 (set_attr "bdver1_decode" "double")
8073 (set_attr "prefix_data16" "*,1")
8074 (set_attr "prefix" "maybe_vex,*")
8075 (set_attr "mode" "TI")])
;; Standard-named truncating expander for MMX-with-SSE.
8077 (define_expand "fix_truncv2dfv2si2"
8078 [(set (match_operand:V2SI 0 "register_operand")
8079 (fix:V2SI (match_operand:V2DF 1 "vector_operand")))]
8080 "TARGET_MMX_WITH_SSE")
;; Unsigned truncating conversion: vcvttpd2udq (AVX512VL).
8082 (define_insn "fixuns_truncv2dfv2si2"
8083 [(set (match_operand:V2SI 0 "register_operand" "=v")
8085 (match_operand:V2DF 1 "nonimmediate_operand" "vm")))]
8086 "TARGET_MMX_WITH_SSE && TARGET_AVX512VL"
8087 "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
8088 [(set_attr "type" "ssecvt")
8089 (set_attr "prefix" "evex")
8090 (set_attr "mode" "TI")])
;; cvtsi2sd{l}: 32-bit integer -> DF merged into the low element of a
;; V2DF register, other element taken from operand 1.
8092 (define_insn "sse2_cvtsi2sd"
8093 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
8096 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
8097 (match_operand:V2DF 1 "register_operand" "0,0,v")
8101 cvtsi2sd{l}\t{%2, %0|%0, %2}
8102 cvtsi2sd{l}\t{%2, %0|%0, %2}
8103 vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}"
8104 [(set_attr "isa" "noavx,noavx,avx")
8105 (set_attr "type" "sseicvt")
8106 (set_attr "athlon_decode" "double,direct,*")
8107 (set_attr "amdfam10_decode" "vector,double,*")
8108 (set_attr "bdver1_decode" "double,direct,*")
8109 (set_attr "btver2_decode" "double,double,double")
8110 (set_attr "znver1_decode" "double,double,double")
8111 (set_attr "prefix" "orig,orig,maybe_evex")
8112 (set_attr "mode" "DF")])
;; cvtsi2sd{q}: 64-bit integer source; 64-bit mode only, with optional
;; embedded rounding via <round_*>.
8114 (define_insn "sse2_cvtsi2sdq<round_name>"
8115 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
8118 (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
8119 (match_operand:V2DF 1 "register_operand" "0,0,v")
8121 "TARGET_SSE2 && TARGET_64BIT"
8123 cvtsi2sd{q}\t{%2, %0|%0, %2}
8124 cvtsi2sd{q}\t{%2, %0|%0, %2}
8125 vcvtsi2sd{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
8126 [(set_attr "isa" "noavx,noavx,avx")
8127 (set_attr "type" "sseicvt")
8128 (set_attr "athlon_decode" "double,direct,*")
8129 (set_attr "amdfam10_decode" "vector,double,*")
8130 (set_attr "bdver1_decode" "double,direct,*")
8131 (set_attr "length_vex" "*,*,4")
8132 (set_attr "prefix_rex" "1,1,*")
8133 (set_attr "prefix" "orig,orig,maybe_evex")
8134 (set_attr "mode" "DF")])
;; AVX512F scalar float -> unsigned integer conversions.
;; vcvtss2usi: low SF element -> unsigned SI/DI, rounded.
8136 (define_insn "avx512f_vcvtss2usi<rex64namesuffix><round_name>"
8137 [(set (match_operand:SWI48 0 "register_operand" "=r")
8140 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
8141 (parallel [(const_int 0)]))]
8142 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
8144 "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
8145 [(set_attr "type" "sseicvt")
8146 (set_attr "prefix" "evex")
8147 (set_attr "mode" "<MODE>")])
;; vcvttss2usi: truncating form (SAE only).
8149 (define_insn "avx512f_vcvttss2usi<rex64namesuffix><round_saeonly_name>"
8150 [(set (match_operand:SWI48 0 "register_operand" "=r")
8153 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
8154 (parallel [(const_int 0)]))))]
8156 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
8157 [(set_attr "type" "sseicvt")
8158 (set_attr "prefix" "evex")
8159 (set_attr "mode" "<MODE>")])
;; vcvtsd2usi: low DF element -> unsigned SI/DI, rounded.
8161 (define_insn "avx512f_vcvtsd2usi<rex64namesuffix><round_name>"
8162 [(set (match_operand:SWI48 0 "register_operand" "=r")
8165 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
8166 (parallel [(const_int 0)]))]
8167 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
8169 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
8170 [(set_attr "type" "sseicvt")
8171 (set_attr "prefix" "evex")
8172 (set_attr "mode" "<MODE>")])
;; vcvttsd2usi: truncating form (SAE only).
8174 (define_insn "avx512f_vcvttsd2usi<rex64namesuffix><round_saeonly_name>"
8175 [(set (match_operand:SWI48 0 "register_operand" "=r")
8178 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
8179 (parallel [(const_int 0)]))))]
8181 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
8182 [(set_attr "type" "sseicvt")
8183 (set_attr "prefix" "evex")
8184 (set_attr "mode" "<MODE>")])
;; cvtsd2si: low DF element of a V2DF operand -> scalar SI/DI, rounded.
8186 (define_insn "sse2_cvtsd2si<rex64namesuffix><round_name>"
8187 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
8190 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
8191 (parallel [(const_int 0)]))]
8192 UNSPEC_FIX_NOTRUNC))]
8194 "%vcvtsd2si<rex64suffix>\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
8195 [(set_attr "type" "sseicvt")
8196 (set_attr "athlon_decode" "double,vector")
8197 (set_attr "bdver1_decode" "double,double")
8198 (set_attr "btver2_decode" "double,double")
8199 (set_attr "prefix_rep" "1")
8200 (set_attr "prefix" "maybe_vex")
8201 (set_attr "mode" "<MODE>")])
;; Variant taking a plain scalar DF operand (possibly in memory).
8203 (define_insn "sse2_cvtsd2si<rex64namesuffix>_2"
8204 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
8205 (unspec:SWI48 [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
8206 UNSPEC_FIX_NOTRUNC))]
8208 "%vcvtsd2si<rex64suffix>\t{%1, %0|%0, %q1}"
8209 [(set_attr "type" "sseicvt")
8210 (set_attr "athlon_decode" "double,vector")
8211 (set_attr "amdfam10_decode" "double,double")
8212 (set_attr "bdver1_decode" "double,double")
8213 (set_attr "prefix_rep" "1")
8214 (set_attr "prefix" "maybe_vex")
8215 (set_attr "mode" "<MODE>")])
;; cvttsd2si: truncating form, SAE-only rounding substs.
8217 (define_insn "sse2_cvttsd2si<rex64namesuffix><round_saeonly_name>"
8218 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
8221 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
8222 (parallel [(const_int 0)]))))]
8224 "%vcvttsd2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
8225 [(set_attr "type" "sseicvt")
8226 (set_attr "athlon_decode" "double,vector")
8227 (set_attr "amdfam10_decode" "double,double")
8228 (set_attr "bdver1_decode" "double,double")
8229 (set_attr "btver2_decode" "double,double")
8230 (set_attr "prefix_rep" "1")
8231 (set_attr "prefix" "maybe_vex")
8232 (set_attr "mode" "<MODE>")])
8234 ;; For float<si2dfmode><mode>2 insn pattern
;; Maps a double-float vector mode to the int vector mode (and its
;; lowercase name) holding the same number of elements.
8235 (define_mode_attr si2dfmode
8236 [(V8DF "V8SI") (V4DF "V4SI")])
8237 (define_mode_attr si2dfmodelower
8238 [(V8DF "v8si") (V4DF "v4si")])
;; vcvtdq2pd: packed signed SI -> DF for 256/512-bit destinations.
8240 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
8241 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
8242 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
8243 "TARGET_AVX && <mask_mode512bit_condition>"
8244 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8245 [(set_attr "type" "ssecvt")
8246 (set_attr "prefix" "maybe_vex")
8247 (set_attr "mode" "<MODE>")])
;; vcvtqq2pd / vcvtuqq2pd: 64-bit ints -> DF (signed or unsigned via
;; the any_float iterator), with masking and embedded rounding.
8249 (define_insn "float<floatunssuffix><sseintvecmodelower><mode>2<mask_name><round_name>"
8250 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
8251 (any_float:VF2_AVX512VL
8252 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
8254 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
8255 [(set_attr "type" "ssecvt")
8256 (set_attr "prefix" "evex")
8257 (set_attr "mode" "<MODE>")])
8259 ;; For float<floatunssuffix><sselondveclower><mode> insn patterns
;; Suffix/mode helpers for the qq2ps conversions: the {y} operand-size
;; suffix is only needed for the V4SF (128-bit result) form.
8260 (define_mode_attr qq2pssuff
8261 [(V8SF "") (V4SF "{y}")])
8263 (define_mode_attr sselongvecmode
8264 [(V8SF "V8DI") (V4SF "V4DI")])
8266 (define_mode_attr sselongvecmodelower
8267 [(V8SF "v8di") (V4SF "v4di")])
8269 (define_mode_attr sseintvecmode3
8270 [(V8SF "XI") (V4SF "OI")
8271 (V8DF "OI") (V4DF "TI")])
;; vcvtqq2ps / vcvtuqq2ps: 64-bit ints -> SF, halving element count.
8273 (define_insn "float<floatunssuffix><sselongvecmodelower><mode>2<mask_name><round_name>"
8274 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
8275 (any_float:VF1_128_256VL
8276 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
8277 "TARGET_AVX512DQ && <round_modev8sf_condition>"
8278 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
8279 [(set_attr "type" "ssecvt")
8280 (set_attr "prefix" "evex")
8281 (set_attr "mode" "<MODE>")])
;; V2DI -> V2SF conversion; the hardware result is a full V4SF with the
;; upper half zeroed, so the expander supplies the V2SF zero operand.
8283 (define_expand "avx512dq_float<floatunssuffix>v2div2sf2"
8284 [(set (match_operand:V4SF 0 "register_operand" "=v")
8286 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
8288 "TARGET_AVX512DQ && TARGET_AVX512VL"
8289 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matching insn: vcvtqq2ps{x} / vcvtuqq2ps{x}.
8291 (define_insn "*avx512dq_float<floatunssuffix>v2div2sf2"
8292 [(set (match_operand:V4SF 0 "register_operand" "=v")
8294 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
8295 (match_operand:V2SF 2 "const0_operand")))]
8296 "TARGET_AVX512DQ && TARGET_AVX512VL"
8297 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
8298 [(set_attr "type" "ssecvt")
8299 (set_attr "prefix" "evex")
8300 (set_attr "mode" "V4SF")])
;; Standard-named expander producing a true V2SF result: convert into a
;; V4SF scratch, then take the low V2SF subreg.
8302 (define_expand "float<floatunssuffix>v2div2sf2"
8303 [(set (match_operand:V2SF 0 "register_operand")
8304 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand")))]
8305 "TARGET_AVX512DQ && TARGET_AVX512VL"
8307 rtx op0 = gen_reg_rtx (V4SFmode);
8309 emit_insn (gen_avx512dq_float<floatunssuffix>v2div2sf2
8310 (op0, operands[1]));
8312 emit_move_insn (operands[0], lowpart_subreg (V2SFmode, op0, V4SFmode));
;; Helper mode attributes for vec_pack<floatprefix>_float_<mode>:
;; concat generator name, temporary result mode, and per-half op name.
8316 (define_mode_attr vpckfloat_concat_mode
8317 [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")])
8318 (define_mode_attr vpckfloat_temp_mode
8319 [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")])
8320 (define_mode_attr vpckfloat_op_mode
8321 [(V8DI "v8sf") (V4DI "v4sf") (V2DI "v2sf")])
;; Convert two 64-bit-int vectors and pack the float halves into one
;; result.  For V2DI the halves land in the low halves of V4SF temps,
;; so movlhps combines them; otherwise use the generic vec_concat.
8323 (define_expand "vec_pack<floatprefix>_float_<mode>"
8324 [(match_operand:<ssePSmode> 0 "register_operand")
8325 (any_float:<ssePSmode>
8326 (match_operand:VI8_AVX512VL 1 "register_operand"))
8327 (match_operand:VI8_AVX512VL 2 "register_operand")]
8330 rtx r1 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
8331 rtx r2 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
8332 rtx (*gen) (rtx, rtx);
8334 if (<MODE>mode == V2DImode)
8335 gen = gen_avx512dq_float<floatunssuffix>v2div2sf2;
8337 gen = gen_float<floatunssuffix><mode><vpckfloat_op_mode>2;
8338 emit_insn (gen (r1, operands[1]));
8339 emit_insn (gen (r2, operands[2]));
8340 if (<MODE>mode == V2DImode)
8341 emit_insn (gen_sse_movlhps (operands[0], r1, r2));
8343 emit_insn (gen_avx_vec_concat<vpckfloat_concat_mode> (operands[0],
;; Masked V2DI -> V2SF conversions (merge-masking with operand 2 as the
;; pass-through source, mask in operand 3; upper V2SF half zeroed).
8348 (define_expand "float<floatunssuffix>v2div2sf2_mask"
8349 [(set (match_operand:V4SF 0 "register_operand" "=v")
8352 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
8354 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
8355 (parallel [(const_int 0) (const_int 1)]))
8356 (match_operand:QI 3 "register_operand" "Yk"))
8358 "TARGET_AVX512DQ && TARGET_AVX512VL"
8359 "operands[4] = CONST0_RTX (V2SFmode);")
;; Matching masked insn.
8361 (define_insn "*float<floatunssuffix>v2div2sf2_mask"
8362 [(set (match_operand:V4SF 0 "register_operand" "=v")
8365 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
8367 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
8368 (parallel [(const_int 0) (const_int 1)]))
8369 (match_operand:QI 3 "register_operand" "Yk"))
8370 (match_operand:V2SF 4 "const0_operand")))]
8371 "TARGET_AVX512DQ && TARGET_AVX512VL"
8372 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8373 [(set_attr "type" "ssecvt")
8374 (set_attr "prefix" "evex")
8375 (set_attr "mode" "V4SF")])
;; Zero-masking variant (the %{z%} template modifier).
8377 (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
8378 [(set (match_operand:V4SF 0 "register_operand" "=v")
8381 (any_float:V2SF (match_operand:V2DI 1
8382 "nonimmediate_operand" "vm"))
8383 (match_operand:V2SF 3 "const0_operand")
8384 (match_operand:QI 2 "register_operand" "Yk"))
8385 (match_operand:V2SF 4 "const0_operand")))]
8386 "TARGET_AVX512DQ && TARGET_AVX512VL"
8387 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
8388 [(set_attr "type" "ssecvt")
8389 (set_attr "prefix" "evex")
8390 (set_attr "mode" "V4SF")])
;; vcvtudq2pd: packed unsigned SI -> DF for 256/512-bit destinations.
8392 (define_insn "floatuns<si2dfmodelower><mode>2<mask_name>"
8393 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
8394 (unsigned_float:VF2_512_256VL
8395 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
8397 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8398 [(set_attr "type" "ssecvt")
8399 (set_attr "prefix" "evex")
8400 (set_attr "mode" "<MODE>")])
;; 128-bit form: converts the low two elements of a V4SI operand.
8402 (define_insn "<mask_codefor>floatunsv2siv2df2<mask_name>"
8403 [(set (match_operand:V2DF 0 "register_operand" "=v")
8404 (unsigned_float:V2DF
8406 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
8407 (parallel [(const_int 0) (const_int 1)]))))]
8409 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
8410 [(set_attr "type" "ssecvt")
8411 (set_attr "prefix" "evex")
8412 (set_attr "mode" "V2DF")])
;; vcvtdq2pd taking the low half of a wider int vector as source:
;; 512-bit form (low 8 of a V16SI; %t1 selects the 256-bit low part).
8414 (define_insn "avx512f_cvtdq2pd512_2"
8415 [(set (match_operand:V8DF 0 "register_operand" "=v")
8418 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
8419 (parallel [(const_int 0) (const_int 1)
8420 (const_int 2) (const_int 3)
8421 (const_int 4) (const_int 5)
8422 (const_int 6) (const_int 7)]))))]
8424 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
8425 [(set_attr "type" "ssecvt")
8426 (set_attr "prefix" "evex")
8427 (set_attr "mode" "V8DF")])
;; 256-bit form (low 4 of a V8SI; %x1 selects the 128-bit low part).
8429 (define_insn "avx_cvtdq2pd256_2"
8430 [(set (match_operand:V4DF 0 "register_operand" "=v")
8433 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
8434 (parallel [(const_int 0) (const_int 1)
8435 (const_int 2) (const_int 3)]))))]
8437 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
8438 [(set_attr "type" "ssecvt")
8439 (set_attr "prefix" "maybe_evex")
8440 (set_attr "mode" "V4DF")])
;; 128-bit form (low 2 of a V4SI), with optional masking.
8442 (define_insn "sse2_cvtdq2pd<mask_name>"
8443 [(set (match_operand:V2DF 0 "register_operand" "=v")
8446 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
8447 (parallel [(const_int 0) (const_int 1)]))))]
8448 "TARGET_SSE2 && <mask_avx512vl_condition>"
8449 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
8450 [(set_attr "type" "ssecvt")
8451 (set_attr "prefix" "maybe_vex")
8452 (set_attr "mode" "V2DF")])
;; vcvtpd2dq: packed DF -> SI with rounding, 512-bit form.
8454 (define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
8455 [(set (match_operand:V8SI 0 "register_operand" "=v")
8457 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
8458 UNSPEC_FIX_NOTRUNC))]
8460 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
8461 [(set_attr "type" "ssecvt")
8462 (set_attr "prefix" "evex")
8463 (set_attr "mode" "OI")])
;; 256-bit form ({y} suffix fixes the operand size).
8465 (define_insn "avx_cvtpd2dq256<mask_name>"
8466 [(set (match_operand:V4SI 0 "register_operand" "=v")
8467 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
8468 UNSPEC_FIX_NOTRUNC))]
8469 "TARGET_AVX && <mask_avx512vl_condition>"
8470 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8471 [(set_attr "type" "ssecvt")
8472 (set_attr "prefix" "<mask_prefix>")
8473 (set_attr "mode" "OI")])
;; Variant producing a V8SI whose upper half is zero; the expander
;; supplies the V4SI zero operand.
8475 (define_expand "avx_cvtpd2dq256_2"
8476 [(set (match_operand:V8SI 0 "register_operand")
8478 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
8482 "operands[2] = CONST0_RTX (V4SImode);")
8484 (define_insn "*avx_cvtpd2dq256_2"
8485 [(set (match_operand:V8SI 0 "register_operand" "=v")
8487 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
8489 (match_operand:V4SI 2 "const0_operand")))]
8491 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
8492 [(set_attr "type" "ssecvt")
8493 (set_attr "prefix" "vex")
8494 (set_attr "btver2_decode" "vector")
8495 (set_attr "mode" "OI")])
;; cvtpd2dq: two DF -> two SI in the low half of a V4SI, upper half
;; zeroed (the const_vector operand models the zeroing).
8497 (define_insn "sse2_cvtpd2dq"
8498 [(set (match_operand:V4SI 0 "register_operand" "=v")
8500 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
8502 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
8506 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
8508 return "cvtpd2dq\t{%1, %0|%0, %1}";
8510 [(set_attr "type" "ssecvt")
8511 (set_attr "prefix_rep" "1")
8512 (set_attr "prefix_data16" "0")
8513 (set_attr "prefix" "maybe_vex")
8514 (set_attr "mode" "TI")
8515 (set_attr "amdfam10_decode" "double")
8516 (set_attr "athlon_decode" "vector")
8517 (set_attr "bdver1_decode" "double")])
;; Merge-masked variant (pass-through in operand 2, mask in operand 3).
8519 (define_insn "sse2_cvtpd2dq_mask"
8520 [(set (match_operand:V4SI 0 "register_operand" "=v")
8523 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
8526 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
8527 (parallel [(const_int 0) (const_int 1)]))
8528 (match_operand:QI 3 "register_operand" "Yk"))
8529 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
8531 "vcvtpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8532 [(set_attr "type" "ssecvt")
8533 (set_attr "prefix" "evex")
8534 (set_attr "mode" "TI")])
;; Zero-masked variant.
8536 (define_insn "*sse2_cvtpd2dq_mask_1"
8537 [(set (match_operand:V4SI 0 "register_operand" "=v")
8540 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
8542 (const_vector:V2SI [(const_int 0) (const_int 0)])
8543 (match_operand:QI 2 "register_operand" "Yk"))
8544 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
8546 "vcvtpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
8547 [(set_attr "type" "ssecvt")
8548 (set_attr "prefix" "evex")
8549 (set_attr "mode" "TI")])
8551 ;; For fixuns_notrunc* insn patterns
;; {y} operand-size suffix is only needed for the V4DF source form.
8552 (define_mode_attr pd2udqsuff
8553 [(V8DF "") (V4DF "{y}")])
;; vcvtpd2udq: packed DF -> unsigned SI with rounding (256/512-bit).
8555 (define_insn "fixuns_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
8556 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
8558 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
8559 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
8561 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
8562 [(set_attr "type" "ssecvt")
8563 (set_attr "prefix" "evex")
8564 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit form: result goes in the low half of a V4SI, upper zeroed.
8566 (define_insn "fixuns_notruncv2dfv2si2"
8567 [(set (match_operand:V4SI 0 "register_operand" "=v")
8570 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
8571 UNSPEC_UNSIGNED_FIX_NOTRUNC)
8572 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
8574 "vcvtpd2udq{x}\t{%1, %0|%0, %1}"
8575 [(set_attr "type" "ssecvt")
8576 (set_attr "prefix" "evex")
8577 (set_attr "mode" "TI")])
8579 (define_insn "fixuns_notruncv2dfv2si2_mask"
8580 [(set (match_operand:V4SI 0 "register_operand" "=v")
8584 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
8585 UNSPEC_UNSIGNED_FIX_NOTRUNC)
8587 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
8588 (parallel [(const_int 0) (const_int 1)]))
8589 (match_operand:QI 3 "register_operand" "Yk"))
8590 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
8592 "vcvtpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8593 [(set_attr "type" "ssecvt")
8594 (set_attr "prefix" "evex")
8595 (set_attr "mode" "TI")])
8597 (define_insn "*fixuns_notruncv2dfv2si2_mask_1"
8598 [(set (match_operand:V4SI 0 "register_operand" "=v")
8602 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
8603 UNSPEC_UNSIGNED_FIX_NOTRUNC)
8604 (const_vector:V2SI [(const_int 0) (const_int 0)])
8605 (match_operand:QI 2 "register_operand" "Yk"))
8606 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
8608 "vcvtpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
8609 [(set_attr "type" "ssecvt")
8610 (set_attr "prefix" "evex")
8611 (set_attr "mode" "TI")])
;; Truncating V8DF -> V8SI conversion (signed or unsigned via any_fix /
;; <fixunssuffix>), with masking and SAE-only embedded-rounding support.
8613 (define_insn "fix<fixunssuffix>_truncv8dfv8si2<mask_name><round_saeonly_name>"
8614 [(set (match_operand:V8SI 0 "register_operand" "=v")
8616 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
8618 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
8619 [(set_attr "type" "ssecvt")
8620 (set_attr "prefix" "evex")
8621 (set_attr "mode" "OI")])
;; 128-bit truncating unsigned conversion; upper V2SI half of the V4SI
;; result is zeroed.
8623 (define_insn "*fixuns_truncv2dfv2si2"
8624 [(set (match_operand:V4SI 0 "register_operand" "=v")
8626 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
8627 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
8629 "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
8630 [(set_attr "type" "ssecvt")
8631 (set_attr "prefix" "evex")
8632 (set_attr "mode" "TI")])
;; Merge-masking variant: low half merged with operand 2 under mask 3.
8634 (define_insn "fixuns_truncv2dfv2si2_mask"
8635 [(set (match_operand:V4SI 0 "register_operand" "=v")
8638 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
8640 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
8641 (parallel [(const_int 0) (const_int 1)]))
8642 (match_operand:QI 3 "register_operand" "Yk"))
8643 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
8645 "vcvttpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8646 [(set_attr "type" "ssecvt")
8647 (set_attr "prefix" "evex")
8648 (set_attr "mode" "TI")])
;; Zero-masking variant (%{z%}).
8650 (define_insn "*fixuns_truncv2dfv2si2_mask_1"
8651 [(set (match_operand:V4SI 0 "register_operand" "=v")
8654 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
8655 (const_vector:V2SI [(const_int 0) (const_int 0)])
8656 (match_operand:QI 2 "register_operand" "Yk"))
8657 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
8659 "vcvttpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
8660 [(set_attr "type" "ssecvt")
8661 (set_attr "prefix" "evex")
8662 (set_attr "mode" "TI")])
;; Truncating V4DF -> V4SI (signed); AVX form, or EVEX with masking
;; when AVX512VL is available.  {y} pins the 256-bit operand size.
8664 (define_insn "fix_truncv4dfv4si2<mask_name>"
8665 [(set (match_operand:V4SI 0 "register_operand" "=v")
8666 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
8667 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
8668 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8669 [(set_attr "type" "ssecvt")
8670 (set_attr "prefix" "maybe_evex")
8671 (set_attr "mode" "OI")])
;; Unsigned counterpart; AVX512VL-only (no legacy encoding exists).
8673 (define_insn "fixuns_truncv4dfv4si2<mask_name>"
8674 [(set (match_operand:V4SI 0 "register_operand" "=v")
8675 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
8676 "TARGET_AVX512VL && TARGET_AVX512F"
8677 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8678 [(set_attr "type" "ssecvt")
8679 (set_attr "prefix" "maybe_evex")
8680 (set_attr "mode" "OI")])
;; AVX512DQ: truncating double -> 64-bit integer (signed/unsigned),
;; same element count, with masking and SAE support.
8682 (define_insn "fix<fixunssuffix>_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
8683 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
8684 (any_fix:<sseintvecmode>
8685 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
8686 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
8687 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
8688 [(set_attr "type" "ssecvt")
8689 (set_attr "prefix" "evex")
8690 (set_attr "mode" "<sseintvecmode2>")])
;; AVX512DQ: rounding (non-truncating) double -> signed 64-bit integer.
8692 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
8693 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
8694 (unspec:<sseintvecmode>
8695 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
8696 UNSPEC_FIX_NOTRUNC))]
8697 "TARGET_AVX512DQ && <round_mode512bit_condition>"
8698 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
8699 [(set_attr "type" "ssecvt")
8700 (set_attr "prefix" "evex")
8701 (set_attr "mode" "<sseintvecmode2>")])
;; AVX512DQ: rounding double -> unsigned 64-bit integer.
8703 (define_insn "fixuns_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
8704 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
8705 (unspec:<sseintvecmode>
8706 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
8707 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
8708 "TARGET_AVX512DQ && <round_mode512bit_condition>"
8709 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
8710 [(set_attr "type" "ssecvt")
8711 (set_attr "prefix" "evex")
8712 (set_attr "mode" "<sseintvecmode2>")])
;; AVX512DQ: truncating float -> 64-bit integer (widening element size).
8714 (define_insn "fix<fixunssuffix>_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
8715 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
8716 (any_fix:<sselongvecmode>
8717 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
8718 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
8719 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
8720 [(set_attr "type" "ssecvt")
8721 (set_attr "prefix" "evex")
8722 (set_attr "mode" "<sseintvecmode3>")])
;; Truncating V2SF -> V2DI: operates on the low two lanes of a V4SF
;; source (vec_select of indices 0,1); %q1 prints the 64-bit memory form.
8724 (define_insn "avx512dq_fix<fixunssuffix>_truncv2sfv2di2<mask_name>"
8725 [(set (match_operand:V2DI 0 "register_operand" "=v")
8728 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
8729 (parallel [(const_int 0) (const_int 1)]))))]
8730 "TARGET_AVX512DQ && TARGET_AVX512VL"
8731 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
8732 [(set_attr "type" "ssecvt")
8733 (set_attr "prefix" "evex")
8734 (set_attr "mode" "TI")])
;; Expander: widens the V2SF operand to a V4SF subreg so the insn above
;; can be used directly.
8736 (define_expand "fix<fixunssuffix>_truncv2sfv2di2"
8737 [(set (match_operand:V2DI 0 "register_operand")
8739 (match_operand:V2SF 1 "register_operand")))]
8740 "TARGET_AVX512DQ && TARGET_AVX512VL"
8742 rtx op1 = force_reg (V2SFmode, operands[1]);
8743 op1 = lowpart_subreg (V4SFmode, op1, V2SFmode);
8744 emit_insn (gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2
8745 (operands[0], op1));
;; Helper mode attributes for the vec_unpack float->DI expanders below:
;; result mode, its lowercase spelling, and the mode used for the
;; half-vector extract.
8749 (define_mode_attr vunpckfixt_mode
8750 [(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")])
8751 (define_mode_attr vunpckfixt_model
8752 [(V16SF "v8di") (V8SF "v4di") (V4SF "v2di")])
8753 (define_mode_attr vunpckfixt_extract_mode
8754 [(V16SF "v16sf") (V8SF "v8sf") (V4SF "v8sf")])
;; Convert the low half of a float vector to a same-width DI vector:
;; extract the low half (except for V4SF, which uses the V2SF path).
8756 (define_expand "vec_unpack_<fixprefix>fix_trunc_lo_<mode>"
8757 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
8758 (any_fix:<vunpckfixt_mode>
8759 (match_operand:VF1_AVX512VL 1 "register_operand"))]
8762 rtx tem = operands[1];
8763 rtx (*gen) (rtx, rtx);
8765 if (<MODE>mode != V4SFmode)
8767 tem = gen_reg_rtx (<ssehalfvecmode>mode);
8768 emit_insn (gen_vec_extract_lo_<vunpckfixt_extract_mode> (tem,
8770 gen = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
8773 gen = gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2;
8775 emit_insn (gen (operands[0], tem));
;; High-half counterpart; the V4SF case first swaps halves with a
;; vpermilps (0x4e = swap 64-bit halves) so the V2SF path applies.
8779 (define_expand "vec_unpack_<fixprefix>fix_trunc_hi_<mode>"
8780 [(match_operand:<vunpckfixt_mode> 0 "register_operand")
8781 (any_fix:<vunpckfixt_mode>
8782 (match_operand:VF1_AVX512VL 1 "register_operand"))]
8786 rtx (*gen) (rtx, rtx);
8788 if (<MODE>mode != V4SFmode)
8790 tem = gen_reg_rtx (<ssehalfvecmode>mode);
8791 emit_insn (gen_vec_extract_hi_<vunpckfixt_extract_mode> (tem,
8793 gen = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
8797 tem = gen_reg_rtx (V4SFmode);
8798 emit_insn (gen_avx_vpermilv4sf (tem, operands[1], GEN_INT (0x4e)));
8799 gen = gen_avx512dq_fix<fixunssuffix>_truncv2sfv2di2;
8802 emit_insn (gen (operands[0], tem));
;; Truncating float -> unsigned int, 128/256-bit forms, with masking.
8806 (define_insn "<mask_codefor>fixuns_trunc<mode><sseintvecmodelower>2<mask_name>"
8807 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
8808 (unsigned_fix:<sseintvecmode>
8809 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
8811 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8812 [(set_attr "type" "ssecvt")
8813 (set_attr "prefix" "evex")
8814 (set_attr "mode" "<sseintvecmode2>")])
;; Expander: V4DF truncated to V4SI packed into a V8SI with a zero
;; upper half (operand 2 is created as CONST0).
8816 (define_expand "avx_cvttpd2dq256_2"
8817 [(set (match_operand:V8SI 0 "register_operand")
8819 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
8822 "operands[2] = CONST0_RTX (V4SImode);")
;; sse2_cvttpd2dq: truncating V2DF -> V2SI; upper V2SI half of the V4SI
;; result is zeroed.  AVX form uses {x} to pin 128-bit operand size.
8824 (define_insn "sse2_cvttpd2dq"
8825 [(set (match_operand:V4SI 0 "register_operand" "=v")
8827 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
8828 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
8832 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
8834 return "cvttpd2dq\t{%1, %0|%0, %1}";
8836 [(set_attr "type" "ssecvt")
8837 (set_attr "amdfam10_decode" "double")
8838 (set_attr "athlon_decode" "vector")
8839 (set_attr "bdver1_decode" "double")
8840 (set_attr "prefix" "maybe_vex")
8841 (set_attr "mode" "TI")])
;; Merge-masking variant: converted low half merged with operand 2
;; under mask operand 3; EVEX-encoded.
8843 (define_insn "sse2_cvttpd2dq_mask"
8844 [(set (match_operand:V4SI 0 "register_operand" "=v")
8847 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
8849 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
8850 (parallel [(const_int 0) (const_int 1)]))
8851 (match_operand:QI 3 "register_operand" "Yk"))
8852 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
8854 "vcvttpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8855 [(set_attr "type" "ssecvt")
8856 (set_attr "prefix" "evex")
8857 (set_attr "mode" "TI")])
;; Zero-masking variant (%{z%}).
8859 (define_insn "*sse2_cvttpd2dq_mask_1"
8860 [(set (match_operand:V4SI 0 "register_operand" "=v")
8863 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
8864 (const_vector:V2SI [(const_int 0) (const_int 0)])
8865 (match_operand:QI 2 "register_operand" "Yk"))
8866 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
8868 "vcvttpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
8869 [(set_attr "type" "ssecvt")
8870 (set_attr "prefix" "evex")
8871 (set_attr "mode" "TI")])
;; Scalar DF -> SF conversion merged into the low lane of a V4SF:
;; alternatives are SSE2 (two-operand, reg/mem) and AVX/EVEX
;; (three-operand, with masking and embedded rounding).
8873 (define_insn "sse2_cvtsd2ss<mask_name><round_name>"
8874 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
8877 (float_truncate:V2SF
8878 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
8879 (match_operand:V4SF 1 "register_operand" "0,0,v")
8883 cvtsd2ss\t{%2, %0|%0, %2}
8884 cvtsd2ss\t{%2, %0|%0, %q2}
8885 vcvtsd2ss\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %q2<round_mask_op3>}"
8886 [(set_attr "isa" "noavx,noavx,avx")
8887 (set_attr "type" "ssecvt")
8888 (set_attr "athlon_decode" "vector,double,*")
8889 (set_attr "amdfam10_decode" "vector,double,*")
8890 (set_attr "bdver1_decode" "direct,direct,*")
8891 (set_attr "btver2_decode" "double,double,double")
8892 (set_attr "prefix" "orig,orig,<round_prefix>")
8893 (set_attr "mode" "SF")])
;; Variant taking a scalar DF operand directly (float_truncate:SF).
8895 (define_insn "*sse2_vd_cvtsd2ss"
8896 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
8899 (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
8900 (match_operand:V4SF 1 "register_operand" "0,0,v")
8904 cvtsd2ss\t{%2, %0|%0, %2}
8905 cvtsd2ss\t{%2, %0|%0, %2}
8906 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
8907 [(set_attr "isa" "noavx,noavx,avx")
8908 (set_attr "type" "ssecvt")
8909 (set_attr "athlon_decode" "vector,double,*")
8910 (set_attr "amdfam10_decode" "vector,double,*")
8911 (set_attr "bdver1_decode" "direct,direct,*")
8912 (set_attr "btver2_decode" "double,double,double")
8913 (set_attr "prefix" "orig,orig,vex")
8914 (set_attr "mode" "SF")])
;; Scalar SF -> DF widening merged into the low lane of a V2DF; the
;; source is the low lane of a V4SF (vec_select of indices 0,1).
;; %k2 prints the 32-bit memory form.  SAE-only rounding is supported.
8916 (define_insn "sse2_cvtss2sd<mask_name><round_saeonly_name>"
8917 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
8921 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
8922 (parallel [(const_int 0) (const_int 1)])))
8923 (match_operand:V2DF 1 "register_operand" "0,0,v")
8927 cvtss2sd\t{%2, %0|%0, %2}
8928 cvtss2sd\t{%2, %0|%0, %k2}
8929 vcvtss2sd\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %k2<round_saeonly_mask_op3>}"
8930 [(set_attr "isa" "noavx,noavx,avx")
8931 (set_attr "type" "ssecvt")
8932 (set_attr "amdfam10_decode" "vector,double,*")
8933 (set_attr "athlon_decode" "direct,direct,*")
8934 (set_attr "bdver1_decode" "direct,direct,*")
8935 (set_attr "btver2_decode" "double,double,double")
8936 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
8937 (set_attr "mode" "DF")])
;; Variant taking a scalar SF operand directly (float_extend:DF).
8939 (define_insn "*sse2_vd_cvtss2sd"
8940 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
8943 (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
8944 (match_operand:V2DF 1 "register_operand" "0,0,v")
8948 cvtss2sd\t{%2, %0|%0, %2}
8949 cvtss2sd\t{%2, %0|%0, %2}
8950 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
8951 [(set_attr "isa" "noavx,noavx,avx")
8952 (set_attr "type" "ssecvt")
8953 (set_attr "amdfam10_decode" "vector,double,*")
8954 (set_attr "athlon_decode" "direct,direct,*")
8955 (set_attr "bdver1_decode" "direct,direct,*")
8956 (set_attr "btver2_decode" "double,double,double")
8957 (set_attr "prefix" "orig,orig,vex")
8958 (set_attr "mode" "DF")])
;; V8DF -> V8SF narrowing, 512-bit, with masking and embedded rounding.
8960 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
8961 [(set (match_operand:V8SF 0 "register_operand" "=v")
8962 (float_truncate:V8SF
8963 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
8965 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
8966 [(set_attr "type" "ssecvt")
8967 (set_attr "prefix" "evex")
8968 (set_attr "mode" "V8SF")])
;; V4DF -> V4SF narrowing; {y} pins the 256-bit operand size.
8970 (define_insn "avx_cvtpd2ps256<mask_name>"
8971 [(set (match_operand:V4SF 0 "register_operand" "=v")
8972 (float_truncate:V4SF
8973 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
8974 "TARGET_AVX && <mask_avx512vl_condition>"
8975 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8976 [(set_attr "type" "ssecvt")
8977 (set_attr "prefix" "maybe_evex")
8978 (set_attr "btver2_decode" "vector")
8979 (set_attr "mode" "V4SF")])
;; Expander: V2DF -> V2SF packed into a V4SF whose upper half is zero
;; (operand 2 created as CONST0).
8981 (define_expand "sse2_cvtpd2ps"
8982 [(set (match_operand:V4SF 0 "register_operand")
8984 (float_truncate:V2SF
8985 (match_operand:V2DF 1 "vector_operand"))
8988 "operands[2] = CONST0_RTX (V2SFmode);")
;; Masked expander counterpart; merge value in operand 2, mask in
;; operand 3, zero upper half supplied as operand 4.
8990 (define_expand "sse2_cvtpd2ps_mask"
8991 [(set (match_operand:V4SF 0 "register_operand")
8994 (float_truncate:V2SF
8995 (match_operand:V2DF 1 "vector_operand"))
8997 (match_operand:V4SF 2 "nonimm_or_0_operand")
8998 (parallel [(const_int 0) (const_int 1)]))
8999 (match_operand:QI 3 "register_operand"))
9002 "operands[4] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps insn: V2DF -> V2SF into the low half of a V4SF, upper half
;; zero.  AVX form uses {x} to pin the 128-bit operand size.
9004 (define_insn "*sse2_cvtpd2ps"
9005 [(set (match_operand:V4SF 0 "register_operand" "=v")
9007 (float_truncate:V2SF
9008 (match_operand:V2DF 1 "vector_operand" "vBm"))
9009 (match_operand:V2SF 2 "const0_operand")))]
9013 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
9015 return "cvtpd2ps\t{%1, %0|%0, %1}";
9017 [(set_attr "type" "ssecvt")
9018 (set_attr "amdfam10_decode" "double")
9019 (set_attr "athlon_decode" "vector")
9020 (set_attr "bdver1_decode" "double")
9021 (set_attr "prefix_data16" "1")
9022 (set_attr "prefix" "maybe_vex")
9023 (set_attr "mode" "V4SF")])
;; Plain V2DF -> V2SF truncation for the MMX-in-SSE path
;; (TARGET_MMX_WITH_SSE); same instruction, V2SF destination mode.
9025 (define_insn "truncv2dfv2sf2"
9026 [(set (match_operand:V2SF 0 "register_operand" "=v")
9027 (float_truncate:V2SF
9028 (match_operand:V2DF 1 "vector_operand" "vBm")))]
9029 "TARGET_MMX_WITH_SSE"
9032 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
9034 return "cvtpd2ps\t{%1, %0|%0, %1}";
9036 [(set_attr "type" "ssecvt")
9037 (set_attr "amdfam10_decode" "double")
9038 (set_attr "athlon_decode" "vector")
9039 (set_attr "bdver1_decode" "double")
9040 (set_attr "prefix_data16" "1")
9041 (set_attr "prefix" "maybe_vex")
9042 (set_attr "mode" "V4SF")])
;; Merge-masking variant of *sse2_cvtpd2ps (EVEX).
9044 (define_insn "*sse2_cvtpd2ps_mask"
9045 [(set (match_operand:V4SF 0 "register_operand" "=v")
9048 (float_truncate:V2SF
9049 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
9051 (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
9052 (parallel [(const_int 0) (const_int 1)]))
9053 (match_operand:QI 3 "register_operand" "Yk"))
9054 (match_operand:V2SF 4 "const0_operand")))]
9056 "vcvtpd2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9057 [(set_attr "type" "ssecvt")
9058 (set_attr "prefix" "evex")
9059 (set_attr "mode" "V4SF")])
;; Zero-masking variant (%{z%}).
9061 (define_insn "*sse2_cvtpd2ps_mask_1"
9062 [(set (match_operand:V4SF 0 "register_operand" "=v")
9065 (float_truncate:V2SF
9066 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
9067 (match_operand:V2SF 3 "const0_operand")
9068 (match_operand:QI 2 "register_operand" "Yk"))
9069 (match_operand:V2SF 4 "const0_operand")))]
9071 "vcvtpd2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9072 [(set_attr "type" "ssecvt")
9073 (set_attr "prefix" "evex")
9074 (set_attr "mode" "V4SF")])
9076 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; sf2dfmode maps a DF vector mode to the SF vector mode with the same
;; element count (and sf2dfmode_lower is its lowercase spelling).
9077 (define_mode_attr sf2dfmode
9078 [(V8DF "V8SF") (V4DF "V4SF")])
9079 (define_mode_attr sf2dfmode_lower
9080 [(V8DF "v8sf") (V4DF "v4sf")])
;; Standard-named truncation / extension expanders for 256/512-bit DF.
9082 (define_expand "trunc<mode><sf2dfmode_lower>2"
9083 [(set (match_operand:<sf2dfmode> 0 "register_operand")
9084 (float_truncate:<sf2dfmode>
9085 (match_operand:VF2_512_256 1 "vector_operand")))]
9088 (define_expand "extend<sf2dfmode_lower><mode>2"
9089 [(set (match_operand:VF2_512_256 0 "register_operand")
9090 (float_extend:VF2_512_256
9091 (match_operand:<sf2dfmode> 1 "vector_operand")))]
;; SF -> DF widening insn for 256/512-bit, with masking and SAE-only
;; embedded rounding.
9094 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
9095 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
9096 (float_extend:VF2_512_256
9097 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
9098 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
9099 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
9100 [(set_attr "type" "ssecvt")
9101 (set_attr "prefix" "maybe_vex")
9102 (set_attr "mode" "<MODE>")])
;; Widen the low four SF lanes of a V8SF to V4DF; %x1 prints the
;; 128-bit (xmm) form of operand 1.
9104 (define_insn "*avx_cvtps2pd256_2"
9105 [(set (match_operand:V4DF 0 "register_operand" "=v")
9108 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
9109 (parallel [(const_int 0) (const_int 1)
9110 (const_int 2) (const_int 3)]))))]
9112 "vcvtps2pd\t{%x1, %0|%0, %x1}"
9113 [(set_attr "type" "ssecvt")
9114 (set_attr "prefix" "vex")
9115 (set_attr "mode" "V4DF")])
;; Widen the low eight SF lanes of a V16SF to V8DF; %t1 prints the
;; 256-bit (ymm) form of operand 1.
9117 (define_insn "vec_unpacks_lo_v16sf"
9118 [(set (match_operand:V8DF 0 "register_operand" "=v")
9121 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
9122 (parallel [(const_int 0) (const_int 1)
9123 (const_int 2) (const_int 3)
9124 (const_int 4) (const_int 5)
9125 (const_int 6) (const_int 7)]))))]
9127 "vcvtps2pd\t{%t1, %0|%0, %t1}"
9128 [(set_attr "type" "ssecvt")
9129 (set_attr "prefix" "evex")
9130 (set_attr "mode" "V8DF")])
;; Vector -> mask-register conversion (vpmov{b,w}2m) for 8/16-bit
;; element vectors.
9132 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
9133 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
9134 (unspec:<avx512fmaskmode>
9135 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
9136 UNSPEC_CVTINT2MASK))]
9138 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
9139 [(set_attr "prefix" "evex")
9140 (set_attr "mode" "<sseinsnmode>")])
;; Same for 32/64-bit element vectors (vpmov{d,q}2m).
9142 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
9143 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
9144 (unspec:<avx512fmaskmode>
9145 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
9146 UNSPEC_CVTINT2MASK))]
9148 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
9149 [(set_attr "prefix" "evex")
9150 (set_attr "mode" "<sseinsnmode>")])
;; Mask -> vector expansion: each mask bit selects -1 (all-ones) or 0.
;; Operands 2/3 are created as CONSTM1 / CONST0 vectors.
9152 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
9153 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
9154 (vec_merge:VI12_AVX512VL
9157 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
9160 operands[2] = CONSTM1_RTX (<MODE>mode);
9161 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn form for 8/16-bit elements (vpmovm2{b,w}).
9164 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
9165 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
9166 (vec_merge:VI12_AVX512VL
9167 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
9168 (match_operand:VI12_AVX512VL 3 "const0_operand")
9169 (match_operand:<avx512fmaskmode> 1 "register_operand" "k")))]
9171 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
9172 [(set_attr "prefix" "evex")
9173 (set_attr "mode" "<sseinsnmode>")])
;; Mask -> vector expansion for 32/64-bit elements.
9175 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
9176 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
9177 (vec_merge:VI48_AVX512VL
9180 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
9183 operands[2] = CONSTM1_RTX (<MODE>mode);
9184 operands[3] = CONST0_RTX (<MODE>mode);
;; Insn form for 32/64-bit elements: vpmovm2{d,q} when AVX512DQ is
;; available, otherwise a zero-masked vpternlog with imm 0x81 that
;; writes all-ones into the mask-selected lanes.
9187 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
9188 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v,v")
9189 (vec_merge:VI48_AVX512VL
9190 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
9191 (match_operand:VI48_AVX512VL 3 "const0_operand")
9192 (match_operand:<avx512fmaskmode> 1 "register_operand" "k,Yk")))]
9195 vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}
9196 vpternlog<ssemodesuffix>\t{$0x81, %0, %0, %0%{%1%}%{z%}|%0%{%1%}%{z%}, %0, %0, 0x81}"
9197 [(set_attr "isa" "avx512dq,*")
9198 (set_attr "length_immediate" "0,1")
9199 (set_attr "prefix" "evex")
9200 (set_attr "mode" "<sseinsnmode>")])
;; V2SF -> V2DF widening for the MMX-in-SSE path: register operands are
;; widened to a V4SF subreg so sse2_cvtps2pd can be used.
9202 (define_expand "extendv2sfv2df2"
9203 [(set (match_operand:V2DF 0 "register_operand")
9205 (match_operand:V2SF 1 "nonimmediate_operand")))]
9206 "TARGET_MMX_WITH_SSE"
9208 if (!MEM_P (operands[1]))
9210 operands[1] = lowpart_subreg (V4SFmode,
9211 force_reg (V2SFmode, operands[1]),
9213 emit_insn (gen_sse2_cvtps2pd (operands[0], operands[1]));
;; Widen the low two SF lanes of a V4SF register to V2DF; %q1 prints
;; the 64-bit memory form of operand 1.
9218 (define_insn "sse2_cvtps2pd<mask_name>"
9219 [(set (match_operand:V2DF 0 "register_operand" "=v")
9222 (match_operand:V4SF 1 "register_operand" "v")
9223 (parallel [(const_int 0) (const_int 1)]))))]
9224 "TARGET_SSE2 && <mask_avx512vl_condition>"
9225 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
9226 [(set_attr "type" "ssecvt")
9227 (set_attr "amdfam10_decode" "direct")
9228 (set_attr "athlon_decode" "double")
9229 (set_attr "bdver1_decode" "double")
9230 (set_attr "prefix_data16" "0")
9231 (set_attr "prefix" "maybe_vex")
9232 (set_attr "mode" "V2DF")])
;; Memory-source variant: takes a V2SF memory operand directly.
9234 (define_insn "sse2_cvtps2pd<mask_name>_1"
9235 [(set (match_operand:V2DF 0 "register_operand" "=v")
9237 (match_operand:V2SF 1 "memory_operand" "m")))]
9238 "TARGET_SSE2 && <mask_avx512vl_condition>"
9239 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
9240 [(set_attr "type" "ssecvt")
9241 (set_attr "amdfam10_decode" "direct")
9242 (set_attr "athlon_decode" "double")
9243 (set_attr "bdver1_decode" "double")
9244 (set_attr "prefix_data16" "0")
9245 (set_attr "prefix" "maybe_vex")
9246 (set_attr "mode" "V2DF")])
;; vec_unpacks_hi_v4sf: move the high two SF lanes into the low
;; position of a scratch (operand 2), then widen them to V2DF.
9248 (define_expand "vec_unpacks_hi_v4sf"
9253 (match_operand:V4SF 1 "vector_operand"))
9254 (parallel [(const_int 6) (const_int 7)
9255 (const_int 2) (const_int 3)])))
9256 (set (match_operand:V2DF 0 "register_operand")
9260 (parallel [(const_int 0) (const_int 1)]))))]
9262 "operands[2] = gen_reg_rtx (V4SFmode);")
;; High half of a V8SF widened to V4DF via a V4SF scratch.
9264 (define_expand "vec_unpacks_hi_v8sf"
9267 (match_operand:V8SF 1 "register_operand")
9268 (parallel [(const_int 4) (const_int 5)
9269 (const_int 6) (const_int 7)])))
9270 (set (match_operand:V4DF 0 "register_operand")
9274 "operands[2] = gen_reg_rtx (V4SFmode);")
;; High half of a V16SF widened to V8DF via a V8SF scratch.
9276 (define_expand "vec_unpacks_hi_v16sf"
9279 (match_operand:V16SF 1 "register_operand")
9280 (parallel [(const_int 8) (const_int 9)
9281 (const_int 10) (const_int 11)
9282 (const_int 12) (const_int 13)
9283 (const_int 14) (const_int 15)])))
9284 (set (match_operand:V8DF 0 "register_operand")
9288 "operands[2] = gen_reg_rtx (V8SFmode);")
;; Low half of a V4SF widened to V2DF; a memory source is re-addressed
;; as V2SF so the dedicated memory-form insn can be used.
9290 (define_expand "vec_unpacks_lo_v4sf"
9291 [(set (match_operand:V2DF 0 "register_operand")
9294 (match_operand:V4SF 1 "vector_operand")
9295 (parallel [(const_int 0) (const_int 1)]))))]
9298 if (MEM_P (operands[1]))
9300 operands[1] = adjust_address_nv (operands[1], V2SFmode, 0);
9301 emit_insn (gen_sse2_cvtps2pd_1 (operands[0], operands[1]));
;; Low half of a V8SF widened to V4DF.
9306 (define_expand "vec_unpacks_lo_v8sf"
9307 [(set (match_operand:V4DF 0 "register_operand")
9310 (match_operand:V8SF 1 "nonimmediate_operand")
9311 (parallel [(const_int 0) (const_int 1)
9312 (const_int 2) (const_int 3)]))))]
;; sseunpackfltmode: float vector mode produced when unpacking one half
;; of the given integer vector mode.
9315 (define_mode_attr sseunpackfltmode
9316 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
9317 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; Generic int -> float unpack expanders for 16-bit-element vectors:
;; widen one half with vec_unpack{s,u}_{hi,lo}, then FLOAT the result.
9319 (define_expand "vec_unpacks_float_hi_<mode>"
9320 [(match_operand:<sseunpackfltmode> 0 "register_operand")
9321 (match_operand:VI2_AVX512F 1 "register_operand")]
9324 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
9326 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
9327 emit_insn (gen_rtx_SET (operands[0],
9328 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
9332 (define_expand "vec_unpacks_float_lo_<mode>"
9333 [(match_operand:<sseunpackfltmode> 0 "register_operand")
9334 (match_operand:VI2_AVX512F 1 "register_operand")]
9337 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
9339 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
9340 emit_insn (gen_rtx_SET (operands[0],
9341 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
9345 (define_expand "vec_unpacku_float_hi_<mode>"
9346 [(match_operand:<sseunpackfltmode> 0 "register_operand")
9347 (match_operand:VI2_AVX512F 1 "register_operand")]
9350 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
9352 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
9353 emit_insn (gen_rtx_SET (operands[0],
9354 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
9358 (define_expand "vec_unpacku_float_lo_<mode>"
9359 [(match_operand:<sseunpackfltmode> 0 "register_operand")
9360 (match_operand:VI2_AVX512F 1 "register_operand")]
9363 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
9365 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
9366 emit_insn (gen_rtx_SET (operands[0],
9367 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Signed V4SI high half -> V2DF: shuffle the high lanes into the low
;; position of a V4SI scratch (operand 2), then cvtdq2pd the low half.
9371 (define_expand "vec_unpacks_float_hi_v4si"
9374 (match_operand:V4SI 1 "vector_operand")
9375 (parallel [(const_int 2) (const_int 3)
9376 (const_int 2) (const_int 3)])))
9377 (set (match_operand:V2DF 0 "register_operand")
9381 (parallel [(const_int 0) (const_int 1)]))))]
9383 "operands[2] = gen_reg_rtx (V4SImode);")
;; Signed V4SI low half -> V2DF.
9385 (define_expand "vec_unpacks_float_lo_v4si"
9386 [(set (match_operand:V2DF 0 "register_operand")
9389 (match_operand:V4SI 1 "vector_operand")
9390 (parallel [(const_int 0) (const_int 1)]))))]
;; Signed V8SI high half -> V4DF via a V4SI scratch.
9393 (define_expand "vec_unpacks_float_hi_v8si"
9396 (match_operand:V8SI 1 "register_operand")
9397 (parallel [(const_int 4) (const_int 5)
9398 (const_int 6) (const_int 7)])))
9399 (set (match_operand:V4DF 0 "register_operand")
9403 "operands[2] = gen_reg_rtx (V4SImode);")
;; Signed V8SI low half -> V4DF.
9405 (define_expand "vec_unpacks_float_lo_v8si"
9406 [(set (match_operand:V4DF 0 "register_operand")
9409 (match_operand:V8SI 1 "nonimmediate_operand")
9410 (parallel [(const_int 0) (const_int 1)
9411 (const_int 2) (const_int 3)]))))]
;; Signed V16SI high half -> V8DF via a V8SI scratch.
9414 (define_expand "vec_unpacks_float_hi_v16si"
9417 (match_operand:V16SI 1 "nonimmediate_operand")
9418 (parallel [(const_int 8) (const_int 9)
9419 (const_int 10) (const_int 11)
9420 (const_int 12) (const_int 13)
9421 (const_int 14) (const_int 15)])))
9422 (set (match_operand:V8DF 0 "register_operand")
9426 "operands[2] = gen_reg_rtx (V8SImode);")
;; Signed V16SI low half -> V8DF.
9428 (define_expand "vec_unpacks_float_lo_v16si"
9429 [(set (match_operand:V8DF 0 "register_operand")
9432 (match_operand:V16SI 1 "nonimmediate_operand")
9433 (parallel [(const_int 0) (const_int 1)
9434 (const_int 2) (const_int 3)
9435 (const_int 4) (const_int 5)
9436 (const_int 6) (const_int 7)]))))]
;; Unsigned V4SI -> V2DF without an unsigned-convert instruction:
;; convert as signed, then where the result is negative (input had the
;; sign bit set) add 2^32 as a DF constant to correct it.
;; Sequence: convert; mask = (result < 0); fix = mask & 2^32; add fix.
9439 (define_expand "vec_unpacku_float_hi_v4si"
9442 (match_operand:V4SI 1 "vector_operand")
9443 (parallel [(const_int 2) (const_int 3)
9444 (const_int 2) (const_int 3)])))
9449 (parallel [(const_int 0) (const_int 1)]))))
9451 (lt:V2DF (match_dup 6) (match_dup 3)))
9453 (and:V2DF (match_dup 7) (match_dup 4)))
9454 (set (match_operand:V2DF 0 "register_operand")
9455 (plus:V2DF (match_dup 6) (match_dup 8)))]
9458 REAL_VALUE_TYPE TWO32r;
9462 real_ldexp (&TWO32r, &dconst1, 32);
9463 x = const_double_from_real_value (TWO32r, DFmode);
9465 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
9466 operands[4] = force_reg (V2DFmode,
9467 ix86_build_const_vector (V2DFmode, 1, x));
9469 operands[5] = gen_reg_rtx (V4SImode);
9471 for (i = 6; i < 9; i++)
9472 operands[i] = gen_reg_rtx (V2DFmode);
;; Low-half unsigned V4SI -> V2DF, same sign-fixup technique.
9475 (define_expand "vec_unpacku_float_lo_v4si"
9479 (match_operand:V4SI 1 "vector_operand")
9480 (parallel [(const_int 0) (const_int 1)]))))
9482 (lt:V2DF (match_dup 5) (match_dup 3)))
9484 (and:V2DF (match_dup 6) (match_dup 4)))
9485 (set (match_operand:V2DF 0 "register_operand")
9486 (plus:V2DF (match_dup 5) (match_dup 7)))]
9489 REAL_VALUE_TYPE TWO32r;
9493 real_ldexp (&TWO32r, &dconst1, 32);
9494 x = const_double_from_real_value (TWO32r, DFmode);
9496 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
9497 operands[4] = force_reg (V2DFmode,
9498 ix86_build_const_vector (V2DFmode, 1, x));
9500 for (i = 5; i < 8; i++)
9501 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned V8SI high half -> V4DF: extract high half, convert as
;; signed, then add 2^32 to the lanes that came out negative.
9504 (define_expand "vec_unpacku_float_hi_v8si"
9505 [(match_operand:V4DF 0 "register_operand")
9506 (match_operand:V8SI 1 "register_operand")]
9509 REAL_VALUE_TYPE TWO32r;
9513 real_ldexp (&TWO32r, &dconst1, 32);
9514 x = const_double_from_real_value (TWO32r, DFmode);
9516 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
9517 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
9518 tmp[5] = gen_reg_rtx (V4SImode);
9520 for (i = 2; i < 5; i++)
9521 tmp[i] = gen_reg_rtx (V4DFmode);
9522 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
9523 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
9524 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
9525 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
9526 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned V16SI high half -> V8DF: same technique, but the sign
;; fixup uses a mask register compare and a masked add.
9530 (define_expand "vec_unpacku_float_hi_v16si"
9531 [(match_operand:V8DF 0 "register_operand")
9532 (match_operand:V16SI 1 "register_operand")]
9535 REAL_VALUE_TYPE TWO32r;
9538 real_ldexp (&TWO32r, &dconst1, 32);
9539 x = const_double_from_real_value (TWO32r, DFmode);
9541 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
9542 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
9543 tmp[2] = gen_reg_rtx (V8DFmode);
9544 tmp[3] = gen_reg_rtx (V8SImode);
9545 k = gen_reg_rtx (QImode);
9547 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
9548 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
9549 ix86_expand_mask_vec_cmp (k, LT, tmp[2], tmp[0]);
9550 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
9551 emit_move_insn (operands[0], tmp[2]);
;; Unsigned V8SI low half -> V4DF via avx_cvtdq2pd256_2 + sign fixup.
9555 (define_expand "vec_unpacku_float_lo_v8si"
9556 [(match_operand:V4DF 0 "register_operand")
9557 (match_operand:V8SI 1 "nonimmediate_operand")]
9560 REAL_VALUE_TYPE TWO32r;
9564 real_ldexp (&TWO32r, &dconst1, 32);
9565 x = const_double_from_real_value (TWO32r, DFmode);
9567 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
9568 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
9570 for (i = 2; i < 5; i++)
9571 tmp[i] = gen_reg_rtx (V4DFmode);
9572 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
9573 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
9574 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
9575 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned V16SI low half -> V8DF via avx512f_cvtdq2pd512_2 and a
;; mask-register sign fixup.
9579 (define_expand "vec_unpacku_float_lo_v16si"
9580 [(match_operand:V8DF 0 "register_operand")
9581 (match_operand:V16SI 1 "nonimmediate_operand")]
9584 REAL_VALUE_TYPE TWO32r;
9587 real_ldexp (&TWO32r, &dconst1, 32);
9588 x = const_double_from_real_value (TWO32r, DFmode);
9590 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
9591 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
9592 tmp[2] = gen_reg_rtx (V8DFmode);
9593 k = gen_reg_rtx (QImode);
9595 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
9596 ix86_expand_mask_vec_cmp (k, LT, tmp[2], tmp[0]);
9597 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
9598 emit_move_insn (operands[0], tmp[2]);
;; Pack two DF vectors into one SF vector: float_truncate each input
;; to the half-width SF mode (<sf2dfmode>) and vec_concat the halves
;; into the full <ssePSmode> result.  Operands 3/4 are scratch regs
;; allocated in the preparation statements.
9602 (define_expand "vec_pack_trunc_<mode>"
9604 (float_truncate:<sf2dfmode>
9605 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
9607 (float_truncate:<sf2dfmode>
9608 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
9609 (set (match_operand:<ssePSmode> 0 "register_operand")
9610 (vec_concat:<ssePSmode>
9615 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
9616 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; V2DF pack: when AVX is on (and 128-bit vectors are not preferred,
;; and we are optimizing for speed) concatenate both inputs into one
;; V4DF and use a single vcvtpd2ps256; otherwise convert each half
;; with cvtpd2ps and merge the two low halves via movlhps.
9619 (define_expand "vec_pack_trunc_v2df"
9620 [(match_operand:V4SF 0 "register_operand")
9621 (match_operand:V2DF 1 "vector_operand")
9622 (match_operand:V2DF 2 "vector_operand")]
9627 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
9629 tmp0 = gen_reg_rtx (V4DFmode);
9630 tmp1 = force_reg (V2DFmode, operands[1]);
9632 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
9633 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
9637 tmp0 = gen_reg_rtx (V4SFmode);
9638 tmp1 = gen_reg_rtx (V4SFmode);
9640 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
9641 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
9642 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Pack two V8DF into one V16SI with truncating (round-toward-zero)
;; signed conversion: fix_trunc each input to V8SI, then concatenate.
9647 (define_expand "vec_pack_sfix_trunc_v8df"
9648 [(match_operand:V16SI 0 "register_operand")
9649 (match_operand:V8DF 1 "nonimmediate_operand")
9650 (match_operand:V8DF 2 "nonimmediate_operand")]
9655 r1 = gen_reg_rtx (V8SImode);
9656 r2 = gen_reg_rtx (V8SImode);
9658 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
9659 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
9660 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; Same as above for the 256-bit case: two V4DF -> one V8SI.
9664 (define_expand "vec_pack_sfix_trunc_v4df"
9665 [(match_operand:V8SI 0 "register_operand")
9666 (match_operand:V4DF 1 "nonimmediate_operand")
9667 (match_operand:V4DF 2 "nonimmediate_operand")]
9672 r1 = gen_reg_rtx (V4SImode)
9673 r2 = gen_reg_rtx (V4SImode);
9675 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
9676 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
9677 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; 128-bit case, two V2DF -> one V4SI.  Fast path: widen to V4DF and
;; do one truncating convert.  Fallback: cvttpd2dq each half (results
;; land in the low 64 bits of each V4SI) and interleave the two low
;; DI halves to form the packed result.
9681 (define_expand "vec_pack_sfix_trunc_v2df"
9682 [(match_operand:V4SI 0 "register_operand")
9683 (match_operand:V2DF 1 "vector_operand")
9684 (match_operand:V2DF 2 "vector_operand")]
9687 rtx tmp0, tmp1, tmp2;
9689 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
9691 tmp0 = gen_reg_rtx (V4DFmode);
9692 tmp1 = force_reg (V2DFmode, operands[1]);
9694 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
9695 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
9699 tmp0 = gen_reg_rtx (V4SImode);
9700 tmp1 = gen_reg_rtx (V4SImode);
9701 tmp2 = gen_reg_rtx (V2DImode);
9703 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
9704 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
9705 emit_insn (gen_vec_interleave_lowv2di (tmp2,
9706 gen_lowpart (V2DImode, tmp0),
9707 gen_lowpart (V2DImode, tmp1)));
9708 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; Map a DF vector mode to the SI vector mode holding twice as many
;; elements — the natural result mode of the pack-fix patterns below.
9713 (define_mode_attr ssepackfltmode
9714 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; Unsigned truncating pack.  V8DF has a native unsigned convert
;; (vcvttpd2udq), so just convert both halves and concatenate.  For
;; the narrower modes there is no unsigned convert: bias each input
;; into signed range (ix86_expand_adjust_ufix_to_sfix_si, which also
;; returns a correction vector in tmp[2]/tmp[3]), do the signed pack,
;; extract the even/odd-interleaved corrections, and XOR them back in
;; to recover the unsigned result.  The V8SF detour in the non-AVX2
;; 256-bit path exists because cross-lane even/odd extraction is done
;; on float vectors there (only bit patterns matter for the XOR).
9716 (define_expand "vec_pack_ufix_trunc_<mode>"
9717 [(match_operand:<ssepackfltmode> 0 "register_operand")
9718 (match_operand:VF2 1 "register_operand")
9719 (match_operand:VF2 2 "register_operand")]
9722 if (<MODE>mode == V8DFmode)
9726 r1 = gen_reg_rtx (V8SImode);
9727 r2 = gen_reg_rtx (V8SImode);
9729 emit_insn (gen_fixuns_truncv8dfv8si2 (r1, operands[1]));
9730 emit_insn (gen_fixuns_truncv8dfv8si2 (r2, operands[2]));
9731 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
9736 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
9737 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
9738 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
9739 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
9740 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
9742 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
9743 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
9747 tmp[5] = gen_reg_rtx (V8SFmode);
9748 ix86_expand_vec_extract_even_odd (tmp[5],
9749 gen_lowpart (V8SFmode, tmp[2]),
9750 gen_lowpart (V8SFmode, tmp[3]), 0);
9751 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
9753 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
9754 operands[0], 0, OPTAB_DIRECT);
9755 if (tmp[6] != operands[0])
9756 emit_move_insn (operands[0], tmp[6]);
;; Pack two V8DF into one V16SI using *rounding* (not truncating)
;; signed conversion: vcvtpd2dq each half, then concatenate.
9762 (define_expand "avx512f_vec_pack_sfix_v8df"
9763 [(match_operand:V16SI 0 "register_operand")
9764 (match_operand:V8DF 1 "nonimmediate_operand")
9765 (match_operand:V8DF 2 "nonimmediate_operand")]
9770 r1 = gen_reg_rtx (V8SImode);
9771 r2 = gen_reg_rtx (V8SImode);
9773 emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
9774 emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
9775 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; 256-bit rounding-convert pack: two V4DF -> one V8SI.
9779 (define_expand "vec_pack_sfix_v4df"
9780 [(match_operand:V8SI 0 "register_operand")
9781 (match_operand:V4DF 1 "nonimmediate_operand")
9782 (match_operand:V4DF 2 "nonimmediate_operand")]
9787 r1 = gen_reg_rtx (V4SImode);
9788 r2 = gen_reg_rtx (V4SImode);
9790 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
9791 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
9792 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; 128-bit rounding-convert pack, mirroring vec_pack_sfix_trunc_v2df:
;; AVX fast path widens to V4DF for one vcvtpd2dq256; the SSE2
;; fallback converts each half and interleaves the low DI words.
9796 (define_expand "vec_pack_sfix_v2df"
9797 [(match_operand:V4SI 0 "register_operand")
9798 (match_operand:V2DF 1 "vector_operand")
9799 (match_operand:V2DF 2 "vector_operand")]
9802 rtx tmp0, tmp1, tmp2;
9804 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
9806 tmp0 = gen_reg_rtx (V4DFmode);
9807 tmp1 = force_reg (V2DFmode, operands[1]);
9809 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
9810 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
9814 tmp0 = gen_reg_rtx (V4SImode);
9815 tmp1 = gen_reg_rtx (V4SImode);
9816 tmp2 = gen_reg_rtx (V2DImode);
9818 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
9819 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
9820 emit_insn (gen_vec_interleave_lowv2di (tmp2,
9821 gen_lowpart (V2DImode, tmp0),
9822 gen_lowpart (V2DImode, tmp1)));
9823 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
9828 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9830 ;; Parallel single-precision floating point element swizzling
9832 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper for sse_movhlps that legitimizes the binary
;; operands (at most one memory) before emitting the insn, then copies
;; back if the fixup introduced a temporary destination.
9834 (define_expand "sse_movhlps_exp"
9835 [(set (match_operand:V4SF 0 "nonimmediate_operand")
9838 (match_operand:V4SF 1 "nonimmediate_operand")
9839 (match_operand:V4SF 2 "nonimmediate_operand"))
9840 (parallel [(const_int 6)
9846 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
9848 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
9850 /* Fix up the destination if needed.  */
9851 if (dst != operands[0])
9852 emit_move_insn (operands[0], dst);
;; movhlps: merge the high half of op2 into the low half of the
;; destination.  Alternatives cover reg/reg (movhlps/vmovhlps),
;; high-half memory loads via movlps %H2, and a store form (movhps).
9857 (define_insn "sse_movhlps"
9858 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
9861 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
9862 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
9863 (parallel [(const_int 6)
9867 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9869 movhlps\t{%2, %0|%0, %2}
9870 vmovhlps\t{%2, %1, %0|%0, %1, %2}
9871 movlps\t{%H2, %0|%0, %H2}
9872 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
9873 %vmovhps\t{%2, %0|%q0, %2}"
9874 [(set_attr "isa" "noavx,avx,noavx,avx,*")
9875 (set_attr "type" "ssemov")
9876 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
9877 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper for sse_movlhps, same operand-fixup protocol as
;; sse_movhlps_exp above.
9879 (define_expand "sse_movlhps_exp"
9880 [(set (match_operand:V4SF 0 "nonimmediate_operand")
9883 (match_operand:V4SF 1 "nonimmediate_operand")
9884 (match_operand:V4SF 2 "nonimmediate_operand"))
9885 (parallel [(const_int 0)
9891 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
9893 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
9895 /* Fix up the destination if needed.  */
9896 if (dst != operands[0])
9897 emit_move_insn (operands[0], dst);
;; movlhps: merge the low half of op2 into the high half of the
;; destination; memory alternatives use movhps/vmovhps and a
;; high-half store via movlps %H0.
9902 (define_insn "sse_movlhps"
9903 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
9906 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
9907 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
9908 (parallel [(const_int 0)
9912 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
9914 movlhps\t{%2, %0|%0, %2}
9915 vmovlhps\t{%2, %1, %0|%0, %1, %2}
9916 movhps\t{%2, %0|%0, %q2}
9917 vmovhps\t{%2, %1, %0|%0, %1, %q2}
9918 %vmovlps\t{%2, %H0|%H0, %2}"
9919 [(set_attr "isa" "noavx,avx,noavx,avx,*")
9920 (set_attr "type" "ssemov")
9921 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
9922 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; 512-bit vunpckhps, optionally masked.  The selection vector encodes
;; the high-interleave within each of the four 128-bit lanes (indices
;; 2,18 / 3,19 per lane, stepping by 4).
9924 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
9925 [(set (match_operand:V16SF 0 "register_operand" "=v")
9928 (match_operand:V16SF 1 "register_operand" "v")
9929 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
9930 (parallel [(const_int 2) (const_int 18)
9931 (const_int 3) (const_int 19)
9932 (const_int 6) (const_int 22)
9933 (const_int 7) (const_int 23)
9934 (const_int 10) (const_int 26)
9935 (const_int 11) (const_int 27)
9936 (const_int 14) (const_int 30)
9937 (const_int 15) (const_int 31)])))]
9939 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9940 [(set_attr "type" "sselog")
9941 (set_attr "prefix" "evex")
9942 (set_attr "mode" "V16SF")])
9944 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
9945 (define_insn "avx_unpckhps256<mask_name>"
9946 [(set (match_operand:V8SF 0 "register_operand" "=v")
9949 (match_operand:V8SF 1 "register_operand" "v")
9950 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
9951 (parallel [(const_int 2) (const_int 10)
9952 (const_int 3) (const_int 11)
9953 (const_int 6) (const_int 14)
9954 (const_int 7) (const_int 15)])))]
9955 "TARGET_AVX && <mask_avx512vl_condition>"
9956 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9957 [(set_attr "type" "sselog")
9958 (set_attr "prefix" "vex")
9959 (set_attr "mode" "V8SF")])
;; True cross-lane high interleave for V8SF.  Because vunpckhps only
;; works within 128-bit lanes, this is synthesized from an in-lane low
;; and high unpck into scratch regs 3/4, followed by a vec_select that
;; picks the upper-half elements of each — yielding the full
;; { a4,b4, a5,b5, a6,b6, a7,b7 } interleave.
9961 (define_expand "vec_interleave_highv8sf"
9965 (match_operand:V8SF 1 "register_operand")
9966 (match_operand:V8SF 2 "nonimmediate_operand"))
9967 (parallel [(const_int 0) (const_int 8)
9968 (const_int 1) (const_int 9)
9969 (const_int 4) (const_int 12)
9970 (const_int 5) (const_int 13)])))
9976 (parallel [(const_int 2) (const_int 10)
9977 (const_int 3) (const_int 11)
9978 (const_int 6) (const_int 14)
9979 (const_int 7) (const_int 15)])))
9980 (set (match_operand:V8SF 0 "register_operand")
9985 (parallel [(const_int 4) (const_int 5)
9986 (const_int 6) (const_int 7)
9987 (const_int 12) (const_int 13)
9988 (const_int 14) (const_int 15)])))]
9991 operands[3] = gen_reg_rtx (V8SFmode);
9992 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit high interleave: unpckhps (SSE, in-place) or vunpckhps
;; (AVX three-operand, optionally masked under AVX512VL).
9995 (define_insn "vec_interleave_highv4sf<mask_name>"
9996 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
9999 (match_operand:V4SF 1 "register_operand" "0,v")
10000 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
10001 (parallel [(const_int 2) (const_int 6)
10002 (const_int 3) (const_int 7)])))]
10003 "TARGET_SSE && <mask_avx512vl_condition>"
10005 unpckhps\t{%2, %0|%0, %2}
10006 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10007 [(set_attr "isa" "noavx,avx")
10008 (set_attr "type" "sselog")
10009 (set_attr "prefix" "orig,vex")
10010 (set_attr "mode" "V4SF")])
;; 512-bit vunpcklps: low-interleave within each 128-bit lane
;; (indices 0,16 / 1,17 per lane, stepping by 4), optionally masked.
10012 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
10013 [(set (match_operand:V16SF 0 "register_operand" "=v")
10016 (match_operand:V16SF 1 "register_operand" "v")
10017 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
10018 (parallel [(const_int 0) (const_int 16)
10019 (const_int 1) (const_int 17)
10020 (const_int 4) (const_int 20)
10021 (const_int 5) (const_int 21)
10022 (const_int 8) (const_int 24)
10023 (const_int 9) (const_int 25)
10024 (const_int 12) (const_int 28)
10025 (const_int 13) (const_int 29)])))]
10027 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10028 [(set_attr "type" "sselog")
10029 (set_attr "prefix" "evex")
10030 (set_attr "mode" "V16SF")])
10032 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
10033 (define_insn "avx_unpcklps256<mask_name>"
10034 [(set (match_operand:V8SF 0 "register_operand" "=v")
10037 (match_operand:V8SF 1 "register_operand" "v")
10038 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
10039 (parallel [(const_int 0) (const_int 8)
10040 (const_int 1) (const_int 9)
10041 (const_int 4) (const_int 12)
10042 (const_int 5) (const_int 13)])))]
10043 "TARGET_AVX && <mask_avx512vl_condition>"
10044 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10045 [(set_attr "type" "sselog")
10046 (set_attr "prefix" "vex")
10047 (set_attr "mode" "V8SF")])
;; Explicitly-masked 128-bit vunpcklps (AVX512VL write-masking):
;; operand 3 is the merge source (or zero), operand 4 the k-mask.
10049 (define_insn "unpcklps128_mask"
10050 [(set (match_operand:V4SF 0 "register_operand" "=v")
10054 (match_operand:V4SF 1 "register_operand" "v")
10055 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
10056 (parallel [(const_int 0) (const_int 4)
10057 (const_int 1) (const_int 5)]))
10058 (match_operand:V4SF 3 "nonimm_or_0_operand" "0C")
10059 (match_operand:QI 4 "register_operand" "Yk")))]
10061 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10062 [(set_attr "type" "sselog")
10063 (set_attr "prefix" "evex")
10064 (set_attr "mode" "V4SF")])
;; True cross-lane low interleave for V8SF — the mirror image of
;; vec_interleave_highv8sf: in-lane low and high unpcks into scratch
;; regs 3/4, then a vec_select taking the lower-half elements of each
;; to build { a0,b0, a1,b1, a2,b2, a3,b3 }.
10066 (define_expand "vec_interleave_lowv8sf"
10067 [(set (match_dup 3)
10070 (match_operand:V8SF 1 "register_operand")
10071 (match_operand:V8SF 2 "nonimmediate_operand"))
10072 (parallel [(const_int 0) (const_int 8)
10073 (const_int 1) (const_int 9)
10074 (const_int 4) (const_int 12)
10075 (const_int 5) (const_int 13)])))
10081 (parallel [(const_int 2) (const_int 10)
10082 (const_int 3) (const_int 11)
10083 (const_int 6) (const_int 14)
10084 (const_int 7) (const_int 15)])))
10085 (set (match_operand:V8SF 0 "register_operand")
10090 (parallel [(const_int 0) (const_int 1)
10091 (const_int 2) (const_int 3)
10092 (const_int 8) (const_int 9)
10093 (const_int 10) (const_int 11)])))]
10096 operands[3] = gen_reg_rtx (V8SFmode);
10097 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit low interleave: unpcklps (SSE) / vunpcklps (AVX).
10100 (define_insn "vec_interleave_lowv4sf"
10101 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
10104 (match_operand:V4SF 1 "register_operand" "0,v")
10105 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
10106 (parallel [(const_int 0) (const_int 4)
10107 (const_int 1) (const_int 5)])))]
10110 unpcklps\t{%2, %0|%0, %2}
10111 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
10112 [(set_attr "isa" "noavx,avx")
10113 (set_attr "type" "sselog")
10114 (set_attr "prefix" "orig,maybe_evex")
10115 (set_attr "mode" "V4SF")])
10117 ;; These are modeled with the same vec_concat as the others so that we
10118 ;; capture users of shufps that can use the new instructions
;; vmovshdup (256-bit): duplicate the odd-index elements
;; { 1,1, 3,3, 5,5, 7,7 }; optionally masked under AVX512VL.
10119 (define_insn "avx_movshdup256<mask_name>"
10120 [(set (match_operand:V8SF 0 "register_operand" "=v")
10123 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
10125 (parallel [(const_int 1) (const_int 1)
10126 (const_int 3) (const_int 3)
10127 (const_int 5) (const_int 5)
10128 (const_int 7) (const_int 7)])))]
10129 "TARGET_AVX && <mask_avx512vl_condition>"
10130 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
10131 [(set_attr "type" "sse")
10132 (set_attr "prefix" "vex")
10133 (set_attr "mode" "V8SF")])
;; SSE3 128-bit movshdup (odd-element duplicate).
10135 (define_insn "sse3_movshdup<mask_name>"
10136 [(set (match_operand:V4SF 0 "register_operand" "=v")
10139 (match_operand:V4SF 1 "vector_operand" "vBm")
10141 (parallel [(const_int 1)
10145 "TARGET_SSE3 && <mask_avx512vl_condition>"
10146 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
10147 [(set_attr "type" "sse")
10148 (set_attr "prefix_rep" "1")
10149 (set_attr "prefix" "maybe_vex")
10150 (set_attr "mode" "V4SF")])
;; AVX512F 512-bit movshdup (odd-element duplicate across all lanes).
10152 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
10153 [(set (match_operand:V16SF 0 "register_operand" "=v")
10156 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
10158 (parallel [(const_int 1) (const_int 1)
10159 (const_int 3) (const_int 3)
10160 (const_int 5) (const_int 5)
10161 (const_int 7) (const_int 7)
10162 (const_int 9) (const_int 9)
10163 (const_int 11) (const_int 11)
10164 (const_int 13) (const_int 13)
10165 (const_int 15) (const_int 15)])))]
10167 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
10168 [(set_attr "type" "sse")
10169 (set_attr "prefix" "evex")
10170 (set_attr "mode" "V16SF")])
;; vmovsldup (256-bit): duplicate the even-index elements
;; { 0,0, 2,2, 4,4, 6,6 }.
10172 (define_insn "avx_movsldup256<mask_name>"
10173 [(set (match_operand:V8SF 0 "register_operand" "=v")
10176 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
10178 (parallel [(const_int 0) (const_int 0)
10179 (const_int 2) (const_int 2)
10180 (const_int 4) (const_int 4)
10181 (const_int 6) (const_int 6)])))]
10182 "TARGET_AVX && <mask_avx512vl_condition>"
10183 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
10184 [(set_attr "type" "sse")
10185 (set_attr "prefix" "vex")
10186 (set_attr "mode" "V8SF")])
;; SSE3 128-bit movsldup (even-element duplicate).
10188 (define_insn "sse3_movsldup<mask_name>"
10189 [(set (match_operand:V4SF 0 "register_operand" "=v")
10192 (match_operand:V4SF 1 "vector_operand" "vBm")
10194 (parallel [(const_int 0)
10198 "TARGET_SSE3 && <mask_avx512vl_condition>"
10199 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
10200 [(set_attr "type" "sse")
10201 (set_attr "prefix_rep" "1")
10202 (set_attr "prefix" "maybe_vex")
10203 (set_attr "mode" "V4SF")])
;; AVX512F 512-bit movsldup (even-element duplicate across all lanes).
10205 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
10206 [(set (match_operand:V16SF 0 "register_operand" "=v")
10209 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
10211 (parallel [(const_int 0) (const_int 0)
10212 (const_int 2) (const_int 2)
10213 (const_int 4) (const_int 4)
10214 (const_int 6) (const_int 6)
10215 (const_int 8) (const_int 8)
10216 (const_int 10) (const_int 10)
10217 (const_int 12) (const_int 12)
10218 (const_int 14) (const_int 14)])))]
10220 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
10221 [(set_attr "type" "sse")
10222 (set_attr "prefix" "evex")
10223 (set_attr "mode" "V16SF")])
;; Expand a 256-bit shufps with an 8-bit immediate into the explicit
;; vec_select form (avx_shufps256_1).  Each 2-bit field of the
;; immediate is decoded twice — once per 128-bit lane — with +8/+4/+12
;; biases mapping the field into that lane's slice of the
;; concatenated 16-element index space.
10225 (define_expand "avx_shufps256<mask_expand4_name>"
10226 [(match_operand:V8SF 0 "register_operand")
10227 (match_operand:V8SF 1 "register_operand")
10228 (match_operand:V8SF 2 "nonimmediate_operand")
10229 (match_operand:SI 3 "const_int_operand")]
10232 int mask = INTVAL (operands[3]);
10233 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
10236 GEN_INT ((mask >> 0) & 3),
10237 GEN_INT ((mask >> 2) & 3),
10238 GEN_INT (((mask >> 4) & 3) + 8),
10239 GEN_INT (((mask >> 6) & 3) + 8),
10240 GEN_INT (((mask >> 0) & 3) + 4),
10241 GEN_INT (((mask >> 2) & 3) + 4),
10242 GEN_INT (((mask >> 4) & 3) + 12),
10243 GEN_INT (((mask >> 6) & 3) + 12)
10244 <mask_expand4_args>));
10248 ;; One bit in mask selects 2 elements.
;; Matcher for the expanded form above.  The condition requires the
;; high-lane indices (7..10) to equal the low-lane ones plus 4 —
;; i.e. both lanes use the same shuffle — so the operation can be
;; re-encoded into a single vshufps immediate.
10249 (define_insn "avx_shufps256_1<mask_name>"
10250 [(set (match_operand:V8SF 0 "register_operand" "=v")
10253 (match_operand:V8SF 1 "register_operand" "v")
10254 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
10255 (parallel [(match_operand 3 "const_0_to_3_operand" )
10256 (match_operand 4 "const_0_to_3_operand" )
10257 (match_operand 5 "const_8_to_11_operand" )
10258 (match_operand 6 "const_8_to_11_operand" )
10259 (match_operand 7 "const_4_to_7_operand" )
10260 (match_operand 8 "const_4_to_7_operand" )
10261 (match_operand 9 "const_12_to_15_operand")
10262 (match_operand 10 "const_12_to_15_operand")])))]
10264 && <mask_avx512vl_condition>
10265 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
10266 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
10267 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
10268 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
10271 mask = INTVAL (operands[3]);
10272 mask |= INTVAL (operands[4]) << 2;
10273 mask |= (INTVAL (operands[5]) - 8) << 4;
10274 mask |= (INTVAL (operands[6]) - 8) << 6;
10275 operands[3] = GEN_INT (mask);
10277 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
10279 [(set_attr "type" "sseshuf")
10280 (set_attr "length_immediate" "1")
10281 (set_attr "prefix" "<mask_prefix>")
10282 (set_attr "mode" "V8SF")])
;; 128-bit analogue: decode the shufps immediate into four explicit
;; indices (two from op1, two from op2 biased by +4) and emit the
;; vec_select pattern.
10284 (define_expand "sse_shufps<mask_expand4_name>"
10285 [(match_operand:V4SF 0 "register_operand")
10286 (match_operand:V4SF 1 "register_operand")
10287 (match_operand:V4SF 2 "vector_operand")
10288 (match_operand:SI 3 "const_int_operand")]
10291 int mask = INTVAL (operands[3]);
10292 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
10295 GEN_INT ((mask >> 0) & 3),
10296 GEN_INT ((mask >> 2) & 3),
10297 GEN_INT (((mask >> 4) & 3) + 4),
10298 GEN_INT (((mask >> 6) & 3) + 4)
10299 <mask_expand4_args>));
;; Masked 128-bit vshufps (AVX512VL write-masking): re-encode the four
;; explicit indices back into the instruction's 8-bit immediate;
;; operand 7 is the merge source (or zero), operand 8 the k-mask.
10303 (define_insn "sse_shufps_v4sf_mask"
10304 [(set (match_operand:V4SF 0 "register_operand" "=v")
10308 (match_operand:V4SF 1 "register_operand" "v")
10309 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
10310 (parallel [(match_operand 3 "const_0_to_3_operand")
10311 (match_operand 4 "const_0_to_3_operand")
10312 (match_operand 5 "const_4_to_7_operand")
10313 (match_operand 6 "const_4_to_7_operand")]))
10314 (match_operand:V4SF 7 "nonimm_or_0_operand" "0C")
10315 (match_operand:QI 8 "register_operand" "Yk")))]
10319 mask |= INTVAL (operands[3]) << 0;
10320 mask |= INTVAL (operands[4]) << 2;
10321 mask |= (INTVAL (operands[5]) - 4) << 4;
10322 mask |= (INTVAL (operands[6]) - 4) << 6;
10323 operands[3] = GEN_INT (mask);
10325 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
10327 [(set_attr "type" "sseshuf")
10328 (set_attr "length_immediate" "1")
10329 (set_attr "prefix" "evex")
10330 (set_attr "mode" "V4SF")])
;; Unmasked 128-bit shufps over float or int 4-element vectors
;; (VI4F_128); rebuilds the immediate the same way and picks the SSE
;; two-operand or AVX three-operand form by alternative.
10332 (define_insn "sse_shufps_<mode>"
10333 [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
10334 (vec_select:VI4F_128
10335 (vec_concat:<ssedoublevecmode>
10336 (match_operand:VI4F_128 1 "register_operand" "0,v")
10337 (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
10338 (parallel [(match_operand 3 "const_0_to_3_operand")
10339 (match_operand 4 "const_0_to_3_operand")
10340 (match_operand 5 "const_4_to_7_operand")
10341 (match_operand 6 "const_4_to_7_operand")])))]
10345 mask |= INTVAL (operands[3]) << 0;
10346 mask |= INTVAL (operands[4]) << 2;
10347 mask |= (INTVAL (operands[5]) - 4) << 4;
10348 mask |= (INTVAL (operands[6]) - 4) << 6;
10349 operands[3] = GEN_INT (mask);
10351 switch (which_alternative)
10354 return "shufps\t{%3, %2, %0|%0, %2, %3}";
10356 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10358 gcc_unreachable ();
10361 [(set_attr "isa" "noavx,avx")
10362 (set_attr "type" "sseshuf")
10363 (set_attr "length_immediate" "1")
10364 (set_attr "prefix" "orig,maybe_evex")
10365 (set_attr "mode" "V4SF")])
;; Extract the high two SF elements of a V4SF: movhps to memory,
;; movhlps reg-to-reg, or movlps from the high half of a memory op.
10367 (define_insn "sse_storehps"
10368 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
10370 (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
10371 (parallel [(const_int 2) (const_int 3)])))]
10372 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10374 %vmovhps\t{%1, %0|%q0, %1}
10375 %vmovhlps\t{%1, %d0|%d0, %1}
10376 %vmovlps\t{%H1, %d0|%d0, %H1}"
10377 [(set_attr "type" "ssemov")
10378 (set_attr "prefix" "maybe_vex")
10379 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper for sse_loadhps: legitimize operands, emit, copy
;; back if a temporary destination was substituted.
10381 (define_expand "sse_loadhps_exp"
10382 [(set (match_operand:V4SF 0 "nonimmediate_operand")
10385 (match_operand:V4SF 1 "nonimmediate_operand")
10386 (parallel [(const_int 0) (const_int 1)]))
10387 (match_operand:V2SF 2 "nonimmediate_operand")))]
10390 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
10392 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
10394 /* Fix up the destination if needed.  */
10395 if (dst != operands[0])
10396 emit_move_insn (operands[0], dst);
;; Replace the high two SF elements of op1 with the V2SF op2: movhps
;; from memory, movlhps reg-to-reg, or a movlps store to the high
;; half of a memory destination.
10401 (define_insn "sse_loadhps"
10402 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
10405 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
10406 (parallel [(const_int 0) (const_int 1)]))
10407 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
10410 movhps\t{%2, %0|%0, %q2}
10411 vmovhps\t{%2, %1, %0|%0, %1, %q2}
10412 movlhps\t{%2, %0|%0, %2}
10413 vmovlhps\t{%2, %1, %0|%0, %1, %2}
10414 %vmovlps\t{%2, %H0|%H0, %2}"
10415 [(set_attr "isa" "noavx,avx,noavx,avx,*")
10416 (set_attr "type" "ssemov")
10417 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
10418 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract the low two SF elements: movlps to memory, movaps
;; reg-to-reg (whole-register copy suffices), or movlps load.
10420 (define_insn "sse_storelps"
10421 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
10423 (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
10424 (parallel [(const_int 0) (const_int 1)])))]
10425 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10427 %vmovlps\t{%1, %0|%q0, %1}
10428 %vmovaps\t{%1, %0|%0, %1}
10429 %vmovlps\t{%1, %d0|%d0, %q1}"
10430 [(set_attr "type" "ssemov")
10431 (set_attr "prefix" "maybe_vex")
10432 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper for sse_loadlps, same fixup protocol as
;; sse_loadhps_exp above.
10434 (define_expand "sse_loadlps_exp"
10435 [(set (match_operand:V4SF 0 "nonimmediate_operand")
10437 (match_operand:V2SF 2 "nonimmediate_operand")
10439 (match_operand:V4SF 1 "nonimmediate_operand")
10440 (parallel [(const_int 2) (const_int 3)]))))]
10443 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
10445 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
10447 /* Fix up the destination if needed.  */
10448 if (dst != operands[0])
10449 emit_move_insn (operands[0], dst);
;; Replace the low two SF elements of op1 with op2: shufps with the
;; identity-preserving 0xe4 immediate for reg-to-reg, movlps for
;; memory source, or a movlps store.
10454 (define_insn "sse_loadlps"
10455 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
10457 (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
10459 (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
10460 (parallel [(const_int 2) (const_int 3)]))))]
10463 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
10464 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
10465 movlps\t{%2, %0|%0, %q2}
10466 vmovlps\t{%2, %1, %0|%0, %1, %q2}
10467 %vmovlps\t{%2, %0|%q0, %2}"
10468 [(set_attr "isa" "noavx,avx,noavx,avx,*")
10469 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
10470 (set (attr "length_immediate")
10471 (if_then_else (eq_attr "alternative" "0,1")
10473 (const_string "*")))
10474 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
10475 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; movss-style merge: take element 0 from op2, the rest from op1
;; (vec_merge), over float or int 4-element vectors.
10477 (define_insn "sse_movss_<mode>"
10478 [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
10479 (vec_merge:VI4F_128
10480 (match_operand:VI4F_128 2 "register_operand" " x,v")
10481 (match_operand:VI4F_128 1 "register_operand" " 0,v")
10485 movss\t{%2, %0|%0, %2}
10486 vmovss\t{%2, %1, %0|%0, %1, %2}"
10487 [(set_attr "isa" "noavx,avx")
10488 (set_attr "type" "ssemov")
10489 (set_attr "prefix" "orig,maybe_evex")
10490 (set_attr "mode" "SF")])
;; Broadcast element 0 of a V4SF register to all lanes of a 128- or
;; 256-bit SF vector via vbroadcastss.
10492 (define_insn "avx2_vec_dup<mode>"
10493 [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
10494 (vec_duplicate:VF1_128_256
10496 (match_operand:V4SF 1 "register_operand" "v")
10497 (parallel [(const_int 0)]))))]
10499 "vbroadcastss\t{%1, %0|%0, %1}"
10500 [(set_attr "type" "sselog1")
10501 (set_attr "prefix" "maybe_evex")
10502 (set_attr "mode" "<MODE>")])
;; Broadcast element 0 of a V8SF source to all eight lanes; %x1
;; prints the 128-bit (xmm) view of the source register.
10504 (define_insn "avx2_vec_dupv8sf_1"
10505 [(set (match_operand:V8SF 0 "register_operand" "=v")
10506 (vec_duplicate:V8SF
10508 (match_operand:V8SF 1 "register_operand" "v")
10509 (parallel [(const_int 0)]))))]
10511 "vbroadcastss\t{%x1, %0|%0, %x1}"
10512 [(set_attr "type" "sselog1")
10513 (set_attr "prefix" "maybe_evex")
10514 (set_attr "mode" "V8SF")])
;; 512-bit scalar broadcast (vbroadcastss/vbroadcastsd chosen by the
;; <bcstscalarsuff> suffix for the element mode).
10516 (define_insn "avx512f_vec_dup<mode>_1"
10517 [(set (match_operand:VF_512 0 "register_operand" "=v")
10518 (vec_duplicate:VF_512
10519 (vec_select:<ssescalarmode>
10520 (match_operand:VF_512 1 "register_operand" "v")
10521 (parallel [(const_int 0)]))))]
10523 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
10524 [(set_attr "type" "sselog1")
10525 (set_attr "prefix" "evex")
10526 (set_attr "mode" "<MODE>")])
10528 ;; Although insertps takes register source, we prefer
10529 ;; unpcklps with register source since it is shorter.
;; Build a V2SF from two SF scalars (SSE4.1): unpcklps/insertps for
;; general pairs, movss/movd when operand 2 is zero (C constraint),
;; and MMX punpckldq/movd alternatives for *y registers.
10530 (define_insn "*vec_concatv2sf_sse4_1"
10531 [(set (match_operand:V2SF 0 "register_operand"
10532 "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
10534 (match_operand:SF 1 "nonimmediate_operand"
10535 " 0, 0,Yv, 0,0, v,m, 0 , m")
10536 (match_operand:SF 2 "nonimm_or_0_operand"
10537 " Yr,*x,Yv, m,m, m,C,*ym, C")))]
10538 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10540 unpcklps\t{%2, %0|%0, %2}
10541 unpcklps\t{%2, %0|%0, %2}
10542 vunpcklps\t{%2, %1, %0|%0, %1, %2}
10543 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
10544 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
10545 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
10546 %vmovss\t{%1, %0|%0, %1}
10547 punpckldq\t{%2, %0|%0, %2}
10548 movd\t{%1, %0|%0, %1}"
10550 (cond [(eq_attr "alternative" "0,1,3,4")
10551 (const_string "noavx")
10552 (eq_attr "alternative" "2,5")
10553 (const_string "avx")
10555 (const_string "*")))
10557 (cond [(eq_attr "alternative" "6")
10558 (const_string "ssemov")
10559 (eq_attr "alternative" "7")
10560 (const_string "mmxcvt")
10561 (eq_attr "alternative" "8")
10562 (const_string "mmxmov")
10564 (const_string "sselog")))
10565 (set (attr "mmx_isa")
10566 (if_then_else (eq_attr "alternative" "7,8")
10567 (const_string "native")
10568 (const_string "*")))
10569 (set (attr "prefix_data16")
10570 (if_then_else (eq_attr "alternative" "3,4")
10572 (const_string "*")))
10573 (set (attr "prefix_extra")
10574 (if_then_else (eq_attr "alternative" "3,4,5")
10576 (const_string "*")))
10577 (set (attr "length_immediate")
10578 (if_then_else (eq_attr "alternative" "3,4,5")
10580 (const_string "*")))
10581 (set (attr "prefix")
10582 (cond [(eq_attr "alternative" "2,5")
10583 (const_string "maybe_evex")
10584 (eq_attr "alternative" "6")
10585 (const_string "maybe_vex")
10587 (const_string "orig")))
10588 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
10590 ;; ??? In theory we can match memory for the MMX alternative, but allowing
10591 ;; vector_operand for operand 2 and *not* allowing memory for the SSE
10592 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Pre-SSE4.1 V2SF concat: unpcklps for two registers, movss when the
;; second element is zero, plus the MMX register alternatives.
10593 (define_insn "*vec_concatv2sf_sse"
10594 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
10596 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
10597 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
10600 unpcklps\t{%2, %0|%0, %2}
10601 movss\t{%1, %0|%0, %1}
10602 punpckldq\t{%2, %0|%0, %2}
10603 movd\t{%1, %0|%0, %1}"
10604 [(set_attr "mmx_isa" "*,*,native,native")
10605 (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
10606 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Concatenate two V2SF halves into a V4SF: movlhps reg-to-reg or
;; movhps when the high half comes from memory.
10608 (define_insn "*vec_concatv4sf"
10609 [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v")
10611 (match_operand:V2SF 1 "register_operand" " 0,v,0,v")
10612 (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
10615 movlhps\t{%2, %0|%0, %2}
10616 vmovlhps\t{%2, %1, %0|%0, %1, %2}
10617 movhps\t{%2, %0|%0, %q2}
10618 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
10619 [(set_attr "isa" "noavx,avx,noavx,avx")
10620 (set_attr "type" "ssemov")
10621 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
10622 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; V2SF concatenated with zero: a plain movq, which zero-extends the
;; low 64 bits into the full register.
10624 (define_insn "*vec_concatv4sf_0"
10625 [(set (match_operand:V4SF 0 "register_operand" "=v")
10627 (match_operand:V2SF 1 "nonimmediate_operand" "vm")
10628 (match_operand:V2SF 2 "const0_operand")))]
10630 "%vmovq\t{%1, %0|%0, %1}"
10631 [(set_attr "type" "ssemov")
10632 (set_attr "prefix" "maybe_vex")
10633 (set_attr "mode" "DF")])
10635 ;; Avoid combining registers from different units in a single alternative,
10636 ;; see comment above inline_secondary_memory_needed function in i386.cc
10637 (define_insn "vec_set<mode>_0"
10638 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
10639 "=Yr,*x,v,v,v,x,x,v,Yr ,?x ,x ,m ,m ,m")
10640 (vec_merge:VI4F_128
10641 (vec_duplicate:VI4F_128
10642 (match_operand:<ssescalarmode> 2 "general_operand"
10643 " Yr,*x,v,m,r ,m,x,v,?rm,?rm,?rm,!x,?re,!*fF"))
10644 (match_operand:VI4F_128 1 "nonimm_or_0_operand"
10645 " C , C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
10649 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
10650 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
10651 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
10652 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
10653 %vmovd\t{%2, %0|%0, %2}
10654 movss\t{%2, %0|%0, %2}
10655 movss\t{%2, %0|%0, %2}
10656 vmovss\t{%2, %1, %0|%0, %1, %2}
10657 pinsrd\t{$0, %2, %0|%0, %2, 0}
10658 pinsrd\t{$0, %2, %0|%0, %2, 0}
10659 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
10664 (cond [(eq_attr "alternative" "0,1,8,9")
10665 (const_string "sse4_noavx")
10666 (eq_attr "alternative" "2,7,10")
10667 (const_string "avx")
10668 (eq_attr "alternative" "3,4")
10669 (const_string "sse2")
10670 (eq_attr "alternative" "5,6")
10671 (const_string "noavx")
10673 (const_string "*")))
10675 (cond [(eq_attr "alternative" "0,1,2,8,9,10")
10676 (const_string "sselog")
10677 (eq_attr "alternative" "12")
10678 (const_string "imov")
10679 (eq_attr "alternative" "13")
10680 (const_string "fmov")
10682 (const_string "ssemov")))
10683 (set (attr "prefix_extra")
10684 (if_then_else (eq_attr "alternative" "8,9,10")
10686 (const_string "*")))
10687 (set (attr "length_immediate")
10688 (if_then_else (eq_attr "alternative" "8,9,10")
10690 (const_string "*")))
10691 (set (attr "prefix")
10692 (cond [(eq_attr "alternative" "0,1,5,6,8,9")
10693 (const_string "orig")
10694 (eq_attr "alternative" "2")
10695 (const_string "maybe_evex")
10696 (eq_attr "alternative" "3,4")
10697 (const_string "maybe_vex")
10698 (eq_attr "alternative" "7,10")
10699 (const_string "vex")
10701 (const_string "*")))
10702 (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
10703 (set (attr "preferred_for_speed")
10704 (cond [(eq_attr "alternative" "4")
10705 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
10707 (symbol_ref "true")))])
10709 (define_insn "@vec_set<mode>_0"
10710 [(set (match_operand:V8_128 0 "register_operand"
10711 "=v,v,v,x,x,Yr,*x,x,x,x,v,v")
10713 (vec_duplicate:V8_128
10714 (match_operand:<ssescalarmode> 2 "nonimmediate_operand"
10715 " r,m,v,r,m,Yr,*x,r,m,x,r,m"))
10716 (match_operand:V8_128 1 "reg_or_0_operand"
10717 " C,C,v,0,0,0 ,0 ,x,x,x,v,v")
10721 vmovw\t{%k2, %0|%0, %k2}
10722 vmovw\t{%2, %0|%0, %2}
10723 vmovsh\t{%2, %1, %0|%0, %1, %2}
10724 pinsrw\t{$0, %k2, %0|%0, %k2, 0}
10725 pinsrw\t{$0, %2, %0|%0, %2, 0}
10726 pblendw\t{$1, %2, %0|%0, %2, 1}
10727 pblendw\t{$1, %2, %0|%0, %2, 1}
10728 vpinsrw\t{$0, %k2, %1, %0|%0, %1, %k2, 0}
10729 vpinsrw\t{$0, %2, %1, %0|%0, %1, %2, 0}
10730 vpblendw\t{$1, %2, %1, %0|%0, %1, %2, 1}
10731 vpinsrw\t{$0, %k2, %1, %0|%0, %1, %k2, 0}
10732 vpinsrw\t{$0, %2, %1, %0|%0, %1, %2, 0}"
10734 (cond [(eq_attr "alternative" "0,1,2")
10735 (const_string "avx512fp16")
10736 (eq_attr "alternative" "3,4")
10737 (const_string "noavx")
10738 (eq_attr "alternative" "5,6")
10739 (const_string "sse4_noavx")
10740 (eq_attr "alternative" "7,8,9")
10741 (const_string "avx")
10742 (eq_attr "alternative" "10,11")
10743 (const_string "avx512bw")
10745 (const_string "*")))
10747 (if_then_else (eq_attr "alternative" "0,1,2,5,6,9")
10748 (const_string "ssemov")
10749 (const_string "sselog")))
10750 (set (attr "prefix_data16")
10751 (if_then_else (eq_attr "alternative" "3,4")
10753 (const_string "*")))
10754 (set (attr "prefix_extra")
10755 (if_then_else (eq_attr "alternative" "5,6,7,8,9")
10757 (const_string "*")))
10758 (set (attr "length_immediate")
10759 (if_then_else (eq_attr "alternative" "0,1,2")
10761 (const_string "1")))
10762 (set (attr "prefix")
10763 (cond [(eq_attr "alternative" "0,1,2,10,11")
10764 (const_string "evex")
10765 (eq_attr "alternative" "7,8,9")
10766 (const_string "vex")
10768 (const_string "orig")))
10770 (if_then_else (eq_attr "alternative" "0,1,2")
10771 (const_string "HF")
10772 (const_string "TI")))
10773 (set (attr "enabled")
10774 (cond [(and (not (match_test "<MODE>mode == V8HFmode || <MODE>mode == V8BFmode"))
10775 (eq_attr "alternative" "2"))
10776 (symbol_ref "false")
10778 (const_string "*")))])
10780 ;; vmovw also clears the higher bits
10781 (define_insn "vec_set<mode>_0"
10782 [(set (match_operand:VI2F_256_512 0 "register_operand" "=v,v")
10783 (vec_merge:VI2F_256_512
10784 (vec_duplicate:VI2F_256_512
10785 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m"))
10786 (match_operand:VI2F_256_512 1 "const0_operand")
10788 "TARGET_AVX512FP16"
10790 vmovw\t{%k2, %x0|%x0, %k2}
10791 vmovw\t{%2, %x0|%x0, %2}"
10792 [(set_attr "type" "ssemov")
10793 (set_attr "prefix" "evex")
10794 (set_attr "mode" "HF")])
10796 (define_insn_and_split "*vec_set<mode>_0_zero_extendhi"
10797 [(set (match_operand:VI48_AVX512F 0 "register_operand")
10798 (vec_merge:VI48_AVX512F
10799 (vec_duplicate:VI48_AVX512F
10800 (zero_extend:<ssescalarmode>
10801 (match_operand:HI 1 "nonimmediate_operand")))
10802 (match_operand:VI48_AVX512F 2 "const0_operand")
10804 "TARGET_AVX512FP16 && ix86_pre_reload_split ()"
10809 rtx dest = gen_reg_rtx (<ssewvecmode>mode);
10810 emit_insn (gen_vec_set<ssewvecmodelower>_0 (dest,
10811 CONST0_RTX (<ssewvecmode>mode),
10813 emit_move_insn (operands[0],
10814 lowpart_subreg (<MODE>mode, dest, <ssewvecmode>mode));
10818 (define_insn_and_split "*vec_setv2di_0_zero_extendhi_1"
10819 [(set (match_operand:V2DI 0 "register_operand")
10822 (match_operand:HI 1 "nonimmediate_operand"))
10824 "TARGET_AVX512FP16 && ix86_pre_reload_split ()"
10829 rtx dest = gen_reg_rtx (V8HImode);
10830 emit_insn (gen_vec_setv8hi_0 (dest, CONST0_RTX (V8HImode), operands[1]));
10831 emit_move_insn (operands[0],
10832 lowpart_subreg (V2DImode, dest, V8HImode));
;; 128-bit vector modes with eight 16-bit floating-point elements:
;; V8HF (half-precision) and V8BF (bfloat16).
10836 (define_mode_iterator V8BFH_128 [V8HF V8BF])
10838 (define_insn "avx512fp16_mov<mode>"
10839 [(set (match_operand:V8BFH_128 0 "register_operand" "=v")
10840 (vec_merge:V8BFH_128
10841 (match_operand:V8BFH_128 2 "register_operand" "v")
10842 (match_operand:V8BFH_128 1 "register_operand" "v")
10844 "TARGET_AVX512FP16"
10845 "vmovsh\t{%2, %1, %0|%0, %1, %2}"
10846 [(set_attr "type" "ssemov")
10847 (set_attr "prefix" "evex")
10848 (set_attr "mode" "HF")])
10850 ;; A subset is vec_setv4sf.
10851 (define_insn "*vec_setv4sf_sse4_1"
10852 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
10854 (vec_duplicate:V4SF
10855 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
10856 (match_operand:V4SF 1 "register_operand" "0,0,v")
10857 (match_operand:SI 3 "const_int_operand")))]
10859 && ((unsigned) exact_log2 (INTVAL (operands[3]))
10860 < GET_MODE_NUNITS (V4SFmode))"
10862 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
10863 switch (which_alternative)
10867 return "insertps\t{%3, %2, %0|%0, %2, %3}";
10869 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10871 gcc_unreachable ();
10874 [(set_attr "isa" "noavx,noavx,avx")
10875 (set_attr "type" "sselog")
10876 (set_attr "prefix_data16" "1,1,*")
10877 (set_attr "prefix_extra" "1")
10878 (set_attr "length_immediate" "1")
10879 (set_attr "prefix" "orig,orig,maybe_evex")
10880 (set_attr "mode" "V4SF")])
10882 ;; All of vinsertps, vmovss, vmovd clear also the higher bits.
10883 (define_insn "vec_set<mode>_0"
10884 [(set (match_operand:VI4F_256_512 0 "register_operand" "=v,v,v")
10885 (vec_merge:VI4F_256_512
10886 (vec_duplicate:VI4F_256_512
10887 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "v,m,r"))
10888 (match_operand:VI4F_256_512 1 "const0_operand")
10892 vinsertps\t{$0xe, %2, %2, %x0|%x0, %2, %2, 0xe}
10893 vmov<ssescalarmodesuffix>\t{%x2, %x0|%x0, %2}
10894 vmovd\t{%2, %x0|%x0, %2}"
10895 [(set (attr "type")
10896 (if_then_else (eq_attr "alternative" "0")
10897 (const_string "sselog")
10898 (const_string "ssemov")))
10899 (set_attr "prefix" "maybe_evex")
10900 (set_attr "mode" "SF,<ssescalarmode>,SI")
10901 (set (attr "preferred_for_speed")
10902 (cond [(eq_attr "alternative" "2")
10903 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
10905 (symbol_ref "true")))])
10907 (define_insn_and_split "*vec_set<mode>_0_zero_extendsi"
10908 [(set (match_operand:VI8 0 "register_operand")
10912 (match_operand:SI 1 "nonimmediate_operand")))
10913 (match_operand:VI8 2 "const0_operand")
10915 "TARGET_SSE2 && ix86_pre_reload_split ()"
10920 rtx dest = gen_reg_rtx (<ssepackmode>mode);
10921 emit_insn (gen_vec_set<ssepackmodelower>_0 (dest,
10922 CONST0_RTX (<ssepackmode>mode),
10924 emit_move_insn (operands[0],
10925 lowpart_subreg (<MODE>mode, dest, <ssepackmode>mode));
10929 (define_insn_and_split "*vec_setv2di_0_zero_extendsi_1"
10930 [(set (match_operand:V2DI 0 "register_operand")
10933 (match_operand:SI 1 "nonimmediate_operand"))
10935 "TARGET_SSE2 && ix86_pre_reload_split ()"
10940 rtx dest = gen_reg_rtx (V4SImode);
10941 emit_insn (gen_vec_setv4si_0 (dest, CONST0_RTX (V4SImode), operands[1]));
10942 emit_move_insn (operands[0],
10943 lowpart_subreg (V2DImode, dest, V4SImode));
10947 (define_insn "@sse4_1_insertps_<mode>"
10948 [(set (match_operand:VI4F_128 0 "register_operand" "=Yr,*x,v")
10950 [(match_operand:VI4F_128 2 "nonimmediate_operand" "Yrm,*xm,vm")
10951 (match_operand:VI4F_128 1 "register_operand" "0,0,v")
10952 (match_operand:SI 3 "const_0_to_255_operand")]
10956 if (MEM_P (operands[2]))
10958 unsigned count_s = INTVAL (operands[3]) >> 6;
10960 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
10961 operands[2] = adjust_address_nv (operands[2],
10962 <ssescalarmode>mode, count_s * 4);
10964 switch (which_alternative)
10968 return "insertps\t{%3, %2, %0|%0, %2, %3}";
10970 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10972 gcc_unreachable ();
10975 [(set_attr "isa" "noavx,noavx,avx")
10976 (set_attr "type" "sselog")
10977 (set_attr "prefix_data16" "1,1,*")
10978 (set_attr "prefix_extra" "1")
10979 (set_attr "length_immediate" "1")
10980 (set_attr "prefix" "orig,orig,maybe_evex")
10981 (set_attr "mode" "V4SF")])
10984 [(set (match_operand:VI4F_128 0 "memory_operand")
10985 (vec_merge:VI4F_128
10986 (vec_duplicate:VI4F_128
10987 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
10990 "TARGET_SSE && reload_completed"
10991 [(set (match_dup 0) (match_dup 1))]
10992 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
10994 ;; Standard scalar operation patterns which preserve the rest of the
10995 ;; vector for combiner.
10996 (define_insn "vec_setv2df_0"
10997 [(set (match_operand:V2DF 0 "register_operand" "=x,v,x,v")
10999 (vec_duplicate:V2DF
11000 (match_operand:DF 2 "nonimmediate_operand" " x,v,m,m"))
11001 (match_operand:V2DF 1 "register_operand" " 0,v,0,v")
11005 movsd\t{%2, %0|%0, %2}
11006 vmovsd\t{%2, %1, %0|%0, %1, %2}
11007 movlpd\t{%2, %0|%0, %2}
11008 vmovlpd\t{%2, %1, %0|%0, %1, %2}"
11009 [(set_attr "isa" "noavx,avx,noavx,avx")
11010 (set_attr "type" "ssemov")
11011 (set_attr "mode" "DF")])
11013 (define_expand "vec_set<mode>"
11014 [(match_operand:V_128 0 "register_operand")
11015 (match_operand:<ssescalarmode> 1 "register_operand")
11016 (match_operand 2 "vec_setm_sse41_operand")]
11019 if (CONST_INT_P (operands[2]))
11020 ix86_expand_vector_set (false, operands[0], operands[1],
11021 INTVAL (operands[2]));
11023 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
11027 (define_expand "vec_set<mode>"
11028 [(match_operand:V8BFH_128 0 "register_operand")
11029 (match_operand:<ssescalarmode> 1 "register_operand")
11030 (match_operand 2 "vec_setm_sse41_operand")]
11033 if (CONST_INT_P (operands[2]))
11034 ix86_expand_vector_set (false, operands[0], operands[1],
11035 INTVAL (operands[2]));
11037 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
11041 (define_expand "vec_set<mode>"
11042 [(match_operand:V_256_512 0 "register_operand")
11043 (match_operand:<ssescalarmode> 1 "register_operand")
11044 (match_operand 2 "vec_setm_avx2_operand")]
11047 if (CONST_INT_P (operands[2]))
11048 ix86_expand_vector_set (false, operands[0], operands[1],
11049 INTVAL (operands[2]));
11051 ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
11055 (define_insn_and_split "*vec_extractv4sf_0"
11056 [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
11058 (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
11059 (parallel [(const_int 0)])))]
11060 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
11062 "&& reload_completed"
11063 [(set (match_dup 0) (match_dup 1))]
11064 "operands[1] = gen_lowpart (SFmode, operands[1]);")
11066 (define_insn_and_split "*sse4_1_extractps"
11067 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
11069 (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
11070 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
11073 extractps\t{%2, %1, %0|%0, %1, %2}
11074 extractps\t{%2, %1, %0|%0, %1, %2}
11075 vextractps\t{%2, %1, %0|%0, %1, %2}
11078 "&& reload_completed && SSE_REG_P (operands[0])"
11081 rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
11082 switch (INTVAL (operands[2]))
11086 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
11087 operands[2], operands[2],
11088 GEN_INT (INTVAL (operands[2]) + 4),
11089 GEN_INT (INTVAL (operands[2]) + 4)));
11092 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
11095 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
11096 gcc_unreachable ();
11100 [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
11101 (set_attr "type" "sselog,sselog,sselog,*,*")
11102 (set_attr "prefix_data16" "1,1,1,*,*")
11103 (set_attr "prefix_extra" "1,1,1,*,*")
11104 (set_attr "length_immediate" "1,1,1,*,*")
11105 (set_attr "prefix" "orig,orig,maybe_evex,*,*")
11106 (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
11108 (define_insn_and_split "*vec_extractv4sf_mem"
11109 [(set (match_operand:SF 0 "register_operand" "=v,?r,f")
11111 (match_operand:V4SF 1 "memory_operand" "o,o,o")
11112 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
11115 "&& reload_completed"
11116 [(set (match_dup 0) (match_dup 1))]
11118 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; ISA prefix used to build the masked 32x4/64x2 vextract expander name
;; for each 512-bit source mode (see the
;; <extract_type>_vextract<shuffletype><extract_suf>_mask expander below).
10821 (define_mode_attr extract_type
10822 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
;; Element-size/count suffix of the quarter-width vextract instruction
;; ("32x4" for 32-bit-element modes, "64x2" for 64-bit-element modes),
;; used together with extract_type when composing pattern names.
11124 (define_mode_attr extract_suf
11125 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")
11126 (V8SF "32x4") (V8SI "32x4") (V4DF "64x2") (V4DI "64x2")])
;; 512-bit modes for the quarter-width (128-bit) extract patterns.
;; The 64-bit-element modes additionally require AVX512DQ, since they map
;; to the vextract*64x2 instructions.
11128 (define_mode_iterator AVX512_VEC
11129 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
11131 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
11132 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
11133 (match_operand:AVX512_VEC 1 "register_operand")
11134 (match_operand:SI 2 "const_0_to_3_operand")
11135 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
11136 (match_operand:QI 4 "register_operand")]
11140 mask = INTVAL (operands[2]);
11141 rtx dest = operands[0];
11143 if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
11144 dest = gen_reg_rtx (<ssequartermode>mode);
11146 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
11147 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
11148 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
11149 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
11152 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
11153 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
11155 if (dest != operands[0])
11156 emit_move_insn (operands[0], dest);
11160 (define_insn "avx512dq_vextract<shuffletype>64x2_1_mask"
11161 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
11162 (vec_merge:<ssequartermode>
11163 (vec_select:<ssequartermode>
11164 (match_operand:V8FI 1 "register_operand" "v,v")
11165 (parallel [(match_operand 2 "const_0_to_7_operand")
11166 (match_operand 3 "const_0_to_7_operand")]))
11167 (match_operand:<ssequartermode> 4 "nonimm_or_0_operand" "0C,0")
11168 (match_operand:QI 5 "register_operand" "Yk,Yk")))]
11170 && INTVAL (operands[2]) % 2 == 0
11171 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
11172 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[4]))"
11174 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
11175 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2}";
11177 [(set_attr "type" "sselog1")
11178 (set_attr "prefix_extra" "1")
11179 (set_attr "length_immediate" "1")
11180 (set_attr "prefix" "evex")
11181 (set_attr "mode" "<sseinsnmode>")])
11183 (define_insn "*avx512dq_vextract<shuffletype>64x2_1"
11184 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
11185 (vec_select:<ssequartermode>
11186 (match_operand:V8FI 1 "register_operand" "v")
11187 (parallel [(match_operand 2 "const_0_to_7_operand")
11188 (match_operand 3 "const_0_to_7_operand")])))]
11190 && INTVAL (operands[2]) % 2 == 0
11191 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
11193 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
11194 return "vextract<shuffletype>64x2\t{%2, %1, %0|%0, %1, %2}";
11196 [(set_attr "type" "sselog1")
11197 (set_attr "prefix_extra" "1")
11198 (set_attr "length_immediate" "1")
11199 (set_attr "prefix" "evex")
11200 (set_attr "mode" "<sseinsnmode>")])
11203 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
11204 (vec_select:<ssequartermode>
11205 (match_operand:V8FI 1 "register_operand")
11206 (parallel [(const_int 0) (const_int 1)])))]
11208 && reload_completed
11209 && (TARGET_AVX512VL
11210 || REG_P (operands[0])
11211 || !EXT_REX_SSE_REG_P (operands[1]))"
11212 [(set (match_dup 0) (match_dup 1))]
11214 if (!TARGET_AVX512VL
11215 && REG_P (operands[0])
11216 && EXT_REX_SSE_REG_P (operands[1]))
11218 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
11220 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
11223 (define_insn "avx512f_vextract<shuffletype>32x4_1_mask"
11224 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=v,m")
11225 (vec_merge:<ssequartermode>
11226 (vec_select:<ssequartermode>
11227 (match_operand:V16FI 1 "register_operand" "v,v")
11228 (parallel [(match_operand 2 "const_0_to_15_operand")
11229 (match_operand 3 "const_0_to_15_operand")
11230 (match_operand 4 "const_0_to_15_operand")
11231 (match_operand 5 "const_0_to_15_operand")]))
11232 (match_operand:<ssequartermode> 6 "nonimm_or_0_operand" "0C,0")
11233 (match_operand:QI 7 "register_operand" "Yk,Yk")))]
11235 && INTVAL (operands[2]) % 4 == 0
11236 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
11237 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
11238 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
11239 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[6]))"
11241 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
11242 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}%N6|%0%{%7%}%N6, %1, %2}";
11244 [(set_attr "type" "sselog1")
11245 (set_attr "prefix_extra" "1")
11246 (set_attr "length_immediate" "1")
11247 (set_attr "prefix" "evex")
11248 (set_attr "mode" "<sseinsnmode>")])
11250 (define_insn "*avx512f_vextract<shuffletype>32x4_1"
11251 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
11252 (vec_select:<ssequartermode>
11253 (match_operand:V16FI 1 "register_operand" "v")
11254 (parallel [(match_operand 2 "const_0_to_15_operand")
11255 (match_operand 3 "const_0_to_15_operand")
11256 (match_operand 4 "const_0_to_15_operand")
11257 (match_operand 5 "const_0_to_15_operand")])))]
11259 && INTVAL (operands[2]) % 4 == 0
11260 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
11261 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
11262 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
11264 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
11265 return "vextract<shuffletype>32x4\t{%2, %1, %0|%0, %1, %2}";
11267 [(set_attr "type" "sselog1")
11268 (set_attr "prefix_extra" "1")
11269 (set_attr "length_immediate" "1")
11270 (set_attr "prefix" "evex")
11271 (set_attr "mode" "<sseinsnmode>")])
11274 [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
11275 (vec_select:<ssequartermode>
11276 (match_operand:V16FI 1 "register_operand")
11277 (parallel [(const_int 0) (const_int 1)
11278 (const_int 2) (const_int 3)])))]
11280 && reload_completed
11281 && (TARGET_AVX512VL
11282 || REG_P (operands[0])
11283 || !EXT_REX_SSE_REG_P (operands[1]))"
11284 [(set (match_dup 0) (match_dup 1))]
11286 if (!TARGET_AVX512VL
11287 && REG_P (operands[0])
11288 && EXT_REX_SSE_REG_P (operands[1]))
11290 = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
11292 operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
;; ISA prefix for the masked half-width (32x8/64x4) vextract expander names.
;; Note the pairing is the inverse of extract_type: the 32-bit-element
;; half-width extracts are the AVX512DQ forms here.
11295 (define_mode_attr extract_type_2
11296 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
;; Element-size/count suffix of the half-width (256-bit) vextract
;; instruction for each 512-bit source mode.
11298 (define_mode_attr extract_suf_2
11299 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
;; 512-bit modes for the half-width (256-bit) extract patterns.
;; The 32-bit-element modes require AVX512DQ (vextract*32x8); the
;; 64-bit-element modes are plain AVX512F (vextract*64x4).
11301 (define_mode_iterator AVX512_VEC_2
11302 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
11304 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
11305 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
11306 (match_operand:AVX512_VEC_2 1 "register_operand")
11307 (match_operand:SI 2 "const_0_to_1_operand")
11308 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
11309 (match_operand:QI 4 "register_operand")]
11312 rtx (*insn)(rtx, rtx, rtx, rtx);
11313 rtx dest = operands[0];
11315 if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
11316 dest = gen_reg_rtx (<ssehalfvecmode>mode);
11318 switch (INTVAL (operands[2]))
11321 insn = gen_vec_extract_lo_<mode>_mask;
11324 insn = gen_vec_extract_hi_<mode>_mask;
11327 gcc_unreachable ();
11330 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
11331 if (dest != operands[0])
11332 emit_move_insn (operands[0], dest);
11337 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
11338 (vec_select:<ssehalfvecmode>
11339 (match_operand:V8FI 1 "nonimmediate_operand")
11340 (parallel [(const_int 0) (const_int 1)
11341 (const_int 2) (const_int 3)])))]
11342 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
11343 && reload_completed
11344 && (TARGET_AVX512VL
11345 || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
11346 [(set (match_dup 0) (match_dup 1))]
11347 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
11349 (define_insn "vec_extract_lo_<mode>_mask"
11350 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
11351 (vec_merge:<ssehalfvecmode>
11352 (vec_select:<ssehalfvecmode>
11353 (match_operand:V8FI 1 "register_operand" "v,v")
11354 (parallel [(const_int 0) (const_int 1)
11355 (const_int 2) (const_int 3)]))
11356 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
11357 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
11359 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
11360 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
11361 [(set_attr "type" "sselog1")
11362 (set_attr "prefix_extra" "1")
11363 (set_attr "length_immediate" "1")
11364 (set_attr "memory" "none,store")
11365 (set_attr "prefix" "evex")
11366 (set_attr "mode" "<sseinsnmode>")])
11368 (define_insn "vec_extract_lo_<mode>"
11369 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,vm,v")
11370 (vec_select:<ssehalfvecmode>
11371 (match_operand:V8FI 1 "nonimmediate_operand" "v,v,vm")
11372 (parallel [(const_int 0) (const_int 1)
11373 (const_int 2) (const_int 3)])))]
11374 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
11376 if (!TARGET_AVX512VL && !MEM_P (operands[1]))
11377 return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
11381 [(set_attr "type" "sselog1")
11382 (set_attr "prefix_extra" "1")
11383 (set_attr "length_immediate" "1")
11384 (set_attr "memory" "none,store,load")
11385 (set_attr "prefix" "evex")
11386 (set_attr "mode" "<sseinsnmode>")])
11388 (define_insn "vec_extract_hi_<mode>_mask"
11389 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
11390 (vec_merge:<ssehalfvecmode>
11391 (vec_select:<ssehalfvecmode>
11392 (match_operand:V8FI 1 "register_operand" "v,v")
11393 (parallel [(const_int 4) (const_int 5)
11394 (const_int 6) (const_int 7)]))
11395 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
11396 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
11398 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
11399 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
11400 [(set_attr "type" "sselog1")
11401 (set_attr "prefix_extra" "1")
11402 (set_attr "length_immediate" "1")
11403 (set_attr "prefix" "evex")
11404 (set_attr "mode" "<sseinsnmode>")])
11406 (define_insn "vec_extract_hi_<mode>"
11407 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
11408 (vec_select:<ssehalfvecmode>
11409 (match_operand:V8FI 1 "register_operand" "v")
11410 (parallel [(const_int 4) (const_int 5)
11411 (const_int 6) (const_int 7)])))]
11413 "vextract<shuffletype>64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
11414 [(set_attr "type" "sselog1")
11415 (set_attr "prefix_extra" "1")
11416 (set_attr "length_immediate" "1")
11417 (set_attr "prefix" "evex")
11418 (set_attr "mode" "<sseinsnmode>")])
11420 (define_insn "vec_extract_hi_<mode>_mask"
11421 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
11422 (vec_merge:<ssehalfvecmode>
11423 (vec_select:<ssehalfvecmode>
11424 (match_operand:V16FI 1 "register_operand" "v,v")
11425 (parallel [(const_int 8) (const_int 9)
11426 (const_int 10) (const_int 11)
11427 (const_int 12) (const_int 13)
11428 (const_int 14) (const_int 15)]))
11429 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
11430 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
11432 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
11433 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
11434 [(set_attr "type" "sselog1")
11435 (set_attr "prefix_extra" "1")
11436 (set_attr "length_immediate" "1")
11437 (set_attr "prefix" "evex")
11438 (set_attr "mode" "<sseinsnmode>")])
11440 (define_insn "vec_extract_hi_<mode>"
11441 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,vm")
11442 (vec_select:<ssehalfvecmode>
11443 (match_operand:V16FI 1 "register_operand" "v,v")
11444 (parallel [(const_int 8) (const_int 9)
11445 (const_int 10) (const_int 11)
11446 (const_int 12) (const_int 13)
11447 (const_int 14) (const_int 15)])))]
11450 vextract<shuffletype>32x8\t{$0x1, %1, %0|%0, %1, 0x1}
11451 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
11452 [(set_attr "type" "sselog1")
11453 (set_attr "prefix_extra" "1")
11454 (set_attr "isa" "avx512dq,noavx512dq")
11455 (set_attr "length_immediate" "1")
11456 (set_attr "prefix" "evex")
11457 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit int/float modes handled by the avx512vl_vextractf128<mode>
;; expander below; the 64-bit-element modes are gated on AVX512DQ.
11459 (define_mode_iterator VI48F_256_DQ
11460 [V8SI V8SF (V4DI "TARGET_AVX512DQ") (V4DF "TARGET_AVX512DQ")])
11462 (define_expand "avx512vl_vextractf128<mode>"
11463 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
11464 (match_operand:VI48F_256_DQ 1 "register_operand")
11465 (match_operand:SI 2 "const_0_to_1_operand")
11466 (match_operand:<ssehalfvecmode> 3 "nonimm_or_0_operand")
11467 (match_operand:QI 4 "register_operand")]
11470 rtx (*insn)(rtx, rtx, rtx, rtx);
11471 rtx dest = operands[0];
11474 && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
11475 /* For V8S[IF]mode there are maskm insns with =m and 0
11477 ? !rtx_equal_p (dest, operands[3])
11478 /* For V4D[IF]mode, hi insns don't allow memory, and
11479 lo insns have =m and 0C constraints. */
11480 : (operands[2] != const0_rtx
11481 || (!rtx_equal_p (dest, operands[3])
11482 && GET_CODE (operands[3]) != CONST_VECTOR))))
11483 dest = gen_reg_rtx (<ssehalfvecmode>mode);
11484 switch (INTVAL (operands[2]))
11487 insn = gen_vec_extract_lo_<mode>_mask;
11490 insn = gen_vec_extract_hi_<mode>_mask;
11493 gcc_unreachable ();
11496 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
11497 if (dest != operands[0])
11498 emit_move_insn (operands[0], dest);
11502 (define_expand "avx_vextractf128<mode>"
11503 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
11504 (match_operand:V_256H 1 "register_operand")
11505 (match_operand:SI 2 "const_0_to_1_operand")]
11508 rtx (*insn)(rtx, rtx);
11510 switch (INTVAL (operands[2]))
11513 insn = gen_vec_extract_lo_<mode>;
11516 insn = gen_vec_extract_hi_<mode>;
11519 gcc_unreachable ();
11522 emit_insn (insn (operands[0], operands[1]));
11526 (define_insn "vec_extract_lo_<mode>_mask"
11527 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
11528 (vec_merge:<ssehalfvecmode>
11529 (vec_select:<ssehalfvecmode>
11530 (match_operand:V16FI 1 "register_operand" "v,v")
11531 (parallel [(const_int 0) (const_int 1)
11532 (const_int 2) (const_int 3)
11533 (const_int 4) (const_int 5)
11534 (const_int 6) (const_int 7)]))
11535 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
11536 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
11538 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
11539 "vextract<shuffletype>32x8\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
11540 [(set_attr "type" "sselog1")
11541 (set_attr "prefix_extra" "1")
11542 (set_attr "length_immediate" "1")
11543 (set_attr "memory" "none,store")
11544 (set_attr "prefix" "evex")
11545 (set_attr "mode" "<sseinsnmode>")])
;; Extract the low half (elements 0..7) of a 512-bit 16-element vector
;; (V16FI).  When no special handling is needed the split below turns this
;; into a plain move of the low subreg; otherwise emit VEXTRACT*32x8
;; (AVX512DQ) or fall back to VEXTRACT*64x4.
11547 (define_insn "vec_extract_lo_<mode>"
11548 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
11549 (vec_select:<ssehalfvecmode>
11550 (match_operand:V16FI 1 "nonimmediate_operand" "v,m,v")
11551 (parallel [(const_int 0) (const_int 1)
11552 (const_int 2) (const_int 3)
11553 (const_int 4) (const_int 5)
11554 (const_int 6) (const_int 7)])))]
11556 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; Without AVX512VL, a store from an extended SSE register (xmm16+) cannot
;; be done with a plain 256-bit move, so emit the explicit extract insn.
11558 if (!TARGET_AVX512VL
11559 && !REG_P (operands[0])
11560 && EXT_REX_SSE_REG_P (operands[1]))
11562 if (TARGET_AVX512DQ)
11563 return "vextract<shuffletype>32x8\t{$0x0, %1, %0|%0, %1, 0x0}";
11565 return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
11570 [(set_attr "type" "sselog1")
11571 (set_attr "prefix_extra" "1")
11572 (set_attr "length_immediate" "1")
11573 (set_attr "memory" "none,load,store")
11574 (set_attr "prefix" "evex")
11575 (set_attr "mode" "<sseinsnmode>")])
;; Post-reload split: rewrite the low-half extract as a simple move of the
;; low subreg whenever the extended-register store case above does not apply.
11578 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
11579 (vec_select:<ssehalfvecmode>
11580 (match_operand:V16FI 1 "nonimmediate_operand")
11581 (parallel [(const_int 0) (const_int 1)
11582 (const_int 2) (const_int 3)
11583 (const_int 4) (const_int 5)
11584 (const_int 6) (const_int 7)])))]
11585 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
11586 && reload_completed
11587 && (TARGET_AVX512VL
11588 || REG_P (operands[0])
11589 || !EXT_REX_SSE_REG_P (operands[1]))"
11590 [(set (match_dup 0) (match_dup 1))]
;; For an ext-SSE-reg destination without AVX512VL, widen operand 0 back to
;; the full vector mode so the move stays representable.
11592 if (!TARGET_AVX512VL
11593 && REG_P (operands[0])
11594 && EXT_REX_SSE_REG_P (operands[1]))
11596 = lowpart_subreg (<MODE>mode, operands[0], <ssehalfvecmode>mode);
11598 operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
;; Masked extract of the low two elements of a 256-bit 4-element vector
;; (VI8F_256: V4DI/V4DF); merges with operand 2 under mask operand 3.
11601 (define_insn "vec_extract_lo_<mode>_mask"
11602 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
11603 (vec_merge:<ssehalfvecmode>
11604 (vec_select:<ssehalfvecmode>
11605 (match_operand:VI8F_256 1 "register_operand" "v,v")
11606 (parallel [(const_int 0) (const_int 1)]))
11607 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
11608 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
;; A masked store must merge with the destination itself.
11611 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
11612 "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
11613 [(set_attr "type" "sselog1")
11614 (set_attr "prefix_extra" "1")
11615 (set_attr "length_immediate" "1")
11616 (set_attr "memory" "none,store")
11617 (set_attr "prefix" "evex")
11618 (set_attr "mode" "XI")])
;; Unmasked low-half extract; split below replaces it with a lowpart move.
11620 (define_insn "vec_extract_lo_<mode>"
11621 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
11622 (vec_select:<ssehalfvecmode>
11623 (match_operand:VI8F_256 1 "nonimmediate_operand" "v,vm")
11624 (parallel [(const_int 0) (const_int 1)])))]
11626 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; Post-reload split: the low half is just the low 128-bit subreg.
11630 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
11631 (vec_select:<ssehalfvecmode>
11632 (match_operand:VI8F_256 1 "nonimmediate_operand")
11633 (parallel [(const_int 0) (const_int 1)])))]
11634 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
11635 && reload_completed"
11636 [(set (match_dup 0) (match_dup 1))]
11637 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Masked extract of the high two elements (2,3) of a 256-bit 4-element
;; vector (VI8F_256); merges with operand 2 under mask operand 3.
11639 (define_insn "vec_extract_hi_<mode>_mask"
11640 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
11641 (vec_merge:<ssehalfvecmode>
11642 (vec_select:<ssehalfvecmode>
11643 (match_operand:VI8F_256 1 "register_operand" "v,v")
11644 (parallel [(const_int 2) (const_int 3)]))
11645 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
11646 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
;; A masked store must merge with the destination itself.
11649 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
11650 "vextract<shuffletype>64x2\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
11651 [(set_attr "type" "sselog1")
11652 (set_attr "prefix_extra" "1")
11653 (set_attr "length_immediate" "1")
;; Masked VEXTRACT*64x2 is EVEX-encoded (AVX512); "vex" was incorrect and
;; inconsistent with the other masked extract patterns in this file.
11654 (set_attr "prefix" "evex")
11655 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked extract of the high two elements of a VI8F_256 vector.  Prefers
;; the EVEX 64x2 (AVX512DQ) or 32x4 form under AVX512VL, else VEXTRACT[IF]128.
11657 (define_insn "vec_extract_hi_<mode>"
11658 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
11659 (vec_select:<ssehalfvecmode>
11660 (match_operand:VI8F_256 1 "register_operand" "v")
11661 (parallel [(const_int 2) (const_int 3)])))]
11664 if (TARGET_AVX512VL)
11666 if (TARGET_AVX512DQ)
11667 return "vextract<shuffletype>64x2\t{$0x1, %1, %0|%0, %1, 0x1}";
11669 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
11672 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
11674 [(set_attr "type" "sselog1")
11675 (set_attr "prefix_extra" "1")
11676 (set_attr "length_immediate" "1")
11677 (set_attr "prefix" "vex")
11678 (set_attr "mode" "<sseinsnmode>")])
;; Post-reload split for low-half extracts of 256-bit 8-element vectors
;; (VI4F_256): the low half is simply the low 128-bit subreg.
11681 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
11682 (vec_select:<ssehalfvecmode>
11683 (match_operand:VI4F_256 1 "nonimmediate_operand")
11684 (parallel [(const_int 0) (const_int 1)
11685 (const_int 2) (const_int 3)])))]
11686 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
11687 && reload_completed"
11688 [(set (match_dup 0) (match_dup 1))]
11689 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Masked extract of the low four elements of a 256-bit 8-element vector
;; (VI4F_256: V8SI/V8SF); merges with operand 2 under mask operand 3.
11691 (define_insn "vec_extract_lo_<mode>_mask"
11692 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
11693 (vec_merge:<ssehalfvecmode>
11694 (vec_select:<ssehalfvecmode>
11695 (match_operand:VI4F_256 1 "register_operand" "v,v")
11696 (parallel [(const_int 0) (const_int 1)
11697 (const_int 2) (const_int 3)]))
11698 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
11699 (match_operand:QI 3 "register_operand" "Yk,Yk")))]
;; A masked store must merge with the destination itself.
11701 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
11702 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x0}"
11703 [(set_attr "type" "sselog1")
11704 (set_attr "prefix_extra" "1")
11705 (set_attr "length_immediate" "1")
11706 (set_attr "prefix" "evex")
11707 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked low-half extract; handled by the post-reload split above.
11709 (define_insn "vec_extract_lo_<mode>"
11710 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm,v")
11711 (vec_select:<ssehalfvecmode>
11712 (match_operand:VI4F_256 1 "nonimmediate_operand" "v,vm")
11713 (parallel [(const_int 0) (const_int 1)
11714 (const_int 2) (const_int 3)])))]
11716 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
11718 [(set_attr "type" "sselog1")
11719 (set_attr "prefix_extra" "1")
11720 (set_attr "length_immediate" "1")
11721 (set_attr "prefix" "evex")
11722 (set_attr "mode" "<sseinsnmode>")])
;; Masked extract of the high four elements (4..7) of a VI4F_256 vector;
;; merges with operand 2 under mask operand 3.
11724 (define_insn "vec_extract_hi_<mode>_mask"
;; Predicate must allow memory: the "m" alternative and the
;; MEM_P (operands[0]) check below require nonimmediate_operand, matching
;; the other masked extract patterns in this file.
11725 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
11726 (vec_merge:<ssehalfvecmode>
11727 (vec_select:<ssehalfvecmode>
11728 (match_operand:VI4F_256 1 "register_operand" "v,v")
11729 (parallel [(const_int 4) (const_int 5)
11730 (const_int 6) (const_int 7)]))
11731 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C,0")
11732 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
;; A masked store must merge with the destination itself.
11734 && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[2]))"
11735 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
11736 [(set_attr "type" "sselog1")
11737 (set_attr "length_immediate" "1")
11738 (set_attr "prefix" "evex")
11739 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked extract of the high four elements of a VI4F_256 vector.
;; Alternative 0 is the VEX VEXTRACT[IF]128 form; alternative 1 the EVEX
;; 32x4 form, which additionally reaches xmm16+ under AVX512VL.
11741 (define_insn "vec_extract_hi_<mode>"
11742 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
11743 (vec_select:<ssehalfvecmode>
11744 (match_operand:VI4F_256 1 "register_operand" "x, v")
11745 (parallel [(const_int 4) (const_int 5)
11746 (const_int 6) (const_int 7)])))]
11749 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
11750 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
11751 [(set_attr "isa" "*, avx512vl")
11752 (set_attr "prefix" "vex, evex")
11753 (set_attr "type" "sselog1")
11754 (set_attr "length_immediate" "1")
11755 (set_attr "mode" "<sseinsnmode>")])
;; Extract the low half (elements 0..15) of a 512-bit 32-element vector
;; (V32_512).  Normally split after reload into a lowpart move; only the
;; xmm16+-register-to-memory case without AVX512VL needs a real extract.
11757 (define_insn_and_split "@vec_extract_lo_<mode>"
11758 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,v,m")
11759 (vec_select:<ssehalfvecmode>
11760 (match_operand:V32_512 1 "nonimmediate_operand" "v,m,v")
11761 (parallel [(const_int 0) (const_int 1)
11762 (const_int 2) (const_int 3)
11763 (const_int 4) (const_int 5)
11764 (const_int 6) (const_int 7)
11765 (const_int 8) (const_int 9)
11766 (const_int 10) (const_int 11)
11767 (const_int 12) (const_int 13)
11768 (const_int 14) (const_int 15)])))]
11769 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
11771 if (TARGET_AVX512VL
11772 || REG_P (operands[0])
11773 || !EXT_REX_SSE_REG_P (operands[1]))
11776 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
11778 "&& reload_completed
11779 && (TARGET_AVX512VL
11780 || REG_P (operands[0])
11781 || !EXT_REX_SSE_REG_P (operands[1]))"
11782 [(set (match_dup 0) (match_dup 1))]
;; For an ext-SSE-reg destination without AVX512VL, widen operand 0 back
;; to the full 512-bit mode so the lowpart move stays representable.
11784 if (!TARGET_AVX512VL
11785 && REG_P (operands[0])
11786 && EXT_REX_SSE_REG_P (operands[1]))
11787 operands[0] = lowpart_subreg (<MODE>mode, operands[0],
11788 <ssehalfvecmode>mode);
11790 operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
11792 [(set_attr "type" "sselog1")
11793 (set_attr "prefix_extra" "1")
11794 (set_attr "length_immediate" "1")
11795 (set_attr "memory" "none,load,store")
11796 (set_attr "prefix" "evex")
11797 (set_attr "mode" "XI")])
;; Extract the high half (elements 16..31) of a V32_512 vector via
;; VEXTRACTI64x4 with immediate 1.
11799 (define_insn "@vec_extract_hi_<mode>"
11800 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
11801 (vec_select:<ssehalfvecmode>
11802 (match_operand:V32_512 1 "register_operand" "v")
11803 (parallel [(const_int 16) (const_int 17)
11804 (const_int 18) (const_int 19)
11805 (const_int 20) (const_int 21)
11806 (const_int 22) (const_int 23)
11807 (const_int 24) (const_int 25)
11808 (const_int 26) (const_int 27)
11809 (const_int 28) (const_int 29)
11810 (const_int 30) (const_int 31)])))]
11812 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
11813 [(set_attr "type" "sselog1")
11814 (set_attr "prefix_extra" "1")
11815 (set_attr "length_immediate" "1")
11816 (set_attr "prefix" "evex")
11817 (set_attr "mode" "XI")])
;; Extract the low half (elements 0..7) of a 256-bit 16-element vector
;; (V16_256); always splittable after reload into a lowpart move.
11819 (define_insn_and_split "@vec_extract_lo_<mode>"
11820 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
11821 (vec_select:<ssehalfvecmode>
11822 (match_operand:V16_256 1 "nonimmediate_operand" "vm,v")
11823 (parallel [(const_int 0) (const_int 1)
11824 (const_int 2) (const_int 3)
11825 (const_int 4) (const_int 5)
11826 (const_int 6) (const_int 7)])))]
11827 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
11829 "&& reload_completed"
11830 [(set (match_dup 0) (match_dup 1))]
11831 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
;; Extract the high half (elements 8..15) of a V16_256 vector.  The last
;; alternative widens operand 1 to 512 bits (%g1) so plain AVX512F can use
;; VEXTRACTI32x4 on xmm16+ registers without AVX512VL.
11833 (define_insn "@vec_extract_hi_<mode>"
11834 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm,vm,vm")
11835 (vec_select:<ssehalfvecmode>
11836 (match_operand:V16_256 1 "register_operand" "x,v,v")
11837 (parallel [(const_int 8) (const_int 9)
11838 (const_int 10) (const_int 11)
11839 (const_int 12) (const_int 13)
11840 (const_int 14) (const_int 15)])))]
11843 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
11844 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
11845 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
11846 [(set_attr "type" "sselog1")
11847 (set_attr "prefix_extra" "1")
11848 (set_attr "length_immediate" "1")
11849 (set_attr "isa" "*,avx512dq,avx512f")
11850 (set_attr "prefix" "vex,evex,evex")
11851 (set_attr "mode" "OI")])
;; Extract the low 32 bytes of a V64QI vector.  Like the V32_512 pattern
;; above: normally split to a lowpart move; only an xmm16+-to-memory store
;; without AVX512VL keeps the explicit VEXTRACTI64x4.
11853 (define_insn_and_split "vec_extract_lo_v64qi"
11854 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,v,m")
11856 (match_operand:V64QI 1 "nonimmediate_operand" "v,m,v")
11857 (parallel [(const_int 0) (const_int 1)
11858 (const_int 2) (const_int 3)
11859 (const_int 4) (const_int 5)
11860 (const_int 6) (const_int 7)
11861 (const_int 8) (const_int 9)
11862 (const_int 10) (const_int 11)
11863 (const_int 12) (const_int 13)
11864 (const_int 14) (const_int 15)
11865 (const_int 16) (const_int 17)
11866 (const_int 18) (const_int 19)
11867 (const_int 20) (const_int 21)
11868 (const_int 22) (const_int 23)
11869 (const_int 24) (const_int 25)
11870 (const_int 26) (const_int 27)
11871 (const_int 28) (const_int 29)
11872 (const_int 30) (const_int 31)])))]
11873 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
11875 if (TARGET_AVX512VL
11876 || REG_P (operands[0])
11877 || !EXT_REX_SSE_REG_P (operands[1]))
11880 return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
11882 "&& reload_completed
11883 && (TARGET_AVX512VL
11884 || REG_P (operands[0])
11885 || !EXT_REX_SSE_REG_P (operands[1]))"
11886 [(set (match_dup 0) (match_dup 1))]
;; Widen an ext-SSE-reg destination to V64QI so the move is representable
;; without AVX512VL.
11888 if (!TARGET_AVX512VL
11889 && REG_P (operands[0])
11890 && EXT_REX_SSE_REG_P (operands[1]))
11891 operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode);
11893 operands[1] = gen_lowpart (V32QImode, operands[1]);
11895 [(set_attr "type" "sselog1")
11896 (set_attr "prefix_extra" "1")
11897 (set_attr "length_immediate" "1")
11898 (set_attr "memory" "none,load,store")
11899 (set_attr "prefix" "evex")
11900 (set_attr "mode" "XI")])
;; Extract the high 32 bytes (elements 32..63) of a V64QI vector.
11902 (define_insn "vec_extract_hi_v64qi"
11903 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=vm")
11905 (match_operand:V64QI 1 "register_operand" "v")
11906 (parallel [(const_int 32) (const_int 33)
11907 (const_int 34) (const_int 35)
11908 (const_int 36) (const_int 37)
11909 (const_int 38) (const_int 39)
11910 (const_int 40) (const_int 41)
11911 (const_int 42) (const_int 43)
11912 (const_int 44) (const_int 45)
11913 (const_int 46) (const_int 47)
11914 (const_int 48) (const_int 49)
11915 (const_int 50) (const_int 51)
11916 (const_int 52) (const_int 53)
11917 (const_int 54) (const_int 55)
11918 (const_int 56) (const_int 57)
11919 (const_int 58) (const_int 59)
11920 (const_int 60) (const_int 61)
11921 (const_int 62) (const_int 63)])))]
11923 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
11924 [(set_attr "type" "sselog1")
11925 (set_attr "prefix_extra" "1")
11926 (set_attr "length_immediate" "1")
11927 (set_attr "prefix" "evex")
11928 (set_attr "mode" "XI")])
;; Extract the low 16 bytes of a V32QI vector; split after reload into a
;; simple lowpart move.
11930 (define_insn_and_split "vec_extract_lo_v32qi"
11931 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
11933 (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
11934 (parallel [(const_int 0) (const_int 1)
11935 (const_int 2) (const_int 3)
11936 (const_int 4) (const_int 5)
11937 (const_int 6) (const_int 7)
11938 (const_int 8) (const_int 9)
11939 (const_int 10) (const_int 11)
11940 (const_int 12) (const_int 13)
11941 (const_int 14) (const_int 15)])))]
11942 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
11944 "&& reload_completed"
11945 [(set (match_dup 0) (match_dup 1))]
11946 "operands[1] = gen_lowpart (V16QImode, operands[1]);")
;; Extract the high 16 bytes of a V32QI vector.  The last alternative
;; widens operand 1 to 512 bits (%g1) so plain AVX512F can handle xmm16+
;; registers without AVX512VL.
11948 (define_insn "vec_extract_hi_v32qi"
11949 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=xm,vm,vm")
11951 (match_operand:V32QI 1 "register_operand" "x,v,v")
11952 (parallel [(const_int 16) (const_int 17)
11953 (const_int 18) (const_int 19)
11954 (const_int 20) (const_int 21)
11955 (const_int 22) (const_int 23)
11956 (const_int 24) (const_int 25)
11957 (const_int 26) (const_int 27)
11958 (const_int 28) (const_int 29)
11959 (const_int 30) (const_int 31)])))]
11962 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
11963 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
11964 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
11965 [(set_attr "type" "sselog1")
11966 (set_attr "prefix_extra" "1")
11967 (set_attr "length_immediate" "1")
11968 (set_attr "isa" "*,avx512dq,avx512f")
11969 (set_attr "prefix" "vex,evex,evex")
11970 (set_attr "mode" "OI")])
11972 ;; NB: *vec_extract<mode>_0 must be placed before *vec_extracthf.
11973 ;; Otherwise, it will be ignored.
;; Extract element 0 of an FP16/BF16 vector: after reload this is just a
;; move of the lowpart scalar.
11974 (define_insn_and_split "*vec_extract<mode>_0"
11975 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=v,m,r")
11976 (vec_select:<ssescalarmode>
11977 (match_operand:VF_AVX512HFBF16 1 "nonimmediate_operand" "vm,v,m")
11978 (parallel [(const_int 0)])))]
11979 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
11981 "&& reload_completed"
11982 [(set (match_dup 0) (match_dup 1))]
11983 "operands[1] = gen_lowpart (<ssescalarmode>mode, operands[1]);")
;; Extract element 1..7 of a 128-bit HF/BF vector.  GPR/memory destinations
;; use PEXTRW; SSE-register destinations shift the element down with
;; PSRLDQ (byte shift, hence the *2 adjustment of the index).
11985 (define_insn "*vec_extract<mode>"
11986 [(set (match_operand:HFBF 0 "register_sse4nonimm_operand" "=?r,m,x,v")
11988 (match_operand:<ssevecmode> 1 "register_operand" "v,v,0,v")
11990 [(match_operand:SI 2 "const_0_to_7_operand")])))]
11993 switch (which_alternative)
11996 return "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}";
11998 return "%vpextrw\t{%2, %1, %0|%0, %1, %2}";
;; Convert element index to byte count for the PSRLDQ alternatives.
12001 operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
12002 return "psrldq\t{%2, %0|%0, %2}";
12004 operands[2] = GEN_INT (INTVAL (operands[2]) * 2);
12005 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
12008 gcc_unreachable ();
12011 [(set_attr "isa" "*,sse4,noavx,avx")
12012 (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1")
12013 (set_attr "prefix" "maybe_evex")
12014 (set_attr "mode" "TI")])
12016 ;; Modes handled by vec_extract patterns.
12017 (define_mode_iterator VEC_EXTRACT_MODE
12018 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
12019 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
12020 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
12021 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
12022 (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF
12023 (V32BF "TARGET_AVX512BW") (V16BF "TARGET_AVX") V8BF
12024 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
12025 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
12026 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
;; Standard-named scalar-element extract; defers to the generic expander.
12028 (define_expand "vec_extract<mode><ssescalarmodelower>"
12029 [(match_operand:<ssescalarmode> 0 "register_operand")
12030 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
12031 (match_operand 2 "const_int_operand")]
12034 ix86_expand_vector_extract (false, operands[0], operands[1],
12035 INTVAL (operands[2]));
;; Standard-named half-vector extract for 256/512-bit modes: operand 2
;; selects the low (0) or high (1) half.
12039 (define_expand "vec_extract<mode><ssehalfvecmodelower>"
12040 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
12041 (match_operand:V_256_512 1 "register_operand")
12042 (match_operand 2 "const_0_to_1_operand")]
12045 if (INTVAL (operands[2]))
12046 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]))
12048 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
12052 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12054 ;; Parallel double-precision floating point element swizzling
12056 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VUNPCKHPD on 512-bit vectors: interleave the odd-indexed doubles of
;; operands 1 and 2 within each 128-bit lane.
12058 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
12059 [(set (match_operand:V8DF 0 "register_operand" "=v")
12062 (match_operand:V8DF 1 "register_operand" "v")
12063 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
12064 (parallel [(const_int 1) (const_int 9)
12065 (const_int 3) (const_int 11)
12066 (const_int 5) (const_int 13)
12067 (const_int 7) (const_int 15)])))]
12069 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12070 [(set_attr "type" "sselog")
12071 (set_attr "prefix" "evex")
12072 (set_attr "mode" "V8DF")])
12074 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
12075 (define_insn "avx_unpckhpd256<mask_name>"
12076 [(set (match_operand:V4DF 0 "register_operand" "=v")
12079 (match_operand:V4DF 1 "register_operand" "v")
12080 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
12081 (parallel [(const_int 1) (const_int 5)
12082 (const_int 3) (const_int 7)])))]
12083 "TARGET_AVX && <mask_avx512vl_condition>"
12084 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12085 [(set_attr "type" "sselog")
12086 (set_attr "prefix" "vex")
12087 (set_attr "mode" "V4DF")])
;; True cross-lane "interleave high" for V4DF: the lane-local unpck insns
;; cannot do it directly, so build lo/hi lane-interleaves into temporaries
;; (operands 3 and 4) and combine their upper lanes with a permute.
12089 (define_expand "vec_interleave_highv4df"
12090 [(set (match_dup 3)
12093 (match_operand:V4DF 1 "register_operand")
12094 (match_operand:V4DF 2 "nonimmediate_operand"))
12095 (parallel [(const_int 0) (const_int 4)
12096 (const_int 2) (const_int 6)])))
12102 (parallel [(const_int 1) (const_int 5)
12103 (const_int 3) (const_int 7)])))
12104 (set (match_operand:V4DF 0 "register_operand")
12109 (parallel [(const_int 2) (const_int 3)
12110 (const_int 6) (const_int 7)])))]
12113 operands[3] = gen_reg_rtx (V4DFmode);
12114 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit VUNPCKHPD: interleaved highs merged with operand 3
;; under mask operand 4.
12118 (define_insn "avx512vl_unpckhpd128_mask"
12119 [(set (match_operand:V2DF 0 "register_operand" "=v")
12123 (match_operand:V2DF 1 "register_operand" "v")
12124 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
12125 (parallel [(const_int 1) (const_int 3)]))
12126 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
12127 (match_operand:QI 4 "register_operand" "Yk")))]
12129 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
12130 [(set_attr "type" "sselog")
12131 (set_attr "prefix" "evex")
12132 (set_attr "mode" "V2DF")])
;; V2DF interleave-high expander; forces operand 2 into a register when
;; the operand combination cannot be matched by the insn below.
12134 (define_expand "vec_interleave_highv2df"
12135 [(set (match_operand:V2DF 0 "register_operand")
12138 (match_operand:V2DF 1 "nonimmediate_operand")
12139 (match_operand:V2DF 2 "nonimmediate_operand"))
12140 (parallel [(const_int 1)
12144 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
12145 operands[2] = force_reg (V2DFmode, operands[2]);
;; V2DF interleave-high: UNPCKHPD for register forms, MOVLPD/MOVHPD for
;; the memory-coupled alternatives.
12148 (define_insn "*vec_interleave_highv2df"
12149 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m")
12152 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,v")
12153 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,0,v,0"))
12154 (parallel [(const_int 1)
12156 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
12158 unpckhpd\t{%2, %0|%0, %2}
12159 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
12160 movlpd\t{%H1, %0|%0, %H1}
12161 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
12162 %vmovhpd\t{%1, %0|%q0, %1}"
12163 [(set_attr "isa" "noavx,avx,noavx,avx,*")
12164 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
12165 (set (attr "prefix_data16")
12166 (if_then_else (eq_attr "alternative" "2,4")
12168 (const_string "*")))
12169 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
12170 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,V1DF")])
;; VMOVDDUP on 512 bits: duplicate each even-indexed double from memory
;; (selection 0,8,2,10,... over the concatenated operand pair).
12172 (define_insn "avx512f_movddup512<mask_name>"
12173 [(set (match_operand:V8DF 0 "register_operand" "=v")
12176 (match_operand:V8DF 1 "nonimmediate_operand" "m")
12178 (parallel [(const_int 0) (const_int 8)
12179 (const_int 2) (const_int 10)
12180 (const_int 4) (const_int 12)
12181 (const_int 6) (const_int 14)])))]
12183 "vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
12184 [(set_attr "type" "sselog1")
12185 (set_attr "prefix" "evex")
12186 (set_attr "mode" "V8DF")])
;; VUNPCKLPD on 512-bit vectors: interleave the even-indexed doubles of
;; operands 1 and 2 within each 128-bit lane.
12188 (define_insn "avx512f_unpcklpd512<mask_name>"
12189 [(set (match_operand:V8DF 0 "register_operand" "=v")
12192 (match_operand:V8DF 1 "register_operand" "v")
12193 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
12194 (parallel [(const_int 0) (const_int 8)
12195 (const_int 2) (const_int 10)
12196 (const_int 4) (const_int 12)
12197 (const_int 6) (const_int 14)])))]
12199 "vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12200 [(set_attr "type" "sselog")
12201 (set_attr "prefix" "evex")
12202 (set_attr "mode" "V8DF")])
12204 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit VMOVDDUP from memory: duplicate each even-indexed double.
12205 (define_insn "avx_movddup256<mask_name>"
12206 [(set (match_operand:V4DF 0 "register_operand" "=v")
12209 (match_operand:V4DF 1 "nonimmediate_operand" "m")
12211 (parallel [(const_int 0) (const_int 4)
12212 (const_int 2) (const_int 6)])))]
12213 "TARGET_AVX && <mask_avx512vl_condition>"
12214 "vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
12215 [(set_attr "type" "sselog1")
12216 (set_attr "prefix" "<mask_prefix>")
12217 (set_attr "mode" "V4DF")])
;; 256-bit VUNPCKLPD: interleave even-indexed doubles, lane-local.
12219 (define_insn "avx_unpcklpd256<mask_name>"
12220 [(set (match_operand:V4DF 0 "register_operand" "=v")
12223 (match_operand:V4DF 1 "register_operand" " v")
12224 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
12225 (parallel [(const_int 0) (const_int 4)
12226 (const_int 2) (const_int 6)])))]
12227 "TARGET_AVX && <mask_avx512vl_condition>"
12228 "vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12229 [(set_attr "type" "sselog")
12230 (set_attr "prefix" "<mask_prefix>")
12231 (set_attr "mode" "V4DF")])
;; True cross-lane "interleave low" for V4DF: build lane-local lo/hi
;; interleaves into temporaries (operands 3 and 4), then combine their
;; lower lanes with a permute.
12233 (define_expand "vec_interleave_lowv4df"
12234 [(set (match_dup 3)
12237 (match_operand:V4DF 1 "register_operand")
12238 (match_operand:V4DF 2 "nonimmediate_operand"))
12239 (parallel [(const_int 0) (const_int 4)
12240 (const_int 2) (const_int 6)])))
12246 (parallel [(const_int 1) (const_int 5)
12247 (const_int 3) (const_int 7)])))
12248 (set (match_operand:V4DF 0 "register_operand")
12253 (parallel [(const_int 0) (const_int 1)
12254 (const_int 4) (const_int 5)])))]
12257 operands[3] = gen_reg_rtx (V4DFmode);
12258 operands[4] = gen_reg_rtx (V4DFmode);
;; Masked 128-bit VUNPCKLPD: interleaved lows merged with operand 3
;; under mask operand 4.
12261 (define_insn "avx512vl_unpcklpd128_mask"
12262 [(set (match_operand:V2DF 0 "register_operand" "=v")
12266 (match_operand:V2DF 1 "register_operand" "v")
12267 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
12268 (parallel [(const_int 0) (const_int 2)]))
12269 (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
12270 (match_operand:QI 4 "register_operand" "Yk")))]
12272 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
12273 [(set_attr "type" "sselog")
12274 (set_attr "prefix" "evex")
12275 (set_attr "mode" "V2DF")])
;; V2DF interleave-low expander; forces operand 1 into a register when
;; the operand combination cannot be matched by the insn below.
12277 (define_expand "vec_interleave_lowv2df"
12278 [(set (match_operand:V2DF 0 "register_operand")
12281 (match_operand:V2DF 1 "nonimmediate_operand")
12282 (match_operand:V2DF 2 "nonimmediate_operand"))
12283 (parallel [(const_int 0)
12287 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
12288 operands[1] = force_reg (V2DFmode, operands[1]);
;; V2DF interleave-low: UNPCKLPD for register forms, MOVHPD/MOVLPD for
;; the memory-coupled alternatives.
12291 (define_insn "*vec_interleave_lowv2df"
12292 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,o")
12295 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0")
12296 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v"))
12297 (parallel [(const_int 0)
12299 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
12301 unpcklpd\t{%2, %0|%0, %2}
12302 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
12303 movhpd\t{%2, %0|%0, %q2}
12304 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
12305 %vmovlpd\t{%2, %H0|%H0, %2}"
12306 [(set_attr "isa" "noavx,avx,noavx,avx,*")
12307 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
12308 (set (attr "prefix_data16")
12309 (if_then_else (eq_attr "alternative" "2,4")
12311 (const_string "*")))
12312 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
12313 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,V1DF")])
;; Split a store of a duplicated low double into two scalar DF stores
;; (avoids materializing the duplicated vector).
12316 [(set (match_operand:V2DF 0 "memory_operand")
12319 (match_operand:V2DF 1 "register_operand")
12321 (parallel [(const_int 0)
12323 "TARGET_SSE3 && reload_completed"
12326 rtx low = gen_lowpart (DFmode, operands[1]);
12328 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
12329 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split a same-element vec_select from memory into a vec_duplicate of the
;; selected scalar (becomes MOVDDUP); operands 2 and 3 must name the same
;; element of the two concatenated halves.
12334 [(set (match_operand:V2DF 0 "register_operand")
12337 (match_operand:V2DF 1 "memory_operand")
12339 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
12340 (match_operand:SI 3 "const_int_operand")])))]
12341 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
12342 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
12344 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; Scalar VSCALEF (x * 2^y on the low element), with masking and embedded
;; rounding via the mask_scalar/round_scalar subst attributes.
12347 (define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
12348 [(set (match_operand:VFH_128 0 "register_operand" "=v")
12351 [(match_operand:VFH_128 1 "register_operand" "v")
12352 (match_operand:VFH_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
12357 "vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_scalar_mask_op3>}"
12358 [(set_attr "prefix" "evex")
12359 (set_attr "mode" "<ssescalarmode>")])
;; Full-vector VSCALEF for all AVX512 FP vector modes.
12361 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
12362 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
12363 (unspec:VFH_AVX512VL
12364 [(match_operand:VFH_AVX512VL 1 "register_operand" "v")
12365 (match_operand:VFH_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
12368 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
12369 [(set_attr "prefix" "evex")
12370 (set_attr "mode" "<MODE>")])
;; Zero-masked VPTERNLOG expander: forwards to the _maskz_1 pattern with a
;; zero merge operand.
12372 (define_expand "<avx512>_vternlog<mode>_maskz"
12373 [(match_operand:VI48_AVX512VL 0 "register_operand")
12374 (match_operand:VI48_AVX512VL 1 "register_operand")
12375 (match_operand:VI48_AVX512VL 2 "register_operand")
12376 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
12377 (match_operand:SI 4 "const_0_to_255_operand")
12378 (match_operand:<avx512fmaskmode> 5 "register_operand")]
12381 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
12382 operands[0], operands[1], operands[2], operands[3],
12383 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
;; VPTERNLOG builtin pattern: 3-input ternary logic selected by the imm8
;; truth table in operand 4; operand 1 is both input and output.
12387 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
12388 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12389 (unspec:VI48_AVX512VL
12390 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
12391 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
12392 (match_operand:VI48_AVX512VL 3 "bcst_vector_operand" "vmBr")
12393 (match_operand:SI 4 "const_0_to_255_operand")]
12396 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
12397 [(set_attr "type" "sselog")
12398 (set_attr "prefix" "evex")
12399 (set_attr "mode" "<sseinsnmode>")])
;; VPTERNLOG over all vector modes (mode iterator V), used by the
;; combine-generated *vpternlog patterns below.
12401 (define_insn "*<avx512>_vternlog<mode>_all"
12402 [(set (match_operand:V 0 "register_operand" "=v")
12404 [(match_operand:V 1 "register_operand" "0")
12405 (match_operand:V 2 "register_operand" "v")
12406 (match_operand:V 3 "bcst_vector_operand" "vmBr")
12407 (match_operand:SI 4 "const_0_to_255_operand")]
12410 /* Disallow embedded broadcast for vector HFmode since
12411 it's not a real AVX512FP16 instruction.  */
12412 && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) >= 4
12413 || GET_CODE (operands[3]) != VEC_DUPLICATE)"
12414 "vpternlog<ternlogsuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}"
12415 [(set_attr "type" "sselog")
12416 (set_attr "prefix" "evex")
12417 (set_attr "mode" "<sseinsnmode>")])
12419 ;; There must be lots of other combinations like
12422 ;; (any_logic:V op1 op2)
12423 ;; (any_logic:V op1 op3))
12427 ;; (any_logic:V op1, op2)
;; Two independent copies of the logic-code iterator so the vpternlog
;; patterns below can match different operators at each level; logic_op
;; maps each code to its C operator for computing the imm8 truth table.
12433 (define_code_iterator any_logic1 [and ior xor])
12434 (define_code_iterator any_logic2 [and ior xor])
12435 (define_code_attr logic_op [(and "&") (ior "|") (xor "^")])
;; Match ((op1 LOGIC1 op2) LOGIC (op3 LOGIC2 op4)), where inputs may be
;; bit-complemented, and fold it into a single VPTERNLOG.  Requires at
;; least one operand shared between the two sides so only three distinct
;; inputs remain; the imm8 is computed symbolically from the standard
;; truth-table constants of the three inputs.
12437 (define_insn_and_split "*<avx512>_vpternlog<mode>_1"
12438 [(set (match_operand:V 0 "register_operand")
12441 (match_operand:V 1 "regmem_or_bitnot_regmem_operand")
12442 (match_operand:V 2 "regmem_or_bitnot_regmem_operand"))
12444 (match_operand:V 3 "regmem_or_bitnot_regmem_operand")
12445 (match_operand:V 4 "regmem_or_bitnot_regmem_operand"))))]
12446 "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
12447 && ix86_pre_reload_split ()
12448 && (rtx_equal_p (STRIP_UNARY (operands[1]),
12449 STRIP_UNARY (operands[4]))
12450 || rtx_equal_p (STRIP_UNARY (operands[2]),
12451 STRIP_UNARY (operands[4]))
12452 || rtx_equal_p (STRIP_UNARY (operands[1]),
12453 STRIP_UNARY (operands[3]))
12454 || rtx_equal_p (STRIP_UNARY (operands[2]),
12455 STRIP_UNARY (operands[3])))"
12458 [(set (match_dup 0)
12466 /* VPTERNLOGD reg6, reg2, reg1, imm8. */
;; Identify which operand is the duplicate; operands[6] becomes the third
;; distinct input.
12472 int reg_mask, tmp1, tmp2;
12473 if (rtx_equal_p (STRIP_UNARY (operands[1]),
12474 STRIP_UNARY (operands[4])))
12478 operands[6] = operands[3];
12480 else if (rtx_equal_p (STRIP_UNARY (operands[2]),
12481 STRIP_UNARY (operands[4])))
12485 operands[6] = operands[3];
12487 else if (rtx_equal_p (STRIP_UNARY (operands[1]),
12488 STRIP_UNARY (operands[3])))
12492 operands[6] = operands[4];
12498 operands[6] = operands[4];
;; Complement each input's truth-table constant where the source was
;; wrapped in a bitwise NOT, then evaluate the expression to get imm8.
12501 reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
12502 reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
12503 reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
12504 reg4 = UNARY_P (operands[4]) ? ~reg4 : reg4;
12506 tmp1 = reg1 <any_logic1:logic_op> reg2;
12507 tmp2 = reg3 <any_logic2:logic_op> reg4;
12508 reg_mask = tmp1 <any_logic:logic_op> tmp2;
12511 operands[1] = STRIP_UNARY (operands[1]);
12512 operands[2] = STRIP_UNARY (operands[2]);
12513 operands[6] = STRIP_UNARY (operands[6]);
12514 if (!register_operand (operands[2], <MODE>mode))
12515 operands[2] = force_reg (<MODE>mode, operands[2]);
12516 if (!register_operand (operands[6], <MODE>mode))
12517 operands[6] = force_reg (<MODE>mode, operands[6]);
12518 operands[5] = GEN_INT (reg_mask);
;; Match (((op1 LOGIC2 op2) LOGIC1 op3) LOGIC op4), inputs possibly
;; bit-complemented, and fold it into a single VPTERNLOG.  Same duplicate
;; detection and imm8 truth-table computation as *vpternlog<mode>_1, but
;; for the left-nested association of the three logic operations.
12521 (define_insn_and_split "*<avx512>_vpternlog<mode>_2"
12522 [(set (match_operand:V 0 "register_operand")
12526 (match_operand:V 1 "regmem_or_bitnot_regmem_operand")
12527 (match_operand:V 2 "regmem_or_bitnot_regmem_operand"))
12528 (match_operand:V 3 "regmem_or_bitnot_regmem_operand"))
12529 (match_operand:V 4 "regmem_or_bitnot_regmem_operand")))]
12530 "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
12531 && ix86_pre_reload_split ()
12532 && (rtx_equal_p (STRIP_UNARY (operands[1]),
12533 STRIP_UNARY (operands[4]))
12534 || rtx_equal_p (STRIP_UNARY (operands[2]),
12535 STRIP_UNARY (operands[4]))
12536 || rtx_equal_p (STRIP_UNARY (operands[1]),
12537 STRIP_UNARY (operands[3]))
12538 || rtx_equal_p (STRIP_UNARY (operands[2]),
12539 STRIP_UNARY (operands[3])))"
12542 [(set (match_dup 0)
12550 /* VPTERNLOGD reg6, reg2, reg1, imm8. */
;; Identify which operand is the duplicate; operands[6] becomes the third
;; distinct input.
12556 int reg_mask, tmp1, tmp2;
12557 if (rtx_equal_p (STRIP_UNARY (operands[1]),
12558 STRIP_UNARY (operands[4])))
12562 operands[6] = operands[3];
12564 else if (rtx_equal_p (STRIP_UNARY (operands[2]),
12565 STRIP_UNARY (operands[4])))
12569 operands[6] = operands[3];
12571 else if (rtx_equal_p (STRIP_UNARY (operands[1]),
12572 STRIP_UNARY (operands[3])))
12576 operands[6] = operands[4];
12582 operands[6] = operands[4];
;; Complement truth-table constants for NOT-wrapped inputs, then evaluate
;; the left-nested expression to get imm8.
12585 reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
12586 reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
12587 reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
12588 reg4 = UNARY_P (operands[4]) ? ~reg4 : reg4;
12590 tmp1 = reg1 <any_logic2:logic_op> reg2;
12591 tmp2 = tmp1 <any_logic1:logic_op> reg3;
12592 reg_mask = tmp2 <any_logic:logic_op> reg4;
12595 operands[1] = STRIP_UNARY (operands[1]);
12596 operands[2] = STRIP_UNARY (operands[2]);
12597 operands[6] = STRIP_UNARY (operands[6]);
12598 operands[5] = GEN_INT (reg_mask);
12599 if (!register_operand (operands[2], <MODE>mode))
12600 operands[2] = force_reg (<MODE>mode, operands[2]);
12601 if (!register_operand (operands[6], <MODE>mode))
12602 operands[6] = force_reg (<MODE>mode, operands[6]);
12606 (define_insn_and_split "*<avx512>_vpternlog<mode>_3"
12607 [(set (match_operand:V 0 "register_operand")
12610 (match_operand:V 1 "regmem_or_bitnot_regmem_operand")
12611 (match_operand:V 2 "regmem_or_bitnot_regmem_operand"))
12612 (match_operand:V 3 "regmem_or_bitnot_regmem_operand")))]
12613 "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
12614 && ix86_pre_reload_split ()"
12617 [(set (match_dup 0)
12625 /* VPTERNLOGD reg3, reg2, reg1, imm8. */
12629 int reg_mask, tmp1;
12631 reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
12632 reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
12633 reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
12635 tmp1 = reg1 <any_logic1:logic_op> reg2;
12636 reg_mask = tmp1 <any_logic:logic_op> reg3;
12639 operands[1] = STRIP_UNARY (operands[1]);
12640 operands[2] = STRIP_UNARY (operands[2]);
12641 operands[3] = STRIP_UNARY (operands[3]);
12642 operands[4] = GEN_INT (reg_mask);
12643 if (!register_operand (operands[2], <MODE>mode))
12644 operands[2] = force_reg (<MODE>mode, operands[2]);
12645 if (!register_operand (operands[3], <MODE>mode))
12646 operands[3] = force_reg (<MODE>mode, operands[3]);
12650 (define_insn "<avx512>_vternlog<mode>_mask"
12651 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12652 (vec_merge:VI48_AVX512VL
12653 (unspec:VI48_AVX512VL
12654 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
12655 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
12656 (match_operand:VI48_AVX512VL 3 "bcst_vector_operand" "vmBr")
12657 (match_operand:SI 4 "const_0_to_255_operand")]
12660 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
12662 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
12663 [(set_attr "type" "sselog")
12664 (set_attr "prefix" "evex")
12665 (set_attr "mode" "<sseinsnmode>")])
;; VGETEXP*: per-element exponent extraction, with optional masking and
;; SAE (suppress-all-exceptions) rounding modifiers via the substs.
12667 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
12668 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
12669 (unspec:VFH_AVX512VL [(match_operand:VFH_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
12672 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
12673 [(set_attr "prefix" "evex")
12674 (set_attr "mode" "<MODE>")])

;; Scalar VGETEXP: low element comes from operand 2, upper elements are
;; copied from operand 1.
12676 (define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
12677 [(set (match_operand:VFH_128 0 "register_operand" "=v")
12680 [(match_operand:VFH_128 1 "register_operand" "v")
12681 (match_operand:VFH_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
12686 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}";
12687 [(set_attr "prefix" "evex")
12688 (set_attr "mode" "<ssescalarmode>")])

;; VALIGND/VALIGNQ: concatenate %1:%2 and extract a vector shifted right
;; by %3 elements.
12690 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
12691 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12692 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
12693 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
12694 (match_operand:SI 3 "const_0_to_255_operand")]
12697 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
12698 [(set_attr "prefix" "evex")
12699 (set_attr "mode" "<sseinsnmode>")])

;; Per-mode immediate range for the element index of the vec_extract
;; pattern below (index must be a valid lane number for the mode).
12701 (define_mode_attr vec_extract_imm_predicate
12702 [(V16SF "const_0_to_15_operand") (V8SF "const_0_to_7_operand")
12703 (V16SI "const_0_to_15_operand") (V8SI "const_0_to_7_operand")
12704 (V8DF "const_0_to_7_operand") (V4DF "const_0_to_3_operand")
12705 (V8DI "const_0_to_7_operand") (V4DI "const_0_to_3_operand")])

;; Extract one scalar from the upper part of a 256/512-bit vector
;; (byte offset >= 16): if the element sits exactly on a 16-byte lane
;; boundary use VEXTRACT of that lane, otherwise fall back to VALIGN to
;; rotate it into the low position.
12707 (define_insn "*vec_extract<mode><ssescalarmodelower>_valign"
12708 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=v")
12709 (vec_select:<ssescalarmode>
12710 (match_operand:V48_256_512_AVX512VL 1 "register_operand" "v")
12711 (parallel [(match_operand 2 "<vec_extract_imm_predicate>")])))]
12713 && INTVAL(operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode) >= 16"
12715 int byte_offset = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
12716 if (byte_offset % 16 == 0)
12718 operands[2] = GEN_INT (byte_offset / 16);
12719 if (byte_offset / 16 == 1)
12720 return "vextract<shuffletype><extract_suf>\t{%2, %t1, %x0|%x0, %t1, %2}";
12722 return "vextract<shuffletype><extract_suf>\t{%2, %1, %x0|%x0, %1, %2}";
12725 return "valign<ternlogsuffix>\t{%2, %1, %1, %<xtg_mode>0|%<xtg_mode>0, %1, %1, %2}";
12727 [(set_attr "prefix" "maybe_evex")
12728 (set_attr "mode" "<sseintvecinsnmode>")])
;; Expand masked 512-bit SHUFPS: decode the 8-bit shufps selector into 16
;; explicit lane indices for the vec_select-based avx512f_shufps512_1_mask
;; pattern.  Each 128-bit lane repeats the same 4-way selection; the +16
;; terms pick from operand 2 (the second half of the concatenation).
12730 (define_expand "avx512f_shufps512_mask"
12731 [(match_operand:V16SF 0 "register_operand")
12732 (match_operand:V16SF 1 "register_operand")
12733 (match_operand:V16SF 2 "nonimmediate_operand")
12734 (match_operand:SI 3 "const_0_to_255_operand")
12735 (match_operand:V16SF 4 "register_operand")
12736 (match_operand:HI 5 "register_operand")]
12739 int mask = INTVAL (operands[3]);
12740 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
12741 GEN_INT ((mask >> 0) & 3),
12742 GEN_INT ((mask >> 2) & 3),
12743 GEN_INT (((mask >> 4) & 3) + 16),
12744 GEN_INT (((mask >> 6) & 3) + 16),
12745 GEN_INT (((mask >> 0) & 3) + 4),
12746 GEN_INT (((mask >> 2) & 3) + 4),
12747 GEN_INT (((mask >> 4) & 3) + 20),
12748 GEN_INT (((mask >> 6) & 3) + 20),
12749 GEN_INT (((mask >> 0) & 3) + 8),
12750 GEN_INT (((mask >> 2) & 3) + 8),
12751 GEN_INT (((mask >> 4) & 3) + 24),
12752 GEN_INT (((mask >> 6) & 3) + 24),
12753 GEN_INT (((mask >> 0) & 3) + 12),
12754 GEN_INT (((mask >> 2) & 3) + 12),
12755 GEN_INT (((mask >> 4) & 3) + 28),
12756 GEN_INT (((mask >> 6) & 3) + 28),
12757 operands[4], operands[5]));

;; VFIXUPIMM, zero-masked expander: delegates to the _maskz_1 pattern
;; with a zero vector as the merge source.
12762 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
12763 [(match_operand:VF_AVX512VL 0 "register_operand")
12764 (match_operand:VF_AVX512VL 1 "register_operand")
12765 (match_operand:VF_AVX512VL 2 "register_operand")
12766 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
12767 (match_operand:SI 4 "const_0_to_255_operand")
12768 (match_operand:<avx512fmaskmode> 5 "register_operand")]
12771 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
12772 operands[0], operands[1], operands[2], operands[3],
12773 operands[4], CONST0_RTX (<MODE>mode), operands[5]
12774 <round_saeonly_expand_operand6>));

;; VFIXUPIMM, unmasked / zero-masked insn.  Operand 1 is tied to the
;; destination; operand 3 is the integer table of fixup actions.
12778 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
12779 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
12780 (unspec:VF_AVX512VL
12781 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
12782 (match_operand:VF_AVX512VL 2 "register_operand" "v")
12783 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
12784 (match_operand:SI 4 "const_0_to_255_operand")]
12787 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
12788 [(set_attr "prefix" "evex")
12789 (set_attr "mode" "<MODE>")])

;; VFIXUPIMM, merge-masked insn (vec_merge under mask register %5).
12791 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
12792 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
12793 (vec_merge:VF_AVX512VL
12794 (unspec:VF_AVX512VL
12795 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
12796 (match_operand:VF_AVX512VL 2 "register_operand" "v")
12797 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
12798 (match_operand:SI 4 "const_0_to_255_operand")]
12801 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
12803 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
12804 [(set_attr "prefix" "evex")
12805 (set_attr "mode" "<MODE>")])

;; Scalar VFIXUPIMM, zero-masked expander (same scheme as the vector one).
12807 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
12808 [(match_operand:VF_128 0 "register_operand")
12809 (match_operand:VF_128 1 "register_operand")
12810 (match_operand:VF_128 2 "register_operand")
12811 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
12812 (match_operand:SI 4 "const_0_to_255_operand")
12813 (match_operand:<avx512fmaskmode> 5 "register_operand")]
12816 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
12817 operands[0], operands[1], operands[2], operands[3],
12818 operands[4], CONST0_RTX (<MODE>mode), operands[5]
12819 <round_saeonly_expand_operand6>));

;; Scalar VFIXUPIMM insn (low element only).
12823 (define_insn "avx512f_sfixupimm<mode><maskz_scalar_name><round_saeonly_name>"
12824 [(set (match_operand:VF_128 0 "register_operand" "=v")
12827 [(match_operand:VF_128 1 "register_operand" "0")
12828 (match_operand:VF_128 2 "register_operand" "v")
12829 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
12830 (match_operand:SI 4 "const_0_to_255_operand")]
12835 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_maskz_scalar_op5>%3, %2, %0<maskz_scalar_op5>|%0<maskz_scalar_op5>, %2, %<iptr>3<round_saeonly_maskz_scalar_op5>, %4}";
12836 [(set_attr "prefix" "evex")
12837 (set_attr "mode" "<ssescalarmode>")])

;; Scalar VFIXUPIMM, merge-masked insn.
12839 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
12840 [(set (match_operand:VF_128 0 "register_operand" "=v")
12844 [(match_operand:VF_128 1 "register_operand" "0")
12845 (match_operand:VF_128 2 "register_operand" "v")
12846 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
12847 (match_operand:SI 4 "const_0_to_255_operand")]
12852 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
12854 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %<iptr>3<round_saeonly_op6>, %4}";
12855 [(set_attr "prefix" "evex")
12856 (set_attr "mode" "<ssescalarmode>")])
;; VRNDSCALE*: round each element according to the immediate in operand 2,
;; with optional masking and SAE modifier.
12858 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
12859 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
12860 (unspec:VFH_AVX512VL
12861 [(match_operand:VFH_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
12862 (match_operand:SI 2 "const_0_to_255_operand")]
12865 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
12866 [(set_attr "length_immediate" "1")
12867 (set_attr "prefix" "evex")
12868 (set_attr "mode" "<MODE>")])

;; Scalar VRNDSCALE: low element rounded from operand 2, upper elements
;; taken from operand 1.
12870 (define_insn "avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>"
12871 [(set (match_operand:VFH_128 0 "register_operand" "=v")
12874 [(match_operand:VFH_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
12875 (match_operand:SI 3 "const_0_to_255_operand")]
12877 (match_operand:VFH_128 1 "register_operand" "v")
12880 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
12881 [(set_attr "length_immediate" "1")
12882 (set_attr "prefix" "evex")
12883 (set_attr "mode" "<MODE>")])

;; Variant matching a vec_duplicate of the rounded scalar (the scalar
;; operand here is <ssescalarmode>, not a vector).
12885 (define_insn "*avx512f_rndscale<mode><round_saeonly_name>"
12886 [(set (match_operand:VFH_128 0 "register_operand" "=v")
12888 (vec_duplicate:VFH_128
12889 (unspec:<ssescalarmode>
12890 [(match_operand:<ssescalarmode> 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
12891 (match_operand:SI 3 "const_0_to_255_operand")]
12893 (match_operand:VFH_128 1 "register_operand" "v")
12896 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
12897 [(set_attr "length_immediate" "1")
12898 (set_attr "prefix" "evex")
12899 (set_attr "mode" "<MODE>")])

12901 ;; One bit in mask selects 2 elements.
;; 512-bit SHUFPS as vec_select over the concatenation of %1 and %2.
;; The condition requires the same 4-way selection to repeat in every
;; 128-bit lane (operands 7-18 equal operands 3-6 plus 4/8/12); the
;; output routine packs operands 3-6 back into the 8-bit immediate.
12902 (define_insn "avx512f_shufps512_1<mask_name>"
12903 [(set (match_operand:V16SF 0 "register_operand" "=v")
12906 (match_operand:V16SF 1 "register_operand" "v")
12907 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
12908 (parallel [(match_operand 3 "const_0_to_3_operand")
12909 (match_operand 4 "const_0_to_3_operand")
12910 (match_operand 5 "const_16_to_19_operand")
12911 (match_operand 6 "const_16_to_19_operand")
12912 (match_operand 7 "const_4_to_7_operand")
12913 (match_operand 8 "const_4_to_7_operand")
12914 (match_operand 9 "const_20_to_23_operand")
12915 (match_operand 10 "const_20_to_23_operand")
12916 (match_operand 11 "const_8_to_11_operand")
12917 (match_operand 12 "const_8_to_11_operand")
12918 (match_operand 13 "const_24_to_27_operand")
12919 (match_operand 14 "const_24_to_27_operand")
12920 (match_operand 15 "const_12_to_15_operand")
12921 (match_operand 16 "const_12_to_15_operand")
12922 (match_operand 17 "const_28_to_31_operand")
12923 (match_operand 18 "const_28_to_31_operand")])))]
12925 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
12926 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
12927 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
12928 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
12929 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
12930 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
12931 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
12932 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
12933 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
12934 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
12935 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
12936 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
12939 mask = INTVAL (operands[3]);
12940 mask |= INTVAL (operands[4]) << 2;
12941 mask |= (INTVAL (operands[5]) - 16) << 4;
12942 mask |= (INTVAL (operands[6]) - 16) << 6;
12943 operands[3] = GEN_INT (mask);
12945 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
12947 [(set_attr "type" "sselog")
12948 (set_attr "length_immediate" "1")
12949 (set_attr "prefix" "evex")
12950 (set_attr "mode" "V16SF")])
;; Masked 512-bit SHUFPD expander: each of the 8 selector bits picks the
;; low or high element of the corresponding source pair; decode into 8
;; explicit indices for the vec_select pattern below.
12952 (define_expand "avx512f_shufpd512_mask"
12953 [(match_operand:V8DF 0 "register_operand")
12954 (match_operand:V8DF 1 "register_operand")
12955 (match_operand:V8DF 2 "nonimmediate_operand")
12956 (match_operand:SI 3 "const_0_to_255_operand")
12957 (match_operand:V8DF 4 "register_operand")
12958 (match_operand:QI 5 "register_operand")]
12961 int mask = INTVAL (operands[3]);
12962 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
12963 GEN_INT (mask & 1),
12964 GEN_INT (mask & 2 ? 9 : 8),
12965 GEN_INT (mask & 4 ? 3 : 2),
12966 GEN_INT (mask & 8 ? 11 : 10),
12967 GEN_INT (mask & 16 ? 5 : 4),
12968 GEN_INT (mask & 32 ? 13 : 12),
12969 GEN_INT (mask & 64 ? 7 : 6),
12970 GEN_INT (mask & 128 ? 15 : 14),
12971 operands[4], operands[5]));

;; 512-bit SHUFPD as a vec_select; the output routine re-packs the 8
;; indices (operands 3-10) into the immediate byte.
12975 (define_insn "avx512f_shufpd512_1<mask_name>"
12976 [(set (match_operand:V8DF 0 "register_operand" "=v")
12979 (match_operand:V8DF 1 "register_operand" "v")
12980 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
12981 (parallel [(match_operand 3 "const_0_to_1_operand")
12982 (match_operand 4 "const_8_to_9_operand")
12983 (match_operand 5 "const_2_to_3_operand")
12984 (match_operand 6 "const_10_to_11_operand")
12985 (match_operand 7 "const_4_to_5_operand")
12986 (match_operand 8 "const_12_to_13_operand")
12987 (match_operand 9 "const_6_to_7_operand")
12988 (match_operand 10 "const_14_to_15_operand")])))]
12992 mask = INTVAL (operands[3]);
12993 mask |= (INTVAL (operands[4]) - 8) << 1;
12994 mask |= (INTVAL (operands[5]) - 2) << 2;
12995 mask |= (INTVAL (operands[6]) - 10) << 3;
12996 mask |= (INTVAL (operands[7]) - 4) << 4;
12997 mask |= (INTVAL (operands[8]) - 12) << 5;
12998 mask |= (INTVAL (operands[9]) - 6) << 6;
12999 mask |= (INTVAL (operands[10]) - 14) << 7;
13000 operands[3] = GEN_INT (mask);
13002 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
13004 [(set_attr "type" "sselog")
13005 (set_attr "length_immediate" "1")
13006 (set_attr "prefix" "evex")
13007 (set_attr "mode" "V8DF")])

;; 256-bit SHUFPD expander: low 4 selector bits decoded into 4 indices.
13009 (define_expand "avx_shufpd256<mask_expand4_name>"
13010 [(match_operand:V4DF 0 "register_operand")
13011 (match_operand:V4DF 1 "register_operand")
13012 (match_operand:V4DF 2 "nonimmediate_operand")
13013 (match_operand:SI 3 "const_int_operand")]
13016 int mask = INTVAL (operands[3]);
13017 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
13020 GEN_INT (mask & 1),
13021 GEN_INT (mask & 2 ? 5 : 4),
13022 GEN_INT (mask & 4 ? 3 : 2),
13023 GEN_INT (mask & 8 ? 7 : 6)
13024 <mask_expand4_args>));

;; 256-bit SHUFPD insn; re-packs indices into the 4-bit immediate.
13028 (define_insn "avx_shufpd256_1<mask_name>"
13029 [(set (match_operand:V4DF 0 "register_operand" "=v")
13032 (match_operand:V4DF 1 "register_operand" "v")
13033 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
13034 (parallel [(match_operand 3 "const_0_to_1_operand")
13035 (match_operand 4 "const_4_to_5_operand")
13036 (match_operand 5 "const_2_to_3_operand")
13037 (match_operand 6 "const_6_to_7_operand")])))]
13038 "TARGET_AVX && <mask_avx512vl_condition>"
13041 mask = INTVAL (operands[3]);
13042 mask |= (INTVAL (operands[4]) - 4) << 1;
13043 mask |= (INTVAL (operands[5]) - 2) << 2;
13044 mask |= (INTVAL (operands[6]) - 6) << 3;
13045 operands[3] = GEN_INT (mask);
13047 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
13049 [(set_attr "type" "sseshuf")
13050 (set_attr "length_immediate" "1")
13051 (set_attr "prefix" "vex")
13052 (set_attr "mode" "V4DF")])

;; 128-bit SHUFPD expander: 2 selector bits decoded into 2 indices.
13054 (define_expand "sse2_shufpd<mask_expand4_name>"
13055 [(match_operand:V2DF 0 "register_operand")
13056 (match_operand:V2DF 1 "register_operand")
13057 (match_operand:V2DF 2 "vector_operand")
13058 (match_operand:SI 3 "const_int_operand")]
13061 int mask = INTVAL (operands[3]);
13062 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
13063 operands[2], GEN_INT (mask & 1),
13064 GEN_INT (mask & 2 ? 3 : 2)
13065 <mask_expand4_args>));

;; Merge-masked 128-bit SHUFPD (%N5 prints the zero-/merge-mask source).
13069 (define_insn "sse2_shufpd_v2df_mask"
13070 [(set (match_operand:V2DF 0 "register_operand" "=v")
13074 (match_operand:V2DF 1 "register_operand" "v")
13075 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
13076 (parallel [(match_operand 3 "const_0_to_1_operand")
13077 (match_operand 4 "const_2_to_3_operand")]))
13078 (match_operand:V2DF 5 "nonimm_or_0_operand" "0C")
13079 (match_operand:QI 6 "register_operand" "Yk")))]
13083 mask = INTVAL (operands[3]);
13084 mask |= (INTVAL (operands[4]) - 2) << 1;
13085 operands[3] = GEN_INT (mask);
13087 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
13089 [(set_attr "type" "sseshuf")
13090 (set_attr "length_immediate" "1")
13091 (set_attr "prefix" "evex")
13092 (set_attr "mode" "V2DF")])
13094 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; Interleave the odd (high) qword of each 128-bit lane of %1 and %2.
13095 (define_insn "avx2_interleave_highv4di<mask_name>"
13096 [(set (match_operand:V4DI 0 "register_operand" "=v")
13099 (match_operand:V4DI 1 "register_operand" "v")
13100 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
13101 (parallel [(const_int 1)
13105 "TARGET_AVX2 && <mask_avx512vl_condition>"
13106 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13107 [(set_attr "type" "sselog")
13108 (set_attr "prefix" "vex")
13109 (set_attr "mode" "OI")])

;; 512-bit high-qword interleave (per-lane indices 1/9, 3/11, 5/13, 7/15).
13111 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
13112 [(set (match_operand:V8DI 0 "register_operand" "=v")
13115 (match_operand:V8DI 1 "register_operand" "v")
13116 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
13117 (parallel [(const_int 1) (const_int 9)
13118 (const_int 3) (const_int 11)
13119 (const_int 5) (const_int 13)
13120 (const_int 7) (const_int 15)])))]
13122 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13123 [(set_attr "type" "sselog")
13124 (set_attr "prefix" "evex")
13125 (set_attr "mode" "TI")])

;; 128-bit high-qword interleave; noavx alternative uses the destructive
;; two-operand SSE2 form, the avx one the three-operand VEX/EVEX form.
13127 (define_insn "vec_interleave_highv2di<mask_name>"
13128 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
13131 (match_operand:V2DI 1 "register_operand" "0,v")
13132 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
13133 (parallel [(const_int 1)
13135 "TARGET_SSE2 && <mask_avx512vl_condition>"
13137 punpckhqdq\t{%2, %0|%0, %2}
13138 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13139 [(set_attr "isa" "noavx,avx")
13140 (set_attr "type" "sselog")
13141 (set_attr "prefix_data16" "1,*")
13142 (set_attr "prefix" "orig,<mask_prefix>")
13143 (set_attr "mode" "TI")])

;; Interleave the even (low) qword of each 128-bit lane of %1 and %2.
13145 (define_insn "avx2_interleave_lowv4di<mask_name>"
13146 [(set (match_operand:V4DI 0 "register_operand" "=v")
13149 (match_operand:V4DI 1 "register_operand" "v")
13150 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
13151 (parallel [(const_int 0)
13155 "TARGET_AVX2 && <mask_avx512vl_condition>"
13156 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13157 [(set_attr "type" "sselog")
13158 (set_attr "prefix" "vex")
13159 (set_attr "mode" "OI")])

;; 512-bit low-qword interleave (per-lane indices 0/8, 2/10, 4/12, 6/14).
13161 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
13162 [(set (match_operand:V8DI 0 "register_operand" "=v")
13165 (match_operand:V8DI 1 "register_operand" "v")
13166 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
13167 (parallel [(const_int 0) (const_int 8)
13168 (const_int 2) (const_int 10)
13169 (const_int 4) (const_int 12)
13170 (const_int 6) (const_int 14)])))]
13172 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13173 [(set_attr "type" "sselog")
13174 (set_attr "prefix" "evex")
13175 (set_attr "mode" "XI")])

;; 128-bit low-qword interleave (SSE2 / AVX alternatives as above).
13177 (define_insn "vec_interleave_lowv2di<mask_name>"
13178 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
13181 (match_operand:V2DI 1 "register_operand" "0,v")
13182 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
13183 (parallel [(const_int 0)
13185 "TARGET_SSE2 && <mask_avx512vl_condition>"
13187 punpcklqdq\t{%2, %0|%0, %2}
13188 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13189 [(set_attr "isa" "noavx,avx")
13190 (set_attr "type" "sselog")
13191 (set_attr "prefix_data16" "1,*")
13192 (set_attr "prefix" "orig,vex")
13193 (set_attr "mode" "TI")])

;; 128-bit SHUFPD for both V2DF and V2DI; indices packed back into the
;; immediate, with separate SSE (destructive) and AVX templates.
13195 (define_insn "sse2_shufpd_<mode>"
13196 [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
13197 (vec_select:VI8F_128
13198 (vec_concat:<ssedoublevecmode>
13199 (match_operand:VI8F_128 1 "register_operand" "0,v")
13200 (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
13201 (parallel [(match_operand 3 "const_0_to_1_operand")
13202 (match_operand 4 "const_2_to_3_operand")])))]
13206 mask = INTVAL (operands[3]);
13207 mask |= (INTVAL (operands[4]) - 2) << 1;
13208 operands[3] = GEN_INT (mask);
13210 switch (which_alternative)
13213 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
13215 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
13217 gcc_unreachable ();
13220 [(set_attr "isa" "noavx,avx")
13221 (set_attr "type" "sseshuf")
13222 (set_attr "length_immediate" "1")
13223 (set_attr "prefix" "orig,maybe_evex")
13224 (set_attr "mode" "V2DF")])
13226 ;; Avoid combining registers from different units in a single alternative,
13227 ;; see comment above inline_secondary_memory_needed function in i386.cc
;; Store the high DFmode element of a V2DF (vec_select index 1) to
;; memory, an SSE reg, an x87 reg or a GPR.
13228 (define_insn "sse2_storehpd"
13229 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
13231 (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
13232 (parallel [(const_int 1)])))]
13233 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13235 %vmovhpd\t{%1, %0|%0, %1}
13237 vunpckhpd\t{%d1, %0|%0, %d1}
13241 [(set_attr "isa" "*,noavx,avx,*,*,*")
13242 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
13243 (set (attr "prefix_data16")
13245 (and (eq_attr "alternative" "0")
13246 (not (match_test "TARGET_AVX")))
13248 (const_string "*")))
13249 (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
13250 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])

;; After reload, extract of the high element from a V2DF in memory
;; becomes a plain DFmode load at offset 8.
13253 [(set (match_operand:DF 0 "register_operand")
13255 (match_operand:V2DF 1 "memory_operand")
13256 (parallel [(const_int 1)])))]
13257 "TARGET_SSE2 && reload_completed"
13258 [(set (match_dup 0) (match_dup 1))]
13259 "operands[1] = adjust_address (operands[1], DFmode, 8);")

;; SSE1-only fallback for extracting the high element (DF handled via
;; SF-pair moves: movhps/movhlps/movlps).
13261 (define_insn "*vec_extractv2df_1_sse"
13262 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
13264 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
13265 (parallel [(const_int 1)])))]
13266 "!TARGET_SSE2 && TARGET_SSE
13267 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13269 movhps\t{%1, %0|%0, %1}
13270 movhlps\t{%1, %0|%0, %1}
13271 movlps\t{%H1, %0|%0, %H1}"
13272 [(set_attr "type" "ssemov")
13273 (set_attr "mode" "V2SF,V4SF,V2SF")])

13275 ;; Avoid combining registers from different units in a single alternative,
13276 ;; see comment above inline_secondary_memory_needed function in i386.cc
;; Store the low DFmode element of a V2DF (vec_select index 0).
13277 (define_insn "sse2_storelpd"
13278 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
13280 (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
13281 (parallel [(const_int 0)])))]
13282 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13284 %vmovlpd\t{%1, %0|%0, %1}
13289 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
13290 (set (attr "prefix_data16")
13291 (if_then_else (eq_attr "alternative" "0")
13293 (const_string "*")))
13294 (set_attr "prefix" "maybe_vex")
13295 (set_attr "mode" "V1DF,DF,DF,DF,DF")])

;; After reload, extract of the low element is just a lowpart DF move.
13298 [(set (match_operand:DF 0 "register_operand")
13300 (match_operand:V2DF 1 "nonimmediate_operand")
13301 (parallel [(const_int 0)])))]
13302 "TARGET_SSE2 && reload_completed"
13303 [(set (match_dup 0) (match_dup 1))]
13304 "operands[1] = gen_lowpart (DFmode, operands[1]);")

;; SSE1-only fallback for extracting the low element.
13306 (define_insn "*vec_extractv2df_0_sse"
13307 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
13309 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
13310 (parallel [(const_int 0)])))]
13311 "!TARGET_SSE2 && TARGET_SSE
13312 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13314 movlps\t{%1, %0|%0, %1}
13315 movaps\t{%1, %0|%0, %1}
13316 movlps\t{%1, %0|%0, %q1}"
13317 [(set_attr "type" "ssemov")
13318 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper for sse2_loadhpd that legitimizes the binary operands
;; first and copies the result back if a scratch destination was used.
13320 (define_expand "sse2_loadhpd_exp"
13321 [(set (match_operand:V2DF 0 "nonimmediate_operand")
13324 (match_operand:V2DF 1 "nonimmediate_operand")
13325 (parallel [(const_int 0)]))
13326 (match_operand:DF 2 "nonimmediate_operand")))]
13329 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
13331 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
13333 /* Fix up the destination if needed. */
13334 if (dst != operands[0])
13335 emit_move_insn (operands[0], dst);

13340 ;; Avoid combining registers from different units in a single alternative,
13341 ;; see comment above inline_secondary_memory_needed function in i386.cc
;; Replace the high element of %1 with scalar %2 (low element kept).
13342 (define_insn "sse2_loadhpd"
13343 [(set (match_operand:V2DF 0 "nonimmediate_operand"
13344 "=x,v,x,v ,o,o ,o")
13347 (match_operand:V2DF 1 "nonimmediate_operand"
13348 " 0,v,0,v ,0,0 ,0")
13349 (parallel [(const_int 0)]))
13350 (match_operand:DF 2 "nonimmediate_operand"
13351 " m,m,x,Yv,x,*f,r")))]
13352 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13354 movhpd\t{%2, %0|%0, %2}
13355 vmovhpd\t{%2, %1, %0|%0, %1, %2}
13356 unpcklpd\t{%2, %0|%0, %2}
13357 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
13361 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
13362 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
13363 (set (attr "prefix_data16")
13364 (if_then_else (eq_attr "alternative" "0")
13366 (const_string "*")))
13367 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
13368 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])

;; After reload, storing a new high element into a V2DF in memory is a
;; plain DFmode store at offset 8.
13371 [(set (match_operand:V2DF 0 "memory_operand")
13373 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
13374 (match_operand:DF 1 "register_operand")))]
13375 "TARGET_SSE2 && reload_completed"
13376 [(set (match_dup 0) (match_dup 1))]
13377 "operands[0] = adjust_address (operands[0], DFmode, 8);")

;; Expander wrapper for sse2_loadlpd (mirror of sse2_loadhpd_exp).
13379 (define_expand "sse2_loadlpd_exp"
13380 [(set (match_operand:V2DF 0 "nonimmediate_operand")
13382 (match_operand:DF 2 "nonimmediate_operand")
13384 (match_operand:V2DF 1 "nonimmediate_operand")
13385 (parallel [(const_int 1)]))))]
13388 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
13390 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
13392 /* Fix up the destination if needed. */
13393 if (dst != operands[0])
13394 emit_move_insn (operands[0], dst);

13399 ;; Avoid combining registers from different units in a single alternative,
13400 ;; see comment above inline_secondary_memory_needed function in i386.cc
;; Replace the low element of %1 with scalar %2 (high element kept;
;; alternative 0 with %1 == const0 is just a zero-extending movq).
13401 (define_insn "sse2_loadlpd"
13402 [(set (match_operand:V2DF 0 "nonimmediate_operand"
13403 "=v,x,v,x,v,x,x,v,m,m ,m")
13405 (match_operand:DF 2 "nonimmediate_operand"
13406 "vm,m,m,x,v,0,0,v,x,*f,r")
13408 (match_operand:V2DF 1 "nonimm_or_0_operand"
13409 " C,0,v,0,v,x,o,o,0,0 ,0")
13410 (parallel [(const_int 1)]))))]
13411 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13413 %vmovq\t{%2, %0|%0, %2}
13414 movlpd\t{%2, %0|%0, %2}
13415 vmovlpd\t{%2, %1, %0|%0, %1, %2}
13416 movsd\t{%2, %0|%0, %2}
13417 vmovsd\t{%2, %1, %0|%0, %1, %2}
13418 shufpd\t{$2, %1, %0|%0, %1, 2}
13419 movhpd\t{%H1, %0|%0, %H1}
13420 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
13424 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
13426 (cond [(eq_attr "alternative" "5")
13427 (const_string "sselog")
13428 (eq_attr "alternative" "9")
13429 (const_string "fmov")
13430 (eq_attr "alternative" "10")
13431 (const_string "imov")
13433 (const_string "ssemov")))
13434 (set (attr "prefix_data16")
13435 (if_then_else (eq_attr "alternative" "1,6")
13437 (const_string "*")))
13438 (set (attr "length_immediate")
13439 (if_then_else (eq_attr "alternative" "5")
13441 (const_string "*")))
13442 (set (attr "prefix")
13443 (cond [(eq_attr "alternative" "0")
13444 (const_string "maybe_vex")
13445 (eq_attr "alternative" "1,3,5,6")
13446 (const_string "orig")
13447 (eq_attr "alternative" "2,4,7")
13448 (const_string "maybe_evex")
13450 (const_string "*")))
13451 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])

;; After reload, storing a new low element into a V2DF in memory is a
;; plain DFmode store at offset 0.
13454 [(set (match_operand:V2DF 0 "memory_operand")
13456 (match_operand:DF 1 "register_operand")
13457 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
13458 "TARGET_SSE2 && reload_completed"
13459 [(set (match_dup 0) (match_dup 1))]
13460 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; MOVSD-style merge for 128-bit modes with 64-bit elements (VI8F_128):
;; take the low 64-bit element from operand 2 and the high element from
;; operand 1.  Alternatives map onto movsd/vmovsd, movlpd/vmovlpd (low-half
;; memory forms), shufpd, and movhps/vmovhps (high-half forms).
;; NOTE(review): the insn condition string and the "@" template opener are
;; missing from this extract (embedded-numbering gap 13467-13469), as are
;; several attribute-opener lines -- verify against upstream sse.md.
13462 (define_insn "sse2_movsd_<mode>"
13463 [(set (match_operand:VI8F_128 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
13464 (vec_merge:VI8F_128
13465 (match_operand:VI8F_128 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
13466 (match_operand:VI8F_128 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
13470 movsd\t{%2, %0|%0, %2}
13471 vmovsd\t{%2, %1, %0|%0, %1, %2}
13472 movlpd\t{%2, %0|%0, %q2}
13473 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
13474 %vmovlpd\t{%2, %0|%q0, %2}
13475 shufpd\t{$2, %1, %0|%0, %1, 2}
13476 movhps\t{%H1, %0|%0, %H1}
13477 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
13478 %vmovhps\t{%1, %H0|%H0, %1}"
13479 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
13482 (eq_attr "alternative" "5")
13483 (const_string "sselog")
13484 (const_string "ssemov")))
13485 (set (attr "prefix_data16")
13487 (and (eq_attr "alternative" "2,4")
13488 (not (match_test "TARGET_AVX")))
13490 (const_string "*")))
13491 (set (attr "length_immediate")
13492 (if_then_else (eq_attr "alternative" "5")
13494 (const_string "*")))
13495 (set (attr "prefix")
13496 (cond [(eq_attr "alternative" "1,3,7")
13497 (const_string "maybe_evex")
13498 (eq_attr "alternative" "4,8")
13499 (const_string "maybe_vex")
13501 (const_string "orig")))
13502 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Broadcast a scalar DF into both lanes of a V2DF, optionally masked
;; (AVX512VL).  Alternatives: SSE2 in-place form, SSE3+/AVX movddup, and
;; the EVEX-encoded vmovddup.
;; NOTE(review): the first template alternative (the SSE2 unpcklpd-style
;; line) appears to be missing from this extract (numbering gap
;; 13509-13510) -- confirm against upstream sse.md.
13504 (define_insn "vec_dupv2df<mask_name>"
13505 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
13506 (vec_duplicate:V2DF
13507 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
13508 "TARGET_SSE2 && <mask_avx512vl_condition>"
13511 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
13512 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
13513 [(set_attr "isa" "noavx,sse3,avx512vl")
13514 (set_attr "type" "sselog1")
13515 (set_attr "prefix" "orig,maybe_vex,evex")
13516 (set_attr "mode" "V2DF,DF,DF")])
;; Build a V2DF from two scalar DF operands (operand 1 -> low lane,
;; operand 2 -> high lane).  Covers unpcklpd/vunpcklpd, movhpd/vmovhpd
;; (high lane from memory), vmovq-style zero-extending load when the high
;; lane is zero (C constraint), and SSE1 movlhps/movhps fallbacks.
;; NOTE(review): the "(vec_concat:V2DF" wrapper line and the "@" template
;; opener are missing from this extract (numbering gaps) -- confirm
;; against upstream sse.md.
13518 (define_insn "vec_concatv2df"
13519 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,x, v,x,x")
13521 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,0,x,vm,0,0")
13522 (match_operand:DF 2 "nonimm_or_0_operand" " x,x,v,m,m, C,x,m")))]
13523 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13525 unpcklpd\t{%2, %0|%0, %2}
13526 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
13527 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
13528 movhpd\t{%2, %0|%0, %2}
13529 vmovhpd\t{%2, %1, %0|%0, %1, %2}
13530 %vmovq\t{%1, %0|%0, %1}
13531 movlhps\t{%2, %0|%0, %2}
13532 movhps\t{%2, %0|%0, %2}"
13534 (cond [(eq_attr "alternative" "0,3")
13535 (const_string "sse2_noavx")
13536 (eq_attr "alternative" "1,4")
13537 (const_string "avx")
13538 (eq_attr "alternative" "2")
13539 (const_string "avx512vl")
13540 (eq_attr "alternative" "5")
13541 (const_string "sse2")
13543 (const_string "noavx")))
13546 (eq_attr "alternative" "0,1,2")
13547 (const_string "sselog")
13548 (const_string "ssemov")))
13549 (set (attr "prefix_data16")
13550 (if_then_else (eq_attr "alternative" "3")
13552 (const_string "*")))
13553 (set (attr "prefix")
13554 (cond [(eq_attr "alternative" "1,4")
13555 (const_string "vex")
13556 (eq_attr "alternative" "2")
13557 (const_string "evex")
13558 (eq_attr "alternative" "5")
13559 (const_string "maybe_vex")
13561 (const_string "orig")))
13562 (set_attr "mode" "V2DF,V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
13564 ;; vmovq also clears the upper bits of the destination, so a merge of a
;; duplicated scalar into an all-zero vector (operand 1 is const0) is just
;; a scalar vmovq into the low 64 bits of the xmm register (%x0).
13565 (define_insn "vec_set<mode>_0"
13566 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
13567 (vec_merge:VF2_512_256
13568 (vec_duplicate:VF2_512_256
13569 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "vm"))
13570 (match_operand:VF2_512_256 1 "const0_operand")
;; NOTE(review): the merge-mask const_int and the insn condition are not
;; visible in this extract (numbering gap 13571-13572) -- confirm upstream.
13573 "vmovq\t{%2, %x0|%x0, %2}"
13574 [(set_attr "type" "ssemov")
13575 (set_attr "prefix" "maybe_evex")
13576 (set_attr "mode" "DF")])
13578 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13580 ;; Parallel integer down-conversion operations
13582 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; PMOV_DST_MODE_1: destination modes whose source is a full 512-bit
;; vector; pmov_src_mode/pmov_src_lower give the matching source mode,
;; pmov_suff_1 the VPMOV element-size suffix (db = dword->byte, etc.).
13584 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
13585 (define_mode_attr pmov_src_mode
13586 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
13587 (define_mode_attr pmov_src_lower
13588 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
13589 (define_mode_attr pmov_suff_1
13590 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
;; Standard-named truncation expander; matched by the insn below.
13592 (define_expand "trunc<pmov_src_lower><mode>2"
13593 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand")
13594 (truncate:PMOV_DST_MODE_1
13595 (match_operand:<pmov_src_mode> 1 "register_operand")))]
;; VPMOV(S/US)?{db,dw,qd,qw}: truncating (or saturating, via any_truncate)
;; down-conversion, register or store form.
13598 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
13599 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
13600 (any_truncate:PMOV_DST_MODE_1
13601 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
13603 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
13604 [(set_attr "type" "ssemov")
13605 (set_attr "memory" "none,store")
13606 (set_attr "prefix" "evex")
13607 (set_attr "mode" "<sseinsnmode>")])
;; Combine-style splitters: a VPERMVAR whose control vector matches
;; permvar_truncate_operand followed by taking the low half is really a
;; truncation; rewrite it as truncv16siv16hi so the cheaper VPMOV form is
;; emitted.  Split before reload only (ix86_pre_reload_split).
13609 (define_insn_and_split "*avx512bw_permvar_truncv16siv16hi_1"
13610 [(set (match_operand:V16HI 0 "nonimmediate_operand")
13613 [(match_operand:V32HI 1 "register_operand")
13614 (match_operand:V32HI 2 "permvar_truncate_operand")]
13616 (parallel [(const_int 0) (const_int 1)
13617 (const_int 2) (const_int 3)
13618 (const_int 4) (const_int 5)
13619 (const_int 6) (const_int 7)
13620 (const_int 8) (const_int 9)
13621 (const_int 10) (const_int 11)
13622 (const_int 12) (const_int 13)
13623 (const_int 14) (const_int 15)])))]
13624 "TARGET_AVX512BW && ix86_pre_reload_split ()"
13627 [(set (match_dup 0)
13628 (truncate:V16HI (match_dup 1)))]
13629 "operands[1] = lowpart_subreg (V16SImode,
13630 force_reg (V32HImode, operands[1]),
;; Same pattern with a V16HF destination: the permute result is viewed as
;; half-floats via a paren-subreg; truncate in V16HI and bitcast back.
13633 (define_insn_and_split "*avx512bw_permvar_truncv16siv16hi_1_hf"
13634 [(set (match_operand:V16HF 0 "nonimmediate_operand")
13638 [(match_operand:V32HI 1 "register_operand")
13639 (match_operand:V32HI 2 "permvar_truncate_operand")]
13640 UNSPEC_VPERMVAR) 0)
13641 (parallel [(const_int 0) (const_int 1)
13642 (const_int 2) (const_int 3)
13643 (const_int 4) (const_int 5)
13644 (const_int 6) (const_int 7)
13645 (const_int 8) (const_int 9)
13646 (const_int 10) (const_int 11)
13647 (const_int 12) (const_int 13)
13648 (const_int 14) (const_int 15)])))]
13649 "TARGET_AVX512BW && ix86_pre_reload_split ()"
13652 [(set (match_dup 0)
13653 (truncate:V16HI (match_dup 1)))]
13655 operands[1] = lowpart_subreg (V16SImode,
13656 force_reg (V32HImode, operands[1]),
13658 if (MEM_P (operands[0]))
13659 operands[0] = lowpart_subreg (V16HImode, operands[0], V16HFmode);
;; Register destination: truncate into a fresh V16HI, then move the
;; V16HF view of it into operand 0.
13662 rtx op0 = gen_reg_rtx (V16HImode);
13663 emit_insn (gen_truncv16siv16hi2 (op0, operands[1]));
13664 emit_move_insn (operands[0], lowpart_subreg (V16HFmode, op0, V16HImode));
;; 256-bit analogues of the permvar-as-truncate splitters above:
;; VPERMVAR + low-half select on V16HI is a v8si->v8hi truncate
;; (needs AVX512VL for the 256-bit VPMOV and AVX512BW for vpermw forms).
13670 (define_insn_and_split "*avx512f_permvar_truncv8siv8hi_1"
13671 [(set (match_operand:V8HI 0 "nonimmediate_operand")
13674 [(match_operand:V16HI 1 "register_operand")
13675 (match_operand:V16HI 2 "permvar_truncate_operand")]
13677 (parallel [(const_int 0) (const_int 1)
13678 (const_int 2) (const_int 3)
13679 (const_int 4) (const_int 5)
13680 (const_int 6) (const_int 7)])))]
13681 "TARGET_AVX512VL && TARGET_AVX512BW && ix86_pre_reload_split ()"
13684 [(set (match_dup 0)
13685 (truncate:V8HI (match_dup 1)))]
13686 "operands[1] = lowpart_subreg (V8SImode,
13687 force_reg (V16HImode, operands[1]),
;; Same with a V8HF destination, bitcast through V8HI.
13690 (define_insn_and_split "*avx512f_permvar_truncv8siv8hi_1_hf"
13691 [(set (match_operand:V8HF 0 "nonimmediate_operand")
13695 [(match_operand:V16HI 1 "register_operand")
13696 (match_operand:V16HI 2 "permvar_truncate_operand")]
13697 UNSPEC_VPERMVAR) 0)
13698 (parallel [(const_int 0) (const_int 1)
13699 (const_int 2) (const_int 3)
13700 (const_int 4) (const_int 5)
13701 (const_int 6) (const_int 7)])))]
13702 "TARGET_AVX512VL && TARGET_AVX512BW && ix86_pre_reload_split ()"
13705 [(set (match_dup 0)
13706 (truncate:V8HI (match_dup 1)))]
13708 operands[1] = lowpart_subreg (V8SImode,
13709 force_reg (V16HImode, operands[1]),
13711 if (MEM_P (operands[0]))
13712 operands[0] = lowpart_subreg (V8HImode, operands[0], V8HFmode);
13715 rtx op0 = gen_reg_rtx (V8HImode);
13716 emit_insn (gen_truncv8siv8hi2 (op0, operands[1]));
13717 emit_move_insn (operands[0], lowpart_subreg (V8HFmode, op0, V8HImode));
;; VPERMVAR + low-half select on V16SI is a v8di->v8si truncate
;; (base AVX512F, 512-bit source).
13722 (define_insn_and_split "*avx512f_vpermvar_truncv8div8si_1"
13723 [(set (match_operand:V8SI 0 "nonimmediate_operand")
13726 [(match_operand:V16SI 1 "register_operand")
13727 (match_operand:V16SI 2 "permvar_truncate_operand")]
13729 (parallel [(const_int 0) (const_int 1)
13730 (const_int 2) (const_int 3)
13731 (const_int 4) (const_int 5)
13732 (const_int 6) (const_int 7)])))]
13733 "TARGET_AVX512F && ix86_pre_reload_split ()"
13736 [(set (match_dup 0)
13737 (truncate:V8SI (match_dup 1)))]
13738 "operands[1] = lowpart_subreg (V8DImode,
13739 force_reg (V16SImode, operands[1]),
;; Masked VPMOV down-conversion: merge the truncated result with
;; operand 2 (or zero, via the 0C constraint) under mask operand 3.
;; %N2 selects the {z} zero-masking modifier when operand 2 is const0.
13742 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
13743 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
13744 (vec_merge:PMOV_DST_MODE_1
13745 (any_truncate:PMOV_DST_MODE_1
13746 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
13747 (match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0")
13748 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
13750 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
13751 [(set_attr "type" "ssemov")
13752 (set_attr "memory" "none,store")
13753 (set_attr "prefix" "evex")
13754 (set_attr "mode" "<sseinsnmode>")])
;; Builtin-facing expander for the masked store form (merge source is the
;; destination memory itself; line not visible in this extract).
13756 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
13757 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
13758 (vec_merge:PMOV_DST_MODE_1
13759 (any_truncate:PMOV_DST_MODE_1
13760 (match_operand:<pmov_src_mode> 1 "register_operand"))
13762 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; v32hi -> v32qi truncation (AVX512BW, 512-bit): expander + insn.
13765 (define_expand "truncv32hiv32qi2"
13766 [(set (match_operand:V32QI 0 "nonimmediate_operand")
13768 (match_operand:V32HI 1 "register_operand")))]
13771 (define_insn "avx512bw_<code>v32hiv32qi2"
13772 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
13773 (any_truncate:V32QI
13774 (match_operand:V32HI 1 "register_operand" "v,v")))]
13776 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
13777 [(set_attr "type" "ssemov")
13778 (set_attr "memory" "none,store")
13779 (set_attr "prefix" "evex")
13780 (set_attr "mode" "XI")])
;; VPERMVAR on V64QI + low-half select is a v32hi->v32qi truncate
;; (byte-granular vpermb needs AVX512VBMI).
13782 (define_insn_and_split "*avx512f_permvar_truncv32hiv32qi_1"
13783 [(set (match_operand:V32QI 0 "nonimmediate_operand")
13786 [(match_operand:V64QI 1 "register_operand")
13787 (match_operand:V64QI 2 "permvar_truncate_operand")]
13789 (parallel [(const_int 0) (const_int 1)
13790 (const_int 2) (const_int 3)
13791 (const_int 4) (const_int 5)
13792 (const_int 6) (const_int 7)
13793 (const_int 8) (const_int 9)
13794 (const_int 10) (const_int 11)
13795 (const_int 12) (const_int 13)
13796 (const_int 14) (const_int 15)
13797 (const_int 16) (const_int 17)
13798 (const_int 18) (const_int 19)
13799 (const_int 20) (const_int 21)
13800 (const_int 22) (const_int 23)
13801 (const_int 24) (const_int 25)
13802 (const_int 26) (const_int 27)
13803 (const_int 28) (const_int 29)
13804 (const_int 30) (const_int 31)])))]
13805 "TARGET_AVX512VBMI && ix86_pre_reload_split ()"
13808 [(set (match_dup 0)
13809 (truncate:V32QI (match_dup 1)))]
13810 "operands[1] = lowpart_subreg (V32HImode,
13811 force_reg (V64QImode, operands[1]),
;; Masked v32hi->v32qi truncation; SImode mask (32 lanes).
13814 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
13815 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
13817 (any_truncate:V32QI
13818 (match_operand:V32HI 1 "register_operand" "v,v"))
13819 (match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0")
13820 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
13822 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
13823 [(set_attr "type" "ssemov")
13824 (set_attr "memory" "none,store")
13825 (set_attr "prefix" "evex")
13826 (set_attr "mode" "XI")])
;; Builtin-facing masked-store expander for the same operation.
13828 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
13829 [(set (match_operand:V32QI 0 "nonimmediate_operand")
13831 (any_truncate:V32QI
13832 (match_operand:V32HI 1 "register_operand"))
13834 (match_operand:SI 2 "register_operand")))]
;; PMOV_DST_MODE_2: half-width destinations whose source is the
;; double-size vector mode (<ssedoublemode>); wb needs AVX512BW.
13837 (define_mode_iterator PMOV_DST_MODE_2
13838 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
13839 (define_mode_attr pmov_suff_2
13840 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
;; Standard-named truncation expander for the double->single width case.
13842 (define_expand "trunc<ssedoublemodelower><mode>2"
13843 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
13844 (truncate:PMOV_DST_MODE_2
13845 (match_operand:<ssedoublemode> 1 "register_operand")))]
;; AVX512VL VPMOV for the 256->128-bit truncations, reg or store form.
13848 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
13849 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
13850 (any_truncate:PMOV_DST_MODE_2
13851 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
13853 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
13854 [(set_attr "type" "ssemov")
13855 (set_attr "memory" "none,store")
13856 (set_attr "prefix" "evex")
13857 (set_attr "mode" "<sseinsnmode>")])
;; VPERMVAR + low-half select recognized as v16hi->v16qi truncate
;; (byte permute: AVX512VL + AVX512VBMI).
13859 (define_insn_and_split "*avx512f_permvar_truncv16hiv16qi_1"
13860 [(set (match_operand:V16QI 0 "nonimmediate_operand")
13863 [(match_operand:V32QI 1 "register_operand")
13864 (match_operand:V32QI 2 "permvar_truncate_operand")]
13866 (parallel [(const_int 0) (const_int 1)
13867 (const_int 2) (const_int 3)
13868 (const_int 4) (const_int 5)
13869 (const_int 6) (const_int 7)
13870 (const_int 8) (const_int 9)
13871 (const_int 10) (const_int 11)
13872 (const_int 12) (const_int 13)
13873 (const_int 14) (const_int 15)])))]
13874 "TARGET_AVX512VL && TARGET_AVX512VBMI
13875 && ix86_pre_reload_split ()"
13878 [(set (match_dup 0)
13879 (truncate:V16QI (match_dup 1)))]
13880 "operands[1] = lowpart_subreg (V16HImode,
13881 force_reg (V32QImode, operands[1]),
;; VPERMVAR + low-half select recognized as v4di->v4si truncate.
13884 (define_insn_and_split "*avx512f_permvar_truncv4div4si_1"
13885 [(set (match_operand:V4SI 0 "nonimmediate_operand")
13888 [(match_operand:V8SI 1 "register_operand")
13889 (match_operand:V8SI 2 "permvar_truncate_operand")]
13891 (parallel [(const_int 0) (const_int 1)
13892 (const_int 2) (const_int 3)])))]
13893 "TARGET_AVX512VL && ix86_pre_reload_split ()"
13896 [(set (match_dup 0)
13897 (truncate:V4SI (match_dup 1)))]
13898 "operands[1] = lowpart_subreg (V4DImode,
13899 force_reg (V8SImode, operands[1]),
;; Masked variant of the 256->128-bit VPMOV, plus its builtin-facing
;; masked-store expander.
13902 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
13903 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
13904 (vec_merge:PMOV_DST_MODE_2
13905 (any_truncate:PMOV_DST_MODE_2
13906 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
13907 (match_operand:PMOV_DST_MODE_2 2 "nonimm_or_0_operand" "0C,0")
13908 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
13910 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
13911 [(set_attr "type" "ssemov")
13912 (set_attr "memory" "none,store")
13913 (set_attr "prefix" "evex")
13914 (set_attr "mode" "<sseinsnmode>")])
13916 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
13917 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
13918 (vec_merge:PMOV_DST_MODE_2
13919 (any_truncate:PMOV_DST_MODE_2
13920 (match_operand:<ssedoublemode> 1 "register_operand"))
13922 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Truncations to sub-128-bit QI destinations.  The VPMOV instruction
;; always writes a full 16-byte xmm; pmov_dst_zeroed_3 names the mode of
;; the zero padding that fills the rest of the V16QI result.
13925 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
13926 (define_mode_attr pmov_dst_3_lower
13927 [(V4DI "v4qi") (V2DI "v2qi") (V8SI "v8qi") (V4SI "v4qi") (V8HI "v8qi")])
13928 (define_mode_attr pmov_dst_3
13929 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
13930 (define_mode_attr pmov_dst_zeroed_3
13931 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
13932 (define_mode_attr pmov_suff_3
13933 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
;; Expander: run the padded V16QI insn, then extract the low sub-vector.
13935 (define_expand "trunc<mode><pmov_dst_3_lower>2"
13936 [(set (match_operand:<pmov_dst_3> 0 "register_operand")
13937 (truncate:<pmov_dst_3>
13938 (match_operand:PMOV_SRC_MODE_3 1 "register_operand")))]
13941 rtx op0 = gen_reg_rtx (V16QImode);
13943 emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>qi2
13944 (op0, operands[1], CONST0_RTX (<pmov_dst_zeroed_3>mode)));
13946 emit_move_insn (operands[0],
13947 lowpart_subreg (<pmov_dst_3>mode, op0, V16QImode));
;; The padded insn: truncated low elements concatenated with zeros.
13951 (define_insn "avx512vl_<code><mode>v<ssescalarnum>qi2"
13952 [(set (match_operand:V16QI 0 "register_operand" "=v")
13954 (any_truncate:<pmov_dst_3>
13955 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
13956 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
13958 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
13959 [(set_attr "type" "ssemov")
13960 (set_attr "prefix" "evex")
13961 (set_attr "mode" "TI")])
;; PSHUFB with a truncation-shaped control + low-DI extract is really a
;; v8hi->v8qi truncate; rewrite to the VPMOV form pre-reload.
13963 (define_insn_and_split "*avx512f_pshufb_truncv8hiv8qi_1"
13964 [(set (match_operand:DI 0 "register_operand")
13968 [(match_operand:V16QI 1 "register_operand")
13969 (match_operand:V16QI 2 "pshufb_truncv8hiv8qi_operand")]
13971 (parallel [(const_int 0)])))]
13972 "TARGET_AVX512VL && TARGET_AVX512BW && ix86_pre_reload_split ()"
13977 rtx op1 = gen_reg_rtx (V8QImode);
13978 operands[1] = lowpart_subreg (V8HImode,
13979 force_reg (V16QImode, operands[1]),
13981 emit_insn (gen_truncv8hiv8qi2 (op1, operands[1]))/**/;
13982 emit_move_insn (operands[0], lowpart_subreg (DImode, op1, V8QImode));
;; v2di -> v2qi truncation family: direct 2-byte store, HI-viewed store
;; splitter, register masked forms (zero-padded to V16QI), masked store,
;; and the HI-viewed masked-store splitter.
13986 (define_insn "*avx512vl_<code>v2div2qi2_store_1"
13987 [(set (match_operand:V2QI 0 "memory_operand" "=m")
13989 (match_operand:V2DI 1 "register_operand" "v")))]
13991 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
13992 [(set_attr "type" "ssemov")
13993 (set_attr "memory" "store")
13994 (set_attr "prefix" "evex")
13995 (set_attr "mode" "TI")])
;; An HI store of the subreg'd truncation is the same 2-byte VPMOV store;
;; retarget the memory operand to V2QImode.
13997 (define_insn_and_split "*avx512vl_<code>v2div2qi2_store_2"
13998 [(set (match_operand:HI 0 "memory_operand")
14001 (match_operand:V2DI 1 "register_operand")) 0))]
14002 "TARGET_AVX512VL && ix86_pre_reload_split ()"
14005 [(set (match_dup 0)
14006 (any_truncate:V2QI (match_dup 1)))]
14007 "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);")
;; Masked register form: low 2 bytes merged under mask 3, upper 14 bytes
;; explicitly zero (const_vector:V14QI).
14009 (define_insn "avx512vl_<code>v2div2qi2_mask"
14010 [(set (match_operand:V16QI 0 "register_operand" "=v")
14014 (match_operand:V2DI 1 "register_operand" "v"))
14016 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
14017 (parallel [(const_int 0) (const_int 1)]))
14018 (match_operand:QI 3 "register_operand" "Yk"))
14019 (const_vector:V14QI [(const_int 0) (const_int 0)
14020 (const_int 0) (const_int 0)
14021 (const_int 0) (const_int 0)
14022 (const_int 0) (const_int 0)
14023 (const_int 0) (const_int 0)
14024 (const_int 0) (const_int 0)
14025 (const_int 0) (const_int 0)])))]
14027 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14028 [(set_attr "type" "ssemov")
14029 (set_attr "prefix" "evex")
14030 (set_attr "mode" "TI")])
;; Zero-masking variant: merge source is literal zeros, emit {z}.
14032 (define_insn "*avx512vl_<code>v2div2qi2_mask_1"
14033 [(set (match_operand:V16QI 0 "register_operand" "=v")
14037 (match_operand:V2DI 1 "register_operand" "v"))
14038 (const_vector:V2QI [(const_int 0) (const_int 0)])
14039 (match_operand:QI 2 "register_operand" "Yk"))
14040 (const_vector:V14QI [(const_int 0) (const_int 0)
14041 (const_int 0) (const_int 0)
14042 (const_int 0) (const_int 0)
14043 (const_int 0) (const_int 0)
14044 (const_int 0) (const_int 0)
14045 (const_int 0) (const_int 0)
14046 (const_int 0) (const_int 0)])))]
14048 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
14049 [(set_attr "type" "ssemov")
14050 (set_attr "prefix" "evex")
14051 (set_attr "mode" "TI")])
;; Masked 2-byte store (merge source is the destination memory).
14053 (define_insn "*avx512vl_<code>v2div2qi2_mask_store_1"
14054 [(set (match_operand:V2QI 0 "memory_operand" "=m")
14057 (match_operand:V2DI 1 "register_operand" "v"))
14059 (match_operand:QI 2 "register_operand" "Yk")))]
14061 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
14062 [(set_attr "type" "ssemov")
14063 (set_attr "memory" "store")
14064 (set_attr "prefix" "evex")
14065 (set_attr "mode" "TI")])
;; HI-viewed masked store: retarget memory to V2QImode and use the
;; masked-store insn above.
14067 (define_insn_and_split "avx512vl_<code>v2div2qi2_mask_store_2"
14068 [(set (match_operand:HI 0 "memory_operand")
14072 (match_operand:V2DI 1 "register_operand"))
14078 (parallel [(const_int 0) (const_int 1)]))
14079 (match_operand:QI 2 "register_operand")) 0))]
14080 "TARGET_AVX512VL && ix86_pre_reload_split ()"
14083 [(set (match_dup 0)
14085 (any_truncate:V2QI (match_dup 1))
14088 "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);")
;; {v4di,v8si->…,v4si} -> v4qi truncation family (VI4_128_8_256 sources):
;; 4-byte store, SI-viewed store splitter, masked register forms padded
;; with V12QI zeros, masked store, SI-viewed masked-store splitter.
14090 (define_insn "*avx512vl_<code><mode>v4qi2_store_1"
14091 [(set (match_operand:V4QI 0 "memory_operand" "=m")
14093 (match_operand:VI4_128_8_256 1 "register_operand" "v")))]
14095 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
14096 [(set_attr "type" "ssemov")
14097 (set_attr "memory" "store")
14098 (set_attr "prefix" "evex")
14099 (set_attr "mode" "TI")])
;; SI store of the subreg'd truncation == 4-byte VPMOV store.
14101 (define_insn_and_split "*avx512vl_<code><mode>v4qi2_store_2"
14102 [(set (match_operand:SI 0 "memory_operand")
14105 (match_operand:VI4_128_8_256 1 "register_operand")) 0))]
14106 "TARGET_AVX512VL && ix86_pre_reload_split ()"
14109 [(set (match_dup 0)
14110 (any_truncate:V4QI (match_dup 1)))]
14111 "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);")
;; Masked register form: low 4 bytes merged under mask 3, rest zero.
14113 (define_insn "avx512vl_<code><mode>v4qi2_mask"
14114 [(set (match_operand:V16QI 0 "register_operand" "=v")
14118 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
14120 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
14121 (parallel [(const_int 0) (const_int 1)
14122 (const_int 2) (const_int 3)]))
14123 (match_operand:QI 3 "register_operand" "Yk"))
14124 (const_vector:V12QI [(const_int 0) (const_int 0)
14125 (const_int 0) (const_int 0)
14126 (const_int 0) (const_int 0)
14127 (const_int 0) (const_int 0)
14128 (const_int 0) (const_int 0)
14129 (const_int 0) (const_int 0)])))]
14131 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14132 [(set_attr "type" "ssemov")
14133 (set_attr "prefix" "evex")
14134 (set_attr "mode" "TI")])
;; Zero-masking variant ({z} modifier, zero merge source).
14136 (define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
14137 [(set (match_operand:V16QI 0 "register_operand" "=v")
14141 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
14142 (const_vector:V4QI [(const_int 0) (const_int 0)
14143 (const_int 0) (const_int 0)])
14144 (match_operand:QI 2 "register_operand" "Yk"))
14145 (const_vector:V12QI [(const_int 0) (const_int 0)
14146 (const_int 0) (const_int 0)
14147 (const_int 0) (const_int 0)
14148 (const_int 0) (const_int 0)
14149 (const_int 0) (const_int 0)
14150 (const_int 0) (const_int 0)])))]
14152 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
14153 [(set_attr "type" "ssemov")
14154 (set_attr "prefix" "evex")
14155 (set_attr "mode" "TI")])
;; Masked 4-byte store.
14157 (define_insn "*avx512vl_<code><mode>v4qi2_mask_store_1"
14158 [(set (match_operand:V4QI 0 "memory_operand" "=m")
14161 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
14163 (match_operand:QI 2 "register_operand" "Yk")))]
14165 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
14166 [(set_attr "type" "ssemov")
14167 (set_attr "memory" "store")
14168 (set_attr "prefix" "evex")
14169 (set_attr "mode" "TI")])
;; SI-viewed masked store: retarget memory to V4QImode.
14171 (define_insn_and_split "avx512vl_<code><mode>v4qi2_mask_store_2"
14172 [(set (match_operand:SI 0 "memory_operand")
14176 (match_operand:VI4_128_8_256 1 "register_operand"))
14182 (parallel [(const_int 0) (const_int 1)
14183 (const_int 2) (const_int 3)]))
14184 (match_operand:QI 2 "register_operand")) 0))]
14185 "TARGET_AVX512VL && ix86_pre_reload_split ()"
14188 [(set (match_dup 0)
14190 (any_truncate:V4QI (match_dup 1))
14193 "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);")
;; {v8hi (AVX512BW), v8si} -> v8qi truncation family: 8-byte store,
;; DI-viewed store splitter, masked register forms padded with V8QI
;; zeros, masked store, DI-viewed masked-store splitter.
14195 (define_mode_iterator VI2_128_BW_4_256
14196 [(V8HI "TARGET_AVX512BW") V8SI])
14198 (define_insn "*avx512vl_<code><mode>v8qi2_store_1"
14199 [(set (match_operand:V8QI 0 "memory_operand" "=m")
14201 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")))]
14203 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
14204 [(set_attr "type" "ssemov")
14205 (set_attr "memory" "store")
14206 (set_attr "prefix" "evex")
14207 (set_attr "mode" "TI")])
;; DI store of the subreg'd truncation == 8-byte VPMOV store.
14209 (define_insn_and_split "*avx512vl_<code><mode>v8qi2_store_2"
14210 [(set (match_operand:DI 0 "memory_operand" "=m")
14213 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v")) 0))]
14214 "TARGET_AVX512VL && ix86_pre_reload_split ()"
14217 [(set (match_dup 0)
14218 (any_truncate:V8QI (match_dup 1)))]
14219 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
;; Masked register form: low 8 bytes merged under mask 3, rest zero.
14221 (define_insn "avx512vl_<code><mode>v8qi2_mask"
14222 [(set (match_operand:V16QI 0 "register_operand" "=v")
14226 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
14228 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
14229 (parallel [(const_int 0) (const_int 1)
14230 (const_int 2) (const_int 3)
14231 (const_int 4) (const_int 5)
14232 (const_int 6) (const_int 7)]))
14233 (match_operand:QI 3 "register_operand" "Yk"))
14234 (const_vector:V8QI [(const_int 0) (const_int 0)
14235 (const_int 0) (const_int 0)
14236 (const_int 0) (const_int 0)
14237 (const_int 0) (const_int 0)])))]
14239 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14240 [(set_attr "type" "ssemov")
14241 (set_attr "prefix" "evex")
14242 (set_attr "mode" "TI")])
;; Zero-masking variant ({z} modifier, zero merge source).
14244 (define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
14245 [(set (match_operand:V16QI 0 "register_operand" "=v")
14249 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
14250 (const_vector:V8QI [(const_int 0) (const_int 0)
14251 (const_int 0) (const_int 0)
14252 (const_int 0) (const_int 0)
14253 (const_int 0) (const_int 0)])
14254 (match_operand:QI 2 "register_operand" "Yk"))
14255 (const_vector:V8QI [(const_int 0) (const_int 0)
14256 (const_int 0) (const_int 0)
14257 (const_int 0) (const_int 0)
14258 (const_int 0) (const_int 0)])))]
14260 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
14261 [(set_attr "type" "ssemov")
14262 (set_attr "prefix" "evex")
14263 (set_attr "mode" "TI")])
;; Masked 8-byte store.
14265 (define_insn "*avx512vl_<code><mode>v8qi2_mask_store_1"
14266 [(set (match_operand:V8QI 0 "memory_operand" "=m")
14269 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
14271 (match_operand:QI 2 "register_operand" "Yk")))]
14273 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
14274 [(set_attr "type" "ssemov")
14275 (set_attr "memory" "store")
14276 (set_attr "prefix" "evex")
14277 (set_attr "mode" "TI")])
;; DI-viewed masked store: retarget memory to V8QImode.
14279 (define_insn_and_split "avx512vl_<code><mode>v8qi2_mask_store_2"
14280 [(set (match_operand:DI 0 "memory_operand")
14284 (match_operand:VI2_128_BW_4_256 1 "register_operand"))
14290 (parallel [(const_int 0) (const_int 1)
14291 (const_int 2) (const_int 3)
14292 (const_int 4) (const_int 5)
14293 (const_int 6) (const_int 7)]))
14294 (match_operand:QI 2 "register_operand")) 0))]
14295 "TARGET_AVX512VL && ix86_pre_reload_split ()"
14298 [(set (match_dup 0)
14300 (any_truncate:V8QI (match_dup 1))
14303 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
;; Truncations to sub-128-bit HI destinations; VPMOV writes a full xmm,
;; pmov_dst_zeroed_4 names the zero padding that fills the V8HI result.
14305 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
14306 (define_mode_attr pmov_dst_4
14307 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
14308 (define_mode_attr pmov_dst_4_lower
14309 [(V4DI "v4hi") (V2DI "v2hi") (V4SI "v4hi")])
14310 (define_mode_attr pmov_dst_zeroed_4
14311 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
14312 (define_mode_attr pmov_suff_4
14313 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
;; Expander: run the padded V8HI insn, extract the low sub-vector.
14315 (define_expand "trunc<mode><pmov_dst_4_lower>2"
14316 [(set (match_operand:<pmov_dst_4> 0 "register_operand")
14317 (truncate:<pmov_dst_4>
14318 (match_operand:PMOV_SRC_MODE_4 1 "register_operand")))]
14321 rtx op0 = gen_reg_rtx (V8HImode);
14323 emit_insn (gen_avx512vl_truncate<mode>v<ssescalarnum>hi2
14324 (op0, operands[1], CONST0_RTX (<pmov_dst_zeroed_4>mode)));
14326 emit_move_insn (operands[0],
14327 lowpart_subreg (<pmov_dst_4>mode, op0, V8HImode));
;; The padded insn: truncated low elements concatenated with zeros.
14331 (define_insn "avx512vl_<code><mode>v<ssescalarnum>hi2"
14332 [(set (match_operand:V8HI 0 "register_operand" "=v")
14334 (any_truncate:<pmov_dst_4>
14335 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
14336 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
14338 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
14339 [(set_attr "type" "ssemov")
14340 (set_attr "prefix" "evex")
14341 (set_attr "mode" "TI")])
;; PSHUFB with a truncation-shaped control + low-DI extract recognized as
;; a v4si->v4hi truncate; rewrite to the VPMOV form pre-reload.
14343 (define_insn_and_split "*avx512f_pshufb_truncv4siv4hi_1"
14344 [(set (match_operand:DI 0 "register_operand")
14348 [(match_operand:V16QI 1 "register_operand")
14349 (match_operand:V16QI 2 "pshufb_truncv4siv4hi_operand")]
14351 (parallel [(const_int 0)])))]
14352 "TARGET_AVX512VL && ix86_pre_reload_split ()"
14357 rtx op1 = gen_reg_rtx (V4HImode);
14358 operands[1] = lowpart_subreg (V4SImode,
14359 force_reg (V16QImode, operands[1]),
14361 emit_insn (gen_truncv4siv4hi2 (op1, operands[1]));
14362 emit_move_insn (operands[0], lowpart_subreg (DImode, op1, V4HImode));
;; {v4di, v4si} -> v4hi truncation family (VI4_128_8_256 sources):
;; 8-byte store, DI-viewed store splitter, masked register forms padded
;; with V4HI zeros, masked store, DI-viewed masked-store splitter.
14366 (define_insn "*avx512vl_<code><mode>v4hi2_store_1"
14367 [(set (match_operand:V4HI 0 "memory_operand" "=m")
14369 (match_operand:VI4_128_8_256 1 "register_operand" "v")))]
14371 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
14372 [(set_attr "type" "ssemov")
14373 (set_attr "memory" "store")
14374 (set_attr "prefix" "evex")
14375 (set_attr "mode" "TI")])
;; DI store of the subreg'd truncation == 8-byte VPMOV store.
14377 (define_insn_and_split "*avx512vl_<code><mode>v4hi2_store_2"
14378 [(set (match_operand:DI 0 "memory_operand")
14381 (match_operand:VI4_128_8_256 1 "register_operand")) 0))]
14382 "TARGET_AVX512VL && ix86_pre_reload_split ()"
14385 [(set (match_dup 0)
14386 (any_truncate:V4HI (match_dup 1)))]
14387 "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);")
;; Masked register form: low 4 words merged under mask 3, rest zero.
14389 (define_insn "avx512vl_<code><mode>v4hi2_mask"
14390 [(set (match_operand:V8HI 0 "register_operand" "=v")
14394 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
14396 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
14397 (parallel [(const_int 0) (const_int 1)
14398 (const_int 2) (const_int 3)]))
14399 (match_operand:QI 3 "register_operand" "Yk"))
14400 (const_vector:V4HI [(const_int 0) (const_int 0)
14401 (const_int 0) (const_int 0)])))]
14403 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14404 [(set_attr "type" "ssemov")
14405 (set_attr "prefix" "evex")
14406 (set_attr "mode" "TI")])
;; Zero-masking variant ({z} modifier, zero merge source).
14408 (define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
14409 [(set (match_operand:V8HI 0 "register_operand" "=v")
14413 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
14414 (const_vector:V4HI [(const_int 0) (const_int 0)
14415 (const_int 0) (const_int 0)])
14416 (match_operand:QI 2 "register_operand" "Yk"))
14417 (const_vector:V4HI [(const_int 0) (const_int 0)
14418 (const_int 0) (const_int 0)])))]
14420 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
14421 [(set_attr "type" "ssemov")
14422 (set_attr "prefix" "evex")
14423 (set_attr "mode" "TI")])
;; Masked 8-byte store.
;; NOTE(review): both branches of the GET_MODE_SIZE test below return the
;; IDENTICAL template (lines 14435/14436), making the conditional
;; redundant as written.  Adjacent lines are missing from this extract --
;; confirm against upstream sse.md whether the templates were meant to
;; differ before simplifying.
14425 (define_insn "*avx512vl_<code><mode>v4hi2_mask_store_1"
14426 [(set (match_operand:V4HI 0 "memory_operand" "=m")
14429 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
14431 (match_operand:QI 2 "register_operand" "Yk")))]
14434 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
14435 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
14436 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
14438 [(set_attr "type" "ssemov")
14439 (set_attr "memory" "store")
14440 (set_attr "prefix" "evex")
14441 (set_attr "mode" "TI")])
;; DI-viewed masked store: retarget memory to V4HImode.
14443 (define_insn_and_split "avx512vl_<code><mode>v4hi2_mask_store_2"
14444 [(set (match_operand:DI 0 "memory_operand")
14448 (match_operand:VI4_128_8_256 1 "register_operand"))
14454 (parallel [(const_int 0) (const_int 1)
14455 (const_int 2) (const_int 3)]))
14456 (match_operand:QI 2 "register_operand")) 0))]
14457 "TARGET_AVX512VL && ix86_pre_reload_split ()"
14460 [(set (match_dup 0)
14462 (any_truncate:V4HI (match_dup 1))
14465 "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);")
;; v2di -> v2hi truncation family: 4-byte store, SI-viewed store
;; splitter, masked register forms padded with V6HI zeros, masked store,
;; SI-viewed masked-store splitter.
14468 (define_insn "*avx512vl_<code>v2div2hi2_store_1"
14469 [(set (match_operand:V2HI 0 "memory_operand" "=m")
14471 (match_operand:V2DI 1 "register_operand" "v")))]
14473 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
14474 [(set_attr "type" "ssemov")
14475 (set_attr "memory" "store")
14476 (set_attr "prefix" "evex")
14477 (set_attr "mode" "TI")])
;; SI store of the subreg'd truncation == 4-byte VPMOVQW store.
14479 (define_insn_and_split "*avx512vl_<code>v2div2hi2_store_2"
14480 [(set (match_operand:SI 0 "memory_operand")
14483 (match_operand:V2DI 1 "register_operand")) 0))]
14484 "TARGET_AVX512VL && ix86_pre_reload_split ()"
14487 [(set (match_dup 0)
14488 (any_truncate:V2HI (match_dup 1)))]
14489 "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);")
;; Masked register form: low 2 words merged under mask 3, rest zero.
14491 (define_insn "avx512vl_<code>v2div2hi2_mask"
14492 [(set (match_operand:V8HI 0 "register_operand" "=v")
14496 (match_operand:V2DI 1 "register_operand" "v"))
14498 (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
14499 (parallel [(const_int 0) (const_int 1)]))
14500 (match_operand:QI 3 "register_operand" "Yk"))
14501 (const_vector:V6HI [(const_int 0) (const_int 0)
14502 (const_int 0) (const_int 0)
14503 (const_int 0) (const_int 0)])))]
14505 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14506 [(set_attr "type" "ssemov")
14507 (set_attr "prefix" "evex")
14508 (set_attr "mode" "TI")])
;; Zero-masking variant ({z} modifier, zero merge source).
14510 (define_insn "*avx512vl_<code>v2div2hi2_mask_1"
14511 [(set (match_operand:V8HI 0 "register_operand" "=v")
14515 (match_operand:V2DI 1 "register_operand" "v"))
14516 (const_vector:V2HI [(const_int 0) (const_int 0)])
14517 (match_operand:QI 2 "register_operand" "Yk"))
14518 (const_vector:V6HI [(const_int 0) (const_int 0)
14519 (const_int 0) (const_int 0)
14520 (const_int 0) (const_int 0)])))]
14522 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
14523 [(set_attr "type" "ssemov")
14524 (set_attr "prefix" "evex")
14525 (set_attr "mode" "TI")])
;; Masked 4-byte store.
14527 (define_insn "*avx512vl_<code>v2div2hi2_mask_store_1"
14528 [(set (match_operand:V2HI 0 "memory_operand" "=m")
14531 (match_operand:V2DI 1 "register_operand" "v"))
14533 (match_operand:QI 2 "register_operand" "Yk")))]
14535 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
14536 [(set_attr "type" "ssemov")
14537 (set_attr "memory" "store")
14538 (set_attr "prefix" "evex")
14539 (set_attr "mode" "TI")])
;; SI-viewed masked store: retarget memory to V2HImode.
14541 (define_insn_and_split "avx512vl_<code>v2div2hi2_mask_store_2"
14542 [(set (match_operand:SI 0 "memory_operand")
14546 (match_operand:V2DI 1 "register_operand"))
14552 (parallel [(const_int 0) (const_int 1)]))
14553 (match_operand:QI 2 "register_operand")) 0))]
14554 "TARGET_AVX512VL && ix86_pre_reload_split ()"
14557 [(set (match_dup 0)
14559 (any_truncate:V2HI (match_dup 1))
14562 "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);")
;; ---------------------------------------------------------------------
;; V2DI -> V2SI truncation family (AVX512VL vpmov[us]?qd): expander,
;; register/store insns, pre-reload splits, and masked variants.
;; NOTE(review): embedded original line numbers are discontinuous —
;; interior RTL lines are elided from this excerpt; patterns are not
;; complete as shown.
;; ---------------------------------------------------------------------
;; Expander: truncate into a V4SI temp (upper half zeroed) and move the
;; low V2SI part into the destination.
14564 (define_expand "truncv2div2si2"
14565 [(set (match_operand:V2SI 0 "register_operand")
14567 (match_operand:V2DI 1 "register_operand")))]
14570 rtx op0 = gen_reg_rtx (V4SImode);
14572 emit_insn (gen_avx512vl_truncatev2div2si2
14573 (op0, operands[1], CONST0_RTX (V2SImode)));
14575 emit_move_insn (operands[0],
14576 lowpart_subreg (V2SImode, op0, V4SImode));

;; V2DI -> V2SI truncation into the low half of a V4SI register, upper
;; half required to be zero (operand 2 is const0).
14580 (define_insn "avx512vl_<code>v2div2si2"
14581 [(set (match_operand:V4SI 0 "register_operand" "=v")
14584 (match_operand:V2DI 1 "register_operand" "v"))
14585 (match_operand:V2SI 2 "const0_operand")))]
14587 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
14588 [(set_attr "type" "ssemov")
14589 (set_attr "prefix" "evex")
14590 (set_attr "mode" "TI")])

;; Recognize a pshufd-style even-element select as a qd truncation;
;; split re-emits it through the truncv2div2si2 expander.
14592 (define_insn_and_split "*avx512f_pshufd_truncv2div2si_1"
14593 [(set (match_operand:DI 0 "register_operand")
14597 (match_operand:V4SI 1 "register_operand")
14598 (parallel [(const_int 0) (const_int 2)
14599 (const_int 2) (const_int 3)])) 0)
14600 (parallel [(const_int 0)])))]
14601 "TARGET_AVX512VL && ix86_pre_reload_split ()"
14606 rtx op1 = gen_reg_rtx (V2SImode);
14607 operands[1] = lowpart_subreg (V2DImode,
14608 force_reg (V4SImode, operands[1]),
14610 emit_insn (gen_truncv2div2si2 (op1, operands[1]));
14611 emit_move_insn (operands[0], lowpart_subreg (DImode, op1, V2SImode));

;; Unmasked truncating store to memory.
14615 (define_insn "*avx512vl_<code>v2div2si2_store_1"
14616 [(set (match_operand:V2SI 0 "memory_operand" "=m")
14618 (match_operand:V2DI 1 "register_operand" "v")))]
14620 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
14621 [(set_attr "type" "ssemov")
14622 (set_attr "memory" "store")
14623 (set_attr "prefix" "evex")
14624 (set_attr "mode" "TI")])

;; Scalar DI-store form matched pre-reload; split to the V2SI store.
14626 (define_insn_and_split "*avx512vl_<code>v2div2si2_store_2"
14627 [(set (match_operand:DI 0 "memory_operand")
14630 (match_operand:V2DI 1 "register_operand")) 0))]
14631 "TARGET_AVX512VL && ix86_pre_reload_split ()"
14634 [(set (match_dup 0)
14635 (any_truncate:V2SI (match_dup 1)))]
14636 "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);")

;; Mask-merged truncation into a V4SI register, upper half zeroed.
14638 (define_insn "avx512vl_<code>v2div2si2_mask"
14639 [(set (match_operand:V4SI 0 "register_operand" "=v")
14643 (match_operand:V2DI 1 "register_operand" "v"))
14645 (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
14646 (parallel [(const_int 0) (const_int 1)]))
14647 (match_operand:QI 3 "register_operand" "Yk"))
14648 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
14650 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14651 [(set_attr "type" "ssemov")
14652 (set_attr "prefix" "evex")
14653 (set_attr "mode" "TI")])

;; Zero-masked ({z}) variant.
14655 (define_insn "*avx512vl_<code>v2div2si2_mask_1"
14656 [(set (match_operand:V4SI 0 "register_operand" "=v")
14660 (match_operand:V2DI 1 "register_operand" "v"))
14661 (const_vector:V2SI [(const_int 0) (const_int 0)])
14662 (match_operand:QI 2 "register_operand" "Yk"))
14663 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
14665 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
14666 [(set_attr "type" "ssemov")
14667 (set_attr "prefix" "evex")
14668 (set_attr "mode" "TI")])

;; Masked truncating store to memory.
14670 (define_insn "*avx512vl_<code>v2div2si2_mask_store_1"
14671 [(set (match_operand:V2SI 0 "memory_operand" "=m")
14674 (match_operand:V2DI 1 "register_operand" "v"))
14676 (match_operand:QI 2 "register_operand" "Yk")))]
14678 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
14679 [(set_attr "type" "ssemov")
14680 (set_attr "memory" "store")
14681 (set_attr "prefix" "evex")
14682 (set_attr "mode" "TI")])

;; Masked scalar DI-store form; split back to the V2SI masked store.
14684 (define_insn_and_split "avx512vl_<code>v2div2si2_mask_store_2"
14685 [(set (match_operand:DI 0 "memory_operand")
14689 (match_operand:V2DI 1 "register_operand"))
14695 (parallel [(const_int 0) (const_int 1)]))
14696 (match_operand:QI 2 "register_operand")) 0))]
14697 "TARGET_AVX512VL && ix86_pre_reload_split ()"
14700 [(set (match_dup 0)
14702 (any_truncate:V2SI (match_dup 1))
14705 "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);")
;; ---------------------------------------------------------------------
;; V8DI -> V8QI truncation family (AVX512F vpmov[us]?qb): expander,
;; register/store insns, pre-reload splits, and masked variants.
;; The register form writes a V16QI with the upper 8 byte lanes zeroed.
;; NOTE(review): embedded original line numbers are discontinuous —
;; interior RTL lines are elided from this excerpt; patterns are not
;; complete as shown.
;; ---------------------------------------------------------------------
;; Expander: truncate into a V16QI temp and move out the low V8QI part.
14707 (define_expand "truncv8div8qi2"
14708 [(set (match_operand:V8QI 0 "register_operand")
14710 (match_operand:V8DI 1 "register_operand")))]
14713 rtx op0 = gen_reg_rtx (V16QImode);
14715 emit_insn (gen_avx512f_truncatev8div16qi2 (op0, operands[1]));
14717 emit_move_insn (operands[0],
14718 lowpart_subreg (V8QImode, op0, V16QImode));

;; Truncation into a V16QI register; high 8 byte lanes are zero.
14722 (define_insn "avx512f_<code>v8div16qi2"
14723 [(set (match_operand:V16QI 0 "register_operand" "=v")
14726 (match_operand:V8DI 1 "register_operand" "v"))
14727 (const_vector:V8QI [(const_int 0) (const_int 0)
14728 (const_int 0) (const_int 0)
14729 (const_int 0) (const_int 0)
14730 (const_int 0) (const_int 0)])))]
14732 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
14733 [(set_attr "type" "ssemov")
14734 (set_attr "prefix" "evex")
14735 (set_attr "mode" "TI")])

;; Unmasked truncating store to memory.
14737 (define_insn "*avx512f_<code>v8div16qi2_store_1"
14738 [(set (match_operand:V8QI 0 "memory_operand" "=m")
14740 (match_operand:V8DI 1 "register_operand" "v")))]
14742 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
14743 [(set_attr "type" "ssemov")
14744 (set_attr "memory" "store")
14745 (set_attr "prefix" "evex")
14746 (set_attr "mode" "TI")])

;; Scalar DI-store form matched pre-reload; split to the V8QI store.
14748 (define_insn_and_split "*avx512f_<code>v8div16qi2_store_2"
14749 [(set (match_operand:DI 0 "memory_operand")
14752 (match_operand:V8DI 1 "register_operand")) 0))]
14753 "TARGET_AVX512F && ix86_pre_reload_split ()"
14756 [(set (match_dup 0)
14757 (any_truncate:V8QI (match_dup 1)))]
14758 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")

;; Mask-merged truncation into a V16QI register, upper half zeroed.
14760 (define_insn "avx512f_<code>v8div16qi2_mask"
14761 [(set (match_operand:V16QI 0 "register_operand" "=v")
14765 (match_operand:V8DI 1 "register_operand" "v"))
14767 (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
14768 (parallel [(const_int 0) (const_int 1)
14769 (const_int 2) (const_int 3)
14770 (const_int 4) (const_int 5)
14771 (const_int 6) (const_int 7)]))
14772 (match_operand:QI 3 "register_operand" "Yk"))
14773 (const_vector:V8QI [(const_int 0) (const_int 0)
14774 (const_int 0) (const_int 0)
14775 (const_int 0) (const_int 0)
14776 (const_int 0) (const_int 0)])))]
14778 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14779 [(set_attr "type" "ssemov")
14780 (set_attr "prefix" "evex")
14781 (set_attr "mode" "TI")])

;; Zero-masked ({z}) variant.
14783 (define_insn "*avx512f_<code>v8div16qi2_mask_1"
14784 [(set (match_operand:V16QI 0 "register_operand" "=v")
14788 (match_operand:V8DI 1 "register_operand" "v"))
14789 (const_vector:V8QI [(const_int 0) (const_int 0)
14790 (const_int 0) (const_int 0)
14791 (const_int 0) (const_int 0)
14792 (const_int 0) (const_int 0)])
14793 (match_operand:QI 2 "register_operand" "Yk"))
14794 (const_vector:V8QI [(const_int 0) (const_int 0)
14795 (const_int 0) (const_int 0)
14796 (const_int 0) (const_int 0)
14797 (const_int 0) (const_int 0)])))]
14799 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
14800 [(set_attr "type" "ssemov")
14801 (set_attr "prefix" "evex")
14802 (set_attr "mode" "TI")])

;; Masked truncating store to memory.
14804 (define_insn "*avx512f_<code>v8div16qi2_mask_store_1"
14805 [(set (match_operand:V8QI 0 "memory_operand" "=m")
14808 (match_operand:V8DI 1 "register_operand" "v"))
14810 (match_operand:QI 2 "register_operand" "Yk")))]
14812 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
14813 [(set_attr "type" "ssemov")
14814 (set_attr "memory" "store")
14815 (set_attr "prefix" "evex")
14816 (set_attr "mode" "TI")])

;; Masked scalar DI-store form; split back to the V8QI masked store.
14818 (define_insn_and_split "avx512f_<code>v8div16qi2_mask_store_2"
14819 [(set (match_operand:DI 0 "memory_operand")
14823 (match_operand:V8DI 1 "register_operand"))
14829 (parallel [(const_int 0) (const_int 1)
14830 (const_int 2) (const_int 3)
14831 (const_int 4) (const_int 5)
14832 (const_int 6) (const_int 7)]))
14833 (match_operand:QI 2 "register_operand")) 0))]
14834 "TARGET_AVX512F && ix86_pre_reload_split ()"
14837 [(set (match_dup 0)
14839 (any_truncate:V8QI (match_dup 1))
14842 "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
14844 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14846 ;; Parallel integral arithmetic
14848 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ---------------------------------------------------------------------
;; Vector integer negate and add/sub: expanders (plain, conditional,
;; masked) plus the matching define_insns.  <insn>/plusminus iterate
;; over add and sub; VI48/VI12 split the masked forms between AVX512F
;; and AVX512BW element sizes.
;; NOTE(review): embedded original line numbers are discontinuous —
;; interior RTL lines are elided from this excerpt.
;; ---------------------------------------------------------------------
;; neg x expands as (0 - x); operand 2 becomes the zero vector.
14850 (define_expand "neg<mode>2"
14851 [(set (match_operand:VI_AVX2 0 "register_operand")
14854 (match_operand:VI_AVX2 1 "vector_operand")))]
14856 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")

;; Generic add/sub expander; legitimizes operands for the insn below.
14858 (define_expand "<insn><mode>3"
14859 [(set (match_operand:VI_AVX2 0 "register_operand")
14861 (match_operand:VI_AVX2 1 "vector_operand")
14862 (match_operand:VI_AVX2 2 "vector_operand")))]
14864 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; cond_* interface (mask first) forwarded to the *_mask expander.
14866 (define_expand "cond_<insn><mode>"
14867 [(set (match_operand:VI1248_AVX512VLBW 0 "register_operand")
14868 (vec_merge:VI1248_AVX512VLBW
14869 (plusminus:VI1248_AVX512VLBW
14870 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand")
14871 (match_operand:VI1248_AVX512VLBW 3 "nonimmediate_operand"))
14872 (match_operand:VI1248_AVX512VLBW 4 "nonimm_or_0_operand")
14873 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
14876 emit_insn (gen_<insn><mode>3_mask (operands[0],

;; Masked add/sub expander for 4/8-byte elements (AVX512F/VL).
14884 (define_expand "<insn><mode>3_mask"
14885 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
14886 (vec_merge:VI48_AVX512VL
14887 (plusminus:VI48_AVX512VL
14888 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
14889 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
14890 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
14891 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
14893 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; Masked add/sub expander for 1/2-byte elements (AVX512BW/VL).
14895 (define_expand "<insn><mode>3_mask"
14896 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
14897 (vec_merge:VI12_AVX512VL
14898 (plusminus:VI12_AVX512VL
14899 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
14900 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
14901 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
14902 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
14904 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; padd/psub insn; alternative 0 is legacy SSE (dest == src1),
;; alternative 1 the VEX/EVEX three-operand form.
14906 (define_insn "*<insn><mode>3"
14907 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,<v_Yw>")
14909 (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,<v_Yw>")
14910 (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,<v_Yw>mBr")))]
14911 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
14913 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
14914 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14915 [(set_attr "isa" "noavx,avx")
14916 (set_attr "type" "sseiadd")
14917 (set_attr "prefix_data16" "1,*")
14918 (set_attr "prefix" "orig,maybe_evex")
14919 (set_attr "mode" "<sseinsnmode>")])

;; Masked padd/psub for dword/qword elements.
14921 (define_insn "*<insn><mode>3_mask"
14922 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
14923 (vec_merge:VI48_AVX512VL
14924 (plusminus:VI48_AVX512VL
14925 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
14926 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
14927 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
14928 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
14929 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
14930 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
14931 [(set_attr "type" "sseiadd")
14932 (set_attr "prefix" "evex")
14933 (set_attr "mode" "<sseinsnmode>")])

;; Masked padd/psub for byte/word elements (needs AVX512BW).
14935 (define_insn "*<insn><mode>3_mask"
14936 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
14937 (vec_merge:VI12_AVX512VL
14938 (plusminus:VI12_AVX512VL
14939 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
14940 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
14941 (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand" "0C")
14942 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
14943 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
14944 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
14945 [(set_attr "type" "sseiadd")
14946 (set_attr "prefix" "evex")
14947 (set_attr "mode" "<sseinsnmode>")])
;; ---------------------------------------------------------------------
;; Saturating byte/word add/sub (padds/paddus/psubs/psubus) and the
;; PR96906 simplification that turns "psubus compared to 0" into
;; "pminu compared to op1".
;; NOTE(review): embedded original line numbers are discontinuous —
;; interior lines are elided; in particular the "(define_split" header
;; of the PR96906 pattern is itself missing from this excerpt.
;; ---------------------------------------------------------------------
14949 (define_expand "<sse2_avx2>_<insn><mode>3<mask_name>"
14950 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
14951 (sat_plusminus:VI12_AVX2_AVX512BW
14952 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand")
14953 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))]
14954 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14955 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")

;; Saturating add/sub insn; legacy SSE and VEX/EVEX alternatives.
14957 (define_insn "*<sse2_avx2>_<insn><mode>3<mask_name>"
14958 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,<v_Yw>")
14959 (sat_plusminus:VI12_AVX2_AVX512BW
14960 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "<comm>0,<v_Yw>")
14961 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,<v_Yw>m")))]
14962 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
14963 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
14965 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
14966 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14967 [(set_attr "isa" "noavx,avx")
14968 (set_attr "type" "sseiadd")
14969 (set_attr "prefix_data16" "1,*")
14970 (set_attr "prefix" "orig,maybe_evex")
14971 (set_attr "mode" "TI")])

;; PR96906 - optimize psubusw compared to 0 into pminuw compared to op0.
14975 [(set (match_operand:VI12_AVX2 0 "register_operand")
14977 (us_minus:VI12_AVX2
14978 (match_operand:VI12_AVX2 1 "vector_operand")
14979 (match_operand:VI12_AVX2 2 "vector_operand"))
14980 (match_operand:VI12_AVX2 3 "const0_operand")))]
14982 && (<MODE>mode != V8HImode || TARGET_SSE4_1)
14983 && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)"
14984 [(set (match_dup 4)
14985 (umin:VI12_AVX2 (match_dup 1) (match_dup 2)))
14987 (eq:VI12_AVX2 (match_dup 4) (match_dup 1)))]
14988 "operands[4] = gen_reg_rtx (<MODE>mode);")
;; ---------------------------------------------------------------------
;; Byte multiply (synthesized via ix86_expand_vecop_qihi), word multiply
;; (pmullw) and word highpart multiply (pmulh[u]w) patterns.
;; NOTE(review): embedded original line numbers are discontinuous —
;; interior RTL lines are elided from this excerpt.
;; ---------------------------------------------------------------------
;; No pmullb exists, so V*QI multiply is expanded through a helper.
14990 (define_expand "mul<mode>3"
14991 [(set (match_operand:VI1_AVX512 0 "register_operand")
14992 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
14993 (match_operand:VI1_AVX512 2 "register_operand")))]
14996 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);

;; cond_mul interface (mask first) forwarded to mul<mode>3_mask.
15000 (define_expand "cond_mul<mode>"
15001 [(set (match_operand:VI2_AVX512VL 0 "register_operand")
15002 (vec_merge:VI2_AVX512VL
15004 (match_operand:VI2_AVX512VL 2 "vector_operand")
15005 (match_operand:VI2_AVX512VL 3 "vector_operand"))
15006 (match_operand:VI2_AVX512VL 4 "nonimm_or_0_operand")
15007 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
15010 emit_insn (gen_mul<mode>3_mask (operands[0],

;; Word multiply expander.
15018 (define_expand "mul<mode>3<mask_name>"
15019 [(set (match_operand:VI2_AVX2 0 "register_operand")
15020 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
15021 (match_operand:VI2_AVX2 2 "vector_operand")))]
15022 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
15023 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")

;; pmullw / vpmullw insn; operand 1 commutative (%).
15025 (define_insn "*mul<mode>3<mask_name>"
15026 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
15027 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>")
15028 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))]
15029 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
15030 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
15032 pmullw\t{%2, %0|%0, %2}
15033 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15034 [(set_attr "isa" "noavx,avx")
15035 (set_attr "type" "sseimul")
15036 (set_attr "prefix_data16" "1,*")
15037 (set_attr "prefix" "orig,vex")
15038 (set_attr "mode" "<sseinsnmode>")])

;; Highpart multiply: widen, multiply, shift right by 16, truncate.
15040 (define_expand "<s>mul<mode>3_highpart<mask_name>"
15041 [(set (match_operand:VI2_AVX2 0 "register_operand")
15043 (lshiftrt:<ssedoublemode>
15044 (mult:<ssedoublemode>
15045 (any_extend:<ssedoublemode>
15046 (match_operand:VI2_AVX2 1 "vector_operand"))
15047 (any_extend:<ssedoublemode>
15048 (match_operand:VI2_AVX2 2 "vector_operand")))
15051 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
15052 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")

;; pmulh[u]w / vpmulh[u]w insn; <u> selects signed vs unsigned form.
15054 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
15055 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
15057 (lshiftrt:<ssedoublemode>
15058 (mult:<ssedoublemode>
15059 (any_extend:<ssedoublemode>
15060 (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>"))
15061 (any_extend:<ssedoublemode>
15062 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))
15064 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
15065 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
15067 pmulh<u>w\t{%2, %0|%0, %2}
15068 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15069 [(set_attr "isa" "noavx,avx")
15070 (set_attr "type" "sseimul")
15071 (set_attr "prefix_data16" "1,*")
15072 (set_attr "prefix" "orig,vex")
15073 (set_attr "mode" "<sseinsnmode>")])
;; ---------------------------------------------------------------------
;; Widening even-element multiplies: pmuludq (unsigned) and pmuldq
;; (signed, SSE4.1+), selecting even SI lanes (0,2,4,...) of both
;; inputs and producing double-width DI products.  One expander/insn
;; pair per width: 512-bit (v16si), 256-bit (v8si), 128-bit (v4si).
;; NOTE(review): embedded original line numbers are discontinuous —
;; interior RTL lines are elided from this excerpt.
;; ---------------------------------------------------------------------
15075 (define_expand "vec_widen_umult_even_v16si<mask_name>"
15076 [(set (match_operand:V8DI 0 "register_operand")
15080 (match_operand:V16SI 1 "nonimmediate_operand")
15081 (parallel [(const_int 0) (const_int 2)
15082 (const_int 4) (const_int 6)
15083 (const_int 8) (const_int 10)
15084 (const_int 12) (const_int 14)])))
15087 (match_operand:V16SI 2 "nonimmediate_operand")
15088 (parallel [(const_int 0) (const_int 2)
15089 (const_int 4) (const_int 6)
15090 (const_int 8) (const_int 10)
15091 (const_int 12) (const_int 14)])))))]
15093 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")

;; 512-bit vpmuludq.
15095 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
15096 [(set (match_operand:V8DI 0 "register_operand" "=v")
15100 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
15101 (parallel [(const_int 0) (const_int 2)
15102 (const_int 4) (const_int 6)
15103 (const_int 8) (const_int 10)
15104 (const_int 12) (const_int 14)])))
15107 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
15108 (parallel [(const_int 0) (const_int 2)
15109 (const_int 4) (const_int 6)
15110 (const_int 8) (const_int 10)
15111 (const_int 12) (const_int 14)])))))]
15112 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15113 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15114 [(set_attr "type" "sseimul")
15115 (set_attr "prefix_extra" "1")
15116 (set_attr "prefix" "evex")
15117 (set_attr "mode" "XI")])

15119 (define_expand "vec_widen_umult_even_v8si<mask_name>"
15120 [(set (match_operand:V4DI 0 "register_operand")
15124 (match_operand:V8SI 1 "nonimmediate_operand")
15125 (parallel [(const_int 0) (const_int 2)
15126 (const_int 4) (const_int 6)])))
15129 (match_operand:V8SI 2 "nonimmediate_operand")
15130 (parallel [(const_int 0) (const_int 2)
15131 (const_int 4) (const_int 6)])))))]
15132 "TARGET_AVX2 && <mask_avx512vl_condition>"
15133 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")

;; 256-bit vpmuludq.
15135 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
15136 [(set (match_operand:V4DI 0 "register_operand" "=v")
15140 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
15141 (parallel [(const_int 0) (const_int 2)
15142 (const_int 4) (const_int 6)])))
15145 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
15146 (parallel [(const_int 0) (const_int 2)
15147 (const_int 4) (const_int 6)])))))]
15148 "TARGET_AVX2 && <mask_avx512vl_condition>
15149 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15150 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15151 [(set_attr "type" "sseimul")
15152 (set_attr "prefix" "maybe_evex")
15153 (set_attr "mode" "OI")])

15155 (define_expand "vec_widen_umult_even_v4si<mask_name>"
15156 [(set (match_operand:V2DI 0 "register_operand")
15160 (match_operand:V4SI 1 "vector_operand")
15161 (parallel [(const_int 0) (const_int 2)])))
15164 (match_operand:V4SI 2 "vector_operand")
15165 (parallel [(const_int 0) (const_int 2)])))))]
15166 "TARGET_SSE2 && <mask_avx512vl_condition>"
15167 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")

;; 128-bit pmuludq / vpmuludq.
15169 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
15170 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
15174 (match_operand:V4SI 1 "vector_operand" "%0,v")
15175 (parallel [(const_int 0) (const_int 2)])))
15178 (match_operand:V4SI 2 "vector_operand" "xBm,vm")
15179 (parallel [(const_int 0) (const_int 2)])))))]
15180 "TARGET_SSE2 && <mask_avx512vl_condition>
15181 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15183 pmuludq\t{%2, %0|%0, %2}
15184 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15185 [(set_attr "isa" "noavx,avx")
15186 (set_attr "type" "sseimul")
15187 (set_attr "prefix_data16" "1,*")
15188 (set_attr "prefix" "orig,maybe_evex")
15189 (set_attr "mode" "TI")])

;; Signed variants (vpmuldq) follow the same structure.
15191 (define_expand "vec_widen_smult_even_v16si<mask_name>"
15192 [(set (match_operand:V8DI 0 "register_operand")
15196 (match_operand:V16SI 1 "nonimmediate_operand")
15197 (parallel [(const_int 0) (const_int 2)
15198 (const_int 4) (const_int 6)
15199 (const_int 8) (const_int 10)
15200 (const_int 12) (const_int 14)])))
15203 (match_operand:V16SI 2 "nonimmediate_operand")
15204 (parallel [(const_int 0) (const_int 2)
15205 (const_int 4) (const_int 6)
15206 (const_int 8) (const_int 10)
15207 (const_int 12) (const_int 14)])))))]
15209 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")

;; 512-bit vpmuldq.
15211 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
15212 [(set (match_operand:V8DI 0 "register_operand" "=v")
15216 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
15217 (parallel [(const_int 0) (const_int 2)
15218 (const_int 4) (const_int 6)
15219 (const_int 8) (const_int 10)
15220 (const_int 12) (const_int 14)])))
15223 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
15224 (parallel [(const_int 0) (const_int 2)
15225 (const_int 4) (const_int 6)
15226 (const_int 8) (const_int 10)
15227 (const_int 12) (const_int 14)])))))]
15228 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15229 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15230 [(set_attr "type" "sseimul")
15231 (set_attr "prefix_extra" "1")
15232 (set_attr "prefix" "evex")
15233 (set_attr "mode" "XI")])

15235 (define_expand "vec_widen_smult_even_v8si<mask_name>"
15236 [(set (match_operand:V4DI 0 "register_operand")
15240 (match_operand:V8SI 1 "nonimmediate_operand")
15241 (parallel [(const_int 0) (const_int 2)
15242 (const_int 4) (const_int 6)])))
15245 (match_operand:V8SI 2 "nonimmediate_operand")
15246 (parallel [(const_int 0) (const_int 2)
15247 (const_int 4) (const_int 6)])))))]
15248 "TARGET_AVX2 && <mask_avx512vl_condition>"
15249 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")

;; 256-bit vpmuldq.
15251 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
15252 [(set (match_operand:V4DI 0 "register_operand" "=v")
15256 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
15257 (parallel [(const_int 0) (const_int 2)
15258 (const_int 4) (const_int 6)])))
15261 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
15262 (parallel [(const_int 0) (const_int 2)
15263 (const_int 4) (const_int 6)])))))]
15264 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15265 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15266 [(set_attr "type" "sseimul")
15267 (set_attr "prefix_extra" "1")
15268 (set_attr "prefix" "vex")
15269 (set_attr "mode" "OI")])

;; 128-bit signed form exposed under its SSE4.1 builtin name.
15271 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
15272 [(set (match_operand:V2DI 0 "register_operand")
15276 (match_operand:V4SI 1 "vector_operand")
15277 (parallel [(const_int 0) (const_int 2)])))
15280 (match_operand:V4SI 2 "vector_operand")
15281 (parallel [(const_int 0) (const_int 2)])))))]
15282 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15283 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")

;; 128-bit pmuldq / vpmuldq.
15285 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
15286 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15290 (match_operand:V4SI 1 "vector_operand" "%0,0,v")
15291 (parallel [(const_int 0) (const_int 2)])))
15294 (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
15295 (parallel [(const_int 0) (const_int 2)])))))]
15296 "TARGET_SSE4_1 && <mask_avx512vl_condition>
15297 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15299 pmuldq\t{%2, %0|%0, %2}
15300 pmuldq\t{%2, %0|%0, %2}
15301 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15302 [(set_attr "isa" "noavx,noavx,avx")
15303 (set_attr "type" "sseimul")
15304 (set_attr "prefix_data16" "1,1,*")
15305 (set_attr "prefix_extra" "1")
15306 (set_attr "prefix" "orig,orig,vex")
15307 (set_attr "mode" "TI")])
;; ---------------------------------------------------------------------
;; pmaddwd patterns: multiply adjacent signed words and add the pairs,
;; producing dword sums.  The AVX512BW form is an opaque unspec; the
;; AVX2 and SSE2 forms spell out the even/odd lane select, widening
;; multiply and pairwise add explicitly.
;; NOTE(review): embedded original line numbers are discontinuous —
;; interior RTL lines are elided from this excerpt.
;; ---------------------------------------------------------------------
;; NOTE(review): the template string below ends with a stray ';'.  In
;; .md syntax ';' begins a comment, so it is harmless here — but it
;; looks unintentional; confirm against upstream before touching.
15309 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
15310 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
15311 (unspec:<sseunpackmode>
15312 [(match_operand:VI2_AVX2 1 "register_operand" "v")
15313 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
15314 UNSPEC_PMADDWD512))]
15315 "TARGET_AVX512BW && <mask_mode512bit_condition>"
15316 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
15317 [(set_attr "type" "sseiadd")
15318 (set_attr "prefix" "evex")
15319 (set_attr "mode" "XI")])

;; 256-bit expander: even lanes (0,2,...,14) and odd lanes (1,3,...,15)
;; widened, multiplied, and summed pairwise.
15321 (define_expand "avx2_pmaddwd"
15322 [(set (match_operand:V8SI 0 "register_operand")
15327 (match_operand:V16HI 1 "nonimmediate_operand")
15328 (parallel [(const_int 0) (const_int 2)
15329 (const_int 4) (const_int 6)
15330 (const_int 8) (const_int 10)
15331 (const_int 12) (const_int 14)])))
15334 (match_operand:V16HI 2 "nonimmediate_operand")
15335 (parallel [(const_int 0) (const_int 2)
15336 (const_int 4) (const_int 6)
15337 (const_int 8) (const_int 10)
15338 (const_int 12) (const_int 14)]))))
15341 (vec_select:V8HI (match_dup 1)
15342 (parallel [(const_int 1) (const_int 3)
15343 (const_int 5) (const_int 7)
15344 (const_int 9) (const_int 11)
15345 (const_int 13) (const_int 15)])))
15347 (vec_select:V8HI (match_dup 2)
15348 (parallel [(const_int 1) (const_int 3)
15349 (const_int 5) (const_int 7)
15350 (const_int 9) (const_int 11)
15351 (const_int 13) (const_int 15)]))))))]
15353 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")

;; 256-bit vpmaddwd insn.
15355 (define_insn "*avx2_pmaddwd"
15356 [(set (match_operand:V8SI 0 "register_operand" "=Yw")
15361 (match_operand:V16HI 1 "nonimmediate_operand" "%Yw")
15362 (parallel [(const_int 0) (const_int 2)
15363 (const_int 4) (const_int 6)
15364 (const_int 8) (const_int 10)
15365 (const_int 12) (const_int 14)])))
15368 (match_operand:V16HI 2 "nonimmediate_operand" "Ywm")
15369 (parallel [(const_int 0) (const_int 2)
15370 (const_int 4) (const_int 6)
15371 (const_int 8) (const_int 10)
15372 (const_int 12) (const_int 14)]))))
15375 (vec_select:V8HI (match_dup 1)
15376 (parallel [(const_int 1) (const_int 3)
15377 (const_int 5) (const_int 7)
15378 (const_int 9) (const_int 11)
15379 (const_int 13) (const_int 15)])))
15381 (vec_select:V8HI (match_dup 2)
15382 (parallel [(const_int 1) (const_int 3)
15383 (const_int 5) (const_int 7)
15384 (const_int 9) (const_int 11)
15385 (const_int 13) (const_int 15)]))))))]
15386 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15387 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
15388 [(set_attr "type" "sseiadd")
15389 (set_attr "prefix" "vex")
15390 (set_attr "mode" "OI")])

;; 128-bit expander: same structure over V8HI inputs.
15392 (define_expand "sse2_pmaddwd"
15393 [(set (match_operand:V4SI 0 "register_operand")
15398 (match_operand:V8HI 1 "vector_operand")
15399 (parallel [(const_int 0) (const_int 2)
15400 (const_int 4) (const_int 6)])))
15403 (match_operand:V8HI 2 "vector_operand")
15404 (parallel [(const_int 0) (const_int 2)
15405 (const_int 4) (const_int 6)]))))
15408 (vec_select:V4HI (match_dup 1)
15409 (parallel [(const_int 1) (const_int 3)
15410 (const_int 5) (const_int 7)])))
15412 (vec_select:V4HI (match_dup 2)
15413 (parallel [(const_int 1) (const_int 3)
15414 (const_int 5) (const_int 7)]))))))]
15416 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")

;; 128-bit pmaddwd / vpmaddwd insn.
15418 (define_insn "*sse2_pmaddwd"
15419 [(set (match_operand:V4SI 0 "register_operand" "=x,Yw")
15424 (match_operand:V8HI 1 "vector_operand" "%0,Yw")
15425 (parallel [(const_int 0) (const_int 2)
15426 (const_int 4) (const_int 6)])))
15429 (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")
15430 (parallel [(const_int 0) (const_int 2)
15431 (const_int 4) (const_int 6)]))))
15434 (vec_select:V4HI (match_dup 1)
15435 (parallel [(const_int 1) (const_int 3)
15436 (const_int 5) (const_int 7)])))
15438 (vec_select:V4HI (match_dup 2)
15439 (parallel [(const_int 1) (const_int 3)
15440 (const_int 5) (const_int 7)]))))))]
15441 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15443 pmaddwd\t{%2, %0|%0, %2}
15444 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
15445 [(set_attr "isa" "noavx,avx")
15446 (set_attr "type" "sseiadd")
15447 (set_attr "atom_unit" "simul")
15448 (set_attr "prefix_data16" "1,*")
15449 (set_attr "prefix" "orig,vex")
15450 (set_attr "mode" "TI")])
;; ---------------------------------------------------------------------
;; Qword multiply (AVX512DQ vpmullq) and dword multiply (SSE4.1/AVX
;; pmulld, or synthesized for plain SSE2) patterns, with their cond_
;; and masked expanders.
;; NOTE(review): embedded original line numbers are discontinuous —
;; interior RTL lines are elided from this excerpt.
;; ---------------------------------------------------------------------
;; cond_mul for qword elements, forwarded to avx512dq_mul<mode>3_mask.
15452 (define_expand "cond_mul<mode>"
15453 [(set (match_operand:VI8_AVX512VL 0 "register_operand")
15454 (vec_merge:VI8_AVX512VL
15456 (match_operand:VI8_AVX512VL 2 "vector_operand")
15457 (match_operand:VI8_AVX512VL 3 "vector_operand"))
15458 (match_operand:VI8_AVX512VL 4 "nonimm_or_0_operand")
15459 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
15462 emit_insn (gen_avx512dq_mul<mode>3_mask (operands[0],

;; Qword multiply expander (AVX512DQ).
15470 (define_expand "avx512dq_mul<mode>3<mask_name>"
15471 [(set (match_operand:VI8_AVX512VL 0 "register_operand")
15473 (match_operand:VI8_AVX512VL 1 "bcst_vector_operand")
15474 (match_operand:VI8_AVX512VL 2 "bcst_vector_operand")))]
15475 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
15476 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")

;; vpmullq insn.  On Golden-Cove-class targets the destination is
;; zeroed first (vxorps) to break a false output dependency when the
;; destination register is not one of the sources.
15478 (define_insn "*avx512dq_mul<mode>3<mask_name>"
15479 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
15481 (match_operand:VI8_AVX512VL 1 "bcst_vector_operand" "%v")
15482 (match_operand:VI8_AVX512VL 2 "bcst_vector_operand" "vmBr")))]
15483 "TARGET_AVX512DQ && <mask_mode512bit_condition>
15484 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
15486 if (TARGET_DEST_FALSE_DEP_FOR_GLC
15487 && <mask3_dest_false_dep_for_glc_cond>
15488 && !reg_mentioned_p (operands[0], operands[1])
15489 && !reg_mentioned_p (operands[0], operands[2]))
15490 output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
15491 return "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
15493 [(set_attr "type" "sseimul")
15494 (set_attr "prefix" "evex")
15495 (set_attr "mode" "<sseinsnmode>")])

;; cond_mul for dword elements, forwarded to mul<mode>3_mask.
15497 (define_expand "cond_mul<mode>"
15498 [(set (match_operand:VI4_AVX512VL 0 "register_operand")
15499 (vec_merge:VI4_AVX512VL
15501 (match_operand:VI4_AVX512VL 2 "vector_operand")
15502 (match_operand:VI4_AVX512VL 3 "vector_operand"))
15503 (match_operand:VI4_AVX512VL 4 "nonimm_or_0_operand")
15504 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
15507 emit_insn (gen_mul<mode>3_mask (operands[0],

;; Dword multiply expander: uses pmulld when available, otherwise
;; synthesizes the product via ix86_expand_sse2_mulv4si3.
15515 (define_expand "mul<mode>3<mask_name>"
15516 [(set (match_operand:VI4_AVX512F 0 "register_operand")
15518 (match_operand:VI4_AVX512F 1 "general_vector_operand")
15519 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
15520 "TARGET_SSE2 && <mask_mode512bit_condition>"
15524 if (!vector_operand (operands[1], <MODE>mode))
15525 operands[1] = force_reg (<MODE>mode, operands[1]);
15526 if (!vector_operand (operands[2], <MODE>mode))
15527 operands[2] = force_reg (<MODE>mode, operands[2]);
15528 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
15532 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);

;; pmulld / vpmulld insn (SSE4.1 alternatives plus EVEX form).
15537 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
15538 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
15540 (match_operand:VI4_AVX512F 1 "bcst_vector_operand" "%0,0,v")
15541 (match_operand:VI4_AVX512F 2 "bcst_vector_operand" "YrBm,*xBm,vmBr")))]
15542 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
15543 && <mask_mode512bit_condition>"
15545 pmulld\t{%2, %0|%0, %2}
15546 pmulld\t{%2, %0|%0, %2}
15547 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15548 [(set_attr "isa" "noavx,noavx,avx")
15549 (set_attr "type" "sseimul")
15550 (set_attr "prefix_extra" "1")
15551 (set_attr "prefix" "<bcst_mask_prefix4>")
15552 (set_attr "btver2_decode" "vector,vector,vector")
15553 (set_attr "mode" "<sseinsnmode>")])
15555 (define_expand "mul<mode>3"
15556 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
15557 (mult:VI8_AVX2_AVX512F
15558 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
15559 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
15562 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Widening multiplies: the hi/lo-half variants delegate to
;; ix86_expand_mul_widen_hilo, the even/odd variants to
;; ix86_expand_mul_widen_evenodd.
15566 (define_expand "vec_widen_<s>mult_hi_<mode>"
15567 [(match_operand:<sseunpackmode> 0 "register_operand")
15568 (any_extend:<sseunpackmode>
15569 (match_operand:VI124_AVX2 1 "register_operand"))
15570 (match_operand:VI124_AVX2 2 "register_operand")]
15573 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],

15578 (define_expand "vec_widen_<s>mult_lo_<mode>"
15579 [(match_operand:<sseunpackmode> 0 "register_operand")
15580 (any_extend:<sseunpackmode>
15581 (match_operand:VI124_AVX2 1 "register_operand"))
15582 (match_operand:VI124_AVX2 2 "register_operand")]
15585 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],

15590 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
15591 ;; named patterns, but signed V4SI needs special help for plain SSE2.
15592 (define_expand "vec_widen_smult_even_v4si"
15593 [(match_operand:V2DI 0 "register_operand")
15594 (match_operand:V4SI 1 "vector_operand")
15595 (match_operand:V4SI 2 "vector_operand")]
15598 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],

15603 (define_expand "vec_widen_<s>mult_odd_<mode>"
15604 [(match_operand:<sseunpackmode> 0 "register_operand")
15605 (any_extend:<sseunpackmode>
15606 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
15607 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
15610 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],

;; Pattern-name suffixes used by sdot_prod<mode> to select the pmaddwd
;; and vpdpwssd generators for each vector width.
15615 (define_mode_attr SDOT_PMADD_SUF
15616 [(V32HI "512v32hi") (V16HI "") (V8HI "")])

15618 (define_mode_attr SDOT_VPDP_SUF
15619 [(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")])

;; Signed dot-product of HImode vectors accumulated into double-width
;; elements.  Prefers the fused VNNI vpdpwssd form when the target
;; supports it (512-bit needs AVX512VNNI; narrower needs AVX512VNNI+VL
;; or AVX-VNNI); otherwise falls back to pmaddwd followed by a vector
;; add of the accumulator.
15621 (define_expand "sdot_prod<mode>"
15622 [(match_operand:<sseunpackmode> 0 "register_operand")
15623 (match_operand:VI2_AVX512VNNIBW 1 "register_operand")
15624 (match_operand:VI2_AVX512VNNIBW 2 "register_operand")
15625 (match_operand:<sseunpackmode> 3 "register_operand")]
15628 /* Try with vnni instructions. */
15629 if ((<MODE_SIZE> == 64 && TARGET_AVX512VNNI)
15630 || (<MODE_SIZE> < 64
15631 && ((TARGET_AVX512VNNI && TARGET_AVX512VL) || TARGET_AVXVNNI)))
15633 operands[1] = lowpart_subreg (<sseunpackmode>mode,
15634 force_reg (<MODE>mode, operands[1]),
15636 operands[2] = lowpart_subreg (<sseunpackmode>mode,
15637 force_reg (<MODE>mode, operands[2]),
15639 emit_insn (gen_rtx_SET (operands[0], operands[3]));
15640 emit_insn (gen_vpdpwssd_<SDOT_VPDP_SUF> (operands[0], operands[3],
15641 operands[1], operands[2]));
15643 /* Otherwise use pmaddwd + paddd. */
15646 rtx t = gen_reg_rtx (<sseunpackmode>mode);
15647 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
15648 emit_insn (gen_rtx_SET (operands[0],
15649 gen_rtx_PLUS (<sseunpackmode>mode,

15655 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
15656 ;; back together when madd is available.
;; V4SI dot product via the XOP multiply-accumulate pair
;; pmacsdqh / pmacsdql (high then low element pairs).
15657 (define_expand "sdot_prodv4si"
15658 [(match_operand:V2DI 0 "register_operand")
15659 (match_operand:V4SI 1 "register_operand")
15660 (match_operand:V4SI 2 "register_operand")
15661 (match_operand:V2DI 3 "register_operand")]
15664 rtx t = gen_reg_rtx (V2DImode);
15665 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
15666 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Unsigned average rounding up: (a + b + 1) >> 1, expressed in the
;; double-width mode (this is the semantics of pavgb/pavgw).
;; Operand 3 is set to the +1 rounding constant below.
15670 (define_expand "uavg<mode>3_ceil"
15671 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
15672 (truncate:VI12_AVX2_AVX512BW
15673 (lshiftrt:<ssedoublemode>
15674 (plus:<ssedoublemode>
15675 (plus:<ssedoublemode>
15676 (zero_extend:<ssedoublemode>
15677 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
15678 (zero_extend:<ssedoublemode>
15679 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
15684 operands[3] = CONST1_RTX(<ssedoublemode>mode);
15685 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);

;; Sum of absolute differences: psadbw produces DImode partial sums,
;; which convert_move widens/repacks into SImode lanes before adding
;; the accumulator (operand 3).  128/256/512-bit variants below differ
;; only in the psadbw/add generators used.
15688 (define_expand "usadv16qi"
15689 [(match_operand:V4SI 0 "register_operand")
15690 (match_operand:V16QI 1 "register_operand")
15691 (match_operand:V16QI 2 "vector_operand")
15692 (match_operand:V4SI 3 "vector_operand")]
15695 rtx t1 = gen_reg_rtx (V2DImode);
15696 rtx t2 = gen_reg_rtx (V4SImode);
15697 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
15698 convert_move (t2, t1, 0);
15699 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));

15703 (define_expand "usadv32qi"
15704 [(match_operand:V8SI 0 "register_operand")
15705 (match_operand:V32QI 1 "register_operand")
15706 (match_operand:V32QI 2 "nonimmediate_operand")
15707 (match_operand:V8SI 3 "nonimmediate_operand")]
15710 rtx t1 = gen_reg_rtx (V4DImode);
15711 rtx t2 = gen_reg_rtx (V8SImode);
15712 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
15713 convert_move (t2, t1, 0);
15714 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));

15718 (define_expand "usadv64qi"
15719 [(match_operand:V16SI 0 "register_operand")
15720 (match_operand:V64QI 1 "register_operand")
15721 (match_operand:V64QI 2 "nonimmediate_operand")
15722 (match_operand:V16SI 3 "nonimmediate_operand")]
15725 rtx t1 = gen_reg_rtx (V8DImode);
15726 rtx t2 = gen_reg_rtx (V16SImode);
15727 emit_insn (gen_avx512f_psadbw (t1, operands[1], operands[2]));
15728 convert_move (t2, t1, 0);
15729 emit_insn (gen_addv16si3 (operands[0], t2, operands[3]));
;; Arithmetic right shifts.  The shift count (operand 2) is either a
;; register/xmm count or an immediate (constraint "N").

;; AVX512 vpsra with optional masking.
15733 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
15734 [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
15735 (ashiftrt:VI248_AVX512BW_1
15736 (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
15737 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
15739 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15740 [(set_attr "type" "sseishft")
15741 (set (attr "length_immediate")
15742 (if_then_else (match_operand 2 "const_int_operand")
15744 (const_string "0")))
15745 (set_attr "mode" "<sseinsnmode>")])

;; SSE2/AVX psra for word/dword element modes (VI24_AVX2).
15747 (define_insn "ashr<mode>3"
15748 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,<v_Yw>")
15749 (ashiftrt:VI24_AVX2
15750 (match_operand:VI24_AVX2 1 "register_operand" "0,<v_Yw>")
15751 (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
15754 psra<ssemodesuffix>\t{%2, %0|%0, %2}
15755 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15756 [(set_attr "isa" "noavx,avx")
15757 (set_attr "type" "sseishft")
15758 (set (attr "length_immediate")
15759 (if_then_else (match_operand 2 "const_int_operand")
15761 (const_string "0")))
15762 (set_attr "prefix_data16" "1,*")
15763 (set_attr "prefix" "orig,vex")
15764 (set_attr "mode" "<sseinsnmode>")])

;; AVX512BW/VL variant of the maskable vpsra insn.
15766 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
15767 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
15768 (ashiftrt:VI248_AVX512BW_AVX512VL
15769 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
15770 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
15772 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15773 [(set_attr "type" "sseishft")
15774 (set (attr "length_immediate")
15775 (if_then_else (match_operand 2 "const_int_operand")
15777 (const_string "0")))
15778 (set_attr "mode" "<sseinsnmode>")])

;; Named expander covering the AVX512 modes above.
15780 (define_expand "ashr<mode>3"
15781 [(set (match_operand:VI248_AVX512BW 0 "register_operand")
15782 (ashiftrt:VI248_AVX512BW
15783 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand")
15784 (match_operand:DI 2 "nonmemory_operand")))]

;; V4DI arithmetic right shift.  There is no AVX2 vpsraq, so without
;; AVX512VL it is synthesized:
;;  - count >= 63: the result is the sign mask, computed as (0 > x)
;;    with vpcmpgtq;
;;  - count == 0: plain move;
;;  - other constant counts: shift the value viewed as V8SI and
;;    recombine the halves with a constant permutation (sel indices
;;    built per case below);
;;  - variable counts: IOR of a logical right shift with the sign mask
;;    shifted left by (64 - count).
15787 (define_expand "ashrv4di3"
15788 [(set (match_operand:V4DI 0 "register_operand")
15790 (match_operand:V4DI 1 "nonimmediate_operand")
15791 (match_operand:DI 2 "nonmemory_operand")))]
15794 if (!TARGET_AVX512VL)
15796 if (CONST_INT_P (operands[2]) && UINTVAL (operands[2]) >= 63)
15798 rtx zero = force_reg (V4DImode, CONST0_RTX (V4DImode));
15799 emit_insn (gen_avx2_gtv4di3 (operands[0], zero, operands[1]));
15802 if (operands[2] == const0_rtx)
15804 emit_move_insn (operands[0], operands[1]);
15807 operands[1] = force_reg (V4DImode, operands[1]);
15808 if (CONST_INT_P (operands[2]))
15810 vec_perm_builder sel (8, 8, 1);
15811 sel.quick_grow (8);
15813 rtx op1 = lowpart_subreg (V8SImode, operands[1], V4DImode);
15814 rtx target = gen_reg_rtx (V8SImode);
15815 if (INTVAL (operands[2]) > 32)
15817 arg0 = gen_reg_rtx (V8SImode);
15818 arg1 = gen_reg_rtx (V8SImode);
15819 emit_insn (gen_ashrv8si3 (arg1, op1, GEN_INT (31)));
15820 emit_insn (gen_ashrv8si3 (arg0, op1,
15821 GEN_INT (INTVAL (operands[2]) - 32)));
15831 else if (INTVAL (operands[2]) == 32)
15834 arg1 = gen_reg_rtx (V8SImode);
15835 emit_insn (gen_ashrv8si3 (arg1, op1, GEN_INT (31)));
15847 arg0 = gen_reg_rtx (V4DImode);
15848 arg1 = gen_reg_rtx (V8SImode);
15849 emit_insn (gen_lshrv4di3 (arg0, operands[1], operands[2]));
15850 emit_insn (gen_ashrv8si3 (arg1, op1, operands[2]));
15851 arg0 = lowpart_subreg (V8SImode, arg0, V4DImode);
15861 vec_perm_indices indices (sel, 2, 8);
15862 bool ok = targetm.vectorize.vec_perm_const (V8SImode, V8SImode,
15863 target, arg0, arg1,
15866 emit_move_insn (operands[0],
15867 lowpart_subreg (V4DImode, target, V8SImode));
15871 rtx zero = force_reg (V4DImode, CONST0_RTX (V4DImode));
15872 rtx zero_or_all_ones = gen_reg_rtx (V4DImode);
15873 emit_insn (gen_avx2_gtv4di3 (zero_or_all_ones, zero, operands[1]));
15874 rtx lshr_res = gen_reg_rtx (V4DImode);
15875 emit_insn (gen_lshrv4di3 (lshr_res, operands[1], operands[2]));
15876 rtx ashl_res = gen_reg_rtx (V4DImode);
15880 amount = gen_reg_rtx (DImode);
15881 emit_insn (gen_subdi3 (amount, force_reg (DImode, GEN_INT (64)),
15886 rtx temp = gen_reg_rtx (SImode);
15887 emit_insn (gen_subsi3 (temp, force_reg (SImode, GEN_INT (64)),
15888 lowpart_subreg (SImode, operands[2],
15890 amount = gen_reg_rtx (V4SImode);
15891 emit_insn (gen_vec_setv4si_0 (amount, CONST0_RTX (V4SImode),
15894 amount = lowpart_subreg (DImode, amount, GET_MODE (amount));
15895 emit_insn (gen_ashlv4di3 (ashl_res, zero_or_all_ones, amount));
15896 emit_insn (gen_iorv4di3 (operands[0], lshr_res, ashl_res));
;; Logical shifts (psll/psrl family); any_lshift covers both
;; directions, with the mnemonic selected via <vshift>.

;; AVX512 maskable form.
15901 (define_insn "<mask_codefor><insn><mode>3<mask_name>"
15902 [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
15903 (any_lshift:VI248_AVX512BW_2
15904 (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
15905 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
15907 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15908 [(set_attr "type" "sseishft")
15909 (set (attr "length_immediate")
15910 (if_then_else (match_operand 2 "const_int_operand")
15912 (const_string "0")))
15913 (set_attr "mode" "<sseinsnmode>")])

;; SSE2/AVX form (two alternatives: legacy destructive and VEX).
15915 (define_insn "<insn><mode>3"
15916 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,<v_Yw>")
15917 (any_lshift:VI248_AVX2
15918 (match_operand:VI248_AVX2 1 "register_operand" "0,<v_Yw>")
15919 (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))]
15922 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
15923 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15924 [(set_attr "isa" "noavx,avx")
15925 (set_attr "type" "sseishft")
15926 (set (attr "length_immediate")
15927 (if_then_else (match_operand 2 "const_int_operand")
15929 (const_string "0")))
15930 (set_attr "prefix_data16" "1,*")
15931 (set_attr "prefix" "orig,vex")
15932 (set_attr "mode" "<sseinsnmode>")])

;; AVX512BW maskable form over the full VI248_AVX512BW iterator.
15934 (define_insn "<insn><mode>3<mask_name>"
15935 [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
15936 (any_lshift:VI248_AVX512BW
15937 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
15938 (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
15940 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15941 [(set_attr "type" "sseishft")
15942 (set (attr "length_immediate")
15943 (if_then_else (match_operand 2 "const_int_operand")
15945 (const_string "0")))
15946 (set_attr "mode" "<sseinsnmode>")])
15948 ;; PR target/101796: Transform movl+vpbroadcastw+vpsravw to vpsraw
15949 ;; when COUNT is immediate.
;; Split: a shift whose count is a constant-duplicate vector is
;; rewritten as a shift by the scalar count element (operand 3),
;; valid only while the count is below the element bit width.
15951 [(set (match_operand:VI248_AVX512BW 0 "register_operand")
15952 (any_shift:VI248_AVX512BW
15953 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand")
15954 (match_operand:VI248_AVX512BW 2 "const_vector_duplicate_operand")))]
15955 "TARGET_AVX512F && GET_MODE_UNIT_BITSIZE (<MODE>mode)
15956 > INTVAL (XVECEXP (operands[2], 0, 0))"
15957 [(set (match_dup 0)
15958 (any_shift:VI248_AVX512BW
15961 "operands[3] = XVECEXP (operands[2], 0, 0);")
;; Whole-vector byte shifts: any 128-bit mode is shifted as V1TImode
;; (pslldq/psrldq semantics) via lowpart subregs, with the count in
;; bits constrained to a multiple of 8.
15963 (define_expand "vec_shl_<mode>"
15964 [(set (match_dup 3)
15966 (match_operand:V_128 1 "register_operand")
15967 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
15968 (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
15971 operands[1] = gen_lowpart (V1TImode, operands[1]);
15972 operands[3] = gen_reg_rtx (V1TImode);
15973 operands[4] = gen_lowpart (<MODE>mode, operands[3]);

15976 (define_expand "vec_shr_<mode>"
15977 [(set (match_dup 3)
15979 (match_operand:V_128 1 "register_operand")
15980 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
15981 (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
15984 operands[1] = gen_lowpart (V1TImode, operands[1]);
15985 operands[3] = gen_reg_rtx (V1TImode);
15986 operands[4] = gen_lowpart (<MODE>mode, operands[3]);

;; 128-bit (V1TI) shifts and rotates.  The named expanders handle
;; constant counts through the ix86_expand_v1ti_* helpers (the
;; non-constant-count path here has its handling elided in this
;; extract); the *_internal insn_and_split forms defer the constant
;; cases to a pre-reload split that calls the same helpers.
15989 (define_expand "ashlv1ti3"
15990 [(set (match_operand:V1TI 0 "register_operand")
15992 (match_operand:V1TI 1 "register_operand")
15993 (match_operand:QI 2 "general_operand")))]
15994 "TARGET_SSE2 && TARGET_64BIT"
15996 if (!CONST_INT_P (operands[2]))
15998 ix86_expand_v1ti_shift (ASHIFT, operands);

16003 (define_insn_and_split "*ashlv1ti3_internal"
16004 [(set (match_operand:V1TI 0 "register_operand")
16006 (match_operand:V1TI 1 "register_operand")
16007 (match_operand:SI 2 "const_0_to_255_not_mul_8_operand")))]
16008 "TARGET_SSE2 && TARGET_64BIT
16009 && ix86_pre_reload_split ()"
16014 ix86_expand_v1ti_shift (ASHIFT, operands);

16018 (define_expand "lshrv1ti3"
16019 [(set (match_operand:V1TI 0 "register_operand")
16021 (match_operand:V1TI 1 "register_operand")
16022 (match_operand:QI 2 "general_operand")))]
16023 "TARGET_SSE2 && TARGET_64BIT"
16025 if (!CONST_INT_P (operands[2]))
16027 ix86_expand_v1ti_shift (LSHIFTRT, operands);

16032 (define_insn_and_split "*lshrv1ti3_internal"
16033 [(set (match_operand:V1TI 0 "register_operand")
16035 (match_operand:V1TI 1 "register_operand")
16036 (match_operand:SI 2 "const_0_to_255_not_mul_8_operand")))]
16037 "TARGET_SSE2 && TARGET_64BIT
16038 && ix86_pre_reload_split ()"
16043 ix86_expand_v1ti_shift (LSHIFTRT, operands);

16047 (define_expand "ashrv1ti3"
16048 [(set (match_operand:V1TI 0 "register_operand")
16050 (match_operand:V1TI 1 "register_operand")
16051 (match_operand:QI 2 "general_operand")))]
16052 "TARGET_SSE2 && TARGET_64BIT"
16054 if (!CONST_INT_P (operands[2]))
16056 ix86_expand_v1ti_ashiftrt (operands);

16062 (define_insn_and_split "*ashrv1ti3_internal"
16063 [(set (match_operand:V1TI 0 "register_operand")
16065 (match_operand:V1TI 1 "register_operand")
16066 (match_operand:SI 2 "const_0_to_255_operand")))]
16067 "TARGET_SSE2 && TARGET_64BIT
16068 && ix86_pre_reload_split ()"
16073 ix86_expand_v1ti_ashiftrt (operands);

16077 (define_expand "rotlv1ti3"
16078 [(set (match_operand:V1TI 0 "register_operand")
16080 (match_operand:V1TI 1 "register_operand")
16081 (match_operand:QI 2 "general_operand")))]
16082 "TARGET_SSE2 && TARGET_64BIT"
16084 if (!CONST_INT_P (operands[2]))
16086 ix86_expand_v1ti_rotate (ROTATE, operands);

16091 (define_insn_and_split "*rotlv1ti3_internal"
16092 [(set (match_operand:V1TI 0 "register_operand")
16094 (match_operand:V1TI 1 "register_operand")
16095 (match_operand:SI 2 "const_0_to_255_operand")))]
16096 "TARGET_SSE2 && TARGET_64BIT
16097 && ix86_pre_reload_split ()"
16102 ix86_expand_v1ti_rotate (ROTATE, operands);

16106 (define_expand "rotrv1ti3"
16107 [(set (match_operand:V1TI 0 "register_operand")
16109 (match_operand:V1TI 1 "register_operand")
16110 (match_operand:QI 2 "general_operand")))]
16111 "TARGET_SSE2 && TARGET_64BIT"
16113 if (!CONST_INT_P (operands[2]))
16115 ix86_expand_v1ti_rotate (ROTATERT, operands);

16120 (define_insn_and_split "*rotrv1ti3_internal"
16121 [(set (match_operand:V1TI 0 "register_operand")
16123 (match_operand:V1TI 1 "register_operand")
16124 (match_operand:SI 2 "const_0_to_255_operand")))]
16125 "TARGET_SSE2 && TARGET_64BIT
16126 && ix86_pre_reload_split ()"
16131 ix86_expand_v1ti_rotate (ROTATERT, operands);

;; Byte-wise whole-register shifts (pslldq/psrldq).  The count in the
;; RTL is in bits; it is divided by 8 to form the insn immediate.
16135 (define_insn "avx512bw_<insn><mode>3"
16136 [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
16137 (any_lshift:VIMAX_AVX512VL
16138 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
16139 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
16142 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
16143 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
16145 [(set_attr "type" "sseishft")
16146 (set_attr "length_immediate" "1")
16147 (set_attr "prefix" "maybe_evex")
16148 (set_attr "mode" "<sseinsnmode>")])

16150 (define_insn "<sse2_avx2>_<insn><mode>3"
16151 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,Yw")
16152 (any_lshift:VIMAX_AVX2
16153 (match_operand:VIMAX_AVX2 1 "register_operand" "0,Yw")
16154 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))]
16157 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
16159 switch (which_alternative)
16162 return "p<vshift>dq\t{%2, %0|%0, %2}";
16164 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
16166 gcc_unreachable ();
16169 [(set_attr "isa" "noavx,avx")
16170 (set_attr "type" "sseishft")
16171 (set_attr "length_immediate" "1")
16172 (set_attr "atom_unit" "sishuf")
16173 (set_attr "prefix_data16" "1,*")
16174 (set_attr "prefix" "orig,vex")
16175 (set_attr "mode" "<sseinsnmode>")])
16197 (define_expand "<code><mode>3"
16198 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
16199 (maxmin:VI124_256_AVX512F_AVX512BW
16200 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
16201 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
16203 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
16205 (define_insn "*avx2_<code><mode>3"
16206 [(set (match_operand:VI124_256 0 "register_operand" "=<v_Yw>")
16208 (match_operand:VI124_256 1 "nonimmediate_operand" "%<v_Yw>")
16209 (match_operand:VI124_256 2 "nonimmediate_operand" "<v_Yw>m")))]
16210 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16211 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16212 [(set_attr "type" "sseiadd")
16213 (set_attr "prefix_extra" "1")
16214 (set_attr "prefix" "vex")
16215 (set_attr "mode" "OI")])
16217 (define_expand "cond_<code><mode>"
16218 [(set (match_operand:VI1248_AVX512VLBW 0 "register_operand")
16219 (vec_merge:VI1248_AVX512VLBW
16220 (maxmin:VI1248_AVX512VLBW
16221 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand")
16222 (match_operand:VI1248_AVX512VLBW 3 "nonimmediate_operand"))
16223 (match_operand:VI1248_AVX512VLBW 4 "nonimm_or_0_operand")
16224 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
16227 emit_insn (gen_<code><mode>3_mask (operands[0],
16235 (define_expand "<code><mode>3_mask"
16236 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
16237 (vec_merge:VI48_AVX512VL
16238 (maxmin:VI48_AVX512VL
16239 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
16240 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
16241 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
16242 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
16244 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
16246 (define_insn "*avx512f_<code><mode>3<mask_name>"
16247 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
16248 (maxmin:VI48_AVX512VL
16249 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
16250 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
16251 "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16252 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16253 [(set_attr "type" "sseiadd")
16254 (set_attr "prefix_extra" "1")
16255 (set_attr "prefix" "maybe_evex")
16256 (set_attr "mode" "<sseinsnmode>")])
16258 (define_insn "<mask_codefor><code><mode>3<mask_name>"
16259 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
16260 (maxmin:VI12_AVX512VL
16261 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
16262 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
16264 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16265 [(set_attr "type" "sseiadd")
16266 (set_attr "prefix" "evex")
16267 (set_attr "mode" "<sseinsnmode>")])
16269 (define_expand "<code><mode>3"
16270 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
16271 (maxmin:VI8_AVX2_AVX512F
16272 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
16273 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
16277 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
16281 enum rtx_code code;
16286 xops[0] = operands[0];
16288 if (<CODE> == SMAX || <CODE> == UMAX)
16290 xops[1] = operands[1];
16291 xops[2] = operands[2];
16295 xops[1] = operands[2];
16296 xops[2] = operands[1];
16299 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
16301 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
16302 xops[4] = operands[1];
16303 xops[5] = operands[2];
16305 ok = ix86_expand_int_vcond (xops);
;; 128-bit signed min/max.  SSE4.1 covers all element widths (insn
;; below); V8HI alone exists in plain SSE2 (pminsw/pmaxsw).  Remaining
;; cases are synthesized with a signed compare + blend via
;; ix86_expand_int_vcond, swapping operands for SMIN.
16311 (define_expand "<code><mode>3"
16312 [(set (match_operand:VI124_128 0 "register_operand")
16314 (match_operand:VI124_128 1 "vector_operand")
16315 (match_operand:VI124_128 2 "vector_operand")))]
16318 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
16319 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
16325 xops[0] = operands[0];
16326 operands[1] = force_reg (<MODE>mode, operands[1]);
16327 operands[2] = force_reg (<MODE>mode, operands[2]);
16329 if (<CODE> == SMAX)
16331 xops[1] = operands[1];
16332 xops[2] = operands[2];
16336 xops[1] = operands[2];
16337 xops[2] = operands[1];
16340 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
16341 xops[4] = operands[1];
16342 xops[5] = operands[2];
16344 ok = ix86_expand_int_vcond (xops);

;; SSE4.1 pmin/pmax for byte and dword elements (VI14_128).
16350 (define_insn "*sse4_1_<code><mode>3<mask_name>"
16351 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,<v_Yw>")
16353 (match_operand:VI14_128 1 "vector_operand" "%0,0,<v_Yw>")
16354 (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))]
16356 && <mask_mode512bit_condition>
16357 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16359 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
16360 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
16361 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16362 [(set_attr "isa" "noavx,noavx,avx")
16363 (set_attr "type" "sseiadd")
16364 (set_attr "prefix_extra" "1,1,*")
16365 (set_attr "prefix" "orig,orig,vex")
16366 (set_attr "mode" "TI")])

;; SSE2 word min/max (pminsw/pmaxsw) insn.
16368 (define_insn "*<code>v8hi3"
16369 [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
16371 (match_operand:V8HI 1 "vector_operand" "%0,Yw")
16372 (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")))]
16373 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16375 p<maxmin_int>w\t{%2, %0|%0, %2}
16376 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
16377 [(set_attr "isa" "noavx,avx")
16378 (set_attr "type" "sseiadd")
16379 (set_attr "prefix_data16" "1,*")
16380 (set_attr "prefix_extra" "*,1")
16381 (set_attr "prefix" "orig,vex")
16382 (set_attr "mode" "TI")])

;; 128-bit unsigned min/max.  V16QI exists in plain SSE2
;; (pminub/pmaxub); SSE2 V8HI UMAX uses the saturating identity
;; max(a,b) = (a -us b) + b (psubusw + paddw); remaining cases go
;; through an unsigned compare + blend via ix86_expand_int_vcond.
16384 (define_expand "<code><mode>3"
16385 [(set (match_operand:VI124_128 0 "register_operand")
16387 (match_operand:VI124_128 1 "vector_operand")
16388 (match_operand:VI124_128 2 "vector_operand")))]
16391 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
16392 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
16393 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
16395 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
16396 operands[1] = force_reg (<MODE>mode, operands[1]);
16397 if (rtx_equal_p (op3, op2))
16398 op3 = gen_reg_rtx (V8HImode);
16399 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
16400 emit_insn (gen_addv8hi3 (op0, op3, op2));
16408 operands[1] = force_reg (<MODE>mode, operands[1]);
16409 operands[2] = force_reg (<MODE>mode, operands[2]);
16411 xops[0] = operands[0];
16413 if (<CODE> == UMAX)
16415 xops[1] = operands[1];
16416 xops[2] = operands[2];
16420 xops[1] = operands[2];
16421 xops[2] = operands[1];
16424 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
16425 xops[4] = operands[1];
16426 xops[5] = operands[2];
16428 ok = ix86_expand_int_vcond (xops);

;; SSE4.1 pmin/pmax for word and dword elements (VI24_128).
16434 (define_insn "*sse4_1_<code><mode>3<mask_name>"
16435 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,<v_Yw>")
16437 (match_operand:VI24_128 1 "vector_operand" "%0,0,<v_Yw>")
16438 (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))]
16440 && <mask_mode512bit_condition>
16441 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16443 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
16444 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
16445 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16446 [(set_attr "isa" "noavx,noavx,avx")
16447 (set_attr "type" "sseiadd")
16448 (set_attr "prefix_extra" "1,1,*")
16449 (set_attr "prefix" "orig,orig,vex")
16450 (set_attr "mode" "TI")])

;; SSE2 byte min/max (pminub/pmaxub) insn.
16452 (define_insn "*<code>v16qi3"
16453 [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
16455 (match_operand:V16QI 1 "vector_operand" "%0,Yw")
16456 (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")))]
16457 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16459 p<maxmin_int>b\t{%2, %0|%0, %2}
16460 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
16461 [(set_attr "isa" "noavx,avx")
16462 (set_attr "type" "sseiadd")
16463 (set_attr "prefix_data16" "1,*")
16464 (set_attr "prefix_extra" "*,1")
16465 (set_attr "prefix" "orig,vex")
16466 (set_attr "mode" "TI")])
16468 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16470 ;; Parallel integral comparisons
16472 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX2 vpcmpeq for 256-bit integer modes.
16474 (define_insn "*avx2_eq<mode>3"
16475 [(set (match_operand:VI_256 0 "register_operand" "=x")
16477 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
16478 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
16479 "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16480 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16481 [(set_attr "type" "ssecmp")
16482 (set_attr "prefix_extra" "1")
16483 (set_attr "prefix" "vex")
16484 (set_attr "mode" "OI")])

;; Splitters that fold an AVX512 mask compare (predicate in operand 5)
;; merged with all-ones/zero back into a plain full-width vector
;; compare.  Per the inline comments, predicate 0=EQ, 1=LT, 2=LE,
;; 4=NE, 5=NLT (GE), 6=NLE (GT); LT/NLT are handled by swapping the
;; compare operands.
16486 (define_insn_and_split "*avx2_pcmp<mode>3_1"
16487 [(set (match_operand:VI_128_256 0 "register_operand")
16488 (vec_merge:VI_128_256
16489 (match_operand:VI_128_256 1 "vector_all_ones_operand")
16490 (match_operand:VI_128_256 2 "const0_operand")
16491 (unspec:<avx512fmaskmode>
16492 [(match_operand:VI_128_256 3 "nonimmediate_operand")
16493 (match_operand:VI_128_256 4 "nonimmediate_operand")
16494 (match_operand:SI 5 "const_0_to_7_operand")]
16496 "TARGET_AVX512VL && ix86_pre_reload_split ()
16497 /* EQ is commutative. */
16498 && ((INTVAL (operands[5]) == 0
16499 && !(MEM_P (operands[3]) && MEM_P (operands[4])))
16500 /* NLE aka GT, 3 must be register. */
16501 || (INTVAL (operands[5]) == 6
16502 && !MEM_P (operands[3]))
16503 /* LT, 4 must be register and we swap operands. */
16504 || (INTVAL (operands[5]) == 1
16505 && !MEM_P (operands[4])))"
16510 if (INTVAL (operands[5]) == 1)
16511 std::swap (operands[3], operands[4]);
16512 enum rtx_code code = INTVAL (operands[5]) ? GT : EQ;
16513 emit_move_insn (operands[0], gen_rtx_fmt_ee (code, <MODE>mode,
16514 operands[3], operands[4]));

;; As above but for the negated mask (NOT of the compare): handles
;; NE / LE / NLT predicates via operand swap and GT-vs-EQ selection.
16518 (define_insn_and_split "*avx2_pcmp<mode>3_2"
16519 [(set (match_operand:VI_128_256 0 "register_operand")
16520 (vec_merge:VI_128_256
16521 (match_operand:VI_128_256 1 "vector_all_ones_operand")
16522 (match_operand:VI_128_256 2 "const0_operand")
16523 (not:<avx512fmaskmode>
16524 (unspec:<avx512fmaskmode>
16525 [(match_operand:VI_128_256 3 "nonimmediate_operand")
16526 (match_operand:VI_128_256 4 "nonimmediate_operand")
16527 (match_operand:SI 5 "const_0_to_7_operand")]
16529 "TARGET_AVX512VL && ix86_pre_reload_split ()
16530 /* NE is commutative. */
16531 && ((INTVAL (operands[5]) == 4
16532 && !(MEM_P (operands[3]) && MEM_P (operands[4])))
16533 /* LE, 3 must be register. */
16534 || (INTVAL (operands[5]) == 2
16535 && !MEM_P (operands[3]))
16536 /* NLT aka GE, 4 must be register and we swap operands. */
16537 || (INTVAL (operands[5]) == 5
16538 && !MEM_P (operands[4])))"
16543 if (INTVAL (operands[5]) == 5)
16544 std::swap (operands[3], operands[4]);
16545 enum rtx_code code = INTVAL (operands[5]) != 4 ? GT : EQ;
16546 emit_move_insn (operands[0], gen_rtx_fmt_ee (code, <MODE>mode,
16547 operands[3], operands[4]));

;; Blend between operands 1 and 2 controlled by a mask compare of
;; operand 3 against zero (predicates 1/5); lowered to a vec_merge
;; form with the blend operands swapped for predicate 5.
16551 (define_insn_and_split "*avx2_pcmp<mode>3_3"
16552 [(set (match_operand:VI1_AVX2 0 "register_operand")
16553 (vec_merge:VI1_AVX2
16554 (match_operand:VI1_AVX2 1 "vector_operand")
16555 (match_operand:VI1_AVX2 2 "vector_operand")
16556 (unspec:<avx512fmaskmode>
16557 [(match_operand:VI1_AVX2 3 "register_operand")
16558 (match_operand:VI1_AVX2 4 "const0_operand")
16559 (match_operand:SI 5 "const_0_to_7_operand")]
16561 "TARGET_AVX512VL && ix86_pre_reload_split ()
16563 && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
16564 || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
16567 [(set (match_dup 0)
16576 if (INTVAL (operands[5]) == 5)
16577 std::swap (operands[1], operands[2]);

;; Variant of the above where the compared value appears negated via a
;; paradoxical subreg of a NOT; the inverted sense is absorbed by
;; swapping the blend operands, and operand 3 is re-viewed in the
;; blend's mode with gen_lowpart.
16580 (define_insn_and_split "*avx2_pcmp<mode>3_4"
16581 [(set (match_operand:VI1_AVX2 0 "register_operand")
16582 (vec_merge:VI1_AVX2
16583 (match_operand:VI1_AVX2 1 "vector_operand")
16584 (match_operand:VI1_AVX2 2 "vector_operand")
16585 (unspec:<avx512fmaskmode>
16586 [(subreg:VI1_AVX2 (not (match_operand 3 "register_operand")) 0)
16587 (match_operand:VI1_AVX2 4 "const0_operand")
16588 (match_operand:SI 5 "const_0_to_7_operand")]
16590 "TARGET_AVX512VL && ix86_pre_reload_split ()
16591 && GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT
16592 && GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE>
16594 && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[1]))
16595 || (INTVAL (operands[5]) == 5 && !MEM_P (operands[2])))"
16598 [(set (match_dup 0)
16607 if (INTVAL (operands[5]) == 1)
16608 std::swap (operands[1], operands[2]);
16609 operands[3] = gen_lowpart (<MODE>mode, operands[3]);
16612 (define_insn_and_split "*avx2_pcmp<mode>3_5"
16613 [(set (match_operand:VI1_AVX2 0 "register_operand")
16614 (vec_merge:VI1_AVX2
16615 (match_operand:VI1_AVX2 1 "vector_operand")
16616 (match_operand:VI1_AVX2 2 "vector_operand")
16617 (unspec:<avx512fmaskmode>
16618 [(not:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand"))
16619 (match_operand:VI1_AVX2 4 "const0_operand")
16620 (match_operand:SI 5 "const_0_to_7_operand")]
16622 "TARGET_AVX512VL && ix86_pre_reload_split ()
16624 && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[1]))
16625 || (INTVAL (operands[5]) == 5 && !MEM_P (operands[2])))"
16628 [(set (match_dup 0)
16637 if (INTVAL (operands[5]) == 1)
16638 std::swap (operands[1], operands[2]);
16641 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
16642 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
16643 (unspec:<avx512fmaskmode>
16644 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
16645 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
16649 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
16651 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
16652 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
16653 (unspec:<avx512fmaskmode>
16654 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
16655 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
16659 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
16661 (define_insn "*sse4_1_eqv2di3"
16662 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
16664 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
16665 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
16666 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16668 pcmpeqq\t{%2, %0|%0, %2}
16669 pcmpeqq\t{%2, %0|%0, %2}
16670 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
16671 [(set_attr "isa" "noavx,noavx,avx")
16672 (set_attr "type" "ssecmp")
16673 (set_attr "prefix_extra" "1")
16674 (set_attr "prefix" "orig,orig,vex")
16675 (set_attr "mode" "TI")])
16677 (define_insn "*sse2_eq<mode>3"
16678 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
16680 (match_operand:VI124_128 1 "vector_operand" "%0,x")
16681 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
16683 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16685 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
16686 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16687 [(set_attr "isa" "noavx,avx")
16688 (set_attr "type" "ssecmp")
16689 (set_attr "prefix_data16" "1,*")
16690 (set_attr "prefix" "orig,vex")
16691 (set_attr "mode" "TI")])
16693 (define_insn "sse4_2_gtv2di3"
16694 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
16696 (match_operand:V2DI 1 "register_operand" "0,0,x")
16697 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
16700 pcmpgtq\t{%2, %0|%0, %2}
16701 pcmpgtq\t{%2, %0|%0, %2}
16702 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
16703 [(set_attr "isa" "noavx,noavx,avx")
16704 (set_attr "type" "ssecmp")
16705 (set_attr "prefix_extra" "1")
16706 (set_attr "prefix" "orig,orig,vex")
16707 (set_attr "mode" "TI")])
16709 (define_insn "avx2_gt<mode>3"
16710 [(set (match_operand:VI_256 0 "register_operand" "=x")
16712 (match_operand:VI_256 1 "register_operand" "x")
16713 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
16715 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16716 [(set_attr "type" "ssecmp")
16717 (set_attr "prefix_extra" "1")
16718 (set_attr "prefix" "vex")
16719 (set_attr "mode" "OI")])
16721 (define_expand "<avx512>_gt<mode>3<mask_scalar_merge_name>"
16722 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
16723 (unspec:<avx512fmaskmode>
16724 [(match_operand:VI48_AVX512VL 1 "register_operand")
16725 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
16726 (const_int 6)] UNSPEC_PCMP))]
16729 (define_expand "<avx512>_gt<mode>3<mask_scalar_merge_name>"
16730 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
16731 (unspec:<avx512fmaskmode>
16732 [(match_operand:VI12_AVX512VL 1 "register_operand")
16733 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
16734 (const_int 6)] UNSPEC_PCMP))]
16737 (define_insn "*sse2_gt<mode>3"
16738 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
16740 (match_operand:VI124_128 1 "register_operand" "0,x")
16741 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
16744 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
16745 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16746 [(set_attr "isa" "noavx,avx")
16747 (set_attr "type" "ssecmp")
16748 (set_attr "prefix_data16" "1,*")
16749 (set_attr "prefix" "orig,vex")
16750 (set_attr "mode" "TI")])
16752 (define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
16753 [(set (match_operand:V_512 0 "register_operand")
16754 (if_then_else:V_512
16755 (match_operator 3 ""
16756 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
16757 (match_operand:VI_AVX512BW 5 "general_operand")])
16758 (match_operand:V_512 1)
16759 (match_operand:V_512 2)))]
16761 && (GET_MODE_NUNITS (<V_512:MODE>mode)
16762 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
16764 bool ok = ix86_expand_int_vcond (operands);
16769 (define_expand "vcond<V_256:mode><VI_256:mode>"
16770 [(set (match_operand:V_256 0 "register_operand")
16771 (if_then_else:V_256
16772 (match_operator 3 ""
16773 [(match_operand:VI_256 4 "nonimmediate_operand")
16774 (match_operand:VI_256 5 "general_operand")])
16775 (match_operand:V_256 1)
16776 (match_operand:V_256 2)))]
16778 && (GET_MODE_NUNITS (<V_256:MODE>mode)
16779 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
16781 bool ok = ix86_expand_int_vcond (operands);
16786 (define_expand "vcond<V_128:mode><VI124_128:mode>"
16787 [(set (match_operand:V_128 0 "register_operand")
16788 (if_then_else:V_128
16789 (match_operator 3 ""
16790 [(match_operand:VI124_128 4 "vector_operand")
16791 (match_operand:VI124_128 5 "general_operand")])
16792 (match_operand:V_128 1)
16793 (match_operand:V_128 2)))]
16795 && (GET_MODE_NUNITS (<V_128:MODE>mode)
16796 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
16798 bool ok = ix86_expand_int_vcond (operands);
16803 (define_expand "vcond<VI8F_128:mode>v2di"
16804 [(set (match_operand:VI8F_128 0 "register_operand")
16805 (if_then_else:VI8F_128
16806 (match_operator 3 ""
16807 [(match_operand:V2DI 4 "vector_operand")
16808 (match_operand:V2DI 5 "general_operand")])
16809 (match_operand:VI8F_128 1)
16810 (match_operand:VI8F_128 2)))]
16813 bool ok = ix86_expand_int_vcond (operands);
16818 (define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
16819 [(set (match_operand:V_512 0 "register_operand")
16820 (if_then_else:V_512
16821 (match_operator 3 ""
16822 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
16823 (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
16824 (match_operand:V_512 1 "general_operand")
16825 (match_operand:V_512 2 "general_operand")))]
16827 && (GET_MODE_NUNITS (<V_512:MODE>mode)
16828 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
16830 bool ok = ix86_expand_int_vcond (operands);
16835 (define_expand "vcondu<V_256:mode><VI_256:mode>"
16836 [(set (match_operand:V_256 0 "register_operand")
16837 (if_then_else:V_256
16838 (match_operator 3 ""
16839 [(match_operand:VI_256 4 "nonimmediate_operand")
16840 (match_operand:VI_256 5 "nonimmediate_operand")])
16841 (match_operand:V_256 1 "general_operand")
16842 (match_operand:V_256 2 "general_operand")))]
16844 && (GET_MODE_NUNITS (<V_256:MODE>mode)
16845 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
16847 bool ok = ix86_expand_int_vcond (operands);
16852 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
16853 [(set (match_operand:V_128 0 "register_operand")
16854 (if_then_else:V_128
16855 (match_operator 3 ""
16856 [(match_operand:VI124_128 4 "vector_operand")
16857 (match_operand:VI124_128 5 "vector_operand")])
16858 (match_operand:V_128 1 "general_operand")
16859 (match_operand:V_128 2 "general_operand")))]
16861 && (GET_MODE_NUNITS (<V_128:MODE>mode)
16862 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
16864 bool ok = ix86_expand_int_vcond (operands);
16869 (define_expand "vcondu<VI8F_128:mode>v2di"
16870 [(set (match_operand:VI8F_128 0 "register_operand")
16871 (if_then_else:VI8F_128
16872 (match_operator 3 ""
16873 [(match_operand:V2DI 4 "vector_operand")
16874 (match_operand:V2DI 5 "vector_operand")])
16875 (match_operand:VI8F_128 1 "general_operand")
16876 (match_operand:VI8F_128 2 "general_operand")))]
16879 bool ok = ix86_expand_int_vcond (operands);
16884 (define_expand "vcondu<mode><sseintvecmodelower>"
16885 [(set (match_operand:VF_AVX512FP16VL 0 "register_operand")
16886 (if_then_else:VF_AVX512FP16VL
16887 (match_operator 3 ""
16888 [(match_operand:<sseintvecmode> 4 "vector_operand")
16889 (match_operand:<sseintvecmode> 5 "vector_operand")])
16890 (match_operand:VF_AVX512FP16VL 1 "general_operand")
16891 (match_operand:VF_AVX512FP16VL 2 "general_operand")))]
16892 "TARGET_AVX512FP16"
16894 bool ok = ix86_expand_int_vcond (operands);
16899 (define_expand "vcondeq<VI8F_128:mode>v2di"
16900 [(set (match_operand:VI8F_128 0 "register_operand")
16901 (if_then_else:VI8F_128
16902 (match_operator 3 ""
16903 [(match_operand:V2DI 4 "vector_operand")
16904 (match_operand:V2DI 5 "general_operand")])
16905 (match_operand:VI8F_128 1)
16906 (match_operand:VI8F_128 2)))]
16909 bool ok = ix86_expand_int_vcond (operands);
;; Modes accepted by the vec_perm<mode> expander that follows.  The
;; 128-bit integer and float modes are unconditional; each wider or
;; FP16 mode is gated on the ISA extension that provides permute
;; support for it (AVX2 for 256-bit, AVX512F/AVX512BW for 512-bit,
;; AVX512VBMI for V64QI, AVX512FP16 for the HFmode vectors).
16914 (define_mode_iterator VEC_PERM_AVX2
16915 [V16QI V8HI V4SI V2DI V4SF V2DF
16916 (V8HF "TARGET_AVX512FP16")
16917 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
16918 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
16919 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
16920 (V16HF "TARGET_AVX512FP16")
16921 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
16922 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
16923 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")
16924 (V32HF "TARGET_AVX512FP16")])
16926 (define_expand "vec_perm<mode>"
16927 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
16928 (match_operand:VEC_PERM_AVX2 1 "register_operand")
16929 (match_operand:VEC_PERM_AVX2 2 "register_operand")
16930 (match_operand:<sseintvecmode> 3 "register_operand")]
16931 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
16933 ix86_expand_vec_perm (operands);
16937 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16939 ;; Parallel bitwise logical operations
16941 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16943 (define_expand "one_cmpl<mode>2"
16944 [(set (match_operand:VI 0 "register_operand")
16945 (xor:VI (match_operand:VI 1 "vector_operand")
16949 operands[2] = CONSTM1_RTX (<MODE>mode);
16951 if (!TARGET_AVX512F)
16952 operands[2] = force_reg (<MODE>mode, operands[2]);
16955 (define_insn "<mask_codefor>one_cmpl<mode>2<mask_name>"
16956 [(set (match_operand:VI 0 "register_operand" "=v,v")
16957 (xor:VI (match_operand:VI 1 "nonimmediate_operand" "v,m")
16958 (match_operand:VI 2 "vector_all_ones_operand" "BC,BC")))]
16960 && (!<mask_applied>
16961 || <ssescalarmode>mode == SImode
16962 || <ssescalarmode>mode == DImode)"
16964 if (TARGET_AVX512VL)
16965 return "vpternlog<ternlogsuffix>\t{$0x55, %1, %0, %0<mask_operand3>|%0<mask_operand3>, %0, %1, 0x55}";
16967 return "vpternlog<ternlogsuffix>\t{$0x55, %g1, %g0, %g0<mask_operand3>|%g0<mask_operand3>, %g0, %g1, 0x55}";
16969 [(set_attr "type" "sselog")
16970 (set_attr "prefix" "evex")
16972 (if_then_else (match_test "TARGET_AVX512VL")
16973 (const_string "<sseinsnmode>")
16974 (const_string "XI")))
16975 (set (attr "enabled")
16976 (if_then_else (eq_attr "alternative" "1")
16977 (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
16980 (define_expand "<sse2_avx2>_andnot<mode>3"
16981 [(set (match_operand:VI_AVX2 0 "register_operand")
16983 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
16984 (match_operand:VI_AVX2 2 "vector_operand")))]
16987 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
16988 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
16989 (vec_merge:VI48_AVX512VL
16992 (match_operand:VI48_AVX512VL 1 "register_operand"))
16993 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
16994 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
16995 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
16998 (define_insn "*andnot<mode>3"
16999 [(set (match_operand:VI 0 "register_operand" "=x,x,v")
17001 (not:VI (match_operand:VI 1 "vector_operand" "0,x,v"))
17002 (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
17008 const char *ssesuffix;
17010 switch (get_attr_mode (insn))
17013 gcc_assert (TARGET_AVX512F);
17016 gcc_assert (TARGET_AVX2);
17019 gcc_assert (TARGET_SSE2);
17021 switch (<MODE>mode)
17025 /* There is no vpandnb or vpandnw instruction, nor vpandn for
17026 512-bit vectors. Use vpandnq instead. */
17031 ssesuffix = "<ssemodesuffix>";
17037 ssesuffix = (TARGET_AVX512VL && which_alternative == 2
17038 ? "<ssemodesuffix>" : "");
17041 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
17046 gcc_assert (TARGET_AVX512F);
17049 gcc_assert (TARGET_AVX);
17052 gcc_assert (TARGET_SSE);
17058 gcc_unreachable ();
17061 switch (which_alternative)
17064 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
17068 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
17071 gcc_unreachable ();
17074 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
17075 output_asm_insn (buf, operands);
17078 [(set_attr "isa" "noavx,avx,avx")
17079 (set_attr "type" "sselog")
17080 (set (attr "prefix_data16")
17082 (and (eq_attr "alternative" "0")
17083 (eq_attr "mode" "TI"))
17085 (const_string "*")))
17086 (set_attr "prefix" "orig,vex,evex")
17088 (cond [(match_test "TARGET_AVX2")
17089 (const_string "<sseinsnmode>")
17090 (match_test "TARGET_AVX")
17092 (match_test "<MODE_SIZE> > 16")
17093 (const_string "V8SF")
17094 (const_string "<sseinsnmode>"))
17095 (ior (not (match_test "TARGET_SSE2"))
17096 (match_test "optimize_function_for_size_p (cfun)"))
17097 (const_string "V4SF")
17099 (const_string "<sseinsnmode>")))])
17101 ;; PR target/100711: Split notl; vpbroadcastd; vpand as vpbroadcastd; vpandn
17103 [(set (match_operand:VI48_128 0 "register_operand")
17105 (vec_duplicate:VI48_128
17106 (not:<ssescalarmode>
17107 (match_operand:<ssescalarmode> 1 "register_operand")))
17108 (match_operand:VI48_128 2 "vector_operand")))]
17110 [(set (match_dup 3)
17111 (vec_duplicate:VI48_128 (match_dup 1)))
17113 (and:VI48_128 (not:VI48_128 (match_dup 3))
17115 "operands[3] = gen_reg_rtx (<MODE>mode);")
17117 ;; PR target/100711: Split notl; vpbroadcastd; vpand as vpbroadcastd; vpandn
17119 [(set (match_operand:VI124_AVX2 0 "register_operand")
17121 (vec_duplicate:VI124_AVX2
17122 (not:<ssescalarmode>
17123 (match_operand:<ssescalarmode> 1 "register_operand")))
17124 (match_operand:VI124_AVX2 2 "vector_operand")))]
17126 [(set (match_dup 3)
17127 (vec_duplicate:VI124_AVX2 (match_dup 1)))
17129 (and:VI124_AVX2 (not:VI124_AVX2 (match_dup 3))
17131 "operands[3] = gen_reg_rtx (<MODE>mode);")
17133 (define_insn "*andnot<mode>3_mask"
17134 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
17135 (vec_merge:VI48_AVX512VL
17138 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
17139 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
17140 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
17141 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17143 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
17144 [(set_attr "type" "sselog")
17145 (set_attr "prefix" "evex")
17146 (set_attr "mode" "<sseinsnmode>")])
17148 (define_expand "<code><mode>3"
17149 [(set (match_operand:VI 0 "register_operand")
17151 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
17152 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
17155 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
17159 (define_expand "cond_<code><mode>"
17160 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
17161 (vec_merge:VI48_AVX512VL
17162 (any_logic:VI48_AVX512VL
17163 (match_operand:VI48_AVX512VL 2 "vector_operand")
17164 (match_operand:VI48_AVX512VL 3 "vector_operand"))
17165 (match_operand:VI48_AVX512VL 4 "nonimm_or_0_operand")
17166 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
17169 emit_insn (gen_<code><mode>3_mask (operands[0],
17177 (define_expand "<code><mode>3_mask"
17178 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
17179 (vec_merge:VI48_AVX512VL
17180 (any_logic:VI48_AVX512VL
17181 (match_operand:VI48_AVX512VL 1 "bcst_vector_operand")
17182 (match_operand:VI48_AVX512VL 2 "bcst_vector_operand"))
17183 (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
17184 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
17186 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
17188 (define_insn "*<code><mode>3<mask_name>"
17189 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
17190 (any_logic:VI48_AVX_AVX512F
17191 (match_operand:VI48_AVX_AVX512F 1 "bcst_vector_operand" "%0,x,v")
17192 (match_operand:VI48_AVX_AVX512F 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
17193 "TARGET_SSE && <mask_mode512bit_condition>
17194 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
17199 const char *ssesuffix;
17201 switch (get_attr_mode (insn))
17204 gcc_assert (TARGET_AVX512F);
17207 gcc_assert (TARGET_AVX2);
17210 gcc_assert (TARGET_SSE2);
17212 switch (<MODE>mode)
17216 ssesuffix = "<ssemodesuffix>";
17222 ssesuffix = (TARGET_AVX512VL
17223 && (<mask_applied> || which_alternative == 2)
17224 ? "<ssemodesuffix>" : "");
17227 gcc_unreachable ();
17232 gcc_assert (TARGET_AVX);
17235 gcc_assert (TARGET_SSE);
17241 gcc_unreachable ();
17244 switch (which_alternative)
17247 if (<mask_applied>)
17248 ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
17250 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
17254 ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
17257 gcc_unreachable ();
17260 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
17261 output_asm_insn (buf, operands);
17264 [(set_attr "isa" "noavx,avx,avx")
17265 (set_attr "type" "sselog")
17266 (set (attr "prefix_data16")
17268 (and (eq_attr "alternative" "0")
17269 (eq_attr "mode" "TI"))
17271 (const_string "*")))
17272 (set_attr "prefix" "<mask_prefix3>,evex")
17274 (cond [(match_test "TARGET_AVX2")
17275 (const_string "<sseinsnmode>")
17276 (match_test "TARGET_AVX")
17278 (match_test "<MODE_SIZE> > 16")
17279 (const_string "V8SF")
17280 (const_string "<sseinsnmode>"))
17281 (ior (not (match_test "TARGET_SSE2"))
17282 (match_test "optimize_function_for_size_p (cfun)"))
17283 (const_string "V4SF")
17285 (const_string "<sseinsnmode>")))])
17287 (define_insn "*<code><mode>3"
17288 [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
17289 (any_logic:VI12_AVX_AVX512F
17290 (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
17291 (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
17292 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
17297 const char *ssesuffix;
17299 switch (get_attr_mode (insn))
17302 gcc_assert (TARGET_AVX512F);
17305 gcc_assert (TARGET_AVX2);
17308 gcc_assert (TARGET_SSE2);
17310 switch (<MODE>mode)
17320 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
17323 gcc_unreachable ();
17328 gcc_assert (TARGET_AVX);
17331 gcc_assert (TARGET_SSE);
17337 gcc_unreachable ();
17340 switch (which_alternative)
17343 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
17347 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
17350 gcc_unreachable ();
17353 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
17354 output_asm_insn (buf, operands);
17357 [(set_attr "isa" "noavx,avx,avx")
17358 (set_attr "type" "sselog")
17359 (set (attr "prefix_data16")
17361 (and (eq_attr "alternative" "0")
17362 (eq_attr "mode" "TI"))
17364 (const_string "*")))
17365 (set_attr "prefix" "orig,vex,evex")
17367 (cond [(match_test "TARGET_AVX2")
17368 (const_string "<sseinsnmode>")
17369 (match_test "TARGET_AVX")
17371 (match_test "<MODE_SIZE> > 16")
17372 (const_string "V8SF")
17373 (const_string "<sseinsnmode>"))
17374 (ior (not (match_test "TARGET_SSE2"))
17375 (match_test "optimize_function_for_size_p (cfun)"))
17376 (const_string "V4SF")
17378 (const_string "<sseinsnmode>")))])
17380 (define_insn "<code>v1ti3"
17381 [(set (match_operand:V1TI 0 "register_operand" "=x,x,v")
17383 (match_operand:V1TI 1 "register_operand" "%0,x,v")
17384 (match_operand:V1TI 2 "vector_operand" "xBm,xm,vm")))]
17387 p<logic>\t{%2, %0|%0, %2}
17388 vp<logic>\t{%2, %1, %0|%0, %1, %2}
17389 vp<logic>d\t{%2, %1, %0|%0, %1, %2}"
17390 [(set_attr "isa" "noavx,avx,avx512vl")
17391 (set_attr "prefix" "orig,vex,evex")
17392 (set_attr "prefix_data16" "1,*,*")
17393 (set_attr "type" "sselog")
17394 (set_attr "mode" "TI")])
17396 (define_expand "one_cmplv1ti2"
17397 [(set (match_operand:V1TI 0 "register_operand")
17398 (xor:V1TI (match_operand:V1TI 1 "register_operand")
17402 operands[2] = force_reg (V1TImode, CONSTM1_RTX (V1TImode));
;; Scalar mask modes used as the zero-extension destination in the
;; *_testm/_testnm *_zext patterns below.  HI is always available
;; (base AVX512F has 16-bit mask registers); SI and DI masks exist
;; only with AVX512BW.
17405 (define_mode_iterator AVX512ZEXTMASK
17406 [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
17408 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
17409 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
17410 (unspec:<avx512fmaskmode>
17411 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
17412 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
17415 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
17416 [(set_attr "prefix" "evex")
17417 (set_attr "mode" "<sseinsnmode>")])
17419 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
17420 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
17421 (unspec:<avx512fmaskmode>
17422 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
17423 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
17426 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
17427 [(set_attr "prefix" "evex")
17428 (set_attr "mode" "<sseinsnmode>")])
17430 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext"
17431 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
17432 (zero_extend:AVX512ZEXTMASK
17433 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
17434 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
17435 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
17438 && (<AVX512ZEXTMASK:MODE_SIZE>
17439 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
17440 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17441 [(set_attr "prefix" "evex")
17442 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
17444 (define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask"
17445 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
17446 (zero_extend:AVX512ZEXTMASK
17447 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
17448 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
17449 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
17450 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
17452 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
17454 && (<AVX512ZEXTMASK:MODE_SIZE>
17455 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
17456 "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
17457 [(set_attr "prefix" "evex")
17458 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
17460 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext"
17461 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
17462 (zero_extend:AVX512ZEXTMASK
17463 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
17464 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
17465 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
17468 && (<AVX512ZEXTMASK:MODE_SIZE>
17469 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
17470 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17471 [(set_attr "prefix" "evex")
17472 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
17474 (define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask"
17475 [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
17476 (zero_extend:AVX512ZEXTMASK
17477 (and:<VI1248_AVX512VLBW:avx512fmaskmode>
17478 (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
17479 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
17480 (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
17482 (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
17484 && (<AVX512ZEXTMASK:MODE_SIZE>
17485 > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
17486 "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
17487 [(set_attr "prefix" "evex")
17488 (set_attr "mode" "<VI1248_AVX512VLBW:sseinsnmode>")])
17490 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17492 ;; Parallel integral element swizzling
17494 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17496 (define_expand "vec_pack_trunc_<mode>"
17497 [(match_operand:<ssepackmode> 0 "register_operand")
17498 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
17499 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
17502 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
17503 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
17504 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
17508 (define_expand "vec_pack_trunc_qi"
17510 [(set (match_operand:HI 0 "register_operand")
17512 (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand"))
17514 (zero_extend:HI (match_operand:QI 1 "register_operand"))))
17515 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
17518 (define_expand "vec_pack_trunc_<mode>"
17520 [(set (match_operand:<DOUBLEMASKMODE> 0 "register_operand")
17521 (ior:<DOUBLEMASKMODE>
17522 (ashift:<DOUBLEMASKMODE>
17523 (zero_extend:<DOUBLEMASKMODE>
17524 (match_operand:SWI24 2 "register_operand"))
17526 (zero_extend:<DOUBLEMASKMODE>
17527 (match_operand:SWI24 1 "register_operand"))))
17528 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
17531 operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
17534 (define_expand "vec_pack_sbool_trunc_qi"
17535 [(match_operand:QI 0 "register_operand")
17536 (match_operand:QI 1 "register_operand")
17537 (match_operand:QI 2 "register_operand")
17538 (match_operand:QI 3 "const_int_operand")]
17541 HOST_WIDE_INT nunits = INTVAL (operands[3]);
17542 rtx mask, tem1, tem2;
17543 if (nunits != 8 && nunits != 4)
17545 mask = gen_reg_rtx (QImode);
17546 emit_move_insn (mask, GEN_INT ((1 << (nunits / 2)) - 1));
17547 tem1 = gen_reg_rtx (QImode);
17548 emit_insn (gen_kandqi (tem1, operands[1], mask));
17549 if (TARGET_AVX512DQ)
17551 tem2 = gen_reg_rtx (QImode);
17552 emit_insn (gen_kashiftqi (tem2, operands[2],
17553 GEN_INT (nunits / 2)));
17557 tem2 = gen_reg_rtx (HImode);
17558 emit_insn (gen_kashifthi (tem2, lowpart_subreg (HImode, operands[2],
17560 GEN_INT (nunits / 2)));
17561 tem2 = lowpart_subreg (QImode, tem2, HImode);
17563 emit_insn (gen_kiorqi (operands[0], tem1, tem2));
17567 (define_insn "<sse2_avx2>_packsswb<mask_name>"
17568 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
17569 (vec_concat:VI1_AVX512
17570 (ss_truncate:<ssehalfvecmode>
17571 (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
17572 (ss_truncate:<ssehalfvecmode>
17573 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
17574 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
17576 packsswb\t{%2, %0|%0, %2}
17577 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17578 [(set_attr "isa" "noavx,avx")
17579 (set_attr "type" "sselog")
17580 (set_attr "prefix_data16" "1,*")
17581 (set_attr "prefix" "orig,<mask_prefix>")
17582 (set_attr "mode" "<sseinsnmode>")])
17584 (define_insn "<sse2_avx2>_packssdw<mask_name>"
17585 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
17586 (vec_concat:VI2_AVX2
17587 (ss_truncate:<ssehalfvecmode>
17588 (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
17589 (ss_truncate:<ssehalfvecmode>
17590 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
17591 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
17593 packssdw\t{%2, %0|%0, %2}
17594 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17595 [(set_attr "isa" "noavx,avx")
17596 (set_attr "type" "sselog")
17597 (set_attr "prefix_data16" "1,*")
17598 (set_attr "prefix" "orig,<mask_prefix>")
17599 (set_attr "mode" "<sseinsnmode>")])
17601 (define_insn "<sse2_avx2>_packuswb<mask_name>"
17602 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
17603 (vec_concat:VI1_AVX512
17604 (us_truncate:<ssehalfvecmode>
17605 (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
17606 (us_truncate:<ssehalfvecmode>
17607 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
17608 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
17610 packuswb\t{%2, %0|%0, %2}
17611 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17612 [(set_attr "isa" "noavx,avx")
17613 (set_attr "type" "sselog")
17614 (set_attr "prefix_data16" "1,*")
17615 (set_attr "prefix" "orig,<mask_prefix>")
17616 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw on 512-bit vectors: interleave the HIGH 8 bytes of each
;; 128-bit lane, alternating bytes from operand 1 and operand 2 (indices
;; 0..63 select from op1, 64..127 from op2 in the implied concatenation).
;; NOTE(review): the vec_select/vec_concat header lines and the
;; TARGET_AVX512BW condition appear dropped in extraction — confirm upstream.
17618 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
17619 [(set (match_operand:V64QI 0 "register_operand" "=v")
17622 (match_operand:V64QI 1 "register_operand" "v")
17623 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
17624 (parallel [(const_int 8) (const_int 72)
17625 (const_int 9) (const_int 73)
17626 (const_int 10) (const_int 74)
17627 (const_int 11) (const_int 75)
17628 (const_int 12) (const_int 76)
17629 (const_int 13) (const_int 77)
17630 (const_int 14) (const_int 78)
17631 (const_int 15) (const_int 79)
17632 (const_int 24) (const_int 88)
17633 (const_int 25) (const_int 89)
17634 (const_int 26) (const_int 90)
17635 (const_int 27) (const_int 91)
17636 (const_int 28) (const_int 92)
17637 (const_int 29) (const_int 93)
17638 (const_int 30) (const_int 94)
17639 (const_int 31) (const_int 95)
17640 (const_int 40) (const_int 104)
17641 (const_int 41) (const_int 105)
17642 (const_int 42) (const_int 106)
17643 (const_int 43) (const_int 107)
17644 (const_int 44) (const_int 108)
17645 (const_int 45) (const_int 109)
17646 (const_int 46) (const_int 110)
17647 (const_int 47) (const_int 111)
17648 (const_int 56) (const_int 120)
17649 (const_int 57) (const_int 121)
17650 (const_int 58) (const_int 122)
17651 (const_int 59) (const_int 123)
17652 (const_int 60) (const_int 124)
17653 (const_int 61) (const_int 125)
17654 (const_int 62) (const_int 126)
17655 (const_int 63) (const_int 127)])))]
17657 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17658 [(set_attr "type" "sselog")
17659 (set_attr "prefix" "evex")
17660 (set_attr "mode" "XI")])
;; vpunpckhbw on 256-bit vectors: interleave the HIGH 8 bytes of each of
;; the two 128-bit lanes (op1 byte, op2 byte, ...).  Masking allowed under
;; AVX512VL+AVX512BW via <mask_name>.
17662 (define_insn "avx2_interleave_highv32qi<mask_name>"
17663 [(set (match_operand:V32QI 0 "register_operand" "=Yw")
17666 (match_operand:V32QI 1 "register_operand" "Yw")
17667 (match_operand:V32QI 2 "nonimmediate_operand" "Ywm"))
17668 (parallel [(const_int 8) (const_int 40)
17669 (const_int 9) (const_int 41)
17670 (const_int 10) (const_int 42)
17671 (const_int 11) (const_int 43)
17672 (const_int 12) (const_int 44)
17673 (const_int 13) (const_int 45)
17674 (const_int 14) (const_int 46)
17675 (const_int 15) (const_int 47)
17676 (const_int 24) (const_int 56)
17677 (const_int 25) (const_int 57)
17678 (const_int 26) (const_int 58)
17679 (const_int 27) (const_int 59)
17680 (const_int 28) (const_int 60)
17681 (const_int 29) (const_int 61)
17682 (const_int 30) (const_int 62)
17683 (const_int 31) (const_int 63)])))]
17684 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
17685 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17686 [(set_attr "type" "sselog")
17687 (set_attr "prefix" "<mask_prefix>")
17688 (set_attr "mode" "OI")])
;; punpckhbw/vpunpckhbw on 128-bit vectors: interleave the high 8 bytes of
;; operands 1 and 2.  Alternative 0 = legacy SSE2 two-operand form;
;; alternative 1 = VEX/EVEX three-operand form.
;; NOTE(review): the `"@` template opener appears dropped in extraction.
17690 (define_insn "vec_interleave_highv16qi<mask_name>"
17691 [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
17694 (match_operand:V16QI 1 "register_operand" "0,Yw")
17695 (match_operand:V16QI 2 "vector_operand" "xBm,Ywm"))
17696 (parallel [(const_int 8) (const_int 24)
17697 (const_int 9) (const_int 25)
17698 (const_int 10) (const_int 26)
17699 (const_int 11) (const_int 27)
17700 (const_int 12) (const_int 28)
17701 (const_int 13) (const_int 29)
17702 (const_int 14) (const_int 30)
17703 (const_int 15) (const_int 31)])))]
17704 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
17706 punpckhbw\t{%2, %0|%0, %2}
17707 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17708 [(set_attr "isa" "noavx,avx")
17709 (set_attr "type" "sselog")
17710 (set_attr "prefix_data16" "1,*")
17711 (set_attr "prefix" "orig,<mask_prefix>")
17712 (set_attr "mode" "TI")])
;; vpunpcklbw on 512-bit vectors: interleave the LOW 8 bytes of each
;; 128-bit lane of operands 1 and 2.
;; NOTE(review): vec_select/vec_concat header lines and the TARGET
;; condition appear dropped in extraction — confirm against upstream.
17714 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
17715 [(set (match_operand:V64QI 0 "register_operand" "=v")
17718 (match_operand:V64QI 1 "register_operand" "v")
17719 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
17720 (parallel [(const_int 0) (const_int 64)
17721 (const_int 1) (const_int 65)
17722 (const_int 2) (const_int 66)
17723 (const_int 3) (const_int 67)
17724 (const_int 4) (const_int 68)
17725 (const_int 5) (const_int 69)
17726 (const_int 6) (const_int 70)
17727 (const_int 7) (const_int 71)
17728 (const_int 16) (const_int 80)
17729 (const_int 17) (const_int 81)
17730 (const_int 18) (const_int 82)
17731 (const_int 19) (const_int 83)
17732 (const_int 20) (const_int 84)
17733 (const_int 21) (const_int 85)
17734 (const_int 22) (const_int 86)
17735 (const_int 23) (const_int 87)
17736 (const_int 32) (const_int 96)
17737 (const_int 33) (const_int 97)
17738 (const_int 34) (const_int 98)
17739 (const_int 35) (const_int 99)
17740 (const_int 36) (const_int 100)
17741 (const_int 37) (const_int 101)
17742 (const_int 38) (const_int 102)
17743 (const_int 39) (const_int 103)
17744 (const_int 48) (const_int 112)
17745 (const_int 49) (const_int 113)
17746 (const_int 50) (const_int 114)
17747 (const_int 51) (const_int 115)
17748 (const_int 52) (const_int 116)
17749 (const_int 53) (const_int 117)
17750 (const_int 54) (const_int 118)
17751 (const_int 55) (const_int 119)])))]
17753 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17754 [(set_attr "type" "sselog")
17755 (set_attr "prefix" "evex")
17756 (set_attr "mode" "XI")])
;; vpunpcklbw on 256-bit vectors: interleave the LOW 8 bytes of each
;; 128-bit lane of operands 1 and 2.
17758 (define_insn "avx2_interleave_lowv32qi<mask_name>"
17759 [(set (match_operand:V32QI 0 "register_operand" "=Yw")
17762 (match_operand:V32QI 1 "register_operand" "Yw")
17763 (match_operand:V32QI 2 "nonimmediate_operand" "Ywm"))
17764 (parallel [(const_int 0) (const_int 32)
17765 (const_int 1) (const_int 33)
17766 (const_int 2) (const_int 34)
17767 (const_int 3) (const_int 35)
17768 (const_int 4) (const_int 36)
17769 (const_int 5) (const_int 37)
17770 (const_int 6) (const_int 38)
17771 (const_int 7) (const_int 39)
17772 (const_int 16) (const_int 48)
17773 (const_int 17) (const_int 49)
17774 (const_int 18) (const_int 50)
17775 (const_int 19) (const_int 51)
17776 (const_int 20) (const_int 52)
17777 (const_int 21) (const_int 53)
17778 (const_int 22) (const_int 54)
17779 (const_int 23) (const_int 55)])))]
17780 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
17781 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17782 [(set_attr "type" "sselog")
17783 (set_attr "prefix" "maybe_vex")
17784 (set_attr "mode" "OI")])
;; punpcklbw/vpunpcklbw on 128-bit vectors: interleave the low 8 bytes
;; of operands 1 and 2 (noavx two-operand / avx three-operand forms).
17786 (define_insn "vec_interleave_lowv16qi<mask_name>"
17787 [(set (match_operand:V16QI 0 "register_operand" "=x,Yw")
17790 (match_operand:V16QI 1 "register_operand" "0,Yw")
17791 (match_operand:V16QI 2 "vector_operand" "xBm,Ywm"))
17792 (parallel [(const_int 0) (const_int 16)
17793 (const_int 1) (const_int 17)
17794 (const_int 2) (const_int 18)
17795 (const_int 3) (const_int 19)
17796 (const_int 4) (const_int 20)
17797 (const_int 5) (const_int 21)
17798 (const_int 6) (const_int 22)
17799 (const_int 7) (const_int 23)])))]
17800 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
17802 punpcklbw\t{%2, %0|%0, %2}
17803 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17804 [(set_attr "isa" "noavx,avx")
17805 (set_attr "type" "sselog")
17806 (set_attr "prefix_data16" "1,*")
17807 (set_attr "prefix" "orig,vex")
17808 (set_attr "mode" "TI")])
;; vpunpckhwd on 512-bit 16-bit-element vectors (V32_512 covers the HI/HF/BF
;; variants): interleave the high 4 words of each 128-bit lane.
;; NOTE(review): the TARGET condition line appears dropped in extraction.
17810 (define_insn "avx512bw_interleave_high<mode><mask_name>"
17811 [(set (match_operand:V32_512 0 "register_operand" "=v")
17812 (vec_select:V32_512
17813 (vec_concat:<ssedoublevecmode>
17814 (match_operand:V32_512 1 "register_operand" "v")
17815 (match_operand:V32_512 2 "nonimmediate_operand" "vm"))
17816 (parallel [(const_int 4) (const_int 36)
17817 (const_int 5) (const_int 37)
17818 (const_int 6) (const_int 38)
17819 (const_int 7) (const_int 39)
17820 (const_int 12) (const_int 44)
17821 (const_int 13) (const_int 45)
17822 (const_int 14) (const_int 46)
17823 (const_int 15) (const_int 47)
17824 (const_int 20) (const_int 52)
17825 (const_int 21) (const_int 53)
17826 (const_int 22) (const_int 54)
17827 (const_int 23) (const_int 55)
17828 (const_int 28) (const_int 60)
17829 (const_int 29) (const_int 61)
17830 (const_int 30) (const_int 62)
17831 (const_int 31) (const_int 63)])))]
17833 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17834 [(set_attr "type" "sselog")
17835 (set_attr "prefix" "evex")
17836 (set_attr "mode" "XI")])
;; vpunpckhwd on 256-bit 16-bit-element vectors: interleave the high
;; 4 words of each 128-bit lane of operands 1 and 2.
17838 (define_insn "avx2_interleave_high<mode><mask_name>"
17839 [(set (match_operand:V16_256 0 "register_operand" "=Yw")
17840 (vec_select:V16_256
17841 (vec_concat:<ssedoublevecmode>
17842 (match_operand:V16_256 1 "register_operand" "Yw")
17843 (match_operand:V16_256 2 "nonimmediate_operand" "Ywm"))
17844 (parallel [(const_int 4) (const_int 20)
17845 (const_int 5) (const_int 21)
17846 (const_int 6) (const_int 22)
17847 (const_int 7) (const_int 23)
17848 (const_int 12) (const_int 28)
17849 (const_int 13) (const_int 29)
17850 (const_int 14) (const_int 30)
17851 (const_int 15) (const_int 31)])))]
17852 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
17853 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17854 [(set_attr "type" "sselog")
17855 (set_attr "prefix" "maybe_evex")
17856 (set_attr "mode" "OI")])
;; punpckhwd/vpunpckhwd on 128-bit 16-bit-element vectors: interleave the
;; high 4 words of operands 1 and 2.  `@` prefix makes gen_vec_interleave_*
;; callable via the parameterized gen_* interface.
17858 (define_insn "@vec_interleave_high<mode><mask_name>"
17859 [(set (match_operand:V8_128 0 "register_operand" "=x,Yw")
17861 (vec_concat:<ssedoublevecmode>
17862 (match_operand:V8_128 1 "register_operand" "0,Yw")
17863 (match_operand:V8_128 2 "vector_operand" "xBm,Ywm"))
17864 (parallel [(const_int 4) (const_int 12)
17865 (const_int 5) (const_int 13)
17866 (const_int 6) (const_int 14)
17867 (const_int 7) (const_int 15)])))]
17868 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
17870 punpckhwd\t{%2, %0|%0, %2}
17871 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17872 [(set_attr "isa" "noavx,avx")
17873 (set_attr "type" "sselog")
17874 (set_attr "prefix_data16" "1,*")
17875 (set_attr "prefix" "orig,maybe_vex")
17876 (set_attr "mode" "TI")])
;; vpunpcklwd on 512-bit 16-bit-element vectors: interleave the LOW
;; 4 words of each 128-bit lane of operands 1 and 2.
;; NOTE(review): the TARGET condition line appears dropped in extraction.
17878 (define_insn "<mask_codefor>avx512bw_interleave_low<mode><mask_name>"
17879 [(set (match_operand:V32_512 0 "register_operand" "=v")
17880 (vec_select:V32_512
17881 (vec_concat:<ssedoublevecmode>
17882 (match_operand:V32_512 1 "register_operand" "v")
17883 (match_operand:V32_512 2 "nonimmediate_operand" "vm"))
17884 (parallel [(const_int 0) (const_int 32)
17885 (const_int 1) (const_int 33)
17886 (const_int 2) (const_int 34)
17887 (const_int 3) (const_int 35)
17888 (const_int 8) (const_int 40)
17889 (const_int 9) (const_int 41)
17890 (const_int 10) (const_int 42)
17891 (const_int 11) (const_int 43)
17892 (const_int 16) (const_int 48)
17893 (const_int 17) (const_int 49)
17894 (const_int 18) (const_int 50)
17895 (const_int 19) (const_int 51)
17896 (const_int 24) (const_int 56)
17897 (const_int 25) (const_int 57)
17898 (const_int 26) (const_int 58)
17899 (const_int 27) (const_int 59)])))]
17901 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17902 [(set_attr "type" "sselog")
17903 (set_attr "prefix" "evex")
17904 (set_attr "mode" "XI")])
;; vpunpcklwd on 256-bit 16-bit-element vectors: interleave the LOW
;; 4 words of each 128-bit lane of operands 1 and 2.
17906 (define_insn "avx2_interleave_low<mode><mask_name>"
17907 [(set (match_operand:V16_256 0 "register_operand" "=Yw")
17908 (vec_select:V16_256
17909 (vec_concat:<ssedoublevecmode>
17910 (match_operand:V16_256 1 "register_operand" "Yw")
17911 (match_operand:V16_256 2 "nonimmediate_operand" "Ywm"))
17912 (parallel [(const_int 0) (const_int 16)
17913 (const_int 1) (const_int 17)
17914 (const_int 2) (const_int 18)
17915 (const_int 3) (const_int 19)
17916 (const_int 8) (const_int 24)
17917 (const_int 9) (const_int 25)
17918 (const_int 10) (const_int 26)
17919 (const_int 11) (const_int 27)])))]
17920 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
17921 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17922 [(set_attr "type" "sselog")
17923 (set_attr "prefix" "maybe_evex")
17924 (set_attr "mode" "OI")])
;; punpcklwd/vpunpcklwd on 128-bit 16-bit-element vectors: interleave the
;; low 4 words of operands 1 and 2 (noavx / avx alternatives).
17926 (define_insn "@vec_interleave_low<mode><mask_name>"
17927 [(set (match_operand:V8_128 0 "register_operand" "=x,Yw")
17929 (vec_concat:<ssedoublevecmode>
17930 (match_operand:V8_128 1 "register_operand" "0,Yw")
17931 (match_operand:V8_128 2 "vector_operand" "xBm,Ywm"))
17932 (parallel [(const_int 0) (const_int 8)
17933 (const_int 1) (const_int 9)
17934 (const_int 2) (const_int 10)
17935 (const_int 3) (const_int 11)])))]
17936 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
17938 punpcklwd\t{%2, %0|%0, %2}
17939 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17940 [(set_attr "isa" "noavx,avx")
17941 (set_attr "type" "sselog")
17942 (set_attr "prefix_data16" "1,*")
17943 (set_attr "prefix" "orig,maybe_evex")
17944 (set_attr "mode" "TI")])
;; vpunpckhdq on 256-bit dword vectors: interleave the high 2 dwords of
;; each 128-bit lane of operands 1 and 2.
17946 (define_insn "avx2_interleave_highv8si<mask_name>"
17947 [(set (match_operand:V8SI 0 "register_operand" "=v")
17950 (match_operand:V8SI 1 "register_operand" "v")
17951 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
17952 (parallel [(const_int 2) (const_int 10)
17953 (const_int 3) (const_int 11)
17954 (const_int 6) (const_int 14)
17955 (const_int 7) (const_int 15)])))]
17956 "TARGET_AVX2 && <mask_avx512vl_condition>"
17957 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17958 [(set_attr "type" "sselog")
17959 (set_attr "prefix" "maybe_evex")
17960 (set_attr "mode" "OI")])
;; vpunpckhdq on 512-bit dword vectors: interleave the high 2 dwords of
;; each 128-bit lane.  NOTE(review): vec_select/vec_concat header and the
;; TARGET condition appear dropped in extraction.
17962 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
17963 [(set (match_operand:V16SI 0 "register_operand" "=v")
17966 (match_operand:V16SI 1 "register_operand" "v")
17967 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
17968 (parallel [(const_int 2) (const_int 18)
17969 (const_int 3) (const_int 19)
17970 (const_int 6) (const_int 22)
17971 (const_int 7) (const_int 23)
17972 (const_int 10) (const_int 26)
17973 (const_int 11) (const_int 27)
17974 (const_int 14) (const_int 30)
17975 (const_int 15) (const_int 31)])))]
17977 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17978 [(set_attr "type" "sselog")
17979 (set_attr "prefix" "evex")
17980 (set_attr "mode" "XI")])
;; punpckhdq/vpunpckhdq on 128-bit dword vectors: interleave the high
;; 2 dwords of operands 1 and 2 (noavx / avx alternatives).
17983 (define_insn "vec_interleave_highv4si<mask_name>"
17984 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
17987 (match_operand:V4SI 1 "register_operand" "0,v")
17988 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
17989 (parallel [(const_int 2) (const_int 6)
17990 (const_int 3) (const_int 7)])))]
17991 "TARGET_SSE2 && <mask_avx512vl_condition>"
17993 punpckhdq\t{%2, %0|%0, %2}
17994 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17995 [(set_attr "isa" "noavx,avx")
17996 (set_attr "type" "sselog")
17997 (set_attr "prefix_data16" "1,*")
17998 (set_attr "prefix" "orig,maybe_vex")
17999 (set_attr "mode" "TI")])
;; vpunpckldq on 256-bit dword vectors: interleave the LOW 2 dwords of
;; each 128-bit lane of operands 1 and 2.
18001 (define_insn "avx2_interleave_lowv8si<mask_name>"
18002 [(set (match_operand:V8SI 0 "register_operand" "=v")
18005 (match_operand:V8SI 1 "register_operand" "v")
18006 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
18007 (parallel [(const_int 0) (const_int 8)
18008 (const_int 1) (const_int 9)
18009 (const_int 4) (const_int 12)
18010 (const_int 5) (const_int 13)])))]
18011 "TARGET_AVX2 && <mask_avx512vl_condition>"
18012 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18013 [(set_attr "type" "sselog")
18014 (set_attr "prefix" "maybe_evex")
18015 (set_attr "mode" "OI")])
;; vpunpckldq on 512-bit dword vectors: interleave the LOW 2 dwords of
;; each 128-bit lane.  NOTE(review): vec_select/vec_concat header and the
;; TARGET condition appear dropped in extraction.
18017 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
18018 [(set (match_operand:V16SI 0 "register_operand" "=v")
18021 (match_operand:V16SI 1 "register_operand" "v")
18022 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
18023 (parallel [(const_int 0) (const_int 16)
18024 (const_int 1) (const_int 17)
18025 (const_int 4) (const_int 20)
18026 (const_int 5) (const_int 21)
18027 (const_int 8) (const_int 24)
18028 (const_int 9) (const_int 25)
18029 (const_int 12) (const_int 28)
18030 (const_int 13) (const_int 29)])))]
18032 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18033 [(set_attr "type" "sselog")
18034 (set_attr "prefix" "evex")
18035 (set_attr "mode" "XI")])
;; punpckldq/vpunpckldq on 128-bit dword vectors: interleave the low
;; 2 dwords of operands 1 and 2 (noavx / avx alternatives).
18037 (define_insn "vec_interleave_lowv4si<mask_name>"
18038 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
18041 (match_operand:V4SI 1 "register_operand" "0,v")
18042 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
18043 (parallel [(const_int 0) (const_int 4)
18044 (const_int 1) (const_int 5)])))]
18045 "TARGET_SSE2 && <mask_avx512vl_condition>"
18047 punpckldq\t{%2, %0|%0, %2}
18048 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18049 [(set_attr "isa" "noavx,avx")
18050 (set_attr "type" "sselog")
18051 (set_attr "prefix_data16" "1,*")
18052 (set_attr "prefix" "orig,vex")
18053 (set_attr "mode" "TI")])
;; Expander for whole-vector high interleave on 256-bit integer modes.
;; AVX2's punpck* work within 128-bit lanes, so the true "high half"
;; interleave is built from lane-local low+high interleaves followed by a
;; vperm2i128 (avx2_permv2ti) that gathers the two upper lanes:
;; selector 1 + (3 << 4) = lane1 of t1, lane1 of t2.
18055 (define_expand "vec_interleave_high<mode>"
18056 [(match_operand:VI_256 0 "register_operand")
18057 (match_operand:VI_256 1 "register_operand")
18058 (match_operand:VI_256 2 "nonimmediate_operand")]
18061 rtx t1 = gen_reg_rtx (<MODE>mode);
18062 rtx t2 = gen_reg_rtx (<MODE>mode);
18063 rtx t3 = gen_reg_rtx (V4DImode);
18064 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
18065 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
18066 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
18067 gen_lowpart (V4DImode, t2),
18068 GEN_INT (1 + (3 << 4))));
18069 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; Expander for whole-vector low interleave on 256-bit integer modes.
;; Mirror of vec_interleave_high<mode>: lane-local interleaves, then
;; vperm2i128 with selector 0 + (2 << 4) gathers the two LOWER lanes
;; (lane0 of t1, lane0 of t2).
18073 (define_expand "vec_interleave_low<mode>"
18074 [(match_operand:VI_256 0 "register_operand")
18075 (match_operand:VI_256 1 "register_operand")
18076 (match_operand:VI_256 2 "nonimmediate_operand")]
18079 rtx t1 = gen_reg_rtx (<MODE>mode);
18080 rtx t2 = gen_reg_rtx (<MODE>mode);
18081 rtx t3 = gen_reg_rtx (V4DImode);
18082 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
18083 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
18084 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
18085 gen_lowpart (V4DImode, t2),
18086 GEN_INT (0 + (2 << 4))));
18087 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
18091 ;; Modes handled by pinsr patterns.
;; V8HI/V8HF/V8BF need only SSE2 (pinsrw); byte/dword/qword element
;; insertion requires SSE4.1, and qword additionally requires 64-bit.
18092 (define_mode_iterator PINSR_MODE
18093 [(V16QI "TARGET_SSE4_1") V8HI V8HF V8BF
18094 (V4SI "TARGET_SSE4_1")
18095 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Pattern-name prefix for the pinsr insn: "sse2" for word-sized elements
;; (pinsrw exists since SSE2), "sse4_1" otherwise.
18097 (define_mode_attr sse2p4_1
18098 [(V16QI "sse4_1") (V8HI "sse2") (V8HF "sse2")
18099 (V8BF "sse2") (V4SI "sse4_1") (V2DI "sse4_1")])
;; ISA required for the EVEX-encoded pinsr alternatives: byte/word
;; insertions need AVX512BW, dword/qword need AVX512DQ.
18101 (define_mode_attr pinsr_evex_isa
18102 [(V16QI "avx512bw") (V8HI "avx512bw") (V8HF "avx512bw")
18103 (V8BF "avx512bw") (V4SI "avx512dq") (V2DI "avx512dq")])
18105 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Scalar element insert (pinsr*): operand 3 is a one-hot vec_merge mask;
;; exact_log2 converts it to the element index the instruction expects.
;; Alternatives: 0-1 legacy SSE (reg/mem source), 2-3 VEX, 4-5 EVEX
;; (<pinsr_evex_isa>), 6 insert from an XMM register (split later into
;; PBROADCAST + PBLEND under AVX2 — see the following define_split).
;; NOTE(review): several interior lines (the TARGET condition, case labels
;; of the switch) appear dropped in extraction — verify upstream.
18106 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
18107 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v,&x")
18108 (vec_merge:PINSR_MODE
18109 (vec_duplicate:PINSR_MODE
18110 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m,x"))
18111 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v,x")
18112 (match_operand:SI 3 "const_int_operand")))]
18114 && ((unsigned) exact_log2 (INTVAL (operands[3]))
18115 < GET_MODE_NUNITS (<MODE>mode))"
18117 HOST_WIDE_INT items = INTVAL (operands[3]);
18119 operands[3] = GEN_INT (exact_log2 (items));
18121 switch (which_alternative)
18124 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
18125 return "pinsr<sseintmodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
18128 return "pinsr<sseintmodesuffix>\t{%3, %2, %0|%0, %2, %3}";
18131 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
18132 return "vpinsr<sseintmodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
18136 return "vpinsr<sseintmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
18138 /* This pattern needs to be shadowed with vec_set{v8hi,v8hf}_0.  */
18139 gcc_assert (items > 1);
18142 gcc_unreachable ();
18145 [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>,avx2")
18146 (set_attr "type" "sselog")
18147 (set (attr "prefix_rex")
18149 (and (not (match_test "TARGET_AVX"))
18150 (match_test "GET_MODE_NUNITS (<MODE>mode) == 2"))
18152 (const_string "*")))
18153 (set (attr "prefix_data16")
18155 (and (not (match_test "TARGET_AVX"))
18156 (match_test "GET_MODE_NUNITS (<MODE>mode) == 8"))
18158 (const_string "*")))
18159 (set (attr "prefix_extra")
18161 (and (not (match_test "TARGET_AVX"))
18162 (match_test "GET_MODE_NUNITS (<MODE>mode) == 8"))
18164 (const_string "1")))
18165 (set_attr "length_immediate" "1")
18166 (set_attr "prefix" "orig,orig,vex,vex,evex,evex,vex")
18167 (set_attr "mode" "TI")
18168 (set (attr "enabled")
18169 (cond [(and (not (match_test "GET_MODE_NUNITS (<MODE>mode) == 8"))
18170 (eq_attr "alternative" "6"))
18171 (symbol_ref "false")
18173 (const_string "*")))])
18175 ;; For TARGET_AVX2, implement insert from XMM reg with PBROADCASTW + PBLENDW.
;; Post-reload split of alternative 6 of the pinsr pattern above: broadcast
;; the scalar into the destination, then vec_merge it with operand 1 using
;; the original one-hot mask.  Requires index > 0 (element 0 is handled by
;; the vec_set*_0 patterns).  NOTE(review): the "(define_split" opener line
;; appears dropped in extraction — verify against upstream sse.md.
18177 [(set (match_operand:V8_128 0 "sse_reg_operand")
18179 (vec_duplicate:V8_128
18180 (match_operand:<ssescalarmode> 2 "sse_reg_operand"))
18181 (match_operand:V8_128 1 "sse_reg_operand")
18182 (match_operand:SI 3 "const_int_operand")))]
18183 "TARGET_AVX2 && reload_completed
18184 && INTVAL (operands[3]) > 1
18185 && ((unsigned) exact_log2 (INTVAL (operands[3]))
18186 < GET_MODE_NUNITS (<MODE>mode))"
18187 [(set (match_dup 0)
18188 (vec_duplicate:V8_128 (match_dup 2)))
18190 (vec_merge:V8_128 (match_dup 0) (match_dup 1) (match_dup 3)))])
;; Masked 128-bit insert into a 512-bit vector.  Operand 3 (0..3) names
;; the quarter to replace; it is converted into the vec_merge bit-selector
;; expected by the _1_mask pattern: clear the 4 (dword elements) or 2
;; (qword elements) mask bits covering that quarter.
18192 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
18193 [(match_operand:AVX512_VEC 0 "register_operand")
18194 (match_operand:AVX512_VEC 1 "register_operand")
18195 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
18196 (match_operand:SI 3 "const_0_to_3_operand")
18197 (match_operand:AVX512_VEC 4 "register_operand")
18198 (match_operand:<avx512fmaskmode> 5 "register_operand")]
18201 int mask, selector;
18202 mask = INTVAL (operands[3]);
18203 selector = (GET_MODE_UNIT_SIZE (<MODE>mode) == 4
18204 ? 0xFFFF ^ (0x000F << mask * 4)
18205 : 0xFF ^ (0x03 << mask * 2));
18206 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
18207 (operands[0], operands[1], operands[2], GEN_INT (selector),
18208 operands[4], operands[5]));
;; Insert into quarter 0.  Alternative 0 keeps the rest of operand 1
;; (vinsert*).  Alternatives 1-2 have operand 1 == const0: the insert
;; degenerates to a zero-extending 128-bit move, chosen per mode —
;; vmovapd/vmovaps for FP, vmovdqa{32,64} (EVEX, alt 2) or vmovdqa (VEX,
;; alt 1) for integers, with unaligned variants when the memory operand
;; is misaligned.  NOTE(review): the case labels of the mode switch
;; appear dropped in extraction — verify upstream.
18212 (define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
18213 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v,x,Yv")
18214 (vec_merge:AVX512_VEC
18215 (match_operand:AVX512_VEC 1 "reg_or_0_operand" "v,C,C")
18216 (vec_duplicate:AVX512_VEC
18217 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm,xm,vm"))
18218 (match_operand:SI 3 "const_int_operand")))]
18220 && (INTVAL (operands[3])
18221 == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))"
18223 if (which_alternative == 0)
18224 return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
18225 switch (<MODE>mode)
18228 if (misaligned_operand (operands[2], <ssequartermode>mode))
18229 return "vmovupd\t{%2, %x0|%x0, %2}";
18231 return "vmovapd\t{%2, %x0|%x0, %2}";
18233 if (misaligned_operand (operands[2], <ssequartermode>mode))
18234 return "vmovups\t{%2, %x0|%x0, %2}";
18236 return "vmovaps\t{%2, %x0|%x0, %2}";
18238 if (misaligned_operand (operands[2], <ssequartermode>mode))
18239 return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}"
18240 : "vmovdqu\t{%2, %x0|%x0, %2}";
18242 return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
18243 : "vmovdqa\t{%2, %x0|%x0, %2}";
18245 if (misaligned_operand (operands[2], <ssequartermode>mode))
18246 return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}"
18247 : "vmovdqu\t{%2, %x0|%x0, %2}";
18249 return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
18250 : "vmovdqa\t{%2, %x0|%x0, %2}";
18252 gcc_unreachable ();
18255 [(set_attr "type" "sselog,ssemov,ssemov")
18256 (set_attr "length_immediate" "1,0,0")
18257 (set_attr "prefix" "evex,vex,evex")
18258 (set_attr "mode" "<sseinsnmode>,<ssequarterinsnmode>,<ssequarterinsnmode>")])
;; vinsert{f,i}{32x4,64x2} into any quarter: operand 3 is the vec_merge
;; bit-selector; the C body maps it back to the 0..3 immediate the
;; instruction takes (each selector clears the 4 dword / 2 qword bits of
;; the quarter being replaced).
18260 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
18261 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
18262 (vec_merge:AVX512_VEC
18263 (match_operand:AVX512_VEC 1 "register_operand" "v")
18264 (vec_duplicate:AVX512_VEC
18265 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
18266 (match_operand:SI 3 "const_int_operand")))]
18270 int selector = INTVAL (operands[3]);
18272 if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))
18274 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFF0F : 0xF3))
18276 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xF0FF : 0xCF))
18278 else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0x0FFF : 0x3F))
18281 gcc_unreachable ();
18283 operands[3] = GEN_INT (mask);
18285 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
18287 [(set_attr "type" "sselog")
18288 (set_attr "length_immediate" "1")
18289 (set_attr "prefix" "evex")
18290 (set_attr "mode" "<sseinsnmode>")])
;; Masked 256-bit insert into a 512-bit vector: operand 3 selects the low
;; (0) or high (1) half and dispatches to vec_set_lo/vec_set_hi.
;; NOTE(review): the if/else wrapping the two emits appears dropped in
;; extraction — verify against upstream sse.md.
18292 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
18293 [(match_operand:AVX512_VEC_2 0 "register_operand")
18294 (match_operand:AVX512_VEC_2 1 "register_operand")
18295 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
18296 (match_operand:SI 3 "const_0_to_1_operand")
18297 (match_operand:AVX512_VEC_2 4 "register_operand")
18298 (match_operand:<avx512fmaskmode> 5 "register_operand")]
18301 int mask = INTVAL (operands[3]);
18303 emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
18304 operands[2], operands[4],
18307 emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
18308 operands[2], operands[4],
;; Replace the LOW 256 bits of a 512-bit 32-bit-element vector with
;; operand 2, keeping elements 8..15 of operand 1 (vinsert*32x8, imm 0).
18313 (define_insn "vec_set_lo_<mode><mask_name>"
18314 [(set (match_operand:V16FI 0 "register_operand" "=v")
18316 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
18317 (vec_select:<ssehalfvecmode>
18318 (match_operand:V16FI 1 "register_operand" "v")
18319 (parallel [(const_int 8) (const_int 9)
18320 (const_int 10) (const_int 11)
18321 (const_int 12) (const_int 13)
18322 (const_int 14) (const_int 15)]))))]
18324 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
18325 [(set_attr "type" "sselog")
18326 (set_attr "length_immediate" "1")
18327 (set_attr "prefix" "evex")
18328 (set_attr "mode" "<sseinsnmode>")])
;; Replace the HIGH 256 bits of a 512-bit 32-bit-element vector with
;; operand 2, keeping elements 0..7 of operand 1 (vinsert*32x8, imm 1).
18330 (define_insn "vec_set_hi_<mode><mask_name>"
18331 [(set (match_operand:V16FI 0 "register_operand" "=v")
18333 (vec_select:<ssehalfvecmode>
18334 (match_operand:V16FI 1 "register_operand" "v")
18335 (parallel [(const_int 0) (const_int 1)
18336 (const_int 2) (const_int 3)
18337 (const_int 4) (const_int 5)
18338 (const_int 6) (const_int 7)]))
18339 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
18341 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
18342 [(set_attr "type" "sselog")
18343 (set_attr "length_immediate" "1")
18344 (set_attr "prefix" "evex")
18345 (set_attr "mode" "<sseinsnmode>")])
;; Replace the LOW 256 bits of a 512-bit 64-bit-element vector with
;; operand 2, keeping elements 4..7 of operand 1 (vinsert*64x4, imm 0).
18347 (define_insn "vec_set_lo_<mode><mask_name>"
18348 [(set (match_operand:V8FI 0 "register_operand" "=v")
18350 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
18351 (vec_select:<ssehalfvecmode>
18352 (match_operand:V8FI 1 "register_operand" "v")
18353 (parallel [(const_int 4) (const_int 5)
18354 (const_int 6) (const_int 7)]))))]
18356 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
18357 [(set_attr "type" "sselog")
18358 (set_attr "length_immediate" "1")
18359 (set_attr "prefix" "evex")
18360 (set_attr "mode" "XI")])
;; Replace the HIGH 256 bits of a 512-bit 64-bit-element vector with
;; operand 2, keeping elements 0..3 of operand 1 (vinsert*64x4, imm 1).
18362 (define_insn "vec_set_hi_<mode><mask_name>"
18363 [(set (match_operand:V8FI 0 "register_operand" "=v")
18365 (vec_select:<ssehalfvecmode>
18366 (match_operand:V8FI 1 "register_operand" "v")
18367 (parallel [(const_int 0) (const_int 1)
18368 (const_int 2) (const_int 3)]))
18369 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
18371 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
18372 [(set_attr "type" "sselog")
18373 (set_attr "length_immediate" "1")
18374 (set_attr "prefix" "evex")
18375 (set_attr "mode" "XI")])
;; Masked 2-bit shuf of 128-bit (64x2) chunks on 256-bit vectors: bit 0
;; of the immediate picks the chunk of operand 1 (elements 0-1 or 2-3),
;; bit 1 the chunk of operand 2 (elements 4-5 or 6-7 of the implied
;; concatenation); the expander spells these out as element indices for
;; the _1_mask pattern.
18377 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
18378 [(match_operand:VI8F_256 0 "register_operand")
18379 (match_operand:VI8F_256 1 "register_operand")
18380 (match_operand:VI8F_256 2 "nonimmediate_operand")
18381 (match_operand:SI 3 "const_0_to_3_operand")
18382 (match_operand:VI8F_256 4 "register_operand")
18383 (match_operand:QI 5 "register_operand")]
18386 int mask = INTVAL (operands[3]);
18387 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
18388 (operands[0], operands[1], operands[2],
18389 GEN_INT (((mask >> 0) & 1) * 2 + 0),
18390 GEN_INT (((mask >> 0) & 1) * 2 + 1),
18391 GEN_INT (((mask >> 1) & 1) * 2 + 4),
18392 GEN_INT (((mask >> 1) & 1) * 2 + 5),
18393 operands[4], operands[5]));
;; vshuf{f,i}64x2 on 256-bit vectors: operands 3-6 are element indices in
;; the concatenation of operands 1 and 2 (pairs must be even/odd-adjacent,
;; enforced by the insn condition); the C body packs them back into the
;; 2-bit immediate.  The mask==2 unmasked case (low of op1, low of op2)
;; is emitted as vblendps $240, which is cheaper.
18397 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
18398 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
18399 (vec_select:VI8F_256
18400 (vec_concat:<ssedoublemode>
18401 (match_operand:VI8F_256 1 "register_operand" "v")
18402 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
18403 (parallel [(match_operand 3 "const_0_to_3_operand")
18404 (match_operand 4 "const_0_to_3_operand")
18405 (match_operand 5 "const_4_to_7_operand")
18406 (match_operand 6 "const_4_to_7_operand")])))]
18408 && (INTVAL (operands[3]) & 1) == 0
18409 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
18410 && (INTVAL (operands[5]) & 1) == 0
18411 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1"
18414 mask = INTVAL (operands[3]) / 2;
18415 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
18416 operands[3] = GEN_INT (mask);
18417 if (INTVAL (operands[3]) == 2 && !<mask_applied>)
18418 return "vblendps\t{$240, %2, %1, %0|%0, %1, %2, 240}";
18419 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
18421 [(set_attr "type" "sselog")
18422 (set_attr "length_immediate" "1")
18423 (set_attr "prefix" "evex")
18424 (set_attr "mode" "XI")])
;; Masked 8-bit shuf of 128-bit (64x2) chunks on 512-bit vectors: each
;; 2-bit field of the immediate selects one of four chunks — fields 0-1
;; index operand 1 (elements 0-7), fields 2-3 index operand 2 (elements
;; 8-15 of the implied concatenation, hence the +8/+9).
18426 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
18427 [(match_operand:V8FI 0 "register_operand")
18428 (match_operand:V8FI 1 "register_operand")
18429 (match_operand:V8FI 2 "nonimmediate_operand")
18430 (match_operand:SI 3 "const_0_to_255_operand")
18431 (match_operand:V8FI 4 "register_operand")
18432 (match_operand:QI 5 "register_operand")]
18435 int mask = INTVAL (operands[3]);
18436 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
18437 (operands[0], operands[1], operands[2],
18438 GEN_INT (((mask >> 0) & 3) * 2),
18439 GEN_INT (((mask >> 0) & 3) * 2 + 1),
18440 GEN_INT (((mask >> 2) & 3) * 2),
18441 GEN_INT (((mask >> 2) & 3) * 2 + 1),
18442 GEN_INT (((mask >> 4) & 3) * 2 + 8),
18443 GEN_INT (((mask >> 4) & 3) * 2 + 9),
18444 GEN_INT (((mask >> 6) & 3) * 2 + 8),
18445 GEN_INT (((mask >> 6) & 3) * 2 + 9),
18446 operands[4], operands[5]));
;; vshuf{f,i}64x2 on 512-bit vectors: operands 3-10 are element indices
;; into the concatenation of operands 1 and 2 (even/odd-adjacent pairs,
;; two pairs from each source, enforced by the condition); the C body
;; re-packs them into the 8-bit immediate.
18450 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
18451 [(set (match_operand:V8FI 0 "register_operand" "=v")
18453 (vec_concat:<ssedoublemode>
18454 (match_operand:V8FI 1 "register_operand" "v")
18455 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
18456 (parallel [(match_operand 3 "const_0_to_7_operand")
18457 (match_operand 4 "const_0_to_7_operand")
18458 (match_operand 5 "const_0_to_7_operand")
18459 (match_operand 6 "const_0_to_7_operand")
18460 (match_operand 7 "const_8_to_15_operand")
18461 (match_operand 8 "const_8_to_15_operand")
18462 (match_operand 9 "const_8_to_15_operand")
18463 (match_operand 10 "const_8_to_15_operand")])))]
18465 && (INTVAL (operands[3]) & 1) == 0
18466 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
18467 && (INTVAL (operands[5]) & 1) == 0
18468 && INTVAL (operands[5]) == INTVAL (operands[6]) - 1
18469 && (INTVAL (operands[7]) & 1) == 0
18470 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
18471 && (INTVAL (operands[9]) & 1) == 0
18472 && INTVAL (operands[9]) == INTVAL (operands[10]) - 1"
18475 mask = INTVAL (operands[3]) / 2;
18476 mask |= INTVAL (operands[5]) / 2 << 2;
18477 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
18478 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
18479 operands[3] = GEN_INT (mask);
18481 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
18483 [(set_attr "type" "sselog")
18484 (set_attr "length_immediate" "1")
18485 (set_attr "prefix" "evex")
18486 (set_attr "mode" "<sseinsnmode>")])
;; Single-source variant of the above: both vshuf*64x2 sources are
;; operand 1 (a self-permute of 128-bit chunks).  Operands 2-9 are the
;; element indices (even/odd-adjacent pairs); re-packed into the 8-bit
;; immediate by the C body.
18488 (define_insn "*avx512f_shuf_<shuffletype>64x2_1<mask_name>_1"
18489 [(set (match_operand:V8FI 0 "register_operand" "=v")
18491 (match_operand:V8FI 1 "register_operand" "v")
18492 (parallel [(match_operand 2 "const_0_to_7_operand")
18493 (match_operand 3 "const_0_to_7_operand")
18494 (match_operand 4 "const_0_to_7_operand")
18495 (match_operand 5 "const_0_to_7_operand")
18496 (match_operand 6 "const_0_to_7_operand")
18497 (match_operand 7 "const_0_to_7_operand")
18498 (match_operand 8 "const_0_to_7_operand")
18499 (match_operand 9 "const_0_to_7_operand")])))]
18501 && (INTVAL (operands[2]) & 1) == 0
18502 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
18503 && (INTVAL (operands[4]) & 1) == 0
18504 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
18505 && (INTVAL (operands[6]) & 1) == 0
18506 && INTVAL (operands[6]) == INTVAL (operands[7]) - 1
18507 && (INTVAL (operands[8]) & 1) == 0
18508 && INTVAL (operands[8]) == INTVAL (operands[9]) - 1"
18511 mask = INTVAL (operands[2]) / 2;
18512 mask |= INTVAL (operands[4]) / 2 << 2;
18513 mask |= INTVAL (operands[6]) / 2 << 4;
18514 mask |= INTVAL (operands[8]) / 2 << 6;
18515 operands[2] = GEN_INT (mask);
18517 return "vshuf<shuffletype>64x2\t{%2, %1, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %1, %2}";
18519 [(set_attr "type" "sselog")
18520 (set_attr "length_immediate" "1")
18521 (set_attr "prefix" "evex")
18522 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the 256-bit masked vshuf{f,i}32x4 builtin: decodes the
;; 2-bit immediate (operand 3, one bit per 128-bit destination lane)
;; into eight explicit element indices and emits the corresponding
;; avx512vl_shuf_*32x4_1_mask insn.  Indices 0-7 address operand 1,
;; 8-15 address operand 2 of the concatenation.
18524 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
18525 [(match_operand:VI4F_256 0 "register_operand")
18526 (match_operand:VI4F_256 1 "register_operand")
18527 (match_operand:VI4F_256 2 "nonimmediate_operand")
18528 (match_operand:SI 3 "const_0_to_3_operand")
18529 (match_operand:VI4F_256 4 "register_operand")
18530 (match_operand:QI 5 "register_operand")]
18533 int mask = INTVAL (operands[3]);
18534 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
18535 (operands[0], operands[1], operands[2],
18536 GEN_INT (((mask >> 0) & 1) * 4 + 0),
18537 GEN_INT (((mask >> 0) & 1) * 4 + 1),
18538 GEN_INT (((mask >> 0) & 1) * 4 + 2),
18539 GEN_INT (((mask >> 0) & 1) * 4 + 3),
18540 GEN_INT (((mask >> 1) & 1) * 4 + 8),
18541 GEN_INT (((mask >> 1) & 1) * 4 + 9),
18542 GEN_INT (((mask >> 1) & 1) * 4 + 10),
18543 GEN_INT (((mask >> 1) & 1) * 4 + 11),
18544 operands[4], operands[5]));
;; 256-bit vshuf{f,i}32x4: picks one 128-bit lane from each of the two
;; concatenated sources.  The condition requires each group of four
;; indices to be a whole, aligned 128-bit lane (base divisible by 4,
;; then base+1, base+2, base+3).  The C fragment recomputes the 2-bit
;; immediate; when that immediate is 2 (low lane of %1, high lane of %2)
;; and no write-mask is applied, vblendps with imm 240 gives the same
;; result with a cheaper encoding.
18548 (define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
18549 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
18550 (vec_select:VI4F_256
18551 (vec_concat:<ssedoublemode>
18552 (match_operand:VI4F_256 1 "register_operand" "v")
18553 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
18554 (parallel [(match_operand 3 "const_0_to_7_operand")
18555 (match_operand 4 "const_0_to_7_operand")
18556 (match_operand 5 "const_0_to_7_operand")
18557 (match_operand 6 "const_0_to_7_operand")
18558 (match_operand 7 "const_8_to_15_operand")
18559 (match_operand 8 "const_8_to_15_operand")
18560 (match_operand 9 "const_8_to_15_operand")
18561 (match_operand 10 "const_8_to_15_operand")])))]
18563 && (INTVAL (operands[3]) & 3) == 0
18564 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
18565 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
18566 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
18567 && (INTVAL (operands[7]) & 3) == 0
18568 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
18569 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
18570 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3"
18573 mask = INTVAL (operands[3]) / 4;
18574 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
18575 operands[3] = GEN_INT (mask);
18577 if (INTVAL (operands[3]) == 2 && !<mask_applied>)
18578 return "vblendps\t{$240, %2, %1, %0|%0, %1, %2, 240}";
18580 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
18582 [(set_attr "type" "sselog")
18583 (set_attr "length_immediate" "1")
18584 (set_attr "prefix" "evex")
18585 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the 512-bit masked vshuf{f,i}32x4 builtin: decodes the
;; 8-bit immediate (2 bits per 128-bit destination lane) into sixteen
;; explicit element indices.  The low two destination lanes select from
;; operand 1 (indices 0-15), the high two from operand 2 (indices
;; 16-31 of the concatenation).
18587 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
18588 [(match_operand:V16FI 0 "register_operand")
18589 (match_operand:V16FI 1 "register_operand")
18590 (match_operand:V16FI 2 "nonimmediate_operand")
18591 (match_operand:SI 3 "const_0_to_255_operand")
18592 (match_operand:V16FI 4 "register_operand")
18593 (match_operand:HI 5 "register_operand")]
18596 int mask = INTVAL (operands[3]);
18597 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
18598 (operands[0], operands[1], operands[2],
18599 GEN_INT (((mask >> 0) & 3) * 4),
18600 GEN_INT (((mask >> 0) & 3) * 4 + 1),
18601 GEN_INT (((mask >> 0) & 3) * 4 + 2),
18602 GEN_INT (((mask >> 0) & 3) * 4 + 3),
18603 GEN_INT (((mask >> 2) & 3) * 4),
18604 GEN_INT (((mask >> 2) & 3) * 4 + 1),
18605 GEN_INT (((mask >> 2) & 3) * 4 + 2),
18606 GEN_INT (((mask >> 2) & 3) * 4 + 3),
18607 GEN_INT (((mask >> 4) & 3) * 4 + 16),
18608 GEN_INT (((mask >> 4) & 3) * 4 + 17),
18609 GEN_INT (((mask >> 4) & 3) * 4 + 18),
18610 GEN_INT (((mask >> 4) & 3) * 4 + 19),
18611 GEN_INT (((mask >> 6) & 3) * 4 + 16),
18612 GEN_INT (((mask >> 6) & 3) * 4 + 17),
18613 GEN_INT (((mask >> 6) & 3) * 4 + 18),
18614 GEN_INT (((mask >> 6) & 3) * 4 + 19),
18615 operands[4], operands[5]));
;; 512-bit vshuf{f,i}32x4: the destination's four 128-bit lanes are
;; chosen lane-wise, the low two from operand 1 (indices 0-15) and the
;; high two from operand 2 (indices 16-31).  Each group of four indices
;; must be an aligned, consecutive 128-bit lane; the C fragment folds
;; them back into the 8-bit immediate (2 bits per destination lane).
18619 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
18620 [(set (match_operand:V16FI 0 "register_operand" "=v")
18622 (vec_concat:<ssedoublemode>
18623 (match_operand:V16FI 1 "register_operand" "v")
18624 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
18625 (parallel [(match_operand 3 "const_0_to_15_operand")
18626 (match_operand 4 "const_0_to_15_operand")
18627 (match_operand 5 "const_0_to_15_operand")
18628 (match_operand 6 "const_0_to_15_operand")
18629 (match_operand 7 "const_0_to_15_operand")
18630 (match_operand 8 "const_0_to_15_operand")
18631 (match_operand 9 "const_0_to_15_operand")
18632 (match_operand 10 "const_0_to_15_operand")
18633 (match_operand 11 "const_16_to_31_operand")
18634 (match_operand 12 "const_16_to_31_operand")
18635 (match_operand 13 "const_16_to_31_operand")
18636 (match_operand 14 "const_16_to_31_operand")
18637 (match_operand 15 "const_16_to_31_operand")
18638 (match_operand 16 "const_16_to_31_operand")
18639 (match_operand 17 "const_16_to_31_operand")
18640 (match_operand 18 "const_16_to_31_operand")])))]
18642 && (INTVAL (operands[3]) & 3) == 0
18643 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
18644 && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
18645 && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
18646 && (INTVAL (operands[7]) & 3) == 0
18647 && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
18648 && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
18649 && INTVAL (operands[7]) == INTVAL (operands[10]) - 3
18650 && (INTVAL (operands[11]) & 3) == 0
18651 && INTVAL (operands[11]) == INTVAL (operands[12]) - 1
18652 && INTVAL (operands[11]) == INTVAL (operands[13]) - 2
18653 && INTVAL (operands[11]) == INTVAL (operands[14]) - 3
18654 && (INTVAL (operands[15]) & 3) == 0
18655 && INTVAL (operands[15]) == INTVAL (operands[16]) - 1
18656 && INTVAL (operands[15]) == INTVAL (operands[17]) - 2
18657 && INTVAL (operands[15]) == INTVAL (operands[18]) - 3"
18660 mask = INTVAL (operands[3]) / 4;
18661 mask |= INTVAL (operands[7]) / 4 << 2;
18662 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
18663 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
18664 operands[3] = GEN_INT (mask);
18666 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
18668 [(set_attr "type" "sselog")
18669 (set_attr "length_immediate" "1")
18670 (set_attr "prefix" "evex")
18671 (set_attr "mode" "<sseinsnmode>")])
;; Single-source form of the 512-bit vshuf{f,i}32x4: the same register
;; supplies both shuffle inputs, so all sixteen indices stay in 0-15.
;; Each group of four must be one aligned, consecutive 128-bit lane;
;; the C fragment rebuilds the 8-bit lane-select immediate and the insn
;; is emitted with %1 passed as both sources.
18673 (define_insn "*avx512f_shuf_<shuffletype>32x4_1<mask_name>_1"
18674 [(set (match_operand:V16FI 0 "register_operand" "=v")
18676 (match_operand:V16FI 1 "register_operand" "v")
18677 (parallel [(match_operand 2 "const_0_to_15_operand")
18678 (match_operand 3 "const_0_to_15_operand")
18679 (match_operand 4 "const_0_to_15_operand")
18680 (match_operand 5 "const_0_to_15_operand")
18681 (match_operand 6 "const_0_to_15_operand")
18682 (match_operand 7 "const_0_to_15_operand")
18683 (match_operand 8 "const_0_to_15_operand")
18684 (match_operand 9 "const_0_to_15_operand")
18685 (match_operand 10 "const_0_to_15_operand")
18686 (match_operand 11 "const_0_to_15_operand")
18687 (match_operand 12 "const_0_to_15_operand")
18688 (match_operand 13 "const_0_to_15_operand")
18689 (match_operand 14 "const_0_to_15_operand")
18690 (match_operand 15 "const_0_to_15_operand")
18691 (match_operand 16 "const_0_to_15_operand")
18692 (match_operand 17 "const_0_to_15_operand")])))]
18694 && (INTVAL (operands[2]) & 3) == 0
18695 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
18696 && INTVAL (operands[2]) == INTVAL (operands[4]) - 2
18697 && INTVAL (operands[2]) == INTVAL (operands[5]) - 3
18698 && (INTVAL (operands[6]) & 3) == 0
18699 && INTVAL (operands[6]) == INTVAL (operands[7]) - 1
18700 && INTVAL (operands[6]) == INTVAL (operands[8]) - 2
18701 && INTVAL (operands[6]) == INTVAL (operands[9]) - 3
18702 && (INTVAL (operands[10]) & 3) == 0
18703 && INTVAL (operands[10]) == INTVAL (operands[11]) - 1
18704 && INTVAL (operands[10]) == INTVAL (operands[12]) - 2
18705 && INTVAL (operands[10]) == INTVAL (operands[13]) - 3
18706 && (INTVAL (operands[14]) & 3) == 0
18707 && INTVAL (operands[14]) == INTVAL (operands[15]) - 1
18708 && INTVAL (operands[14]) == INTVAL (operands[16]) - 2
18709 && INTVAL (operands[14]) == INTVAL (operands[17]) - 3"
18712 mask = INTVAL (operands[2]) / 4;
18713 mask |= INTVAL (operands[6]) / 4 << 2;
18714 mask |= INTVAL (operands[10]) / 4 << 4;
18715 mask |= INTVAL (operands[14]) / 4 << 6;
18716 operands[2] = GEN_INT (mask);
18718 return "vshuf<shuffletype>32x4\t{%2, %1, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %1, %2}";
18720 [(set_attr "type" "sselog")
18721 (set_attr "length_immediate" "1")
18722 (set_attr "prefix" "evex")
18723 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the 512-bit masked vpshufd builtin: unpacks the 8-bit
;; immediate (2 bits per dword position) into sixteen explicit element
;; indices, repeating the same four selectors in each of the four
;; 128-bit lanes (offsets +0, +4, +8, +12).
18725 (define_expand "avx512f_pshufdv3_mask"
18726 [(match_operand:V16SI 0 "register_operand")
18727 (match_operand:V16SI 1 "nonimmediate_operand")
18728 (match_operand:SI 2 "const_0_to_255_operand")
18729 (match_operand:V16SI 3 "register_operand")
18730 (match_operand:HI 4 "register_operand")]
18733 int mask = INTVAL (operands[2]);
18734 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
18735 GEN_INT ((mask >> 0) & 3),
18736 GEN_INT ((mask >> 2) & 3),
18737 GEN_INT ((mask >> 4) & 3),
18738 GEN_INT ((mask >> 6) & 3),
18739 GEN_INT (((mask >> 0) & 3) + 4),
18740 GEN_INT (((mask >> 2) & 3) + 4),
18741 GEN_INT (((mask >> 4) & 3) + 4),
18742 GEN_INT (((mask >> 6) & 3) + 4),
18743 GEN_INT (((mask >> 0) & 3) + 8),
18744 GEN_INT (((mask >> 2) & 3) + 8),
18745 GEN_INT (((mask >> 4) & 3) + 8),
18746 GEN_INT (((mask >> 6) & 3) + 8),
18747 GEN_INT (((mask >> 0) & 3) + 12),
18748 GEN_INT (((mask >> 2) & 3) + 12),
18749 GEN_INT (((mask >> 4) & 3) + 12),
18750 GEN_INT (((mask >> 6) & 3) + 12),
18751 operands[3], operands[4]));
;; 512-bit vpshufd: matches only when the same four 2-bit selectors are
;; used in every 128-bit lane (each lane's indices equal the first
;; lane's plus 4/8/12), which is exactly what the hardware instruction
;; can do.  The C fragment re-encodes the first lane's selectors as the
;; 8-bit immediate.
18755 (define_insn "avx512f_pshufd_1<mask_name>"
18756 [(set (match_operand:V16SI 0 "register_operand" "=v")
18758 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
18759 (parallel [(match_operand 2 "const_0_to_3_operand")
18760 (match_operand 3 "const_0_to_3_operand")
18761 (match_operand 4 "const_0_to_3_operand")
18762 (match_operand 5 "const_0_to_3_operand")
18763 (match_operand 6 "const_4_to_7_operand")
18764 (match_operand 7 "const_4_to_7_operand")
18765 (match_operand 8 "const_4_to_7_operand")
18766 (match_operand 9 "const_4_to_7_operand")
18767 (match_operand 10 "const_8_to_11_operand")
18768 (match_operand 11 "const_8_to_11_operand")
18769 (match_operand 12 "const_8_to_11_operand")
18770 (match_operand 13 "const_8_to_11_operand")
18771 (match_operand 14 "const_12_to_15_operand")
18772 (match_operand 15 "const_12_to_15_operand")
18773 (match_operand 16 "const_12_to_15_operand")
18774 (match_operand 17 "const_12_to_15_operand")])))]
18776 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
18777 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
18778 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
18779 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
18780 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
18781 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
18782 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
18783 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
18784 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
18785 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
18786 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
18787 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
18790 mask |= INTVAL (operands[2]) << 0;
18791 mask |= INTVAL (operands[3]) << 2;
18792 mask |= INTVAL (operands[4]) << 4;
18793 mask |= INTVAL (operands[5]) << 6;
18794 operands[2] = GEN_INT (mask);
18796 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
18798 [(set_attr "type" "sselog1")
18799 (set_attr "prefix" "evex")
18800 (set_attr "length_immediate" "1")
18801 (set_attr "mode" "XI")])
;; Expander for the 256-bit masked vpshufd builtin: unpacks the 8-bit
;; immediate into eight explicit indices (same selectors in both
;; 128-bit lanes) and emits the masked avx2_pshufd_1 insn.
18803 (define_expand "avx512vl_pshufdv3_mask"
18804 [(match_operand:V8SI 0 "register_operand")
18805 (match_operand:V8SI 1 "nonimmediate_operand")
18806 (match_operand:SI 2 "const_0_to_255_operand")
18807 (match_operand:V8SI 3 "register_operand")
18808 (match_operand:QI 4 "register_operand")]
18811 int mask = INTVAL (operands[2]);
18812 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
18813 GEN_INT ((mask >> 0) & 3),
18814 GEN_INT ((mask >> 2) & 3),
18815 GEN_INT ((mask >> 4) & 3),
18816 GEN_INT ((mask >> 6) & 3),
18817 GEN_INT (((mask >> 0) & 3) + 4),
18818 GEN_INT (((mask >> 2) & 3) + 4),
18819 GEN_INT (((mask >> 4) & 3) + 4),
18820 GEN_INT (((mask >> 6) & 3) + 4),
18821 operands[3], operands[4]));
;; Expander for the unmasked 256-bit vpshufd builtin: same immediate
;; decoding as the masked variant above, emitting avx2_pshufd_1.
18825 (define_expand "avx2_pshufdv3"
18826 [(match_operand:V8SI 0 "register_operand")
18827 (match_operand:V8SI 1 "nonimmediate_operand")
18828 (match_operand:SI 2 "const_0_to_255_operand")]
18831 int mask = INTVAL (operands[2]);
18832 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
18833 GEN_INT ((mask >> 0) & 3),
18834 GEN_INT ((mask >> 2) & 3),
18835 GEN_INT ((mask >> 4) & 3),
18836 GEN_INT ((mask >> 6) & 3),
18837 GEN_INT (((mask >> 0) & 3) + 4),
18838 GEN_INT (((mask >> 2) & 3) + 4),
18839 GEN_INT (((mask >> 4) & 3) + 4),
18840 GEN_INT (((mask >> 6) & 3) + 4)));
;; 256-bit vpshufd: matches when the high 128-bit lane uses the same
;; four 2-bit selectors as the low lane (indices 6-9 equal 2-5 plus 4).
;; The C fragment re-encodes the selectors into the 8-bit immediate.
18844 (define_insn "avx2_pshufd_1<mask_name>"
18845 [(set (match_operand:V8SI 0 "register_operand" "=v")
18847 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
18848 (parallel [(match_operand 2 "const_0_to_3_operand")
18849 (match_operand 3 "const_0_to_3_operand")
18850 (match_operand 4 "const_0_to_3_operand")
18851 (match_operand 5 "const_0_to_3_operand")
18852 (match_operand 6 "const_4_to_7_operand")
18853 (match_operand 7 "const_4_to_7_operand")
18854 (match_operand 8 "const_4_to_7_operand")
18855 (match_operand 9 "const_4_to_7_operand")])))]
18857 && <mask_avx512vl_condition>
18858 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
18859 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
18860 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
18861 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
18864 mask |= INTVAL (operands[2]) << 0;
18865 mask |= INTVAL (operands[3]) << 2;
18866 mask |= INTVAL (operands[4]) << 4;
18867 mask |= INTVAL (operands[5]) << 6;
18868 operands[2] = GEN_INT (mask);
18870 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
18872 [(set_attr "type" "sselog1")
18873 (set_attr "prefix" "maybe_evex")
18874 (set_attr "length_immediate" "1")
18875 (set_attr "mode" "OI")])
;; Expander for the 128-bit masked vpshufd builtin: unpacks the 8-bit
;; immediate into four explicit indices and emits the masked
;; sse2_pshufd_1 insn.
18877 (define_expand "avx512vl_pshufd_mask"
18878 [(match_operand:V4SI 0 "register_operand")
18879 (match_operand:V4SI 1 "nonimmediate_operand")
18880 (match_operand:SI 2 "const_0_to_255_operand")
18881 (match_operand:V4SI 3 "register_operand")
18882 (match_operand:QI 4 "register_operand")]
18885 int mask = INTVAL (operands[2]);
18886 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
18887 GEN_INT ((mask >> 0) & 3),
18888 GEN_INT ((mask >> 2) & 3),
18889 GEN_INT ((mask >> 4) & 3),
18890 GEN_INT ((mask >> 6) & 3),
18891 operands[3], operands[4]));
;; Expander for the 128-bit pshufd builtin: unpacks the 8-bit immediate
;; (2 bits per dword) into four explicit element indices.
18895 (define_expand "sse2_pshufd"
18896 [(match_operand:V4SI 0 "register_operand")
18897 (match_operand:V4SI 1 "vector_operand")
18898 (match_operand:SI 2 "const_int_operand")]
18901 int mask = INTVAL (operands[2]);
18902 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
18903 GEN_INT ((mask >> 0) & 3),
18904 GEN_INT ((mask >> 2) & 3),
18905 GEN_INT ((mask >> 4) & 3),
18906 GEN_INT ((mask >> 6) & 3)));
;; 128-bit pshufd: four independent 2-bit dword selectors, re-encoded
;; into the immediate by the C fragment.  %v emits the VEX/EVEX prefix
;; form when AVX is enabled.
18910 (define_insn "sse2_pshufd_1<mask_name>"
18911 [(set (match_operand:V4SI 0 "register_operand" "=v")
18913 (match_operand:V4SI 1 "vector_operand" "vBm")
18914 (parallel [(match_operand 2 "const_0_to_3_operand")
18915 (match_operand 3 "const_0_to_3_operand")
18916 (match_operand 4 "const_0_to_3_operand")
18917 (match_operand 5 "const_0_to_3_operand")])))]
18918 "TARGET_SSE2 && <mask_avx512vl_condition>"
18921 mask |= INTVAL (operands[2]) << 0;
18922 mask |= INTVAL (operands[3]) << 2;
18923 mask |= INTVAL (operands[4]) << 4;
18924 mask |= INTVAL (operands[5]) << 6;
18925 operands[2] = GEN_INT (mask);
18927 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
18929 [(set_attr "type" "sselog1")
18930 (set_attr "prefix_data16" "1")
18931 (set_attr "prefix" "<mask_prefix2>")
18932 (set_attr "length_immediate" "1")
18933 (set_attr "mode" "TI")])
;; 512-bit vpshuflw, kept as an unspec (too many elements to describe
;; the permutation with an explicit vec_select parallel).
18935 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
18936 [(set (match_operand:V32HI 0 "register_operand" "=v")
18938 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
18939 (match_operand:SI 2 "const_0_to_255_operand")]
18942 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18943 [(set_attr "type" "sselog")
18944 (set_attr "prefix" "evex")
18945 (set_attr "mode" "XI")])
;; Expander for the 256-bit masked vpshuflw builtin: decodes the 8-bit
;; immediate into the four low-word selectors of each 128-bit lane
;; (high-lane indices are the low-lane ones plus 8); the high words of
;; each lane are left in place by the matched insn.
18947 (define_expand "avx512vl_pshuflwv3_mask"
18948 [(match_operand:V16HI 0 "register_operand")
18949 (match_operand:V16HI 1 "nonimmediate_operand")
18950 (match_operand:SI 2 "const_0_to_255_operand")
18951 (match_operand:V16HI 3 "register_operand")
18952 (match_operand:HI 4 "register_operand")]
18953 "TARGET_AVX512VL && TARGET_AVX512BW"
18955 int mask = INTVAL (operands[2]);
18956 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
18957 GEN_INT ((mask >> 0) & 3),
18958 GEN_INT ((mask >> 2) & 3),
18959 GEN_INT ((mask >> 4) & 3),
18960 GEN_INT ((mask >> 6) & 3),
18961 GEN_INT (((mask >> 0) & 3) + 8),
18962 GEN_INT (((mask >> 2) & 3) + 8),
18963 GEN_INT (((mask >> 4) & 3) + 8),
18964 GEN_INT (((mask >> 6) & 3) + 8),
18965 operands[3], operands[4]));
;; Expander for the unmasked 256-bit vpshuflw builtin: same immediate
;; decoding as the masked variant, emitting avx2_pshuflw_1.
18969 (define_expand "avx2_pshuflwv3"
18970 [(match_operand:V16HI 0 "register_operand")
18971 (match_operand:V16HI 1 "nonimmediate_operand")
18972 (match_operand:SI 2 "const_0_to_255_operand")]
18975 int mask = INTVAL (operands[2]);
18976 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
18977 GEN_INT ((mask >> 0) & 3),
18978 GEN_INT ((mask >> 2) & 3),
18979 GEN_INT ((mask >> 4) & 3),
18980 GEN_INT ((mask >> 6) & 3),
18981 GEN_INT (((mask >> 0) & 3) + 8),
18982 GEN_INT (((mask >> 2) & 3) + 8),
18983 GEN_INT (((mask >> 4) & 3) + 8),
18984 GEN_INT (((mask >> 6) & 3) + 8)));
;; 256-bit vpshuflw: shuffles the four low words within each 128-bit
;; lane (same 2-bit selectors in both lanes; high-lane indices equal
;; low-lane ones plus 8) while the high words pass through unchanged
;; (trailing const_int 15 in the parallel).  The C fragment re-encodes
;; the low-lane selectors as the immediate.
18988 (define_insn "avx2_pshuflw_1<mask_name>"
18989 [(set (match_operand:V16HI 0 "register_operand" "=Yw")
18991 (match_operand:V16HI 1 "nonimmediate_operand" "Ywm")
18992 (parallel [(match_operand 2 "const_0_to_3_operand")
18993 (match_operand 3 "const_0_to_3_operand")
18994 (match_operand 4 "const_0_to_3_operand")
18995 (match_operand 5 "const_0_to_3_operand")
19000 (match_operand 6 "const_8_to_11_operand")
19001 (match_operand 7 "const_8_to_11_operand")
19002 (match_operand 8 "const_8_to_11_operand")
19003 (match_operand 9 "const_8_to_11_operand")
19007 (const_int 15)])))]
19009 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
19010 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
19011 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
19012 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
19013 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
19016 mask |= INTVAL (operands[2]) << 0;
19017 mask |= INTVAL (operands[3]) << 2;
19018 mask |= INTVAL (operands[4]) << 4;
19019 mask |= INTVAL (operands[5]) << 6;
19020 operands[2] = GEN_INT (mask);
19022 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
19024 [(set_attr "type" "sselog")
19025 (set_attr "prefix" "maybe_evex")
19026 (set_attr "length_immediate" "1")
19027 (set_attr "mode" "OI")])
;; Expander for the 128-bit masked vpshuflw builtin: decodes the 8-bit
;; immediate into the four low-word selectors and emits the masked
;; sse2_pshuflw_1 insn.
19029 (define_expand "avx512vl_pshuflw_mask"
19030 [(match_operand:V8HI 0 "register_operand")
19031 (match_operand:V8HI 1 "nonimmediate_operand")
19032 (match_operand:SI 2 "const_0_to_255_operand")
19033 (match_operand:V8HI 3 "register_operand")
19034 (match_operand:QI 4 "register_operand")]
19035 "TARGET_AVX512VL && TARGET_AVX512BW"
19037 int mask = INTVAL (operands[2]);
19038 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
19039 GEN_INT ((mask >> 0) & 3),
19040 GEN_INT ((mask >> 2) & 3),
19041 GEN_INT ((mask >> 4) & 3),
19042 GEN_INT ((mask >> 6) & 3),
19043 operands[3], operands[4]));
;; Expander for the 128-bit pshuflw builtin: decodes the 8-bit
;; immediate into the four low-word selectors.
19047 (define_expand "sse2_pshuflw"
19048 [(match_operand:V8HI 0 "register_operand")
19049 (match_operand:V8HI 1 "vector_operand")
19050 (match_operand:SI 2 "const_int_operand")]
19053 int mask = INTVAL (operands[2]);
19054 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
19055 GEN_INT ((mask >> 0) & 3),
19056 GEN_INT ((mask >> 2) & 3),
19057 GEN_INT ((mask >> 4) & 3),
19058 GEN_INT ((mask >> 6) & 3)));
;; 128-bit pshuflw: shuffles the four low words (selectors in
;; operands 2-5); the four high words pass through unchanged.  The C
;; fragment re-encodes the selectors as the 8-bit immediate.
19062 (define_insn "sse2_pshuflw_1<mask_name>"
19063 [(set (match_operand:V8HI 0 "register_operand" "=Yw")
19065 (match_operand:V8HI 1 "vector_operand" "YwBm")
19066 (parallel [(match_operand 2 "const_0_to_3_operand")
19067 (match_operand 3 "const_0_to_3_operand")
19068 (match_operand 4 "const_0_to_3_operand")
19069 (match_operand 5 "const_0_to_3_operand")
19074 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
19077 mask |= INTVAL (operands[2]) << 0;
19078 mask |= INTVAL (operands[3]) << 2;
19079 mask |= INTVAL (operands[4]) << 4;
19080 mask |= INTVAL (operands[5]) << 6;
19081 operands[2] = GEN_INT (mask);
19083 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
19085 [(set_attr "type" "sselog")
19086 (set_attr "prefix_data16" "0")
19087 (set_attr "prefix_rep" "1")
19088 (set_attr "prefix" "maybe_vex")
19089 (set_attr "length_immediate" "1")
19090 (set_attr "mode" "TI")])
;; Expander for the unmasked 256-bit vpshufhw builtin: decodes the
;; 8-bit immediate into the four high-word selectors of each lane
;; (offsets +4 for the low lane, +12 for the high lane).
19092 (define_expand "avx2_pshufhwv3"
19093 [(match_operand:V16HI 0 "register_operand")
19094 (match_operand:V16HI 1 "nonimmediate_operand")
19095 (match_operand:SI 2 "const_0_to_255_operand")]
19098 int mask = INTVAL (operands[2]);
19099 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
19100 GEN_INT (((mask >> 0) & 3) + 4),
19101 GEN_INT (((mask >> 2) & 3) + 4),
19102 GEN_INT (((mask >> 4) & 3) + 4),
19103 GEN_INT (((mask >> 6) & 3) + 4),
19104 GEN_INT (((mask >> 0) & 3) + 12),
19105 GEN_INT (((mask >> 2) & 3) + 12),
19106 GEN_INT (((mask >> 4) & 3) + 12),
19107 GEN_INT (((mask >> 6) & 3) + 12)));
;; 512-bit vpshufhw, kept as an unspec (too many elements to describe
;; the permutation with an explicit vec_select parallel).
19111 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
19112 [(set (match_operand:V32HI 0 "register_operand" "=v")
19114 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
19115 (match_operand:SI 2 "const_0_to_255_operand")]
19118 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19119 [(set_attr "type" "sselog")
19120 (set_attr "prefix" "evex")
19121 (set_attr "mode" "XI")])
;; Expander for the 256-bit masked vpshufhw builtin: decodes the 8-bit
;; immediate into the four high-word selectors of each 128-bit lane
;; (+4 for the low lane, +12 for the high lane) and emits the masked
;; avx2_pshufhw_1 insn.
19123 (define_expand "avx512vl_pshufhwv3_mask"
19124 [(match_operand:V16HI 0 "register_operand")
19125 (match_operand:V16HI 1 "nonimmediate_operand")
19126 (match_operand:SI 2 "const_0_to_255_operand")
19127 (match_operand:V16HI 3 "register_operand")
19128 (match_operand:HI 4 "register_operand")]
19129 "TARGET_AVX512VL && TARGET_AVX512BW"
19131 int mask = INTVAL (operands[2]);
19132 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
19133 GEN_INT (((mask >> 0) & 3) + 4),
19134 GEN_INT (((mask >> 2) & 3) + 4),
19135 GEN_INT (((mask >> 4) & 3) + 4),
19136 GEN_INT (((mask >> 6) & 3) + 4),
19137 GEN_INT (((mask >> 0) & 3) + 12),
19138 GEN_INT (((mask >> 2) & 3) + 12),
19139 GEN_INT (((mask >> 4) & 3) + 12),
19140 GEN_INT (((mask >> 6) & 3) + 12),
19141 operands[3], operands[4]));
;; 256-bit vpshufhw: shuffles the four high words within each 128-bit
;; lane (low words pass through, leading const_int 0 in the parallel);
;; both lanes must use the same selectors (high-lane indices equal
;; low-lane ones plus 8).  The C fragment subtracts the +4 bias and
;; re-encodes the selectors as the immediate.
19145 (define_insn "avx2_pshufhw_1<mask_name>"
19146 [(set (match_operand:V16HI 0 "register_operand" "=Yw")
19148 (match_operand:V16HI 1 "nonimmediate_operand" "Ywm")
19149 (parallel [(const_int 0)
19153 (match_operand 2 "const_4_to_7_operand")
19154 (match_operand 3 "const_4_to_7_operand")
19155 (match_operand 4 "const_4_to_7_operand")
19156 (match_operand 5 "const_4_to_7_operand")
19161 (match_operand 6 "const_12_to_15_operand")
19162 (match_operand 7 "const_12_to_15_operand")
19163 (match_operand 8 "const_12_to_15_operand")
19164 (match_operand 9 "const_12_to_15_operand")])))]
19166 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
19167 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
19168 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
19169 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
19170 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
19173 mask |= (INTVAL (operands[2]) - 4) << 0;
19174 mask |= (INTVAL (operands[3]) - 4) << 2;
19175 mask |= (INTVAL (operands[4]) - 4) << 4;
19176 mask |= (INTVAL (operands[5]) - 4) << 6;
19177 operands[2] = GEN_INT (mask);
19179 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
19181 [(set_attr "type" "sselog")
19182 (set_attr "prefix" "maybe_evex")
19183 (set_attr "length_immediate" "1")
19184 (set_attr "mode" "OI")])
;; Expander for the 128-bit masked vpshufhw builtin: decodes the 8-bit
;; immediate into the four high-word selectors (+4 bias) and emits the
;; masked sse2_pshufhw_1 insn.
19186 (define_expand "avx512vl_pshufhw_mask"
19187 [(match_operand:V8HI 0 "register_operand")
19188 (match_operand:V8HI 1 "nonimmediate_operand")
19189 (match_operand:SI 2 "const_0_to_255_operand")
19190 (match_operand:V8HI 3 "register_operand")
19191 (match_operand:QI 4 "register_operand")]
19192 "TARGET_AVX512VL && TARGET_AVX512BW"
19194 int mask = INTVAL (operands[2]);
19195 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
19196 GEN_INT (((mask >> 0) & 3) + 4),
19197 GEN_INT (((mask >> 2) & 3) + 4),
19198 GEN_INT (((mask >> 4) & 3) + 4),
19199 GEN_INT (((mask >> 6) & 3) + 4),
19200 operands[3], operands[4]));
;; Expander for the 128-bit pshufhw builtin: decodes the 8-bit
;; immediate into the four high-word selectors (+4 bias).
19204 (define_expand "sse2_pshufhw"
19205 [(match_operand:V8HI 0 "register_operand")
19206 (match_operand:V8HI 1 "vector_operand")
19207 (match_operand:SI 2 "const_int_operand")]
19210 int mask = INTVAL (operands[2]);
19211 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
19212 GEN_INT (((mask >> 0) & 3) + 4),
19213 GEN_INT (((mask >> 2) & 3) + 4),
19214 GEN_INT (((mask >> 4) & 3) + 4),
19215 GEN_INT (((mask >> 6) & 3) + 4)));
;; 128-bit pshufhw: shuffles the four high words (selectors in
;; operands 2-5, biased by +4); the low words pass through (leading
;; const_int 0).  The C fragment removes the bias and re-encodes the
;; selectors as the 8-bit immediate.
19219 (define_insn "sse2_pshufhw_1<mask_name>"
19220 [(set (match_operand:V8HI 0 "register_operand" "=Yw")
19222 (match_operand:V8HI 1 "vector_operand" "YwBm")
19223 (parallel [(const_int 0)
19227 (match_operand 2 "const_4_to_7_operand")
19228 (match_operand 3 "const_4_to_7_operand")
19229 (match_operand 4 "const_4_to_7_operand")
19230 (match_operand 5 "const_4_to_7_operand")])))]
19231 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
19234 mask |= (INTVAL (operands[2]) - 4) << 0;
19235 mask |= (INTVAL (operands[3]) - 4) << 2;
19236 mask |= (INTVAL (operands[4]) - 4) << 4;
19237 mask |= (INTVAL (operands[5]) - 4) << 6;
19238 operands[2] = GEN_INT (mask);
19240 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
19242 [(set_attr "type" "sselog")
19243 (set_attr "prefix_rep" "1")
19244 (set_attr "prefix_data16" "0")
19245 (set_attr "prefix" "maybe_vex")
19246 (set_attr "length_immediate" "1")
19247 (set_attr "mode" "TI")])
;; Expander loading a scalar SI into element 0 of a V4SI with the
;; remaining elements zeroed; supplies the zero vector as operand 2.
19249 (define_expand "sse2_loadd"
19250 [(set (match_operand:V4SI 0 "register_operand")
19252 (vec_duplicate:V4SI
19253 (match_operand:SI 1 "nonimmediate_operand"))
19257 "operands[2] = CONST0_RTX (V4SImode);")
;; Merge a scalar SI (operand 2) into element 0 of vector operand 1.
;; Alternatives: movd from memory or GPR (zeroing variants), SSE movss
;; from memory or register, and 3-operand AVX vmovss.  GPR->vector
;; movd (alternative 1) is only preferred for speed when inter-unit
;; moves to vector registers are fast on the target.
19259 (define_insn "sse2_loadld"
19260 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x,x,v")
19262 (vec_duplicate:V4SI
19263 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
19264 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,v")
19268 %vmovd\t{%2, %0|%0, %2}
19269 %vmovd\t{%2, %0|%0, %2}
19270 movss\t{%2, %0|%0, %2}
19271 movss\t{%2, %0|%0, %2}
19272 vmovss\t{%2, %1, %0|%0, %1, %2}"
19273 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
19274 (set_attr "type" "ssemov")
19275 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
19276 (set_attr "mode" "TI,TI,V4SF,SF,SF")
19277 (set (attr "preferred_for_speed")
19278 (cond [(eq_attr "alternative" "1")
19279 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
19281 (symbol_ref "true")))])
19283 ;; QI and HI modes handled by pextr patterns.
;; V16QI requires SSE4.1 (pextrb); V8HI (pextrw) is available baseline.
19284 (define_mode_iterator PEXTR_MODE12
19285 [(V16QI "TARGET_SSE4_1") V8HI])
;; Extracting element 0 of a QI/HI vector to memory: split before
;; reload into a vector->SI register move followed by a narrowing
;; store, instead of using pextr with a memory destination.
19287 (define_insn_and_split "*vec_extract<mode>_0_mem"
19288 [(set (match_operand:<ssescalarmode> 0 "memory_operand")
19289 (vec_select:<ssescalarmode>
19290 (match_operand:PEXTR_MODE12 1 "register_operand")
19291 (parallel [(const_int 0)])))]
19294 && (TARGET_INTER_UNIT_MOVES_FROM_VEC
19295 || optimize_function_for_speed_p (cfun))
19296 && ix86_pre_reload_split ()"
19299 [(set (match_dup 2) (match_dup 3))
19300 (set (match_dup 0) (match_dup 4))]
19302 operands[2] = gen_reg_rtx (SImode);
19303 operands[3] = gen_lowpart (SImode, force_reg (<MODE>mode, operands[1]));
19304 operands[4] = gen_lowpart (<ssescalarmode>mode, operands[2]);
;; pextrb/pextrw extracting an arbitrary QI/HI element to a GPR
;; (alternative 0, via %k0 32-bit destination) or, with SSE4.1, to
;; memory (alternative 1).
19307 (define_insn "*vec_extract<mode>"
19308 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m")
19309 (vec_select:<ssescalarmode>
19310 (match_operand:PEXTR_MODE12 1 "register_operand" "YW,YW")
19312 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
19315 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
19316 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19317 [(set_attr "isa" "*,sse4")
19318 (set_attr "type" "sselog1")
19319 (set_attr "prefix_data16" "1")
19320 (set (attr "prefix_extra")
19322 (and (eq_attr "alternative" "0,2")
19323 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
19325 (const_string "1")))
19326 (set_attr "length_immediate" "1")
19327 (set_attr "prefix" "maybe_vex,maybe_vex")
19328 (set_attr "mode" "TI")])
;; pextrb/pextrw combined with zero-extension to a 32/64-bit GPR;
;; the instruction's natural zero-extending write to %k0 makes the
;; explicit zero_extend free.
19330 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
19331 [(set (match_operand:SWI48 0 "register_operand" "=r")
19333 (vec_select:<PEXTR_MODE12:ssescalarmode>
19334 (match_operand:PEXTR_MODE12 1 "register_operand" "YW")
19336 [(match_operand:SI 2
19337 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
19339 "%vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
19340 [(set_attr "type" "sselog1")
19341 (set_attr "prefix_data16" "1")
19342 (set (attr "prefix_extra")
19344 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
19346 (const_string "1")))
19347 (set_attr "length_immediate" "1")
19348 (set_attr "prefix" "maybe_vex")
19349 (set_attr "mode" "TI")])
;; pextrb of a V16QI element zero-extended into an HI destination;
;; %k0 writes the 32-bit register, which covers the HI result.
19351 (define_insn "*vec_extractv16qi_zext"
19352 [(set (match_operand:HI 0 "register_operand" "=r")
19355 (match_operand:V16QI 1 "register_operand" "YW")
19357 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
19359 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
19360 [(set_attr "type" "sselog1")
19361 (set_attr "prefix_data16" "1")
19362 (set_attr "prefix_extra" "1")
19363 (set_attr "length_immediate" "1")
19364 (set_attr "prefix" "maybe_vex")
19365 (set_attr "mode" "TI")])
;; Extracting a QI/HI element from a vector already in memory: the "o"
;; (offsettable) constraint lets this become a plain scalar load.
19367 (define_insn "*vec_extract<mode>_mem"
19368 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
19369 (vec_select:<ssescalarmode>
19370 (match_operand:VI12_128 1 "memory_operand" "o")
19372 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Element 0 of an SI/DI vector is just the low part of the register
;; or memory: handled as a plain move.  The vector->GPR alternative is
;; only preferred for speed when inter-unit moves from vector
;; registers are fast.
19376 (define_insn "*vec_extract<ssevecmodelower>_0"
19377 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,v ,m")
19379 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "m ,v,vm,v")
19380 (parallel [(const_int 0)])))]
19381 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
19383 [(set_attr "isa" "*,sse2,*,*")
19384 (set (attr "preferred_for_speed")
19385 (cond [(eq_attr "alternative" "1")
19386 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
19388 (symbol_ref "true")))])
;; Element 0 of a V2DI in 32-bit mode, where DImode doesn't fit a
;; single GPR: the GPR alternative needs SSE4 (split into two pextrd
;; halves by the define_split below).
19390 (define_insn "*vec_extractv2di_0_sse"
19391 [(set (match_operand:DI 0 "nonimmediate_operand" "=r,x ,m")
19393 (match_operand:V2DI 1 "nonimmediate_operand" " x,xm,x")
19394 (parallel [(const_int 0)])))]
19395 "TARGET_SSE && !TARGET_64BIT
19396 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
19398 [(set_attr "isa" "sse4,*,*")
19399 (set (attr "preferred_for_speed")
19400 (cond [(eq_attr "alternative" "0")
19401 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
19403 (symbol_ref "true")))])
;; Split (after reload) extracting V2DI element 0 into a DImode GPR
;; pair on 32-bit targets: the low SI half is a plain lowpart move and
;; the high half is extracted as V4SI element 1.
;; NOTE(review): the define_split header line appears to have been lost
;; in extraction of this chunk — confirm against the original file.
[(set (match_operand:DI 0 "general_reg_operand")
(match_operand:V2DI 1 "register_operand")
(parallel [(const_int 0)])))]
19410 "TARGET_SSE4_1 && !TARGET_64BIT
19411 && reload_completed"
19412 [(set (match_dup 2) (match_dup 4))
19416 (parallel [(const_int 1)])))]
19418 operands[4] = gen_lowpart (SImode, operands[1]);
19419 operands[5] = gen_lowpart (V4SImode, operands[1]);
19420 split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
19424 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
19426 (match_operand:<ssevecmode> 1 "register_operand")
19427 (parallel [(const_int 0)])))]
19428 "TARGET_SSE && reload_completed"
19429 [(set (match_dup 0) (match_dup 1))]
19430 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
;; Zero-extend element 0 of a V4SI into a DI register (SSE4.1, 64-bit).
19432 (define_insn "*vec_extractv4si_0_zext_sse4"
19433 [(set (match_operand:DI 0 "register_operand" "=r,x,v")
19436 (match_operand:V4SI 1 "register_operand" "v,x,v")
19437 (parallel [(const_int 0)]))))]
19438 "TARGET_64BIT && TARGET_SSE4_1"
19440 [(set_attr "isa" "*,*,avx512f")
19441 (set (attr "preferred_for_speed")
19442 (cond [(eq_attr "alternative" "0")
19443 (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
19445 (symbol_ref "true")))])
;; SSE2 variant, only when vector->integer moves are preferred.
19447 (define_insn "*vec_extractv4si_0_zext"
19448 [(set (match_operand:DI 0 "register_operand" "=r")
19451 (match_operand:V4SI 1 "register_operand" "x")
19452 (parallel [(const_int 0)]))))]
19453 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
;; Post-reload split (header elided): reduce the vector zero-extract to a
;; zero_extend of the V4SI register's SI lowpart.
19457 [(set (match_operand:DI 0 "register_operand")
19460 (match_operand:V4SI 1 "register_operand")
19461 (parallel [(const_int 0)]))))]
19462 "TARGET_SSE2 && reload_completed"
19463 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
19464 "operands[1] = gen_lowpart (SImode, operands[1]);")
;; General V4SI element extract; alternatives use (v)pextrd, or a byte
;; shift ((v)psrldq, so the immediate is scaled by 4 in the C fragment).
19466 (define_insn "*vec_extractv4si"
19467 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,Yw")
19469 (match_operand:V4SI 1 "register_operand" " x, v, 0, 0,Yw")
19470 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
19473 switch (which_alternative)
19477 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
19481 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
19482 return "psrldq\t{%2, %0|%0, %2}";
19485 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
19486 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
19489 gcc_unreachable ();
19492 [(set_attr "isa" "*,avx512dq,noavx,noavx,avx")
19493 (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1")
19494 (set (attr "prefix_extra")
19495 (if_then_else (eq_attr "alternative" "0,1")
19497 (const_string "*")))
19498 (set_attr "length_immediate" "1")
19499 (set_attr "prefix" "maybe_vex,evex,orig,orig,maybe_vex")
19500 (set_attr "mode" "TI")])
;; Zero-extend an arbitrary V4SI element into DI via pextrd to a 32-bit
;; GPR (%k0 -- writing the 32-bit register implicitly zero-extends).
19502 (define_insn "*vec_extractv4si_zext"
19503 [(set (match_operand:DI 0 "register_operand" "=r,r")
19506 (match_operand:V4SI 1 "register_operand" "x,v")
19507 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
19508 "TARGET_64BIT && TARGET_SSE4_1"
19509 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
19510 [(set_attr "isa" "*,avx512dq")
19511 (set_attr "type" "sselog1")
19512 (set_attr "prefix_extra" "1")
19513 (set_attr "length_immediate" "1")
19514 (set_attr "prefix" "maybe_vex")
19515 (set_attr "mode" "TI")])
;; Element extract when the whole vector is in (offsettable) memory;
;; template elided in this extract.
19517 (define_insn "*vec_extractv4si_mem"
19518 [(set (match_operand:SI 0 "register_operand" "=x,r")
19520 (match_operand:V4SI 1 "memory_operand" "o,o")
19521 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; Zero-extending extract from memory: after reload, split to a scalar
;; zero-extending SI load at offset element*4.
19525 (define_insn_and_split "*vec_extractv4si_zext_mem"
19526 [(set (match_operand:DI 0 "register_operand" "=x,r")
19529 (match_operand:V4SI 1 "memory_operand" "o,o")
19530 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
19531 "TARGET_64BIT && TARGET_SSE"
19533 "&& reload_completed"
19534 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
19536 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extract the high (index 1) DI element of a V2DI.  Alternatives cover
;; pextrq to GPR/memory, movhps store, psrldq/vpsrldq byte shifts, movhlps,
;; and direct loads from an offsettable memory operand.
19539 (define_insn "*vec_extractv2di_1"
19540 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm,m,x,x,Yv,x,v,r")
19542 (match_operand:V2DI 1 "nonimmediate_operand" "x ,v ,v,0,x, v,x,o,o")
19543 (parallel [(const_int 1)])))]
19544 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
19546 %vpextrq\t{$1, %1, %0|%0, %1, 1}
19547 vpextrq\t{$1, %1, %0|%0, %1, 1}
19548 %vmovhps\t{%1, %0|%0, %1}
19549 psrldq\t{$8, %0|%0, 8}
19550 vpsrldq\t{$8, %1, %0|%0, %1, 8}
19551 vpsrldq\t{$8, %1, %0|%0, %1, 8}
19552 movhlps\t{%1, %0|%0, %1}
;; Per-alternative isa/type/prefix attributes follow.
19556 (cond [(eq_attr "alternative" "0")
19557 (const_string "x64_sse4")
19558 (eq_attr "alternative" "1")
19559 (const_string "x64_avx512dq")
19560 (eq_attr "alternative" "3")
19561 (const_string "sse2_noavx")
19562 (eq_attr "alternative" "4")
19563 (const_string "avx")
19564 (eq_attr "alternative" "5")
19565 (const_string "avx512bw")
19566 (eq_attr "alternative" "6")
19567 (const_string "noavx")
19568 (eq_attr "alternative" "8")
19569 (const_string "x64")
19571 (const_string "*")))
19573 (cond [(eq_attr "alternative" "2,6,7")
19574 (const_string "ssemov")
19575 (eq_attr "alternative" "3,4,5")
19576 (const_string "sseishft1")
19577 (eq_attr "alternative" "8")
19578 (const_string "imov")
19580 (const_string "sselog1")))
19581 (set (attr "length_immediate")
19582 (if_then_else (eq_attr "alternative" "0,1,3,4,5")
19584 (const_string "*")))
19585 (set (attr "prefix_rex")
19586 (if_then_else (eq_attr "alternative" "0,1")
19588 (const_string "*")))
19589 (set (attr "prefix_extra")
19590 (if_then_else (eq_attr "alternative" "0,1")
19592 (const_string "*")))
19593 (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
19594 (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
;; Post-reload split (header elided): element extract from an in-memory
;; vector becomes a scalar load at offset element * element-size.
19597 [(set (match_operand:<ssescalarmode> 0 "register_operand")
19598 (vec_select:<ssescalarmode>
19599 (match_operand:VI_128 1 "memory_operand")
19601 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
19602 "TARGET_SSE && reload_completed"
19603 [(set (match_dup 0) (match_dup 1))]
19605 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
19607 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; Extract one 128-bit lane of a V2TI (256-bit) register with
;; vextracti128 / vextracti32x4 (the latter for EVEX-only registers).
19610 (define_insn "*vec_extractv2ti"
19611 [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
19613 (match_operand:V2TI 1 "register_operand" "x,v")
19615 [(match_operand:SI 2 "const_0_to_1_operand")])))]
19618 vextract%~128\t{%2, %1, %0|%0, %1, %2}
19619 vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
19620 [(set_attr "type" "sselog")
19621 (set_attr "prefix_extra" "1")
19622 (set_attr "length_immediate" "1")
19623 (set_attr "prefix" "vex,evex")
19624 (set_attr "mode" "OI")])
;; Extract one 128-bit lane of a V4TI (512-bit) register.
19626 (define_insn "*vec_extractv4ti"
19627 [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
19629 (match_operand:V4TI 1 "register_operand" "v")
19631 [(match_operand:SI 2 "const_0_to_3_operand")])))]
19633 "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
19634 [(set_attr "type" "sselog")
19635 (set_attr "prefix_extra" "1")
19636 (set_attr "length_immediate" "1")
19637 (set_attr "prefix" "evex")
19638 (set_attr "mode" "XI")])
19640 (define_mode_iterator VEXTRACTI128_MODE
19641 [(V4TI "TARGET_AVX512F") V2TI])
;; Post-reload split (header elided): lane 0 extraction is just a TImode
;; lowpart move; EXT_REX registers need AVX512VL for the TImode access.
19644 [(set (match_operand:TI 0 "nonimmediate_operand")
19646 (match_operand:VEXTRACTI128_MODE 1 "register_operand")
19647 (parallel [(const_int 0)])))]
19649 && reload_completed
19650 && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
19651 [(set (match_dup 0) (match_dup 1))]
19652 "operands[1] = gen_lowpart (TImode, operands[1]);")
19654 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
19655 ;; vector modes into vec_extract*.
;; Pre-reload splitter (header elided): a scalar subreg at offset 0 of a
;; 16/32/64-byte vector register becomes a vec_select of element 0.  The
;; C fragment first narrows 64- and 32-byte vectors to a 16-byte vector
;; with vec_extract_lo_* before taking the element.
19657 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
19658 (subreg:SWI48x (match_operand 1 "register_operand") 0))]
19659 "can_create_pseudo_p ()
19660 && REG_P (operands[1])
19661 && VECTOR_MODE_P (GET_MODE (operands[1]))
19662 && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
19663 || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
19664 || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
19665 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
19666 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
19667 (parallel [(const_int 0)])))]
19671 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
19674 if (<MODE>mode == SImode)
19676 tmp = gen_reg_rtx (V8SImode);
19677 emit_insn (gen_vec_extract_lo_v16si (tmp,
19678 gen_lowpart (V16SImode,
19683 tmp = gen_reg_rtx (V4DImode);
19684 emit_insn (gen_vec_extract_lo_v8di (tmp,
19685 gen_lowpart (V8DImode,
19691 tmp = gen_reg_rtx (<ssevecmode>mode);
19692 if (<MODE>mode == SImode)
19693 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
19696 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
19701 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
;; Build a V2SI from two SI values on SSE4.1+: pinsrd, punpckldq, a plain
;; movd when the high half is zero (C constraint), or MMX punpckldq/movd.
19706 (define_insn "*vec_concatv2si_sse4_1"
19707 [(set (match_operand:V2SI 0 "register_operand"
19708 "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
19710 (match_operand:SI 1 "nonimmediate_operand"
19711 " 0, 0, x,Yv, 0, 0,Yv,rm, 0,rm")
19712 (match_operand:SI 2 "nonimm_or_0_operand"
19713 " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
19714 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
19716 pinsrd\t{$1, %2, %0|%0, %2, 1}
19717 pinsrd\t{$1, %2, %0|%0, %2, 1}
19718 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
19719 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
19720 punpckldq\t{%2, %0|%0, %2}
19721 punpckldq\t{%2, %0|%0, %2}
19722 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
19723 %vmovd\t{%1, %0|%0, %1}
19724 punpckldq\t{%2, %0|%0, %2}
19725 movd\t{%1, %0|%0, %1}"
19726 [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
19727 (set (attr "mmx_isa")
19728 (if_then_else (eq_attr "alternative" "8,9")
19729 (const_string "native")
19730 (const_string "*")))
19732 (cond [(eq_attr "alternative" "7")
19733 (const_string "ssemov")
19734 (eq_attr "alternative" "8")
19735 (const_string "mmxcvt")
19736 (eq_attr "alternative" "9")
19737 (const_string "mmxmov")
19739 (const_string "sselog")))
19740 (set (attr "prefix_extra")
19741 (if_then_else (eq_attr "alternative" "0,1,2,3")
19743 (const_string "*")))
19744 (set (attr "length_immediate")
19745 (if_then_else (eq_attr "alternative" "0,1,2,3")
19747 (const_string "*")))
19748 (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
19749 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
19751 ;; ??? In theory we can match memory for the MMX alternative, but allowing
19752 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
19753 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Pre-SSE4.1 variant of the same concat (no pinsrd available).
19754 (define_insn "*vec_concatv2si"
19755 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,x,x,*y,*y")
19757 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,0,m, 0,rm")
19758 (match_operand:SI 2 "reg_or_0_operand" " x,C ,x,C,*y,C")))]
19759 "TARGET_SSE && !TARGET_SSE4_1"
19761 punpckldq\t{%2, %0|%0, %2}
19762 movd\t{%1, %0|%0, %1}
19763 unpcklps\t{%2, %0|%0, %2}
19764 movss\t{%1, %0|%0, %1}
19765 punpckldq\t{%2, %0|%0, %2}
19766 movd\t{%1, %0|%0, %1}"
19767 [(set_attr "isa" "sse2,sse2,*,*,*,*")
19768 (set_attr "mmx_isa" "*,*,*,*,native,native")
19769 (set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov")
19770 (set_attr "mode" "TI,TI,V4SF,SF,DI,DI")])
;; Concatenate two half-width vectors into a full 128-bit integer vector
;; (punpcklqdq / movlhps / movhps load the high half).
19772 (define_insn "*vec_concat<mode>"
19773 [(set (match_operand:VI124_128 0 "register_operand" "=x,v,x,x,v")
19774 (vec_concat:VI124_128
19775 (match_operand:<ssehalfvecmode> 1 "register_operand" " 0,v,0,0,v")
19776 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" " x,v,x,m,m")))]
19779 punpcklqdq\t{%2, %0|%0, %2}
19780 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
19781 movlhps\t{%2, %0|%0, %2}
19782 movhps\t{%2, %0|%0, %q2}
19783 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
19784 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
19785 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
19786 (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
19787 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
;; Recognize a vpermt2-style permute whose selector picks the low 8 bytes
;; of each input (indices 0-7 and 16-23) and rewrite it, before reload, as
;; a plain vec_concat of the two V8QI lowparts.
19789 (define_insn_and_split "*vec_concatv16qi_permt2"
19790 [(set (match_operand:V16QI 0 "register_operand")
19792 [(const_vector:V16QI [(const_int 0) (const_int 1)
19793 (const_int 2) (const_int 3)
19794 (const_int 4) (const_int 5)
19795 (const_int 6) (const_int 7)
19796 (const_int 16) (const_int 17)
19797 (const_int 18) (const_int 19)
19798 (const_int 20) (const_int 21)
19799 (const_int 22) (const_int 23)])
19800 (match_operand:V16QI 1 "register_operand")
19801 (match_operand:V16QI 2 "nonimmediate_operand")]
19803 "TARGET_AVX512VL && TARGET_AVX512VBMI
19804 && ix86_pre_reload_split ()"
19807 [(set (match_dup 0)
19808 (vec_concat:V16QI (match_dup 1) (match_dup 2)))]
19810 operands[1] = lowpart_subreg (V8QImode,
19811 force_reg (V16QImode, operands[1]),
19813 if (!MEM_P (operands[2]))
19814 operands[2] = force_reg (V16QImode, operands[2]);
19815 operands[2] = lowpart_subreg (V8QImode, operands[2], V16QImode);
;; Same idea for V8HI: selector 0-3,8-11 == concat of the two V4HI lows.
19818 (define_insn_and_split "*vec_concatv8hi_permt2"
19819 [(set (match_operand:V8HI 0 "register_operand")
19821 [(const_vector:V8HI [(const_int 0) (const_int 1)
19822 (const_int 2) (const_int 3)
19823 (const_int 8) (const_int 9)
19824 (const_int 10) (const_int 11)])
19825 (match_operand:V8HI 1 "register_operand")
19826 (match_operand:V8HI 2 "nonimmediate_operand")]
19828 "TARGET_AVX512VL && TARGET_AVX512BW
19829 && ix86_pre_reload_split ()"
19832 [(set (match_dup 0)
19833 (vec_concat:V8HI (match_dup 1) (match_dup 2)))]
19835 operands[1] = lowpart_subreg (V4HImode,
19836 force_reg (V8HImode, operands[1]),
19838 if (!MEM_P (operands[2]))
19839 operands[2] = force_reg (V8HImode, operands[2]);
19840 operands[2] = lowpart_subreg (V4HImode, operands[2], V8HImode);
;; Concat of a half vector with zero: just a zero-extending low-half move
;; (movq), or movq2dq when the source lives in an MMX register.
19843 (define_insn "*vec_concat<mode>_0"
19844 [(set (match_operand:VI124_128 0 "register_operand" "=v,x")
19845 (vec_concat:VI124_128
19846 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "vm,?!*y")
19847 (match_operand:<ssehalfvecmode> 2 "const0_operand")))]
19850 %vmovq\t{%1, %0|%0, %1}
19851 movq2dq\t{%1, %0|%0, %1}"
19852 [(set_attr "mmx_isa" "*,native")
19853 (set_attr "type" "ssemov")
19854 (set_attr "prefix" "maybe_vex,orig")
19855 (set_attr "mode" "TI")])
;; Build a V2DI from two DI values: pinsrq, punpcklqdq, movlhps, or a
;; movhps load of the high half.
19857 (define_insn "vec_concatv2di"
19858 [(set (match_operand:V2DI 0 "register_operand"
19859 "=Yr,*x,x ,v ,x,v ,x,x,v")
19861 (match_operand:DI 1 "register_operand"
19862 " 0, 0,x ,Yv,0,Yv,0,0,v")
19863 (match_operand:DI 2 "nonimmediate_operand"
19864 " rm,rm,rm,rm,x,Yv,x,m,m")))]
19867 pinsrq\t{$1, %2, %0|%0, %2, 1}
19868 pinsrq\t{$1, %2, %0|%0, %2, 1}
19869 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
19870 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
19871 punpcklqdq\t{%2, %0|%0, %2}
19872 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
19873 movlhps\t{%2, %0|%0, %2}
19874 movhps\t{%2, %0|%0, %2}
19875 vmovhps\t{%2, %1, %0|%0, %1, %2}"
19877 (cond [(eq_attr "alternative" "0,1")
19878 (const_string "x64_sse4_noavx")
19879 (eq_attr "alternative" "2")
19880 (const_string "x64_avx")
19881 (eq_attr "alternative" "3")
19882 (const_string "x64_avx512dq")
19883 (eq_attr "alternative" "4")
19884 (const_string "sse2_noavx")
19885 (eq_attr "alternative" "5,8")
19886 (const_string "avx")
19888 (const_string "noavx")))
19891 (eq_attr "alternative" "0,1,2,3,4,5")
19892 (const_string "sselog")
19893 (const_string "ssemov")))
19894 (set (attr "prefix_rex")
19895 (if_then_else (eq_attr "alternative" "0,1,2,3")
19897 (const_string "*")))
19898 (set (attr "prefix_extra")
19899 (if_then_else (eq_attr "alternative" "0,1,2,3")
19901 (const_string "*")))
19902 (set (attr "length_immediate")
19903 (if_then_else (eq_attr "alternative" "0,1,2,3")
19905 (const_string "*")))
19906 (set (attr "prefix")
19907 (cond [(eq_attr "alternative" "2")
19908 (const_string "vex")
19909 (eq_attr "alternative" "3")
19910 (const_string "evex")
19911 (eq_attr "alternative" "5,8")
19912 (const_string "maybe_evex")
19914 (const_string "orig")))
19915 (set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; V2DI with zero high half; alternative 0 moves from a GPR, and the C
;; fragment prefers movq over movd when the assembler supports it.
19917 (define_insn "*vec_concatv2di_0"
19918 [(set (match_operand:V2DI 0 "register_operand" "=v,v ,x")
19920 (match_operand:DI 1 "nonimmediate_operand" " r,vm,?!*y")
19921 (match_operand:DI 2 "const0_operand")))]
19924 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
19925 %vmovq\t{%1, %0|%0, %1}
19926 movq2dq\t{%1, %0|%0, %1}"
19927 [(set_attr "isa" "x64,*,*")
19928 (set_attr "mmx_isa" "*,*,native")
19929 (set_attr "type" "ssemov")
19930 (set_attr "prefix_rex" "1,*,*")
19931 (set_attr "prefix" "maybe_vex,maybe_vex,orig")
19932 (set_attr "mode" "TI")
19933 (set (attr "preferred_for_speed")
19934 (cond [(eq_attr "alternative" "0")
19935 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
19937 (symbol_ref "true")))])
19939 ;; vmovq clears also the higher bits.
;; Set element 0 of a zeroed DI-element vector: vmovq writes the low
;; quadword and zeroes the rest of the destination.
19940 (define_insn "vec_set<mode>_0"
19941 [(set (match_operand:VI8_AVX_AVX512F 0 "register_operand" "=v,v")
19942 (vec_merge:VI8_AVX_AVX512F
19943 (vec_duplicate:VI8_AVX_AVX512F
19944 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,vm"))
19945 (match_operand:VI8_AVX_AVX512F 1 "const0_operand")
19948 "vmovq\t{%2, %x0|%x0, %2}"
19949 [(set_attr "isa" "x64,*")
19950 (set_attr "type" "ssemov")
19951 (set_attr "prefix_rex" "1,*")
19952 (set_attr "prefix" "maybe_evex")
19953 (set_attr "mode" "TI")
19954 (set (attr "preferred_for_speed")
19955 (cond [(eq_attr "alternative" "0")
19956 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
19958 (symbol_ref "true")))])
;; Standard-name expanders for widening unpack; the real work is done in C
;; by ix86_expand_sse_unpack (args: unsigned_p, high_p).
19960 (define_expand "vec_unpacks_lo_<mode>"
19961 [(match_operand:<sseunpackmode> 0 "register_operand")
19962 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
19964 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
19966 (define_expand "vec_unpacks_hi_<mode>"
19967 [(match_operand:<sseunpackmode> 0 "register_operand")
19968 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
19970 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
19972 (define_expand "vec_unpacku_lo_<mode>"
19973 [(match_operand:<sseunpackmode> 0 "register_operand")
19974 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
19976 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Mask-register (QImode) variants: unpacking the low half of a mask is a
;; plain move; operand 2 carries the element count.
19978 (define_expand "vec_unpacks_sbool_lo_qi"
19979 [(match_operand:QI 0 "register_operand")
19980 (match_operand:QI 1 "register_operand")
19981 (match_operand:QI 2 "const_int_operand")]
19984 if (INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 4)
19986 emit_move_insn (operands[0], operands[1]);
19990 (define_expand "vec_unpacks_lo_hi"
19991 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
19992 (match_operand:HI 1 "register_operand"))]
19995 (define_expand "vec_unpacks_lo_si"
19996 [(set (match_operand:HI 0 "register_operand")
19997 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
20000 (define_expand "vec_unpacks_lo_di"
20001 [(set (match_operand:SI 0 "register_operand")
20002 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
20005 (define_expand "vec_unpacku_hi_<mode>"
20006 [(match_operand:<sseunpackmode> 0 "register_operand")
20007 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
20009 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
;; High half of a mask: shift the mask right by nunits/2 (kshiftr), going
;; through HImode when AVX512DQ's QImode mask shift is unavailable.
20011 (define_expand "vec_unpacks_sbool_hi_qi"
20012 [(match_operand:QI 0 "register_operand")
20013 (match_operand:QI 1 "register_operand")
20014 (match_operand:QI 2 "const_int_operand")]
20017 HOST_WIDE_INT nunits = INTVAL (operands[2]);
20018 if (nunits != 8 && nunits != 4)
20020 if (TARGET_AVX512DQ)
20021 emit_insn (gen_klshiftrtqi (operands[0], operands[1],
20022 GEN_INT (nunits / 2)));
20025 rtx tem = gen_reg_rtx (HImode);
20026 emit_insn (gen_klshiftrthi (tem, lowpart_subreg (HImode, operands[1],
20028 GEN_INT (nunits / 2)));
20029 emit_move_insn (operands[0], lowpart_subreg (QImode, tem, HImode));
20034 (define_expand "vec_unpacks_hi_hi"
20036 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
20037 (lshiftrt:HI (match_operand:HI 1 "register_operand")
20039 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
20042 (define_expand "vec_unpacks_hi_<mode>"
20044 [(set (subreg:SWI48x
20045 (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
20046 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
20048 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
20050 "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
20052 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20056 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unsigned average with rounding (pavgb/pavgw), modelled exactly as
;; truncate ((a + b + 1) >> 1) in a double-width mode; the +1 rounding
;; constant is operand <mask_expand_op3>.
20058 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
20059 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
20060 (truncate:VI12_AVX2_AVX512BW
20061 (lshiftrt:<ssedoublemode>
20062 (plus:<ssedoublemode>
20063 (plus:<ssedoublemode>
20064 (zero_extend:<ssedoublemode>
20065 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
20066 (zero_extend:<ssedoublemode>
20067 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
20068 (match_dup <mask_expand_op3>))
20070 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
20072 operands[<mask_expand_op3>] = CONST1_RTX(<ssedoublemode>mode);
20073 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Matching insn for the expander above; emits pavg(b|w)/vpavg(b|w).
20076 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
20077 [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,<v_Yw>")
20078 (truncate:VI12_AVX2_AVX512BW
20079 (lshiftrt:<ssedoublemode>
20080 (plus:<ssedoublemode>
20081 (plus:<ssedoublemode>
20082 (zero_extend:<ssedoublemode>
20083 (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,<v_Yw>"))
20084 (zero_extend:<ssedoublemode>
20085 (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,<v_Yw>m")))
20086 (match_operand:<ssedoublemode> <mask_expand_op3> "const1_operand"))
20088 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
20089 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
20091 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
20092 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20093 [(set_attr "isa" "noavx,avx")
20094 (set_attr "type" "sseiadd")
20095 (set_attr "prefix_data16" "1,*")
20096 (set_attr "prefix" "orig,<mask_prefix>")
20097 (set_attr "mode" "<sseinsnmode>")])
20099 ;; The correct representation for this is absolutely enormous, and
20100 ;; surely not generally useful.
;; psadbw (sum of absolute differences), kept as an unspec because the
;; exact RTL representation would be unwieldy (see comment above).
20101 (define_expand "<sse2_avx2>_psadbw"
20102 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand")
20103 (unspec:VI8_AVX2_AVX512BW
20104 [(match_operand:<ssebytemode> 1 "vector_operand")
20105 (match_operand:<ssebytemode> 2 "vector_operand")]
20108 "ix86_fixup_binary_operands_no_copy (PLUS, <ssebytemode>mode, operands);")
20110 (define_insn "*<sse2_avx2>_psadbw"
20111 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,YW")
20112 (unspec:VI8_AVX2_AVX512BW
20113 [(match_operand:<ssebytemode> 1 "vector_operand" "%0,YW")
20114 (match_operand:<ssebytemode> 2 "vector_operand" "xBm,YWm")]
20117 && ix86_binary_operator_ok (PLUS, <ssebytemode>mode, operands)"
20119 psadbw\t{%2, %0|%0, %2}
20120 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
20121 [(set_attr "isa" "noavx,avx")
20122 (set_attr "type" "sseiadd")
20123 (set_attr "atom_unit" "simul")
20124 (set_attr "prefix_data16" "1,*")
20125 (set_attr "prefix" "orig,maybe_evex")
20126 (set_attr "mode" "<sseinsnmode>")])
;; movmskps/movmskpd: collect the sign bits of a float vector into a GPR.
20128 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
20129 [(set (match_operand:SI 0 "register_operand" "=r")
20131 [(match_operand:VF_128_256 1 "register_operand" "x")]
20134 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
20135 [(set_attr "type" "ssemov")
20136 (set_attr "prefix" "maybe_vex")
20137 (set_attr "mode" "<MODE>")])
;; Same, with the SI result sign/zero-extended to DI (writing %k0, the
;; 32-bit register, gives the zero-extension for free).
20139 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext"
20140 [(set (match_operand:DI 0 "register_operand" "=r")
20143 [(match_operand:VF_128_256 1 "register_operand" "x")]
20145 "TARGET_64BIT && TARGET_SSE"
20146 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
20147 [(set_attr "type" "ssemov")
20148 (set_attr "prefix" "maybe_vex")
20149 (set_attr "mode" "<MODE>")])
;; Combine-style patterns: a (x < 0) comparison result feeding movmsk is
;; the same as movmsk on the original value; split away the comparison.
20151 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt"
20152 [(set (match_operand:SI 0 "register_operand" "=r")
20155 (match_operand:<sseintvecmode> 1 "register_operand" "x")
20156 (match_operand:<sseintvecmode> 2 "const0_operand"))]
20160 "&& reload_completed"
20161 [(set (match_dup 0)
20162 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
20163 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
20164 [(set_attr "type" "ssemov")
20165 (set_attr "prefix" "maybe_vex")
20166 (set_attr "mode" "<MODE>")])
20168 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
20169 [(set (match_operand:DI 0 "register_operand" "=r")
20173 (match_operand:<sseintvecmode> 1 "register_operand" "x")
20174 (match_operand:<sseintvecmode> 2 "const0_operand"))]
20176 "TARGET_64BIT && TARGET_SSE"
20178 "&& reload_completed"
20179 [(set (match_dup 0)
20180 (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
20181 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
20182 [(set_attr "type" "ssemov")
20183 (set_attr "prefix" "maybe_vex")
20184 (set_attr "mode" "<MODE>")])
;; Likewise for an arithmetic right shift feeding movmsk: the shift does
;; not change the sign bits, so it can be dropped.
20186 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
20187 [(set (match_operand:SI 0 "register_operand" "=r")
20189 [(subreg:VF_128_256
20190 (ashiftrt:<sseintvecmode>
20191 (match_operand:<sseintvecmode> 1 "register_operand" "x")
20192 (match_operand:QI 2 "const_int_operand")) 0)]
20196 "&& reload_completed"
20197 [(set (match_dup 0)
20198 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
20199 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
20200 [(set_attr "type" "ssemov")
20201 (set_attr "prefix" "maybe_vex")
20202 (set_attr "mode" "<MODE>")])
20204 (define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift"
20205 [(set (match_operand:DI 0 "register_operand" "=r")
20208 [(subreg:VF_128_256
20209 (ashiftrt:<sseintvecmode>
20210 (match_operand:<sseintvecmode> 1 "register_operand" "x")
20211 (match_operand:QI 2 "const_int_operand")) 0)]
20213 "TARGET_64BIT && TARGET_SSE"
20215 "&& reload_completed"
20216 [(set (match_dup 0)
20217 (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
20218 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
20219 [(set_attr "type" "ssemov")
20220 (set_attr "prefix" "maybe_vex")
20221 (set_attr "mode" "<MODE>")])
;; pmovmskb: collect the byte sign bits of a V16QI/V32QI into a GPR.
20223 (define_insn "<sse2_avx2>_pmovmskb"
20224 [(set (match_operand:SI 0 "register_operand" "=r")
20226 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
20229 "%vpmovmskb\t{%1, %0|%0, %1}"
20230 [(set_attr "type" "ssemov")
20231 (set (attr "prefix_data16")
20233 (match_test "TARGET_AVX")
20235 (const_string "1")))
20236 (set_attr "prefix" "maybe_vex")
20237 (set_attr "mode" "SI")])
;; Zero-extended DI result: writing %k0 (32-bit) zero-extends implicitly.
20239 (define_insn "*<sse2_avx2>_pmovmskb_zext"
20240 [(set (match_operand:DI 0 "register_operand" "=r")
20243 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
20245 "TARGET_64BIT && TARGET_SSE2"
20246 "%vpmovmskb\t{%1, %k0|%k0, %1}"
20247 [(set_attr "type" "ssemov")
20248 (set (attr "prefix_data16")
20250 (match_test "TARGET_AVX")
20252 (const_string "1")))
20253 (set_attr "prefix" "maybe_vex")
20254 (set_attr "mode" "SI")])
;; Sign-extended DI result of the V16QI mask; the 16-bit mask value is
;; non-negative so the same %k0 write is used.
20256 (define_insn "*sse2_pmovmskb_ext"
20257 [(set (match_operand:DI 0 "register_operand" "=r")
20260 [(match_operand:V16QI 1 "register_operand" "x")]
20262 "TARGET_64BIT && TARGET_SSE2"
20263 "%vpmovmskb\t{%1, %k0|%k0, %1}"
20264 [(set_attr "type" "ssemov")
20265 (set (attr "prefix_data16")
20267 (match_test "TARGET_AVX")
20269 (const_string "1")))
20270 (set_attr "prefix" "maybe_vex")
20271 (set_attr "mode" "SI")])
;; A zero_extend of the HImode subreg of a V16QI pmovmskb result is a
;; no-op (the mask fits in 16 bits); split to the plain SI pattern.
20273 (define_insn_and_split "*sse2_pmovskb_zexthisi"
20274 [(set (match_operand:SI 0 "register_operand")
20278 [(match_operand:V16QI 1 "register_operand")]
20279 UNSPEC_MOVMSK) 0)))]
20280 "TARGET_SSE2 && ix86_pre_reload_split ()"
20283 [(set (match_dup 0)
20284 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))])
;; Splitter (header elided): a negated/inverted pmovmskb result becomes
;; pmovmskb followed by an XOR with the all-ones mask value 0xffff.
20287 [(set (match_operand:SI 0 "register_operand")
20292 [(match_operand:V16QI 1 "register_operand")]
20293 UNSPEC_MOVMSK) 0))))]
20295 [(set (match_dup 2)
20296 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
20298 (xor:SI (match_dup 2) (const_int 65535)))]
20299 "operands[2] = gen_reg_rtx (SImode);")
;; Splitter (header elided): pmovmskb of a bitwise-NOT vector == NOT/XOR
;; of pmovmskb of the original; a full NOT is used for 32-element vectors
;; (mask fills all 32 bits), an XOR with (1<<nunits)-1 otherwise.
20302 [(set (match_operand:SI 0 "register_operand")
20304 [(not:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand"))]
20307 [(set (match_dup 2)
20308 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
20309 (set (match_dup 0) (match_dup 3))]
20311 operands[2] = gen_reg_rtx (SImode);
20312 if (GET_MODE_NUNITS (<MODE>mode) == 32)
20313 operands[3] = gen_rtx_NOT (SImode, operands[2]);
20317 = gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
20319 operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
;; Same transformation when the NOT is wrapped in a same-size subreg.
20324 [(set (match_operand:SI 0 "register_operand")
20326 [(subreg:VI1_AVX2 (not (match_operand 1 "register_operand")) 0)]
20329 && GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_INT
20330 && GET_MODE_SIZE (GET_MODE (operands[1])) == <MODE_SIZE>"
20331 [(set (match_dup 2)
20332 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))
20333 (set (match_dup 0) (match_dup 3))]
20335 operands[2] = gen_reg_rtx (SImode);
20336 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
20337 if (GET_MODE_NUNITS (<MODE>mode) == 32)
20338 operands[3] = gen_rtx_NOT (SImode, operands[2]);
20342 = gen_int_mode ((HOST_WIDE_INT_1 << GET_MODE_NUNITS (<MODE>mode)) - 1,
20344 operands[3] = gen_rtx_XOR (SImode, operands[2], operands[3]);
;; (x < 0) feeding pmovmskb is pmovmskb of x itself; drop the compare.
20348 (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
20349 [(set (match_operand:SI 0 "register_operand" "=r")
20351 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
20352 (match_operand:VI1_AVX2 2 "const0_operand"))]
20357 [(set (match_dup 0)
20358 (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
20360 [(set_attr "type" "ssemov")
20361 (set (attr "prefix_data16")
20363 (match_test "TARGET_AVX")
20365 (const_string "1")))
20366 (set_attr "prefix" "maybe_vex")
20367 (set_attr "mode" "SI")])
20369 (define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
20370 [(set (match_operand:DI 0 "register_operand" "=r")
20373 [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
20374 (match_operand:VI1_AVX2 2 "const0_operand"))]
20376 "TARGET_64BIT && TARGET_SSE2"
20379 [(set (match_dup 0)
20380 (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
20382 [(set_attr "type" "ssemov")
20383 (set (attr "prefix_data16")
20385 (match_test "TARGET_AVX")
20387 (const_string "1")))
20388 (set_attr "prefix" "maybe_vex")
20389 (set_attr "mode" "SI")])
20391 (define_insn_and_split "*sse2_pmovmskb_ext_lt"
20392 [(set (match_operand:DI 0 "register_operand" "=r")
20395 [(lt:V16QI (match_operand:V16QI 1 "register_operand" "x")
20396 (match_operand:V16QI 2 "const0_operand"))]
20398 "TARGET_64BIT && TARGET_SSE2"
20401 [(set (match_dup 0)
20402 (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
20404 [(set_attr "type" "ssemov")
20405 (set (attr "prefix_data16")
20407 (match_test "TARGET_AVX")
20409 (const_string "1")))
20410 (set_attr "prefix" "maybe_vex")
20411 (set_attr "mode" "SI")])
20413 ;; Optimize pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.
;; All-ones mask constant per vector width, used by the peephole below.
20414 (define_mode_attr vi1avx2const
20415 [(V32QI "0xffffffff") (V16QI "0xffff")])
;; Peephole (header elided): comparing a pmovmskb-of-equality result
;; against the all-ones constant is replaced by a single ptest.
20418 [(set (reg:CCZ FLAGS_REG)
20419 (compare:CCZ (unspec:SI
20421 (match_operand:VI1_AVX2 0 "vector_operand")
20422 (match_operand:VI1_AVX2 1 "const0_operand"))]
20424 (match_operand 2 "const_int_operand")))]
20425 "TARGET_SSE4_1 && (INTVAL (operands[2]) == (int) (<vi1avx2const>))"
20426 [(set (reg:CC FLAGS_REG)
20427 (unspec:CC [(match_dup 0)
;; maskmovdqu: byte-masked store of operand 1 through mask operand 2 to
;; the address implicitly held in (r)di.
20431 (define_expand "sse2_maskmovdqu"
20432 [(set (match_operand:V16QI 0 "memory_operand")
20433 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
20434 (match_operand:V16QI 2 "register_operand")
20439 (define_insn "*sse2_maskmovdqu"
20440 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
20441 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
20442 (match_operand:V16QI 2 "register_operand" "x")
20443 (mem:V16QI (match_dup 0))]
20447 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
20448 that requires %v to be at the beginning of the opcode name.  */
20449 if (Pmode != word_mode)
20450 fputs ("\taddr32", asm_out_file);
20451 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
20453 [(set_attr "type" "ssemov")
20454 (set_attr "prefix_data16" "1")
20455 (set (attr "length_address")
20456 (symbol_ref ("Pmode != word_mode")))
20457 ;; The implicit %rdi operand confuses default length_vex computation.
20458 (set (attr "length_vex")
20459 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
20460 (set_attr "prefix" "maybe_vex")
20461 (set_attr "znver1_decode" "vector")
20462 (set_attr "mode" "TI")])
;; Load the MXCSR control/status register from a 32-bit memory operand.
;; Volatile: reordering around FP code must be prevented.
20464 (define_insn "sse_ldmxcsr"
20465 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
20469 [(set_attr "type" "sse")
20470 (set_attr "atom_sse_attr" "mxcsr")
20471 (set_attr "prefix" "maybe_vex")
20472 (set_attr "memory" "load")])
;; Store the MXCSR control/status register to a 32-bit memory operand.
20474 (define_insn "sse_stmxcsr"
20475 [(set (match_operand:SI 0 "memory_operand" "=m")
20476 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
20479 [(set_attr "type" "sse")
20480 (set_attr "atom_sse_attr" "mxcsr")
20481 (set_attr "prefix" "maybe_vex")
20482 (set_attr "memory" "store")])
;; Flush the cache line containing the byte at the given address.
;; The operand is an address, not a memory reference, hence the "p"
;; constraint; memory effect is "unknown" since any line may be evicted.
20484 (define_insn "sse2_clflush"
20485 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
20489 [(set_attr "type" "sse")
20490 (set_attr "atom_sse_attr" "fence")
20491 (set_attr "memory" "unknown")])
20493 ;; As per AMD and Intel ISA manuals, the first operand is extensions
20494 ;; and it goes to %ecx. The second operand received is hints and it goes
;; to %eax (presumably — the "c" and "a" register constraints below
;; pin the operands to those registers; confirm against the full file,
;; as the original comment is cut off in this excerpt).
20496 (define_insn "sse3_mwait"
20497 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
20498 (match_operand:SI 1 "register_operand" "a")]
20501 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
20502 ;; Since 32bit register operands are implicitly zero extended to 64bit,
20503 ;; we only need to set up 32bit registers.
;; MWAIT encodes as a fixed 3-byte opcode, hence the constant length.
20505 [(set_attr "length" "3")])
;; MONITOR: arm address monitoring for a following MWAIT.  Per the
;; constraints below: linear address in %rax (Pmode operand 0),
;; extensions in %ecx (operand 1), hints in %edx (operand 2).
20507 (define_insn "@sse3_monitor_<mode>"
20508 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
20509 (match_operand:SI 1 "register_operand" "c")
20510 (match_operand:SI 2 "register_operand" "d")]
20513 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
20514 ;; RCX and RDX are used. Since 32bit register operands are implicitly
20515 ;; zero extended to 64bit, we only need to set up 32bit registers.
;; Length is 3 bytes, plus one for the addr32 prefix emitted when the
;; pointer mode is narrower than the word mode (x32).
20517 [(set (attr "length")
20518 (symbol_ref ("(Pmode != word_mode) + 3")))])
20520 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
20522 ;; SSSE3 instructions
20524 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over the horizontal add/subtract variants used by the
;; SSSE3/AVX2 phadd*/phsub* patterns below: wrapping and signed-
;; saturating forms of both plus and minus.
20526 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
20528 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
20529 [(set (match_operand:V16HI 0 "register_operand" "=x")
20530 (ssse3_plusminus:V16HI
20533 (match_operand:V16HI 1 "register_operand" "x")
20534 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
20536 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
20537 (const_int 16) (const_int 18) (const_int 20) (const_int 22)
20538 (const_int 8) (const_int 10) (const_int 12) (const_int 14)
20539 (const_int 24) (const_int 26) (const_int 28) (const_int 30)]))
20541 (vec_concat:V32HI (match_dup 1) (match_dup 2))
20543 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
20544 (const_int 17) (const_int 19) (const_int 21) (const_int 23)
20545 (const_int 9) (const_int 11) (const_int 13) (const_int 15)
20546 (const_int 25) (const_int 27) (const_int 29) (const_int 31)]))))]
20548 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
20549 [(set_attr "type" "sseiadd")
20550 (set_attr "prefix_extra" "1")
20551 (set_attr "prefix" "vex")
20552 (set_attr "mode" "OI")])
20554 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
20555 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
20556 (ssse3_plusminus:V8HI
20559 (match_operand:V8HI 1 "register_operand" "0,x")
20560 (match_operand:V8HI 2 "vector_operand" "xBm,xm"))
20562 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)
20563 (const_int 8) (const_int 10) (const_int 12) (const_int 14)]))
20565 (vec_concat:V16HI (match_dup 1) (match_dup 2))
20567 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)
20568 (const_int 9) (const_int 11) (const_int 13) (const_int 15)]))))]
20571 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
20572 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
20573 [(set_attr "isa" "noavx,avx")
20574 (set_attr "type" "sseiadd")
20575 (set_attr "atom_unit" "complex")
20576 (set_attr "prefix_data16" "1,*")
20577 (set_attr "prefix_extra" "1")
20578 (set_attr "prefix" "orig,vex")
20579 (set_attr "mode" "TI")])
20581 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>wv4hi3"
20582 [(set (match_operand:V4HI 0 "register_operand" "=y,x,x")
20583 (ssse3_plusminus:V4HI
20586 (match_operand:V4HI 1 "register_operand" "0,0,x")
20587 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,x"))
20589 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))
20591 (vec_concat:V8HI (match_dup 1) (match_dup 2))
20593 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))]
20594 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
20596 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
20599 "TARGET_SSSE3 && reload_completed
20600 && SSE_REGNO_P (REGNO (operands[0]))"
20603 /* Generate SSE version of the operation. */
20604 rtx op0 = lowpart_subreg (V8HImode, operands[0],
20605 GET_MODE (operands[0]));
20606 rtx op1 = lowpart_subreg (V8HImode, operands[1],
20607 GET_MODE (operands[1]));
20608 rtx op2 = lowpart_subreg (V8HImode, operands[2],
20609 GET_MODE (operands[2]));
20610 emit_insn (gen_ssse3_ph<plusminus_mnemonic>wv8hi3 (op0, op1, op2));
20611 ix86_move_vector_high_sse_to_mmx (op0);
20614 [(set_attr "mmx_isa" "native,sse_noavx,avx")
20615 (set_attr "type" "sseiadd")
20616 (set_attr "atom_unit" "complex")
20617 (set_attr "prefix_extra" "1")
20618 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
20619 (set_attr "mode" "DI,TI,TI")])
20621 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
20622 [(set (match_operand:V8SI 0 "register_operand" "=x")
20626 (match_operand:V8SI 1 "register_operand" "x")
20627 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
20629 [(const_int 0) (const_int 2) (const_int 8) (const_int 10)
20630 (const_int 4) (const_int 6) (const_int 12) (const_int 14)]))
20632 (vec_concat:V16SI (match_dup 1) (match_dup 2))
20634 [(const_int 1) (const_int 3) (const_int 9) (const_int 11)
20635 (const_int 5) (const_int 7) (const_int 13) (const_int 15)]))))]
20637 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
20638 [(set_attr "type" "sseiadd")
20639 (set_attr "prefix_extra" "1")
20640 (set_attr "prefix" "vex")
20641 (set_attr "mode" "OI")])
20643 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
20644 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
20648 (match_operand:V4SI 1 "register_operand" "0,x")
20649 (match_operand:V4SI 2 "vector_operand" "xBm,xm"))
20651 [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))
20653 (vec_concat:V8SI (match_dup 1) (match_dup 2))
20655 [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))))]
20658 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
20659 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
20660 [(set_attr "isa" "noavx,avx")
20661 (set_attr "type" "sseiadd")
20662 (set_attr "atom_unit" "complex")
20663 (set_attr "prefix_data16" "1,*")
20664 (set_attr "prefix_extra" "1")
20665 (set_attr "prefix" "orig,vex")
20666 (set_attr "mode" "TI")])
20668 (define_insn_and_split "ssse3_ph<plusminus_mnemonic>dv2si3"
20669 [(set (match_operand:V2SI 0 "register_operand" "=y,x,x")
20673 (match_operand:V2SI 1 "register_operand" "0,0,x")
20674 (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,x"))
20675 (parallel [(const_int 0) (const_int 2)]))
20677 (vec_concat:V4SI (match_dup 1) (match_dup 2))
20678 (parallel [(const_int 1) (const_int 3)]))))]
20679 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
20681 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
20684 "TARGET_SSSE3 && reload_completed
20685 && SSE_REGNO_P (REGNO (operands[0]))"
20688 /* Generate SSE version of the operation. */
20689 rtx op0 = lowpart_subreg (V4SImode, operands[0],
20690 GET_MODE (operands[0]));
20691 rtx op1 = lowpart_subreg (V4SImode, operands[1],
20692 GET_MODE (operands[1]));
20693 rtx op2 = lowpart_subreg (V4SImode, operands[2],
20694 GET_MODE (operands[2]));
20695 emit_insn (gen_ssse3_ph<plusminus_mnemonic>dv4si3 (op0, op1, op2));
20696 ix86_move_vector_high_sse_to_mmx (op0);
20699 [(set_attr "mmx_isa" "native,sse_noavx,avx")
20700 (set_attr "type" "sseiadd")
20701 (set_attr "atom_unit" "complex")
20702 (set_attr "prefix_extra" "1")
20703 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
20704 (set_attr "mode" "DI,TI,TI")])
20706 (define_insn "avx2_pmaddubsw256"
20707 [(set (match_operand:V16HI 0 "register_operand" "=Yw")
20712 (match_operand:V32QI 1 "register_operand" "Yw")
20713 (parallel [(const_int 0) (const_int 2)
20714 (const_int 4) (const_int 6)
20715 (const_int 8) (const_int 10)
20716 (const_int 12) (const_int 14)
20717 (const_int 16) (const_int 18)
20718 (const_int 20) (const_int 22)
20719 (const_int 24) (const_int 26)
20720 (const_int 28) (const_int 30)])))
20723 (match_operand:V32QI 2 "nonimmediate_operand" "Ywm")
20724 (parallel [(const_int 0) (const_int 2)
20725 (const_int 4) (const_int 6)
20726 (const_int 8) (const_int 10)
20727 (const_int 12) (const_int 14)
20728 (const_int 16) (const_int 18)
20729 (const_int 20) (const_int 22)
20730 (const_int 24) (const_int 26)
20731 (const_int 28) (const_int 30)]))))
20734 (vec_select:V16QI (match_dup 1)
20735 (parallel [(const_int 1) (const_int 3)
20736 (const_int 5) (const_int 7)
20737 (const_int 9) (const_int 11)
20738 (const_int 13) (const_int 15)
20739 (const_int 17) (const_int 19)
20740 (const_int 21) (const_int 23)
20741 (const_int 25) (const_int 27)
20742 (const_int 29) (const_int 31)])))
20744 (vec_select:V16QI (match_dup 2)
20745 (parallel [(const_int 1) (const_int 3)
20746 (const_int 5) (const_int 7)
20747 (const_int 9) (const_int 11)
20748 (const_int 13) (const_int 15)
20749 (const_int 17) (const_int 19)
20750 (const_int 21) (const_int 23)
20751 (const_int 25) (const_int 27)
20752 (const_int 29) (const_int 31)]))))))]
20754 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
20755 [(set_attr "type" "sseiadd")
20756 (set_attr "prefix_extra" "1")
20757 (set_attr "prefix" "vex")
20758 (set_attr "mode" "OI")])
20760 ;; The correct representation for this is absolutely enormous, and
20761 ;; surely not generally useful.
20762 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
20763 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
20764 (unspec:VI2_AVX512VL
20765 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
20766 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
20767 UNSPEC_PMADDUBSW512))]
20769 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
20770 [(set_attr "type" "sseiadd")
20771 (set_attr "prefix" "evex")
20772 (set_attr "mode" "XI")])
20774 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
20775 [(set (match_operand:V32HI 0 "register_operand" "=v")
20782 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
20784 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
20786 (const_vector:V32HI [(const_int 1) (const_int 1)
20787 (const_int 1) (const_int 1)
20788 (const_int 1) (const_int 1)
20789 (const_int 1) (const_int 1)
20790 (const_int 1) (const_int 1)
20791 (const_int 1) (const_int 1)
20792 (const_int 1) (const_int 1)
20793 (const_int 1) (const_int 1)
20794 (const_int 1) (const_int 1)
20795 (const_int 1) (const_int 1)
20796 (const_int 1) (const_int 1)
20797 (const_int 1) (const_int 1)
20798 (const_int 1) (const_int 1)
20799 (const_int 1) (const_int 1)
20800 (const_int 1) (const_int 1)
20801 (const_int 1) (const_int 1)]))
20804 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20805 [(set_attr "type" "sseimul")
20806 (set_attr "prefix" "evex")
20807 (set_attr "mode" "XI")])
20809 (define_insn "ssse3_pmaddubsw128"
20810 [(set (match_operand:V8HI 0 "register_operand" "=x,Yw")
20815 (match_operand:V16QI 1 "register_operand" "0,Yw")
20816 (parallel [(const_int 0) (const_int 2)
20817 (const_int 4) (const_int 6)
20818 (const_int 8) (const_int 10)
20819 (const_int 12) (const_int 14)])))
20822 (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")
20823 (parallel [(const_int 0) (const_int 2)
20824 (const_int 4) (const_int 6)
20825 (const_int 8) (const_int 10)
20826 (const_int 12) (const_int 14)]))))
20829 (vec_select:V8QI (match_dup 1)
20830 (parallel [(const_int 1) (const_int 3)
20831 (const_int 5) (const_int 7)
20832 (const_int 9) (const_int 11)
20833 (const_int 13) (const_int 15)])))
20835 (vec_select:V8QI (match_dup 2)
20836 (parallel [(const_int 1) (const_int 3)
20837 (const_int 5) (const_int 7)
20838 (const_int 9) (const_int 11)
20839 (const_int 13) (const_int 15)]))))))]
20842 pmaddubsw\t{%2, %0|%0, %2}
20843 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
20844 [(set_attr "isa" "noavx,avx")
20845 (set_attr "type" "sseiadd")
20846 (set_attr "atom_unit" "simul")
20847 (set_attr "prefix_data16" "1,*")
20848 (set_attr "prefix_extra" "1")
20849 (set_attr "prefix" "orig,vex")
20850 (set_attr "mode" "TI")])
20852 (define_insn "ssse3_pmaddubsw"
20853 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
20858 (match_operand:V8QI 1 "register_operand" "0,0,Yv")
20859 (parallel [(const_int 0) (const_int 2)
20860 (const_int 4) (const_int 6)])))
20863 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
20864 (parallel [(const_int 0) (const_int 2)
20865 (const_int 4) (const_int 6)]))))
20868 (vec_select:V4QI (match_dup 1)
20869 (parallel [(const_int 1) (const_int 3)
20870 (const_int 5) (const_int 7)])))
20872 (vec_select:V4QI (match_dup 2)
20873 (parallel [(const_int 1) (const_int 3)
20874 (const_int 5) (const_int 7)]))))))]
20875 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
20877 pmaddubsw\t{%2, %0|%0, %2}
20878 pmaddubsw\t{%2, %0|%0, %2}
20879 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
20880 [(set_attr "isa" "*,noavx,avx")
20881 (set_attr "mmx_isa" "native,*,*")
20882 (set_attr "type" "sseiadd")
20883 (set_attr "atom_unit" "simul")
20884 (set_attr "prefix_extra" "1")
20885 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
20886 (set_attr "mode" "DI,TI,TI")])
;; Vector HI modes covered by the masked pmulhrsw expander below:
;; V8HI unconditionally, V16HI only when AVX2 is available.
20888 (define_mode_iterator PMULHRSW
20889 [V8HI (V16HI "TARGET_AVX2")])
20891 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
20892 [(set (match_operand:PMULHRSW 0 "register_operand")
20893 (vec_merge:PMULHRSW
20895 (lshiftrt:<ssedoublemode>
20896 (plus:<ssedoublemode>
20897 (lshiftrt:<ssedoublemode>
20898 (mult:<ssedoublemode>
20899 (sign_extend:<ssedoublemode>
20900 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
20901 (sign_extend:<ssedoublemode>
20902 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
20906 (match_operand:PMULHRSW 3 "register_operand")
20907 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
20908 "TARGET_AVX512BW && TARGET_AVX512VL"
20910 operands[5] = CONST1_RTX(<MODE>mode);
20911 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
20914 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
20915 [(set (match_operand:PMULHRSW 0 "register_operand")
20917 (lshiftrt:<ssedoublemode>
20918 (plus:<ssedoublemode>
20919 (lshiftrt:<ssedoublemode>
20920 (mult:<ssedoublemode>
20921 (sign_extend:<ssedoublemode>
20922 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
20923 (sign_extend:<ssedoublemode>
20924 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
20930 operands[3] = CONST1_RTX(<MODE>mode);
20931 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
20934 (define_expand "smulhrs<mode>3"
20935 [(set (match_operand:VI2_AVX2 0 "register_operand")
20937 (lshiftrt:<ssedoublemode>
20938 (plus:<ssedoublemode>
20939 (lshiftrt:<ssedoublemode>
20940 (mult:<ssedoublemode>
20941 (sign_extend:<ssedoublemode>
20942 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
20943 (sign_extend:<ssedoublemode>
20944 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
20950 operands[3] = CONST1_RTX(<MODE>mode);
20951 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
20954 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
20955 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
20957 (lshiftrt:<ssedoublemode>
20958 (plus:<ssedoublemode>
20959 (lshiftrt:<ssedoublemode>
20960 (mult:<ssedoublemode>
20961 (sign_extend:<ssedoublemode>
20962 (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>"))
20963 (sign_extend:<ssedoublemode>
20964 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))
20966 (match_operand:VI2_AVX2 3 "const1_operand"))
20968 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
20969 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
20971 pmulhrsw\t{%2, %0|%0, %2}
20972 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
20973 [(set_attr "isa" "noavx,avx")
20974 (set_attr "type" "sseimul")
20975 (set_attr "prefix_data16" "1,*")
20976 (set_attr "prefix_extra" "1")
20977 (set_attr "prefix" "orig,maybe_evex")
20978 (set_attr "mode" "<sseinsnmode>")])
20980 (define_expand "smulhrsv4hi3"
20981 [(set (match_operand:V4HI 0 "register_operand")
20988 (match_operand:V4HI 1 "register_operand"))
20990 (match_operand:V4HI 2 "register_operand")))
20994 "TARGET_MMX_WITH_SSE && TARGET_SSSE3"
20995 "operands[3] = CONST1_RTX(V4HImode);")
20997 (define_expand "ssse3_pmulhrswv4hi3"
20998 [(set (match_operand:V4HI 0 "register_operand")
21005 (match_operand:V4HI 1 "register_mmxmem_operand"))
21007 (match_operand:V4HI 2 "register_mmxmem_operand")))
21011 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
21013 operands[3] = CONST1_RTX(V4HImode);
21014 ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
21017 (define_insn "*ssse3_pmulhrswv4hi3"
21018 [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
21025 (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
21027 (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
21029 (match_operand:V4HI 3 "const1_operand"))
21031 "(TARGET_MMX || TARGET_MMX_WITH_SSE)
21033 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
21035 pmulhrsw\t{%2, %0|%0, %2}
21036 pmulhrsw\t{%2, %0|%0, %2}
21037 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
21038 [(set_attr "isa" "*,noavx,avx")
21039 (set_attr "mmx_isa" "native,*,*")
21040 (set_attr "type" "sseimul")
21041 (set_attr "prefix_extra" "1")
21042 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
21043 (set_attr "mode" "DI,TI,TI")])
21045 (define_expand "smulhrsv2hi3"
21046 [(set (match_operand:V2HI 0 "register_operand")
21053 (match_operand:V2HI 1 "register_operand"))
21055 (match_operand:V2HI 2 "register_operand")))
21060 "operands[3] = CONST1_RTX(V2HImode);")
21062 (define_insn "*smulhrsv2hi3"
21063 [(set (match_operand:V2HI 0 "register_operand" "=x,Yv")
21070 (match_operand:V2HI 1 "register_operand" "%0,Yv"))
21072 (match_operand:V2HI 2 "register_operand" "x,Yv")))
21074 (match_operand:V2HI 3 "const1_operand"))
21077 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
21079 pmulhrsw\t{%2, %0|%0, %2}
21080 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
21081 [(set_attr "isa" "noavx,avx")
21082 (set_attr "type" "sseimul")
21083 (set_attr "prefix_extra" "1")
21084 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
21085 (set_attr "mode" "TI")])
21087 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
21088 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
21090 [(match_operand:VI1_AVX512 1 "register_operand" "0,<v_Yw>")
21091 (match_operand:VI1_AVX512 2 "vector_operand" "xBm,<v_Yw>m")]
21093 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
21095 pshufb\t{%2, %0|%0, %2}
21096 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21097 [(set_attr "isa" "noavx,avx")
21098 (set_attr "type" "sselog1")
21099 (set_attr "prefix_data16" "1,*")
21100 (set_attr "prefix_extra" "1")
21101 (set_attr "prefix" "orig,maybe_evex")
21102 (set_attr "btver2_decode" "vector")
21103 (set_attr "mode" "<sseinsnmode>")])
21105 (define_expand "ssse3_pshufbv8qi3"
21107 [(set (match_operand:V8QI 0 "register_operand")
21108 (unspec:V8QI [(match_operand:V8QI 1 "register_operand")
21109 (match_operand:V8QI 2 "register_mmxmem_operand")
21110 (match_dup 3)] UNSPEC_PSHUFB))
21111 (clobber (match_scratch:V4SI 4))])]
21112 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
21114 operands[3] = ix86_build_const_vector (V4SImode, true,
21115 gen_int_mode (0xf7f7f7f7, SImode));
21118 (define_insn_and_split "*ssse3_pshufbv8qi3"
21119 [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
21120 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yw")
21121 (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")
21122 (match_operand:V4SI 4 "reg_or_const_vector_operand"
21125 (clobber (match_scratch:V4SI 3 "=X,&x,&Yw"))]
21126 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
21128 pshufb\t{%2, %0|%0, %2}
21131 "TARGET_SSSE3 && reload_completed
21132 && SSE_REGNO_P (REGNO (operands[0]))"
21133 [(set (match_dup 3)
21134 (and:V4SI (match_dup 3) (match_dup 2)))
21136 (unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
21138 /* Emulate MMX version of pshufb with SSE version by masking out the
21139 bit 3 of the shuffle control byte. */
21140 operands[0] = lowpart_subreg (V16QImode, operands[0],
21141 GET_MODE (operands[0]));
21142 operands[1] = lowpart_subreg (V16QImode, operands[1],
21143 GET_MODE (operands[1]));
21144 operands[2] = lowpart_subreg (V4SImode, operands[2],
21145 GET_MODE (operands[2]));
21146 operands[4] = lowpart_subreg (V16QImode, operands[3],
21147 GET_MODE (operands[3]));
21149 [(set_attr "mmx_isa" "native,sse_noavx,avx")
21150 (set_attr "prefix_extra" "1")
21151 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
21152 (set_attr "mode" "DI,TI,TI")])
21154 (define_insn "<ssse3_avx2>_psign<mode>3"
21155 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
21157 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
21158 (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
21162 psign<ssemodesuffix>\t{%2, %0|%0, %2}
21163 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
21164 [(set_attr "isa" "noavx,avx")
21165 (set_attr "type" "sselog1")
21166 (set_attr "prefix_data16" "1,*")
21167 (set_attr "prefix_extra" "1")
21168 (set_attr "prefix" "orig,vex")
21169 (set_attr "mode" "<sseinsnmode>")])
21171 (define_insn "ssse3_psign<mode>3"
21172 [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x")
21174 [(match_operand:MMXMODEI 1 "register_operand" "0,0,x")
21175 (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x")]
21177 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
21179 psign<mmxvecsize>\t{%2, %0|%0, %2}
21180 psign<mmxvecsize>\t{%2, %0|%0, %2}
21181 vpsign<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
21182 [(set_attr "isa" "*,noavx,avx")
21183 (set_attr "mmx_isa" "native,*,*")
21184 (set_attr "type" "sselog1")
21185 (set_attr "prefix_extra" "1")
21186 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
21187 (set_attr "mode" "DI,TI,TI")])
21189 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
21190 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
21191 (vec_merge:VI1_AVX512
21193 [(match_operand:VI1_AVX512 1 "register_operand" "v")
21194 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
21195 (match_operand:SI 3 "const_0_to_255_mul_8_operand")]
21197 (match_operand:VI1_AVX512 4 "nonimm_or_0_operand" "0C")
21198 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
21199 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
21201 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
21202 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
21204 [(set_attr "type" "sseishft")
21205 (set_attr "atom_unit" "sishuf")
21206 (set_attr "prefix_extra" "1")
21207 (set_attr "length_immediate" "1")
21208 (set_attr "prefix" "evex")
21209 (set_attr "mode" "<sseinsnmode>")])
21211 (define_insn "<ssse3_avx2>_palignr<mode>"
21212 [(set (match_operand:VIMAX_AVX2_AVX512BW 0 "register_operand" "=x,<v_Yw>")
21213 (unspec:VIMAX_AVX2_AVX512BW
21214 [(match_operand:VIMAX_AVX2_AVX512BW 1 "register_operand" "0,<v_Yw>")
21215 (match_operand:VIMAX_AVX2_AVX512BW 2 "vector_operand" "xBm,<v_Yw>m")
21216 (match_operand:SI 3 "const_0_to_255_mul_8_operand")]
21220 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
21222 switch (which_alternative)
21225 return "palignr\t{%3, %2, %0|%0, %2, %3}";
21227 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
21229 gcc_unreachable ();
21232 [(set_attr "isa" "noavx,avx")
21233 (set_attr "type" "sseishft")
21234 (set_attr "atom_unit" "sishuf")
21235 (set_attr "prefix_data16" "1,*")
21236 (set_attr "prefix_extra" "1")
21237 (set_attr "length_immediate" "1")
21238 (set_attr "prefix" "orig,vex")
21239 (set_attr "mode" "<sseinsnmode>")])
21241 (define_insn_and_split "ssse3_palignrdi"
21242 [(set (match_operand:DI 0 "register_operand" "=y,x,Yw")
21243 (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yw")
21244 (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yw")
21245 (match_operand:SI 3 "const_0_to_255_mul_8_operand")]
21247 "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
21249 switch (which_alternative)
21252 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
21253 return "palignr\t{%3, %2, %0|%0, %2, %3}";
21258 gcc_unreachable ();
21261 "(TARGET_SSSE3 && reload_completed
21262 && SSE_REGNO_P (REGNO (operands[0])))
21263 || operands[3] == const0_rtx
21264 || INTVAL (operands[3]) == 64"
21265 [(set (match_dup 0)
21266 (lshiftrt:V1TI (match_dup 0) (match_dup 3)))]
21268 if (operands[3] == const0_rtx)
21270 if (!rtx_equal_p (operands[0], operands[2]))
21271 emit_move_insn (operands[0], operands[2]);
21273 emit_note (NOTE_INSN_DELETED);
21276 else if (INTVAL (operands[3]) == 64)
21278 if (!rtx_equal_p (operands[0], operands[1]))
21279 emit_move_insn (operands[0], operands[1]);
21281 emit_note (NOTE_INSN_DELETED);
21285 /* Emulate MMX palignrdi with SSE psrldq. */
21286 rtx op0 = lowpart_subreg (V2DImode, operands[0],
21287 GET_MODE (operands[0]));
21289 emit_insn (gen_vec_concatv2di (op0, operands[2], operands[1]));
21292 /* NB: SSE can only concatenate OP0 and OP1 to OP0. */
21293 emit_insn (gen_vec_concatv2di (op0, operands[1], operands[2]));
21294 /* Swap bits 0:63 with bits 64:127. */
21295 rtx mask = gen_rtx_PARALLEL (VOIDmode,
21296 gen_rtvec (4, GEN_INT (2),
21300 rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0));
21301 rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
21302 emit_insn (gen_rtx_SET (op1, op2));
21304 operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0));
21306 [(set_attr "mmx_isa" "native,sse_noavx,avx")
21307 (set_attr "type" "sseishft")
21308 (set_attr "atom_unit" "sishuf")
21309 (set_attr "prefix_extra" "1")
21310 (set_attr "length_immediate" "1")
21311 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
21312 (set_attr "mode" "DI,TI,TI")])
21314 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
21315 ;; modes for abs instruction on pre AVX-512 targets.
;; The DI-element modes are gated on AVX512F/AVX512VL because the
;; quadword form of the abs instruction only exists in AVX-512, while
;; byte/word forms additionally need AVX512BW at 512-bit width.
21316 (define_mode_iterator VI1248_AVX512VL_AVX512BW
21317 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
21318 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
21319 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
21320 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
21322 (define_insn "*abs<mode>2"
21323 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=<v_Yw>")
21324 (abs:VI1248_AVX512VL_AVX512BW
21325 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "<v_Yw>Bm")))]
21327 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
21328 [(set_attr "type" "sselog1")
21329 (set_attr "prefix_data16" "1")
21330 (set_attr "prefix_extra" "1")
21331 (set_attr "prefix" "maybe_vex")
21332 (set_attr "mode" "<sseinsnmode>")])
21334 (define_insn "abs<mode>2_mask"
21335 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
21336 (vec_merge:VI48_AVX512VL
21338 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
21339 (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "0C")
21340 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
21342 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21343 [(set_attr "type" "sselog1")
21344 (set_attr "prefix" "evex")
21345 (set_attr "mode" "<sseinsnmode>")])
21347 (define_insn "abs<mode>2_mask"
21348 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
21349 (vec_merge:VI12_AVX512VL
21351 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
21352 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
21353 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
21355 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21356 [(set_attr "type" "sselog1")
21357 (set_attr "prefix" "evex")
21358 (set_attr "mode" "<sseinsnmode>")])
21360 (define_expand "abs<mode>2"
21361 [(set (match_operand:VI_AVX2 0 "register_operand")
21363 (match_operand:VI_AVX2 1 "vector_operand")))]
21367 || ((<MODE>mode == V2DImode || <MODE>mode == V4DImode)
21368 && !TARGET_AVX512VL))
21370 ix86_expand_sse2_abs (operands[0], operands[1]);
21375 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
21377 ;; AMD SSE4A instructions
21379 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
21381 (define_insn "sse4a_movnt<mode>"
21382 [(set (match_operand:MODEF 0 "memory_operand" "=m")
21384 [(match_operand:MODEF 1 "register_operand" "x")]
21387 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
21388 [(set_attr "type" "ssemov")
21389 (set_attr "mode" "<MODE>")])
21391 (define_insn "sse4a_vmmovnt<mode>"
21392 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
21393 (unspec:<ssescalarmode>
21394 [(vec_select:<ssescalarmode>
21395 (match_operand:VF_128 1 "register_operand" "x")
21396 (parallel [(const_int 0)]))]
21399 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
21400 [(set_attr "type" "ssemov")
21401 (set_attr "mode" "<ssescalarmode>")])
21403 (define_insn "sse4a_extrqi"
21404 [(set (match_operand:V2DI 0 "register_operand" "=x")
21405 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
21406 (match_operand 2 "const_0_to_255_operand")
21407 (match_operand 3 "const_0_to_255_operand")]
21410 "extrq\t{%3, %2, %0|%0, %2, %3}"
21411 [(set_attr "type" "sse")
21412 (set_attr "prefix_data16" "1")
21413 (set_attr "length_immediate" "2")
21414 (set_attr "mode" "TI")])
21416 (define_insn "sse4a_extrq"
21417 [(set (match_operand:V2DI 0 "register_operand" "=x")
21418 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
21419 (match_operand:V16QI 2 "register_operand" "x")]
21422 "extrq\t{%2, %0|%0, %2}"
21423 [(set_attr "type" "sse")
21424 (set_attr "prefix_data16" "1")
21425 (set_attr "mode" "TI")])
21427 (define_insn "sse4a_insertqi"
21428 [(set (match_operand:V2DI 0 "register_operand" "=x")
21429 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
21430 (match_operand:V2DI 2 "register_operand" "x")
21431 (match_operand 3 "const_0_to_255_operand")
21432 (match_operand 4 "const_0_to_255_operand")]
21435 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
21436 [(set_attr "type" "sseins")
21437 (set_attr "prefix_data16" "0")
21438 (set_attr "prefix_rep" "1")
21439 (set_attr "length_immediate" "2")
21440 (set_attr "mode" "TI")])
21442 (define_insn "sse4a_insertq"
21443 [(set (match_operand:V2DI 0 "register_operand" "=x")
21444 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
21445 (match_operand:V2DI 2 "register_operand" "x")]
21448 "insertq\t{%2, %0|%0, %2}"
21449 [(set_attr "type" "sseins")
21450 (set_attr "prefix_data16" "0")
21451 (set_attr "prefix_rep" "1")
21452 (set_attr "mode" "TI")])
21454 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
21456 ;; Intel SSE4.1 instructions
21458 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
21460 ;; Mapping of immediate bits for blend instructions
;; Maximum immediate per mode: one selector bit per vector element,
;; i.e. 2^nelts - 1 (8 elements -> 255, 4 -> 15, 2 -> 3).
21461 (define_mode_attr blendbits
21462 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
21464 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
21465 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
21466 (vec_merge:VF_128_256
21467 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
21468 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
21469 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
21472 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
21473 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
21474 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21475 [(set_attr "isa" "noavx,noavx,avx")
21476 (set_attr "type" "ssemov")
21477 (set_attr "length_immediate" "1")
21478 (set_attr "prefix_data16" "1,1,*")
21479 (set_attr "prefix_extra" "1")
21480 (set_attr "prefix" "orig,orig,vex")
21481 (set_attr "mode" "<MODE>")])
;; BLENDVPS/BLENDVPD: variable blend, selector in operand 3 (implicit XMM0
;; for the non-AVX alternatives, constraint "Yz"; any reg for AVX).
;; NOTE(review): the unspec wrapper line and insn condition are elided from
;; this chunk (numbering gaps around 21485 and 21489-21491).
21483 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
21484 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
21486 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
21487 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
21488 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
21492 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
21493 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
21494 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21495 [(set_attr "isa" "noavx,noavx,avx")
21496 (set_attr "type" "ssemov")
21497 (set_attr "length_immediate" "1")
21498 (set_attr "prefix_data16" "1,1,*")
21499 (set_attr "prefix_extra" "1")
21500 (set_attr "prefix" "orig,orig,vex")
21501 (set_attr "btver2_decode" "vector,vector,vector")
21502 (set_attr "mode" "<MODE>")])
21504 ;; Also define scalar versions. These are used for conditional move.
21505 ;; Using subregs into vector modes causes register allocation lossage.
21506 ;; These patterns do not allow memory operands because the native
21507 ;; instructions read the full 128-bits.
;; Scalar (SF/DF) blendv used for conditional move; register-only operands
;; because the hardware instruction reads all 128 bits (see comment above).
;; The output template picks the "ps" form when attr mode is V4SF, otherwise
;; the mode-suffixed form; AVX alternative (2) uses the v-prefixed mnemonic.
;; The mode attr below prefers V4SF when optimizing for size or when packed-
;; single is faster on the target.
21509 (define_insn "sse4_1_blendv<ssemodesuffix>"
21510 [(set (match_operand:MODEF 0 "register_operand" "=Yr,*x,x")
21512 [(match_operand:MODEF 1 "register_operand" "0,0,x")
21513 (match_operand:MODEF 2 "register_operand" "Yr,*x,x")
21514 (match_operand:MODEF 3 "register_operand" "Yz,Yz,x")]
21518 if (get_attr_mode (insn) == MODE_V4SF)
21519 return (which_alternative == 2
21520 ? "vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21521 : "blendvps\t{%3, %2, %0|%0, %2, %3}");
21523 return (which_alternative == 2
21524 ? "vblendv<ssevecmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21525 : "blendv<ssevecmodesuffix>\t{%3, %2, %0|%0, %2, %3}");
21527 [(set_attr "isa" "noavx,noavx,avx")
21528 (set_attr "type" "ssemov")
21529 (set_attr "length_immediate" "1")
21530 (set_attr "prefix_data16" "1,1,*")
21531 (set_attr "prefix_extra" "1")
21532 (set_attr "prefix" "orig,orig,vex")
21533 (set_attr "btver2_decode" "vector,vector,vector")
21535 (cond [(match_test "TARGET_AVX")
21536 (const_string "<ssevecmode>")
21537 (match_test "optimize_function_for_size_p (cfun)")
21538 (const_string "V4SF")
21539 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
21540 (const_string "V4SF")
21542 (const_string "<ssevecmode>")))])
;; Combine a "mask < 0" comparison feeding a blend into plain blendv: BLENDV
;; only inspects the sign bit of each selector element, so the lt-against-
;; zero is redundant.  Splits after reload into the UNSPEC_BLENDV form with
;; the integer mask re-viewed in the float mode.
21544 (define_insn_and_split "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
21545 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
21547 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
21548 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
21550 (match_operand:<sseintvecmode> 3 "register_operand" "Yz,Yz,x")
21551 (match_operand:<sseintvecmode> 4 "const0_operand"))]
21555 "&& reload_completed"
21556 [(set (match_dup 0)
21558 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
21559 "operands[3] = gen_lowpart (<MODE>mode, operands[3]);"
21560 [(set_attr "isa" "noavx,noavx,avx")
21561 (set_attr "type" "ssemov")
21562 (set_attr "length_immediate" "1")
21563 (set_attr "prefix_data16" "1,1,*")
21564 (set_attr "prefix_extra" "1")
21565 (set_attr "prefix" "orig,orig,vex")
21566 (set_attr "btver2_decode" "vector,vector,vector")
21567 (set_attr "mode" "<MODE>")])
;; Float-mode suffix ("ps"/"pd") and float vector mode corresponding to each
;; integer vector mode, used by the _ltint patterns below.
21569 (define_mode_attr ssefltmodesuffix
21570 [(V2DI "pd") (V4DI "pd") (V4SI "ps") (V8SI "ps")])
21572 (define_mode_attr ssefltvecmode
21573 [(V2DI "V2DF") (V4DI "V4DF") (V4SI "V4SF") (V8SI "V8SF")])
;; Integer-mask variant of the _lt combine above: a byte-mode blend whose
;; selector is an int-vector "x < 0" viewed via subreg.  After reload, lower
;; everything to the float-mode UNSPEC_BLENDV (element sizes 32/64 bits, so
;; vblendvps/vblendvpd give the same per-element selection).
21575 (define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint"
21576 [(set (match_operand:<ssebytemode> 0 "register_operand" "=Yr,*x,x")
21577 (unspec:<ssebytemode>
21578 [(match_operand:<ssebytemode> 1 "register_operand" "0,0,x")
21579 (match_operand:<ssebytemode> 2 "vector_operand" "YrBm,*xBm,xm")
21580 (subreg:<ssebytemode>
21582 (match_operand:VI48_AVX 3 "register_operand" "Yz,Yz,x")
21583 (match_operand:VI48_AVX 4 "const0_operand")) 0)]
21587 "&& reload_completed"
21588 [(set (match_dup 0)
21589 (unspec:<ssefltvecmode>
21590 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
21592 operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
21593 operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
21594 operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]);
21595 operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
21597 [(set_attr "isa" "noavx,noavx,avx")
21598 (set_attr "type" "ssemov")
21599 (set_attr "length_immediate" "1")
21600 (set_attr "prefix_data16" "1,1,*")
21601 (set_attr "prefix_extra" "1")
21602 (set_attr "prefix" "orig,orig,vex")
21603 (set_attr "btver2_decode" "vector,vector,vector")
21604 (set_attr "mode" "<ssefltvecmode>")])
21606 ;; PR target/100738: Transform vpcmpeqd + vpxor + vblendvps to vblendvps for inverted mask;
;; Inverted-mask variant: the selector is (not x) < 0, which is equivalent to
;; swapping the two data operands of the blend -- note the split emits
;; [op2, op1, op3] instead of [op1, op2, op3].  Pre-reload split only.
21607 (define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint"
21608 [(set (match_operand:<ssebytemode> 0 "register_operand")
21609 (unspec:<ssebytemode>
21610 [(match_operand:<ssebytemode> 1 "register_operand")
21611 (match_operand:<ssebytemode> 2 "vector_operand")
21612 (subreg:<ssebytemode>
21616 (match_operand:<ssebytemode> 3 "register_operand")) 0)
21617 (match_operand:VI48_AVX 4 "const0_operand")) 0)]
21619 "TARGET_SSE4_1 && ix86_pre_reload_split ()"
21622 [(set (match_dup 0)
21623 (unspec:<ssefltvecmode>
21624 [(match_dup 2) (match_dup 1) (match_dup 3)] UNSPEC_BLENDV))]
21626 operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
21627 operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
21628 operands[2] = force_reg (<ssefltvecmode>mode,
21629 gen_lowpart (<ssefltvecmode>mode, operands[2]));
21630 operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
;; DPPS/DPPD dot product; operand 3 is the 8-bit input/output element mask.
;; Operand 1 is commutative ("%0").  Decodes as a vector (multi-uop) op on
;; btver2 and znver1.
21633 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
21634 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
21636 [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
21637 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
21638 (match_operand:SI 3 "const_0_to_255_operand")]
21642 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
21643 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
21644 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21645 [(set_attr "isa" "noavx,noavx,avx")
21646 (set_attr "type" "ssemul")
21647 (set_attr "length_immediate" "1")
21648 (set_attr "prefix_data16" "1,1,*")
21649 (set_attr "prefix_extra" "1")
21650 (set_attr "prefix" "orig,orig,vex")
21651 (set_attr "btver2_decode" "vector,vector,vector")
21652 (set_attr "znver1_decode" "vector,vector,vector")
21653 (set_attr "mode" "<MODE>")])
21655 ;; Mode attribute used by `vmovntdqa' pattern
21656 (define_mode_attr vi8_sse4_1_avx2_avx512
21657 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
;; MOVNTDQA non-temporal aligned load (memory operand only); pattern name
;; prefix selects sse4_1/avx2/avx512f by vector width via the attr above.
21659 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
21660 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
21661 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
21664 "%vmovntdqa\t{%1, %0|%0, %1}"
21665 [(set_attr "isa" "noavx,noavx,avx")
21666 (set_attr "type" "ssemov")
21667 (set_attr "prefix_extra" "1,1,*")
21668 (set_attr "prefix" "orig,orig,maybe_evex")
21669 (set_attr "mode" "<sseinsnmode>")])
;; MPSADBW multiple sum of absolute differences; immediate operand 3 selects
;; the source sub-blocks.  Vector-decoded on btver2/znver1.
21671 (define_insn "<sse4_1_avx2>_mpsadbw"
21672 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
21674 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
21675 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
21676 (match_operand:SI 3 "const_0_to_255_operand")]
21680 mpsadbw\t{%3, %2, %0|%0, %2, %3}
21681 mpsadbw\t{%3, %2, %0|%0, %2, %3}
21682 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21683 [(set_attr "isa" "noavx,noavx,avx")
21684 (set_attr "type" "sselog1")
21685 (set_attr "length_immediate" "1")
21686 (set_attr "prefix_extra" "1")
21687 (set_attr "prefix" "orig,orig,vex")
21688 (set_attr "btver2_decode" "vector,vector,vector")
21689 (set_attr "znver1_decode" "vector,vector,vector")
21690 (set_attr "mode" "<sseinsnmode>")])
;; PACKUSDW: unsigned-saturating truncate of two source vectors concatenated
;; into the narrower destination; AVX512 masking supported via <mask_name>.
21692 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
21693 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,<v_Yw>")
21694 (vec_concat:VI2_AVX2
21695 (us_truncate:<ssehalfvecmode>
21696 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,<v_Yw>"))
21697 (us_truncate:<ssehalfvecmode>
21698 (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,<v_Yw>m"))))]
21699 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
21701 packusdw\t{%2, %0|%0, %2}
21702 packusdw\t{%2, %0|%0, %2}
21703 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21704 [(set_attr "isa" "noavx,noavx,avx")
21705 (set_attr "type" "sselog")
21706 (set_attr "prefix_extra" "1")
21707 (set_attr "prefix" "orig,orig,<mask_prefix>")
21708 (set_attr "mode" "<sseinsnmode>")])
;; PBLENDVB byte-wise variable blend; selector operand 3 is implicit XMM0
;; ("Yz") for the non-AVX alternatives.
21710 (define_insn "<sse4_1_avx2>_pblendvb"
21711 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
21713 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
21714 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
21715 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
21719 pblendvb\t{%3, %2, %0|%0, %2, %3}
21720 pblendvb\t{%3, %2, %0|%0, %2, %3}
21721 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21722 [(set_attr "isa" "noavx,noavx,avx")
21723 (set_attr "type" "ssemov")
21724 (set_attr "prefix_extra" "1")
21725 (set_attr "length_immediate" "*,*,1")
21726 (set_attr "prefix" "orig,orig,vex")
21727 (set_attr "btver2_decode" "vector,vector,vector")
21728 (set_attr "mode" "<sseinsnmode>")])
;; Two splitters canonicalizing a blend whose selector is inverted (not) --
;; an inverted selector is absorbed by swapping the two data operands
;; (note [op2, op1, ...] in each result).  The second handles the selector
;; arriving as a same-size integer-vector subreg of a NOT.
;; NOTE(review): the opening "(define_split" lines and conditions are elided
;; from this chunk (numbering gaps 21728 -> 21731 and 21740 -> 21744) --
;; confirm against the full file.
21731 [(set (match_operand:VI1_AVX2 0 "register_operand")
21733 [(match_operand:VI1_AVX2 1 "vector_operand")
21734 (match_operand:VI1_AVX2 2 "register_operand")
21735 (not:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand"))]
21738 [(set (match_dup 0)
21740 [(match_dup 2) (match_dup 1) (match_dup 3)]
21744 [(set (match_operand:VI1_AVX2 0 "register_operand")
21746 [(match_operand:VI1_AVX2 1 "vector_operand")
21747 (match_operand:VI1_AVX2 2 "register_operand")
21748 (subreg:VI1_AVX2 (not (match_operand 3 "register_operand")) 0)]
21751 && GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT
21752 && GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE>"
21753 [(set (match_dup 0)
21755 [(match_dup 2) (match_dup 1) (match_dup 4)]
21757 "operands[4] = gen_lowpart (<MODE>mode, operands[3]);")
;; Byte-blend analogue of *_blendv_lt: fold a "selector < 0" comparison into
;; plain pblendvb, since the instruction tests only each byte's sign bit.
;; Splits (after reload) to the plain UNSPEC_BLENDV form.
21759 (define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt"
21760 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
21762 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
21763 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
21764 (lt:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")
21765 (match_operand:VI1_AVX2 4 "const0_operand"))]
21770 [(set (match_dup 0)
21772 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
21774 [(set_attr "isa" "noavx,noavx,avx")
21775 (set_attr "type" "ssemov")
21776 (set_attr "prefix_extra" "1")
21777 (set_attr "length_immediate" "*,*,1")
21778 (set_attr "prefix" "orig,orig,vex")
21779 (set_attr "btver2_decode" "vector,vector,vector")
21780 (set_attr "mode" "<sseinsnmode>")])
;; Pre-reload variant where the lt's selector is a subreg of a NOT: the
;; inversion is absorbed by swapping data operands 1/2 in the result.
21782 (define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt_subreg_not"
21783 [(set (match_operand:VI1_AVX2 0 "register_operand")
21785 [(match_operand:VI1_AVX2 2 "vector_operand")
21786 (match_operand:VI1_AVX2 1 "register_operand")
21789 (not (match_operand 3 "register_operand")) 0)
21790 (match_operand:VI1_AVX2 4 "const0_operand"))]
21793 && GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT
21794 && GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE>
21795 && ix86_pre_reload_split ()"
21798 [(set (match_dup 0)
21800 [(match_dup 1) (match_dup 2)
21801 (lt:VI1_AVX2 (match_dup 3) (match_dup 4))] UNSPEC_BLENDV))]
21802 "operands[3] = gen_lowpart (<MODE>mode, operands[3]);")
;; PBLENDW: 128-bit word blend under 8-bit immediate mask (one bit per
;; 16-bit element).
21804 (define_insn "sse4_1_pblend<ssemodesuffix>"
21805 [(set (match_operand:V8_128 0 "register_operand" "=Yr,*x,x")
21807 (match_operand:V8_128 2 "vector_operand" "YrBm,*xBm,xm")
21808 (match_operand:V8_128 1 "register_operand" "0,0,x")
21809 (match_operand:SI 3 "const_0_to_255_operand")))]
21812 pblendw\t{%3, %2, %0|%0, %2, %3}
21813 pblendw\t{%3, %2, %0|%0, %2, %3}
21814 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21815 [(set_attr "isa" "noavx,noavx,avx")
21816 (set_attr "type" "ssemov")
21817 (set_attr "prefix_extra" "1")
21818 (set_attr "length_immediate" "1")
21819 (set_attr "prefix" "orig,orig,vex")
21820 (set_attr "mode" "TI")])
21822 ;; The builtin uses an 8-bit immediate.  Expand that.
;; 256-bit vpblendw replicates its 8-bit immediate to both lanes; duplicate
;; the byte so the 16-element vec_merge mask matches hardware behavior.
21823 (define_expand "avx2_pblend<ssemodesuffix>"
21824 [(set (match_operand:V16_256 0 "register_operand")
21826 (match_operand:V16_256 2 "nonimmediate_operand")
21827 (match_operand:V16_256 1 "register_operand")
21828 (match_operand:SI 3 "const_0_to_255_operand")))]
21831 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
21832 operands[3] = GEN_INT (val << 8 | val);
;; Helper expander for masks touching only one 128-bit lane: do the blend
;; with vpblendw in that lane, then merge lanes with vpblendd (immediate 15
;; keeps the low lane, 240 the high lane).  Falls back to a plain move when
;; no element is selected from operand 2 in the other lane.
;; NOTE(review): several original lines are elided here (numbering gaps
;; around 21841-21861) -- the lane-selection control flow is incomplete in
;; this chunk; confirm against the full file.
21835 (define_expand "avx2_pblend<ssemodesuffix>_1"
21836 [(set (match_operand:V16_256 0 "register_operand")
21838 (match_operand:V16_256 2 "register_operand")
21839 (match_operand:V16_256 1 "register_operand")
21840 (match_operand:SI 3 "const_int_operand")))]
21842 && !((INTVAL (operands[3]) & 0xff) && (INTVAL (operands[3]) & 0xff00))"
21844 int mask = INTVAL (operands[3]);
21846 emit_move_insn (operands[0], operands[1]);
21849 rtx tmp = gen_reg_rtx (<MODE>mode);
21850 rtx blendw_idx, blendd_idx;
21854 blendw_idx = GEN_INT (mask & 0xff);
21855 blendd_idx = GEN_INT (15);
21859 blendw_idx = GEN_INT (mask >> 8 & 0xff);
21860 blendd_idx = GEN_INT (240);
21862 emit_insn (gen_avx2_pblend<ssemodesuffix> (tmp, operands[1],
21863 operands[2], blendw_idx));
21865 rtx op0 = gen_reg_rtx (V8SImode);
21866 tmp = lowpart_subreg (V8SImode, tmp, <MODE>mode);
21867 operands[1] = lowpart_subreg (V8SImode, operands[1], <MODE>mode);
21868 emit_insn (gen_avx2_pblenddv8si (op0, operands[1],
21870 emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8SImode));
;; 256-bit VPBLENDW insn; the 16-bit vec_merge mask (accepted by
;; avx2_pblendw_operand) is reduced back to the hardware's 8-bit immediate
;; in the output template.
21876 (define_insn "*avx2_pblend<ssemodesuffix>"
21877 [(set (match_operand:V16_256 0 "register_operand" "=x")
21879 (match_operand:V16_256 2 "nonimmediate_operand" "xm")
21880 (match_operand:V16_256 1 "register_operand" "x")
21881 (match_operand:SI 3 "avx2_pblendw_operand")))]
21884 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
21885 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
21887 [(set_attr "type" "ssemov")
21888 (set_attr "prefix_extra" "1")
21889 (set_attr "length_immediate" "1")
21890 (set_attr "prefix" "vex")
21891 (set_attr "mode" "OI")])
;; VPBLENDD dword blend under 8-bit immediate mask, AVX2 128/256-bit.
21893 (define_insn "avx2_pblendd<mode>"
21894 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
21895 (vec_merge:VI4_AVX2
21896 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
21897 (match_operand:VI4_AVX2 1 "register_operand" "x")
21898 (match_operand:SI 3 "const_0_to_255_operand")))]
21900 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
21901 [(set_attr "type" "ssemov")
21902 (set_attr "prefix_extra" "1")
21903 (set_attr "length_immediate" "1")
21904 (set_attr "prefix" "vex")
21905 (set_attr "mode" "<sseinsnmode>")])
;; PHMINPOSUW: horizontal minimum + position of unsigned words.
21907 (define_insn "sse4_1_phminposuw"
21908 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
21909 (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
21910 UNSPEC_PHMINPOSUW))]
21912 "%vphminposuw\t{%1, %0|%0, %1}"
21913 [(set_attr "isa" "noavx,noavx,avx")
21914 (set_attr "type" "sselog1")
21915 (set_attr "prefix_extra" "1")
21916 (set_attr "prefix" "orig,orig,vex")
21917 (set_attr "mode" "TI")])
;; AVX2 VPMOVSXBW/VPMOVZXBW: sign/zero extend V16QI -> V16HI (any_extend via
;; <code>/<extsuffix>), with optional AVX512 masking.
21919 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
21920 [(set (match_operand:V16HI 0 "register_operand" "=Yw")
21922 (match_operand:V16QI 1 "nonimmediate_operand" "Ywm")))]
21923 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
21924 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21925 [(set_attr "type" "ssemov")
21926 (set_attr "prefix_extra" "1")
21927 (set_attr "prefix" "maybe_evex")
21928 (set_attr "mode" "OI")])
;; Recognize a byte shuffle that interleaves a source with zeros (checked by
;; pmovzx_parallel) and rewrite it after reload as a plain zero_extend,
;; re-viewing the operands through lowpart subregs.
21930 (define_insn_and_split "*avx2_zero_extendv16qiv16hi2_1"
21931 [(set (match_operand:V32QI 0 "register_operand" "=v")
21934 (match_operand:V32QI 1 "nonimmediate_operand" "vm")
21935 (match_operand:V32QI 2 "const0_operand"))
21936 (match_parallel 3 "pmovzx_parallel"
21937 [(match_operand 4 "const_int_operand")])))]
21940 "&& reload_completed"
21941 [(set (match_dup 0) (zero_extend:V16HI (match_dup 1)))]
21943 operands[0] = lowpart_subreg (V16HImode, operands[0], V32QImode);
21944 operands[1] = lowpart_subreg (V16QImode, operands[1], V32QImode);
;; Same, with the zeroed half arriving via a vec_concat-with-zero subreg.
21947 (define_insn_and_split "*avx2_zero_extendv16qiv16hi2_2"
21948 [(set (match_operand:V32QI 0 "register_operand" "=v")
21952 (vec_concat:VI248_256
21953 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "vm")
21954 (match_operand:<ssehalfvecmode> 2 "const0_operand")) 0)
21955 (match_operand:V32QI 3 "const0_operand"))
21956 (match_parallel 4 "pmovzx_parallel"
21957 [(match_operand 5 "const_int_operand")])))]
21960 "&& reload_completed"
21961 [(set (match_dup 0) (zero_extend:V16HI (match_dup 1)))]
21963 operands[0] = lowpart_subreg (V16HImode, operands[0], V32QImode);
21964 operands[1] = lowpart_subreg (V16QImode, operands[1], <ssehalfvecmode>mode);
;; Standard-name expander mapping to the AVX2 insn above.
21967 (define_expand "<insn>v16qiv16hi2"
21968 [(set (match_operand:V16HI 0 "register_operand")
21970 (match_operand:V16QI 1 "nonimmediate_operand")))]
;; AVX512BW VPMOVSXBW/VPMOVZXBW: extend V32QI -> V32HI, optional masking.
21973 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
21974 [(set (match_operand:V32HI 0 "register_operand" "=v")
21976 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
21978 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21979 [(set_attr "type" "ssemov")
21980 (set_attr "prefix_extra" "1")
21981 (set_attr "prefix" "evex")
21982 (set_attr "mode" "XI")])
;; 512-bit analogues of the pmovzx_parallel shuffle-with-zeros recognizers:
;; rewrite as zero_extend after reload via lowpart subregs.
21984 (define_insn_and_split "*avx512bw_zero_extendv32qiv32hi2_1"
21985 [(set (match_operand:V64QI 0 "register_operand" "=v")
21988 (match_operand:V64QI 1 "nonimmediate_operand" "vm")
21989 (match_operand:V64QI 2 "const0_operand"))
21990 (match_parallel 3 "pmovzx_parallel"
21991 [(match_operand 4 "const_int_operand")])))]
21994 "&& reload_completed"
21995 [(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))]
21997 operands[0] = lowpart_subreg (V32HImode, operands[0], V64QImode);
21998 operands[1] = lowpart_subreg (V32QImode, operands[1], V64QImode);
;; Same, source arriving as a vec_concat-with-zero subreg.
22001 (define_insn_and_split "*avx512bw_zero_extendv32qiv32hi2_2"
22002 [(set (match_operand:V64QI 0 "register_operand" "=v")
22006 (vec_concat:VI248_512
22007 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "vm")
22008 (match_operand:<ssehalfvecmode> 2 "const0_operand")) 0)
22009 (match_operand:V64QI 3 "const0_operand"))
22010 (match_parallel 4 "pmovzx_parallel"
22011 [(match_operand 5 "const_int_operand")])))]
22014 "&& reload_completed"
22015 [(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))]
22017 operands[0] = lowpart_subreg (V32HImode, operands[0], V64QImode);
22018 operands[1] = lowpart_subreg (V32QImode, operands[1], <ssehalfvecmode>mode);
;; Standard-name expander for the AVX512BW insn above.
22021 (define_expand "<insn>v32qiv32hi2"
22022 [(set (match_operand:V32HI 0 "register_operand")
22024 (match_operand:V32QI 1 "nonimmediate_operand")))]
;; SSE4.1 PMOVSXBW/PMOVZXBW family: extend the low 8 bytes of a V16QI
;; register to V8HI (vec_select of elements 0..7 then any_extend).
22027 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
22028 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw")
22031 (match_operand:V16QI 1 "register_operand" "Yr,*x,Yw")
22032 (parallel [(const_int 0) (const_int 1)
22033 (const_int 2) (const_int 3)
22034 (const_int 4) (const_int 5)
22035 (const_int 6) (const_int 7)]))))]
22036 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
22037 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22038 [(set_attr "isa" "noavx,noavx,avx")
22039 (set_attr "type" "ssemov")
22040 (set_attr "prefix_extra" "1")
22041 (set_attr "prefix" "orig,orig,maybe_evex")
22042 (set_attr "mode" "TI")])
;; Memory-source form: extend a 64-bit V8QI load directly.
22044 (define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1"
22045 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw")
22047 (match_operand:V8QI 1 "memory_operand" "m,m,m")))]
22048 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
22049 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22050 [(set_attr "isa" "noavx,noavx,avx")
22051 (set_attr "type" "ssemov")
22052 (set_attr "prefix_extra" "1")
22053 (set_attr "prefix" "orig,orig,maybe_evex")
22054 (set_attr "mode" "TI")])
;; Pre-reload split: an extend of a DI memory value re-viewed as V8QI is
;; retargeted to the memory form by re-addressing the MEM in V8QImode.
22056 (define_insn_and_split "*sse4_1_<code>v8qiv8hi2<mask_name>_2"
22057 [(set (match_operand:V8HI 0 "register_operand")
22062 (match_operand:DI 1 "memory_operand")
22064 (parallel [(const_int 0) (const_int 1)
22065 (const_int 2) (const_int 3)
22066 (const_int 4) (const_int 5)
22067 (const_int 6) (const_int 7)]))))]
22068 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
22069 && ix86_pre_reload_split ()"
22072 [(set (match_dup 0)
22073 (any_extend:V8HI (match_dup 1)))]
22074 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);"
;; Zero-extend recognizers for the interleave-with-zeros shuffle shape
;; (pmovzx_parallel); split after reload into the vec_select+zero_extend
;; form, handling MEM sources specially by emitting the extend directly.
22076 (define_insn_and_split "*sse4_1_zero_extendv8qiv8hi2_3"
22077 [(set (match_operand:V16QI 0 "register_operand" "=Yr,*x,Yw")
22080 (match_operand:V16QI 1 "vector_operand" "YrBm,*xBm,Ywm")
22081 (match_operand:V16QI 2 "const0_operand"))
22082 (match_parallel 3 "pmovzx_parallel"
22083 [(match_operand 4 "const_int_operand")])))]
22086 "&& reload_completed"
22087 [(set (match_dup 0)
22091 (parallel [(const_int 0) (const_int 1)
22092 (const_int 2) (const_int 3)
22093 (const_int 4) (const_int 5)
22094 (const_int 6) (const_int 7)]))))]
22096 operands[0] = lowpart_subreg (V8HImode, operands[0], V16QImode);
22097 if (MEM_P (operands[1]))
22099 operands[1] = lowpart_subreg (V8QImode, operands[1], V16QImode);
22100 operands[1] = gen_rtx_ZERO_EXTEND (V8HImode, operands[1]);
22101 emit_insn (gen_rtx_SET (operands[0], operands[1]));
22105 [(set_attr "isa" "noavx,noavx,avx")])
;; Same, with the source arriving through a vec_concat-with-zero subreg.
22107 (define_insn_and_split "*sse4_1_zero_extendv8qiv8hi2_4"
22108 [(set (match_operand:V16QI 0 "register_operand" "=Yr,*x,Yw")
22112 (vec_concat:VI248_128
22113 (match_operand:<ssehalfvecmode> 1 "vector_operand" "YrBm,*xBm,Ywm")
22114 (match_operand:<ssehalfvecmode> 2 "const0_operand")) 0)
22115 (match_operand:V16QI 3 "const0_operand"))
22116 (match_parallel 4 "pmovzx_parallel"
22117 [(match_operand 5 "const_int_operand")])))]
22120 "&& reload_completed"
22121 [(set (match_dup 0)
22125 (parallel [(const_int 0) (const_int 1)
22126 (const_int 2) (const_int 3)
22127 (const_int 4) (const_int 5)
22128 (const_int 6) (const_int 7)]))))]
22130 operands[0] = lowpart_subreg (V8HImode, operands[0], V16QImode);
22131 if (MEM_P (operands[1]))
22133 operands[1] = lowpart_subreg (V8QImode, operands[1], <ssehalfvecmode>mode);
22134 operands[1] = gen_rtx_ZERO_EXTEND (V8HImode, operands[1]);
22135 emit_insn (gen_rtx_SET (operands[0], operands[1]));
22138 operands[1] = lowpart_subreg (V16QImode, operands[1], <ssehalfvecmode>mode);
22140 [(set_attr "isa" "noavx,noavx,avx")])
;; Standard-name expander: non-MEM V8QI inputs are widened to V16QI via a
;; lowpart subreg so the register-source insn pattern applies.
22142 (define_expand "<insn>v8qiv8hi2"
22143 [(set (match_operand:V8HI 0 "register_operand")
22145 (match_operand:V8QI 1 "nonimmediate_operand")))]
22148 if (!MEM_P (operands[1]))
22150 rtx op1 = force_reg (V8QImode, operands[1]);
22151 op1 = lowpart_subreg (V16QImode, op1, V8QImode);
22152 emit_insn (gen_sse4_1_<code>v8qiv8hi2 (operands[0], op1));
;; AVX512F VPMOVSXBD/VPMOVZXBD: extend V16QI -> V16SI, optional masking.
22157 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
22158 [(set (match_operand:V16SI 0 "register_operand" "=v")
22160 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
22162 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22163 [(set_attr "type" "ssemov")
22164 (set_attr "prefix" "evex")
22165 (set_attr "mode" "XI")])
;; Standard-name expander for the AVX512F insn above.
22167 (define_expand "<insn>v16qiv16si2"
22168 [(set (match_operand:V16SI 0 "register_operand")
22170 (match_operand:V16QI 1 "nonimmediate_operand")))]
;; AVX2 VPMOVSXBD/VPMOVZXBD: extend the low 8 bytes of a V16QI register to
;; V8SI (vec_select 0..7 then any_extend).
22173 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
22174 [(set (match_operand:V8SI 0 "register_operand" "=v")
22177 (match_operand:V16QI 1 "register_operand" "v")
22178 (parallel [(const_int 0) (const_int 1)
22179 (const_int 2) (const_int 3)
22180 (const_int 4) (const_int 5)
22181 (const_int 6) (const_int 7)]))))]
22182 "TARGET_AVX2 && <mask_avx512vl_condition>"
22183 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22184 [(set_attr "type" "ssemov")
22185 (set_attr "prefix_extra" "1")
22186 (set_attr "prefix" "maybe_evex")
22187 (set_attr "mode" "OI")])
;; Memory-source form: extend a 64-bit V8QI load directly.
22189 (define_insn "*avx2_<code>v8qiv8si2<mask_name>_1"
22190 [(set (match_operand:V8SI 0 "register_operand" "=v")
22192 (match_operand:V8QI 1 "memory_operand" "m")))]
22193 "TARGET_AVX2 && <mask_avx512vl_condition>"
22194 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22195 [(set_attr "type" "ssemov")
22196 (set_attr "prefix_extra" "1")
22197 (set_attr "prefix" "maybe_evex")
22198 (set_attr "mode" "OI")])
;; Pre-reload split: DI memory source re-viewed as V8QI -> memory form.
22200 (define_insn_and_split "*avx2_<code>v8qiv8si2<mask_name>_2"
22201 [(set (match_operand:V8SI 0 "register_operand")
22206 (match_operand:DI 1 "memory_operand")
22208 (parallel [(const_int 0) (const_int 1)
22209 (const_int 2) (const_int 3)
22210 (const_int 4) (const_int 5)
22211 (const_int 6) (const_int 7)]))))]
22212 "TARGET_AVX2 && <mask_avx512vl_condition>
22213 && ix86_pre_reload_split ()"
22216 [(set (match_dup 0)
22217 (any_extend:V8SI (match_dup 1)))]
22218 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);"
;; Standard-name expander: non-MEM V8QI inputs are widened to V16QI via a
;; lowpart subreg so the register-source insn applies.
22220 (define_expand "<insn>v8qiv8si2"
22221 [(set (match_operand:V8SI 0 "register_operand")
22223 (match_operand:V8QI 1 "nonimmediate_operand")))]
22226 if (!MEM_P (operands[1]))
22228 rtx op1 = force_reg (V8QImode, operands[1]);
22229 op1 = lowpart_subreg (V16QImode, op1, V8QImode);
22230 emit_insn (gen_avx2_<code>v8qiv8si2 (operands[0], op1));
;; SSE4.1 PMOVSXBD/PMOVZXBD: extend the low 4 bytes of a V16QI register to
;; V4SI (vec_select 0..3 then any_extend).
22235 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
22236 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
22239 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
22240 (parallel [(const_int 0) (const_int 1)
22241 (const_int 2) (const_int 3)]))))]
22242 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
22243 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22244 [(set_attr "isa" "noavx,noavx,avx")
22245 (set_attr "type" "ssemov")
22246 (set_attr "prefix_extra" "1")
22247 (set_attr "prefix" "orig,orig,maybe_evex")
22248 (set_attr "mode" "TI")])
;; Memory-source form: extend a 32-bit V4QI load directly.
22250 (define_insn "*sse4_1_<code>v4qiv4si2<mask_name>_1"
22251 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
22253 (match_operand:V4QI 1 "memory_operand" "m,m,m")))]
22254 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
22255 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22256 [(set_attr "isa" "noavx,noavx,avx")
22257 (set_attr "type" "ssemov")
22258 (set_attr "prefix_extra" "1")
22259 (set_attr "prefix" "orig,orig,maybe_evex")
22260 (set_attr "mode" "TI")])
;; Pre-reload split: an SI memory value broadcast (vec_duplicate) then
;; selected is really a 4-byte load -- re-address the MEM as V4QI and use
;; the memory form.
22262 (define_insn_and_split "*sse4_1_<code>v4qiv4si2<mask_name>_2"
22263 [(set (match_operand:V4SI 0 "register_operand")
22268 (vec_duplicate:V4SI
22269 (match_operand:SI 1 "memory_operand"))
22271 [(const_int 0) (const_int 0)
22272 (const_int 0) (const_int 0)])
22274 (parallel [(const_int 0) (const_int 1)
22275 (const_int 2) (const_int 3)]))))]
22276 "TARGET_SSE4_1 && <mask_avx512vl_condition>
22277 && ix86_pre_reload_split ()"
22280 [(set (match_dup 0)
22281 (any_extend:V4SI (match_dup 1)))]
22282 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);"
;; Standard-name expander: non-MEM V4QI inputs are widened to V16QI via a
;; lowpart subreg so the register-source insn applies.
22284 (define_expand "<insn>v4qiv4si2"
22285 [(set (match_operand:V4SI 0 "register_operand")
22287 (match_operand:V4QI 1 "nonimmediate_operand")))]
22290 if (!MEM_P (operands[1]))
22292 rtx op1 = force_reg (V4QImode, operands[1]);
22293 op1 = lowpart_subreg (V16QImode, op1, V4QImode);
22294 emit_insn (gen_sse4_1_<code>v4qiv4si2 (operands[0], op1));
;; AVX512F VPMOVSXWD/VPMOVZXWD: extend V16HI -> V16SI, optional masking.
22299 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
22300 [(set (match_operand:V16SI 0 "register_operand" "=v")
22302 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
22304 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22305 [(set_attr "type" "ssemov")
22306 (set_attr "prefix" "evex")
22307 (set_attr "mode" "XI")])
;; Standard-name expander for the AVX512F insn above.
22309 (define_expand "<insn>v16hiv16si2"
22310 [(set (match_operand:V16SI 0 "register_operand")
22312 (match_operand:V16HI 1 "nonimmediate_operand")))]
;; Zero-extend recognizers for interleave-with-zeros word shuffles
;; (pmovzx_parallel); split after reload into plain zero_extend through
;; lowpart subregs.
22315 (define_insn_and_split "avx512f_zero_extendv16hiv16si2_1"
22316 [(set (match_operand:V32HI 0 "register_operand" "=v")
22319 (match_operand:V32HI 1 "nonimmediate_operand" "vm")
22320 (match_operand:V32HI 2 "const0_operand"))
22321 (match_parallel 3 "pmovzx_parallel"
22322 [(match_operand 4 "const_int_operand")])))]
22325 "&& reload_completed"
22326 [(set (match_dup 0) (zero_extend:V16SI (match_dup 1)))]
22328 operands[0] = lowpart_subreg (V16SImode, operands[0], V32HImode);
22329 operands[1] = lowpart_subreg (V16HImode, operands[1], V32HImode);
;; Same, source arriving as a vec_concat-with-zero subreg.
22332 (define_insn_and_split "*avx512f_zero_extendv16hiv16si2_2"
22333 [(set (match_operand:V32HI 0 "register_operand" "=v")
22337 (vec_concat:VI148_512
22338 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "vm")
22339 (match_operand:<ssehalfvecmode> 2 "const0_operand")) 0)
22340 (match_operand:V32HI 3 "const0_operand"))
22341 (match_parallel 4 "pmovzx_parallel"
22342 [(match_operand 5 "const_int_operand")])))]
22345 "&& reload_completed"
22346 [(set (match_dup 0) (zero_extend:V16SI (match_dup 1)))]
22348 operands[0] = lowpart_subreg (V16SImode, operands[0], V32HImode);
22349 operands[1] = lowpart_subreg (V16HImode, operands[1], <ssehalfvecmode>mode);
;; AVX2 VPMOVSXWD/VPMOVZXWD: extend V8HI -> V8SI, optional AVX512VL masking.
22352 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
22353 [(set (match_operand:V8SI 0 "register_operand" "=v")
22355 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
22356 "TARGET_AVX2 && <mask_avx512vl_condition>"
22357 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22358 [(set_attr "type" "ssemov")
22359 (set_attr "prefix_extra" "1")
22360 (set_attr "prefix" "maybe_evex")
22361 (set_attr "mode" "OI")])
;; Standard-name expander for the AVX2 insn above.
22363 (define_expand "<insn>v8hiv8si2"
22364 [(set (match_operand:V8SI 0 "register_operand")
22366 (match_operand:V8HI 1 "nonimmediate_operand")))]
;; Zero-extend recognizers for interleave-with-zeros word shuffles
;; (pmovzx_parallel); split after reload into plain zero_extend.
22369 (define_insn_and_split "avx2_zero_extendv8hiv8si2_1"
22370 [(set (match_operand:V16HI 0 "register_operand" "=v")
22373 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
22374 (match_operand:V16HI 2 "const0_operand"))
22375 (match_parallel 3 "pmovzx_parallel"
22376 [(match_operand 4 "const_int_operand")])))]
22379 "&& reload_completed"
22380 [(set (match_dup 0) (zero_extend:V8SI (match_dup 1)))]
22382 operands[0] = lowpart_subreg (V8SImode, operands[0], V16HImode);
22383 operands[1] = lowpart_subreg (V8HImode, operands[1], V16HImode);
;; Same, source arriving as a vec_concat-with-zero subreg.
22386 (define_insn_and_split "*avx2_zero_extendv8hiv8si2_2"
22387 [(set (match_operand:V16HI 0 "register_operand" "=v")
22391 (vec_concat:VI148_256
22392 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "vm")
22393 (match_operand:<ssehalfvecmode> 2 "const0_operand")) 0)
22394 (match_operand:V16HI 3 "const0_operand"))
22395 (match_parallel 4 "pmovzx_parallel"
22396 [(match_operand 5 "const_int_operand")])))]
22399 "&& reload_completed"
22400 [(set (match_dup 0) (zero_extend:V8SI (match_dup 1)))]
22402 operands[0] = lowpart_subreg (V8SImode, operands[0], V16HImode);
22403 operands[1] = lowpart_subreg (V8HImode, operands[1], <ssehalfvecmode>mode);
;; SSE4.1 PMOVSXWD/PMOVZXWD: extend the low 4 words of a V8HI register to
;; V4SI (vec_select 0..3 then any_extend).
22407 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
22408 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
22411 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
22412 (parallel [(const_int 0) (const_int 1)
22413 (const_int 2) (const_int 3)]))))]
22414 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
22415 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22416 [(set_attr "isa" "noavx,noavx,avx")
22417 (set_attr "type" "ssemov")
22418 (set_attr "prefix_extra" "1")
22419 (set_attr "prefix" "orig,orig,maybe_evex")
22420 (set_attr "mode" "TI")])
;; Memory-source form: extend a 64-bit V4HI load directly.
22422 (define_insn "*sse4_1_<code>v4hiv4si2<mask_name>_1"
22423 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
22425 (match_operand:V4HI 1 "memory_operand" "m,m,m")))]
22426 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
22427 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22428 [(set_attr "isa" "noavx,noavx,avx")
22429 (set_attr "type" "ssemov")
22430 (set_attr "prefix_extra" "1")
22431 (set_attr "prefix" "orig,orig,maybe_evex")
22432 (set_attr "mode" "TI")])
;; Pre-reload split: DI memory source re-viewed as V4HI -> memory form.
22434 (define_insn_and_split "*sse4_1_<code>v4hiv4si2<mask_name>_2"
22435 [(set (match_operand:V4SI 0 "register_operand")
22440 (match_operand:DI 1 "memory_operand")
22442 (parallel [(const_int 0) (const_int 1)
22443 (const_int 2) (const_int 3)]))))]
22444 "TARGET_SSE4_1 && <mask_avx512vl_condition>
22445 && ix86_pre_reload_split ()"
22448 [(set (match_dup 0)
22449 (any_extend:V4SI (match_dup 1)))]
22450 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);"
;; Standard-name expander: non-MEM V4HI inputs are widened to V8HI via a
;; lowpart subreg so the register-source insn applies.
22452 (define_expand "<insn>v4hiv4si2"
22453 [(set (match_operand:V4SI 0 "register_operand")
22455 (match_operand:V4HI 1 "nonimmediate_operand")))]
22458 if (!MEM_P (operands[1]))
22460 rtx op1 = force_reg (V4HImode, operands[1]);
22461 op1 = lowpart_subreg (V8HImode, op1, V4HImode);
22462 emit_insn (gen_sse4_1_<code>v4hiv4si2 (operands[0], op1));
22467 (define_insn_and_split "*sse4_1_zero_extendv4hiv4si2_3"
22468 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
22471 (match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,vm")
22472 (match_operand:V8HI 2 "const0_operand"))
22473 (match_parallel 3 "pmovzx_parallel"
22474 [(match_operand 4 "const_int_operand")])))]
22477 "&& reload_completed"
22478 [(set (match_dup 0)
22482 (parallel [(const_int 0) (const_int 1)
22483 (const_int 2) (const_int 3)]))))]
22485 operands[0] = lowpart_subreg (V4SImode, operands[0], V8HImode);
22486 if (MEM_P (operands[1]))
22488 operands[1] = lowpart_subreg (V4HImode, operands[1], V8HImode);
22489 operands[1] = gen_rtx_ZERO_EXTEND (V4SImode, operands[1]);
22490 emit_insn (gen_rtx_SET (operands[0], operands[1]));
22494 [(set_attr "isa" "noavx,noavx,avx")])
22496 (define_insn_and_split "*sse4_1_zero_extendv4hiv4si2_4"
22497 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
22501 (vec_concat:VI148_128
22502 (match_operand:<ssehalfvecmode> 1 "vector_operand" "YrBm,*xBm,vm")
22503 (match_operand:<ssehalfvecmode> 2 "const0_operand")) 0)
22504 (match_operand:V8HI 3 "const0_operand"))
22505 (match_parallel 4 "pmovzx_parallel"
22506 [(match_operand 5 "const_int_operand")])))]
22509 "&& reload_completed"
22510 [(set (match_dup 0)
22514 (parallel [(const_int 0) (const_int 1)
22515 (const_int 2) (const_int 3)]))))]
22517 operands[0] = lowpart_subreg (V4SImode, operands[0], V8HImode);
22518 if (MEM_P (operands[1]))
22520 operands[1] = lowpart_subreg (V4HImode, operands[1], <ssehalfvecmode>mode);
22521 operands[1] = gen_rtx_ZERO_EXTEND (V4SImode, operands[1]);
22522 emit_insn (gen_rtx_SET (operands[0], operands[1]));
22525 operands[1] = lowpart_subreg (V8HImode, operands[1], <ssehalfvecmode>mode);
22527 [(set_attr "isa" "noavx,noavx,avx")])
;; Packed 8-bit -> 64-bit element extension, 512-bit AVX-512F forms
;; (vpmovsxbq / vpmovzxbq).  Register source uses the low 8 bytes of
;; a V16QI register; memory source loads V8QI directly.
22529 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
22530 [(set (match_operand:V8DI 0 "register_operand" "=v")
22533 (match_operand:V16QI 1 "register_operand" "v")
22534 (parallel [(const_int 0) (const_int 1)
22535 (const_int 2) (const_int 3)
22536 (const_int 4) (const_int 5)
22537 (const_int 6) (const_int 7)]))))]
22539 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22540 [(set_attr "type" "ssemov")
22541 (set_attr "prefix" "evex")
22542 (set_attr "mode" "XI")])
;; Memory-source variant.
22544 (define_insn "*avx512f_<code>v8qiv8di2<mask_name>_1"
22545 [(set (match_operand:V8DI 0 "register_operand" "=v")
22547 (match_operand:V8QI 1 "memory_operand" "m")))]
22549 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22550 [(set_attr "type" "ssemov")
22551 (set_attr "prefix" "evex")
22552 (set_attr "mode" "XI")])
;; Pre-reload split: DImode memory used as eight bytes becomes a
;; V8QImode memory extend.
22554 (define_insn_and_split "*avx512f_<code>v8qiv8di2<mask_name>_2"
22555 [(set (match_operand:V8DI 0 "register_operand")
22560 (match_operand:DI 1 "memory_operand")
22562 (parallel [(const_int 0) (const_int 1)
22563 (const_int 2) (const_int 3)
22564 (const_int 4) (const_int 5)
22565 (const_int 6) (const_int 7)]))))]
22566 "TARGET_AVX512F && ix86_pre_reload_split ()"
22569 [(set (match_dup 0)
22570 (any_extend:V8DI (match_dup 1)))]
22571 "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
;; Standard-named expander; a register source is widened to V16QI.
22573 (define_expand "<insn>v8qiv8di2"
22574 [(set (match_operand:V8DI 0 "register_operand")
22576 (match_operand:V8QI 1 "nonimmediate_operand")))]
22579 if (!MEM_P (operands[1]))
22581 rtx op1 = force_reg (V8QImode, operands[1]);
22582 op1 = lowpart_subreg (V16QImode, op1, V8QImode);
22583 emit_insn (gen_avx512f_<code>v8qiv8di2 (operands[0], op1));
;; Packed 8-bit -> 64-bit element extension, 256-bit AVX2 forms
;; (vpmovsxbq / vpmovzxbq).
22588 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
22589 [(set (match_operand:V4DI 0 "register_operand" "=v")
22592 (match_operand:V16QI 1 "register_operand" "v")
22593 (parallel [(const_int 0) (const_int 1)
22594 (const_int 2) (const_int 3)]))))]
22595 "TARGET_AVX2 && <mask_avx512vl_condition>"
22596 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22597 [(set_attr "type" "ssemov")
22598 (set_attr "prefix_extra" "1")
22599 (set_attr "prefix" "maybe_evex")
22600 (set_attr "mode" "OI")])
;; Memory-source variant.
22602 (define_insn "*avx2_<code>v4qiv4di2<mask_name>_1"
22603 [(set (match_operand:V4DI 0 "register_operand" "=v")
22605 (match_operand:V4QI 1 "memory_operand" "m")))]
22606 "TARGET_AVX2 && <mask_avx512vl_condition>"
22607 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22608 [(set_attr "type" "ssemov")
22609 (set_attr "prefix_extra" "1")
22610 (set_attr "prefix" "maybe_evex")
22611 (set_attr "mode" "OI")])
;; Pre-reload split: a broadcast SImode load used as four bytes is
;; re-expressed as a V4QImode memory extend.
22613 (define_insn_and_split "*avx2_<code>v4qiv4di2<mask_name>_2"
22614 [(set (match_operand:V4DI 0 "register_operand")
22619 (vec_duplicate:V4SI
22620 (match_operand:SI 1 "memory_operand"))
22622 [(const_int 0) (const_int 0)
22623 (const_int 0) (const_int 0)])
22625 (parallel [(const_int 0) (const_int 1)
22626 (const_int 2) (const_int 3)]))))]
22627 "TARGET_AVX2 && <mask_avx512vl_condition>
22628 && ix86_pre_reload_split ()"
22631 [(set (match_dup 0)
22632 (any_extend:V4DI (match_dup 1)))]
22633 "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
;; Standard-named expander; a register source is widened to V16QI.
;; NOTE(review): the subreg here is taken of operands[1] rather than
;; the force_reg result op1 as sibling expanders do -- confirm against
;; the full sse.md whether this is intentional.
22635 (define_expand "<insn>v4qiv4di2"
22636 [(set (match_operand:V4DI 0 "register_operand")
22638 (match_operand:V4QI 1 "nonimmediate_operand")))]
22641 if (!MEM_P (operands[1]))
22643 rtx op1 = force_reg (V4QImode, operands[1]);
22644 op1 = lowpart_subreg (V16QImode, operands[1], V4QImode);
22645 emit_insn (gen_avx2_<code>v4qiv4di2 (operands[0], op1));
;; Packed 8-bit -> 64-bit element extension, 128-bit SSE4.1 forms
;; (pmovsxbq / pmovzxbq); only the low two bytes are consumed.
22650 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
22651 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
22654 (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
22655 (parallel [(const_int 0) (const_int 1)]))))]
22656 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
22657 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22658 [(set_attr "isa" "noavx,noavx,avx")
22659 (set_attr "type" "ssemov")
22660 (set_attr "prefix_extra" "1")
22661 (set_attr "prefix" "orig,orig,maybe_evex")
22662 (set_attr "mode" "TI")])
;; Memory-source variant.
22664 (define_insn "*sse4_1_<code>v2qiv2di2<mask_name>_1"
22665 [(set (match_operand:V2DI 0 "register_operand" "=v")
22667 (match_operand:V2QI 1 "memory_operand" "m")))]
22668 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
22669 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22670 [(set_attr "type" "ssemov")
22671 (set_attr "prefix_extra" "1")
22672 (set_attr "prefix" "maybe_evex")
22673 (set_attr "mode" "TI")])
;; Standard-named expander; a register source is widened to V16QI.
22675 (define_expand "<insn>v2qiv2di2"
22676 [(set (match_operand:V2DI 0 "register_operand")
22678 (match_operand:V2QI 1 "nonimmediate_operand")))]
22681 if (!MEM_P (operands[1]))
22683 rtx op1 = force_reg (V2QImode, operands[1]);
22684 op1 = lowpart_subreg (V16QImode, op1, V2QImode);
22685 emit_insn (gen_sse4_1_<code>v2qiv2di2 (operands[0], op1));
;; Pre-reload split: a scalar broadcast padded with zeros, selected as
;; two elements, is rewritten as a V2QI zero-extend via the expander.
22690 (define_insn_and_split "*sse4_1_zero_extendv2qiv2di2_2"
22691 [(set (match_operand:V2DI 0 "register_operand")
22696 (vec_duplicate:V8_128
22697 (match_operand:<ssescalarmode> 1 "nonimmediate_operand"))
22698 (match_operand:V8_128 2 "const0_operand")
22700 (parallel [(const_int 0) (const_int 1)]))))]
22701 "TARGET_SSE4_1 && ix86_pre_reload_split ()"
22706 if (!MEM_P (operands[1]))
22707 operands[1] = force_reg (<ssescalarmode>mode, operands[1]);
22708 operands[1] = lowpart_subreg (V2QImode, operands[1], <ssescalarmode>mode);
22709 emit_insn (gen_zero_extendv2qiv2di2 (operands[0], operands[1]));
;; Packed 16-bit -> 64-bit element extension, 512-bit AVX-512F form
;; (vpmovsxwq / vpmovzxwq), plus its standard-named expander.
22713 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
22714 [(set (match_operand:V8DI 0 "register_operand" "=v")
22716 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
22718 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22719 [(set_attr "type" "ssemov")
22720 (set_attr "prefix" "evex")
22721 (set_attr "mode" "XI")])
22723 (define_expand "<insn>v8hiv8di2"
22724 [(set (match_operand:V8DI 0 "register_operand")
22726 (match_operand:V8HI 1 "nonimmediate_operand")))]
;; Packed 16-bit -> 64-bit element extension, 256-bit AVX2 forms
;; (vpmovsxwq / vpmovzxwq).
22729 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
22730 [(set (match_operand:V4DI 0 "register_operand" "=v")
22733 (match_operand:V8HI 1 "register_operand" "v")
22734 (parallel [(const_int 0) (const_int 1)
22735 (const_int 2) (const_int 3)]))))]
22736 "TARGET_AVX2 && <mask_avx512vl_condition>"
22737 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22738 [(set_attr "type" "ssemov")
22739 (set_attr "prefix_extra" "1")
22740 (set_attr "prefix" "maybe_evex")
22741 (set_attr "mode" "OI")])
;; Memory-source variant.
22743 (define_insn "*avx2_<code>v4hiv4di2<mask_name>_1"
22744 [(set (match_operand:V4DI 0 "register_operand" "=v")
22746 (match_operand:V4HI 1 "memory_operand" "m")))]
22747 "TARGET_AVX2 && <mask_avx512vl_condition>"
22748 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22749 [(set_attr "type" "ssemov")
22750 (set_attr "prefix_extra" "1")
22751 (set_attr "prefix" "maybe_evex")
22752 (set_attr "mode" "OI")])
;; Pre-reload split: DImode memory used as four halfwords becomes a
;; V4HImode memory extend.
22754 (define_insn_and_split "*avx2_<code>v4hiv4di2<mask_name>_2"
22755 [(set (match_operand:V4DI 0 "register_operand")
22760 (match_operand:DI 1 "memory_operand")
22762 (parallel [(const_int 0) (const_int 1)
22763 (const_int 2) (const_int 3)]))))]
22764 "TARGET_AVX2 && <mask_avx512vl_condition>
22765 && ix86_pre_reload_split ()"
22768 [(set (match_dup 0)
22769 (any_extend:V4DI (match_dup 1)))]
22770 "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
;; Standard-named expander; a register source is widened to V8HI.
22772 (define_expand "<insn>v4hiv4di2"
22773 [(set (match_operand:V4DI 0 "register_operand")
22775 (match_operand:V4HI 1 "nonimmediate_operand")))]
22778 if (!MEM_P (operands[1]))
22780 rtx op1 = force_reg (V4HImode, operands[1]);
22781 op1 = lowpart_subreg (V8HImode, op1, V4HImode);
22782 emit_insn (gen_avx2_<code>v4hiv4di2 (operands[0], op1));
;; Packed 16-bit -> 64-bit element extension, 128-bit SSE4.1 forms
;; (pmovsxwq / pmovzxwq); only the low two halfwords are consumed.
22787 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
22788 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
22791 (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
22792 (parallel [(const_int 0) (const_int 1)]))))]
22793 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
22794 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22795 [(set_attr "isa" "noavx,noavx,avx")
22796 (set_attr "type" "ssemov")
22797 (set_attr "prefix_extra" "1")
22798 (set_attr "prefix" "orig,orig,maybe_evex")
22799 (set_attr "mode" "TI")])
;; Memory-source variant.
22801 (define_insn "*sse4_1_<code>v2hiv2di2<mask_name>_1"
22802 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
22804 (match_operand:V2HI 1 "memory_operand" "m,m,m")))]
22805 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
22806 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22807 [(set_attr "isa" "noavx,noavx,avx")
22808 (set_attr "type" "ssemov")
22809 (set_attr "prefix_extra" "1")
22810 (set_attr "prefix" "orig,orig,maybe_evex")
22811 (set_attr "mode" "TI")])
;; Pre-reload split: an SImode broadcast load selected as two elements
;; becomes a V2HImode memory extend.
22813 (define_insn_and_split "*sse4_1_<code>v2hiv2di2<mask_name>_2"
22814 [(set (match_operand:V2DI 0 "register_operand")
22819 (vec_duplicate:V4SI
22820 (match_operand:SI 1 "memory_operand"))
22822 [(const_int 0) (const_int 0)
22823 (const_int 0) (const_int 0)])
22825 (parallel [(const_int 0) (const_int 1)]))))]
22826 "TARGET_SSE4_1 && <mask_avx512vl_condition>
22827 && ix86_pre_reload_split ()"
22830 [(set (match_dup 0)
22831 (any_extend:V2DI (match_dup 1)))]
22832 "operands[1] = adjust_address_nv (operands[1], V2HImode, 0);")
;; Standard-named expander; a register source is widened to V8HI.
22834 (define_expand "<insn>v2hiv2di2"
22835 [(set (match_operand:V2DI 0 "register_operand")
22837 (match_operand:V2HI 1 "nonimmediate_operand")))]
22840 if (!MEM_P (operands[1]))
22842 rtx op1 = force_reg (V2HImode, operands[1]);
22843 op1 = lowpart_subreg (V8HImode, op1, V2HImode);
22844 emit_insn (gen_sse4_1_<code>v2hiv2di2 (operands[0], op1));
;; Packed 32-bit -> 64-bit element extension, 512-bit AVX-512F forms
;; (vpmovsxdq / vpmovzxdq).
22849 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
22850 [(set (match_operand:V8DI 0 "register_operand" "=v")
22852 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
22854 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22855 [(set_attr "type" "ssemov")
22856 (set_attr "prefix" "evex")
22857 (set_attr "mode" "XI")])
;; Shuffle-with-zeros (pmovzx_parallel) recognized as zero extension;
;; split after reload into a plain zero_extend on lowpart subregs.
22859 (define_insn_and_split "*avx512f_zero_extendv8siv8di2_1"
22860 [(set (match_operand:V16SI 0 "register_operand" "=v")
22863 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
22864 (match_operand:V16SI 2 "const0_operand"))
22865 (match_parallel 3 "pmovzx_parallel"
22866 [(match_operand 4 "const_int_operand")])))]
22869 "&& reload_completed"
22870 [(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))]
22872 operands[0] = lowpart_subreg (V8DImode, operands[0], V16SImode);
22873 operands[1] = lowpart_subreg (V8SImode, operands[1], V16SImode);
;; Variant where the zeros come from a vec_concat with a zero vector.
22876 (define_insn_and_split "*avx512f_zero_extendv8siv8di2_2"
22877 [(set (match_operand:V16SI 0 "register_operand" "=v")
22881 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
22882 (match_operand:V8SI 2 "const0_operand"))
22883 (match_operand:V16SI 3 "const0_operand"))
22884 (match_parallel 4 "pmovzx_parallel"
22885 [(match_operand 5 "const_int_operand")])))]
22888 "&& reload_completed"
22889 [(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))]
22891 operands[0] = lowpart_subreg (V8DImode, operands[0], V16SImode);
;; Standard-named expander for the AVX-512F insn.
22894 (define_expand "<insn>v8siv8di2"
22895 [(set (match_operand:V8DI 0 "register_operand" "=v")
22897 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
;; Packed 32-bit -> 64-bit element extension, 256-bit AVX2 forms
;; (vpmovsxdq / vpmovzxdq).
22900 (define_insn "avx2_<code>v4siv4di2<mask_name>"
22901 [(set (match_operand:V4DI 0 "register_operand" "=v")
22903 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
22904 "TARGET_AVX2 && <mask_avx512vl_condition>"
22905 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22906 [(set_attr "type" "ssemov")
22907 (set_attr "prefix" "maybe_evex")
22908 (set_attr "prefix_extra" "1")
22909 (set_attr "mode" "OI")])
;; Shuffle-with-zeros recognized as zero extension; split after reload
;; into a plain zero_extend on lowpart subregs.
22911 (define_insn_and_split "*avx2_zero_extendv4siv4di2_1"
22912 [(set (match_operand:V8SI 0 "register_operand" "=v")
22915 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
22916 (match_operand:V8SI 2 "const0_operand"))
22917 (match_parallel 3 "pmovzx_parallel"
22918 [(match_operand 4 "const_int_operand")])))]
22921 "&& reload_completed"
22922 [(set (match_dup 0) (zero_extend:V4DI (match_dup 1)))]
22924 operands[0] = lowpart_subreg (V4DImode, operands[0], V8SImode);
22925 operands[1] = lowpart_subreg (V4SImode, operands[1], V8SImode);
;; Variant where the zeros come from a vec_concat with a zero vector.
22928 (define_insn_and_split "*avx2_zero_extendv4siv4di2_2"
22929 [(set (match_operand:V8SI 0 "register_operand" "=v")
22933 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
22934 (match_operand:V4SI 2 "const0_operand"))
22935 (match_operand:V8SI 3 "const0_operand"))
22936 (match_parallel 4 "pmovzx_parallel"
22937 [(match_operand 5 "const_int_operand")])))]
22940 "&& reload_completed"
22941 [(set (match_dup 0) (zero_extend:V4DI (match_dup 1)))]
22943 operands[0] = lowpart_subreg (V4DImode, operands[0], V8SImode);
;; Standard-named expander for the AVX2 insn.
22946 (define_expand "<insn>v4siv4di2"
22947 [(set (match_operand:V4DI 0 "register_operand")
22949 (match_operand:V4SI 1 "nonimmediate_operand")))]
;; Packed 32-bit -> 64-bit element extension, 128-bit SSE4.1 forms
;; (pmovsxdq / pmovzxdq); only the low two dwords are consumed.
22952 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
22953 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
22956 (match_operand:V4SI 1 "register_operand" "Yr,*x,v")
22957 (parallel [(const_int 0) (const_int 1)]))))]
22958 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
22959 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22960 [(set_attr "isa" "noavx,noavx,avx")
22961 (set_attr "type" "ssemov")
22962 (set_attr "prefix_extra" "1")
22963 (set_attr "prefix" "orig,orig,maybe_evex")
22964 (set_attr "mode" "TI")])
;; Memory-source variant.
22966 (define_insn "*sse4_1_<code>v2siv2di2<mask_name>_1"
22967 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
22969 (match_operand:V2SI 1 "memory_operand" "m,m,m")))]
22970 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
22971 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22972 [(set_attr "isa" "noavx,noavx,avx")
22973 (set_attr "type" "ssemov")
22974 (set_attr "prefix_extra" "1")
22975 (set_attr "prefix" "orig,orig,maybe_evex")
22976 (set_attr "mode" "TI")])
;; Pre-reload split: DImode memory used as two dwords becomes a
;; V2SImode memory extend.
22978 (define_insn_and_split "*sse4_1_<code>v2siv2di2<mask_name>_2"
22979 [(set (match_operand:V2DI 0 "register_operand")
22984 (match_operand:DI 1 "memory_operand")
22986 (parallel [(const_int 0) (const_int 1)]))))]
22987 "TARGET_SSE4_1 && <mask_avx512vl_condition>
22988 && ix86_pre_reload_split ()"
22991 [(set (match_dup 0)
22992 (any_extend:V2DI (match_dup 1)))]
22993 "operands[1] = adjust_address_nv (operands[1], V2SImode, 0);")
;; Shuffle-with-zeros (pmovzx_parallel) recognized as pmovzxdq; split
;; after reload.  Memory sources are emitted directly via gen_rtx_SET.
22995 (define_insn_and_split "*sse4_1_zero_extendv2siv2di2_3"
22996 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
22999 (match_operand:V4SI 1 "vector_operand" "YrBm,*xBm,vm")
23000 (match_operand:V4SI 2 "const0_operand"))
23001 (match_parallel 3 "pmovzx_parallel"
23002 [(match_operand 4 "const_int_operand")])))]
23005 "&& reload_completed"
23006 [(set (match_dup 0)
23008 (vec_select:V2SI (match_dup 1)
23009 (parallel [(const_int 0) (const_int 1)]))))]
23011 operands[0] = lowpart_subreg (V2DImode, operands[0], V4SImode);
23012 if (MEM_P (operands[1]))
23014 operands[1] = lowpart_subreg (V2SImode, operands[1], V4SImode);
23015 operands[1] = gen_rtx_ZERO_EXTEND (V2DImode, operands[1]);
23016 emit_insn (gen_rtx_SET (operands[0], operands[1]));
23020 [(set_attr "isa" "noavx,noavx,avx")])
;; Variant where the zeros come from a vec_concat with a zero vector.
23022 (define_insn_and_split "*sse4_1_zero_extendv2siv2di2_4"
23023 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
23027 (match_operand:V2SI 1 "vector_operand" "YrBm, *xBm, vm")
23028 (match_operand:V2SI 2 "const0_operand"))
23029 (match_operand:V4SI 3 "const0_operand"))
23030 (match_parallel 4 "pmovzx_parallel"
23031 [(match_operand 5 "const_int_operand")])))]
23034 "&& reload_completed"
23035 [(set (match_dup 0)
23037 (vec_select:V2SI (match_dup 1)
23038 (parallel [(const_int 0) (const_int 1)]))))]
23040 operands[0] = lowpart_subreg (V2DImode, operands[0], V4SImode);
23041 if (MEM_P (operands[1]))
23043 operands[1] = gen_rtx_ZERO_EXTEND (V2DImode, operands[1]);
23044 emit_insn (gen_rtx_SET (operands[0], operands[1]));
23047 operands[1] = lowpart_subreg (V4SImode, operands[1], V2SImode);
23049 [(set_attr "isa" "noavx,noavx,avx")])
;; Standard-named expander; a register source is widened to V4SI.
23051 (define_expand "<insn>v2siv2di2"
23052 [(set (match_operand:V2DI 0 "register_operand")
23054 (match_operand:V2SI 1 "nonimmediate_operand")))]
23057 if (!MEM_P (operands[1]))
23059 rtx op1 = force_reg (V2SImode, operands[1]);
23060 op1 = lowpart_subreg (V4SImode, op1, V2SImode);
23061 emit_insn (gen_sse4_1_<code>v2siv2di2 (operands[0], op1));
23066 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
23067 ;; setting FLAGS_REG.  But they are not really compare instructions.
23068 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
23069 [(set (reg:CC FLAGS_REG)
23070 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
23071 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
23074 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
23075 [(set_attr "type" "ssecomi")
23076 (set_attr "prefix_extra" "1")
23077 (set_attr "prefix" "vex")
23078 (set_attr "mode" "<MODE>")])
23080 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
23081 ;; But it is not really a compare instruction.
23082 (define_insn "<sse4_1>_ptest<mode>"
23083 [(set (reg:CC FLAGS_REG)
23084 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
23085 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
23088 "%vptest\t{%1, %0|%0, %1}"
23089 [(set_attr "isa" "noavx,noavx,avx")
23090 (set_attr "type" "ssecomi")
23091 (set_attr "prefix_extra" "1")
23092 (set_attr "prefix" "orig,orig,vex")
;; btver2 decodes the 256-bit (OImode) form as a vector (multi-op) insn.
23093 (set (attr "btver2_decode")
23095 (match_test "<sseinsnmode>mode==OImode")
23096 (const_string "vector")
23097 (const_string "*")))
23098 (set_attr "mode" "<sseinsnmode>")])
;; ptest on a TFmode (128-bit scalar) value, same flag semantics.
23100 (define_insn "ptesttf2"
23101 [(set (reg:CC FLAGS_REG)
23102 (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
23103 (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
23106 "%vptest\t{%1, %0|%0, %1}"
23107 [(set_attr "isa" "noavx,noavx,avx")
23108 (set_attr "type" "ssecomi")
23109 (set_attr "prefix_extra" "1")
23110 (set_attr "prefix" "orig,orig,vex")
23111 (set_attr "mode" "TI")])
;; ptest applied to (x & y, x & y) is just ptest (x, y): drop the
;; redundant ANDs before reload.
23113 (define_insn_and_split "*ptest<mode>_and"
23114 [(set (reg:CC FLAGS_REG)
23115 (unspec:CC [(and:V_AVX (match_operand:V_AVX 0 "register_operand")
23116 (match_operand:V_AVX 1 "vector_operand"))
23117 (and:V_AVX (match_dup 0) (match_dup 1))]
23120 && ix86_pre_reload_split ()"
23123 [(set (reg:CC FLAGS_REG)
23124 (unspec:CC [(match_dup 0) (match_dup 1)] UNSPEC_PTEST))])
;; nearbyint: round using the current MXCSR rounding mode without
;; raising inexact (ROUND_NO_EXC).
23126 (define_expand "nearbyint<mode>2"
23127 [(set (match_operand:VFH 0 "register_operand")
23129 [(match_operand:VFH 1 "vector_operand")
23133 "operands[2] = GEN_INT (ROUND_MXCSR | ROUND_NO_EXC);")
;; rint: same rounding, but inexact may be raised.
23135 (define_expand "rint<mode>2"
23136 [(set (match_operand:VFH 0 "register_operand")
23138 [(match_operand:VFH 1 "vector_operand")
23142 "operands[2] = GEN_INT (ROUND_MXCSR);")
23144 ;; Note vcvtpd2qq requires avx512dq for all vector lengths.
23145 (define_expand "lrint<mode><sseintvecmodelower>2"
23146 [(set (match_operand:<sseintvecmode> 0 "register_operand")
23147 (unspec:<sseintvecmode>
23148 [(match_operand:VF1_VF2_AVX512DQ 1 "register_operand")]
23149 UNSPEC_FIX_NOTRUNC))]
;; roundps/roundpd with an explicit rounding-control immediate.
23152 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
23153 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
23155 [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
23156 (match_operand:SI 2 "const_0_to_15_operand")]
23159 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
23160 [(set_attr "isa" "noavx,noavx,avx")
23161 (set_attr "type" "ssecvt")
23162 (set_attr "prefix_data16" "1,1,*")
23163 (set_attr "prefix_extra" "1")
23164 (set_attr "length_immediate" "1")
23165 (set_attr "prefix" "orig,orig,vex")
23166 (set_attr "mode" "<MODE>")])
;; Round then truncate-convert to a signed integer vector.
23168 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
23169 [(match_operand:<sseintvecmode> 0 "register_operand")
23170 (match_operand:VF1_128_256 1 "vector_operand")
23171 (match_operand:SI 2 "const_0_to_15_operand")]
23174 rtx tmp = gen_reg_rtx (<MODE>mode);
23177 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
23180 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; 512-bit rounding is implemented via vrndscale.
23184 (define_expand "avx512f_round<castmode>512"
23185 [(match_operand:VF_512 0 "register_operand")
23186 (match_operand:VF_512 1 "nonimmediate_operand")
23187 (match_operand:SI 2 "const_0_to_15_operand")]
23190 emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
;; 512-bit round + truncate-convert to V16SI.
23194 (define_expand "avx512f_roundps512_sfix"
23195 [(match_operand:V16SI 0 "register_operand")
23196 (match_operand:V16SF 1 "nonimmediate_operand")
23197 (match_operand:SI 2 "const_0_to_15_operand")]
23200 rtx tmp = gen_reg_rtx (V16SFmode);
23201 emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
23202 emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
;; Round two DF vectors and pack to one SI vector.  On AVX (when 256-bit
;; is not discouraged) the V2DF case concatenates into a V4DF and does a
;; single 256-bit round + convert; otherwise round each half and pack.
23206 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
23207 [(match_operand:<ssepackfltmode> 0 "register_operand")
23208 (match_operand:VF2 1 "vector_operand")
23209 (match_operand:VF2 2 "vector_operand")
23210 (match_operand:SI 3 "const_0_to_15_operand")]
23215 if (<MODE>mode == V2DFmode
23216 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
23218 rtx tmp2 = gen_reg_rtx (V4DFmode);
23220 tmp0 = gen_reg_rtx (V4DFmode);
23221 tmp1 = force_reg (V2DFmode, operands[1]);
23223 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
23224 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
23225 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
23229 tmp0 = gen_reg_rtx (<MODE>mode);
23230 tmp1 = gen_reg_rtx (<MODE>mode);
23233 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
23236 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
23239 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar roundss/roundsd: round element 0 of operand 2, merge the
;; upper elements from operand 1.  AVX-512 alternative uses vrndscale.
23244 (define_insn "sse4_1_round<ssescalarmodesuffix>"
23245 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
23248 [(match_operand:VF_128 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
23249 (match_operand:SI 3 "const_0_to_15_operand")]
23251 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
23255 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
23256 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
23257 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}
23258 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
23259 [(set_attr "isa" "noavx,noavx,avx,avx512f")
23260 (set_attr "type" "ssecvt")
23261 (set_attr "length_immediate" "1")
23262 (set_attr "prefix_data16" "1,1,*,*")
23263 (set_attr "prefix_extra" "1")
23264 (set_attr "prefix" "orig,orig,vex,evex")
23265 (set_attr "mode" "<MODE>")])
;; Variant matching the vec_duplicate form of the scalar round
;; (includes FP16 via VFH_128).
23267 (define_insn "*sse4_1_round<ssescalarmodesuffix>"
23268 [(set (match_operand:VFH_128 0 "register_operand" "=Yr,*x,x,v")
23270 (vec_duplicate:VFH_128
23271 (unspec:<ssescalarmode>
23272 [(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
23273 (match_operand:SI 3 "const_0_to_15_operand")]
23275 (match_operand:VFH_128 1 "register_operand" "0,0,x,v")
23279 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
23280 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
23281 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
23282 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
23283 [(set_attr "isa" "noavx,noavx,avx,avx512f")
23284 (set_attr "type" "ssecvt")
23285 (set_attr "length_immediate" "1")
23286 (set_attr "prefix_data16" "1,1,*,*")
23287 (set_attr "prefix_extra" "1")
23288 (set_attr "prefix" "orig,orig,vex,evex")
23289 (set_attr "mode" "<MODE>")])
;; floor/ceil/btrunc map to the round insn with the matching rounding-
;; control immediate; exceptions are suppressed (ROUND_NO_EXC), hence
;; the !flag_trapping_math guard.
23291 (define_expand "floor<mode>2"
23292 [(set (match_operand:VFH 0 "register_operand")
23294 [(match_operand:VFH 1 "vector_operand")
23297 "TARGET_SSE4_1 && !flag_trapping_math"
23298 "operands[2] = GEN_INT (ROUND_FLOOR | ROUND_NO_EXC);")
;; lfloor: floor then truncate-convert to the integer vector mode.
23300 (define_expand "lfloor<mode><sseintvecmodelower>2"
23301 [(match_operand:<sseintvecmode> 0 "register_operand")
23302 (match_operand:VF1_VF2_AVX512DQ 1 "register_operand")]
23303 "TARGET_SSE4_1 && !flag_trapping_math"
23305 rtx tmp = gen_reg_rtx (<MODE>mode);
23306 emit_insn (gen_floor<mode>2 (tmp, operands[1]));
23307 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
23311 (define_expand "ceil<mode>2"
23312 [(set (match_operand:VFH 0 "register_operand")
23314 [(match_operand:VFH 1 "vector_operand")
23317 "TARGET_SSE4_1 && !flag_trapping_math"
23318 "operands[2] = GEN_INT (ROUND_CEIL | ROUND_NO_EXC);")
;; lceil: ceil then truncate-convert to the integer vector mode.
23320 (define_expand "lceil<mode><sseintvecmodelower>2"
23321 [(match_operand:<sseintvecmode> 0 "register_operand")
23322 (match_operand:VF1_VF2_AVX512DQ 1 "register_operand")]
23323 "TARGET_SSE4_1 && !flag_trapping_math"
23325 rtx tmp = gen_reg_rtx (<MODE>mode);
23326 emit_insn (gen_ceil<mode>2 (tmp, operands[1]));
23327 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
23331 (define_expand "btrunc<mode>2"
23332 [(set (match_operand:VFH 0 "register_operand")
23334 [(match_operand:VFH 1 "vector_operand")
23337 "TARGET_SSE4_1 && !flag_trapping_math"
23338 "operands[2] = GEN_INT (ROUND_TRUNC | ROUND_NO_EXC);")
;; round-half-away-from-zero: add copysign (nextafter (0.5, 0.0), x)
;; to x, then truncate.  Using the predecessor of 0.5 avoids rounding
;; values exactly between representables the wrong way.
23340 (define_expand "round<mode>2"
23341 [(set (match_dup 3)
23343 (match_operand:VF 1 "register_operand")
23345 (set (match_operand:VF 0 "register_operand")
23347 [(match_dup 3) (match_dup 4)]
23349 "TARGET_SSE4_1 && !flag_trapping_math"
23351 machine_mode scalar_mode;
23352 const struct real_format *fmt;
23353 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
23354 rtx half, vec_half;
23356 scalar_mode = GET_MODE_INNER (<MODE>mode);
23358 /* load nextafter (0.5, 0.0) */
23359 fmt = REAL_MODE_FORMAT (scalar_mode);
23360 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
23361 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
23362 half = const_double_from_real_value (pred_half, scalar_mode);
23364 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
23365 vec_half = force_reg (<MODE>mode, vec_half);
23367 operands[2] = gen_reg_rtx (<MODE>mode);
23368 emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]));
23370 operands[3] = gen_reg_rtx (<MODE>mode);
23371 operands[4] = GEN_INT (ROUND_TRUNC);
;; lround: round then truncate-convert to the integer vector mode.
23374 (define_expand "lround<mode><sseintvecmodelower>2"
23375 [(match_operand:<sseintvecmode> 0 "register_operand")
23376 (match_operand:VF1_VF2_AVX512DQ 1 "register_operand")]
23377 "TARGET_SSE4_1 && !flag_trapping_math"
23379 rtx tmp = gen_reg_rtx (<MODE>mode);
23380 emit_insn (gen_round<mode>2 (tmp, operands[1]));
23381 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Same for single-float vectors returning the sfix integer mode.
23385 (define_expand "round<mode>2_sfix"
23386 [(match_operand:<sseintvecmode> 0 "register_operand")
23387 (match_operand:VF1 1 "register_operand")]
23388 "TARGET_SSE4_1 && !flag_trapping_math"
23390 rtx tmp = gen_reg_rtx (<MODE>mode);
23392 emit_insn (gen_round<mode>2 (tmp, operands[1]));
23395 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Round two DF vectors and pack into one SI vector.  For V2DF on AVX
;; (when 256-bit is not discouraged) concatenate into V4DF and do one
;; 256-bit round + convert; otherwise round each input and pack.
23399 (define_expand "round<mode>2_vec_pack_sfix"
23400 [(match_operand:<ssepackfltmode> 0 "register_operand")
23401 (match_operand:VF2 1 "register_operand")
23402 (match_operand:VF2 2 "register_operand")]
23403 "TARGET_SSE4_1 && !flag_trapping_math"
23407 if (<MODE>mode == V2DFmode
23408 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
23410 rtx tmp2 = gen_reg_rtx (V4DFmode);
23412 tmp0 = gen_reg_rtx (V4DFmode);
23413 tmp1 = force_reg (V2DFmode, operands[1]);
23415 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
23416 emit_insn (gen_roundv4df2 (tmp2, tmp0));
23417 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
23421 tmp0 = gen_reg_rtx (<MODE>mode);
23422 tmp1 = gen_reg_rtx (<MODE>mode);
23424 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
23425 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
23428 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
23433 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
23435 ;; Intel SSE4.2 string/text processing instructions
23437 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern producing ECX result, XMM0 mask and
;; FLAGS.  Before reload it is split into only the variants whose
;; results are actually used (REG_UNUSED notes); if nothing is used,
;; the whole insn is deleted.
23439 (define_insn_and_split "sse4_2_pcmpestr"
23440 [(set (match_operand:SI 0 "register_operand" "=c,c")
23442 [(match_operand:V16QI 2 "register_operand" "x,x")
23443 (match_operand:SI 3 "register_operand" "a,a")
23444 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
23445 (match_operand:SI 5 "register_operand" "d,d")
23446 (match_operand:SI 6 "const_0_to_255_operand")]
23448 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
23456 (set (reg:CC FLAGS_REG)
23465 && ix86_pre_reload_split ()"
;; Which of the three results (ECX, XMM0, FLAGS) are live?
23470 int ecx = !find_regno_note (curr_insn, REG_UNUSED,
23471 reg_or_subregno (operands[0]));
23472 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED,
23473 reg_or_subregno (operands[1]));
23474 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
23477 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
23478 operands[3], operands[4],
23479 operands[5], operands[6]));
23481 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
23482 operands[3], operands[4],
23483 operands[5], operands[6]));
;; Flags-only use: emit the CC-only variant.
23484 if (flags && !(ecx || xmm0))
23485 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
23486 operands[2], operands[3],
23487 operands[4], operands[5],
;; Fully dead: replace with a deleted-insn note.
23489 if (!(flags || ecx || xmm0))
23490 emit_note (NOTE_INSN_DELETED);
23494 [(set_attr "type" "sselog")
23495 (set_attr "prefix_data16" "1")
23496 (set_attr "prefix_extra" "1")
23497 (set_attr "length_immediate" "1")
23498 (set_attr "memory" "none,load")
23499 (set_attr "mode" "TI")])
23501 (define_insn "sse4_2_pcmpestri"
23502 [(set (match_operand:SI 0 "register_operand" "=c,c")
23504 [(match_operand:V16QI 1 "register_operand" "x,x")
23505 (match_operand:SI 2 "register_operand" "a,a")
23506 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
23507 (match_operand:SI 4 "register_operand" "d,d")
23508 (match_operand:SI 5 "const_0_to_255_operand")]
23510 (set (reg:CC FLAGS_REG)
23519 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
23520 [(set_attr "type" "sselog")
23521 (set_attr "prefix_data16" "1")
23522 (set_attr "prefix_extra" "1")
23523 (set_attr "prefix" "maybe_vex")
23524 (set_attr "length_immediate" "1")
23525 (set_attr "btver2_decode" "vector")
23526 (set_attr "memory" "none,load")
23527 (set_attr "mode" "TI")])
23529 (define_insn "sse4_2_pcmpestrm"
23530 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
23532 [(match_operand:V16QI 1 "register_operand" "x,x")
23533 (match_operand:SI 2 "register_operand" "a,a")
23534 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
23535 (match_operand:SI 4 "register_operand" "d,d")
23536 (match_operand:SI 5 "const_0_to_255_operand")]
23538 (set (reg:CC FLAGS_REG)
23547 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
23548 [(set_attr "type" "sselog")
23549 (set_attr "prefix_data16" "1")
23550 (set_attr "prefix_extra" "1")
23551 (set_attr "length_immediate" "1")
23552 (set_attr "prefix" "maybe_vex")
23553 (set_attr "btver2_decode" "vector")
23554 (set_attr "memory" "none,load")
23555 (set_attr "mode" "TI")])
23557 (define_insn "sse4_2_pcmpestr_cconly"
23558 [(set (reg:CC FLAGS_REG)
23560 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
23561 (match_operand:SI 3 "register_operand" "a,a,a,a")
23562 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
23563 (match_operand:SI 5 "register_operand" "d,d,d,d")
23564 (match_operand:SI 6 "const_0_to_255_operand")]
23566 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
23567 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
23570 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
23571 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
23572 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
23573 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
23574 [(set_attr "type" "sselog")
23575 (set_attr "prefix_data16" "1")
23576 (set_attr "prefix_extra" "1")
23577 (set_attr "length_immediate" "1")
23578 (set_attr "memory" "none,load,none,load")
23579 (set_attr "btver2_decode" "vector,vector,vector,vector")
23580 (set_attr "prefix" "maybe_vex")
23581 (set_attr "mode" "TI")])
23583 (define_insn_and_split "sse4_2_pcmpistr"
23584 [(set (match_operand:SI 0 "register_operand" "=c,c")
23586 [(match_operand:V16QI 2 "register_operand" "x,x")
23587 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
23588 (match_operand:SI 4 "const_0_to_255_operand")]
23590 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
23596 (set (reg:CC FLAGS_REG)
23603 && ix86_pre_reload_split ()"
23608 int ecx = !find_regno_note (curr_insn, REG_UNUSED,
23609 reg_or_subregno (operands[0]));
23610 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED,
23611 reg_or_subregno (operands[1]));
23612 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
23615 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
23616 operands[3], operands[4]));
23618 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
23619 operands[3], operands[4]));
23620 if (flags && !(ecx || xmm0))
23621 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
23622 operands[2], operands[3],
23624 if (!(flags || ecx || xmm0))
23625 emit_note (NOTE_INSN_DELETED);
23629 [(set_attr "type" "sselog")
23630 (set_attr "prefix_data16" "1")
23631 (set_attr "prefix_extra" "1")
23632 (set_attr "length_immediate" "1")
23633 (set_attr "memory" "none,load")
23634 (set_attr "mode" "TI")])
23636 (define_insn "sse4_2_pcmpistri"
23637 [(set (match_operand:SI 0 "register_operand" "=c,c")
23639 [(match_operand:V16QI 1 "register_operand" "x,x")
23640 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
23641 (match_operand:SI 3 "const_0_to_255_operand")]
23643 (set (reg:CC FLAGS_REG)
23650 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
23651 [(set_attr "type" "sselog")
23652 (set_attr "prefix_data16" "1")
23653 (set_attr "prefix_extra" "1")
23654 (set_attr "length_immediate" "1")
23655 (set_attr "prefix" "maybe_vex")
23656 (set_attr "memory" "none,load")
23657 (set_attr "btver2_decode" "vector")
23658 (set_attr "mode" "TI")])
23660 (define_insn "sse4_2_pcmpistrm"
23661 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
23663 [(match_operand:V16QI 1 "register_operand" "x,x")
23664 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
23665 (match_operand:SI 3 "const_0_to_255_operand")]
23667 (set (reg:CC FLAGS_REG)
23674 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
23675 [(set_attr "type" "sselog")
23676 (set_attr "prefix_data16" "1")
23677 (set_attr "prefix_extra" "1")
23678 (set_attr "length_immediate" "1")
23679 (set_attr "prefix" "maybe_vex")
23680 (set_attr "memory" "none,load")
23681 (set_attr "btver2_decode" "vector")
23682 (set_attr "mode" "TI")])
23684 (define_insn "sse4_2_pcmpistr_cconly"
23685 [(set (reg:CC FLAGS_REG)
23687 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
23688 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
23689 (match_operand:SI 4 "const_0_to_255_operand")]
23691 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
23692 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
23695 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
23696 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
23697 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
23698 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
23699 [(set_attr "type" "sselog")
23700 (set_attr "prefix_data16" "1")
23701 (set_attr "prefix_extra" "1")
23702 (set_attr "length_immediate" "1")
23703 (set_attr "memory" "none,load,none,load")
23704 (set_attr "prefix" "maybe_vex")
23705 (set_attr "btver2_decode" "vector,vector,vector,vector")
23706 (set_attr "mode" "TI")])
23708 ;; Packed float variants
23709 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
23710 [(V8DI "V8SF") (V16SI "V16SF")])
23712 (define_expand "avx512pf_gatherpf<mode>sf"
23714 [(match_operand:<avx512fmaskmode> 0 "register_operand")
23715 (mem:<GATHER_SCATTER_SF_MEM_MODE>
23717 [(match_operand 2 "vsib_address_operand")
23718 (match_operand:VI48_512 1 "register_operand")
23719 (match_operand:SI 3 "const1248_operand")]))
23720 (match_operand:SI 4 "const_2_to_3_operand")]
23721 UNSPEC_GATHER_PREFETCH)]
23725 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
23726 operands[3]), UNSPEC_VSIBADDR);
23729 (define_insn "*avx512pf_gatherpf<VI48_512:mode>sf_mask"
23731 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
23732 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
23734 [(match_operand:P 2 "vsib_address_operand" "Tv")
23735 (match_operand:VI48_512 1 "register_operand" "v")
23736 (match_operand:SI 3 "const1248_operand")]
23738 (match_operand:SI 4 "const_2_to_3_operand")]
23739 UNSPEC_GATHER_PREFETCH)]
23742 switch (INTVAL (operands[4]))
23745 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
23746 gas changed what it requires incompatibly. */
23747 return "%M2vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
23749 return "%M2vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
23751 gcc_unreachable ();
23754 [(set_attr "type" "sse")
23755 (set_attr "prefix" "evex")
23756 (set_attr "mode" "XI")])
23758 ;; Packed double variants
23759 (define_expand "avx512pf_gatherpf<mode>df"
23761 [(match_operand:<avx512fmaskmode> 0 "register_operand")
23764 [(match_operand 2 "vsib_address_operand")
23765 (match_operand:VI4_256_8_512 1 "register_operand")
23766 (match_operand:SI 3 "const1248_operand")]))
23767 (match_operand:SI 4 "const_2_to_3_operand")]
23768 UNSPEC_GATHER_PREFETCH)]
23772 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
23773 operands[3]), UNSPEC_VSIBADDR);
23776 (define_insn "*avx512pf_gatherpf<VI4_256_8_512:mode>df_mask"
23778 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
23779 (match_operator:V8DF 5 "vsib_mem_operator"
23781 [(match_operand:P 2 "vsib_address_operand" "Tv")
23782 (match_operand:VI4_256_8_512 1 "register_operand" "v")
23783 (match_operand:SI 3 "const1248_operand")]
23785 (match_operand:SI 4 "const_2_to_3_operand")]
23786 UNSPEC_GATHER_PREFETCH)]
23789 switch (INTVAL (operands[4]))
23792 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
23793 gas changed what it requires incompatibly. */
23794 return "%M2vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
23796 return "%M2vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
23798 gcc_unreachable ();
23801 [(set_attr "type" "sse")
23802 (set_attr "prefix" "evex")
23803 (set_attr "mode" "XI")])
23805 ;; Packed float variants
23806 (define_expand "avx512pf_scatterpf<mode>sf"
23808 [(match_operand:<avx512fmaskmode> 0 "register_operand")
23809 (mem:<GATHER_SCATTER_SF_MEM_MODE>
23811 [(match_operand 2 "vsib_address_operand")
23812 (match_operand:VI48_512 1 "register_operand")
23813 (match_operand:SI 3 "const1248_operand")]))
23814 (match_operand:SI 4 "const2367_operand")]
23815 UNSPEC_SCATTER_PREFETCH)]
23819 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
23820 operands[3]), UNSPEC_VSIBADDR);
23823 (define_insn "*avx512pf_scatterpf<VI48_512:mode>sf_mask"
23825 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
23826 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
23828 [(match_operand:P 2 "vsib_address_operand" "Tv")
23829 (match_operand:VI48_512 1 "register_operand" "v")
23830 (match_operand:SI 3 "const1248_operand")]
23832 (match_operand:SI 4 "const2367_operand")]
23833 UNSPEC_SCATTER_PREFETCH)]
23836 switch (INTVAL (operands[4]))
23840 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
23841 gas changed what it requires incompatibly. */
23842 return "%M2vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
23845 return "%M2vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
23847 gcc_unreachable ();
23850 [(set_attr "type" "sse")
23851 (set_attr "prefix" "evex")
23852 (set_attr "mode" "XI")])
23854 ;; Packed double variants
23855 (define_expand "avx512pf_scatterpf<mode>df"
23857 [(match_operand:<avx512fmaskmode> 0 "register_operand")
23860 [(match_operand 2 "vsib_address_operand")
23861 (match_operand:VI4_256_8_512 1 "register_operand")
23862 (match_operand:SI 3 "const1248_operand")]))
23863 (match_operand:SI 4 "const2367_operand")]
23864 UNSPEC_SCATTER_PREFETCH)]
23868 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
23869 operands[3]), UNSPEC_VSIBADDR);
23872 (define_insn "*avx512pf_scatterpf<VI4_256_8_512:mode>df_mask"
23874 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
23875 (match_operator:V8DF 5 "vsib_mem_operator"
23877 [(match_operand:P 2 "vsib_address_operand" "Tv")
23878 (match_operand:VI4_256_8_512 1 "register_operand" "v")
23879 (match_operand:SI 3 "const1248_operand")]
23881 (match_operand:SI 4 "const2367_operand")]
23882 UNSPEC_SCATTER_PREFETCH)]
23885 switch (INTVAL (operands[4]))
23889 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
23890 gas changed what it requires incompatibly. */
23891 return "%M2vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
23894 return "%M2vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
23896 gcc_unreachable ();
23899 [(set_attr "type" "sse")
23900 (set_attr "prefix" "evex")
23901 (set_attr "mode" "XI")])
23903 (define_expand "exp2<mode>2"
23904 [(set (match_operand:VF_512 0 "register_operand")
23906 [(match_operand:VF_512 1 "vector_operand")]
23910 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
23911 [(set (match_operand:VF_512 0 "register_operand" "=v")
23913 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
23916 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
23917 [(set_attr "prefix" "evex")
23918 (set_attr "type" "sse")
23919 (set_attr "mode" "<MODE>")])
23921 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
23922 [(set (match_operand:VF_512 0 "register_operand" "=v")
23924 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
23927 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
23928 [(set_attr "prefix" "evex")
23929 (set_attr "type" "sse")
23930 (set_attr "mode" "<MODE>")])
23932 (define_insn "avx512er_vmrcp28<mode><mask_name><round_saeonly_name>"
23933 [(set (match_operand:VF_128 0 "register_operand" "=v")
23936 [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
23938 (match_operand:VF_128 2 "register_operand" "v")
23941 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_operand3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
23942 [(set_attr "length_immediate" "1")
23943 (set_attr "prefix" "evex")
23944 (set_attr "type" "sse")
23945 (set_attr "mode" "<MODE>")])
23947 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
23948 [(set (match_operand:VF_512 0 "register_operand" "=v")
23950 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
23953 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
23954 [(set_attr "prefix" "evex")
23955 (set_attr "type" "sse")
23956 (set_attr "mode" "<MODE>")])
23958 (define_insn "avx512er_vmrsqrt28<mode><mask_name><round_saeonly_name>"
23959 [(set (match_operand:VF_128 0 "register_operand" "=v")
23962 [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
23964 (match_operand:VF_128 2 "register_operand" "v")
23967 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_operand3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}"
23968 [(set_attr "length_immediate" "1")
23969 (set_attr "type" "sse")
23970 (set_attr "prefix" "evex")
23971 (set_attr "mode" "<MODE>")])
23973 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
23975 ;; XOP instructions
23977 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
23979 (define_code_iterator xop_plus [plus ss_plus])
23981 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
23982 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
23984 ;; XOP parallel integer multiply/add instructions.
23986 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
23987 [(set (match_operand:VI24_128 0 "register_operand" "=x")
23990 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
23991 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
23992 (match_operand:VI24_128 3 "register_operand" "x")))]
23994 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
23995 [(set_attr "type" "ssemuladd")
23996 (set_attr "mode" "TI")])
23998 (define_insn "xop_p<macs>dql"
23999 [(set (match_operand:V2DI 0 "register_operand" "=x")
24004 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
24005 (parallel [(const_int 0) (const_int 2)])))
24008 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
24009 (parallel [(const_int 0) (const_int 2)]))))
24010 (match_operand:V2DI 3 "register_operand" "x")))]
24012 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
24013 [(set_attr "type" "ssemuladd")
24014 (set_attr "mode" "TI")])
24016 (define_insn "xop_p<macs>dqh"
24017 [(set (match_operand:V2DI 0 "register_operand" "=x")
24022 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
24023 (parallel [(const_int 1) (const_int 3)])))
24026 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
24027 (parallel [(const_int 1) (const_int 3)]))))
24028 (match_operand:V2DI 3 "register_operand" "x")))]
24030 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
24031 [(set_attr "type" "ssemuladd")
24032 (set_attr "mode" "TI")])
24034 ;; XOP parallel integer multiply/add instructions for the intrinisics
24035 (define_insn "xop_p<macs>wd"
24036 [(set (match_operand:V4SI 0 "register_operand" "=x")
24041 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
24042 (parallel [(const_int 1) (const_int 3)
24043 (const_int 5) (const_int 7)])))
24046 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
24047 (parallel [(const_int 1) (const_int 3)
24048 (const_int 5) (const_int 7)]))))
24049 (match_operand:V4SI 3 "register_operand" "x")))]
24051 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
24052 [(set_attr "type" "ssemuladd")
24053 (set_attr "mode" "TI")])
24055 (define_insn "xop_p<madcs>wd"
24056 [(set (match_operand:V4SI 0 "register_operand" "=x")
24062 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
24063 (parallel [(const_int 0) (const_int 2)
24064 (const_int 4) (const_int 6)])))
24067 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
24068 (parallel [(const_int 0) (const_int 2)
24069 (const_int 4) (const_int 6)]))))
24074 (parallel [(const_int 1) (const_int 3)
24075 (const_int 5) (const_int 7)])))
24079 (parallel [(const_int 1) (const_int 3)
24080 (const_int 5) (const_int 7)])))))
24081 (match_operand:V4SI 3 "register_operand" "x")))]
24083 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
24084 [(set_attr "type" "ssemuladd")
24085 (set_attr "mode" "TI")])
24087 ;; XOP parallel XMM conditional moves
24088 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
24089 [(set (match_operand:V_128_256 0 "register_operand" "=x,x")
24090 (if_then_else:V_128_256
24091 (match_operand:V_128_256 3 "nonimmediate_operand" "x,m")
24092 (match_operand:V_128_256 1 "register_operand" "x,x")
24093 (match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))]
24095 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
24096 [(set_attr "type" "sse4arg")])
24098 ;; Recognize XOP's vpcmov from canonical (xor (and (xor t f) c) f)
24100 [(set (match_operand:V_128_256 0 "register_operand")
24103 (xor:V_128_256 (match_operand:V_128_256 1 "register_operand")
24104 (match_operand:V_128_256 2 "register_operand"))
24105 (match_operand:V_128_256 3 "nonimmediate_operand"))
24108 [(set (match_dup 0) (if_then_else:V_128_256 (match_dup 3)
24112 [(set (match_operand:V_128_256 0 "register_operand")
24115 (xor:V_128_256 (match_operand:V_128_256 1 "register_operand")
24116 (match_operand:V_128_256 2 "register_operand"))
24117 (match_operand:V_128_256 3 "nonimmediate_operand"))
24120 [(set (match_dup 0) (if_then_else:V_128_256 (match_dup 3)
24124 ;; XOP horizontal add/subtract instructions
24125 (define_insn "xop_phadd<u>bw"
24126 [(set (match_operand:V8HI 0 "register_operand" "=x")
24130 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
24131 (parallel [(const_int 0) (const_int 2)
24132 (const_int 4) (const_int 6)
24133 (const_int 8) (const_int 10)
24134 (const_int 12) (const_int 14)])))
24138 (parallel [(const_int 1) (const_int 3)
24139 (const_int 5) (const_int 7)
24140 (const_int 9) (const_int 11)
24141 (const_int 13) (const_int 15)])))))]
24143 "vphadd<u>bw\t{%1, %0|%0, %1}"
24144 [(set_attr "type" "sseiadd1")])
24146 (define_insn "xop_phadd<u>bd"
24147 [(set (match_operand:V4SI 0 "register_operand" "=x")
24152 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
24153 (parallel [(const_int 0) (const_int 4)
24154 (const_int 8) (const_int 12)])))
24158 (parallel [(const_int 1) (const_int 5)
24159 (const_int 9) (const_int 13)]))))
24164 (parallel [(const_int 2) (const_int 6)
24165 (const_int 10) (const_int 14)])))
24169 (parallel [(const_int 3) (const_int 7)
24170 (const_int 11) (const_int 15)]))))))]
24172 "vphadd<u>bd\t{%1, %0|%0, %1}"
24173 [(set_attr "type" "sseiadd1")])
24175 (define_insn "xop_phadd<u>bq"
24176 [(set (match_operand:V2DI 0 "register_operand" "=x")
24182 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
24183 (parallel [(const_int 0) (const_int 8)])))
24187 (parallel [(const_int 1) (const_int 9)]))))
24192 (parallel [(const_int 2) (const_int 10)])))
24196 (parallel [(const_int 3) (const_int 11)])))))
24202 (parallel [(const_int 4) (const_int 12)])))
24206 (parallel [(const_int 5) (const_int 13)]))))
24211 (parallel [(const_int 6) (const_int 14)])))
24215 (parallel [(const_int 7) (const_int 15)])))))))]
24217 "vphadd<u>bq\t{%1, %0|%0, %1}"
24218 [(set_attr "type" "sseiadd1")])
24220 (define_insn "xop_phadd<u>wd"
24221 [(set (match_operand:V4SI 0 "register_operand" "=x")
24225 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
24226 (parallel [(const_int 0) (const_int 2)
24227 (const_int 4) (const_int 6)])))
24231 (parallel [(const_int 1) (const_int 3)
24232 (const_int 5) (const_int 7)])))))]
24234 "vphadd<u>wd\t{%1, %0|%0, %1}"
24235 [(set_attr "type" "sseiadd1")])
24237 (define_insn "xop_phadd<u>wq"
24238 [(set (match_operand:V2DI 0 "register_operand" "=x")
24243 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
24244 (parallel [(const_int 0) (const_int 4)])))
24248 (parallel [(const_int 1) (const_int 5)]))))
24253 (parallel [(const_int 2) (const_int 6)])))
24257 (parallel [(const_int 3) (const_int 7)]))))))]
24259 "vphadd<u>wq\t{%1, %0|%0, %1}"
24260 [(set_attr "type" "sseiadd1")])
24262 (define_insn "xop_phadd<u>dq"
24263 [(set (match_operand:V2DI 0 "register_operand" "=x")
24267 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
24268 (parallel [(const_int 0) (const_int 2)])))
24272 (parallel [(const_int 1) (const_int 3)])))))]
24274 "vphadd<u>dq\t{%1, %0|%0, %1}"
24275 [(set_attr "type" "sseiadd1")])
24277 (define_insn "xop_phsubbw"
24278 [(set (match_operand:V8HI 0 "register_operand" "=x")
24282 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
24283 (parallel [(const_int 0) (const_int 2)
24284 (const_int 4) (const_int 6)
24285 (const_int 8) (const_int 10)
24286 (const_int 12) (const_int 14)])))
24290 (parallel [(const_int 1) (const_int 3)
24291 (const_int 5) (const_int 7)
24292 (const_int 9) (const_int 11)
24293 (const_int 13) (const_int 15)])))))]
24295 "vphsubbw\t{%1, %0|%0, %1}"
24296 [(set_attr "type" "sseiadd1")])
24298 (define_insn "xop_phsubwd"
24299 [(set (match_operand:V4SI 0 "register_operand" "=x")
24303 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
24304 (parallel [(const_int 0) (const_int 2)
24305 (const_int 4) (const_int 6)])))
24309 (parallel [(const_int 1) (const_int 3)
24310 (const_int 5) (const_int 7)])))))]
24312 "vphsubwd\t{%1, %0|%0, %1}"
24313 [(set_attr "type" "sseiadd1")])
24315 (define_insn "xop_phsubdq"
24316 [(set (match_operand:V2DI 0 "register_operand" "=x")
24320 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
24321 (parallel [(const_int 0) (const_int 2)])))
24325 (parallel [(const_int 1) (const_int 3)])))))]
24327 "vphsubdq\t{%1, %0|%0, %1}"
24328 [(set_attr "type" "sseiadd1")])
24330 ;; XOP permute instructions
24331 (define_insn "xop_pperm"
24332 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
24334 [(match_operand:V16QI 1 "register_operand" "x,x")
24335 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
24336 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
24337 UNSPEC_XOP_PERMUTE))]
24338 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
24339 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
24340 [(set_attr "type" "sse4arg")
24341 (set_attr "mode" "TI")])
24343 ;; XOP pack instructions that combine two vectors into a smaller vector
24344 (define_insn "xop_pperm_pack_v2di_v4si"
24345 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
24348 (match_operand:V2DI 1 "register_operand" "x,x"))
24350 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
24351 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
24352 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
24353 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
24354 [(set_attr "type" "sse4arg")
24355 (set_attr "mode" "TI")])
24357 (define_insn "xop_pperm_pack_v4si_v8hi"
24358 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
24361 (match_operand:V4SI 1 "register_operand" "x,x"))
24363 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
24364 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
24365 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
24366 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
24367 [(set_attr "type" "sse4arg")
24368 (set_attr "mode" "TI")])
24370 (define_insn "xop_pperm_pack_v8hi_v16qi"
24371 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
24374 (match_operand:V8HI 1 "register_operand" "x,x"))
24376 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
24377 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
24378 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
24379 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
24380 [(set_attr "type" "sse4arg")
24381 (set_attr "mode" "TI")])
24383 ;; XOP packed rotate instructions
24384 (define_expand "rotl<mode>3"
24385 [(set (match_operand:VI_128 0 "register_operand")
24387 (match_operand:VI_128 1 "nonimmediate_operand")
24388 (match_operand:SI 2 "general_operand")))]
24391 /* If we were given a scalar, convert it to parallel */
24392 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
24394 rtvec vs = rtvec_alloc (<ssescalarnum>);
24395 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
24396 rtx reg = gen_reg_rtx (<MODE>mode);
24397 rtx op2 = operands[2];
24400 if (GET_MODE (op2) != <ssescalarmode>mode)
24402 op2 = gen_reg_rtx (<ssescalarmode>mode);
24403 convert_move (op2, operands[2], false);
24406 for (i = 0; i < <ssescalarnum>; i++)
24407 RTVEC_ELT (vs, i) = op2;
24409 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
24410 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
24415 (define_expand "rotr<mode>3"
24416 [(set (match_operand:VI_128 0 "register_operand")
24418 (match_operand:VI_128 1 "nonimmediate_operand")
24419 (match_operand:SI 2 "general_operand")))]
24422 /* If we were given a scalar, convert it to parallel */
24423 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
24425 rtvec vs = rtvec_alloc (<ssescalarnum>);
24426 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
24427 rtx neg = gen_reg_rtx (<MODE>mode);
24428 rtx reg = gen_reg_rtx (<MODE>mode);
24429 rtx op2 = operands[2];
24432 if (GET_MODE (op2) != <ssescalarmode>mode)
24434 op2 = gen_reg_rtx (<ssescalarmode>mode);
24435 convert_move (op2, operands[2], false);
24438 for (i = 0; i < <ssescalarnum>; i++)
24439 RTVEC_ELT (vs, i) = op2;
24441 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
24442 emit_insn (gen_neg<mode>2 (neg, reg));
24443 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
24448 (define_insn "xop_rotl<mode>3"
24449 [(set (match_operand:VI_128 0 "register_operand" "=x")
24451 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
24452 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand")))]
24454 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
24455 [(set_attr "type" "sseishft")
24456 (set_attr "length_immediate" "1")
24457 (set_attr "mode" "TI")])
24459 (define_insn "xop_rotr<mode>3"
24460 [(set (match_operand:VI_128 0 "register_operand" "=x")
24462 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
24463 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand")))]
24467 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
24468 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
24470 [(set_attr "type" "sseishft")
24471 (set_attr "length_immediate" "1")
24472 (set_attr "mode" "TI")])
24474 (define_expand "vrotr<mode>3"
24475 [(match_operand:VI_128 0 "register_operand")
24476 (match_operand:VI_128 1 "register_operand")
24477 (match_operand:VI_128 2 "register_operand")]
24480 rtx reg = gen_reg_rtx (<MODE>mode);
24481 emit_insn (gen_neg<mode>2 (reg, operands[2]));
24482 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
24486 (define_expand "vrotl<mode>3"
24487 [(match_operand:VI_128 0 "register_operand")
24488 (match_operand:VI_128 1 "register_operand")
24489 (match_operand:VI_128 2 "register_operand")]
24492 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
24496 (define_insn "xop_vrotl<mode>3"
24497 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
24498 (if_then_else:VI_128
24500 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
24503 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
24507 (neg:VI_128 (match_dup 2)))))]
24508 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
24509 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
24510 [(set_attr "type" "sseishft")
24511 (set_attr "prefix_data16" "0")
24512 (set_attr "prefix_extra" "2")
24513 (set_attr "mode" "TI")])
24515 ;; XOP packed shift instructions.
24516 (define_expand "vlshr<mode>3"
24517 [(set (match_operand:VI12_128 0 "register_operand")
24519 (match_operand:VI12_128 1 "register_operand")
24520 (match_operand:VI12_128 2 "nonimmediate_operand")))]
24521 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
24525 rtx neg = gen_reg_rtx (<MODE>mode);
24526 emit_insn (gen_neg<mode>2 (neg, operands[2]));
24527 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
24530 else if (<MODE>mode == V16QImode)
24532 ix86_expand_vecop_qihi (LSHIFTRT, operands[0], operands[1], operands[2]);
24537 (define_expand "vlshr<mode>3"
24538 [(set (match_operand:VI48_128 0 "register_operand")
24540 (match_operand:VI48_128 1 "register_operand")
24541 (match_operand:VI48_128 2 "nonimmediate_operand")))]
24542 "TARGET_AVX2 || TARGET_XOP"
24546 rtx neg = gen_reg_rtx (<MODE>mode);
24547 emit_insn (gen_neg<mode>2 (neg, operands[2]));
24548 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
24553 (define_expand "v<insn><mode>3"
24554 [(set (match_operand:VI12_256_512_AVX512VL 0 "register_operand")
24555 (any_shift:VI12_256_512_AVX512VL
24556 (match_operand:VI12_256_512_AVX512VL 1 "register_operand")
24557 (match_operand:VI12_256_512_AVX512VL 2 "nonimmediate_operand")))]
24560 if (<MODE>mode == V32QImode || <MODE>mode == V64QImode)
24562 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
24567 (define_expand "vlshr<mode>3"
24568 [(set (match_operand:VI48_512 0 "register_operand")
24570 (match_operand:VI48_512 1 "register_operand")
24571 (match_operand:VI48_512 2 "nonimmediate_operand")))]
24574 (define_expand "vlshr<mode>3"
24575 [(set (match_operand:VI48_256 0 "register_operand")
24577 (match_operand:VI48_256 1 "register_operand")
24578 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Variable arithmetic right shift, V8DI (condition elided in this
;; extract; presumably TARGET_AVX512F — TODO confirm).
24581 (define_expand "vashrv8di3"
24582 [(set (match_operand:V8DI 0 "register_operand")
24584 (match_operand:V8DI 1 "register_operand")
24585 (match_operand:V8DI 2 "nonimmediate_operand")))]
;; Variable arithmetic right shift, V4DI.  Without AVX512VL (no native
;; vpsravq) it is emulated from logical shifts via the sign-bit trick:
;; with m = sign-bit mask, (x >>a s) == ((x >>l s) ^ (m >>l s)) - (m >>l s).
24588 (define_expand "vashrv4di3"
24589 [(set (match_operand:V4DI 0 "register_operand")
24591 (match_operand:V4DI 1 "register_operand")
24592 (match_operand:V4DI 2 "nonimmediate_operand")))]
24595 if (!TARGET_AVX512VL)
24597 rtx mask = ix86_build_signbit_mask (V4DImode, 1, 0);
24598 rtx t1 = gen_reg_rtx (V4DImode);
24599 rtx t2 = gen_reg_rtx (V4DImode);
24600 rtx t3 = gen_reg_rtx (V4DImode);
24601 emit_insn (gen_vlshrv4di3 (t1, operands[1], operands[2]));
24602 emit_insn (gen_vlshrv4di3 (t2, mask, operands[2]));
24603 emit_insn (gen_xorv4di3 (t3, t1, t2));
24604 emit_insn (gen_subv4di3 (operands[0], t3, t2));
;; Variable arithmetic right shift for 8/16-bit element 128-bit vectors.
;; XOP path: negate the count and use vpsha (shifts right on negative
;; counts); V16QI otherwise goes through ix86_expand_vecop_qihi.
;; NOTE(review): some original lines are elided in this extract.
24609 (define_expand "vashr<mode>3"
24610 [(set (match_operand:VI12_128 0 "register_operand")
24612 (match_operand:VI12_128 1 "register_operand")
24613 (match_operand:VI12_128 2 "nonimmediate_operand")))]
24614 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
24618 rtx neg = gen_reg_rtx (<MODE>mode);
24619 emit_insn (gen_neg<mode>2 (neg, operands[2]));
24620 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
24623 else if(<MODE>mode == V16QImode)
24625 ix86_expand_vecop_qihi (ASHIFTRT, operands[0],operands[1], operands[2]);
;; Variable arithmetic right shift, V2DI.  XOP uses negated vpshaq;
;; without AVX512VL (no vpsravq) the sign-bit emulation below is used:
;; (x >>a s) == ((x >>l s) ^ (m >>l s)) - (m >>l s), m = sign-bit mask.
24630 (define_expand "vashrv2di3"
24631 [(set (match_operand:V2DI 0 "register_operand")
24633 (match_operand:V2DI 1 "register_operand")
24634 (match_operand:V2DI 2 "nonimmediate_operand")))]
24635 "TARGET_XOP || TARGET_AVX2"
24639 rtx neg = gen_reg_rtx (V2DImode);
24640 emit_insn (gen_negv2di2 (neg, operands[2]));
24641 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
24644 if (!TARGET_AVX512VL)
24646 rtx mask = ix86_build_signbit_mask (V2DImode, 1, 0);
24647 rtx t1 = gen_reg_rtx (V2DImode);
24648 rtx t2 = gen_reg_rtx (V2DImode);
24649 rtx t3 = gen_reg_rtx (V2DImode);
24650 emit_insn (gen_vlshrv2di3 (t1, operands[1], operands[2]));
24651 emit_insn (gen_vlshrv2di3 (t2, mask, operands[2]));
24652 emit_insn (gen_xorv2di3 (t3, t1, t2));
24653 emit_insn (gen_subv2di3 (operands[0], t3, t2));
;; Variable arithmetic right shift, V4SI (XOP path via negated vpshad).
24658 (define_expand "vashrv4si3"
24659 [(set (match_operand:V4SI 0 "register_operand")
24660 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
24661 (match_operand:V4SI 2 "nonimmediate_operand")))]
24662 "TARGET_AVX2 || TARGET_XOP"
24666 rtx neg = gen_reg_rtx (V4SImode);
24667 emit_insn (gen_negv4si2 (neg, operands[2]));
24668 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; V16SI / V8SI variants (enable conditions elided in this extract).
24673 (define_expand "vashrv16si3"
24674 [(set (match_operand:V16SI 0 "register_operand")
24675 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
24676 (match_operand:V16SI 2 "nonimmediate_operand")))]
24679 (define_expand "vashrv8si3"
24680 [(set (match_operand:V8SI 0 "register_operand")
24681 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
24682 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; Variable left shift for 8/16-bit element 128-bit vectors.  XOP vpsha
;; left-shifts for positive counts, so no negation is needed here;
;; V16QI otherwise lowers through ix86_expand_vecop_qihi.
;; NOTE(review): some original lines are elided in this extract.
24685 (define_expand "vashl<mode>3"
24686 [(set (match_operand:VI12_128 0 "register_operand")
24688 (match_operand:VI12_128 1 "register_operand")
24689 (match_operand:VI12_128 2 "nonimmediate_operand")))]
24690 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
24694 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
24697 else if (<MODE>mode == V16QImode)
24699 ix86_expand_vecop_qihi (ASHIFT, operands[0], operands[1], operands[2]);
;; Variable left shift, 128-bit 32/64-bit element vectors; XOP path
;; forces the count into a register for vpsha.
24704 (define_expand "vashl<mode>3"
24705 [(set (match_operand:VI48_128 0 "register_operand")
24707 (match_operand:VI48_128 1 "register_operand")
24708 (match_operand:VI48_128 2 "nonimmediate_operand")))]
24709 "TARGET_AVX2 || TARGET_XOP"
24713 operands[2] = force_reg (<MODE>mode, operands[2]);
24714 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; 512-bit / 256-bit variable left shifts (conditions elided here).
24719 (define_expand "vashl<mode>3"
24720 [(set (match_operand:VI48_512 0 "register_operand")
24722 (match_operand:VI48_512 1 "register_operand")
24723 (match_operand:VI48_512 2 "nonimmediate_operand")))]
24726 (define_expand "vashl<mode>3"
24727 [(set (match_operand:VI48_256 0 "register_operand")
24729 (match_operand:VI48_256 1 "register_operand")
24730 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; XOP vpsha* — per-element shift whose direction depends on the sign of
;; the count operand: left for positive counts, right (arithmetic) for
;; negative, modeled with if_then_else over the negated count.
;; NOTE(review): interior lines of the RTL template are elided here.
24733 (define_insn "xop_sha<mode>3"
24734 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
24735 (if_then_else:VI_128
24737 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
24740 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
24744 (neg:VI_128 (match_dup 2)))))]
24745 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
24746 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
24747 [(set_attr "type" "sseishft")
24748 (set_attr "prefix_data16" "0")
24749 (set_attr "prefix_extra" "2")
24750 (set_attr "mode" "TI")])
;; XOP vpshl* — same sign-directed scheme with logical right shift for
;; negative counts.
24752 (define_insn "xop_shl<mode>3"
24753 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
24754 (if_then_else:VI_128
24756 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
24759 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
24763 (neg:VI_128 (match_dup 2)))))]
24764 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
24765 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
24766 [(set_attr "type" "sseishft")
24767 (set_attr "prefix_data16" "0")
24768 (set_attr "prefix_extra" "2")
24769 (set_attr "mode" "TI")])
;; Scalar-count shift of QImode-element vectors.  On XOP/V16QI the count
;; is broadcast to all 16 lanes (negated first for right shifts, since
;; vpshl/vpsha shift right on negative counts); otherwise lowered via
;; ix86_expand_vecop_qihi.
;; NOTE(review): some original lines are elided in this extract.
24771 (define_expand "<insn><mode>3"
24772 [(set (match_operand:VI1_AVX512 0 "register_operand")
24773 (any_shift:VI1_AVX512
24774 (match_operand:VI1_AVX512 1 "register_operand")
24775 (match_operand:SI 2 "nonmemory_operand")))]
24778 if (TARGET_XOP && <MODE>mode == V16QImode)
24780 bool negate = false;
24781 rtx (*gen) (rtx, rtx, rtx);
24785 if (<CODE> != ASHIFT)
24787 if (CONST_INT_P (operands[2]))
24788 operands[2] = GEN_INT (-INTVAL (operands[2]));
24792 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
24793 tmp = lowpart_subreg (QImode, operands[2], SImode);
24794 for (i = 0; i < 16; i++)
24795 XVECEXP (par, 0, i) = tmp;
24797 tmp = gen_reg_rtx (V16QImode);
24798 emit_insn (gen_vec_initv16qiqi (tmp, par));
24801 emit_insn (gen_negv16qi2 (tmp, tmp));
24803 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
24804 emit_insn (gen (operands[0], operands[1], tmp));
24807 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; Arithmetic right shift of V2DI by a scalar count.  Pre-AVX512VL there
;; is no native 64-bit arithmetic shift, so several emulations are
;; visible below: shift-by-63 as a signed compare against zero, constant
;; counts via V4SI shifts plus a permute, and the general case as
;; (x >>l s) | (sign_mask << (64 - s)); XOP falls back to vpshaq with a
;; broadcast, possibly negated, count.
;; NOTE(review): a significant number of original lines are elided in
;; this extract — confirm the control flow against the complete sse.md.
24811 (define_expand "ashrv2di3"
24812 [(set (match_operand:V2DI 0 "register_operand")
24814 (match_operand:V2DI 1 "register_operand")
24815 (match_operand:DI 2 "nonmemory_operand")))]
24818 if (!TARGET_AVX512VL)
24821 && CONST_INT_P (operands[2])
24822 && UINTVAL (operands[2]) >= 63)
24824 rtx zero = force_reg (V2DImode, CONST0_RTX (V2DImode));
24825 emit_insn (gen_sse4_2_gtv2di3 (operands[0], zero, operands[1]));
24828 if (operands[2] == const0_rtx)
24830 emit_move_insn (operands[0], operands[1]);
24833 if (CONST_INT_P (operands[2])
24834 && (!TARGET_XOP || UINTVAL (operands[2]) >= 63))
24836 vec_perm_builder sel (4, 4, 1);
24837 sel.quick_grow (4);
24839 rtx op1 = lowpart_subreg (V4SImode,
24840 force_reg (V2DImode, operands[1]),
24842 rtx target = gen_reg_rtx (V4SImode);
24843 if (UINTVAL (operands[2]) >= 63)
24845 arg0 = arg1 = gen_reg_rtx (V4SImode);
24846 emit_insn (gen_ashrv4si3 (arg0, op1, GEN_INT (31)));
24852 else if (INTVAL (operands[2]) > 32)
24854 arg0 = gen_reg_rtx (V4SImode);
24855 arg1 = gen_reg_rtx (V4SImode);
24856 emit_insn (gen_ashrv4si3 (arg1, op1, GEN_INT (31)));
24857 emit_insn (gen_ashrv4si3 (arg0, op1,
24858 GEN_INT (INTVAL (operands[2]) - 32)));
24864 else if (INTVAL (operands[2]) == 32)
24867 arg1 = gen_reg_rtx (V4SImode);
24868 emit_insn (gen_ashrv4si3 (arg1, op1, GEN_INT (31)));
24876 arg0 = gen_reg_rtx (V2DImode);
24877 arg1 = gen_reg_rtx (V4SImode);
24878 emit_insn (gen_lshrv2di3 (arg0, operands[1], operands[2]));
24879 emit_insn (gen_ashrv4si3 (arg1, op1, operands[2]));
24880 arg0 = lowpart_subreg (V4SImode, arg0, V2DImode);
24886 vec_perm_indices indices (sel, arg0 != arg1 ? 2 : 1, 4);
24887 bool ok = targetm.vectorize.vec_perm_const (V4SImode, V4SImode,
24888 target, arg0, arg1,
24891 emit_move_insn (operands[0],
24892 lowpart_subreg (V2DImode, target, V4SImode));
24897 rtx zero = force_reg (V2DImode, CONST0_RTX (V2DImode));
24898 rtx zero_or_all_ones;
24901 zero_or_all_ones = gen_reg_rtx (V2DImode);
24902 emit_insn (gen_sse4_2_gtv2di3 (zero_or_all_ones, zero,
24907 rtx temp = gen_reg_rtx (V4SImode);
24908 emit_insn (gen_ashrv4si3 (temp,
24909 lowpart_subreg (V4SImode,
24910 force_reg (V2DImode,
24914 zero_or_all_ones = gen_reg_rtx (V4SImode);
24915 emit_insn (gen_sse2_pshufd_1 (zero_or_all_ones, temp,
24916 const1_rtx, const1_rtx,
24917 GEN_INT (3), GEN_INT (3)))
24918 zero_or_all_ones = lowpart_subreg (V2DImode, zero_or_all_ones,
24921 rtx lshr_res = gen_reg_rtx (V2DImode);
24922 emit_insn (gen_lshrv2di3 (lshr_res, operands[1], operands[2]));
24923 rtx ashl_res = gen_reg_rtx (V2DImode);
24927 amount = gen_reg_rtx (DImode);
24928 emit_insn (gen_subdi3 (amount, force_reg (DImode, GEN_INT (64)),
24933 rtx temp = gen_reg_rtx (SImode);
24934 emit_insn (gen_subsi3 (temp, force_reg (SImode, GEN_INT (64)),
24935 lowpart_subreg (SImode, operands[2],
24937 amount = gen_reg_rtx (V4SImode);
24938 emit_insn (gen_vec_setv4si_0 (amount, CONST0_RTX (V4SImode),
24941 amount = lowpart_subreg (DImode, amount, GET_MODE (amount));
24942 emit_insn (gen_ashlv2di3 (ashl_res, zero_or_all_ones, amount));
24943 emit_insn (gen_iorv2di3 (operands[0], lshr_res, ashl_res));
24947 rtx reg = gen_reg_rtx (V2DImode);
24949 bool negate = false;
24952 if (CONST_INT_P (operands[2]))
24953 operands[2] = GEN_INT (-INTVAL (operands[2]));
24957 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
24958 for (i = 0; i < 2; i++)
24959 XVECEXP (par, 0, i) = operands[2];
24961 emit_insn (gen_vec_initv2didi (reg, par));
24964 emit_insn (gen_negv2di2 (reg, reg));
24966 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
24971 ;; XOP FRCZ support
;; vfrcz — extract the fractional part of each FP element.
;; NOTE(review): the unspec name line is elided in this extract.
24972 (define_insn "xop_frcz<mode>2"
24973 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
24975 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
24978 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
24979 [(set_attr "type" "ssecvt1")
24980 (set_attr "mode" "<MODE>")])
;; Scalar (vector-merge) form of vfrcz; operand 2 is the zero vector
;; merged into the untouched upper elements.
24982 (define_expand "xop_vmfrcz<mode>2"
24983 [(set (match_operand:VF_128 0 "register_operand")
24986 [(match_operand:VF_128 1 "nonimmediate_operand")]
24991 "operands[2] = CONST0_RTX (<MODE>mode);")
24993 (define_insn "*xop_vmfrcz<mode>2"
24994 [(set (match_operand:VF_128 0 "register_operand" "=x")
24997 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
24999 (match_operand:VF_128 2 "const0_operand")
25002 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
25003 [(set_attr "type" "ssecvt1")
25004 (set_attr "mode" "<MODE>")])
;; XOP vpcom* — integer vector compares producing all-ones/all-zeros
;; masks; %Y1 prints the comparison-operator suffix.
;; NOTE(review): enable-condition lines are elided in this extract.
25006 (define_insn "xop_maskcmp<mode>3"
25007 [(set (match_operand:VI_128 0 "register_operand" "=x")
25008 (match_operator:VI_128 1 "ix86_comparison_int_operator"
25009 [(match_operand:VI_128 2 "register_operand" "x")
25010 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
25012 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
25013 [(set_attr "type" "sse4arg")
25014 (set_attr "prefix_data16" "0")
25015 (set_attr "prefix_rep" "0")
25016 (set_attr "prefix_extra" "2")
25017 (set_attr "length_immediate" "1")
25018 (set_attr "mode" "TI")])
;; Unsigned-comparison variant (vpcom*u*).
25020 (define_insn "xop_maskcmp_uns<mode>3"
25021 [(set (match_operand:VI_128 0 "register_operand" "=x")
25022 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
25023 [(match_operand:VI_128 2 "register_operand" "x")
25024 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
25026 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
25027 [(set_attr "type" "ssecmp")
25028 (set_attr "prefix_data16" "0")
25029 (set_attr "prefix_rep" "0")
25030 (set_attr "prefix_extra" "2")
25031 (set_attr "length_immediate" "1")
25032 (set_attr "mode" "TI")])
25034 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
25035 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
25036 ;; the exact instruction generated for the intrinsic.
25037 (define_insn "xop_maskcmp_uns2<mode>3"
25038 [(set (match_operand:VI_128 0 "register_operand" "=x")
25040 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
25041 [(match_operand:VI_128 2 "register_operand" "x")
25042 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
25043 UNSPEC_XOP_UNSIGNED_CMP))]
25045 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
25046 [(set_attr "type" "ssecmp")
25047 (set_attr "prefix_data16" "0")
25048 (set_attr "prefix_extra" "2")
25049 (set_attr "length_immediate" "1")
25050 (set_attr "mode" "TI")])
25052 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
25053 ;; being added here to be complete.
;; Operand 3 selects which of the two constant-result compares to emit.
25054 (define_insn "xop_pcom_tf<mode>3"
25055 [(set (match_operand:VI_128 0 "register_operand" "=x")
25057 [(match_operand:VI_128 1 "register_operand" "x")
25058 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
25059 (match_operand:SI 3 "const_int_operand")]
25060 UNSPEC_XOP_TRUEFALSE))]
25063 return ((INTVAL (operands[3]) != 0)
25064 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
25065 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
25067 [(set_attr "type" "ssecmp")
25068 (set_attr "prefix_data16" "0")
25069 (set_attr "prefix_extra" "2")
25070 (set_attr "length_immediate" "1")
25071 (set_attr "mode" "TI")])
;; XOP vpermil2ps/pd — two-source permute with selector vector (op 3)
;; and immediate control (op 4).
25073 (define_insn "xop_vpermil2<mode>3"
25074 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
25076 [(match_operand:VF_128_256 1 "register_operand" "x,x")
25077 (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
25078 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
25079 (match_operand:SI 4 "const_0_to_3_operand")]
25082 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
25083 [(set_attr "type" "sse4arg")
25084 (set_attr "length_immediate" "1")
25085 (set_attr "mode" "<MODE>")])
25087 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AES-NI instruction patterns.  Three alternatives: legacy SSE two-
;; operand form, VEX three-operand form, and the EVEX (VAES+AVX512VL)
;; form.  NOTE(review): the UNSPEC name line of some patterns is elided
;; in this extract.
25089 (define_insn "aesenc"
25090 [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
25091 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
25092 (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")]
25094 "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
25096 aesenc\t{%2, %0|%0, %2}
25097 vaesenc\t{%2, %1, %0|%0, %1, %2}
25098 vaesenc\t{%2, %1, %0|%0, %1, %2}"
25099 [(set_attr "isa" "noavx,aes,avx512vl")
25100 (set_attr "type" "sselog1")
25101 (set_attr "prefix_extra" "1")
25102 (set_attr "prefix" "orig,vex,evex")
25103 (set_attr "btver2_decode" "double,double,double")
25104 (set_attr "mode" "TI")])
;; AES encrypt, last round.
25106 (define_insn "aesenclast"
25107 [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
25108 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
25109 (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")]
25110 UNSPEC_AESENCLAST))]
25111 "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
25113 aesenclast\t{%2, %0|%0, %2}
25114 vaesenclast\t{%2, %1, %0|%0, %1, %2}
25115 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
25116 [(set_attr "isa" "noavx,aes,avx512vl")
25117 (set_attr "type" "sselog1")
25118 (set_attr "prefix_extra" "1")
25119 (set_attr "prefix" "orig,vex,evex")
25120 (set_attr "btver2_decode" "double,double,double")
25121 (set_attr "mode" "TI")])
;; AES decrypt, one round.
25123 (define_insn "aesdec"
25124 [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
25125 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
25126 (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")]
25128 "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
25130 aesdec\t{%2, %0|%0, %2}
25131 vaesdec\t{%2, %1, %0|%0, %1, %2}
25132 vaesdec\t{%2, %1, %0|%0, %1, %2}"
25133 [(set_attr "isa" "noavx,aes,avx512vl")
25134 (set_attr "type" "sselog1")
25135 (set_attr "prefix_extra" "1")
25136 (set_attr "prefix" "orig,vex,evex")
25137 (set_attr "btver2_decode" "double,double,double")
25138 (set_attr "mode" "TI")])
;; AES decrypt, last round.
25140 (define_insn "aesdeclast"
25141 [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
25142 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
25143 (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")]
25144 UNSPEC_AESDECLAST))]
25145 "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
25147 aesdeclast\t{%2, %0|%0, %2}
25148 vaesdeclast\t{%2, %1, %0|%0, %1, %2}
25149 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
25150 [(set_attr "isa" "noavx,aes,avx512vl")
25151 (set_attr "type" "sselog1")
25152 (set_attr "prefix_extra" "1")
25153 (set_attr "prefix" "orig,vex,evex")
25154 (set_attr "btver2_decode" "double,double,double")
25155 (set_attr "mode" "TI")])
;; AES inverse mix columns; %v emits the VEX prefix when AVX is on.
25157 (define_insn "aesimc"
25158 [(set (match_operand:V2DI 0 "register_operand" "=x")
25159 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
25162 "%vaesimc\t{%1, %0|%0, %1}"
25163 [(set_attr "type" "sselog1")
25164 (set_attr "prefix_extra" "1")
25165 (set_attr "prefix" "maybe_vex")
25166 (set_attr "mode" "TI")])
;; AES key-schedule assist with 8-bit round-constant immediate.
25168 (define_insn "aeskeygenassist"
25169 [(set (match_operand:V2DI 0 "register_operand" "=x")
25170 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
25171 (match_operand:SI 2 "const_0_to_255_operand")]
25172 UNSPEC_AESKEYGENASSIST))]
25174 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
25175 [(set_attr "type" "sselog1")
25176 (set_attr "prefix_extra" "1")
25177 (set_attr "length_immediate" "1")
25178 (set_attr "prefix" "maybe_vex")
25179 (set_attr "mode" "TI")])
;; Carry-less multiply of selected 64-bit halves (immediate operand 3
;; picks the halves); SSE, VEX and EVEX (VPCLMULQDQ+AVX512VL) forms.
25181 (define_insn "pclmulqdq"
25182 [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
25183 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
25184 (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")
25185 (match_operand:SI 3 "const_0_to_255_operand")]
25189 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
25190 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}
25191 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
25192 [(set_attr "isa" "noavx,avx,vpclmulqdqvl")
25193 (set_attr "type" "sselog1")
25194 (set_attr "prefix_extra" "1")
25195 (set_attr "length_immediate" "1")
25196 (set_attr "prefix" "orig,vex,evex")
25197 (set_attr "mode" "TI")])
;; vzeroall — builds a PARALLEL with one unspec_volatile plus a SET of
;; zero for each SSE register (16 in 64-bit mode, 8 otherwise).
25199 (define_expand "avx_vzeroall"
25200 [(match_par_dup 0 [(const_int 0)])]
25203 int nregs = TARGET_64BIT ? 16 : 8;
25206 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
25208 XVECEXP (operands[0], 0, 0)
25209 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
25212 for (regno = 0; regno < nregs; regno++)
25213 XVECEXP (operands[0], 0, regno + 1)
25214 = gen_rtx_SET (gen_rtx_REG (V8SImode, GET_SSE_REGNO (regno)),
25215 CONST0_RTX (V8SImode))
25218 (define_insn "*avx_vzeroall"
25219 [(match_parallel 0 "vzeroall_operation"
25220 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
25223 [(set_attr "type" "sse")
25224 (set_attr "modrm" "0")
25225 (set_attr "memory" "none")
25226 (set_attr "prefix" "vex")
25227 (set_attr "btver2_decode" "vector")
25228 (set_attr "mode" "OI")])
25230 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
25231 ;; if the upper 128bits are unused. Initially we expand the instructions
25232 ;; as though they had no effect on the SSE registers, but later add SETs and
25233 ;; CLOBBERs to the PARALLEL to model the real effect.
;; Modeled as a call with its own callee ABI (ABI_VZEROUPPER) so the
;; register effects can be attached later.
25235 (define_expand "avx_vzeroupper"
25236 [(parallel [(call (mem:QI (const_int 0))
25238 (unspec [(const_int ABI_VZEROUPPER)] UNSPEC_CALLEE_ABI)])]
25241 ix86_expand_avx_vzeroupper ();
25245 (define_insn "avx_vzeroupper_callee_abi"
25246 [(call (mem:QI (const_int 0))
25248 (unspec [(const_int ABI_VZEROUPPER)] UNSPEC_CALLEE_ABI)]
25251 [(set_attr "type" "sse")
25252 (set_attr "modrm" "0")
25253 (set_attr "memory" "none")
25254 (set_attr "prefix" "vex")
25255 (set_attr "btver2_decode" "vector")
25256 (set_attr "mode" "OI")])
;; Which AVX-512 feature provides the EVEX-encoded vpbroadcast for each
;; element mode (byte/word need AVX512BW; dword/qword only AVX512F).
25258 (define_mode_attr pbroadcast_evex_isa
25259 [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
25260 (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
25261 (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
25262 (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")
25263 (V32HF "avx512bw") (V16HF "avx512bw") (V8HF "avx512bw")
25264 (V32BF "avx512bw") (V16BF "avx512bw") (V8BF "avx512bw")])
;; vpbroadcast* — duplicate element 0 of the 128-bit source into every
;; lane of the destination.
25266 (define_insn "avx2_pbroadcast<mode>"
25267 [(set (match_operand:VIHFBF 0 "register_operand" "=x,v")
25268 (vec_duplicate:VIHFBF
25269 (vec_select:<ssescalarmode>
25270 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
25271 (parallel [(const_int 0)]))))]
25273 "vpbroadcast<sseintmodesuffix>\t{%1, %0|%0, %<iptr>1}"
25274 [(set_attr "isa" "*,<pbroadcast_evex_isa>")
25275 (set_attr "type" "ssemov")
25276 (set_attr "prefix_extra" "1")
25277 (set_attr "prefix" "vex,evex")
25278 (set_attr "mode" "<sseinsnmode>")])
;; Same, but with a 256-bit source: register alternatives broadcast from
;; its low 128-bit half (%x1).
25280 (define_insn "avx2_pbroadcast<mode>_1"
25281 [(set (match_operand:VIHFBF_256 0 "register_operand" "=x,x,v,v")
25282 (vec_duplicate:VIHFBF_256
25283 (vec_select:<ssescalarmode>
25284 (match_operand:VIHFBF_256 1 "nonimmediate_operand" "m,x,m,v")
25285 (parallel [(const_int 0)]))))]
25288 vpbroadcast<sseintmodesuffix>\t{%1, %0|%0, %<iptr>1}
25289 vpbroadcast<sseintmodesuffix>\t{%x1, %0|%0, %x1}
25290 vpbroadcast<sseintmodesuffix>\t{%1, %0|%0, %<iptr>1}
25291 vpbroadcast<sseintmodesuffix>\t{%x1, %0|%0, %x1}"
25292 [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
25293 (set_attr "type" "ssemov")
25294 (set_attr "prefix_extra" "1")
25295 (set_attr "prefix" "vex")
25296 (set_attr "mode" "<sseinsnmode>")])
;; Full-vector variable permute (vpermd/vpermq/vpermps/vpermpd).  The
;; vxorps is a false-dependency-breaking zero of the destination for
;; Golden-Cove-class CPUs when it overlaps neither source.
25298 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
25299 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
25300 (unspec:VI48F_256_512
25301 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
25302 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
25304 "TARGET_AVX2 && <mask_mode512bit_condition>"
25306 if (TARGET_DEST_FALSE_DEP_FOR_GLC
25307 && <mask3_dest_false_dep_for_glc_cond>
25308 && !reg_mentioned_p (operands[0], operands[1])
25309 && !reg_mentioned_p (operands[0], operands[2]))
25310 output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
25311 return "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}";
25313 [(set_attr "type" "sselog")
25314 (set_attr "prefix" "<mask_prefix2>")
25315 (set_attr "mode" "<sseinsnmode>")])
;; Byte-element variable permute (vpermb, AVX512VBMI).
25317 (define_insn "<avx512>_permvar<mode><mask_name>"
25318 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
25319 (unspec:VI1_AVX512VL
25320 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
25321 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
25323 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
25324 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
25325 [(set_attr "type" "sselog")
25326 (set_attr "prefix" "<mask_prefix2>")
25327 (set_attr "mode" "<sseinsnmode>")])
;; Word-element variable permute (vpermw, AVX512BW).
25329 (define_insn "<avx512>_permvar<mode><mask_name>"
25330 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
25331 (unspec:VI2_AVX512VL
25332 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
25333 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
25335 "TARGET_AVX512BW && <mask_mode512bit_condition>"
25336 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
25337 [(set_attr "type" "sselog")
25338 (set_attr "prefix" "<mask_prefix2>")
25339 (set_attr "mode" "<sseinsnmode>")])
25341 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
25342 ;; If it so happens that the input is in memory, use vbroadcast.
25343 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
25344 (define_insn "*avx_vperm_broadcast_v4sf"
25345 [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
25347 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
25348 (match_parallel 2 "avx_vbroadcast_operand"
25349 [(match_operand 3 "const_int_operand" "C,n,n")])))]
25352 int elt = INTVAL (operands[3]);
25353 switch (which_alternative)
25357 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
25358 return "vbroadcastss\t{%1, %0|%0, %k1}";
25360 operands[2] = GEN_INT (elt * 0x55);
25361 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
25363 gcc_unreachable ();
25366 [(set_attr "type" "ssemov,ssemov,sselog1")
25367 (set_attr "prefix_extra" "1")
25368 (set_attr "length_immediate" "0,0,1")
25369 (set_attr "prefix" "maybe_evex")
25370 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit broadcast-of-one-element; split after reload into either a
;; plain vec_duplicate from memory or an in-lane vpermil followed by a
;; cross-lane shuffle (vperm2f128, or AVX512VL shuf_f32x4 for EVEX regs).
;; NOTE(review): several original lines are elided in this extract.
25372 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
25373 [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
25375 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
25376 (match_parallel 2 "avx_vbroadcast_operand"
25377 [(match_operand 3 "const_int_operand" "C,n,n")])))]
25379 && (<MODE>mode != V4DFmode || !TARGET_AVX2 || operands[3] == const0_rtx)"
25381 "&& reload_completed"
25382 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
25384 rtx op0 = operands[0], op1 = operands[1];
25385 int elt = INTVAL (operands[3]);
25391 if (TARGET_AVX2 && elt == 0)
25393 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
25398 /* Shuffle element we care about into all elements of the 128-bit lane.
25399 The other lane gets shuffled too, but we don't care. */
25400 if (<MODE>mode == V4DFmode)
25401 mask = (elt & 1 ? 15 : 0);
25403 mask = (elt & 3) * 0x55;
25404 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
25406 /* Shuffle the lane we care about into both lanes of the dest. */
25407 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
25408 if (EXT_REX_SSE_REG_P (op0))
25410 /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
25412 gcc_assert (<MODE>mode == V8SFmode);
25413 if ((mask & 1) == 0)
25414 emit_insn (gen_avx2_vec_dupv8sf (op0,
25415 gen_lowpart (V4SFmode, op0)));
25417 emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
25418 GEN_INT (4), GEN_INT (5),
25419 GEN_INT (6), GEN_INT (7),
25420 GEN_INT (12), GEN_INT (13),
25421 GEN_INT (14), GEN_INT (15)));
25425 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
25429 operands[1] = adjust_address (op1, <ssescalarmode>mode,
25430 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; vpermilpd with immediate: expand the 8-bit control into an explicit
;; vec_select PARALLEL (one selector bit per element, within each pair).
25433 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
25434 [(set (match_operand:VF2 0 "register_operand")
25436 (match_operand:VF2 1 "nonimmediate_operand")
25437 (match_operand:SI 2 "const_0_to_255_operand")))]
25438 "TARGET_AVX && <mask_mode512bit_condition>"
25440 int mask = INTVAL (operands[2]);
25441 rtx perm[<ssescalarnum>];
25444 for (i = 0; i < <ssescalarnum>; i = i + 2)
25446 perm[i] = GEN_INT (((mask >> i) & 1) + i);
25447 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
25451 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; vpermilps with immediate: two selector bits per element, per 4-lane
;; group.
25454 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
25455 [(set (match_operand:VF1 0 "register_operand")
25457 (match_operand:VF1 1 "nonimmediate_operand")
25458 (match_operand:SI 2 "const_0_to_255_operand")))]
25459 "TARGET_AVX && <mask_mode512bit_condition>"
25461 int mask = INTVAL (operands[2]);
25462 rtx perm[<ssescalarnum>];
25465 for (i = 0; i < <ssescalarnum>; i = i + 4)
25467 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
25468 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
25469 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
25470 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
25474 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
25477 ;; This pattern needs to come before the avx2_perm*/avx512f_perm*
25478 ;; patterns, as they have the same RTL representation (vpermilp*
25479 ;; being a subset of what vpermp* can do), but vpermilp* has shorter
25480 ;; latency as it never crosses lanes.
25481 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
25482 [(set (match_operand:VF 0 "register_operand" "=v")
25484 (match_operand:VF 1 "nonimmediate_operand" "vm")
25485 (match_parallel 2 ""
25486 [(match_operand 3 "const_int_operand")])))]
25487 "TARGET_AVX && <mask_mode512bit_condition>
25488 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
25490 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
25491 operands[2] = GEN_INT (mask);
25492 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
25494 [(set_attr "type" "sselog")
25495 (set_attr "prefix_extra" "1")
25496 (set_attr "length_immediate" "1")
25497 (set_attr "prefix" "<mask_prefix>")
25498 (set_attr "mode" "<sseinsnmode>")])
;; vpermq/vpermpd immediate permute of 4 x 64-bit elements: split the
;; 8-bit control into four 2-bit lane selectors.
25500 (define_expand "avx2_perm<mode>"
25501 [(match_operand:VI8F_256 0 "register_operand")
25502 (match_operand:VI8F_256 1 "nonimmediate_operand")
25503 (match_operand:SI 2 "const_0_to_255_operand")]
25506 int mask = INTVAL (operands[2]);
25507 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
25508 GEN_INT ((mask >> 0) & 3),
25509 GEN_INT ((mask >> 2) & 3),
25510 GEN_INT ((mask >> 4) & 3),
25511 GEN_INT ((mask >> 6) & 3)));
;; Masked variant: same decomposition plus merge operand and mask.
25515 (define_expand "avx512vl_perm<mode>_mask"
25516 [(match_operand:VI8F_256 0 "register_operand")
25517 (match_operand:VI8F_256 1 "nonimmediate_operand")
25518 (match_operand:SI 2 "const_0_to_255_operand")
25519 (match_operand:VI8F_256 3 "nonimm_or_0_operand")
25520 (match_operand:<avx512fmaskmode> 4 "register_operand")]
25523 int mask = INTVAL (operands[2]);
25524 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
25525 GEN_INT ((mask >> 0) & 3),
25526 GEN_INT ((mask >> 2) & 3),
25527 GEN_INT ((mask >> 4) & 3),
25528 GEN_INT ((mask >> 6) & 3),
25529 operands[3], operands[4]));
;; Matching insn: reassembles the immediate from the four selectors;
;; vxorps breaks a destination false dependency on GLC-tuned targets.
25533 (define_insn "avx2_perm<mode>_1<mask_name>"
25534 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
25535 (vec_select:VI8F_256
25536 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
25537 (parallel [(match_operand 2 "const_0_to_3_operand")
25538 (match_operand 3 "const_0_to_3_operand")
25539 (match_operand 4 "const_0_to_3_operand")
25540 (match_operand 5 "const_0_to_3_operand")])))]
25541 "TARGET_AVX2 && <mask_mode512bit_condition>"
25544 mask |= INTVAL (operands[2]) << 0;
25545 mask |= INTVAL (operands[3]) << 2;
25546 mask |= INTVAL (operands[4]) << 4;
25547 mask |= INTVAL (operands[5]) << 6;
25548 operands[2] = GEN_INT (mask);
25549 if (TARGET_DEST_FALSE_DEP_FOR_GLC
25550 && <mask6_dest_false_dep_for_glc_cond>
25551 && !reg_mentioned_p (operands[0], operands[1]))
25552 output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
25553 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
25555 [(set_attr "type" "sselog")
25556 (set_attr "prefix" "<mask_prefix2>")
25557 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit form: the same 2-bit pattern applied to both 256-bit halves
;; (indices 0-3 and 4-7).
25559 (define_expand "avx512f_perm<mode>"
25560 [(match_operand:V8FI 0 "register_operand")
25561 (match_operand:V8FI 1 "nonimmediate_operand")
25562 (match_operand:SI 2 "const_0_to_255_operand")]
25565 int mask = INTVAL (operands[2]);
25566 emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
25567 GEN_INT ((mask >> 0) & 3),
25568 GEN_INT ((mask >> 2) & 3),
25569 GEN_INT ((mask >> 4) & 3),
25570 GEN_INT ((mask >> 6) & 3),
25571 GEN_INT (((mask >> 0) & 3) + 4),
25572 GEN_INT (((mask >> 2) & 3) + 4),
25573 GEN_INT (((mask >> 4) & 3) + 4),
25574 GEN_INT (((mask >> 6) & 3) + 4)));
;; Masked 512-bit immediate permute: same per-half index decomposition
;; plus merge operand and mask register.
25578 (define_expand "avx512f_perm<mode>_mask"
25579 [(match_operand:V8FI 0 "register_operand")
25580 (match_operand:V8FI 1 "nonimmediate_operand")
25581 (match_operand:SI 2 "const_0_to_255_operand")
25582 (match_operand:V8FI 3 "nonimm_or_0_operand")
25583 (match_operand:<avx512fmaskmode> 4 "register_operand")]
25586 int mask = INTVAL (operands[2]);
25587 emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
25588 GEN_INT ((mask >> 0) & 3),
25589 GEN_INT ((mask >> 2) & 3),
25590 GEN_INT ((mask >> 4) & 3),
25591 GEN_INT ((mask >> 6) & 3),
25592 GEN_INT (((mask >> 0) & 3) + 4),
25593 GEN_INT (((mask >> 2) & 3) + 4),
25594 GEN_INT (((mask >> 4) & 3) + 4),
25595 GEN_INT (((mask >> 6) & 3) + 4),
25596 operands[3], operands[4]));
;; Matching insn: the condition enforces that the high-half indices
;; (6..9) mirror the low-half ones (2..5) offset by 4, so a single 8-bit
;; immediate describes both halves.  vxorps breaks a GLC false dep.
25600 (define_insn "avx512f_perm<mode>_1<mask_name>"
25601 [(set (match_operand:V8FI 0 "register_operand" "=v")
25603 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
25604 (parallel [(match_operand 2 "const_0_to_3_operand")
25605 (match_operand 3 "const_0_to_3_operand")
25606 (match_operand 4 "const_0_to_3_operand")
25607 (match_operand 5 "const_0_to_3_operand")
25608 (match_operand 6 "const_4_to_7_operand")
25609 (match_operand 7 "const_4_to_7_operand")
25610 (match_operand 8 "const_4_to_7_operand")
25611 (match_operand 9 "const_4_to_7_operand")])))]
25612 "TARGET_AVX512F && <mask_mode512bit_condition>
25613 && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
25614 && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
25615 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
25616 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
25619 mask |= INTVAL (operands[2]) << 0;
25620 mask |= INTVAL (operands[3]) << 2;
25621 mask |= INTVAL (operands[4]) << 4;
25622 mask |= INTVAL (operands[5]) << 6;
25623 operands[2] = GEN_INT (mask);
25624 if (TARGET_DEST_FALSE_DEP_FOR_GLC
25625 && <mask10_dest_false_dep_for_glc_cond>
25626 && !reg_mentioned_p (operands[0], operands[1]))
25627 output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
25628 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
25630 [(set_attr "type" "sselog")
25631 (set_attr "prefix" "<mask_prefix2>")
25632 (set_attr "mode" "<sseinsnmode>")])
;; vperm2i128 with imm8 special-casing.  The relevant selector bits are
;; masked with 0xbb (bits 0-1,3 select/zero the low lane, 4-5,7 the high):
;;   16 (0x10) -> result == operand 1 (plain move, or nothing if same reg)
;;   50 (0x32) -> result == operand 2
;;   18 (0x12) / 48 (0x30) -> expressible as a cheaper vblendps
;; otherwise emit the generic vperm2i128.  NOTE(review): the "return" for
;; the rtx_equal_p cases is elided in this chunk -- presumably returns ""
;; (no instruction) as in upstream sse.md; confirm against the full file.
25634 (define_insn "avx2_permv2ti"
25635 [(set (match_operand:V4DI 0 "register_operand" "=x")
25637 [(match_operand:V4DI 1 "register_operand" "x")
25638 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
25639 (match_operand:SI 3 "const_0_to_255_operand")]
25643 int mask = INTVAL (operands[3]);
25644 if ((mask & 0xbb) == 16)
25646 if (rtx_equal_p (operands[0], operands[1]))
25649 return "vmovaps\t{%1, %0|%0, %1}";
25651 if ((mask & 0xbb) == 50)
25653 if (rtx_equal_p (operands[0], operands[2]))
25656 return "vmovaps\t{%2, %0|%0, %2}";
25658 if ((mask & 0xbb) == 18)
25659 return "vblendps\t{$15, %2, %1, %0|%0, %1, %2, 15}";
25660 if ((mask & 0xbb) == 48)
25661 return "vblendps\t{$240, %2, %1, %0|%0, %1, %2, 240}";
25662 return "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
25664 [(set_attr "type" "sselog")
25665 (set_attr "prefix" "vex")
25666 (set_attr "mode" "OI")])
;; Broadcast element 0 of a V2DF register to all four V4DF lanes
;; (vbroadcastsd's register form requires AVX2).
25668 (define_insn "avx2_vec_dupv4df"
25669 [(set (match_operand:V4DF 0 "register_operand" "=v")
25670 (vec_duplicate:V4DF
25672 (match_operand:V2DF 1 "register_operand" "v")
25673 (parallel [(const_int 0)]))))]
25675 "vbroadcastsd\t{%1, %0|%0, %1}"
25676 [(set_attr "type" "sselog1")
25677 (set_attr "prefix" "maybe_evex")
25678 (set_attr "mode" "V4DF")])
;; AVX-512 broadcast of element 0 of a full-width B/W/HF/BF vector
;; (reg or mem source; %x1 narrows the operand to the xmm view).
25680 (define_insn "<avx512>_vec_dup<mode>_1"
25681 [(set (match_operand:VIHFBF_AVX512BW 0 "register_operand" "=v,v")
25682 (vec_duplicate:VIHFBF_AVX512BW
25683 (vec_select:<ssescalarmode>
25684 (match_operand:VIHFBF_AVX512BW 1 "nonimmediate_operand" "v,m")
25685 (parallel [(const_int 0)]))))]
25688 vpbroadcast<sseintmodesuffix>\t{%x1, %0|%0, %x1}
25689 vpbroadcast<sseintmodesuffix>\t{%x1, %0|%0, %<iptr>1}"
25690 [(set_attr "type" "ssemov")
25691 (set_attr "prefix" "evex")
25692 (set_attr "mode" "<sseinsnmode>")])
;; Maskable broadcast of element 0 of an xmm source into 32/64-bit-element
;; vectors; V2DF has no DF broadcast so the integer vpbroadcastq is used.
25694 (define_insn "<avx512>_vec_dup<mode><mask_name>"
25695 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
25696 (vec_duplicate:V48_AVX512VL
25697 (vec_select:<ssescalarmode>
25698 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
25699 (parallel [(const_int 0)]))))]
25702 /* There is no DF broadcast (in AVX-512*) to 128b register.
25703 Mimic it with integer variant. */
25704 if (<MODE>mode == V2DFmode)
25705 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
25707 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}";
25709 [(set_attr "type" "ssemov")
25710 (set_attr "prefix" "evex")
25711 (set_attr "mode" "<sseinsnmode>")])
;; Same as above for 8/16-bit-element (incl. HF/BF) AVX512VL vectors.
25713 (define_insn "<avx512>_vec_dup<mode><mask_name>"
25714 [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v")
25715 (vec_duplicate:VI12HFBF_AVX512VL
25716 (vec_select:<ssescalarmode>
25717 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
25718 (parallel [(const_int 0)]))))]
25720 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}"
25721 [(set_attr "type" "ssemov")
25722 (set_attr "prefix" "evex")
25723 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit -> 512-bit (x4) broadcast; register source uses a self-shuffle
;; (vshuf*32x4 with imm 0) since vbroadcast*32x4 only takes memory.
25725 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
25726 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
25727 (vec_duplicate:V16FI
25728 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
25731 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
25732 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
25733 [(set_attr "type" "ssemov")
25734 (set_attr "prefix" "evex")
25735 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit -> 512-bit (x2) broadcast, analogous reg/mem split (imm 0x44
;; replicates the low 256 bits).
25737 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
25738 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
25739 (vec_duplicate:V8FI
25740 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
25743 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
25744 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
25745 [(set_attr "type" "ssemov")
25746 (set_attr "prefix" "evex")
25747 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a scalar (vector reg/mem, or GPR via %k1 = 32-bit view) to a
;; whole 8/16-bit-element vector.
25749 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
25750 [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v,v")
25751 (vec_duplicate:VI12HFBF_AVX512VL
25752 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
25755 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
25756 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
25757 [(set_attr "type" "ssemov")
25758 (set_attr "prefix" "evex")
25759 (set_attr "mode" "<sseinsnmode>")])
;; Same for 32/64-bit-element vectors.  The GPR alternative (1) is only
;; enabled for integer scalar modes, and for DImode only on 64-bit targets.
;; NOTE(review): the closing lines of the "enabled" attribute are elided
;; in this chunk.
25761 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
25762 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
25763 (vec_duplicate:V48_AVX512VL
25764 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
25766 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
25767 [(set_attr "type" "ssemov")
25768 (set_attr "prefix" "evex")
25769 (set_attr "mode" "<sseinsnmode>")
25770 (set (attr "enabled")
25771 (if_then_else (eq_attr "alternative" "1")
25772 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
25773 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
;; Scalar -> V4SF duplicate: AVX self-shuffle, AVX memory broadcast, or
;; legacy SSE in-place shufps (alternative 2, destination must equal src).
25776 (define_insn "vec_dupv4sf"
25777 [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
25778 (vec_duplicate:V4SF
25779 (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
25782 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
25783 vbroadcastss\t{%1, %0|%0, %1}
25784 shufps\t{$0, %0, %0|%0, %0, 0}"
25785 [(set_attr "isa" "avx,avx,noavx")
25786 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
25787 (set_attr "length_immediate" "1,0,1")
25788 (set_attr "prefix_extra" "0,1,*")
25789 (set_attr "prefix" "maybe_evex,maybe_evex,orig")
25790 (set_attr "mode" "V4SF")])
;; SI -> V4SI duplicate; broadcast-from-memory alternative is dispreferred
;; for speed when GPR->vector moves are fast (preferred_for_speed below).
25792 (define_insn "*vec_dupv4si"
25793 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
25794 (vec_duplicate:V4SI
25795 (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
25798 %vpshufd\t{$0, %1, %0|%0, %1, 0}
25799 vbroadcastss\t{%1, %0|%0, %1}
25800 shufps\t{$0, %0, %0|%0, %0, 0}"
25801 [(set_attr "isa" "sse2,avx,noavx")
25802 (set_attr "type" "sselog1,ssemov,sselog1")
25803 (set_attr "length_immediate" "1,0,1")
25804 (set_attr "prefix_extra" "0,1,*")
25805 (set_attr "prefix" "maybe_vex,maybe_evex,orig")
25806 (set_attr "mode" "TI,V4SF,V4SF")
25807 (set (attr "preferred_for_speed")
25808 (cond [(eq_attr "alternative" "1")
25809 (symbol_ref "!TARGET_INTER_UNIT_MOVES_TO_VEC")
25811 (symbol_ref "true")))])
;; DI -> V2DI duplicate: punpcklqdq (SSE2/AVX) or SSE3 movddup.
;; NOTE(review): one output-template line appears to be elided here.
25813 (define_insn "*vec_dupv2di"
25814 [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
25815 (vec_duplicate:V2DI
25816 (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,vm,0")))]
25820 vpunpcklqdq\t{%d1, %0|%0, %d1}
25821 %vmovddup\t{%1, %0|%0, %1}
25823 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
25824 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
25825 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
25826 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Broadcast a 128-bit memory operand to both halves of a 256-bit integer
;; vector; AVX512DQ/VL provide EVEX-encodable element-typed forms.
25828 (define_insn "avx2_vbroadcasti128_<mode>"
25829 [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
25831 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
25835 vbroadcasti128\t{%1, %0|%0, %1}
25836 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
25837 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
25838 [(set_attr "isa" "*,avx512dq,avx512vl")
25839 (set_attr "type" "ssemov")
25840 (set_attr "prefix_extra" "1")
25841 (set_attr "prefix" "vex,evex,evex")
25842 (set_attr "mode" "OI")])
25844 ;; Modes handled by AVX vec_dup patterns.
25845 (define_mode_iterator AVX_VEC_DUP_MODE
25846 [V8SI V8SF V4DI V4DF])
;; Scalar-suffix used by the noavx2 vbroadcastss/sd fallback below
;; (integer element modes reuse the same-width FP broadcast).
25847 (define_mode_attr vecdupssescalarmodesuffix
25848 [(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")])
25849 ;; Modes handled by AVX2 vec_dup patterns.
25850 (define_mode_iterator AVX2_VEC_DUP_MODE
25851 [V32QI V16QI V16HI V8HI V8SI V4SI V16HF V8HF V16BF V8BF])
;; Scalar -> vector duplicate for AVX2 element modes; the GPR alternative
;; (2) is dispreferred unless GPR->vector moves are fast, and is disabled
;; under AVX512VL (where a direct GPR broadcast insn exists).
;; NOTE(review): one output-template line (the GPR alternative) is elided.
25853 (define_insn "*vec_dup<mode>"
25854 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,v")
25855 (vec_duplicate:AVX2_VEC_DUP_MODE
25856 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
25859 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
25860 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
25862 [(set_attr "isa" "*,*,noavx512vl")
25863 (set_attr "type" "ssemov")
25864 (set_attr "prefix_extra" "1")
25865 (set_attr "prefix" "maybe_evex")
25866 (set_attr "mode" "<sseinsnmode>")
25867 (set (attr "preferred_for_speed")
25868 (cond [(eq_attr "alternative" "2")
25869 (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
25871 (symbol_ref "true")))])
;; 256-bit vec_dup for 32/64-bit elements across AVX/AVX2/AVX512F, with a
;; vbroadcastss/sd memory fallback for plain AVX (alternative 1).
;; NOTE(review): the last output-template line is elided in this chunk.
25873 (define_insn "vec_dup<mode>"
25874 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
25875 (vec_duplicate:AVX_VEC_DUP_MODE
25876 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
25879 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
25880 vbroadcast<vecdupssescalarmodesuffix>\t{%1, %0|%0, %1}
25881 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
25882 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
25884 [(set_attr "type" "ssemov")
25885 (set_attr "prefix_extra" "1")
25886 (set_attr "prefix" "maybe_evex")
25887 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
25888 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
;; Split of a GPR broadcast (no direct AVX512VL GPR-broadcast available):
;; insert the GPR into element 0 of an xmm, then pbroadcast that xmm.
;; NOTE(review): the "(define_split" header line and closing lines of this
;; splitter are elided in this chunk.
25891 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
25892 (vec_duplicate:AVX2_VEC_DUP_MODE
25893 (match_operand:<ssescalarmode> 1 "register_operand")))]
25895 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
25896 available, because then we can broadcast from GPRs directly.
25897 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
25898 for V*SI mode it requires just -mavx512vl. */
25899 && !(TARGET_AVX512VL
25900 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
25901 && reload_completed && GENERAL_REG_P (operands[1])"
25904 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
25905 CONST0_RTX (V4SImode),
25906 gen_lowpart (SImode, operands[1])));
25907 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
25908 gen_lowpart (<ssexmmmode>mode,
;; AVX-without-AVX2 split: duplicate into the low 128-bit half, then
;; vec_concat the half with itself.  (Its "(define_split" header is also
;; elided in this chunk.)
25914 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
25915 (vec_duplicate:AVX_VEC_DUP_MODE
25916 (match_operand:<ssescalarmode> 1 "register_operand")))]
25917 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
25918 [(set (match_dup 2)
25919 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
25921 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
25922 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
;; Broadcast a 128-bit half into both lanes of a 256-bit vector.  Per
;; alternative: memory broadcast / self-insert of the low half / full
;; lane permute, each in plain-AVX, AVX512DQ and AVX512VL encodings.
25924 (define_insn "avx_vbroadcastf128_<mode>"
25925 [(set (match_operand:V_256H 0 "register_operand" "=x,x,x,v,v,v,v")
25927 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
25931 vbroadcast<i128>\t{%1, %0|%0, %1}
25932 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
25933 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
25934 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
25935 vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
25936 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
25937 vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
25938 [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
25939 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
25940 (set_attr "prefix_extra" "1")
25941 (set_attr "length_immediate" "0,1,1,0,1,0,1")
25942 (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
25943 (set_attr "mode" "<sseinsnmode>")])
25945 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
25946 (define_mode_iterator VI4F_BRCST32x2
25947 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
25948 V16SF (V8SF "TARGET_AVX512VL")])
;; Maps a vector mode to the 2-element vector of its scalar type, used as
;; the source mode of the 64x2 / 32x2 broadcasts below.
25950 (define_mode_attr 64x2mode
25951 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
25953 (define_mode_attr 32x2mode
25954 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
25955 (V8SF "V2SF") (V4SI "V2SI")])
;; vbroadcast{i,f}32x2: duplicate the low two 32-bit elements of an xmm
;; source across the destination (%q1 = low-quadword view of the operand).
25957 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
25958 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
25959 (vec_duplicate:VI4F_BRCST32x2
25960 (vec_select:<32x2mode>
25961 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
25962 (parallel [(const_int 0) (const_int 1)]))))]
25964 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
25965 [(set_attr "type" "ssemov")
25966 (set_attr "prefix_extra" "1")
25967 (set_attr "prefix" "evex")
25968 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit -> 256-bit broadcast (AVX512VL): register source via a
;; self-shuffle with imm 0, memory source via vbroadcast*32x4.
25970 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
25971 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
25972 (vec_duplicate:VI4F_256
25973 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
25976 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
25977 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
25978 [(set_attr "type" "ssemov")
25979 (set_attr "prefix_extra" "1")
25980 (set_attr "prefix" "evex")
25981 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit -> 512-bit broadcast for 32-bit element modes (imm 0x44
;; replicates the low half); memory source uses vbroadcast*32x8.
25983 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
25984 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
25985 (vec_duplicate:V16FI
25986 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
25989 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
25990 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
25991 [(set_attr "type" "ssemov")
25992 (set_attr "prefix_extra" "1")
25993 (set_attr "prefix" "evex")
25994 (set_attr "mode" "<sseinsnmode>")])
25996 ;; For broadcast[i|f]64x2
25997 (define_mode_iterator VI8F_BRCST64x2
25998 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
;; vbroadcast{i,f}64x2: duplicate a 128-bit pair of 64-bit elements.
26000 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
26001 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
26002 (vec_duplicate:VI8F_BRCST64x2
26003 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
26006 vshuf<shuffletype>64x2\t{$0x0, %<xtg_mode>1, %<xtg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<xtg_mode>1, %<xtg_mode>1, 0x0}
26007 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
26008 [(set_attr "type" "ssemov")
26009 (set_attr "prefix_extra" "1")
26010 (set_attr "prefix" "evex")
26011 (set_attr "mode" "<sseinsnmode>")])
;; AVX512CD: broadcast a mask register into every 64-bit element
;; (vpbroadcastmb2q, k -> qword lanes).
26013 (define_insn "avx512cd_maskb_vec_dup<mode>"
26014 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
26015 (vec_duplicate:VI8_AVX512VL
26017 (match_operand:QI 1 "register_operand" "k"))))]
26019 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
26020 [(set_attr "type" "mskmov")
26021 (set_attr "prefix" "evex")
26022 (set_attr "mode" "XI")])
;; Same for 32-bit elements (vpbroadcastmw2d, k -> dword lanes).
26024 (define_insn "avx512cd_maskw_vec_dup<mode>"
26025 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
26026 (vec_duplicate:VI4_AVX512VL
26028 (match_operand:HI 1 "register_operand" "k"))))]
26030 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
26031 [(set_attr "type" "mskmov")
26032 (set_attr "prefix" "evex")
26033 (set_attr "mode" "XI")])
;; Variable in-lane FP permute (vpermilps/vpermilpd) with per-element
;; control vector in operand 2; maskable in 512-bit / VL forms.
26035 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
26036 [(set (match_operand:VF 0 "register_operand" "=v")
26038 [(match_operand:VF 1 "register_operand" "v")
26039 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
26041 "TARGET_AVX && <mask_mode512bit_condition>"
26042 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
26043 [(set_attr "type" "sselog")
26044 (set_attr "prefix_extra" "1")
26045 (set_attr "btver2_decode" "vector")
26046 (set_attr "prefix" "<mask_prefix>")
26047 (set_attr "mode" "<sseinsnmode>")])
;; Modes supported by vpermi2/vpermt2 (two-source variable permute); VL /
;; BW / VBMI gate the narrower and byte/word element variants.
26049 (define_mode_iterator VPERMI2
26050 [V16SI V16SF V8DI V8DF
26051 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
26052 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
26053 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
26054 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
26055 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
26056 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
26057 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
26058 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
;; Integer-only subset of VPERMI2 (used where the merge source is the
;; index operand itself rather than an FP-typed subreg of it).
26060 (define_mode_iterator VPERMI2I
26062 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
26063 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
26064 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
26065 (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
26066 (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
26067 (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
;; Masked vpermi2 expander: forces the index vector into a register and
;; builds the same-mode lowpart used as the vec_merge fallthrough.
26069 (define_expand "<avx512>_vpermi2var<mode>3_mask"
26070 [(set (match_operand:VPERMI2 0 "register_operand")
26073 [(match_operand:<sseintvecmode> 2 "register_operand")
26074 (match_operand:VPERMI2 1 "register_operand")
26075 (match_operand:VPERMI2 3 "nonimmediate_operand")]
26078 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
26081 operands[2] = force_reg (<sseintvecmode>mode, operands[2]);
26082 operands[5] = gen_lowpart (<MODE>mode, operands[2]);
;; Integer masked vpermi2: destination is tied to the index operand
;; (constraint "0"); masked-off lanes keep the index value.
26085 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
26086 [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
26087 (vec_merge:VPERMI2I
26089 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
26090 (match_operand:VPERMI2I 1 "register_operand" "v")
26091 (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
26094 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
26096 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
26097 [(set_attr "type" "sselog")
26098 (set_attr "prefix" "evex")
26099 (set_attr "mode" "<sseinsnmode>")])
;; FP masked vpermi2: same shape, but the fallthrough is an FP-typed
;; subreg of the integer index operand.
26101 (define_insn "*<avx512>_vpermi2var<mode>3_mask"
26102 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
26103 (vec_merge:VF_AVX512VL
26104 (unspec:VF_AVX512VL
26105 [(match_operand:<sseintvecmode> 2 "register_operand" "0")
26106 (match_operand:VF_AVX512VL 1 "register_operand" "v")
26107 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
26109 (subreg:VF_AVX512VL (match_dup 2) 0)
26110 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
26112 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
26113 [(set_attr "type" "sselog")
26114 (set_attr "prefix" "evex")
26115 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masked vpermt2 expander: delegates to the _maskz_1 pattern with a
;; zero vector as the merge source.
26117 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
26118 [(match_operand:VPERMI2 0 "register_operand")
26119 (match_operand:<sseintvecmode> 1 "register_operand")
26120 (match_operand:VPERMI2 2 "register_operand")
26121 (match_operand:VPERMI2 3 "nonimmediate_operand")
26122 (match_operand:<avx512fmaskmode> 4 "register_operand")]
26125 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
26126 operands[0], operands[1], operands[2], operands[3],
26127 CONST0_RTX (<MODE>mode), operands[4]));
;; Two-source permute; alternative 0 ties the data operand, emitting
;; vpermt2, alternative 1 ties the index operand, emitting vpermi2.
26131 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
26132 [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
26134 [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
26135 (match_operand:VPERMI2 2 "register_operand" "0,v")
26136 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
26140 vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
26141 vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
26142 [(set_attr "type" "sselog")
26143 (set_attr "prefix" "evex")
26144 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermt2: masked-off lanes keep the tied data operand 2.
26146 (define_insn "<avx512>_vpermt2var<mode>3_mask"
26147 [(set (match_operand:VPERMI2 0 "register_operand" "=v")
26150 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
26151 (match_operand:VPERMI2 2 "register_operand" "0")
26152 (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
26155 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
26157 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
26158 [(set_attr "type" "sselog")
26159 (set_attr "prefix" "evex")
26160 (set_attr "mode" "<sseinsnmode>")])
;; vperm2f128 expander.  When no zeroing bits (7/3) are set in the imm8,
;; rewrite as a vec_select over a double-width vec_concat so the RTL
;; optimizers can see the actual lane permutation; otherwise fall through
;; to the opaque unspec insn below.  NOTE(review): the tail of the
;; expander (emit/DONE and fallthrough) is elided in this chunk.
26162 (define_expand "avx_vperm2f128<mode>3"
26163 [(set (match_operand:AVX256MODE2P 0 "register_operand")
26164 (unspec:AVX256MODE2P
26165 [(match_operand:AVX256MODE2P 1 "register_operand")
26166 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
26167 (match_operand:SI 3 "const_0_to_255_operand")]
26168 UNSPEC_VPERMIL2F128))]
26171 int mask = INTVAL (operands[3]);
26172 if ((mask & 0x88) == 0)
26174 rtx perm[<ssescalarnum>], t1, t2;
26175 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
26177 base = (mask & 3) * nelt2;
26178 for (i = 0; i < nelt2; ++i)
26179 perm[i] = GEN_INT (base + i);
26181 base = ((mask >> 4) & 3) * nelt2;
26182 for (i = 0; i < nelt2; ++i)
26183 perm[i + nelt2] = GEN_INT (base + i);
26185 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
26186 operands[1], operands[2]);
26187 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
26188 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
26189 t2 = gen_rtx_SET (operands[0], t2);
26195 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
26196 ;; means that in order to represent this properly in rtl we'd have to
26197 ;; nest *another* vec_concat with a zero operand and do the select from
26198 ;; a 4x wide vector. That doesn't seem very nice.
;; Opaque vperm2f128/vperm2i128 insn keeping the full imm8 (including the
;; lane-zeroing bits that RTL cannot express cleanly, per the note above).
26199 (define_insn "*avx_vperm2f128<mode>_full"
26200 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
26201 (unspec:AVX256MODE2P
26202 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
26203 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
26204 (match_operand:SI 3 "const_0_to_255_operand")]
26205 UNSPEC_VPERMIL2F128))]
26207 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
26208 [(set_attr "type" "sselog")
26209 (set_attr "prefix_extra" "1")
26210 (set_attr "length_immediate" "1")
26211 (set_attr "prefix" "vex")
26212 (set_attr "mode" "<sseinsnmode>")])
;; Recognize the vec_select-over-vec_concat form (no zeroed lanes) and
;; pick cheaper encodings when possible: 0x12/0x30 -> vblendps,
;; 0x20 -> vinsert of the low 128 bits of operand 2.
26214 (define_insn "*avx_vperm2f128<mode>_nozero"
26215 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
26216 (vec_select:AVX256MODE2P
26217 (vec_concat:<ssedoublevecmode>
26218 (match_operand:AVX256MODE2P 1 "register_operand" "x")
26219 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
26220 (match_parallel 3 ""
26221 [(match_operand 4 "const_int_operand")])))]
26223 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
26225 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
26226 if ((mask & 0xbb) == 0x12)
26227 return "vblendps\t{$15, %2, %1, %0|%0, %1, %2, 15}";
26228 if ((mask & 0xbb) == 0x30)
26229 return "vblendps\t{$240, %2, %1, %0|%0, %1, %2, 240}";
26230 if ((mask & 0xbb) == 0x20)
26231 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
26232 operands[3] = GEN_INT (mask);
26233 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
26235 [(set_attr "type" "sselog")
26236 (set_attr "prefix_extra" "1")
26237 (set_attr "length_immediate" "1")
26238 (set_attr "prefix" "vex")
26239 (set_attr "mode" "<sseinsnmode>")])
;; Recognize a vec_select rotation as palignr; the element-count shift in
;; operands[3] is converted to palignr's byte immediate by multiplying by
;; the element size.
26241 (define_insn "*ssse3_palignr<mode>_perm"
26242 [(set (match_operand:V_128 0 "register_operand" "=x,Yw")
26244 (match_operand:V_128 1 "register_operand" "0,Yw")
26245 (match_parallel 2 "palignr_operand"
26246 [(match_operand 3 "const_int_operand")])))]
26249 operands[2] = (GEN_INT (INTVAL (operands[3])
26250 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
26252 switch (which_alternative)
26255 return "palignr\t{%2, %1, %0|%0, %1, %2}";
26257 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
26259 gcc_unreachable ();
26262 [(set_attr "isa" "noavx,avx")
26263 (set_attr "type" "sseishft")
26264 (set_attr "atom_unit" "sishuf")
26265 (set_attr "prefix_data16" "1,*")
26266 (set_attr "prefix_extra" "1")
26267 (set_attr "length_immediate" "1")
26268 (set_attr "prefix" "orig,maybe_evex")])
;; Masked 128-bit insert into a 256-bit vector: dispatch on the lane
;; index (operand 3, 0 or 1) to the lo/hi masked vec_set pattern.
26270 (define_expand "avx512vl_vinsert<mode>"
26271 [(match_operand:VI48F_256 0 "register_operand")
26272 (match_operand:VI48F_256 1 "register_operand")
26273 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
26274 (match_operand:SI 3 "const_0_to_1_operand")
26275 (match_operand:VI48F_256 4 "register_operand")
26276 (match_operand:<avx512fmaskmode> 5 "register_operand")]
26279 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
26281 switch (INTVAL (operands[3]))
26284 insn = gen_vec_set_lo_<mode>_mask;
26287 insn = gen_vec_set_hi_<mode>_mask;
26290 gcc_unreachable ();
26293 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
;; Unmasked vinsertf128: same lo/hi dispatch on the lane index.
26298 (define_expand "avx_vinsertf128<mode>"
26299 [(match_operand:V_256 0 "register_operand")
26300 (match_operand:V_256 1 "register_operand")
26301 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
26302 (match_operand:SI 3 "const_0_to_1_operand")]
26305 rtx (*insn)(rtx, rtx, rtx);
26307 switch (INTVAL (operands[3]))
26310 insn = gen_vec_set_lo_<mode>;
26313 insn = gen_vec_set_hi_<mode>;
26316 gcc_unreachable ();
26319 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replace the LOW 128 bits of a 256-bit 64-bit-element vector (operand 2
;; new half, high half of operand 1 kept).  Prefers the element-typed
;; AVX512DQ encoding, then AVX512VL 32x4, then plain vinsertf/i128.
26323 (define_insn "vec_set_lo_<mode><mask_name>"
26324 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
26325 (vec_concat:VI8F_256
26326 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
26327 (vec_select:<ssehalfvecmode>
26328 (match_operand:VI8F_256 1 "register_operand" "v")
26329 (parallel [(const_int 2) (const_int 3)]))))]
26330 "TARGET_AVX && <mask_avx512dq_condition>"
26332 if (TARGET_AVX512DQ)
26333 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
26334 else if (TARGET_AVX512VL)
26335 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
26337 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
26339 [(set_attr "type" "sselog")
26340 (set_attr "prefix_extra" "1")
26341 (set_attr "length_immediate" "1")
26342 (set_attr "prefix" "vex")
26343 (set_attr "mode" "<sseinsnmode>")])
;; Replace the HIGH 128 bits, low half of operand 1 kept.
26345 (define_insn "vec_set_hi_<mode><mask_name>"
26346 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
26347 (vec_concat:VI8F_256
26348 (vec_select:<ssehalfvecmode>
26349 (match_operand:VI8F_256 1 "register_operand" "v")
26350 (parallel [(const_int 0) (const_int 1)]))
26351 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
26352 "TARGET_AVX && <mask_avx512dq_condition>"
26354 if (TARGET_AVX512DQ)
26355 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
26356 else if (TARGET_AVX512VL)
26357 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
26359 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
26361 [(set_attr "type" "sselog")
26362 (set_attr "prefix_extra" "1")
26363 (set_attr "length_immediate" "1")
26364 (set_attr "prefix" "vex")
26365 (set_attr "mode" "<sseinsnmode>")])
;; Same pair for 32-bit-element 256-bit vectors (no 64x2 form needed).
26367 (define_insn "vec_set_lo_<mode><mask_name>"
26368 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
26369 (vec_concat:VI4F_256
26370 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
26371 (vec_select:<ssehalfvecmode>
26372 (match_operand:VI4F_256 1 "register_operand" "v")
26373 (parallel [(const_int 4) (const_int 5)
26374 (const_int 6) (const_int 7)]))))]
26377 if (TARGET_AVX512VL)
26378 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
26380 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
26382 [(set_attr "type" "sselog")
26383 (set_attr "prefix_extra" "1")
26384 (set_attr "length_immediate" "1")
26385 (set_attr "prefix" "vex")
26386 (set_attr "mode" "<sseinsnmode>")])
26388 (define_insn "vec_set_hi_<mode><mask_name>"
26389 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
26390 (vec_concat:VI4F_256
26391 (vec_select:<ssehalfvecmode>
26392 (match_operand:VI4F_256 1 "register_operand" "v")
26393 (parallel [(const_int 0) (const_int 1)
26394 (const_int 2) (const_int 3)]))
26395 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
26398 if (TARGET_AVX512VL)
26399 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
26401 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
26403 [(set_attr "type" "sselog")
26404 (set_attr "prefix_extra" "1")
26405 (set_attr "length_immediate" "1")
26406 (set_attr "prefix" "vex")
26407 (set_attr "mode" "<sseinsnmode>")])
;; 16-element (word/HF/BF) 256-bit variants: replace the low 128 bits;
;; second alternative is the EVEX vinserti32x4 encoding.
26409 (define_insn "vec_set_lo_<mode>"
26410 [(set (match_operand:V16_256 0 "register_operand" "=x,v")
26411 (vec_concat:V16_256
26412 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm,vm")
26413 (vec_select:<ssehalfvecmode>
26414 (match_operand:V16_256 1 "register_operand" "x,v")
26415 (parallel [(const_int 8) (const_int 9)
26416 (const_int 10) (const_int 11)
26417 (const_int 12) (const_int 13)
26418 (const_int 14) (const_int 15)]))))]
26421 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
26422 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
26423 [(set_attr "type" "sselog")
26424 (set_attr "prefix_extra" "1")
26425 (set_attr "length_immediate" "1")
26426 (set_attr "prefix" "vex,evex")
26427 (set_attr "mode" "OI")])
;; ... and replace the high 128 bits.
26429 (define_insn "vec_set_hi_<mode>"
26430 [(set (match_operand:V16_256 0 "register_operand" "=x,v")
26431 (vec_concat:V16_256
26432 (vec_select:<ssehalfvecmode>
26433 (match_operand:V16_256 1 "register_operand" "x,v")
26434 (parallel [(const_int 0) (const_int 1)
26435 (const_int 2) (const_int 3)
26436 (const_int 4) (const_int 5)
26437 (const_int 6) (const_int 7)]))
26438 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm,vm")))]
26441 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
26442 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
26443 [(set_attr "type" "sselog")
26444 (set_attr "prefix_extra" "1")
26445 (set_attr "length_immediate" "1")
26446 (set_attr "prefix" "vex,evex")
26447 (set_attr "mode" "OI")])
;; Byte-element (V32QI) variants of the same low/high 128-bit replace.
26449 (define_insn "vec_set_lo_v32qi"
26450 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
26452 (match_operand:V16QI 2 "nonimmediate_operand" "xm,v")
26454 (match_operand:V32QI 1 "register_operand" "x,v")
26455 (parallel [(const_int 16) (const_int 17)
26456 (const_int 18) (const_int 19)
26457 (const_int 20) (const_int 21)
26458 (const_int 22) (const_int 23)
26459 (const_int 24) (const_int 25)
26460 (const_int 26) (const_int 27)
26461 (const_int 28) (const_int 29)
26462 (const_int 30) (const_int 31)]))))]
26465 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
26466 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
26467 [(set_attr "type" "sselog")
26468 (set_attr "prefix_extra" "1")
26469 (set_attr "length_immediate" "1")
26470 (set_attr "prefix" "vex,evex")
26471 (set_attr "mode" "OI")])
26473 (define_insn "vec_set_hi_v32qi"
26474 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
26477 (match_operand:V32QI 1 "register_operand" "x,v")
26478 (parallel [(const_int 0) (const_int 1)
26479 (const_int 2) (const_int 3)
26480 (const_int 4) (const_int 5)
26481 (const_int 6) (const_int 7)
26482 (const_int 8) (const_int 9)
26483 (const_int 10) (const_int 11)
26484 (const_int 12) (const_int 13)
26485 (const_int 14) (const_int 15)]))
26486 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
26489 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
26490 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
26491 [(set_attr "type" "sselog")
26492 (set_attr "prefix_extra" "1")
26493 (set_attr "length_immediate" "1")
26494 (set_attr "prefix" "vex,evex")
26495 (set_attr "mode" "OI")])
;; Masked vector load: vmaskmovps/pd or vpmaskmovd/q depending on
;; <sseintprefix>/<ssemodesuffix>; mask operand 2 selects which elements of
;; the memory source (operand 1) are loaded.  VEX-encoded (AVX/AVX2).
;; NOTE(review): the insn condition line is elided from this listing
;; (numbers jump 26501->26504); confirm against upstream sse.md.
26497 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
26498 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
26500 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
26501 (match_operand:V48_AVX2 1 "memory_operand" "m")]
26504 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
26505 [(set_attr "type" "sselog1")
26506 (set_attr "prefix_extra" "1")
26507 (set_attr "prefix" "vex")
26508 (set_attr "btver2_decode" "vector")
26509 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector store: same mnemonic family, storing register operand 2 to
;; memory operand 0 under mask operand 1.
26511 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
26512 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
26514 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
26515 (match_operand:V48_AVX2 2 "register_operand" "x")
26519 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
26520 [(set_attr "type" "sselog1")
26521 (set_attr "prefix_extra" "1")
26522 (set_attr "prefix" "vex")
26523 (set_attr "btver2_decode" "vector")
26524 (set_attr "mode" "<sseinsnmode>")])
;; Standard-named maskload/maskstore expanders used by the vectorizer.
;; Three maskload variants: AVX/AVX2 vector-mask form (V48_AVX2), and two
;; AVX-512 kmask forms (V48H_AVX512VL via vec_merge, and VI12_AVX512VL for
;; byte/word elements).  maskstore mirrors the same split.
;; NOTE(review): expander conditions and some vec_merge sub-operands are
;; elided from this listing (number gaps e.g. 26531-26533, 26538, 26546);
;; confirm against upstream sse.md.
26526 (define_expand "maskload<mode><sseintvecmodelower>"
26527 [(set (match_operand:V48_AVX2 0 "register_operand")
26529 [(match_operand:<sseintvecmode> 2 "register_operand")
26530 (match_operand:V48_AVX2 1 "memory_operand")]
26534 (define_expand "maskload<mode><avx512fmaskmodelower>"
26535 [(set (match_operand:V48H_AVX512VL 0 "register_operand")
26536 (vec_merge:V48H_AVX512VL
26537 (match_operand:V48H_AVX512VL 1 "memory_operand")
26539 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
26542 (define_expand "maskload<mode><avx512fmaskmodelower>"
26543 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
26544 (vec_merge:VI12_AVX512VL
26545 (match_operand:VI12_AVX512VL 1 "memory_operand")
26547 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
26550 (define_expand "maskstore<mode><sseintvecmodelower>"
26551 [(set (match_operand:V48_AVX2 0 "memory_operand")
26553 [(match_operand:<sseintvecmode> 2 "register_operand")
26554 (match_operand:V48_AVX2 1 "register_operand")
26559 (define_expand "maskstore<mode><avx512fmaskmodelower>"
26560 [(set (match_operand:V48H_AVX512VL 0 "memory_operand")
26561 (vec_merge:V48H_AVX512VL
26562 (match_operand:V48H_AVX512VL 1 "register_operand")
26564 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
26567 (define_expand "maskstore<mode><avx512fmaskmodelower>"
26568 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
26569 (vec_merge:VI12_AVX512VL
26570 (match_operand:VI12_AVX512VL 1 "register_operand")
26572 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; cbranch<mode>4: compare two integer vectors (VI48_AVX) and branch on the
;; resulting flags; lowered through ix86_expand_branch.  Only bt-style
;; comparison operators (eq/ne family) are accepted via the predicate.
26575 (define_expand "cbranch<mode>4"
26576 [(set (reg:CC FLAGS_REG)
26577 (compare:CC (match_operand:VI48_AVX 1 "register_operand")
26578 (match_operand:VI48_AVX 2 "nonimmediate_operand")))
26579 (set (pc) (if_then_else
26580 (match_operator 0 "bt_comparison_operator"
26581 [(reg:CC FLAGS_REG) (const_int 0)])
26582 (label_ref (match_operand 3))
26586 ix86_expand_branch (GET_CODE (operands[0]),
26587 operands[1], operands[2], operands[3]);
;; Cast of a half-width vector to a full 256-bit mode: the upper half is an
;; UNSPEC_CAST "don't care".  After reload this splits into a plain move of
;; the low half (register dest: lowpart; otherwise operand 1 is widened
;; with lowpart_subreg).  No memory-to-memory form is allowed.
26592 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
26593 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
26594 (vec_concat:AVX256MODE2P
26595 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
26596 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
26597 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
26599 "&& reload_completed"
26600 [(set (match_dup 0) (match_dup 1))]
26602 if (REG_P (operands[0]))
26603 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
26605 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
26606 <ssehalfvecmode>mode);
;; Modes handled by vec_init expanders.
;; Each 512-bit mode is gated on AVX512F, 256-bit on AVX, and V2DF on SSE2;
;; the bare 128-bit entries are available with baseline SSE.
26609 ;; Modes handled by vec_init expanders.
26610 (define_mode_iterator VEC_INIT_MODE
26611 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
26612 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
26613 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
26614 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
26615 (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
26616 (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
26617 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
26618 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
26619 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
26621 ;; Likewise, but for initialization from half sized vectors.
26622 ;; Thus, these are all VEC_INIT_MODE modes except V2??.
26623 (define_mode_iterator VEC_INIT_HALF_MODE
26624 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
26625 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
26626 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
26627 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
26628 (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
26629 (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
26630 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
26631 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
26632 (V4TI "TARGET_AVX512F")])
;; Standard-named vec_init expanders: build a vector from scalar elements
;; (first form) or from two half-width vectors (second form); both defer
;; to ix86_expand_vector_init.
;; NOTE(review): operand 1 and the expander conditions are elided from this
;; listing (number gaps 26636-26638, 26645-26647); confirm upstream.
26634 (define_expand "vec_init<mode><ssescalarmodelower>"
26635 [(match_operand:VEC_INIT_MODE 0 "register_operand")
26639 ix86_expand_vector_init (false, operands[0], operands[1]);
26643 (define_expand "vec_init<mode><ssehalfvecmodelower>"
26644 [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
26648 ix86_expand_vector_init (false, operands[0], operands[1]);
;; cond_<insn><mode>: masked conditional shift.  For a constant-duplicate
;; shift count the count is rewrapped as a DImode scalar and the immediate
;; mask pattern (<insn><mode>3_mask) is used; otherwise the per-element
;; variable-shift mask pattern (<avx2_avx512>_<insn>v<mode>_mask) is used.
26652 (define_expand "cond_<insn><mode>"
26653 [(set (match_operand:VI248_AVX512VLBW 0 "register_operand")
26654 (vec_merge:VI248_AVX512VLBW
26655 (any_shift:VI248_AVX512VLBW
26656 (match_operand:VI248_AVX512VLBW 2 "register_operand")
26657 (match_operand:VI248_AVX512VLBW 3 "nonimmediate_or_const_vec_dup_operand"))
26658 (match_operand:VI248_AVX512VLBW 4 "nonimm_or_0_operand")
26659 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
26662 if (const_vec_duplicate_p (operands[3]))
26664 operands[3] = unwrap_const_vec_duplicate (operands[3]);
26665 operands[3] = lowpart_subreg (DImode, operands[3], <ssescalarmode>mode);
26666 emit_insn (gen_<insn><mode>3_mask (operands[0],
26673 emit_insn (gen_<avx2_avx512>_<insn>v<mode>_mask (operands[0],
;; Per-element variable shifts (AVX2 vpsravd/q, vpsllv*, vpsrlv*, and the
;; AVX512BW word forms), all with optional masking via <mask_name>.
;; First pattern: arithmetic right shift for dword/qword elements.
26681 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
26682 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
26683 (ashiftrt:VI48_AVX512F_AVX512VL
26684 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
26685 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
26686 "TARGET_AVX2 && <mask_mode512bit_condition>"
26687 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
26688 [(set_attr "type" "sseishft")
26689 (set_attr "prefix" "maybe_evex")
26690 (set_attr "mode" "<sseinsnmode>")])
;; Arithmetic right shift for word elements (vpsravw).
;; NOTE(review): insn condition line elided here (26697 missing) —
;; upstream gates this on AVX512BW; confirm.
26692 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
26693 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
26694 (ashiftrt:VI2_AVX512VL
26695 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
26696 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
26698 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
26699 [(set_attr "type" "sseishft")
26700 (set_attr "prefix" "maybe_evex")
26701 (set_attr "mode" "<sseinsnmode>")])
;; Logical left/right variable shifts for dword/qword elements.
26703 (define_insn "<avx2_avx512>_<insn>v<mode><mask_name>"
26704 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
26705 (any_lshift:VI48_AVX512F
26706 (match_operand:VI48_AVX512F 1 "register_operand" "v")
26707 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
26708 "TARGET_AVX2 && <mask_mode512bit_condition>"
26709 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
26710 [(set_attr "type" "sseishft")
26711 (set_attr "prefix" "maybe_evex")
26712 (set_attr "mode" "<sseinsnmode>")])
;; Logical left/right variable shifts for word elements.
;; NOTE(review): insn condition line elided here (26719 missing); confirm.
26714 (define_insn "<avx2_avx512>_<insn>v<mode><mask_name>"
26715 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
26716 (any_lshift:VI2_AVX512VL
26717 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
26718 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
26720 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
26721 [(set_attr "type" "sseishft")
26722 (set_attr "prefix" "maybe_evex")
26723 (set_attr "mode" "<sseinsnmode>")])
;; avx_vec_concat<mode>: concatenate two half-width vectors into a 256/512
;; bit register.  Alternatives 0/1 use vinsert* with imm 0x1 (operand 2 may
;; be memory); alternatives 2/3 require operand 2 == 0 and reduce to a move
;; of operand 1 into the low half (vmovaps/vmovdqa etc. chosen by insn
;; mode, alignment, and element size in the C template below).
;; NOTE(review): many interior lines of the C output template are elided
;; from this listing (case labels, braces, TARGET checks); comments are
;; deliberately kept outside the C fragment.
26725 (define_insn "avx_vec_concat<mode>"
26726 [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
26727 (vec_concat:V_256_512
26728 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "x,v,xm,vm")
26729 (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xm,vm,C,C")))]
26731 && (operands[2] == CONST0_RTX (<ssehalfvecmode>mode)
26732 || !MEM_P (operands[1]))"
26734 switch (which_alternative)
26737 return "vinsert<i128>\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
26739 if (<MODE_SIZE> == 64)
26741 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
26742 return "vinsert<shuffletype>32x8\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
26744 return "vinsert<shuffletype>64x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
26748 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
26749 return "vinsert<shuffletype>64x2\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
26751 return "vinsert<shuffletype>32x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
26755 switch (get_attr_mode (insn))
26758 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
26759 return "vmovups\t{%1, %t0|%t0, %1}";
26761 return "vmovaps\t{%1, %t0|%t0, %1}";
26763 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
26764 return "vmovupd\t{%1, %t0|%t0, %1}";
26766 return "vmovapd\t{%1, %t0|%t0, %1}";
26768 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
26769 return "vmovups\t{%1, %x0|%x0, %1}";
26771 return "vmovaps\t{%1, %x0|%x0, %1}";
26773 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
26774 return "vmovupd\t{%1, %x0|%x0, %1}";
26776 return "vmovapd\t{%1, %x0|%x0, %1}";
26778 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
26780 if (which_alternative == 2)
26781 return "vmovdqu\t{%1, %t0|%t0, %1}";
26782 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
26783 return "vmovdqu64\t{%1, %t0|%t0, %1}";
26785 return "vmovdqu32\t{%1, %t0|%t0, %1}";
26789 if (which_alternative == 2)
26790 return "vmovdqa\t{%1, %t0|%t0, %1}";
26791 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
26792 return "vmovdqa64\t{%1, %t0|%t0, %1}";
26794 return "vmovdqa32\t{%1, %t0|%t0, %1}";
26797 if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
26799 if (which_alternative == 2)
26800 return "vmovdqu\t{%1, %x0|%x0, %1}";
26801 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
26802 return "vmovdqu64\t{%1, %x0|%x0, %1}";
26804 return "vmovdqu32\t{%1, %x0|%x0, %1}";
26808 if (which_alternative == 2)
26809 return "vmovdqa\t{%1, %x0|%x0, %1}";
26810 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
26811 return "vmovdqa64\t{%1, %x0|%x0, %1}";
26813 return "vmovdqa32\t{%1, %x0|%x0, %1}";
26816 gcc_unreachable ();
26819 gcc_unreachable ();
26822 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
26823 (set_attr "prefix_extra" "1,1,*,*")
26824 (set_attr "length_immediate" "1,1,*,*")
26825 (set_attr "prefix" "maybe_evex")
26826 (set_attr "mode" "<sseinsnmode>")])
;; *vec_concat<mode>_0_1: recognize a movq-style select from (x, 0)
;; before reload and rewrite it as a concat of the low half of operand 1
;; with a zero vector; operand 5 is the generated zero constant.
26828 (define_insn_and_split "*vec_concat<mode>_0_1"
26829 [(set (match_operand:V 0 "register_operand")
26831 (vec_concat:<ssedoublevecmode>
26832 (match_operand:V 1 "nonimmediate_operand")
26833 (match_operand:V 2 "const0_operand"))
26834 (match_parallel 3 "movq_parallel"
26835 [(match_operand 4 "const_int_operand")])))]
26836 "TARGET_SSE2 && ix86_pre_reload_split ()"
26839 [(set (match_dup 0)
26840 (vec_concat:V (match_dup 1) (match_dup 5)))]
26842 operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
26843 operands[5] = CONST0_RTX (<ssehalfvecmode>mode);
;; Half-precision -> single-precision conversions (F16C / AVX512VL),
;; all with optional masking via <mask_name>.
;; 128-bit form: converts the low 4 HImode halves of a V8HI register.
26846 (define_insn "vcvtph2ps<mask_name>"
26847 [(set (match_operand:V4SF 0 "register_operand" "=v")
26849 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
26851 (parallel [(const_int 0) (const_int 1)
26852 (const_int 2) (const_int 3)])))]
26853 "TARGET_F16C || TARGET_AVX512VL"
26854 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
26855 [(set_attr "type" "ssecvt")
26856 (set_attr "prefix" "maybe_evex")
26857 (set_attr "mode" "V4SF")])
;; 128-bit form with a V4HI memory source.
26859 (define_insn "*vcvtph2ps_load<mask_name>"
26860 [(set (match_operand:V4SF 0 "register_operand" "=v")
26861 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
26862 UNSPEC_VCVTPH2PS))]
26863 "TARGET_F16C || TARGET_AVX512VL"
26864 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
26865 [(set_attr "type" "ssecvt")
26866 (set_attr "prefix" "vex")
26867 (set_attr "mode" "V8SF")])
;; 256-bit form: V8HI -> V8SF.
26869 (define_insn "vcvtph2ps256<mask_name>"
26870 [(set (match_operand:V8SF 0 "register_operand" "=v")
26871 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
26872 UNSPEC_VCVTPH2PS))]
26873 "TARGET_F16C || TARGET_AVX512VL"
26874 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
26875 [(set_attr "type" "ssecvt")
26876 (set_attr "prefix" "vex")
26877 (set_attr "btver2_decode" "double")
26878 (set_attr "mode" "V8SF")])
;; 512-bit form: V16HI -> V16SF, with optional SAE (round_saeonly) support.
26880 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
26881 [(set (match_operand:V16SF 0 "register_operand" "=v")
26883 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
26884 UNSPEC_VCVTPH2PS))]
26886 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
26887 [(set_attr "type" "ssecvt")
26888 (set_attr "prefix" "evex")
26889 (set_attr "mode" "V16SF")])
;; Single-precision -> half-precision conversions (F16C / AVX512VL).
;; Operand 2 is the 0..255 rounding-control immediate in all patterns.
;; Masked builtin expander: fabricates the zero upper half (operand 5).
26891 (define_expand "vcvtps2ph_mask"
26892 [(set (match_operand:V8HI 0 "register_operand")
26895 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
26896 (match_operand:SI 2 "const_0_to_255_operand")]
26899 (match_operand:V8HI 3 "nonimm_or_0_operand")
26900 (match_operand:QI 4 "register_operand")))]
26902 "operands[5] = CONST0_RTX (V4HImode);")
;; Unmasked builtin expander: zero upper half fabricated as operand 3.
26904 (define_expand "vcvtps2ph"
26905 [(set (match_operand:V8HI 0 "register_operand")
26907 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
26908 (match_operand:SI 2 "const_0_to_255_operand")]
26912 "operands[3] = CONST0_RTX (V4HImode);")
;; 128-bit register-destination insn (result padded with zeros).
26914 (define_insn "*vcvtps2ph<mask_name>"
26915 [(set (match_operand:V8HI 0 "register_operand" "=v")
26917 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
26918 (match_operand:SI 2 "const_0_to_255_operand")]
26920 (match_operand:V4HI 3 "const0_operand")))]
26921 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
26922 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
26923 [(set_attr "type" "ssecvt")
26924 (set_attr "prefix" "maybe_evex")
26925 (set_attr "mode" "V4SF")])
;; 128-bit store form: converts directly to a V4HI memory destination.
26927 (define_insn "*vcvtps2ph_store<merge_mask_name>"
26928 [(set (match_operand:V4HI 0 "memory_operand" "=m")
26929 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
26930 (match_operand:SI 2 "const_0_to_255_operand")]
26931 UNSPEC_VCVTPS2PH))]
26932 "TARGET_F16C || TARGET_AVX512VL"
26933 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
26934 [(set_attr "type" "ssecvt")
26935 (set_attr "prefix" "maybe_evex")
26936 (set_attr "mode" "V4SF")])
;; 256-bit vcvtps2ph: V8SF -> V8HI, register destination...
26938 (define_insn "vcvtps2ph256<mask_name>"
26939 [(set (match_operand:V8HI 0 "register_operand" "=v")
26940 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
26941 (match_operand:SI 2 "const_0_to_255_operand")]
26942 UNSPEC_VCVTPS2PH))]
26943 "TARGET_F16C || TARGET_AVX512VL"
26944 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
26945 [(set_attr "type" "ssecvt")
26946 (set_attr "prefix" "maybe_evex")
26947 (set_attr "btver2_decode" "vector")
26948 (set_attr "mode" "V8SF")])
;; ...and the memory-destination (merge-masked store) variant.
26950 (define_insn "*vcvtps2ph256<merge_mask_name>"
26951 [(set (match_operand:V8HI 0 "memory_operand" "=m")
26952 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
26953 (match_operand:SI 2 "const_0_to_255_operand")]
26954 UNSPEC_VCVTPS2PH))]
26955 "TARGET_F16C || TARGET_AVX512VL"
26956 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
26957 [(set_attr "type" "ssecvt")
26958 (set_attr "prefix" "maybe_evex")
26959 (set_attr "btver2_decode" "vector")
26960 (set_attr "mode" "V8SF")])
26962 ;; vcvtps2ph is special, it encodes {sae} in evex, but round control in the imm
26963 ;; For intrinsic _mm512_cvt_roundps_ph (a, imm), imm contains both {sae}
26964 ;; and round control, we need to separate it in the assembly output.
26965 ;; op2 in avx512f_vcvtps2ph512_mask_sae contains both sae and round control.
;; Expander that splits the combined sae+round immediate: the rounding bits
;; stay in operand 2 while the SAE indication is passed separately
;; (GEN_INT (8)) to the _mask_round pattern.
26966 (define_expand "avx512f_vcvtps2ph512_mask_sae"
26967 [(set (match_operand:V16HI 0 "register_operand" "=v")
26970 [(match_operand:V16SF 1 "register_operand" "v")
26971 (match_operand:SI 2 "const_0_to_255_operand")]
26973 (match_operand:V16HI 3 "nonimm_or_0_operand")
26974 (match_operand:HI 4 "register_operand")))]
26977 int round = INTVAL (operands[2]);
26978 /* Separate {sae} from rounding control imm,
26979 imm[3:7] will be ignored by the instruction. */
26982 emit_insn (gen_avx512f_vcvtps2ph512_mask_round (operands[0], operands[1],
26983 operands[2], operands[3], operands[4], GEN_INT (8)));
;; 512-bit vcvtps2ph insn, register destination, optional SAE.
26988 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name><round_saeonly_name>"
26989 [(set (match_operand:V16HI 0 "register_operand" "=v")
26991 [(match_operand:V16SF 1 "register_operand" "v")
26992 (match_operand:SI 2 "const_0_to_255_operand")]
26993 UNSPEC_VCVTPS2PH))]
26995 "vcvtps2ph\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
26996 [(set_attr "type" "ssecvt")
26997 (set_attr "prefix" "evex")
26998 (set_attr "mode" "V16SF")])
;; 512-bit vcvtps2ph store variant (merge-masked memory destination).
27000 (define_insn "*avx512f_vcvtps2ph512<merge_mask_name>"
27001 [(set (match_operand:V16HI 0 "memory_operand" "=m")
27003 [(match_operand:V16SF 1 "register_operand" "v")
27004 (match_operand:SI 2 "const_0_to_255_operand")]
27005 UNSPEC_VCVTPS2PH))]
27007 "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
27008 [(set_attr "type" "ssecvt")
27009 (set_attr "prefix" "evex")
27010 (set_attr "mode" "V16SF")])
27012 ;; For gather* insn patterns
;; Data modes the AVX2 gather patterns iterate over.
27013 (define_mode_iterator VEC_GATHER_MODE
27014 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Index-vector mode when the index elements are 32-bit (gathersi).
27015 (define_mode_attr VEC_GATHER_IDXSI
27016 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
27017 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
27018 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
27019 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; Index-vector mode when the index elements are 64-bit (gatherdi).
27021 (define_mode_attr VEC_GATHER_IDXDI
27022 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
27023 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
27024 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
27025 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; Source/destination mode for gatherdi: 32-bit-element vectors shrink to
;; half width because a 64-bit index addresses half as many elements.
27027 (define_mode_attr VEC_GATHER_SRCDI
27028 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
27029 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
27030 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
27031 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; AVX2 gather with 32-bit indices.  The expander rewrites the plain
;; address operands into an UNSPEC_VSIBADDR triple (base, index, scale) so
;; the insn patterns can match a VSIB memory operand.  Operand 4/1 is the
;; merge source, operand 5 the scale, and the mask register is clobbered
;; by the hardware (hence the match_scratch clobbers).
27033 (define_expand "avx2_gathersi<mode>"
27034 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
27035 (unspec:VEC_GATHER_MODE
27036 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
27037 (mem:<ssescalarmode>
27039 [(match_operand 2 "vsib_address_operand")
27040 (match_operand:<VEC_GATHER_IDXSI>
27041 3 "register_operand")
27042 (match_operand:SI 5 "const1248_operand ")]))
27043 (mem:BLK (scratch))
27044 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
27046 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
27050 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
27051 operands[5]), UNSPEC_VSIBADDR)[
27054 (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
27055 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
27056 (unspec:VEC_GATHER_MODE
27057 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
27058 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
27060 [(match_operand:P 3 "vsib_address_operand" "Tv")
27061 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
27062 (match_operand:SI 6 "const1248_operand")]
27064 (mem:BLK (scratch))
27065 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
27067 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
27069 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
27070 [(set_attr "type" "ssemov")
27071 (set_attr "prefix" "vex")
27072 (set_attr "mode" "<sseinsnmode>")])
;; Variant matched when combine has eliminated the separate merge source.
27074 (define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>_2"
27075 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
27076 (unspec:VEC_GATHER_MODE
27078 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
27080 [(match_operand:P 2 "vsib_address_operand" "Tv")
27081 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
27082 (match_operand:SI 5 "const1248_operand")]
27084 (mem:BLK (scratch))
27085 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
27087 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
27089 "%M2v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
27090 [(set_attr "type" "ssemov")
27091 (set_attr "prefix" "vex")
27092 (set_attr "mode" "<sseinsnmode>")])
;; AVX2 gather with 64-bit indices.  Same VSIB rewriting as gathersi; the
;; destination/merge operands use <VEC_GATHER_SRCDI> because 32-bit-element
;; results occupy only the low half of the destination register.
27094 (define_expand "avx2_gatherdi<mode>"
27095 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
27096 (unspec:VEC_GATHER_MODE
27097 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
27098 (mem:<ssescalarmode>
27100 [(match_operand 2 "vsib_address_operand")
27101 (match_operand:<VEC_GATHER_IDXDI>
27102 3 "register_operand")
27103 (match_operand:SI 5 "const1248_operand ")]))
27104 (mem:BLK (scratch))
27105 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
27107 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
27111 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
27112 operands[5]), UNSPEC_VSIBADDR);
27115 (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
27116 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
27117 (unspec:VEC_GATHER_MODE
27118 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
27119 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
27121 [(match_operand:P 3 "vsib_address_operand" "Tv")
27122 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
27123 (match_operand:SI 6 "const1248_operand")]
27125 (mem:BLK (scratch))
27126 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
27128 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
27130 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
27131 [(set_attr "type" "ssemov")
27132 (set_attr "prefix" "vex")
27133 (set_attr "mode" "<sseinsnmode>")])
;; _2: merge source folded away; %x0 is used when the result is narrower
;; than the nominal mode (32-bit elements with 64-bit indices).
27135 (define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>_2"
27136 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
27137 (unspec:VEC_GATHER_MODE
27139 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
27141 [(match_operand:P 2 "vsib_address_operand" "Tv")
27142 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
27143 (match_operand:SI 5 "const1248_operand")]
27145 (mem:BLK (scratch))
27146 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
27148 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
27151 if (<VEC_GATHER_MODE:MODE>mode != <VEC_GATHER_SRCDI>mode)
27152 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
27153 return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
27155 [(set_attr "type" "ssemov")
27156 (set_attr "prefix" "vex")
27157 (set_attr "mode" "<sseinsnmode>")])
;; _3/_4: forms where only the low half of a 256-bit gather result is
;; selected (vec_select of elements 0..3).
27159 (define_insn "*avx2_gatherdi<VI4F_256:mode>_3"
27160 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
27161 (vec_select:<VEC_GATHER_SRCDI>
27163 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
27164 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
27166 [(match_operand:P 3 "vsib_address_operand" "Tv")
27167 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
27168 (match_operand:SI 6 "const1248_operand")]
27170 (mem:BLK (scratch))
27171 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
27173 (parallel [(const_int 0) (const_int 1)
27174 (const_int 2) (const_int 3)])))
27175 (clobber (match_scratch:VI4F_256 1 "=&x"))]
27177 "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
27178 [(set_attr "type" "ssemov")
27179 (set_attr "prefix" "vex")
27180 (set_attr "mode" "<sseinsnmode>")])
27182 (define_insn "*avx2_gatherdi<VI4F_256:mode>_4"
27183 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
27184 (vec_select:<VEC_GATHER_SRCDI>
27187 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
27189 [(match_operand:P 2 "vsib_address_operand" "Tv")
27190 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
27191 (match_operand:SI 5 "const1248_operand")]
27193 (mem:BLK (scratch))
27194 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
27196 (parallel [(const_int 0) (const_int 1)
27197 (const_int 2) (const_int 3)])))
27198 (clobber (match_scratch:VI4F_256 1 "=&x"))]
27200 "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
27201 [(set_attr "type" "ssemov")
27202 (set_attr "prefix" "vex")
27203 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 gather with 32-bit indices: like the AVX2 version but the merge
;; control is a kmask register (operand 4), which the hardware zeroes as it
;; completes — hence the kmask match_scratch clobber in the insns.
27205 (define_expand "<avx512>_gathersi<mode>"
27206 [(parallel [(set (match_operand:VI48F 0 "register_operand")
27208 [(match_operand:VI48F 1 "register_operand")
27209 (match_operand:<avx512fmaskmode> 4 "register_operand")
27210 (mem:<ssescalarmode>
27212 [(match_operand 2 "vsib_address_operand")
27213 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
27214 (match_operand:SI 5 "const1248_operand")]))]
27216 (clobber (match_scratch:<avx512fmaskmode> 7))])]
27220 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
27221 operands[5]), UNSPEC_VSIBADDR);
27224 (define_insn "*avx512f_gathersi<VI48F:mode>"
27225 [(set (match_operand:VI48F 0 "register_operand" "=&v")
27227 [(match_operand:VI48F 1 "register_operand" "0")
27228 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
27229 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
27231 [(match_operand:P 4 "vsib_address_operand" "Tv")
27232 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
27233 (match_operand:SI 5 "const1248_operand")]
27234 UNSPEC_VSIBADDR)])]
27236 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
27238 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
27239 ;; gas changed what it requires incompatibly.
27240 "%M4v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
27241 [(set_attr "type" "ssemov")
27242 (set_attr "prefix" "evex")
27243 (set_attr "mode" "<sseinsnmode>")])
;; _2: variant without a distinct merge source operand.
27245 (define_insn "*avx512f_gathersi<VI48F:mode>_2"
27246 [(set (match_operand:VI48F 0 "register_operand" "=&v")
27249 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
27250 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
27252 [(match_operand:P 3 "vsib_address_operand" "Tv")
27253 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
27254 (match_operand:SI 4 "const1248_operand")]
27255 UNSPEC_VSIBADDR)])]
27257 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
27259 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
27260 ;; gas changed what it requires incompatibly.
27261 "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
27262 [(set_attr "type" "ssemov")
27263 (set_attr "prefix" "evex")
27264 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 gather with 64-bit indices; mask is a QImode kmask here.  The
;; _2 insn picks %x0/%t0 result width depending on whether the element
;; vector is narrower than the nominal mode (and on 512-bit size).
27267 (define_expand "<avx512>_gatherdi<mode>"
27268 [(parallel [(set (match_operand:VI48F 0 "register_operand")
27270 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
27271 (match_operand:QI 4 "register_operand")
27272 (mem:<ssescalarmode>
27274 [(match_operand 2 "vsib_address_operand")
27275 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
27276 (match_operand:SI 5 "const1248_operand")]))]
27278 (clobber (match_scratch:QI 7))])]
27282 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
27283 operands[5]), UNSPEC_VSIBADDR);
27286 (define_insn "*avx512f_gatherdi<VI48F:mode>"
27287 [(set (match_operand:VI48F 0 "register_operand" "=&v")
27289 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
27290 (match_operand:QI 7 "register_operand" "2")
27291 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
27293 [(match_operand:P 4 "vsib_address_operand" "Tv")
27294 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
27295 (match_operand:SI 5 "const1248_operand")]
27296 UNSPEC_VSIBADDR)])]
27298 (clobber (match_scratch:QI 2 "=&Yk"))]
27300 ;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
27301 ;; gas changed what it requires incompatibly.
27302 "%M4v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
27303 [(set_attr "type" "ssemov")
27304 (set_attr "prefix" "evex")
27305 (set_attr "mode" "<sseinsnmode>")])
27307 (define_insn "*avx512f_gatherdi<VI48F:mode>_2"
27308 [(set (match_operand:VI48F 0 "register_operand" "=&v")
27311 (match_operand:QI 6 "register_operand" "1")
27312 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
27314 [(match_operand:P 3 "vsib_address_operand" "Tv")
27315 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
27316 (match_operand:SI 4 "const1248_operand")]
27317 UNSPEC_VSIBADDR)])]
27319 (clobber (match_scratch:QI 1 "=&Yk"))]
27322 /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
27323 gas changed what it requires incompatibly. */
27324 if (<VI48F:MODE>mode != <VEC_GATHER_SRCDI>mode)
27326 if (<VI48F:MODE_SIZE> != 64)
27327 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
27329 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
27331 return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
27333 [(set_attr "type" "ssemov")
27334 (set_attr "prefix" "evex")
27335 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 scatters.  The expanders fold base, index, scale AND the kmask
;; into a 4-element UNSPEC_VSIBADDR so the store destination is a VSIB
;; memory operand; the kmask register is clobbered by the hardware.
;; scattersi uses 32-bit indices, scatterdi 64-bit indices (QImode kmask).
27337 (define_expand "<avx512>_scattersi<mode>"
27338 [(parallel [(set (mem:VI48F
27340 [(match_operand 0 "vsib_address_operand")
27341 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
27342 (match_operand:SI 4 "const1248_operand")]))
27344 [(match_operand:<avx512fmaskmode> 1 "register_operand")
27345 (match_operand:VI48F 3 "register_operand")]
27347 (clobber (match_scratch:<avx512fmaskmode> 6))])]
27351 = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
27352 operands[4], operands[1]),
27356 (define_insn "*avx512f_scattersi<VI48F:mode>"
27357 [(set (match_operator:VI48F 5 "vsib_mem_operator"
27359 [(match_operand:P 0 "vsib_address_operand" "Tv")
27360 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
27361 (match_operand:SI 4 "const1248_operand")
27362 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")]
27366 (match_operand:VI48F 3 "register_operand" "v")]
27368 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
27370 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
27371 ;; gas changed what it requires incompatibly.
27372 "%M0v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
27373 [(set_attr "type" "ssemov")
27374 (set_attr "prefix" "evex")
27375 (set_attr "mode" "<sseinsnmode>")])
27377 (define_expand "<avx512>_scatterdi<mode>"
27378 [(parallel [(set (mem:VI48F
27380 [(match_operand 0 "vsib_address_operand")
27381 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
27382 (match_operand:SI 4 "const1248_operand")]))
27384 [(match_operand:QI 1 "register_operand")
27385 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
27387 (clobber (match_scratch:QI 6))])]
27391 = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
27392 operands[4], operands[1]),
27396 (define_insn "*avx512f_scatterdi<VI48F:mode>"
27397 [(set (match_operator:VI48F 5 "vsib_mem_operator"
27399 [(match_operand:P 0 "vsib_address_operand" "Tv")
27400 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
27401 (match_operand:SI 4 "const1248_operand")
27402 (match_operand:QI 6 "register_operand" "1")]
27406 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
27408 (clobber (match_scratch:QI 1 "=&Yk"))]
27410 ;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
27411 ;; gas changed what it requires incompatibly.
27412 "%M0v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
27413 [(set_attr "type" "ssemov")
27414 (set_attr "prefix" "evex")
27415 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 compress: pack the mask-selected elements of operand 1 into the
;; low positions of operand 0; operand 2 ("0C") is the merge source (either
;; the destination itself or a zero vector, see %N2 in the template).
;; VI48F form needs AVX512F(+VL); byte/word form needs AVX512VBMI2.
27417 (define_insn "<avx512>_compress<mode>_mask"
27418 [(set (match_operand:VI48F 0 "register_operand" "=v")
27420 [(match_operand:VI48F 1 "register_operand" "v")
27421 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C")
27422 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
27425 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
27426 [(set_attr "type" "ssemov")
27427 (set_attr "prefix" "evex")
27428 (set_attr "mode" "<sseinsnmode>")])
27430 (define_insn "compress<mode>_mask"
27431 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
27432 (unspec:VI12_AVX512VL
27433 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
27434 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
27435 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
27437 "TARGET_AVX512VBMI2"
27438 "vpcompress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
27439 [(set_attr "type" "ssemov")
27440 (set_attr "prefix" "evex")
27441 (set_attr "mode" "<sseinsnmode>")])
;; Compress-store variants: same operation but writing the packed elements
;; directly to a masked memory destination (UNSPEC_COMPRESS_STORE).
27443 (define_insn "<avx512>_compressstore<mode>_mask"
27444 [(set (match_operand:VI48F 0 "memory_operand" "=m")
27446 [(match_operand:VI48F 1 "register_operand" "x")
27448 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
27449 UNSPEC_COMPRESS_STORE))]
27451 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
27452 [(set_attr "type" "ssemov")
27453 (set_attr "prefix" "evex")
27454 (set_attr "memory" "store")
27455 (set_attr "mode" "<sseinsnmode>")])
27457 (define_insn "compressstore<mode>_mask"
27458 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
27459 (unspec:VI12_AVX512VL
27460 [(match_operand:VI12_AVX512VL 1 "register_operand" "x")
27462 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
27463 UNSPEC_COMPRESS_STORE))]
27464 "TARGET_AVX512VBMI2"
27465 "vpcompress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
27466 [(set_attr "type" "ssemov")
27467 (set_attr "prefix" "evex")
27468 (set_attr "memory" "store")
27469 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking expand: same as the masked expand below but with the merge
;; operand forced to the all-zeros vector by the preparation statement.
27471 (define_expand "<avx512>_expand<mode>_maskz"
27472 [(set (match_operand:VI48F 0 "register_operand")
27474 [(match_operand:VI48F 1 "nonimmediate_operand")
27475 (match_operand:VI48F 2 "nonimm_or_0_operand")
27476 (match_operand:<avx512fmaskmode> 3 "register_operand")]
27479 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Masked expand into a register: v{p}expand<suffix> distributes the packed
;; low elements of operand 1 (register or memory, two alternatives) into the
;; mask-selected lanes of operand 0; inactive lanes take the merge value
;; from operand 2 ("0C": old destination or zero, via %N2).
27481 (define_insn "expand<mode>_mask"
27482 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
27484 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
27485 (match_operand:VI48F 2 "nonimm_or_0_operand" "0C,0C")
27486 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
27489 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
27490 [(set_attr "type" "ssemov")
27491 (set_attr "prefix" "evex")
27492 (set_attr "memory" "none,load")
27493 (set_attr "mode" "<sseinsnmode>")])
;; Byte/word expand variant (vpexpandb/vpexpandw), gated on AVX512VBMI2.
27495 (define_insn "expand<mode>_mask"
27496 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
27497 (unspec:VI12_AVX512VL
27498 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
27499 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C,0C")
27500 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
27502 "TARGET_AVX512VBMI2"
27503 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
27504 [(set_attr "type" "ssemov")
27505 (set_attr "prefix" "evex")
27506 (set_attr "memory" "none,load")
27507 (set_attr "mode" "<sseinsnmode>")])
;; Pre-reload split for an expand whose mask is a compile-time constant.
;; When the constant mask's set bits are all contiguous in its low part,
;; the expand degenerates into a simple masked load (vmovdq*); an
;; all-ones or all-zeros mask degenerates further into a plain move.
;; Only byte/word modes need VBMI2; 4/8-byte element modes are handled
;; by the base AVX512F/VL insns.
27509 (define_insn_and_split "*expand<mode>_mask"
27510 [(set (match_operand:VI12_VI48F_AVX512VL 0 "register_operand")
27511 (unspec:VI12_VI48F_AVX512VL
27512 [(match_operand:VI12_VI48F_AVX512VL 1 "nonimmediate_operand")
27513 (match_operand:VI12_VI48F_AVX512VL 2 "nonimm_or_0_operand")
27514 (match_operand 3 "const_int_operand")]
27516 "ix86_pre_reload_split ()
27517 && (TARGET_AVX512VBMI2 || GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) >= 4)"
27522 unsigned HOST_WIDE_INT mask = INTVAL (operands[3]);
27523 bool has_zero = false;
27524 unsigned n = GET_MODE_NUNITS (<MODE>mode), i;
27527 /* If all ones bits is in mask's lower part,
27528 get number of ones and assign it to ONES. */
27529 for (i = 0; i != n; i++)
;; Scan stops early (leaving i != n) if a set bit follows a clear bit,
;; i.e. the ones are not contiguous at the bottom of the mask.
27531 if ((mask & HOST_WIDE_INT_1U << i) && has_zero)
27534 /* Record first zero bit. */
27535 if (!(mask & HOST_WIDE_INT_1U << i) && !has_zero)
;; Non-degenerate case: materialize the mask in a k-register and emit
;; either a masked load (contiguous low ones) or a real expand insn.
27545 if (i != n || (ones != 0 && ones != n))
27547 rtx reg = gen_reg_rtx (<avx512fmaskmode>mode);
27548 emit_move_insn (reg, operands[3]);
27549 enum insn_code icode;
27551 /* For masks with all one bits in it's lower part,
27552 we can transform v{,p}expand* to vmovdq* with
27554 icode = CODE_FOR_<avx512>_load<mode>_mask;
27556 icode = CODE_FOR_expand<mode>_mask;
27557 emit_insn (GEN_FCN (icode) (operands[0], operands[1], operands[2], reg));
27560 /* For ALL_MASK_ONES or CONST0_RTX mask, transform it to simple mov. */
27561 emit_move_insn (operands[0], ones ? operands[1] : operands[2]);
;; Zero-masking byte/word expand (AVX512VBMI2): force the merge operand
;; to the all-zeros vector, then fall through to the masked expand insn.
27565 (define_expand "expand<mode>_maskz"
27566 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
27567 (unspec:VI12_AVX512VL
27568 [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
27569 (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
27570 (match_operand:<avx512fmaskmode> 3 "register_operand")]
27572 "TARGET_AVX512VBMI2"
27573 "operands[2] = CONST0_RTX (<MODE>mode);")
;; AVX512DQ vrangeps/vrangepd: packed range restriction selected by the
;; 4-bit immediate (operand 3).  When the destination overlaps neither
;; source, optionally emit vxorps first to break a false output
;; dependency on some uarchs (TARGET_DEST_FALSE_DEP_FOR_GLC).
27575 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
27576 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
27577 (unspec:VF_AVX512VL
27578 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
27579 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
27580 (match_operand:SI 3 "const_0_to_15_operand")]
27582 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
27584 if (TARGET_DEST_FALSE_DEP_FOR_GLC
27585 && <mask4_dest_false_dep_for_glc_cond>
27586 && !reg_mentioned_p (operands[0], operands[1])
27587 && !reg_mentioned_p (operands[0], operands[2]))
27588 output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
27589 return "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}";
27591 [(set_attr "type" "sse")
27592 (set_attr "prefix" "evex")
27593 (set_attr "mode" "<MODE>")])
;; Scalar variant (vrangess/vrangesd) with the same false-dependency
;; workaround.
27595 (define_insn "avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>"
27596 [(set (match_operand:VF_128 0 "register_operand" "=v")
27599 [(match_operand:VF_128 1 "register_operand" "v")
27600 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
27601 (match_operand:SI 3 "const_0_to_15_operand")]
27607 if (TARGET_DEST_FALSE_DEP_FOR_GLC
27608 && <mask_scalar4_dest_false_dep_for_glc_cond>
27609 && !reg_mentioned_p (operands[0], operands[1])
27610 && !reg_mentioned_p (operands[0], operands[2]))
27611 output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
27612 return "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
27614 [(set_attr "type" "sse")
27615 (set_attr "prefix" "evex")
27616 (set_attr "mode" "<MODE>")])
;; vfpclassps/pd/ph: classify each FP element against the category byte
;; (operand 2), producing a mask register.  FP16 modes are allowed even
;; without AVX512DQ when AVX512FP16 is available.
27618 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
27619 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
27620 (unspec:<avx512fmaskmode>
27621 [(match_operand:VFH_AVX512VL 1 "vector_operand" "vm")
27622 (match_operand 2 "const_0_to_255_operand")]
27624 "TARGET_AVX512DQ || VALID_AVX512FP16_REG_MODE(<MODE>mode)"
27625 "vfpclass<ssemodesuffix><vecmemsuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
27626 [(set_attr "type" "sse")
27627 (set_attr "length_immediate" "1")
27628 (set_attr "prefix" "evex")
27629 (set_attr "mode" "<MODE>")])
;; Scalar vfpclassss/sd/sh: result is ANDed down so only the low mask
;; bit is meaningful.
27631 (define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>"
27632 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
27633 (and:<avx512fmaskmode>
27634 (unspec:<avx512fmaskmode>
27635 [(match_operand:VFH_128 1 "nonimmediate_operand" "vm")
27636 (match_operand 2 "const_0_to_255_operand")]
27639 "TARGET_AVX512DQ || VALID_AVX512FP16_REG_MODE(<MODE>mode)"
27640 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
27641 [(set_attr "type" "sse")
27642 (set_attr "length_immediate" "1")
27643 (set_attr "prefix" "evex")
27644 (set_attr "mode" "<MODE>")])
;; vgetmantps/pd/ph: extract normalized mantissas; interval/sign control
;; in the immediate (operand 2).  With a memory source, break the false
;; output dependency first on affected uarchs.
27646 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
27647 [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
27648 (unspec:VFH_AVX512VL
27649 [(match_operand:VFH_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
27650 (match_operand:SI 2 "const_0_to_15_operand")]
27654 if (TARGET_DEST_FALSE_DEP_FOR_GLC
27655 && <mask3_dest_false_dep_for_glc_cond>
27656 && MEM_P (operands[1]))
27657 output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
27658 return "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
27660 [(set_attr "prefix" "evex")
27661 (set_attr "mode" "<MODE>")])
;; Scalar vgetmantss/sd/sh; false-dependency workaround applies when the
;; destination overlaps neither source register.
27663 (define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
27664 [(set (match_operand:VFH_128 0 "register_operand" "=v")
27667 [(match_operand:VFH_128 1 "register_operand" "v")
27668 (match_operand:VFH_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
27669 (match_operand:SI 3 "const_0_to_15_operand")]
27675 if (TARGET_DEST_FALSE_DEP_FOR_GLC
27676 && <mask_scalar4_dest_false_dep_for_glc_cond>
27677 && !reg_mentioned_p (operands[0], operands[1])
27678 && !reg_mentioned_p (operands[0], operands[2]))
27679 output_asm_insn ("vxorps\t%x0, %x0, %x0", operands);
27680 return "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
27682 [(set_attr "prefix" "evex")
27683 (set_attr "mode" "<ssescalarmode>")])
27685 ;; The correct representation for this is absolutely enormous, and
27686 ;; surely not generally useful.
;; vdbpsadbw (AVX512BW): double-block packed sum of absolute byte
;; differences; kept as an opaque UNSPEC for the reason above.
27687 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
27688 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
27689 (unspec:VI2_AVX512VL
27690 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
27691 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
27692 (match_operand:SI 3 "const_0_to_255_operand")]
27695 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
27696 [(set_attr "type" "sselog1")
27697 (set_attr "length_immediate" "1")
27698 (set_attr "prefix" "evex")
27699 (set_attr "mode" "<sseinsnmode>")])
;; Per-element count-leading-zeros, emitted as vplzcntd/vplzcntq.
27701 (define_insn "clz<mode>2<mask_name>"
27702 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
27704 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
27706 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
27707 [(set_attr "type" "sse")
27708 (set_attr "prefix" "evex")
27709 (set_attr "mode" "<sseinsnmode>")])
;; vpconflictd/vpconflictq: per-element conflict detection bitmaps.
27711 (define_insn "<mask_codefor>conflict<mode><mask_name>"
27712 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
27713 (unspec:VI48_AVX512VL
27714 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
27717 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
27718 [(set_attr "type" "sse")
27719 (set_attr "prefix" "evex")
27720 (set_attr "mode" "<sseinsnmode>")])
;; SHA-NI instruction patterns.  All are legacy-SSE encoded two-operand
;; forms (destination is also the first source, constraint "0").
;; sha1msg1: first intermediate SHA-1 message-schedule calculation.
27722 (define_insn "sha1msg1"
27723 [(set (match_operand:V4SI 0 "register_operand" "=x")
27725 [(match_operand:V4SI 1 "register_operand" "0")
27726 (match_operand:V4SI 2 "vector_operand" "xBm")]
27729 "sha1msg1\t{%2, %0|%0, %2}"
27730 [(set_attr "type" "sselog1")
27731 (set_attr "mode" "TI")])
;; sha1msg2: second intermediate SHA-1 message-schedule calculation.
27733 (define_insn "sha1msg2"
27734 [(set (match_operand:V4SI 0 "register_operand" "=x")
27736 [(match_operand:V4SI 1 "register_operand" "0")
27737 (match_operand:V4SI 2 "vector_operand" "xBm")]
27740 "sha1msg2\t{%2, %0|%0, %2}"
27741 [(set_attr "type" "sselog1")
27742 (set_attr "mode" "TI")])
;; sha1nexte: calculate SHA-1 state variable E after four rounds.
27744 (define_insn "sha1nexte"
27745 [(set (match_operand:V4SI 0 "register_operand" "=x")
27747 [(match_operand:V4SI 1 "register_operand" "0")
27748 (match_operand:V4SI 2 "vector_operand" "xBm")]
27749 UNSPEC_SHA1NEXTE))]
27751 "sha1nexte\t{%2, %0|%0, %2}"
27752 [(set_attr "type" "sselog1")
27753 (set_attr "mode" "TI")])
;; sha1rnds4: four SHA-1 rounds; the 2-bit immediate selects the round
;; function/constant group.
27755 (define_insn "sha1rnds4"
27756 [(set (match_operand:V4SI 0 "register_operand" "=x")
27758 [(match_operand:V4SI 1 "register_operand" "0")
27759 (match_operand:V4SI 2 "vector_operand" "xBm")
27760 (match_operand:SI 3 "const_0_to_3_operand")]
27761 UNSPEC_SHA1RNDS4))]
27763 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
27764 [(set_attr "type" "sselog1")
27765 (set_attr "length_immediate" "1")
27766 (set_attr "mode" "TI")])
;; sha256msg1 / sha256msg2: SHA-256 message-schedule calculations.
27768 (define_insn "sha256msg1"
27769 [(set (match_operand:V4SI 0 "register_operand" "=x")
27771 [(match_operand:V4SI 1 "register_operand" "0")
27772 (match_operand:V4SI 2 "vector_operand" "xBm")]
27773 UNSPEC_SHA256MSG1))]
27775 "sha256msg1\t{%2, %0|%0, %2}"
27776 [(set_attr "type" "sselog1")
27777 (set_attr "mode" "TI")])
27779 (define_insn "sha256msg2"
27780 [(set (match_operand:V4SI 0 "register_operand" "=x")
27782 [(match_operand:V4SI 1 "register_operand" "0")
27783 (match_operand:V4SI 2 "vector_operand" "xBm")]
27784 UNSPEC_SHA256MSG2))]
27786 "sha256msg2\t{%2, %0|%0, %2}"
27787 [(set_attr "type" "sselog1")
27788 (set_attr "mode" "TI")])
;; sha256rnds2: two SHA-256 rounds; the implicit xmm0 key operand is
;; expressed via the "Yz" constraint on operand 3.
27790 (define_insn "sha256rnds2"
27791 [(set (match_operand:V4SI 0 "register_operand" "=x")
27793 [(match_operand:V4SI 1 "register_operand" "0")
27794 (match_operand:V4SI 2 "vector_operand" "xBm")
27795 (match_operand:V4SI 3 "register_operand" "Yz")]
27796 UNSPEC_SHA256RNDS2))]
27798 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
27799 [(set_attr "type" "sselog1")
27800 (set_attr "length_immediate" "1")
27801 (set_attr "mode" "TI")])
;; Cast of a 128-bit vector to a 512-bit vector (upper bits undefined,
;; represented as UNSPEC_CAST).  After reload it degenerates into a
;; plain move of the low quarter.
27803 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
27804 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
27805 (vec_concat:AVX512MODE2P
27806 (vec_concat:<ssehalfvecmode>
27807 (match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")
27808 (unspec:<ssequartermode> [(const_int 0)] UNSPEC_CAST))
27809 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
27810 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
27812 "&& reload_completed"
27813 [(set (match_dup 0) (match_dup 1))]
;; Rewrite one side as a subreg so both sides of the emitted move have
;; the same (quarter- or full-width) mode.
27815 if (REG_P (operands[0]))
27816 operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
27818 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
27819 <ssequartermode>mode);
;; Same as above but casting a 256-bit vector to 512 bits: splits into a
;; move of the low half after reload.
27822 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
27823 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
27824 (vec_concat:AVX512MODE2P
27825 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
27826 (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
27827 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
27829 "&& reload_completed"
27830 [(set (match_dup 0) (match_dup 1))]
27832 if (REG_P (operands[0]))
27833 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
27835 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
27836 <ssehalfvecmode>mode);
;; AVX512IFMA / AVX-IFMA 52-bit multiply-add: iterator over the low/high
;; product-half unspecs and the matching "luq"/"huq" name fragment.
27839 (define_int_iterator VPMADD52
27840 [UNSPEC_VPMADD52LUQ
27841 UNSPEC_VPMADD52HUQ])
27843 (define_int_attr vpmadd52type
27844 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
;; Zero-masking expanders: forward to the *_maskz_1 insn with a zero
;; merge vector.
27846 (define_expand "vpmadd52huq<mode>_maskz"
27847 [(match_operand:VI8_AVX512VL 0 "register_operand")
27848 (match_operand:VI8_AVX512VL 1 "register_operand")
27849 (match_operand:VI8_AVX512VL 2 "register_operand")
27850 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
27851 (match_operand:<avx512fmaskmode> 4 "register_operand")]
27852 "TARGET_AVX512IFMA"
27854 emit_insn (gen_vpmadd52huq<mode>_maskz_1 (
27855 operands[0], operands[1], operands[2], operands[3],
27856 CONST0_RTX (<MODE>mode), operands[4]));
27860 (define_expand "vpmadd52luq<mode>_maskz"
27861 [(match_operand:VI8_AVX512VL 0 "register_operand")
27862 (match_operand:VI8_AVX512VL 1 "register_operand")
27863 (match_operand:VI8_AVX512VL 2 "register_operand")
27864 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
27865 (match_operand:<avx512fmaskmode> 4 "register_operand")]
27866 "TARGET_AVX512IFMA"
27868 emit_insn (gen_vpmadd52luq<mode>_maskz_1 (
27869 operands[0], operands[1], operands[2], operands[3],
27870 CONST0_RTX (<MODE>mode), operands[4]));
;; 512-bit vpmadd52{luq,huq}: accumulate into operand 0 (tied to the
;; addend, constraint "0").  AVX512IFMA only.
27874 (define_insn "vpmadd52<vpmadd52type>v8di"
27875 [(set (match_operand:V8DI 0 "register_operand" "=v")
27877 [(match_operand:V8DI 1 "register_operand" "0")
27878 (match_operand:V8DI 2 "register_operand" "v")
27879 (match_operand:V8DI 3 "nonimmediate_operand" "vm")]
27881 "TARGET_AVX512IFMA"
27882 "vpmadd52<vpmadd52type>\t{%3, %2, %0|%0, %2, %3}"
27883 [(set_attr "type" "ssemuladd")
27884 (set_attr "prefix" "evex")
27885 (set_attr "mode" "XI")])
;; 128/256-bit forms: two alternatives, the VEX-encoded AVX-IFMA variant
;; (explicit {vex} prefix) and the EVEX AVX512IFMA+VL variant.
27887 (define_insn "vpmadd52<vpmadd52type><mode>"
27888 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,v")
27890 [(match_operand:VI8_AVX2 1 "register_operand" "0,0")
27891 (match_operand:VI8_AVX2 2 "register_operand" "x,v")
27892 (match_operand:VI8_AVX2 3 "nonimmediate_operand" "xm,vm")]
27894 "TARGET_AVXIFMA || (TARGET_AVX512IFMA && TARGET_AVX512VL)"
27896 %{vex%} vpmadd52<vpmadd52type>\t{%3, %2, %0|%0, %2, %3}
27897 vpmadd52<vpmadd52type>\t{%3, %2, %0|%0, %2, %3}"
27898 [(set_attr "isa" "avxifma,avx512ifmavl")
27899 (set_attr "type" "ssemuladd")
27900 (set_attr "prefix" "vex,evex")
27901 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masked form ({z} modifier): inactive lanes become zero.
27903 (define_insn "vpmadd52<vpmadd52type><mode>_maskz_1"
27904 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
27905 (vec_merge:VI8_AVX512VL
27906 (unspec:VI8_AVX512VL
27907 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
27908 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
27909 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
27911 (match_operand:VI8_AVX512VL 4 "const0_operand" "C")
27912 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
27913 "TARGET_AVX512IFMA"
27914 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
27915 [(set_attr "type" "ssemuladd")
27916 (set_attr "prefix" "evex")
27917 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked form: inactive lanes keep the old destination value.
27919 (define_insn "vpmadd52<vpmadd52type><mode>_mask"
27920 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
27921 (vec_merge:VI8_AVX512VL
27922 (unspec:VI8_AVX512VL
27923 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
27924 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
27925 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
27928 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
27929 "TARGET_AVX512IFMA"
27930 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
27931 [(set_attr "type" "ssemuladd")
27932 (set_attr "prefix" "evex")
27933 (set_attr "mode" "<sseinsnmode>")])
;; vpmultishiftqb (AVX512VBMI): per-byte selection of unaligned 8-bit
;; fields from the 64-bit source lane, controlled by operand 1.
27935 (define_insn "vpmultishiftqb<mode><mask_name>"
27936 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
27937 (unspec:VI1_AVX512VL
27938 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
27939 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
27940 UNSPEC_VPMULTISHIFT))]
27941 "TARGET_AVX512VBMI"
27942 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
27943 [(set_attr "type" "sselog")
27944 (set_attr "prefix" "evex")
27945 (set_attr "mode" "<sseinsnmode>")])
;; 2048-bit "register group" modes used by the AVX5124FMAPS/4VNNIW
;; multi-register operands, plus the 512-bit mode each quarter maps to.
27947 (define_mode_iterator IMOD4
27948 [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])
27950 (define_mode_attr imod4_narrow
27951 [(V64SF "V16SF") (V64SI "V16SI")])
;; Standard move expander for the group modes.
27953 (define_expand "mov<mode>"
27954 [(set (match_operand:IMOD4 0 "nonimmediate_operand")
27955 (match_operand:IMOD4 1 "nonimm_or_0_operand"))]
27958 ix86_expand_vector_move (<MODE>mode, operands);
;; Group move: after reload, split into four 512-bit submoves
;; (i * 64 bytes apart).
27962 (define_insn_and_split "*mov<mode>_internal"
27963 [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
27964 (match_operand:IMOD4 1 "nonimm_or_0_operand" " C,vm,v"))]
27966 && (register_operand (operands[0], <MODE>mode)
27967 || register_operand (operands[1], <MODE>mode))"
27969 "&& reload_completed"
27975 for (i = 0; i < 4; i++)
27977 op0 = simplify_subreg
27978 (<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
27979 op1 = simplify_subreg
27980 (<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
27981 emit_move_insn (op0, op1);
;; AVX5124FMAPS v4fmaddps/v4fmaddss: multiply-add over a group of four
;; source registers (operand 2, V64SF; %g prints the group base) and a
;; 128-bit memory operand.  Unmasked, merge-masked and zero-masked forms.
27986 (define_insn "avx5124fmaddps_4fmaddps"
27987 [(set (match_operand:V16SF 0 "register_operand" "=v")
27989 [(match_operand:V16SF 1 "register_operand" "0")
27990 (match_operand:V64SF 2 "register_operand" "v")
27991 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
27992 "TARGET_AVX5124FMAPS"
27993 "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
27994 [(set_attr ("type") ("ssemuladd"))
27995 (set_attr ("prefix") ("evex"))
27996 (set_attr ("mode") ("V16SF"))])
;; Merge-masked packed form.
27998 (define_insn "avx5124fmaddps_4fmaddps_mask"
27999 [(set (match_operand:V16SF 0 "register_operand" "=v")
28002 [(match_operand:V64SF 1 "register_operand" "v")
28003 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
28004 (match_operand:V16SF 3 "register_operand" "0")
28005 (match_operand:HI 4 "register_operand" "Yk")))]
28006 "TARGET_AVX5124FMAPS"
28007 "v4fmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
28008 [(set_attr ("type") ("ssemuladd"))
28009 (set_attr ("prefix") ("evex"))
28010 (set_attr ("mode") ("V16SF"))])
;; Zero-masked packed form ({z}).
28012 (define_insn "avx5124fmaddps_4fmaddps_maskz"
28013 [(set (match_operand:V16SF 0 "register_operand" "=v")
28016 [(match_operand:V16SF 1 "register_operand" "0")
28017 (match_operand:V64SF 2 "register_operand" "v")
28018 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
28019 (match_operand:V16SF 4 "const0_operand")
28020 (match_operand:HI 5 "register_operand" "Yk")))]
28021 "TARGET_AVX5124FMAPS"
28022 "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
28023 [(set_attr ("type") ("ssemuladd"))
28024 (set_attr ("prefix") ("evex"))
28025 (set_attr ("mode") ("V16SF"))])
;; Scalar (SS) variant; %x prints the 128-bit view of the group base.
28027 (define_insn "avx5124fmaddps_4fmaddss"
28028 [(set (match_operand:V4SF 0 "register_operand" "=v")
28030 [(match_operand:V4SF 1 "register_operand" "0")
28031 (match_operand:V64SF 2 "register_operand" "v")
28032 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
28033 "TARGET_AVX5124FMAPS"
28034 "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
28035 [(set_attr ("type") ("ssemuladd"))
28036 (set_attr ("prefix") ("evex"))
28037 (set_attr ("mode") ("SF"))])
;; Merge-masked scalar form.
28039 (define_insn "avx5124fmaddps_4fmaddss_mask"
28040 [(set (match_operand:V4SF 0 "register_operand" "=v")
28043 [(match_operand:V64SF 1 "register_operand" "v")
28044 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
28045 (match_operand:V4SF 3 "register_operand" "0")
28046 (match_operand:QI 4 "register_operand" "Yk")))]
28047 "TARGET_AVX5124FMAPS"
28048 "v4fmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
28049 [(set_attr ("type") ("ssemuladd"))
28050 (set_attr ("prefix") ("evex"))
28051 (set_attr ("mode") ("SF"))])
;; Zero-masked scalar form.
28053 (define_insn "avx5124fmaddps_4fmaddss_maskz"
28054 [(set (match_operand:V4SF 0 "register_operand" "=v")
28057 [(match_operand:V4SF 1 "register_operand" "0")
28058 (match_operand:V64SF 2 "register_operand" "v")
28059 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
28060 (match_operand:V4SF 4 "const0_operand")
28061 (match_operand:QI 5 "register_operand" "Yk")))]
28062 "TARGET_AVX5124FMAPS"
28063 "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
28064 [(set_attr ("type") ("ssemuladd"))
28065 (set_attr ("prefix") ("evex"))
28066 (set_attr ("mode") ("SF"))])
;; Negated-multiply counterparts (v4fnmaddps/v4fnmaddss), mirroring the
;; v4fmadd patterns above: unmasked, merge-masked and zero-masked forms.
28068 (define_insn "avx5124fmaddps_4fnmaddps"
28069 [(set (match_operand:V16SF 0 "register_operand" "=v")
28071 [(match_operand:V16SF 1 "register_operand" "0")
28072 (match_operand:V64SF 2 "register_operand" "v")
28073 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
28074 "TARGET_AVX5124FMAPS"
28075 "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
28076 [(set_attr ("type") ("ssemuladd"))
28077 (set_attr ("prefix") ("evex"))
28078 (set_attr ("mode") ("V16SF"))])
28080 (define_insn "avx5124fmaddps_4fnmaddps_mask"
28081 [(set (match_operand:V16SF 0 "register_operand" "=v")
28084 [(match_operand:V64SF 1 "register_operand" "v")
28085 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
28086 (match_operand:V16SF 3 "register_operand" "0")
28087 (match_operand:HI 4 "register_operand" "Yk")))]
28088 "TARGET_AVX5124FMAPS"
28089 "v4fnmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
28090 [(set_attr ("type") ("ssemuladd"))
28091 (set_attr ("prefix") ("evex"))
28092 (set_attr ("mode") ("V16SF"))])
28094 (define_insn "avx5124fmaddps_4fnmaddps_maskz"
28095 [(set (match_operand:V16SF 0 "register_operand" "=v")
28098 [(match_operand:V16SF 1 "register_operand" "0")
28099 (match_operand:V64SF 2 "register_operand" "v")
28100 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
28101 (match_operand:V16SF 4 "const0_operand")
28102 (match_operand:HI 5 "register_operand" "Yk")))]
28103 "TARGET_AVX5124FMAPS"
28104 "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
28105 [(set_attr ("type") ("ssemuladd"))
28106 (set_attr ("prefix") ("evex"))
28107 (set_attr ("mode") ("V16SF"))])
;; Scalar (SS) negated forms.
28109 (define_insn "avx5124fmaddps_4fnmaddss"
28110 [(set (match_operand:V4SF 0 "register_operand" "=v")
28112 [(match_operand:V4SF 1 "register_operand" "0")
28113 (match_operand:V64SF 2 "register_operand" "v")
28114 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
28115 "TARGET_AVX5124FMAPS"
28116 "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
28117 [(set_attr ("type") ("ssemuladd"))
28118 (set_attr ("prefix") ("evex"))
28119 (set_attr ("mode") ("SF"))])
28121 (define_insn "avx5124fmaddps_4fnmaddss_mask"
28122 [(set (match_operand:V4SF 0 "register_operand" "=v")
28125 [(match_operand:V64SF 1 "register_operand" "v")
28126 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
28127 (match_operand:V4SF 3 "register_operand" "0")
28128 (match_operand:QI 4 "register_operand" "Yk")))]
28129 "TARGET_AVX5124FMAPS"
28130 "v4fnmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
28131 [(set_attr ("type") ("ssemuladd"))
28132 (set_attr ("prefix") ("evex"))
28133 (set_attr ("mode") ("SF"))])
28135 (define_insn "avx5124fmaddps_4fnmaddss_maskz"
28136 [(set (match_operand:V4SF 0 "register_operand" "=v")
28139 [(match_operand:V4SF 1 "register_operand" "0")
28140 (match_operand:V64SF 2 "register_operand" "v")
28141 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
28142 (match_operand:V4SF 4 "const0_operand")
28143 (match_operand:QI 5 "register_operand" "Yk")))]
28144 "TARGET_AVX5124FMAPS"
28145 "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
28146 [(set_attr ("type") ("ssemuladd"))
28147 (set_attr ("prefix") ("evex"))
28148 (set_attr ("mode") ("SF"))])
;; AVX5124VNNIW vp4dpwssd / vp4dpwssds: word dot-product accumulation
;; (saturating for the "s" forms) over a four-register group (operand 2,
;; V64SI; %g prints the group base) and a 128-bit memory operand.
;; Each mnemonic has unmasked, merge-masked and zero-masked patterns.
28150 (define_insn "avx5124vnniw_vp4dpwssd"
28151 [(set (match_operand:V16SI 0 "register_operand" "=v")
28153 [(match_operand:V16SI 1 "register_operand" "0")
28154 (match_operand:V64SI 2 "register_operand" "v")
28155 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
28156 "TARGET_AVX5124VNNIW"
28157 "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
28158 [(set_attr ("type") ("ssemuladd"))
28159 (set_attr ("prefix") ("evex"))
28160 (set_attr ("mode") ("TI"))])
28162 (define_insn "avx5124vnniw_vp4dpwssd_mask"
28163 [(set (match_operand:V16SI 0 "register_operand" "=v")
28166 [(match_operand:V64SI 1 "register_operand" "v")
28167 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
28168 (match_operand:V16SI 3 "register_operand" "0")
28169 (match_operand:HI 4 "register_operand" "Yk")))]
28170 "TARGET_AVX5124VNNIW"
28171 "vp4dpwssd\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
28172 [(set_attr ("type") ("ssemuladd"))
28173 (set_attr ("prefix") ("evex"))
28174 (set_attr ("mode") ("TI"))])
28176 (define_insn "avx5124vnniw_vp4dpwssd_maskz"
28177 [(set (match_operand:V16SI 0 "register_operand" "=v")
28180 [(match_operand:V16SI 1 "register_operand" "0")
28181 (match_operand:V64SI 2 "register_operand" "v")
28182 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
28183 (match_operand:V16SI 4 "const0_operand")
28184 (match_operand:HI 5 "register_operand" "Yk")))]
28185 "TARGET_AVX5124VNNIW"
28186 "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
28187 [(set_attr ("type") ("ssemuladd"))
28188 (set_attr ("prefix") ("evex"))
28189 (set_attr ("mode") ("TI"))])
;; Saturating variant.
28191 (define_insn "avx5124vnniw_vp4dpwssds"
28192 [(set (match_operand:V16SI 0 "register_operand" "=v")
28194 [(match_operand:V16SI 1 "register_operand" "0")
28195 (match_operand:V64SI 2 "register_operand" "v")
28196 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
28197 "TARGET_AVX5124VNNIW"
28198 "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
28199 [(set_attr ("type") ("ssemuladd"))
28200 (set_attr ("prefix") ("evex"))
28201 (set_attr ("mode") ("TI"))])
28203 (define_insn "avx5124vnniw_vp4dpwssds_mask"
28204 [(set (match_operand:V16SI 0 "register_operand" "=v")
28207 [(match_operand:V64SI 1 "register_operand" "v")
28208 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
28209 (match_operand:V16SI 3 "register_operand" "0")
28210 (match_operand:HI 4 "register_operand" "Yk")))]
28211 "TARGET_AVX5124VNNIW"
28212 "vp4dpwssds\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
28213 [(set_attr ("type") ("ssemuladd"))
28214 (set_attr ("prefix") ("evex"))
28215 (set_attr ("mode") ("TI"))])
28217 (define_insn "avx5124vnniw_vp4dpwssds_maskz"
28218 [(set (match_operand:V16SI 0 "register_operand" "=v")
28221 [(match_operand:V16SI 1 "register_operand" "0")
28222 (match_operand:V64SI 2 "register_operand" "v")
28223 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
28224 (match_operand:V16SI 4 "const0_operand")
28225 (match_operand:HI 5 "register_operand" "Yk")))]
28226 "TARGET_AVX5124VNNIW"
28227 "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
28228 [(set_attr ("type") ("ssemuladd"))
28229 (set_attr ("prefix") ("evex"))
28230 (set_attr ("mode") ("TI"))])
;; Vector popcount for dword/qword elements (AVX512VPOPCNTDQ):
;; standard-name expander plus the vpopcntd/vpopcntq insn.
28232 (define_expand "popcount<mode>2"
28233 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
28234 (popcount:VI48_AVX512VL
28235 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")))]
28236 "TARGET_AVX512VPOPCNTDQ")
28238 (define_insn "vpopcount<mode><mask_name>"
28239 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
28240 (popcount:VI48_AVX512VL
28241 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
28242 "TARGET_AVX512VPOPCNTDQ"
28243 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
28245 ;; Save multiple registers out-of-line.
;; These match_parallel patterns (64-bit SSE only) call an out-of-line
;; stub named by the symbol in operand 1; the parallel contents are
;; checked by the save_multiple/restore_multiple predicates.
28246 (define_insn "*save_multiple<mode>"
28247 [(match_parallel 0 "save_multiple"
28248 [(use (match_operand:P 1 "symbol_operand"))])]
28249 "TARGET_SSE && TARGET_64BIT"
28252 ;; Restore multiple registers out-of-line.
28253 (define_insn "*restore_multiple<mode>"
28254 [(match_parallel 0 "restore_multiple"
28255 [(use (match_operand:P 1 "symbol_operand"))])]
28256 "TARGET_SSE && TARGET_64BIT"
28259 ;; Restore multiple registers out-of-line and return.
;; The stack pointer is restored from r10 as part of the tail.
28260 (define_insn "*restore_multiple_and_return<mode>"
28261 [(match_parallel 0 "restore_multiple"
28263 (use (match_operand:P 1 "symbol_operand"))
28264 (set (reg:DI SP_REG) (reg:DI R10_REG))
28266 "TARGET_SSE && TARGET_64BIT"
28269 ;; Restore multiple registers out-of-line when hard frame pointer is used,
28270 ;; perform the leave operation prior to returning (from the function).
28271 (define_insn "*restore_multiple_leave_return<mode>"
28272 [(match_parallel 0 "restore_multiple"
28274 (use (match_operand:P 1 "symbol_operand"))
28275 (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
28276 (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
28277 (clobber (mem:BLK (scratch)))
28279 "TARGET_SSE && TARGET_64BIT"
;; Vector popcount for byte/word elements (AVX512BITALG):
;; standard-name expander plus the vpopcntb/vpopcntw insn.
28282 (define_expand "popcount<mode>2"
28283 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
28284 (popcount:VI12_AVX512VL
28285 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
28286 "TARGET_AVX512BITALG")
28288 (define_insn "vpopcount<mode><mask_name>"
28289 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
28290 (popcount:VI12_AVX512VL
28291 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
28292 "TARGET_AVX512BITALG"
28293 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
;; GFNI (Galois Field New Instructions): affine transform, inverse affine
;; transform, and GF(2^8) multiply on byte vectors.  Two alternatives each:
;; legacy-SSE (tied op 0, "noavx") and VEX/EVEX 3-operand ("avx").
;; NOTE(review): the insn-condition lines (presumably TARGET_GFNI-based) are
;; missing from this listing — confirm against the full file.
28295 (define_insn "vgf2p8affineinvqb_<mode><mask_name>"
28296 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
28297 (unspec:VI1_AVX512F
28298 [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
28299 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
28300 (match_operand 3 "const_0_to_255_operand")]
28301 UNSPEC_GF2P8AFFINEINV))]
28304 gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3}
28305 vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
28306 [(set_attr "isa" "noavx,avx")
28307 (set_attr "prefix_data16" "1,*")
28308 (set_attr "prefix_extra" "1")
28309 (set_attr "prefix" "orig,maybe_evex")
28310 (set_attr "mode" "<sseinsnmode>")])
28312 (define_insn "vgf2p8affineqb_<mode><mask_name>"
28313 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
28314 (unspec:VI1_AVX512F
28315 [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
28316 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
28317 (match_operand 3 "const_0_to_255_operand")]
28318 UNSPEC_GF2P8AFFINE))]
28321 gf2p8affineqb\t{%3, %2, %0| %0, %2, %3}
28322 vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}"
28323 [(set_attr "isa" "noavx,avx")
28324 (set_attr "prefix_data16" "1,*")
28325 (set_attr "prefix_extra" "1")
28326 (set_attr "prefix" "orig,maybe_evex")
28327 (set_attr "mode" "<sseinsnmode>")])
;; GF(2^8) multiply; "%0,v" on op 1 marks the operands as commutative.
28329 (define_insn "vgf2p8mulb_<mode><mask_name>"
28330 [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
28331 (unspec:VI1_AVX512F
28332 [(match_operand:VI1_AVX512F 1 "register_operand" "%0,v")
28333 (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")]
28337 gf2p8mulb\t{%2, %0| %0, %2}
28338 vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}"
28339 [(set_attr "isa" "noavx,avx")
28340 (set_attr "prefix_data16" "1,*")
28341 (set_attr "prefix_extra" "1")
28342 (set_attr "prefix" "orig,maybe_evex")
28343 (set_attr "mode" "<sseinsnmode>")])
;; AVX512VBMI2 concatenate-and-shift by immediate: vpshrd (right) and
;; vpshld (left) on 16/32/64-bit lanes, with optional masking.
;; NOTE(review): the UNSPEC name lines are missing from this listing.
28345 (define_insn "vpshrd_<mode><mask_name>"
28346 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
28347 (unspec:VI248_AVX512VL
28348 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
28349 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
28350 (match_operand:SI 3 "const_0_to_255_operand")]
28352 "TARGET_AVX512VBMI2"
28353 "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
28354 [(set_attr ("prefix") ("evex"))])
28356 (define_insn "vpshld_<mode><mask_name>"
28357 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
28358 (unspec:VI248_AVX512VL
28359 [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
28360 (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
28361 (match_operand:SI 3 "const_0_to_255_operand")]
28363 "TARGET_AVX512VBMI2"
28364 "vpshld<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3 }"
28365 [(set_attr ("prefix") ("evex"))])
;; AVX512VBMI2 variable concatenate-shift-right (vpshrdv): plain, merge-mask
;; (_mask), and zero-mask (_maskz expander + _maskz_1 insn) forms.
;; Operand 1 is tied to the destination ("0") — the instruction accumulates.
28367 (define_insn "vpshrdv_<mode>"
28368 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
28369 (unspec:VI248_AVX512VL
28370 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
28371 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
28372 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
28374 "TARGET_AVX512VBMI2"
28375 "vpshrdv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
28376 [(set_attr ("prefix") ("evex"))
28377 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masking: lanes with a clear mask bit keep the old value of op 0.
28379 (define_insn "vpshrdv_<mode>_mask"
28380 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
28381 (vec_merge:VI248_AVX512VL
28382 (unspec:VI248_AVX512VL
28383 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
28384 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
28385 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
28388 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
28389 "TARGET_AVX512VBMI2"
28390 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
28391 [(set_attr ("prefix") ("evex"))
28392 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking expander: forwards to _maskz_1 with a zero merge vector.
28394 (define_expand "vpshrdv_<mode>_maskz"
28395 [(match_operand:VI248_AVX512VL 0 "register_operand")
28396 (match_operand:VI248_AVX512VL 1 "register_operand")
28397 (match_operand:VI248_AVX512VL 2 "register_operand")
28398 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
28399 (match_operand:<avx512fmaskmode> 4 "register_operand")]
28400 "TARGET_AVX512VBMI2"
28402 emit_insn (gen_vpshrdv_<mode>_maskz_1 (operands[0], operands[1],
28403 operands[2], operands[3],
28404 CONST0_RTX (<MODE>mode),
28409 (define_insn "vpshrdv_<mode>_maskz_1"
28410 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
28411 (vec_merge:VI248_AVX512VL
28412 (unspec:VI248_AVX512VL
28413 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
28414 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
28415 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
28417 (match_operand:VI248_AVX512VL 4 "const0_operand")
28418 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
28419 "TARGET_AVX512VBMI2"
28420 "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
28421 [(set_attr ("prefix") ("evex"))
28422 (set_attr "mode" "<sseinsnmode>")])
;; AVX512VBMI2 variable concatenate-shift-left (vpshldv): plain, merge-mask,
;; and zero-mask forms; mirror images of the vpshrdv patterns above.
28424 (define_insn "vpshldv_<mode>"
28425 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
28426 (unspec:VI248_AVX512VL
28427 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
28428 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
28429 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
28431 "TARGET_AVX512VBMI2"
28432 "vpshldv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3 }"
28433 [(set_attr ("prefix") ("evex"))
28434 (set_attr "mode" "<sseinsnmode>")])
28436 (define_insn "vpshldv_<mode>_mask"
28437 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
28438 (vec_merge:VI248_AVX512VL
28439 (unspec:VI248_AVX512VL
28440 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
28441 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
28442 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
28445 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
28446 "TARGET_AVX512VBMI2"
28447 "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
28448 [(set_attr ("prefix") ("evex"))
28449 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking expander: forwards to _maskz_1 with a zero merge vector.
28451 (define_expand "vpshldv_<mode>_maskz"
28452 [(match_operand:VI248_AVX512VL 0 "register_operand")
28453 (match_operand:VI248_AVX512VL 1 "register_operand")
28454 (match_operand:VI248_AVX512VL 2 "register_operand")
28455 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
28456 (match_operand:<avx512fmaskmode> 4 "register_operand")]
28457 "TARGET_AVX512VBMI2"
28459 emit_insn (gen_vpshldv_<mode>_maskz_1 (operands[0], operands[1],
28460 operands[2], operands[3],
28461 CONST0_RTX (<MODE>mode),
28466 (define_insn "vpshldv_<mode>_maskz_1"
28467 [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
28468 (vec_merge:VI248_AVX512VL
28469 (unspec:VI248_AVX512VL
28470 [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
28471 (match_operand:VI248_AVX512VL 2 "register_operand" "v")
28472 (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
28474 (match_operand:VI248_AVX512VL 4 "const0_operand")
28475 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
28476 "TARGET_AVX512VBMI2"
28477 "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
28478 [(set_attr ("prefix") ("evex"))
28479 (set_attr "mode" "<sseinsnmode>")])
;; usdot_prod: unsigned×signed byte dot-product accumulating into dword
;; lanes, lowered to vpdpbusd.  Inputs are reinterpreted (lowpart_subreg)
;; as the dword vector mode expected by the VNNI pattern.
28481 (define_expand "usdot_prod<mode>"
28482 [(match_operand:<ssedvecmode> 0 "register_operand")
28483 (match_operand:VI1_AVX512VNNI 1 "register_operand")
28484 (match_operand:VI1_AVX512VNNI 2 "register_operand")
28485 (match_operand:<ssedvecmode> 3 "register_operand")]
;; 512-bit always allowed; narrower sizes need AVX512VNNI+VL or AVXVNNI.
28486 "(<MODE_SIZE> == 64
28487 ||((TARGET_AVX512VNNI && TARGET_AVX512VL)
28488 || TARGET_AVXVNNI))"
28490 operands[1] = lowpart_subreg (<ssedvecmode>mode,
28491 force_reg (<MODE>mode, operands[1]),
28493 operands[2] = lowpart_subreg (<ssedvecmode>mode,
28494 force_reg (<MODE>mode, operands[2]),
;; Copy the accumulator into the destination, then accumulate in place.
28496 emit_insn (gen_rtx_SET (operands[0], operands[3]));
28497 emit_insn (gen_vpdpbusd_<ssedvecmodelower> (operands[0], operands[3],
28498 operands[1], operands[2]))
;; AVX512VNNI / AVX-VNNI vpdpbusd (u8 x s8 multiply, accumulate into s32):
;; 512-bit insn, 128/256-bit dual VEX|EVEX insn, merge-mask, and zero-mask
;; forms.  Operand 1 is the tied accumulator.
;; NOTE(review): several interior lines (unspec: wrappers, UNSPEC names) are
;; absent from this listing — confirm against the full file.
28502 (define_insn "vpdpbusd_v16si"
28503 [(set (match_operand:V16SI 0 "register_operand" "=v")
28505 [(match_operand:V16SI 1 "register_operand" "0")
28506 (match_operand:V16SI 2 "register_operand" "v")
28507 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
28509 "TARGET_AVX512VNNI"
28510 "vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
28511 [(set_attr ("prefix") ("evex"))])
;; Narrow modes: alternative 0 forces the VEX (AVX-VNNI) encoding with
;; %{vex%}; alternative 1 is the EVEX AVX512VNNI+VL encoding.
28513 (define_insn "vpdpbusd_<mode>"
28514 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
28516 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
28517 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
28518 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
28520 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
28522 %{vex%} vpdpbusd\t{%3, %2, %0|%0, %2, %3}
28523 vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
28524 [(set_attr ("prefix") ("vex,evex"))
28525 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
28527 (define_insn "vpdpbusd_<mode>_mask"
28528 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
28529 (vec_merge:VI4_AVX512VL
28530 (unspec:VI4_AVX512VL
28531 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
28532 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
28533 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
28536 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
28537 "TARGET_AVX512VNNI"
28538 "vpdpbusd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
28539 [(set_attr ("prefix") ("evex"))])
;; Zero-masking expander: forwards to _maskz_1 with a zero merge vector.
28541 (define_expand "vpdpbusd_<mode>_maskz"
28542 [(match_operand:VI4_AVX512VL 0 "register_operand")
28543 (match_operand:VI4_AVX512VL 1 "register_operand")
28544 (match_operand:VI4_AVX512VL 2 "register_operand")
28545 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
28546 (match_operand:<avx512fmaskmode> 4 "register_operand")]
28547 "TARGET_AVX512VNNI"
28549 emit_insn (gen_vpdpbusd_<mode>_maskz_1 (operands[0], operands[1],
28550 operands[2], operands[3],
28551 CONST0_RTX (<MODE>mode),
28556 (define_insn "vpdpbusd_<mode>_maskz_1"
28557 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
28558 (vec_merge:VI4_AVX512VL
28559 (unspec:VI4_AVX512VL
28560 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
28561 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
28562 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")
28564 (match_operand:VI4_AVX512VL 4 "const0_operand")
28565 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
28566 "TARGET_AVX512VNNI"
28567 "vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
28568 [(set_attr ("prefix") ("evex"))])
;; vpdpbusds: saturating variant of vpdpbusd (u8 x s8 dot-product with
;; signed saturation into s32).  Same four-pattern structure as above.
28570 (define_insn "vpdpbusds_v16si"
28571 [(set (match_operand:V16SI 0 "register_operand" "=v")
28573 [(match_operand:V16SI 1 "register_operand" "0")
28574 (match_operand:V16SI 2 "register_operand" "v")
28575 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
28576 UNSPEC_VPDPBUSDS))]
28577 "TARGET_AVX512VNNI"
28578 "vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
28579 [(set_attr ("prefix") ("evex"))])
;; Narrow modes: VEX (AVX-VNNI) vs. EVEX (AVX512VNNI+VL) alternatives.
28581 (define_insn "vpdpbusds_<mode>"
28582 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
28584 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
28585 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
28586 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
28587 UNSPEC_VPDPBUSDS))]
28588 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
28590 %{vex%} vpdpbusds\t{%3, %2, %0|%0, %2, %3}
28591 vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
28592 [(set_attr ("prefix") ("vex,evex"))
28593 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
28595 (define_insn "vpdpbusds_<mode>_mask"
28596 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
28597 (vec_merge:VI4_AVX512VL
28598 (unspec:VI4_AVX512VL
28599 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
28600 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
28601 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
28604 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
28605 "TARGET_AVX512VNNI"
28606 "vpdpbusds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
28607 [(set_attr ("prefix") ("evex"))])
;; Zero-masking expander: forwards to _maskz_1 with a zero merge vector.
28609 (define_expand "vpdpbusds_<mode>_maskz"
28610 [(match_operand:VI4_AVX512VL 0 "register_operand")
28611 (match_operand:VI4_AVX512VL 1 "register_operand")
28612 (match_operand:VI4_AVX512VL 2 "register_operand")
28613 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
28614 (match_operand:<avx512fmaskmode> 4 "register_operand")]
28615 "TARGET_AVX512VNNI"
28617 emit_insn (gen_vpdpbusds_<mode>_maskz_1 (operands[0], operands[1],
28618 operands[2], operands[3],
28619 CONST0_RTX (<MODE>mode),
28624 (define_insn "vpdpbusds_<mode>_maskz_1"
28625 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
28626 (vec_merge:VI4_AVX512VL
28627 (unspec:VI4_AVX512VL
28628 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
28629 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
28630 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
28632 (match_operand:VI4_AVX512VL 4 "const0_operand")
28633 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
28634 "TARGET_AVX512VNNI"
28635 "vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
28636 [(set_attr ("prefix") ("evex"))])
;; vpdpwssd: signed 16-bit word pairs multiplied and accumulated into s32.
;; Same four-pattern structure (512-bit, VEX|EVEX narrow, mask, maskz).
28638 (define_insn "vpdpwssd_v16si"
28639 [(set (match_operand:V16SI 0 "register_operand" "=v")
28641 [(match_operand:V16SI 1 "register_operand" "0")
28642 (match_operand:V16SI 2 "register_operand" "v")
28643 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
28645 "TARGET_AVX512VNNI"
28646 "vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
28647 [(set_attr ("prefix") ("evex"))])
28649 (define_insn "vpdpwssd_<mode>"
28650 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
28652 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
28653 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
28654 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
28656 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
28658 %{vex%} vpdpwssd\t{%3, %2, %0|%0, %2, %3}
28659 vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
28660 [(set_attr ("prefix") ("vex,evex"))
28661 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
28663 (define_insn "vpdpwssd_<mode>_mask"
28664 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
28665 (vec_merge:VI4_AVX512VL
28666 (unspec:VI4_AVX512VL
28667 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
28668 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
28669 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
28672 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
28673 "TARGET_AVX512VNNI"
28674 "vpdpwssd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
28675 [(set_attr ("prefix") ("evex"))])
;; Zero-masking expander: forwards to _maskz_1 with a zero merge vector.
28677 (define_expand "vpdpwssd_<mode>_maskz"
28678 [(match_operand:VI4_AVX512VL 0 "register_operand")
28679 (match_operand:VI4_AVX512VL 1 "register_operand")
28680 (match_operand:VI4_AVX512VL 2 "register_operand")
28681 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
28682 (match_operand:<avx512fmaskmode> 4 "register_operand")]
28683 "TARGET_AVX512VNNI"
28685 emit_insn (gen_vpdpwssd_<mode>_maskz_1 (operands[0], operands[1],
28686 operands[2], operands[3],
28687 CONST0_RTX (<MODE>mode),
28692 (define_insn "vpdpwssd_<mode>_maskz_1"
28693 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
28694 (vec_merge:VI4_AVX512VL
28695 (unspec:VI4_AVX512VL
28696 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
28697 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
28698 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
28700 (match_operand:VI4_AVX512VL 4 "const0_operand")
28701 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
28702 "TARGET_AVX512VNNI"
28703 "vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
28704 [(set_attr ("prefix") ("evex"))])
;; vpdpwssds: saturating variant of vpdpwssd (signed word dot-product with
;; signed saturation).  Same four-pattern structure as the groups above.
28706 (define_insn "vpdpwssds_v16si"
28707 [(set (match_operand:V16SI 0 "register_operand" "=v")
28709 [(match_operand:V16SI 1 "register_operand" "0")
28710 (match_operand:V16SI 2 "register_operand" "v")
28711 (match_operand:V16SI 3 "nonimmediate_operand" "vm")]
28712 UNSPEC_VPDPWSSDS))]
28713 "TARGET_AVX512VNNI"
28714 "vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
28715 [(set_attr ("prefix") ("evex"))])
28717 (define_insn "vpdpwssds_<mode>"
28718 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
28720 [(match_operand:VI4_AVX2 1 "register_operand" "0,0")
28721 (match_operand:VI4_AVX2 2 "register_operand" "x,v")
28722 (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
28723 UNSPEC_VPDPWSSDS))]
28724 "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
28726 %{vex%} vpdpwssds\t{%3, %2, %0|%0, %2, %3}
28727 vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
28728 [(set_attr ("prefix") ("vex,evex"))
28729 (set_attr ("isa") ("avxvnni,avx512vnnivl"))])
28731 (define_insn "vpdpwssds_<mode>_mask"
28732 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
28733 (vec_merge:VI4_AVX512VL
28734 (unspec:VI4_AVX512VL
28735 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
28736 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
28737 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
28740 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
28741 "TARGET_AVX512VNNI"
28742 "vpdpwssds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3 }"
28743 [(set_attr ("prefix") ("evex"))])
;; Zero-masking expander: forwards to _maskz_1 with a zero merge vector.
28745 (define_expand "vpdpwssds_<mode>_maskz"
28746 [(match_operand:VI4_AVX512VL 0 "register_operand")
28747 (match_operand:VI4_AVX512VL 1 "register_operand")
28748 (match_operand:VI4_AVX512VL 2 "register_operand")
28749 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
28750 (match_operand:<avx512fmaskmode> 4 "register_operand")]
28751 "TARGET_AVX512VNNI"
28753 emit_insn (gen_vpdpwssds_<mode>_maskz_1 (operands[0], operands[1],
28754 operands[2], operands[3],
28755 CONST0_RTX (<MODE>mode),
28760 (define_insn "vpdpwssds_<mode>_maskz_1"
28761 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
28762 (vec_merge:VI4_AVX512VL
28763 (unspec:VI4_AVX512VL
28764 [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
28765 (match_operand:VI4_AVX512VL 2 "register_operand" "v")
28766 (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
28768 (match_operand:VI4_AVX512VL 4 "const0_operand")
28769 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
28770 "TARGET_AVX512VNNI"
28771 "vpdpwssds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }"
28772 [(set_attr ("prefix") ("evex"))])
;; VAES: vector AES rounds (dec/declast/enc/enclast) over VI1_AVX512VL_F.
;; NOTE(review): insn-condition lines, some UNSPEC names, and the attr
;; lines are missing from this listing — confirm against the full file.
28774 (define_insn "vaesdec_<mode>"
28775 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
28776 (unspec:VI1_AVX512VL_F
28777 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
28778 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
28781 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
28784 (define_insn "vaesdeclast_<mode>"
28785 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
28786 (unspec:VI1_AVX512VL_F
28787 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
28788 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
28789 UNSPEC_VAESDECLAST))]
28791 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
28794 (define_insn "vaesenc_<mode>"
28795 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
28796 (unspec:VI1_AVX512VL_F
28797 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
28798 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
28801 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
28804 (define_insn "vaesenclast_<mode>"
28805 [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
28806 (unspec:VI1_AVX512VL_F
28807 [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
28808 (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
28809 UNSPEC_VAESENCLAST))]
28811 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
;; VPCLMULQDQ: vector carry-less (polynomial) multiply of quadword pairs;
;; immediate operand 3 selects which 64-bit halves are multiplied.
28814 (define_insn "vpclmulqdq_<mode>"
28815 [(set (match_operand:VI8_FVL 0 "register_operand" "=v")
28816 (unspec:VI8_FVL [(match_operand:VI8_FVL 1 "register_operand" "v")
28817 (match_operand:VI8_FVL 2 "vector_operand" "vm")
28818 (match_operand:SI 3 "const_0_to_255_operand")]
28819 UNSPEC_VPCLMULQDQ))]
28820 "TARGET_VPCLMULQDQ"
28821 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
28822 [(set_attr "mode" "DI")])
;; AVX512BITALG vpshufbitqmb: bit-gather from op 1 at bit indices taken from
;; op 2, producing a mask register result (optionally merge-masked).
28824 (define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
28825 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
28826 (unspec:<avx512fmaskmode>
28827 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
28828 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
28829 UNSPEC_VPSHUFBIT))]
28830 "TARGET_AVX512BITALG"
28831 "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
28832 [(set_attr "prefix" "evex")
28833 (set_attr "mode" "<sseinsnmode>")])
;; Modes usable by VP2INTERSECT (narrow D/Q vectors gated on AVX512VL);
;; MASK_DWI covers the paired-mask result modes (two k-registers).
;; NOTE(review): the 512-bit entries of VI48_AVX512VP2VL (orig line 28836)
;; are missing from this listing.
28835 (define_mode_iterator VI48_AVX512VP2VL
28837 (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
28838 (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
28840 (define_mode_iterator MASK_DWI [P2QI P2HI])
;; Moves of paired mask registers (P2QI/P2HI).  The expander legalizes
;; mem-to-mem moves; the insn splits after reload into two single-mask moves.
28842 (define_expand "mov<mode>"
28843 [(set (match_operand:MASK_DWI 0 "nonimmediate_operand")
28844 (match_operand:MASK_DWI 1 "nonimmediate_operand"))]
28845 "TARGET_AVX512VP2INTERSECT"
28847 if (MEM_P (operands[0]) && MEM_P (operands[1]))
28848 operands[1] = force_reg (<MODE>mode, operands[1]);
28851 (define_insn_and_split "*mov<mode>_internal"
28852 [(set (match_operand:MASK_DWI 0 "nonimmediate_operand" "=k,o")
28853 (match_operand:MASK_DWI 1 "nonimmediate_operand" "ko,k"))]
28854 "TARGET_AVX512VP2INTERSECT
28855 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
28857 "&& reload_completed"
28858 [(set (match_dup 0) (match_dup 1))
28859 (set (match_dup 2) (match_dup 3))]
;; Split the double-mask mode into its two component halves.
28861 split_double_mode (<MODE>mode, &operands[0], 2, &operands[0], &operands[2]);
;; VP2INTERSECT: compute intersection of two index vectors, writing a pair
;; of mask registers (P2QI for narrow modes, P2HI for V16SI).
28864 (define_insn "avx512vp2intersect_2intersect<mode>"
28865 [(set (match_operand:P2QI 0 "register_operand" "=k")
28867 [(match_operand:VI48_AVX512VP2VL 1 "register_operand" "v")
28868 (match_operand:VI48_AVX512VP2VL 2 "vector_operand" "vm")]
28869 UNSPEC_VP2INTERSECT))]
28870 "TARGET_AVX512VP2INTERSECT"
28871 "vp2intersect<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
28872 [(set_attr ("prefix") ("evex"))])
28874 (define_insn "avx512vp2intersect_2intersectv16si"
28875 [(set (match_operand:P2HI 0 "register_operand" "=k")
28876 (unspec:P2HI [(match_operand:V16SI 1 "register_operand" "v")
28877 (match_operand:V16SI 2 "vector_operand" "vm")]
28878 UNSPEC_VP2INTERSECT))]
28879 "TARGET_AVX512VP2INTERSECT"
28880 "vp2intersectd\t{%2, %1, %0|%0, %1, %2}"
28881 [(set_attr ("prefix") ("evex"))])
;; BF16 vector modes and the mode mappings used by the conversion patterns.
28883 (define_mode_iterator VF_AVX512BF16VL
28884 [V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
28885 ;; Converting from BF to SF
28886 (define_mode_attr bf16_cvt_2sf
28887 [(V32BF "V16SF") (V16BF "V8SF") (V8BF "V4SF")])
28888 ;; Converting from SF to BF
28889 (define_mode_attr sf_cvt_bf16
28890 [(V8SF "V8BF") (V16SF "V16BF")])
;; Mapping from an SF vector mode to the BF vector mode of equal bit width.
28892 (define_mode_attr sf_bf16
28893 [(V4SF "V8BF") (V8SF "V16BF") (V16SF "V32BF")])
;; vcvtne2ps2bf16: convert two SF vectors to one BF16 vector (op 2 fills the
;; low half, op 1 the high half).  The maskz expander routes through the
;; _mask pattern with a zero merge vector.
28895 (define_expand "avx512f_cvtne2ps2bf16_<mode>_maskz"
28896 [(match_operand:VF_AVX512BF16VL 0 "register_operand")
28897 (match_operand:<bf16_cvt_2sf> 1 "register_operand")
28898 (match_operand:<bf16_cvt_2sf> 2 "nonimmediate_operand")
28899 (match_operand:<avx512fmaskmode> 3 "register_operand")]
28900 "TARGET_AVX512BF16"
28902 emit_insn (gen_avx512f_cvtne2ps2bf16_<mode>_mask(operands[0], operands[2],
28903 operands[1], CONST0_RTX(<MODE>mode), operands[3]));
28907 (define_insn "avx512f_cvtne2ps2bf16_<mode><mask_name>"
28908 [(set (match_operand:VF_AVX512BF16VL 0 "register_operand" "=v")
28909 (vec_concat:VF_AVX512BF16VL
28910 (float_truncate:<ssehalfvecmode>
28911 (match_operand:<bf16_cvt_2sf> 2 "nonimmediate_operand" "vm"))
28912 (float_truncate:<ssehalfvecmode>
28913 (match_operand:<bf16_cvt_2sf> 1 "register_operand" "v"))))]
28914 "TARGET_AVX512BF16"
28915 "vcvtne2ps2bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}")
;; V4SF -> BF16 conversion: the result occupies the low V4BF half of a V8BF
;; register; the upper half is zero-padded (operand 2 / the const0 operand).
28917 (define_expand "vcvtneps2bf16_v4sf"
28918 [(set (match_operand:V8BF 0 "register_operand")
28920 (float_truncate:V4BF
28921 (match_operand:V4SF 1 "nonimmediate_operand"))
28923 "TARGET_AVXNECONVERT || (TARGET_AVX512BF16 && TARGET_AVX512VL)"
28924 "operands[2] = CONST0_RTX (V4BFmode);")
;; {x} suffix disambiguates the 128-bit memory form of vcvtneps2bf16.
28926 (define_insn "*vcvtneps2bf16_v4sf"
28927 [(set (match_operand:V8BF 0 "register_operand" "=x,v")
28929 (float_truncate:V4BF
28930 (match_operand:V4SF 1 "nonimmediate_operand" "xm,vm"))
28931 (match_operand:V4BF 2 "const0_operand")))]
28932 "TARGET_AVXNECONVERT || (TARGET_AVX512BF16 && TARGET_AVX512VL)"
28934 %{vex%} vcvtneps2bf16{x}\t{%1, %0|%0, %1}
28935 vcvtneps2bf16{x}\t{%1, %0|%0, %1}"
28936 [(set_attr "isa" "avxneconvert,avx512bf16vl")
28937 (set_attr "prefix" "vex,evex")])
;; Masked variants route through the _mask_1 insn; maskz passes a zero
;; merge vector, mask passes the caller's merge operand.
28939 (define_expand "avx512f_cvtneps2bf16_v4sf_maskz"
28940 [(match_operand:V8BF 0 "register_operand")
28941 (match_operand:V4SF 1 "nonimmediate_operand")
28942 (match_operand:QI 2 "register_operand")]
28943 "TARGET_AVX512BF16 && TARGET_AVX512VL"
28945 emit_insn (gen_avx512f_cvtneps2bf16_v4sf_mask_1(operands[0], operands[1],
28946 CONST0_RTX(V8BFmode), operands[2], CONST0_RTX(V4BFmode)));
28950 (define_expand "avx512f_cvtneps2bf16_v4sf_mask"
28951 [(match_operand:V8BF 0 "register_operand")
28952 (match_operand:V4SF 1 "nonimmediate_operand")
28953 (match_operand:V8BF 2 "nonimm_or_0_operand")
28954 (match_operand:QI 3 "register_operand")]
28955 "TARGET_AVX512BF16 && TARGET_AVX512VL"
28957 emit_insn (gen_avx512f_cvtneps2bf16_v4sf_mask_1(operands[0], operands[1],
28958 operands[2], operands[3], CONST0_RTX(V4BFmode)));
28962 (define_insn "avx512f_cvtneps2bf16_v4sf_mask_1"
28963 [(set (match_operand:V8BF 0 "register_operand" "=v")
28966 (float_truncate:V4BF
28967 (match_operand:V4SF 1 "nonimmediate_operand" "vm"))
;; Merge source: tied destination ("0") or zero ("C") for zero-masking.
28969 (match_operand:V8BF 2 "nonimm_or_0_operand" "0C")
28970 (parallel [(const_int 0) (const_int 1)
28971 (const_int 2) (const_int 3)]))
28972 (match_operand:QI 3 "register_operand" "Yk")
28973 (match_operand:V4BF 4 "const0_operand")))]
28974 "TARGET_AVX512BF16 && TARGET_AVX512VL"
28975 "vcvtneps2bf16{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}")
;; Wider SF -> BF16 conversions (V8SF/V16SF sources).
28977 (define_mode_iterator VF1_AVX512_256 [V16SF (V8SF "TARGET_AVX512VL")])
28979 (define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
28980 [(match_operand:<sf_cvt_bf16> 0 "register_operand")
28981 (match_operand:VF1_AVX512_256 1 "nonimmediate_operand")
28982 (match_operand:<avx512fmaskmode> 2 "register_operand")]
28983 "TARGET_AVX512BF16"
28985 emit_insn (gen_avx512f_cvtneps2bf16_<mode>_mask(operands[0], operands[1],
28986 CONST0_RTX(<sf_cvt_bf16>mode), operands[2]));
;; V8SF source: {y} suffix selects the 256-bit memory form.
28990 (define_insn "vcvtneps2bf16_v8sf"
28991 [(set (match_operand:V8BF 0 "register_operand" "=x,v")
28992 (float_truncate:V8BF
28993 (match_operand:V8SF 1 "nonimmediate_operand" "xm,vm")))]
28994 "TARGET_AVXNECONVERT || (TARGET_AVX512BF16 && TARGET_AVX512VL)"
28996 %{vex%} vcvtneps2bf16{y}\t{%1, %0|%0, %1}
28997 vcvtneps2bf16{y}\t{%1, %0|%0, %1}"
28998 [(set_attr "isa" "avxneconvert,avx512bf16vl")
28999 (set_attr "prefix" "vex,evex")])
29002 (define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
29003 [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
29004 (float_truncate:<sf_cvt_bf16>
29005 (match_operand:VF1_AVX512_256 1 "nonimmediate_operand" "vm")))]
29006 "TARGET_AVX512BF16"
29007 "vcvtneps2bf16<qq2phsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
;; vdpbf16ps: BF16 pair dot-product accumulating into SF lanes.  Operand 1
;; is the tied SF accumulator; ops 2/3 are BF16 vectors of twice the lanes.
29009 (define_expand "avx512f_dpbf16ps_<mode>_maskz"
29010 [(match_operand:VF1_AVX512VL 0 "register_operand")
29011 (match_operand:VF1_AVX512VL 1 "register_operand")
29012 (match_operand:<sf_bf16> 2 "register_operand")
29013 (match_operand:<sf_bf16> 3 "register_operand")
29014 (match_operand:<avx512fmaskhalfmode> 4 "register_operand")]
29015 "TARGET_AVX512BF16"
29017 emit_insn (gen_avx512f_dpbf16ps_<mode>_maskz_1(operands[0], operands[1],
29018 operands[2], operands[3], CONST0_RTX(<MODE>mode), operands[4]));
29022 (define_insn "avx512f_dpbf16ps_<mode><maskz_half_name>"
29023 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
29024 (unspec:VF1_AVX512VL
29025 [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
29026 (match_operand:<sf_bf16> 2 "register_operand" "v")
29027 (match_operand:<sf_bf16> 3 "nonimmediate_operand" "vm")]
29028 UNSPEC_VDPBF16PS))]
29029 "TARGET_AVX512BF16"
29030 "vdpbf16ps\t{%3, %2, %0<maskz_half_operand4>|%0<maskz_half_operand4>, %2, %3}")
;; Merge-masking form: inactive SF lanes keep the accumulator value.
29032 (define_insn "avx512f_dpbf16ps_<mode>_mask"
29033 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
29034 (vec_merge:VF1_AVX512VL
29035 (unspec:VF1_AVX512VL
29036 [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
29037 (match_operand:<sf_bf16> 2 "register_operand" "v")
29038 (match_operand:<sf_bf16> 3 "nonimmediate_operand" "vm")]
29041 (match_operand:<avx512fmaskhalfmode> 4 "register_operand" "Yk")))]
29042 "TARGET_AVX512BF16"
29043 "vdpbf16ps\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}")
;; Key Locker LOADIWKEY: loads the internal wrapping key from xmm operands
;; (op 2 fixed to xmm0 via "Yz", op 3 in eax); clobbers the flags.
29046 (define_insn "loadiwkey"
29047 [(unspec_volatile:V2DI [(match_operand:V2DI 0 "register_operand" "x")
29048 (match_operand:V2DI 1 "register_operand" "x")
29049 (match_operand:V2DI 2 "register_operand" "Yz")
29050 (match_operand:SI 3 "register_operand" "a")]
29052 (clobber (reg:CC FLAGS_REG))]
29054 "loadiwkey\t{%0, %1|%1, %0}"
29055 [(set_attr "type" "other")])
;; Key Locker ENCODEKEY128: the expander builds by hand the parallel that
;; the "encodekey128_operation" predicate expects (result set, xmm0-xmm2
;; handle outputs, xmm4-xmm6 clobbers, flags clobber); the *insn matches it.
29057 (define_expand "encodekey128u32"
29059 [(set (match_operand:SI 0 "register_operand")
29060 (unspec_volatile:SI
29061 [(match_operand:SI 1 "register_operand")
29062 (reg:V2DI XMM0_REG)]
29063 UNSPECV_ENCODEKEY128U32))])]
29070 /* parallel rtx for encodekey128 predicate */
29071 operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (8));
29073 for (i = 0; i < 7; i++)
29074 xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
;; Element 0: the SI result produced from the input key and xmm0.
29077 = gen_rtx_UNSPEC_VOLATILE (SImode,
29078 gen_rtvec (2, operands[1], xmm_regs[0]),
29079 UNSPECV_ENCODEKEY128U32);
29081 XVECEXP (operands[2], 0, 0)
29082 = gen_rtx_SET (operands[0], tmp_unspec);
;; Elements 1-3: xmm0-xmm2 receive the 384-bit key handle.
29085 = gen_rtx_UNSPEC_VOLATILE (V2DImode,
29086 gen_rtvec (1, const0_rtx),
29087 UNSPECV_ENCODEKEY128U32);
29089 for (i = 0; i < 3; i++)
29090 XVECEXP (operands[2], 0, i + 1)
29091 = gen_rtx_SET (xmm_regs[i], tmp_unspec);
;; Elements 4-6: xmm4-xmm6 are zeroed by the instruction -> clobbers.
29093 for (i = 4; i < 7; i++)
29094 XVECEXP (operands[2], 0, i)
29095 = gen_rtx_CLOBBER (VOIDmode, xmm_regs[i]);
29097 XVECEXP (operands[2], 0, 7)
29098 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
29101 (define_insn "*encodekey128u32"
29102 [(match_parallel 2 "encodekey128_operation"
29103 [(set (match_operand:SI 0 "register_operand" "=r")
29104 (unspec_volatile:SI
29105 [(match_operand:SI 1 "register_operand" "r")
29106 (reg:V2DI XMM0_REG)]
29107 UNSPECV_ENCODEKEY128U32))])]
29109 "encodekey128\t{%1, %0|%0, %1}"
29110 [(set_attr "type" "other")])
29112 (define_expand "encodekey256u32"
29114 [(set (match_operand:SI 0 "register_operand")
29115 (unspec_volatile:SI
29116 [(match_operand:SI 1 "register_operand")
29117 (reg:V2DI XMM0_REG)
29118 (reg:V2DI XMM1_REG)]
29119 UNSPECV_ENCODEKEY256U32))])]
29126 /* parallel rtx for encodekey256 predicate */
29127 operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));
29129 for (i = 0; i < 7; i++)
29130 xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
29133 = gen_rtx_UNSPEC_VOLATILE (SImode,
29134 gen_rtvec (3, operands[1],
29135 xmm_regs[0], xmm_regs[1]),
29136 UNSPECV_ENCODEKEY256U32);
29138 XVECEXP (operands[2], 0, 0)
29139 = gen_rtx_SET (operands[0], tmp_unspec);
29142 = gen_rtx_UNSPEC_VOLATILE (V2DImode,
29143 gen_rtvec (1, const0_rtx),
29144 UNSPECV_ENCODEKEY256U32);
29146 for (i = 0; i < 4; i++)
29147 XVECEXP (operands[2], 0, i + 1)
29148 = gen_rtx_SET (xmm_regs[i], tmp_unspec);
29150 for (i = 4; i < 7; i++)
29151 XVECEXP (operands[2], 0, i + 1)
29152 = gen_rtx_CLOBBER (VOIDmode, xmm_regs[i]);
29154 XVECEXP (operands[2], 0, 8)
29155 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
;; Key Locker: recognizer for the parallel built by the "encodekey256u32"
;; expander.  Like *encodekey128u32 but the input key occupies xmm0 and
;; xmm1; the full parallel is checked by "encodekey256_operation".
29158 (define_insn "*encodekey256u32"
29159 [(match_parallel 2 "encodekey256_operation"
29160 [(set (match_operand:SI 0 "register_operand" "=r")
29161 (unspec_volatile:SI
29162 [(match_operand:SI 1 "register_operand" "r")
29163 (reg:V2DI XMM0_REG)
29164 (reg:V2DI XMM1_REG)]
29165 UNSPECV_ENCODEKEY256U32))])]
29167 "encodekey256\t{%1, %0|%0, %1}"
29168 [(set_attr "type" "other")])
;; Key Locker AES encode/decode (single-block) variants: iterator over the
;; four unspec_volatile codes, plus the attribute mapping each code to the
;; mnemonic suffix used to build the "aes<aesklvariant>u8" pattern names.
29170 (define_int_iterator AESDECENCKL
29171 [UNSPECV_AESDEC128KLU8 UNSPECV_AESDEC256KLU8
29172 UNSPECV_AESENC128KLU8 UNSPECV_AESENC256KLU8])
29174 (define_int_attr aesklvariant
29175 [(UNSPECV_AESDEC128KLU8 "dec128kl")
29176 (UNSPECV_AESDEC256KLU8 "dec256kl")
29177 (UNSPECV_AESENC128KLU8 "enc128kl")
29178 (UNSPECV_AESENC256KLU8 "enc256kl")])
;; Single-block AESDEC*/AESENC*KL: operand 0/1 is the in/out data block
;; (constraint "0" ties them), operand 2 is the BLKmode key-handle memory.
;; A second set records that the instruction also writes ZF (CCZ).
;; NOTE(review): two interior lines (orig. 29184, 29187 — likely the unspec
;; code and the enable condition) are not visible in this chunk.
29180 (define_insn "aes<aesklvariant>u8"
29181 [(set (match_operand:V2DI 0 "register_operand" "=x")
29182 (unspec_volatile:V2DI [(match_operand:V2DI 1 "register_operand" "0")
29183 (match_operand:BLK 2 "memory_operand" "m")]
29185 (set (reg:CCZ FLAGS_REG)
29186 (unspec_volatile:CCZ [(match_dup 1) (match_dup 2)] AESDECENCKL))]
29188 "aes<aesklvariant>\t{%2, %0|%0, %2}"
29189 [(set_attr "type" "other")])
;; Key Locker wide (8-block) AES variants: the iterator plus two parallel
;; attributes — lower-case for pattern names / mnemonics, upper-case for
;; splicing the UNSPECV_<...> enumerator into the expander's C body.
29191 (define_int_iterator AESDECENCWIDEKL
29192 [UNSPECV_AESDECWIDE128KLU8 UNSPECV_AESDECWIDE256KLU8
29193 UNSPECV_AESENCWIDE128KLU8 UNSPECV_AESENCWIDE256KLU8])
29195 (define_int_attr aeswideklvariant
29196 [(UNSPECV_AESDECWIDE128KLU8 "decwide128kl")
29197 (UNSPECV_AESDECWIDE256KLU8 "decwide256kl")
29198 (UNSPECV_AESENCWIDE128KLU8 "encwide128kl")
29199 (UNSPECV_AESENCWIDE256KLU8 "encwide256kl")])
29201 (define_int_attr AESWIDEKLVARIANT
29202 [(UNSPECV_AESDECWIDE128KLU8 "AESDECWIDE128KLU8")
29203 (UNSPECV_AESDECWIDE256KLU8 "AESDECWIDE256KLU8")
29204 (UNSPECV_AESENCWIDE128KLU8 "AESENCWIDE128KLU8")
29205 (UNSPECV_AESENCWIDE256KLU8 "AESENCWIDE256KLU8")])
;; Wide Key Locker expander: operand 0 is the BLKmode key-handle memory.
;; The C body builds a 9-element PARALLEL (operands[1]): element 0 sets
;; FLAGS (CCZ) from the unspec over the handle; elements 1..8 set xmm0..xmm7
;; from per-register V2DI unspecs (the data blocks are processed in place).
;; Matched by the "aeswidekl_operation" predicate of the insn below.
29207 (define_expand "aes<aeswideklvariant>u8"
29209 [(set (reg:CCZ FLAGS_REG)
29210 (unspec_volatile:CCZ
29211 [(match_operand:BLK 0 "memory_operand")]
29212 AESDECENCWIDEKL))])]
29218 /* parallel rtx for widekl predicate */
29219 operands[1] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));
/* Element 0: FLAGS set from the unspec over the key handle.  */
29222 = gen_rtx_UNSPEC_VOLATILE (CCZmode,
29223 gen_rtvec (1, operands[0]),
29224 UNSPECV_<AESWIDEKLVARIANT>);
29226 XVECEXP (operands[1], 0, 0)
29227 = gen_rtx_SET (gen_rtx_REG (CCZmode, FLAGS_REG),
/* Elements 1..8: each of xmm0..xmm7 is rewritten from its own unspec.  */
29230 for (i = 0; i < 8; i++)
29232 rtx xmm_reg = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
29235 = gen_rtx_UNSPEC_VOLATILE (V2DImode,
29236 gen_rtvec (1, xmm_reg),
29237 UNSPECV_<AESWIDEKLVARIANT>);
29238 XVECEXP (operands[1], 0, i + 1)
29239 = gen_rtx_SET (xmm_reg, tmp_unspec);
;; Recognizer for the wide Key Locker parallel built above; emits the
;; AES{ENC,DEC}WIDE{128,256}KL instruction with the key handle as sole
;; explicit (memory) operand.
29243 (define_insn "*aes<aeswideklvariant>u8"
29244 [(match_parallel 1 "aeswidekl_operation"
29245 [(set (reg:CCZ FLAGS_REG)
29246 (unspec_volatile:CCZ
29247 [(match_operand:BLK 0 "memory_operand" "m")]
29248 AESDECENCWIDEKL))])]
29250 "aes<aeswideklvariant>\t%0"
29251 [(set_attr "type" "other")])
29253 ;; Modes handled by broadcast patterns. NB: Allow V64QI and V32HI with
29254 ;; TARGET_AVX512F since ix86_expand_vector_init_duplicate can expand
29255 ;; without TARGET_AVX512BW which is used by memset vector broadcast
29256 ;; expander to XI with:
29257 ;; vmovd %edi, %xmm15
29258 ;; vpbroadcastb %xmm15, %ymm15
29259 ;; vinserti64x4 $0x1, %ymm15, %zmm15, %zmm15
;; Integer vector modes accepted by the vec_duplicate expander below.
;; 512-bit modes gate on TARGET_AVX512F, 256-bit on TARGET_AVX; the DImode
;; element variants additionally require TARGET_64BIT (GPR->vector move of
;; a 64-bit scalar).
29261 (define_mode_iterator INT_BROADCAST_MODE
29262 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
29263 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
29264 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
29265 (V8DI "TARGET_AVX512F && TARGET_64BIT")
29266 (V4DI "TARGET_AVX && TARGET_64BIT") (V2DI "TARGET_64BIT")])
29268 ;; Broadcast from an integer. NB: Enable broadcast only if we can move
29269 ;; from GPR to SSE register directly.
;; Broadcast a scalar integer into every element of the vector.  Enabled
;; only when direct GPR->SSE moves are allowed (TARGET_INTER_UNIT_MOVES_TO_VEC);
;; the whole expansion is delegated to ix86_expand_vector_init_duplicate,
;; which must succeed here (gcc_unreachable on failure).
29270 (define_expand "vec_duplicate<mode>"
29271 [(set (match_operand:INT_BROADCAST_MODE 0 "register_operand")
29272 (vec_duplicate:INT_BROADCAST_MODE
29273 (match_operand:<ssescalarmode> 1 "nonimmediate_operand")))]
29274 "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
29276 if (!ix86_expand_vector_init_duplicate (false,
29277 GET_MODE (operands[0]),
29280 gcc_unreachable ();
;; AVX-VNNI-INT8 dot-product unspecs.  The iterator body (orig. lines
;; 29285-29291) is not visible in this chunk; from the attribute below it
;; covers the six UNSPEC_VPDPB{SS,SU,UU}D[S] codes.  The attribute maps
;; each code to its mnemonic type suffix (e.g. "bssd" -> vpdpbssd).
29284 (define_int_iterator VPDOTPROD
29292 (define_int_attr vpdotprodtype
29293 [(UNSPEC_VPDPBSSD "bssd") (UNSPEC_VPDPBSSDS "bssds")
29294 (UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds")
29295 (UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")])
;; Signed 8-bit dot-product reduction (sdot_prod optab) via AVX-VNNI-INT8.
;; Operands 1/2 are the VI1 (byte-vector) multiplicands, operand 3 the
;; dword accumulator; both byte vectors are reinterpreted as the dword
;; vector mode through lowpart subregs, the accumulator is copied into
;; operand 0, and a vpdpbssd insn performs the multiply-accumulate.
29297 (define_expand "sdot_prod<mode>"
29298 [(match_operand:<ssedvecmode> 0 "register_operand")
29299 (match_operand:VI1 1 "register_operand")
29300 (match_operand:VI1 2 "register_operand")
29301 (match_operand:<ssedvecmode> 3 "register_operand")]
29302 "TARGET_AVXVNNIINT8"
/* View the byte vectors as dword vectors for the vpdpbssd pattern.  */
29304 operands[1] = lowpart_subreg (<ssedvecmode>mode,
29305 force_reg (<MODE>mode, operands[1]),
29307 operands[2] = lowpart_subreg (<ssedvecmode>mode,
29308 force_reg (<MODE>mode, operands[2]),
29310 emit_insn (gen_rtx_SET (operands[0], operands[3]));
29311 emit_insn (gen_vpdpbssd_<ssedvecmodelower> (operands[0], operands[3],
29312 operands[1], operands[2]));
;; Unsigned 8-bit dot-product reduction (udot_prod optab); identical in
;; structure to sdot_prod above but emits the unsigned*unsigned vpdpbuud
;; instruction.
29316 (define_expand "udot_prod<mode>"
29317 [(match_operand:<ssedvecmode> 0 "register_operand")
29318 (match_operand:VI1 1 "register_operand")
29319 (match_operand:VI1 2 "register_operand")
29320 (match_operand:<ssedvecmode> 3 "register_operand")]
29321 "TARGET_AVXVNNIINT8"
/* View the byte vectors as dword vectors for the vpdpbuud pattern.  */
29323 operands[1] = lowpart_subreg (<ssedvecmode>mode,
29324 force_reg (<MODE>mode, operands[1]),
29326 operands[2] = lowpart_subreg (<ssedvecmode>mode,
29327 force_reg (<MODE>mode, operands[2]),
29329 emit_insn (gen_rtx_SET (operands[0], operands[3]));
29330 emit_insn (gen_vpdpbuud_<ssedvecmodelower> (operands[0], operands[3],
29331 operands[1], operands[2]));
;; AVX-VNNI-INT8 dot-product insn, one pattern per VPDOTPROD unspec.
;; Operand 1 is the accumulator tied to the output ("0"); operands 2/3 are
;; the packed-byte sources (operand 3 may be memory).  VEX-encoded only.
29335 (define_insn "vpdp<vpdotprodtype>_<mode>"
29336 [(set (match_operand:VI4_AVX 0 "register_operand" "=x")
29338 [(match_operand:VI4_AVX 1 "register_operand" "0")
29339 (match_operand:VI4_AVX 2 "register_operand" "x")
29340 (match_operand:VI4_AVX 3 "nonimmediate_operand" "xm")]
29342 "TARGET_AVXVNNIINT8"
29343 "vpdp<vpdotprodtype>\t{%3, %2, %0|%0, %2, %3}"
29344 [(set_attr "prefix" "vex")])
;; AVX-NE-CONVERT: load one BFmode scalar from memory, convert, and
;; broadcast it to all SFmode elements of a 128/256-bit vector
;; (vbcstnebf162ps).  Memory-source only; VEX-encoded.
29346 (define_insn "vbcstnebf162ps_<mode>"
29347 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
29348 (vec_duplicate:VF1_128_256
29350 (match_operand:BF 1 "memory_operand" "m"))))]
29351 "TARGET_AVXNECONVERT"
29352 "vbcstnebf162ps\t{%1, %0|%0, %1}"
29353 [(set_attr "prefix" "vex")
29354 (set_attr "mode" "<sseinsnmode>")])
;; AVX-NE-CONVERT: same shape as vbcstnebf162ps above but the memory
;; scalar is HFmode (FP16) — vbcstnesh2ps.
29356 (define_insn "vbcstnesh2ps_<mode>"
29357 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
29358 (vec_duplicate:VF1_128_256
29360 (match_operand:HF 1 "memory_operand" "m"))))]
29361 "TARGET_AVXNECONVERT"
29362 "vbcstnesh2ps\t{%1, %0|%0, %1}"
29363 [(set_attr "prefix" "vex")
29364 (set_attr "mode" "<sseinsnmode>")])
;; 256-bit half-precision element modes (FP16 and BF16) for the
;; vcvtne{e,o}*2ps patterns below, plus the attribute selecting the
;; mnemonic infix ("ph" for FP16 modes, "bf16" for BF16 modes).
29366 (define_mode_iterator V16BFH_256 [V16HF V16BF])
29368 (define_mode_attr bf16_ph
29369 [(V8HF "ph") (V16HF "ph")
29370 (V8BF "bf16") (V16BF "bf16")])
;; AVX-NE-CONVERT, 128-bit: convert the EVEN-indexed (0,2,4,6) half-
;; precision elements of a memory operand to V4SF (vcvtneeph2ps /
;; vcvtneebf162ps).
29372 (define_insn "vcvtnee<bf16_ph>2ps_<mode>"
29373 [(set (match_operand:V4SF 0 "register_operand" "=x")
29375 (vec_select:<ssehalfvecmode>
29376 (match_operand:V8BFH_128 1 "memory_operand" "m")
29377 (parallel [(const_int 0) (const_int 2)
29378 (const_int 4) (const_int 6)]))))]
29379 "TARGET_AVXNECONVERT"
29380 "vcvtnee<bf16_ph>2ps\t{%1, %0|%0, %1}"
29381 [(set_attr "prefix" "vex")
29382 (set_attr "mode" "<sseinsnmode>")])
;; AVX-NE-CONVERT, 256-bit: even-indexed elements (0,2,...,14) of the
;; memory operand converted to V8SF.
29384 (define_insn "vcvtnee<bf16_ph>2ps_<mode>"
29385 [(set (match_operand:V8SF 0 "register_operand" "=x")
29387 (vec_select:<ssehalfvecmode>
29388 (match_operand:V16BFH_256 1 "memory_operand" "m")
29389 (parallel [(const_int 0) (const_int 2)
29390 (const_int 4) (const_int 6)
29391 (const_int 8) (const_int 10)
29392 (const_int 12) (const_int 14)]))))]
29393 "TARGET_AVXNECONVERT"
29394 "vcvtnee<bf16_ph>2ps\t{%1, %0|%0, %1}"
29395 [(set_attr "prefix" "vex")
29396 (set_attr "mode" "<sseinsnmode>")])
;; AVX-NE-CONVERT, 128-bit: ODD-indexed (1,3,5,7) half-precision elements
;; of the memory operand converted to V4SF (vcvtneoph2ps / vcvtneobf162ps).
29398 (define_insn "vcvtneo<bf16_ph>2ps_<mode>"
29399 [(set (match_operand:V4SF 0 "register_operand" "=x")
29401 (vec_select:<ssehalfvecmode>
29402 (match_operand:V8BFH_128 1 "memory_operand" "m")
29403 (parallel [(const_int 1) (const_int 3)
29404 (const_int 5) (const_int 7)]))))]
29405 "TARGET_AVXNECONVERT"
29406 "vcvtneo<bf16_ph>2ps\t{%1, %0|%0, %1}"
29407 [(set_attr "prefix" "vex")
29408 (set_attr "mode" "<sseinsnmode>")])
;; AVX-NE-CONVERT, 256-bit: odd-indexed elements (1,3,...,15) of the
;; memory operand converted to V8SF.
29410 (define_insn "vcvtneo<bf16_ph>2ps_<mode>"
29411 [(set (match_operand:V8SF 0 "register_operand" "=x")
29413 (vec_select:<ssehalfvecmode>
29414 (match_operand:V16BFH_256 1 "memory_operand" "m")
29415 (parallel [(const_int 1) (const_int 3)
29416 (const_int 5) (const_int 7)
29417 (const_int 9) (const_int 11)
29418 (const_int 13) (const_int 15)]))))]
29419 "TARGET_AVXNECONVERT"
29420 "vcvtneo<bf16_ph>2ps\t{%1, %0|%0, %1}"
29421 [(set_attr "prefix" "vex")
29422 (set_attr "mode" "<sseinsnmode>")])