1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2017 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
53 UNSPEC_XOP_UNSIGNED_CMP
64 UNSPEC_AESKEYGENASSIST
85 ;; For AVX512F support
89 UNSPEC_UNSIGNED_FIX_NOTRUNC
104 UNSPEC_COMPRESS_STORE
114 ;; For embed. rounding feature
115 UNSPEC_EMBEDDED_ROUNDING
117 ;; For AVX512PF support
118 UNSPEC_GATHER_PREFETCH
119 UNSPEC_SCATTER_PREFETCH
121 ;; For AVX512ER support
135 ;; For AVX512BW support
143 ;; For AVX512DQ support
148 ;; For AVX512IFMA support
152 ;; For AVX512VBMI support
155 ;; For AVX5124FMAPS/AVX5124VNNIW support
162 (define_c_enum "unspecv" [
172 ;; All vector modes including V?TImode, used in move patterns.
173 (define_mode_iterator VMOVE
174 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
175 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
176 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
177 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
178 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
179 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
180 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
182 ;; All AVX-512{F,VL} vector modes. Supposed TARGET_AVX512F baseline.
183 (define_mode_iterator V48_AVX512VL
184 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
185 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
186 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
187 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
189 ;; 1,2 byte AVX-512{BW,VL} vector modes. Supposed TARGET_AVX512BW baseline.
190 (define_mode_iterator VI12_AVX512VL
191 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
192 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
194 (define_mode_iterator VI1_AVX512VL
195 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
198 (define_mode_iterator V
199 [(V32QI "TARGET_AVX") V16QI
200 (V16HI "TARGET_AVX") V8HI
201 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
202 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
203 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
204 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
206 ;; All 128bit vector modes
207 (define_mode_iterator V_128
208 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
210 ;; All 256bit vector modes
211 (define_mode_iterator V_256
212 [V32QI V16HI V8SI V4DI V8SF V4DF])
214 ;; All 512bit vector modes
215 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
217 ;; All 256bit and 512bit vector modes
218 (define_mode_iterator V_256_512
219 [V32QI V16HI V8SI V4DI V8SF V4DF
220 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
221 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
223 ;; All vector float modes
224 (define_mode_iterator VF
225 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
226 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
228 ;; 128- and 256-bit float vector modes
229 (define_mode_iterator VF_128_256
230 [(V8SF "TARGET_AVX") V4SF
231 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
233 ;; All SFmode vector float modes
234 (define_mode_iterator VF1
235 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
237 ;; 128- and 256-bit SF vector modes
238 (define_mode_iterator VF1_128_256
239 [(V8SF "TARGET_AVX") V4SF])
241 (define_mode_iterator VF1_128_256VL
242 [V8SF (V4SF "TARGET_AVX512VL")])
244 ;; All DFmode vector float modes
245 (define_mode_iterator VF2
246 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
248 ;; 128- and 256-bit DF vector modes
249 (define_mode_iterator VF2_128_256
250 [(V4DF "TARGET_AVX") V2DF])
252 (define_mode_iterator VF2_512_256
253 [(V8DF "TARGET_AVX512F") V4DF])
255 (define_mode_iterator VF2_512_256VL
256 [V8DF (V4DF "TARGET_AVX512VL")])
258 ;; All 128bit vector float modes
259 (define_mode_iterator VF_128
260 [V4SF (V2DF "TARGET_SSE2")])
262 ;; All 256bit vector float modes
263 (define_mode_iterator VF_256
266 ;; All 512bit vector float modes
267 (define_mode_iterator VF_512
270 (define_mode_iterator VI48_AVX512VL
271 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
272 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
274 (define_mode_iterator VF_AVX512VL
275 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
276 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
278 (define_mode_iterator VF2_AVX512VL
279 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
281 (define_mode_iterator VF1_AVX512VL
282 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
284 ;; All vector integer modes
285 (define_mode_iterator VI
286 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
287 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
288 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
289 (V8SI "TARGET_AVX") V4SI
290 (V4DI "TARGET_AVX") V2DI])
292 (define_mode_iterator VI_AVX2
293 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
294 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
295 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
296 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
298 ;; All QImode vector integer modes
299 (define_mode_iterator VI1
300 [(V32QI "TARGET_AVX") V16QI])
302 ;; All DImode vector integer modes
303 (define_mode_iterator V_AVX
304 [V16QI V8HI V4SI V2DI V4SF V2DF
305 (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
306 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
307 (V8SF "TARGET_AVX") (V4DF"TARGET_AVX")])
309 (define_mode_iterator VI48_AVX
311 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
313 (define_mode_iterator VI8
314 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
316 (define_mode_iterator VI8_AVX512VL
317 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
319 (define_mode_iterator VI8_256_512
320 [V8DI (V4DI "TARGET_AVX512VL")])
322 (define_mode_iterator VI1_AVX2
323 [(V32QI "TARGET_AVX2") V16QI])
325 (define_mode_iterator VI1_AVX512
326 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
328 (define_mode_iterator VI2_AVX2
329 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
331 (define_mode_iterator VI2_AVX512F
332 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
334 (define_mode_iterator VI4_AVX
335 [(V8SI "TARGET_AVX") V4SI])
337 (define_mode_iterator VI4_AVX2
338 [(V8SI "TARGET_AVX2") V4SI])
340 (define_mode_iterator VI4_AVX512F
341 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
343 (define_mode_iterator VI4_AVX512VL
344 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
346 (define_mode_iterator VI48_AVX512F_AVX512VL
347 [V4SI V8SI (V16SI "TARGET_AVX512F")
348 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
350 (define_mode_iterator VI2_AVX512VL
351 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
353 (define_mode_iterator VI8_AVX2_AVX512BW
354 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
356 (define_mode_iterator VI8_AVX2
357 [(V4DI "TARGET_AVX2") V2DI])
359 (define_mode_iterator VI8_AVX2_AVX512F
360 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
362 (define_mode_iterator VI4_128_8_256
366 (define_mode_iterator V8FI
370 (define_mode_iterator V16FI
373 ;; ??? We should probably use TImode instead.
374 (define_mode_iterator VIMAX_AVX2_AVX512BW
375 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
377 ;; Suppose TARGET_AVX512BW as baseline
378 (define_mode_iterator VIMAX_AVX512VL
379 [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
381 (define_mode_iterator VIMAX_AVX2
382 [(V2TI "TARGET_AVX2") V1TI])
384 ;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
385 (define_mode_iterator SSESCALARMODE
386 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
388 (define_mode_iterator VI12_AVX2
389 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
390 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
392 (define_mode_iterator VI24_AVX2
393 [(V16HI "TARGET_AVX2") V8HI
394 (V8SI "TARGET_AVX2") V4SI])
396 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
397 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
398 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
399 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
401 (define_mode_iterator VI124_AVX2
402 [(V32QI "TARGET_AVX2") V16QI
403 (V16HI "TARGET_AVX2") V8HI
404 (V8SI "TARGET_AVX2") V4SI])
406 (define_mode_iterator VI2_AVX2_AVX512BW
407 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
409 (define_mode_iterator VI48_AVX2
410 [(V8SI "TARGET_AVX2") V4SI
411 (V4DI "TARGET_AVX2") V2DI])
413 (define_mode_iterator VI248_AVX2
414 [(V16HI "TARGET_AVX2") V8HI
415 (V8SI "TARGET_AVX2") V4SI
416 (V4DI "TARGET_AVX2") V2DI])
418 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
419 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
420 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
421 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
423 (define_mode_iterator VI248_AVX512BW
424 [(V32HI "TARGET_AVX512BW") V16SI V8DI])
426 (define_mode_iterator VI248_AVX512BW_AVX512VL
427 [(V32HI "TARGET_AVX512BW")
428 (V4DI "TARGET_AVX512VL") V16SI V8DI])
430 ;; Suppose TARGET_AVX512VL as baseline
431 (define_mode_iterator VI248_AVX512BW_1
432 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
436 (define_mode_iterator VI248_AVX512BW_2
437 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
441 (define_mode_iterator VI48_AVX512F
442 [(V16SI "TARGET_AVX512F") V8SI V4SI
443 (V8DI "TARGET_AVX512F") V4DI V2DI])
445 (define_mode_iterator VI48_AVX_AVX512F
446 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
447 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
449 (define_mode_iterator VI12_AVX_AVX512F
450 [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
451 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
453 (define_mode_iterator V48_AVX2
456 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
457 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
459 (define_mode_attr avx512
460 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
461 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
462 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
463 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
464 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
465 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
467 (define_mode_attr sse2_avx_avx512f
468 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
469 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
470 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
471 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
472 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
473 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
475 (define_mode_attr sse2_avx2
476 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
477 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
478 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
479 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
480 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
482 (define_mode_attr ssse3_avx2
483 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
484 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
485 (V4SI "ssse3") (V8SI "avx2")
486 (V2DI "ssse3") (V4DI "avx2")
487 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
489 (define_mode_attr sse4_1_avx2
490 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
491 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
492 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
493 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
495 (define_mode_attr avx_avx2
496 [(V4SF "avx") (V2DF "avx")
497 (V8SF "avx") (V4DF "avx")
498 (V4SI "avx2") (V2DI "avx2")
499 (V8SI "avx2") (V4DI "avx2")])
501 (define_mode_attr vec_avx2
502 [(V16QI "vec") (V32QI "avx2")
503 (V8HI "vec") (V16HI "avx2")
504 (V4SI "vec") (V8SI "avx2")
505 (V2DI "vec") (V4DI "avx2")])
507 (define_mode_attr avx2_avx512
508 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
509 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
510 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
511 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
512 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
514 (define_mode_attr shuffletype
515 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
516 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
517 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
518 (V32HI "i") (V16HI "i") (V8HI "i")
519 (V64QI "i") (V32QI "i") (V16QI "i")
520 (V4TI "i") (V2TI "i") (V1TI "i")])
522 (define_mode_attr ssequartermode
523 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
525 (define_mode_attr ssedoublemodelower
526 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
527 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
528 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
530 (define_mode_attr ssedoublemode
531 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
532 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
533 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
534 (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
535 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
536 (V4DI "V8DI") (V8DI "V16DI")])
538 (define_mode_attr ssebytemode
539 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
541 ;; All 128bit vector integer modes
542 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
544 ;; All 256bit vector integer modes
545 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
547 ;; Various 128bit vector integer mode combinations
548 (define_mode_iterator VI12_128 [V16QI V8HI])
549 (define_mode_iterator VI14_128 [V16QI V4SI])
550 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
551 (define_mode_iterator VI24_128 [V8HI V4SI])
552 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
553 (define_mode_iterator VI48_128 [V4SI V2DI])
555 ;; Various 256bit and 512 vector integer mode combinations
556 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
557 (define_mode_iterator VI124_256_AVX512F_AVX512BW
559 (V64QI "TARGET_AVX512BW")
560 (V32HI "TARGET_AVX512BW")
561 (V16SI "TARGET_AVX512F")])
562 (define_mode_iterator VI48_256 [V8SI V4DI])
563 (define_mode_iterator VI48_512 [V16SI V8DI])
564 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
565 (define_mode_iterator VI_AVX512BW
566 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
568 ;; Int-float size matches
569 (define_mode_iterator VI4F_128 [V4SI V4SF])
570 (define_mode_iterator VI8F_128 [V2DI V2DF])
571 (define_mode_iterator VI4F_256 [V8SI V8SF])
572 (define_mode_iterator VI8F_256 [V4DI V4DF])
573 (define_mode_iterator VI48F_256_512
575 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
576 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
577 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
578 (define_mode_iterator VF48_I1248
579 [V16SI V16SF V8DI V8DF V32HI V64QI])
580 (define_mode_iterator VI48F
581 [V16SI V16SF V8DI V8DF
582 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
583 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
584 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
585 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
586 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
588 ;; Mapping from float mode to required SSE level
589 (define_mode_attr sse
590 [(SF "sse") (DF "sse2")
591 (V4SF "sse") (V2DF "sse2")
592 (V16SF "avx512f") (V8SF "avx")
593 (V8DF "avx512f") (V4DF "avx")])
595 (define_mode_attr sse2
596 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
597 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
599 (define_mode_attr sse3
600 [(V16QI "sse3") (V32QI "avx")])
602 (define_mode_attr sse4_1
603 [(V4SF "sse4_1") (V2DF "sse4_1")
604 (V8SF "avx") (V4DF "avx")
606 (V4DI "avx") (V2DI "sse4_1")
607 (V8SI "avx") (V4SI "sse4_1")
608 (V16QI "sse4_1") (V32QI "avx")
609 (V8HI "sse4_1") (V16HI "avx")])
611 (define_mode_attr avxsizesuffix
612 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
613 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
614 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
615 (V16SF "512") (V8DF "512")
616 (V8SF "256") (V4DF "256")
617 (V4SF "") (V2DF "")])
619 ;; SSE instruction mode
620 (define_mode_attr sseinsnmode
621 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
622 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
623 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
624 (V16SF "V16SF") (V8DF "V8DF")
625 (V8SF "V8SF") (V4DF "V4DF")
626 (V4SF "V4SF") (V2DF "V2DF")
629 ;; Mapping of vector modes to corresponding mask size
630 (define_mode_attr avx512fmaskmode
631 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
632 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
633 (V16SI "HI") (V8SI "QI") (V4SI "QI")
634 (V8DI "QI") (V4DI "QI") (V2DI "QI")
635 (V16SF "HI") (V8SF "QI") (V4SF "QI")
636 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
638 ;; Mapping of vector modes to corresponding mask size
639 (define_mode_attr avx512fmaskmodelower
640 [(V64QI "di") (V32QI "si") (V16QI "hi")
641 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
642 (V16SI "hi") (V8SI "qi") (V4SI "qi")
643 (V8DI "qi") (V4DI "qi") (V2DI "qi")
644 (V16SF "hi") (V8SF "qi") (V4SF "qi")
645 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
647 ;; Mapping of vector float modes to an integer mode of the same size
648 (define_mode_attr sseintvecmode
649 [(V16SF "V16SI") (V8DF "V8DI")
650 (V8SF "V8SI") (V4DF "V4DI")
651 (V4SF "V4SI") (V2DF "V2DI")
652 (V16SI "V16SI") (V8DI "V8DI")
653 (V8SI "V8SI") (V4DI "V4DI")
654 (V4SI "V4SI") (V2DI "V2DI")
655 (V16HI "V16HI") (V8HI "V8HI")
656 (V32HI "V32HI") (V64QI "V64QI")
657 (V32QI "V32QI") (V16QI "V16QI")])
659 (define_mode_attr sseintvecmode2
660 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
661 (V8SF "OI") (V4SF "TI")])
663 (define_mode_attr sseintvecmodelower
664 [(V16SF "v16si") (V8DF "v8di")
665 (V8SF "v8si") (V4DF "v4di")
666 (V4SF "v4si") (V2DF "v2di")
667 (V8SI "v8si") (V4DI "v4di")
668 (V4SI "v4si") (V2DI "v2di")
669 (V16HI "v16hi") (V8HI "v8hi")
670 (V32QI "v32qi") (V16QI "v16qi")])
672 ;; Mapping of vector modes to a vector mode of double size
673 (define_mode_attr ssedoublevecmode
674 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
675 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
676 (V8SF "V16SF") (V4DF "V8DF")
677 (V4SF "V8SF") (V2DF "V4DF")])
679 ;; Mapping of vector modes to a vector mode of half size
680 (define_mode_attr ssehalfvecmode
681 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
682 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
683 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
684 (V16SF "V8SF") (V8DF "V4DF")
685 (V8SF "V4SF") (V4DF "V2DF")
688 (define_mode_attr ssehalfvecmodelower
689 [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
690 (V32QI "v16qi") (V16HI "v8hi") (V8SI "v4si") (V4DI "v2di")
691 (V16QI "v8qi") (V8HI "v4hi") (V4SI "v2si")
692 (V16SF "v8sf") (V8DF "v4df")
693 (V8SF "v4sf") (V4DF "v2df")
696 ;; Mapping of vector modes ti packed single mode of the same size
697 (define_mode_attr ssePSmode
698 [(V16SI "V16SF") (V8DF "V16SF")
699 (V16SF "V16SF") (V8DI "V16SF")
700 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
701 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
702 (V8SI "V8SF") (V4SI "V4SF")
703 (V4DI "V8SF") (V2DI "V4SF")
704 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
705 (V8SF "V8SF") (V4SF "V4SF")
706 (V4DF "V8SF") (V2DF "V4SF")])
708 (define_mode_attr ssePSmode2
709 [(V8DI "V8SF") (V4DI "V4SF")])
711 ;; Mapping of vector modes back to the scalar modes
712 (define_mode_attr ssescalarmode
713 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
714 (V32HI "HI") (V16HI "HI") (V8HI "HI")
715 (V16SI "SI") (V8SI "SI") (V4SI "SI")
716 (V8DI "DI") (V4DI "DI") (V2DI "DI")
717 (V16SF "SF") (V8SF "SF") (V4SF "SF")
718 (V8DF "DF") (V4DF "DF") (V2DF "DF")
719 (V4TI "TI") (V2TI "TI")])
721 ;; Mapping of vector modes back to the scalar modes
722 (define_mode_attr ssescalarmodelower
723 [(V64QI "qi") (V32QI "qi") (V16QI "qi")
724 (V32HI "hi") (V16HI "hi") (V8HI "hi")
725 (V16SI "si") (V8SI "si") (V4SI "si")
726 (V8DI "di") (V4DI "di") (V2DI "di")
727 (V16SF "sf") (V8SF "sf") (V4SF "sf")
728 (V8DF "df") (V4DF "df") (V2DF "df")
729 (V4TI "ti") (V2TI "ti")])
731 ;; Mapping of vector modes to the 128bit modes
732 (define_mode_attr ssexmmmode
733 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
734 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
735 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
736 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
737 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
738 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
740 ;; Pointer size override for scalar modes (Intel asm dialect)
741 (define_mode_attr iptr
742 [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
743 (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
744 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
745 (V8SF "k") (V4DF "q")
746 (V4SF "k") (V2DF "q")
749 ;; Number of scalar elements in each vector type
750 (define_mode_attr ssescalarnum
751 [(V64QI "64") (V16SI "16") (V8DI "8")
752 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
753 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
754 (V16SF "16") (V8DF "8")
755 (V8SF "8") (V4DF "4")
756 (V4SF "4") (V2DF "2")])
758 ;; Mask of scalar elements in each vector type
759 (define_mode_attr ssescalarnummask
760 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
761 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
762 (V8SF "7") (V4DF "3")
763 (V4SF "3") (V2DF "1")])
765 (define_mode_attr ssescalarsize
766 [(V4TI "64") (V2TI "64") (V1TI "64")
767 (V8DI "64") (V4DI "64") (V2DI "64")
768 (V64QI "8") (V32QI "8") (V16QI "8")
769 (V32HI "16") (V16HI "16") (V8HI "16")
770 (V16SI "32") (V8SI "32") (V4SI "32")
771 (V16SF "32") (V8SF "32") (V4SF "32")
772 (V8DF "64") (V4DF "64") (V2DF "64")])
774 ;; SSE prefix for integer vector modes
775 (define_mode_attr sseintprefix
776 [(V2DI "p") (V2DF "")
781 (V16SI "p") (V16SF "")
782 (V16QI "p") (V8HI "p")
783 (V32QI "p") (V16HI "p")
784 (V64QI "p") (V32HI "p")])
786 ;; SSE scalar suffix for vector modes
787 (define_mode_attr ssescalarmodesuffix
789 (V8SF "ss") (V4DF "sd")
790 (V4SF "ss") (V2DF "sd")
791 (V8SI "ss") (V4DI "sd")
794 ;; Pack/unpack vector modes
795 (define_mode_attr sseunpackmode
796 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
797 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
798 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
800 (define_mode_attr ssepackmode
801 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
802 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
803 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
805 ;; Mapping of the max integer size for xop rotate immediate constraint
806 (define_mode_attr sserotatemax
807 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
809 ;; Mapping of mode to cast intrinsic name
810 (define_mode_attr castmode
811 [(V8SI "si") (V8SF "ps") (V4DF "pd")
812 (V16SI "si") (V16SF "ps") (V8DF "pd")])
814 ;; Instruction suffix for sign and zero extensions.
815 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
817 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
818 ;; i64x4 or f64x4 for 512bit modes.
819 (define_mode_attr i128
820 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
821 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
822 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
824 ;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
825 ;; i32x4, f32x4, i64x2 or f64x2 suffixes.
826 (define_mode_attr i128vldq
827 [(V8SF "f32x4") (V4DF "f64x2")
828 (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
831 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
832 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
834 ;; Mapping for dbpsabbw modes
835 (define_mode_attr dbpsadbwmode
836 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
838 ;; Mapping suffixes for broadcast
839 (define_mode_attr bcstscalarsuff
840 [(V64QI "b") (V32QI "b") (V16QI "b")
841 (V32HI "w") (V16HI "w") (V8HI "w")
842 (V16SI "d") (V8SI "d") (V4SI "d")
843 (V8DI "q") (V4DI "q") (V2DI "q")
844 (V16SF "ss") (V8SF "ss") (V4SF "ss")
845 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
847 ;; Tie mode of assembler operand to mode iterator
848 (define_mode_attr concat_tg_mode
849 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
850 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
852 ;; Tie mode of assembler operand to mode iterator
853 (define_mode_attr xtg_mode
854 [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
855 (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
856 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
858 ;; Half mask mode for unpacks
859 (define_mode_attr HALFMASKMODE
860 [(DI "SI") (SI "HI")])
862 ;; Double mask mode for packs
863 (define_mode_attr DOUBLEMASKMODE
864 [(HI "SI") (SI "DI")])
867 ;; Include define_subst patterns for instructions with mask
870 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
872 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
876 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
878 ;; All of these patterns are enabled for SSE1 as well as SSE2.
879 ;; This is essential for maintaining stable calling conventions.
881 (define_expand "mov<mode>"
882 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
883 (match_operand:VMOVE 1 "nonimmediate_operand"))]
886 ix86_expand_vector_move (<MODE>mode, operands);
890 (define_insn "mov<mode>_internal"
891 [(set (match_operand:VMOVE 0 "nonimmediate_operand"
893 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
896 && (register_operand (operands[0], <MODE>mode)
897 || register_operand (operands[1], <MODE>mode))"
899 switch (get_attr_type (insn))
902 return standard_sse_constant_opcode (insn, operands[1]);
905 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
906 in avx512f, so we need to use workarounds, to access sse registers
907 16-31, which are evex-only. In avx512vl we don't need workarounds. */
908 if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
909 && (EXT_REX_SSE_REG_P (operands[0])
910 || EXT_REX_SSE_REG_P (operands[1])))
912 if (memory_operand (operands[0], <MODE>mode))
914 if (<MODE_SIZE> == 32)
915 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
916 else if (<MODE_SIZE> == 16)
917 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
921 else if (memory_operand (operands[1], <MODE>mode))
923 if (<MODE_SIZE> == 32)
924 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
925 else if (<MODE_SIZE> == 16)
926 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
931 /* Reg -> reg move is always aligned. Just use wider move. */
932 switch (get_attr_mode (insn))
936 return "vmovaps\t{%g1, %g0|%g0, %g1}";
939 return "vmovapd\t{%g1, %g0|%g0, %g1}";
942 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
948 switch (get_attr_mode (insn))
953 if (misaligned_operand (operands[0], <MODE>mode)
954 || misaligned_operand (operands[1], <MODE>mode))
955 return "%vmovups\t{%1, %0|%0, %1}";
957 return "%vmovaps\t{%1, %0|%0, %1}";
962 if (misaligned_operand (operands[0], <MODE>mode)
963 || misaligned_operand (operands[1], <MODE>mode))
964 return "%vmovupd\t{%1, %0|%0, %1}";
966 return "%vmovapd\t{%1, %0|%0, %1}";
970 if (misaligned_operand (operands[0], <MODE>mode)
971 || misaligned_operand (operands[1], <MODE>mode))
972 return TARGET_AVX512VL ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
973 : "%vmovdqu\t{%1, %0|%0, %1}";
975 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
976 : "%vmovdqa\t{%1, %0|%0, %1}";
978 if (misaligned_operand (operands[0], <MODE>mode)
979 || misaligned_operand (operands[1], <MODE>mode))
980 return (<MODE>mode == V16SImode
981 || <MODE>mode == V8DImode
983 ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
984 : "vmovdqu64\t{%1, %0|%0, %1}";
986 return "vmovdqa64\t{%1, %0|%0, %1}";
996 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
997 (set_attr "prefix" "maybe_vex")
999 (cond [(and (eq_attr "alternative" "1")
1000 (match_test "TARGET_AVX512VL"))
1001 (const_string "<sseinsnmode>")
1002 (and (match_test "<MODE_SIZE> == 16")
1003 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1004 (and (eq_attr "alternative" "3")
1005 (match_test "TARGET_SSE_TYPELESS_STORES"))))
1006 (const_string "<ssePSmode>")
1007 (match_test "TARGET_AVX")
1008 (const_string "<sseinsnmode>")
1009 (ior (not (match_test "TARGET_SSE2"))
1010 (match_test "optimize_function_for_size_p (cfun)"))
1011 (const_string "V4SF")
1012 (and (eq_attr "alternative" "0")
1013 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
1016 (const_string "<sseinsnmode>")))
1017 (set (attr "enabled")
1018 (cond [(and (match_test "<MODE_SIZE> == 16")
1019 (eq_attr "alternative" "1"))
1020 (symbol_ref "TARGET_SSE2")
1021 (and (match_test "<MODE_SIZE> == 32")
1022 (eq_attr "alternative" "1"))
1023 (symbol_ref "TARGET_AVX2")
1025 (symbol_ref "true")))])
1027 (define_insn "<avx512>_load<mode>_mask"
1028 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1029 (vec_merge:V48_AVX512VL
1030 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
1031 (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
1032 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1035 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1037 if (misaligned_operand (operands[1], <MODE>mode))
1038 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1040 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1044 if (misaligned_operand (operands[1], <MODE>mode))
1045 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1047 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1050 [(set_attr "type" "ssemov")
1051 (set_attr "prefix" "evex")
1052 (set_attr "memory" "none,load")
1053 (set_attr "mode" "<sseinsnmode>")])
1055 (define_insn "<avx512>_load<mode>_mask"
1056 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1057 (vec_merge:VI12_AVX512VL
1058 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
1059 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
1060 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1062 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1063 [(set_attr "type" "ssemov")
1064 (set_attr "prefix" "evex")
1065 (set_attr "memory" "none,load")
1066 (set_attr "mode" "<sseinsnmode>")])
1068 (define_insn "<avx512>_blendm<mode>"
1069 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1070 (vec_merge:V48_AVX512VL
1071 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
1072 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1073 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1075 "vblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1076 [(set_attr "type" "ssemov")
1077 (set_attr "prefix" "evex")
1078 (set_attr "mode" "<sseinsnmode>")])
1080 (define_insn "<avx512>_blendm<mode>"
1081 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1082 (vec_merge:VI12_AVX512VL
1083 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
1084 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1085 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1087 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1088 [(set_attr "type" "ssemov")
1089 (set_attr "prefix" "evex")
1090 (set_attr "mode" "<sseinsnmode>")])
1092 (define_insn "<avx512>_store<mode>_mask"
1093 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1094 (vec_merge:V48_AVX512VL
1095 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1097 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1100 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1102 if (misaligned_operand (operands[0], <MODE>mode))
1103 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1105 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1109 if (misaligned_operand (operands[0], <MODE>mode))
1110 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1112 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1115 [(set_attr "type" "ssemov")
1116 (set_attr "prefix" "evex")
1117 (set_attr "memory" "store")
1118 (set_attr "mode" "<sseinsnmode>")])
1120 (define_insn "<avx512>_store<mode>_mask"
1121 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1122 (vec_merge:VI12_AVX512VL
1123 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1125 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1127 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1128 [(set_attr "type" "ssemov")
1129 (set_attr "prefix" "evex")
1130 (set_attr "memory" "store")
1131 (set_attr "mode" "<sseinsnmode>")])
1133 (define_insn "sse2_movq128"
1134 [(set (match_operand:V2DI 0 "register_operand" "=v")
1137 (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1138 (parallel [(const_int 0)]))
1141 "%vmovq\t{%1, %0|%0, %q1}"
1142 [(set_attr "type" "ssemov")
1143 (set_attr "prefix" "maybe_vex")
1144 (set_attr "mode" "TI")])
1146 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1147 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1148 ;; from memory, we'd prefer to load the memory directly into the %xmm
1149 ;; register. To facilitate this happy circumstance, this pattern won't
1150 ;; split until after register allocation. If the 64-bit value didn't
1151 ;; come from memory, this is the best we can do. This is much better
1152 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1155 (define_insn_and_split "movdi_to_sse"
1157 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1158 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
1159 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
1160 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1162 "&& reload_completed"
1165 if (register_operand (operands[1], DImode))
1167 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1168 Assemble the 64-bit DImode value in an xmm register. */
1169 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1170 gen_lowpart (SImode, operands[1])));
1171 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1172 gen_highpart (SImode, operands[1])));
1173 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1176 else if (memory_operand (operands[1], DImode))
1178 rtx tmp = gen_reg_rtx (V2DImode);
1179 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
1180 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
1188 [(set (match_operand:V4SF 0 "register_operand")
1189 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1190 "TARGET_SSE && reload_completed"
1193 (vec_duplicate:V4SF (match_dup 1))
1197 operands[1] = gen_lowpart (SFmode, operands[1]);
1198 operands[2] = CONST0_RTX (V4SFmode);
1202 [(set (match_operand:V2DF 0 "register_operand")
1203 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1204 "TARGET_SSE2 && reload_completed"
1205 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1207 operands[1] = gen_lowpart (DFmode, operands[1]);
1208 operands[2] = CONST0_RTX (DFmode);
1211 (define_expand "movmisalign<mode>"
1212 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1213 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1216 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1220 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
1222 [(set (match_operand:V2DF 0 "sse_reg_operand")
1223 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1224 (match_operand:DF 4 "const0_operand")))
1225 (set (match_operand:V2DF 2 "sse_reg_operand")
1226 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1227 (parallel [(const_int 0)]))
1228 (match_operand:DF 3 "memory_operand")))]
1229 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1230 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1231 [(set (match_dup 2) (match_dup 5))]
1232 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1235 [(set (match_operand:DF 0 "sse_reg_operand")
1236 (match_operand:DF 1 "memory_operand"))
1237 (set (match_operand:V2DF 2 "sse_reg_operand")
1238 (vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1239 (match_operand:DF 3 "memory_operand")))]
1240 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1241 && REGNO (operands[4]) == REGNO (operands[2])
1242 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1243 [(set (match_dup 2) (match_dup 5))]
1244 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1246 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
1248 [(set (match_operand:DF 0 "memory_operand")
1249 (vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1250 (parallel [(const_int 0)])))
1251 (set (match_operand:DF 2 "memory_operand")
1252 (vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1253 (parallel [(const_int 1)])))]
1254 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1255 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1256 [(set (match_dup 4) (match_dup 1))]
1257 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1259 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1260 [(set (match_operand:VI1 0 "register_operand" "=x")
1261 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1264 "%vlddqu\t{%1, %0|%0, %1}"
1265 [(set_attr "type" "ssemov")
1266 (set_attr "movu" "1")
1267 (set (attr "prefix_data16")
1269 (match_test "TARGET_AVX")
1271 (const_string "0")))
1272 (set (attr "prefix_rep")
1274 (match_test "TARGET_AVX")
1276 (const_string "1")))
1277 (set_attr "prefix" "maybe_vex")
1278 (set_attr "mode" "<sseinsnmode>")])
1280 (define_insn "sse2_movnti<mode>"
1281 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1282 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1285 "movnti\t{%1, %0|%0, %1}"
1286 [(set_attr "type" "ssemov")
1287 (set_attr "prefix_data16" "0")
1288 (set_attr "mode" "<MODE>")])
1290 (define_insn "<sse>_movnt<mode>"
1291 [(set (match_operand:VF 0 "memory_operand" "=m")
1293 [(match_operand:VF 1 "register_operand" "v")]
1296 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1297 [(set_attr "type" "ssemov")
1298 (set_attr "prefix" "maybe_vex")
1299 (set_attr "mode" "<MODE>")])
1301 (define_insn "<sse2>_movnt<mode>"
1302 [(set (match_operand:VI8 0 "memory_operand" "=m")
1303 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1306 "%vmovntdq\t{%1, %0|%0, %1}"
1307 [(set_attr "type" "ssecvt")
1308 (set (attr "prefix_data16")
1310 (match_test "TARGET_AVX")
1312 (const_string "1")))
1313 (set_attr "prefix" "maybe_vex")
1314 (set_attr "mode" "<sseinsnmode>")])
1316 ; Expand patterns for non-temporal stores. At the moment, only those
1317 ; that directly map to insns are defined; it would be possible to
1318 ; define patterns for other modes that would expand to several insns.
1320 ;; Modes handled by storent patterns.
1321 (define_mode_iterator STORENT_MODE
1322 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1323 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1324 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1325 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1326 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1328 (define_expand "storent<mode>"
1329 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1330 (unspec:STORENT_MODE
1331 [(match_operand:STORENT_MODE 1 "register_operand")]
1335 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1339 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1341 ;; All integer modes with AVX512BW/DQ.
1342 (define_mode_iterator SWI1248_AVX512BWDQ
1343 [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1345 ;; All integer modes with AVX512BW, where HImode operation
1346 ;; can be used instead of QImode.
1347 (define_mode_iterator SWI1248_AVX512BW
1348 [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1350 ;; All integer modes with AVX512BW/DQ, even HImode requires DQ.
1351 (define_mode_iterator SWI1248_AVX512BWDQ2
1352 [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1353 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1355 (define_expand "kmov<mskmodesuffix>"
1356 [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1357 (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1359 && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
1361 (define_insn "k<code><mode>"
1362 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1363 (any_logic:SWI1248_AVX512BW
1364 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1365 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1366 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1369 if (get_attr_mode (insn) == MODE_HI)
1370 return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1372 return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1374 [(set_attr "type" "msklog")
1375 (set_attr "prefix" "vex")
1377 (cond [(and (match_test "<MODE>mode == QImode")
1378 (not (match_test "TARGET_AVX512DQ")))
1381 (const_string "<MODE>")))])
1383 (define_insn "kandn<mode>"
1384 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1385 (and:SWI1248_AVX512BW
1386 (not:SWI1248_AVX512BW
1387 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1388 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1389 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1392 if (get_attr_mode (insn) == MODE_HI)
1393 return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1395 return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1397 [(set_attr "type" "msklog")
1398 (set_attr "prefix" "vex")
1400 (cond [(and (match_test "<MODE>mode == QImode")
1401 (not (match_test "TARGET_AVX512DQ")))
1404 (const_string "<MODE>")))])
1406 (define_insn "kxnor<mode>"
1407 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1408 (not:SWI1248_AVX512BW
1409 (xor:SWI1248_AVX512BW
1410 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1411 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1412 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1415 if (get_attr_mode (insn) == MODE_HI)
1416 return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1418 return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1420 [(set_attr "type" "msklog")
1421 (set_attr "prefix" "vex")
1423 (cond [(and (match_test "<MODE>mode == QImode")
1424 (not (match_test "TARGET_AVX512DQ")))
1427 (const_string "<MODE>")))])
1429 (define_insn "knot<mode>"
1430 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1431 (not:SWI1248_AVX512BW
1432 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1433 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1436 if (get_attr_mode (insn) == MODE_HI)
1437 return "knotw\t{%1, %0|%0, %1}";
1439 return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1441 [(set_attr "type" "msklog")
1442 (set_attr "prefix" "vex")
1444 (cond [(and (match_test "<MODE>mode == QImode")
1445 (not (match_test "TARGET_AVX512DQ")))
1448 (const_string "<MODE>")))])
1450 (define_insn "kadd<mode>"
1451 [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
1452 (plus:SWI1248_AVX512BWDQ2
1453 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
1454 (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
1455 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1457 "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1458 [(set_attr "type" "msklog")
1459 (set_attr "prefix" "vex")
1460 (set_attr "mode" "<MODE>")])
1462 ;; Mask variant shift mnemonics
1463 (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
1465 (define_insn "k<code><mode>"
1466 [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
1467 (any_lshift:SWI1248_AVX512BWDQ
1468 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
1469 (match_operand:QI 2 "immediate_operand" "n")))
1470 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1472 "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1473 [(set_attr "type" "msklog")
1474 (set_attr "prefix" "vex")
1475 (set_attr "mode" "<MODE>")])
1477 (define_insn "ktest<mode>"
1478 [(set (reg:CC FLAGS_REG)
1480 [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
1481 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
1484 "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
1485 [(set_attr "mode" "<MODE>")
1486 (set_attr "type" "msklog")
1487 (set_attr "prefix" "vex")])
1489 (define_insn "kortest<mode>"
1490 [(set (reg:CC FLAGS_REG)
1492 [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
1493 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
1496 "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
1497 [(set_attr "mode" "<MODE>")
1498 (set_attr "type" "msklog")
1499 (set_attr "prefix" "vex")])
1501 (define_insn "kunpckhi"
1502 [(set (match_operand:HI 0 "register_operand" "=k")
1505 (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
1507 (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
1509 "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
1510 [(set_attr "mode" "HI")
1511 (set_attr "type" "msklog")
1512 (set_attr "prefix" "vex")])
1514 (define_insn "kunpcksi"
1515 [(set (match_operand:SI 0 "register_operand" "=k")
1518 (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
1520 (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
1522 "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
1523 [(set_attr "mode" "SI")])
1525 (define_insn "kunpckdi"
1526 [(set (match_operand:DI 0 "register_operand" "=k")
1529 (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
1531 (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
1533 "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
1534 [(set_attr "mode" "DI")])
1537 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1539 ;; Parallel floating point arithmetic
1541 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1543 (define_expand "<code><mode>2"
1544 [(set (match_operand:VF 0 "register_operand")
1546 (match_operand:VF 1 "register_operand")))]
1548 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1550 (define_insn_and_split "*absneg<mode>2"
1551 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1552 (match_operator:VF 3 "absneg_operator"
1553 [(match_operand:VF 1 "vector_operand" "0, xBm,v, m")]))
1554 (use (match_operand:VF 2 "vector_operand" "xBm,0, vm,v"))]
1557 "&& reload_completed"
1560 enum rtx_code absneg_op;
1566 if (MEM_P (operands[1]))
1567 op1 = operands[2], op2 = operands[1];
1569 op1 = operands[1], op2 = operands[2];
1574 if (rtx_equal_p (operands[0], operands[1]))
1580 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1581 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1582 t = gen_rtx_SET (operands[0], t);
1586 [(set_attr "isa" "noavx,noavx,avx,avx")])
1588 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1589 [(set (match_operand:VF 0 "register_operand")
1591 (match_operand:VF 1 "<round_nimm_predicate>")
1592 (match_operand:VF 2 "<round_nimm_predicate>")))]
1593 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1594 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1596 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1597 [(set (match_operand:VF 0 "register_operand" "=x,v")
1599 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1600 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1601 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1603 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1604 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1605 [(set_attr "isa" "noavx,avx")
1606 (set_attr "type" "sseadd")
1607 (set_attr "prefix" "<mask_prefix3>")
1608 (set_attr "mode" "<MODE>")])
1610 (define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name>"
1611 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1614 (match_operand:VF_128 1 "register_operand" "0,v")
1615 (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1620 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1621 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1622 [(set_attr "isa" "noavx,avx")
1623 (set_attr "type" "sseadd")
1624 (set_attr "prefix" "<round_scalar_prefix>")
1625 (set_attr "mode" "<ssescalarmode>")])
1627 (define_expand "mul<mode>3<mask_name><round_name>"
1628 [(set (match_operand:VF 0 "register_operand")
1630 (match_operand:VF 1 "<round_nimm_predicate>")
1631 (match_operand:VF 2 "<round_nimm_predicate>")))]
1632 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1633 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1635 (define_insn "*mul<mode>3<mask_name><round_name>"
1636 [(set (match_operand:VF 0 "register_operand" "=x,v")
1638 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1639 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1640 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1642 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1643 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1644 [(set_attr "isa" "noavx,avx")
1645 (set_attr "type" "ssemul")
1646 (set_attr "prefix" "<mask_prefix3>")
1647 (set_attr "btver2_decode" "direct,double")
1648 (set_attr "mode" "<MODE>")])
1650 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
1651 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1654 (match_operand:VF_128 1 "register_operand" "0,v")
1655 (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1660 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1661 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1662 [(set_attr "isa" "noavx,avx")
1663 (set_attr "type" "sse<multdiv_mnemonic>")
1664 (set_attr "prefix" "<round_scalar_prefix>")
1665 (set_attr "btver2_decode" "direct,double")
1666 (set_attr "mode" "<ssescalarmode>")])
1668 (define_expand "div<mode>3"
1669 [(set (match_operand:VF2 0 "register_operand")
1670 (div:VF2 (match_operand:VF2 1 "register_operand")
1671 (match_operand:VF2 2 "vector_operand")))]
1673 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1675 (define_expand "div<mode>3"
1676 [(set (match_operand:VF1 0 "register_operand")
1677 (div:VF1 (match_operand:VF1 1 "register_operand")
1678 (match_operand:VF1 2 "vector_operand")))]
1681 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1684 && TARGET_RECIP_VEC_DIV
1685 && !optimize_insn_for_size_p ()
1686 && flag_finite_math_only && !flag_trapping_math
1687 && flag_unsafe_math_optimizations)
1689 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
1694 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1695 [(set (match_operand:VF 0 "register_operand" "=x,v")
1697 (match_operand:VF 1 "register_operand" "0,v")
1698 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1699 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1701 div<ssemodesuffix>\t{%2, %0|%0, %2}
1702 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1703 [(set_attr "isa" "noavx,avx")
1704 (set_attr "type" "ssediv")
1705 (set_attr "prefix" "<mask_prefix3>")
1706 (set_attr "mode" "<MODE>")])
1708 (define_insn "<sse>_rcp<mode>2"
1709 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1711 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
1713 "%vrcpps\t{%1, %0|%0, %1}"
1714 [(set_attr "type" "sse")
1715 (set_attr "atom_sse_attr" "rcp")
1716 (set_attr "btver2_sse_attr" "rcp")
1717 (set_attr "prefix" "maybe_vex")
1718 (set_attr "mode" "<MODE>")])
1720 (define_insn "sse_vmrcpv4sf2"
1721 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1723 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1725 (match_operand:V4SF 2 "register_operand" "0,x")
1729 rcpss\t{%1, %0|%0, %k1}
1730 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1731 [(set_attr "isa" "noavx,avx")
1732 (set_attr "type" "sse")
1733 (set_attr "atom_sse_attr" "rcp")
1734 (set_attr "btver2_sse_attr" "rcp")
1735 (set_attr "prefix" "orig,vex")
1736 (set_attr "mode" "SF")])
1738 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1739 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1741 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1744 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1745 [(set_attr "type" "sse")
1746 (set_attr "prefix" "evex")
1747 (set_attr "mode" "<MODE>")])
1749 (define_insn "srcp14<mode>"
1750 [(set (match_operand:VF_128 0 "register_operand" "=v")
1753 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1755 (match_operand:VF_128 2 "register_operand" "v")
1758 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1759 [(set_attr "type" "sse")
1760 (set_attr "prefix" "evex")
1761 (set_attr "mode" "<MODE>")])
1763 (define_insn "srcp14<mode>_mask"
1764 [(set (match_operand:VF_128 0 "register_operand" "=v")
1768 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1770 (match_operand:VF_128 3 "vector_move_operand" "0C")
1771 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1772 (match_operand:VF_128 2 "register_operand" "v")
1775 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
1776 [(set_attr "type" "sse")
1777 (set_attr "prefix" "evex")
1778 (set_attr "mode" "<MODE>")])
1780 (define_expand "sqrt<mode>2"
1781 [(set (match_operand:VF2 0 "register_operand")
1782 (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
1785 (define_expand "sqrt<mode>2"
1786 [(set (match_operand:VF1 0 "register_operand")
1787 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
1791 && TARGET_RECIP_VEC_SQRT
1792 && !optimize_insn_for_size_p ()
1793 && flag_finite_math_only && !flag_trapping_math
1794 && flag_unsafe_math_optimizations)
1796 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
1801 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1802 [(set (match_operand:VF 0 "register_operand" "=x,v")
1803 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1804 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1806 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
1807 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1808 [(set_attr "isa" "noavx,avx")
1809 (set_attr "type" "sse")
1810 (set_attr "atom_sse_attr" "sqrt")
1811 (set_attr "btver2_sse_attr" "sqrt")
1812 (set_attr "prefix" "maybe_vex")
1813 (set_attr "mode" "<MODE>")])
1815 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1816 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1819 (match_operand:VF_128 1 "vector_operand" "xBm,<round_constraint>"))
1820 (match_operand:VF_128 2 "register_operand" "0,v")
1824 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1825 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1826 [(set_attr "isa" "noavx,avx")
1827 (set_attr "type" "sse")
1828 (set_attr "atom_sse_attr" "sqrt")
1829 (set_attr "prefix" "<round_prefix>")
1830 (set_attr "btver2_sse_attr" "sqrt")
1831 (set_attr "mode" "<ssescalarmode>")])
1833 (define_expand "rsqrt<mode>2"
1834 [(set (match_operand:VF1_128_256 0 "register_operand")
1836 [(match_operand:VF1_128_256 1 "vector_operand")] UNSPEC_RSQRT))]
1839 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
1843 (define_expand "rsqrtv16sf2"
1844 [(set (match_operand:V16SF 0 "register_operand")
1846 [(match_operand:V16SF 1 "vector_operand")]
1848 "TARGET_SSE_MATH && TARGET_AVX512ER"
1850 ix86_emit_swsqrtsf (operands[0], operands[1], V16SFmode, true);
1854 (define_insn "<sse>_rsqrt<mode>2"
1855 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1857 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
1859 "%vrsqrtps\t{%1, %0|%0, %1}"
1860 [(set_attr "type" "sse")
1861 (set_attr "prefix" "maybe_vex")
1862 (set_attr "mode" "<MODE>")])
1864 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1865 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1867 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1870 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1871 [(set_attr "type" "sse")
1872 (set_attr "prefix" "evex")
1873 (set_attr "mode" "<MODE>")])
1875 (define_insn "rsqrt14<mode>"
1876 [(set (match_operand:VF_128 0 "register_operand" "=v")
1879 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1881 (match_operand:VF_128 2 "register_operand" "v")
1884 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1885 [(set_attr "type" "sse")
1886 (set_attr "prefix" "evex")
1887 (set_attr "mode" "<MODE>")])
1889 (define_insn "rsqrt14_<mode>_mask"
1890 [(set (match_operand:VF_128 0 "register_operand" "=v")
1894 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1896 (match_operand:VF_128 3 "vector_move_operand" "0C")
1897 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1898 (match_operand:VF_128 2 "register_operand" "v")
1901 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
1902 [(set_attr "type" "sse")
1903 (set_attr "prefix" "evex")
1904 (set_attr "mode" "<MODE>")])
1906 (define_insn "sse_vmrsqrtv4sf2"
1907 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1909 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1911 (match_operand:V4SF 2 "register_operand" "0,x")
1915 rsqrtss\t{%1, %0|%0, %k1}
1916 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1917 [(set_attr "isa" "noavx,avx")
1918 (set_attr "type" "sse")
1919 (set_attr "prefix" "orig,vex")
1920 (set_attr "mode" "SF")])
1922 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1923 [(set (match_operand:VF 0 "register_operand")
1925 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1926 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1927 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1929 if (!flag_finite_math_only || flag_signed_zeros)
1931 operands[1] = force_reg (<MODE>mode, operands[1]);
1932 emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
1933 (operands[0], operands[1], operands[2]
1934 <mask_operand_arg34>
1935 <round_saeonly_mask_arg3>));
1939 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1942 ;; These versions of the min/max patterns are intentionally ignorant of
1943 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
1944 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
1945 ;; are undefined in this condition, we're certain this is correct.
1947 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1948 [(set (match_operand:VF 0 "register_operand" "=x,v")
1950 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1951 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
1952 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1953 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1955 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1956 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1957 [(set_attr "isa" "noavx,avx")
1958 (set_attr "type" "sseadd")
1959 (set_attr "btver2_sse_attr" "maxmin")
1960 (set_attr "prefix" "<mask_prefix3>")
1961 (set_attr "mode" "<MODE>")])
1963 ;; These versions of the min/max patterns implement exactly the operations
1964 ;; min = (op1 < op2 ? op1 : op2)
1965 ;; max = (!(op1 < op2) ? op1 : op2)
1966 ;; Their operands are not commutative, and thus they may be used in the
1967 ;; presence of -0.0 and NaN.
1969 (define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
1970 [(set (match_operand:VF 0 "register_operand" "=x,v")
1972 [(match_operand:VF 1 "register_operand" "0,v")
1973 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
1976 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1978 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
1979 v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1980 [(set_attr "isa" "noavx,avx")
1981 (set_attr "type" "sseadd")
1982 (set_attr "btver2_sse_attr" "maxmin")
1983 (set_attr "prefix" "<mask_prefix3>")
1984 (set_attr "mode" "<MODE>")])
1986 (define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
1987 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1990 (match_operand:VF_128 1 "register_operand" "0,v")
1991 (match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_scalar_constraint>"))
1996 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1997 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
1998 [(set_attr "isa" "noavx,avx")
1999 (set_attr "type" "sse")
2000 (set_attr "btver2_sse_attr" "maxmin")
2001 (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2002 (set_attr "mode" "<ssescalarmode>")])
2004 (define_insn "avx_addsubv4df3"
2005 [(set (match_operand:V4DF 0 "register_operand" "=x")
2008 (match_operand:V4DF 1 "register_operand" "x")
2009 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2010 (plus:V4DF (match_dup 1) (match_dup 2))
2013 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2014 [(set_attr "type" "sseadd")
2015 (set_attr "prefix" "vex")
2016 (set_attr "mode" "V4DF")])
2018 (define_insn "sse3_addsubv2df3"
2019 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2022 (match_operand:V2DF 1 "register_operand" "0,x")
2023 (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
2024 (plus:V2DF (match_dup 1) (match_dup 2))
2028 addsubpd\t{%2, %0|%0, %2}
2029 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2030 [(set_attr "isa" "noavx,avx")
2031 (set_attr "type" "sseadd")
2032 (set_attr "atom_unit" "complex")
2033 (set_attr "prefix" "orig,vex")
2034 (set_attr "mode" "V2DF")])
2036 (define_insn "avx_addsubv8sf3"
2037 [(set (match_operand:V8SF 0 "register_operand" "=x")
2040 (match_operand:V8SF 1 "register_operand" "x")
2041 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2042 (plus:V8SF (match_dup 1) (match_dup 2))
2045 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2046 [(set_attr "type" "sseadd")
2047 (set_attr "prefix" "vex")
2048 (set_attr "mode" "V8SF")])
2050 (define_insn "sse3_addsubv4sf3"
2051 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2054 (match_operand:V4SF 1 "register_operand" "0,x")
2055 (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
2056 (plus:V4SF (match_dup 1) (match_dup 2))
2060 addsubps\t{%2, %0|%0, %2}
2061 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2062 [(set_attr "isa" "noavx,avx")
2063 (set_attr "type" "sseadd")
2064 (set_attr "prefix" "orig,vex")
2065 (set_attr "prefix_rep" "1,*")
2066 (set_attr "mode" "V4SF")])
2069 [(set (match_operand:VF_128_256 0 "register_operand")
2070 (match_operator:VF_128_256 6 "addsub_vm_operator"
2072 (match_operand:VF_128_256 1 "register_operand")
2073 (match_operand:VF_128_256 2 "vector_operand"))
2075 (match_operand:VF_128_256 3 "vector_operand")
2076 (match_operand:VF_128_256 4 "vector_operand"))
2077 (match_operand 5 "const_int_operand")]))]
2079 && can_create_pseudo_p ()
2080 && ((rtx_equal_p (operands[1], operands[3])
2081 && rtx_equal_p (operands[2], operands[4]))
2082 || (rtx_equal_p (operands[1], operands[4])
2083 && rtx_equal_p (operands[2], operands[3])))"
2085 (vec_merge:VF_128_256
2086 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2087 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2091 [(set (match_operand:VF_128_256 0 "register_operand")
2092 (match_operator:VF_128_256 6 "addsub_vm_operator"
2094 (match_operand:VF_128_256 1 "vector_operand")
2095 (match_operand:VF_128_256 2 "vector_operand"))
2097 (match_operand:VF_128_256 3 "register_operand")
2098 (match_operand:VF_128_256 4 "vector_operand"))
2099 (match_operand 5 "const_int_operand")]))]
2101 && can_create_pseudo_p ()
2102 && ((rtx_equal_p (operands[1], operands[3])
2103 && rtx_equal_p (operands[2], operands[4]))
2104 || (rtx_equal_p (operands[1], operands[4])
2105 && rtx_equal_p (operands[2], operands[3])))"
2107 (vec_merge:VF_128_256
2108 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2109 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2112 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. */
2114 = GEN_INT (~INTVAL (operands[5])
2115 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
2119 [(set (match_operand:VF_128_256 0 "register_operand")
2120 (match_operator:VF_128_256 7 "addsub_vs_operator"
2121 [(vec_concat:<ssedoublemode>
2123 (match_operand:VF_128_256 1 "register_operand")
2124 (match_operand:VF_128_256 2 "vector_operand"))
2126 (match_operand:VF_128_256 3 "vector_operand")
2127 (match_operand:VF_128_256 4 "vector_operand")))
2128 (match_parallel 5 "addsub_vs_parallel"
2129 [(match_operand 6 "const_int_operand")])]))]
2131 && can_create_pseudo_p ()
2132 && ((rtx_equal_p (operands[1], operands[3])
2133 && rtx_equal_p (operands[2], operands[4]))
2134 || (rtx_equal_p (operands[1], operands[4])
2135 && rtx_equal_p (operands[2], operands[3])))"
2137 (vec_merge:VF_128_256
2138 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2139 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2142 int i, nelt = XVECLEN (operands[5], 0);
2143 HOST_WIDE_INT ival = 0;
2145 for (i = 0; i < nelt; i++)
2146 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2147 ival |= HOST_WIDE_INT_1 << i;
2149 operands[5] = GEN_INT (ival);
2153 [(set (match_operand:VF_128_256 0 "register_operand")
2154 (match_operator:VF_128_256 7 "addsub_vs_operator"
2155 [(vec_concat:<ssedoublemode>
2157 (match_operand:VF_128_256 1 "vector_operand")
2158 (match_operand:VF_128_256 2 "vector_operand"))
2160 (match_operand:VF_128_256 3 "register_operand")
2161 (match_operand:VF_128_256 4 "vector_operand")))
2162 (match_parallel 5 "addsub_vs_parallel"
2163 [(match_operand 6 "const_int_operand")])]))]
2165 && can_create_pseudo_p ()
2166 && ((rtx_equal_p (operands[1], operands[3])
2167 && rtx_equal_p (operands[2], operands[4]))
2168 || (rtx_equal_p (operands[1], operands[4])
2169 && rtx_equal_p (operands[2], operands[3])))"
2171 (vec_merge:VF_128_256
2172 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2173 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2176 int i, nelt = XVECLEN (operands[5], 0);
2177 HOST_WIDE_INT ival = 0;
2179 for (i = 0; i < nelt; i++)
2180 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2181 ival |= HOST_WIDE_INT_1 << i;
2183 operands[5] = GEN_INT (ival);
2186 (define_insn "avx_h<plusminus_insn>v4df3"
2187 [(set (match_operand:V4DF 0 "register_operand" "=x")
2192 (match_operand:V4DF 1 "register_operand" "x")
2193 (parallel [(const_int 0)]))
2194 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2197 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2198 (parallel [(const_int 0)]))
2199 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2202 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2203 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2205 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2206 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2208 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2209 [(set_attr "type" "sseadd")
2210 (set_attr "prefix" "vex")
2211 (set_attr "mode" "V4DF")])
2213 (define_expand "sse3_haddv2df3"
2214 [(set (match_operand:V2DF 0 "register_operand")
2218 (match_operand:V2DF 1 "register_operand")
2219 (parallel [(const_int 0)]))
2220 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2223 (match_operand:V2DF 2 "vector_operand")
2224 (parallel [(const_int 0)]))
2225 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2228 (define_insn "*sse3_haddv2df3"
2229 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2233 (match_operand:V2DF 1 "register_operand" "0,x")
2234 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2237 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2240 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2241 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2244 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2246 && INTVAL (operands[3]) != INTVAL (operands[4])
2247 && INTVAL (operands[5]) != INTVAL (operands[6])"
2249 haddpd\t{%2, %0|%0, %2}
2250 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2251 [(set_attr "isa" "noavx,avx")
2252 (set_attr "type" "sseadd")
2253 (set_attr "prefix" "orig,vex")
2254 (set_attr "mode" "V2DF")])
2256 (define_insn "sse3_hsubv2df3"
2257 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2261 (match_operand:V2DF 1 "register_operand" "0,x")
2262 (parallel [(const_int 0)]))
2263 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2266 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2267 (parallel [(const_int 0)]))
2268 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2271 hsubpd\t{%2, %0|%0, %2}
2272 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2273 [(set_attr "isa" "noavx,avx")
2274 (set_attr "type" "sseadd")
2275 (set_attr "prefix" "orig,vex")
2276 (set_attr "mode" "V2DF")])
2278 (define_insn "*sse3_haddv2df3_low"
2279 [(set (match_operand:DF 0 "register_operand" "=x,x")
2282 (match_operand:V2DF 1 "register_operand" "0,x")
2283 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2286 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2288 && INTVAL (operands[2]) != INTVAL (operands[3])"
2290 haddpd\t{%0, %0|%0, %0}
2291 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2292 [(set_attr "isa" "noavx,avx")
2293 (set_attr "type" "sseadd1")
2294 (set_attr "prefix" "orig,vex")
2295 (set_attr "mode" "V2DF")])
2297 (define_insn "*sse3_hsubv2df3_low"
2298 [(set (match_operand:DF 0 "register_operand" "=x,x")
2301 (match_operand:V2DF 1 "register_operand" "0,x")
2302 (parallel [(const_int 0)]))
2305 (parallel [(const_int 1)]))))]
2308 hsubpd\t{%0, %0|%0, %0}
2309 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2310 [(set_attr "isa" "noavx,avx")
2311 (set_attr "type" "sseadd1")
2312 (set_attr "prefix" "orig,vex")
2313 (set_attr "mode" "V2DF")])
2315 (define_insn "avx_h<plusminus_insn>v8sf3"
2316 [(set (match_operand:V8SF 0 "register_operand" "=x")
2322 (match_operand:V8SF 1 "register_operand" "x")
2323 (parallel [(const_int 0)]))
2324 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2326 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2327 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2331 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2332 (parallel [(const_int 0)]))
2333 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2335 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2336 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2340 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2341 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2343 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2344 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2347 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2348 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2350 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2351 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2353 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2354 [(set_attr "type" "sseadd")
2355 (set_attr "prefix" "vex")
2356 (set_attr "mode" "V8SF")])
2358 (define_insn "sse3_h<plusminus_insn>v4sf3"
2359 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2364 (match_operand:V4SF 1 "register_operand" "0,x")
2365 (parallel [(const_int 0)]))
2366 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2368 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2369 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2373 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
2374 (parallel [(const_int 0)]))
2375 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2377 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2378 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2381 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2382 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2383 [(set_attr "isa" "noavx,avx")
2384 (set_attr "type" "sseadd")
2385 (set_attr "atom_unit" "complex")
2386 (set_attr "prefix" "orig,vex")
2387 (set_attr "prefix_rep" "1,*")
2388 (set_attr "mode" "V4SF")])
2390 (define_expand "reduc_plus_scal_v8df"
2391 [(match_operand:DF 0 "register_operand")
2392 (match_operand:V8DF 1 "register_operand")]
2395 rtx tmp = gen_reg_rtx (V8DFmode);
2396 ix86_expand_reduc (gen_addv8df3, tmp, operands[1]);
2397 emit_insn (gen_vec_extractv8dfdf (operands[0], tmp, const0_rtx));
2401 (define_expand "reduc_plus_scal_v4df"
2402 [(match_operand:DF 0 "register_operand")
2403 (match_operand:V4DF 1 "register_operand")]
2406 rtx tmp = gen_reg_rtx (V4DFmode);
2407 rtx tmp2 = gen_reg_rtx (V4DFmode);
2408 rtx vec_res = gen_reg_rtx (V4DFmode);
2409 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2410 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2411 emit_insn (gen_addv4df3 (vec_res, tmp, tmp2));
2412 emit_insn (gen_vec_extractv4dfdf (operands[0], vec_res, const0_rtx));
2416 (define_expand "reduc_plus_scal_v2df"
2417 [(match_operand:DF 0 "register_operand")
2418 (match_operand:V2DF 1 "register_operand")]
2421 rtx tmp = gen_reg_rtx (V2DFmode);
2422 emit_insn (gen_sse3_haddv2df3 (tmp, operands[1], operands[1]));
2423 emit_insn (gen_vec_extractv2dfdf (operands[0], tmp, const0_rtx));
2427 (define_expand "reduc_plus_scal_v16sf"
2428 [(match_operand:SF 0 "register_operand")
2429 (match_operand:V16SF 1 "register_operand")]
2432 rtx tmp = gen_reg_rtx (V16SFmode);
2433 ix86_expand_reduc (gen_addv16sf3, tmp, operands[1]);
2434 emit_insn (gen_vec_extractv16sfsf (operands[0], tmp, const0_rtx));
2438 (define_expand "reduc_plus_scal_v8sf"
2439 [(match_operand:SF 0 "register_operand")
2440 (match_operand:V8SF 1 "register_operand")]
2443 rtx tmp = gen_reg_rtx (V8SFmode);
2444 rtx tmp2 = gen_reg_rtx (V8SFmode);
2445 rtx vec_res = gen_reg_rtx (V8SFmode);
2446 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2447 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2448 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2449 emit_insn (gen_addv8sf3 (vec_res, tmp, tmp2));
2450 emit_insn (gen_vec_extractv8sfsf (operands[0], vec_res, const0_rtx));
2454 (define_expand "reduc_plus_scal_v4sf"
2455 [(match_operand:SF 0 "register_operand")
2456 (match_operand:V4SF 1 "register_operand")]
2459 rtx vec_res = gen_reg_rtx (V4SFmode);
2462 rtx tmp = gen_reg_rtx (V4SFmode);
2463 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2464 emit_insn (gen_sse3_haddv4sf3 (vec_res, tmp, tmp));
2467 ix86_expand_reduc (gen_addv4sf3, vec_res, operands[1]);
2468 emit_insn (gen_vec_extractv4sfsf (operands[0], vec_res, const0_rtx));
2472 ;; Modes handled by reduc_sm{in,ax}* patterns.
2473 (define_mode_iterator REDUC_SMINMAX_MODE
2474 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2475 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2476 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2477 (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
2478 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2479 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2480 (V8DF "TARGET_AVX512F")])
2482 (define_expand "reduc_<code>_scal_<mode>"
2483 [(smaxmin:REDUC_SMINMAX_MODE
2484 (match_operand:<ssescalarmode> 0 "register_operand")
2485 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2488 rtx tmp = gen_reg_rtx (<MODE>mode);
2489 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2490 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2495 (define_expand "reduc_<code>_scal_<mode>"
2496 [(umaxmin:VI_AVX512BW
2497 (match_operand:<ssescalarmode> 0 "register_operand")
2498 (match_operand:VI_AVX512BW 1 "register_operand"))]
2501 rtx tmp = gen_reg_rtx (<MODE>mode);
2502 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2503 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2508 (define_expand "reduc_<code>_scal_<mode>"
2510 (match_operand:<ssescalarmode> 0 "register_operand")
2511 (match_operand:VI_256 1 "register_operand"))]
2514 rtx tmp = gen_reg_rtx (<MODE>mode);
2515 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2516 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2521 (define_expand "reduc_umin_scal_v8hi"
2523 (match_operand:HI 0 "register_operand")
2524 (match_operand:V8HI 1 "register_operand"))]
2527 rtx tmp = gen_reg_rtx (V8HImode);
2528 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2529 emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
2533 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2534 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2536 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2537 (match_operand:SI 2 "const_0_to_255_operand")]
2540 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2541 [(set_attr "type" "sse")
2542 (set_attr "prefix" "evex")
2543 (set_attr "mode" "<MODE>")])
2545 (define_insn "reduces<mode><mask_scalar_name>"
2546 [(set (match_operand:VF_128 0 "register_operand" "=v")
2549 [(match_operand:VF_128 1 "register_operand" "v")
2550 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2551 (match_operand:SI 3 "const_0_to_255_operand")]
2556 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2, %3}"
2557 [(set_attr "type" "sse")
2558 (set_attr "prefix" "evex")
2559 (set_attr "mode" "<MODE>")])
2561 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2563 ;; Parallel floating point comparisons
2565 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2567 (define_insn "avx_cmp<mode>3"
2568 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2570 [(match_operand:VF_128_256 1 "register_operand" "x")
2571 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2572 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2575 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2576 [(set_attr "type" "ssecmp")
2577 (set_attr "length_immediate" "1")
2578 (set_attr "prefix" "vex")
2579 (set_attr "mode" "<MODE>")])
2581 (define_insn "avx_vmcmp<mode>3"
2582 [(set (match_operand:VF_128 0 "register_operand" "=x")
2585 [(match_operand:VF_128 1 "register_operand" "x")
2586 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2587 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2592 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2593 [(set_attr "type" "ssecmp")
2594 (set_attr "length_immediate" "1")
2595 (set_attr "prefix" "vex")
2596 (set_attr "mode" "<ssescalarmode>")])
2598 (define_insn "*<sse>_maskcmp<mode>3_comm"
2599 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2600 (match_operator:VF_128_256 3 "sse_comparison_operator"
2601 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2602 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2604 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2606 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2607 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2608 [(set_attr "isa" "noavx,avx")
2609 (set_attr "type" "ssecmp")
2610 (set_attr "length_immediate" "1")
2611 (set_attr "prefix" "orig,vex")
2612 (set_attr "mode" "<MODE>")])
2614 (define_insn "<sse>_maskcmp<mode>3"
2615 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2616 (match_operator:VF_128_256 3 "sse_comparison_operator"
2617 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2618 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2621 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2622 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2623 [(set_attr "isa" "noavx,avx")
2624 (set_attr "type" "ssecmp")
2625 (set_attr "length_immediate" "1")
2626 (set_attr "prefix" "orig,vex")
2627 (set_attr "mode" "<MODE>")])
2629 (define_insn "<sse>_vmmaskcmp<mode>3"
2630 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2632 (match_operator:VF_128 3 "sse_comparison_operator"
2633 [(match_operand:VF_128 1 "register_operand" "0,x")
2634 (match_operand:VF_128 2 "vector_operand" "xBm,xm")])
2639 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2640 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2641 [(set_attr "isa" "noavx,avx")
2642 (set_attr "type" "ssecmp")
2643 (set_attr "length_immediate" "1,*")
2644 (set_attr "prefix" "orig,vex")
2645 (set_attr "mode" "<ssescalarmode>")])
2647 (define_mode_attr cmp_imm_predicate
2648 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2649 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2650 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2651 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2652 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2653 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2654 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2655 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2656 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
2658 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2659 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2660 (unspec:<avx512fmaskmode>
2661 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2662 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2663 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2665 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2666 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2667 [(set_attr "type" "ssecmp")
2668 (set_attr "length_immediate" "1")
2669 (set_attr "prefix" "evex")
2670 (set_attr "mode" "<sseinsnmode>")])
2672 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2673 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2674 (unspec:<avx512fmaskmode>
2675 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2676 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2677 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2680 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2681 [(set_attr "type" "ssecmp")
2682 (set_attr "length_immediate" "1")
2683 (set_attr "prefix" "evex")
2684 (set_attr "mode" "<sseinsnmode>")])
2686 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2687 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2688 (unspec:<avx512fmaskmode>
2689 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2690 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2691 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2692 UNSPEC_UNSIGNED_PCMP))]
2694 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2695 [(set_attr "type" "ssecmp")
2696 (set_attr "length_immediate" "1")
2697 (set_attr "prefix" "evex")
2698 (set_attr "mode" "<sseinsnmode>")])
2700 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2701 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2702 (unspec:<avx512fmaskmode>
2703 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2704 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2705 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2706 UNSPEC_UNSIGNED_PCMP))]
2708 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2709 [(set_attr "type" "ssecmp")
2710 (set_attr "length_immediate" "1")
2711 (set_attr "prefix" "evex")
2712 (set_attr "mode" "<sseinsnmode>")])
2714 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2715 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2716 (and:<avx512fmaskmode>
2717 (unspec:<avx512fmaskmode>
2718 [(match_operand:VF_128 1 "register_operand" "v")
2719 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2720 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2724 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2725 [(set_attr "type" "ssecmp")
2726 (set_attr "length_immediate" "1")
2727 (set_attr "prefix" "evex")
2728 (set_attr "mode" "<ssescalarmode>")])
2730 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2731 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2732 (and:<avx512fmaskmode>
2733 (unspec:<avx512fmaskmode>
2734 [(match_operand:VF_128 1 "register_operand" "v")
2735 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2736 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2738 (and:<avx512fmaskmode>
2739 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2742 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2743 [(set_attr "type" "ssecmp")
2744 (set_attr "length_immediate" "1")
2745 (set_attr "prefix" "evex")
2746 (set_attr "mode" "<ssescalarmode>")])
2748 (define_insn "avx512f_maskcmp<mode>3"
2749 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2750 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2751 [(match_operand:VF 1 "register_operand" "v")
2752 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2754 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2755 [(set_attr "type" "ssecmp")
2756 (set_attr "length_immediate" "1")
2757 (set_attr "prefix" "evex")
2758 (set_attr "mode" "<sseinsnmode>")])
2760 (define_insn "<sse>_comi<round_saeonly_name>"
2761 [(set (reg:CCFP FLAGS_REG)
2764 (match_operand:<ssevecmode> 0 "register_operand" "v")
2765 (parallel [(const_int 0)]))
2767 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
2768 (parallel [(const_int 0)]))))]
2769 "SSE_FLOAT_MODE_P (<MODE>mode)"
2770 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2771 [(set_attr "type" "ssecomi")
2772 (set_attr "prefix" "maybe_vex")
2773 (set_attr "prefix_rep" "0")
2774 (set (attr "prefix_data16")
2775 (if_then_else (eq_attr "mode" "DF")
2777 (const_string "0")))
2778 (set_attr "mode" "<MODE>")])
2780 (define_insn "<sse>_ucomi<round_saeonly_name>"
2781 [(set (reg:CCFPU FLAGS_REG)
2784 (match_operand:<ssevecmode> 0 "register_operand" "v")
2785 (parallel [(const_int 0)]))
2787 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
2788 (parallel [(const_int 0)]))))]
2789 "SSE_FLOAT_MODE_P (<MODE>mode)"
2790 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2791 [(set_attr "type" "ssecomi")
2792 (set_attr "prefix" "maybe_vex")
2793 (set_attr "prefix_rep" "0")
2794 (set (attr "prefix_data16")
2795 (if_then_else (eq_attr "mode" "DF")
2797 (const_string "0")))
2798 (set_attr "mode" "<MODE>")])
2800 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2801 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2802 (match_operator:<avx512fmaskmode> 1 ""
2803 [(match_operand:V48_AVX512VL 2 "register_operand")
2804 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
2807 bool ok = ix86_expand_mask_vec_cmp (operands);
2812 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2813 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2814 (match_operator:<avx512fmaskmode> 1 ""
2815 [(match_operand:VI12_AVX512VL 2 "register_operand")
2816 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2819 bool ok = ix86_expand_mask_vec_cmp (operands);
2824 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2825 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2826 (match_operator:<sseintvecmode> 1 ""
2827 [(match_operand:VI_256 2 "register_operand")
2828 (match_operand:VI_256 3 "nonimmediate_operand")]))]
2831 bool ok = ix86_expand_int_vec_cmp (operands);
2836 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2837 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2838 (match_operator:<sseintvecmode> 1 ""
2839 [(match_operand:VI124_128 2 "register_operand")
2840 (match_operand:VI124_128 3 "vector_operand")]))]
2843 bool ok = ix86_expand_int_vec_cmp (operands);
2848 (define_expand "vec_cmpv2div2di"
2849 [(set (match_operand:V2DI 0 "register_operand")
2850 (match_operator:V2DI 1 ""
2851 [(match_operand:V2DI 2 "register_operand")
2852 (match_operand:V2DI 3 "vector_operand")]))]
2855 bool ok = ix86_expand_int_vec_cmp (operands);
2860 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2861 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2862 (match_operator:<sseintvecmode> 1 ""
2863 [(match_operand:VF_256 2 "register_operand")
2864 (match_operand:VF_256 3 "nonimmediate_operand")]))]
2867 bool ok = ix86_expand_fp_vec_cmp (operands);
2872 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2873 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2874 (match_operator:<sseintvecmode> 1 ""
2875 [(match_operand:VF_128 2 "register_operand")
2876 (match_operand:VF_128 3 "vector_operand")]))]
2879 bool ok = ix86_expand_fp_vec_cmp (operands);
2884 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2885 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2886 (match_operator:<avx512fmaskmode> 1 ""
2887 [(match_operand:VI48_AVX512VL 2 "register_operand")
2888 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
2891 bool ok = ix86_expand_mask_vec_cmp (operands);
2896 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2897 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2898 (match_operator:<avx512fmaskmode> 1 ""
2899 [(match_operand:VI12_AVX512VL 2 "register_operand")
2900 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2903 bool ok = ix86_expand_mask_vec_cmp (operands);
2908 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
2909 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2910 (match_operator:<sseintvecmode> 1 ""
2911 [(match_operand:VI_256 2 "register_operand")
2912 (match_operand:VI_256 3 "nonimmediate_operand")]))]
2915 bool ok = ix86_expand_int_vec_cmp (operands);
2920 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
2921 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2922 (match_operator:<sseintvecmode> 1 ""
2923 [(match_operand:VI124_128 2 "register_operand")
2924 (match_operand:VI124_128 3 "vector_operand")]))]
2927 bool ok = ix86_expand_int_vec_cmp (operands);
2932 (define_expand "vec_cmpuv2div2di"
2933 [(set (match_operand:V2DI 0 "register_operand")
2934 (match_operator:V2DI 1 ""
2935 [(match_operand:V2DI 2 "register_operand")
2936 (match_operand:V2DI 3 "vector_operand")]))]
2939 bool ok = ix86_expand_int_vec_cmp (operands);
2944 (define_expand "vec_cmpeqv2div2di"
2945 [(set (match_operand:V2DI 0 "register_operand")
2946 (match_operator:V2DI 1 ""
2947 [(match_operand:V2DI 2 "register_operand")
2948 (match_operand:V2DI 3 "vector_operand")]))]
2951 bool ok = ix86_expand_int_vec_cmp (operands);
2956 (define_expand "vcond<V_512:mode><VF_512:mode>"
2957 [(set (match_operand:V_512 0 "register_operand")
2959 (match_operator 3 ""
2960 [(match_operand:VF_512 4 "nonimmediate_operand")
2961 (match_operand:VF_512 5 "nonimmediate_operand")])
2962 (match_operand:V_512 1 "general_operand")
2963 (match_operand:V_512 2 "general_operand")))]
2965 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2966 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2968 bool ok = ix86_expand_fp_vcond (operands);
2973 (define_expand "vcond<V_256:mode><VF_256:mode>"
2974 [(set (match_operand:V_256 0 "register_operand")
2976 (match_operator 3 ""
2977 [(match_operand:VF_256 4 "nonimmediate_operand")
2978 (match_operand:VF_256 5 "nonimmediate_operand")])
2979 (match_operand:V_256 1 "general_operand")
2980 (match_operand:V_256 2 "general_operand")))]
2982 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2983 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2985 bool ok = ix86_expand_fp_vcond (operands);
2990 (define_expand "vcond<V_128:mode><VF_128:mode>"
2991 [(set (match_operand:V_128 0 "register_operand")
2993 (match_operator 3 ""
2994 [(match_operand:VF_128 4 "vector_operand")
2995 (match_operand:VF_128 5 "vector_operand")])
2996 (match_operand:V_128 1 "general_operand")
2997 (match_operand:V_128 2 "general_operand")))]
2999 && (GET_MODE_NUNITS (<V_128:MODE>mode)
3000 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
3002 bool ok = ix86_expand_fp_vcond (operands);
3007 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3008 [(set (match_operand:V48_AVX512VL 0 "register_operand")
3009 (vec_merge:V48_AVX512VL
3010 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3011 (match_operand:V48_AVX512VL 2 "vector_move_operand")
3012 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3015 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3016 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3017 (vec_merge:VI12_AVX512VL
3018 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3019 (match_operand:VI12_AVX512VL 2 "vector_move_operand")
3020 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3023 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3024 [(set (match_operand:VI_256 0 "register_operand")
3026 (match_operand:VI_256 1 "nonimmediate_operand")
3027 (match_operand:VI_256 2 "vector_move_operand")
3028 (match_operand:<sseintvecmode> 3 "register_operand")))]
3031 ix86_expand_sse_movcc (operands[0], operands[3],
3032 operands[1], operands[2]);
3036 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3037 [(set (match_operand:VI124_128 0 "register_operand")
3038 (vec_merge:VI124_128
3039 (match_operand:VI124_128 1 "vector_operand")
3040 (match_operand:VI124_128 2 "vector_move_operand")
3041 (match_operand:<sseintvecmode> 3 "register_operand")))]
3044 ix86_expand_sse_movcc (operands[0], operands[3],
3045 operands[1], operands[2]);
3049 (define_expand "vcond_mask_v2div2di"
3050 [(set (match_operand:V2DI 0 "register_operand")
3052 (match_operand:V2DI 1 "vector_operand")
3053 (match_operand:V2DI 2 "vector_move_operand")
3054 (match_operand:V2DI 3 "register_operand")))]
3057 ix86_expand_sse_movcc (operands[0], operands[3],
3058 operands[1], operands[2]);
3062 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3063 [(set (match_operand:VF_256 0 "register_operand")
3065 (match_operand:VF_256 1 "nonimmediate_operand")
3066 (match_operand:VF_256 2 "vector_move_operand")
3067 (match_operand:<sseintvecmode> 3 "register_operand")))]
3070 ix86_expand_sse_movcc (operands[0], operands[3],
3071 operands[1], operands[2]);
3075 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3076 [(set (match_operand:VF_128 0 "register_operand")
3078 (match_operand:VF_128 1 "vector_operand")
3079 (match_operand:VF_128 2 "vector_move_operand")
3080 (match_operand:<sseintvecmode> 3 "register_operand")))]
3083 ix86_expand_sse_movcc (operands[0], operands[3],
3084 operands[1], operands[2]);
3088 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3090 ;; Parallel floating point logical operations
3092 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3094 (define_insn "<sse>_andnot<mode>3<mask_name>"
3095 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3098 (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
3099 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3100 "TARGET_SSE && <mask_avx512vl_condition>"
3102 static char buf[128];
3106 switch (which_alternative)
3109 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3114 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3120 switch (get_attr_mode (insn))
3128 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3129 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3130 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3133 suffix = "<ssemodesuffix>";
3136 snprintf (buf, sizeof (buf), ops, suffix);
3139 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3140 (set_attr "type" "sselog")
3141 (set_attr "prefix" "orig,maybe_vex,evex,evex")
3143 (cond [(and (match_test "<mask_applied>")
3144 (and (eq_attr "alternative" "1")
3145 (match_test "!TARGET_AVX512DQ")))
3146 (const_string "<sseintvecmode2>")
3147 (eq_attr "alternative" "3")
3148 (const_string "<sseintvecmode2>")
3149 (and (match_test "<MODE_SIZE> == 16")
3150 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3151 (const_string "<ssePSmode>")
3152 (match_test "TARGET_AVX")
3153 (const_string "<MODE>")
3154 (match_test "optimize_function_for_size_p (cfun)")
3155 (const_string "V4SF")
3157 (const_string "<MODE>")))])
3160 (define_insn "<sse>_andnot<mode>3<mask_name>"
3161 [(set (match_operand:VF_512 0 "register_operand" "=v")
3164 (match_operand:VF_512 1 "register_operand" "v"))
3165 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3168 static char buf[128];
3172 suffix = "<ssemodesuffix>";
3175 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3176 if (!TARGET_AVX512DQ)
3178 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3182 snprintf (buf, sizeof (buf),
3183 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3187 [(set_attr "type" "sselog")
3188 (set_attr "prefix" "evex")
3190 (if_then_else (match_test "TARGET_AVX512DQ")
3191 (const_string "<sseinsnmode>")
3192 (const_string "XI")))])
3194 (define_expand "<code><mode>3<mask_name>"
3195 [(set (match_operand:VF_128_256 0 "register_operand")
3196 (any_logic:VF_128_256
3197 (match_operand:VF_128_256 1 "vector_operand")
3198 (match_operand:VF_128_256 2 "vector_operand")))]
3199 "TARGET_SSE && <mask_avx512vl_condition>"
3200 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3202 (define_expand "<code><mode>3<mask_name>"
3203 [(set (match_operand:VF_512 0 "register_operand")
3205 (match_operand:VF_512 1 "nonimmediate_operand")
3206 (match_operand:VF_512 2 "nonimmediate_operand")))]
3208 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3210 (define_insn "*<code><mode>3<mask_name>"
3211 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3212 (any_logic:VF_128_256
3213 (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
3214 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3215 "TARGET_SSE && <mask_avx512vl_condition>
3216 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3218 static char buf[128];
3222 switch (which_alternative)
3225 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3230 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3236 switch (get_attr_mode (insn))
3244 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
3245 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3246 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3249 suffix = "<ssemodesuffix>";
3252 snprintf (buf, sizeof (buf), ops, suffix);
3255 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3256 (set_attr "type" "sselog")
3257 (set_attr "prefix" "orig,maybe_evex,evex,evex")
3259 (cond [(and (match_test "<mask_applied>")
3260 (and (eq_attr "alternative" "1")
3261 (match_test "!TARGET_AVX512DQ")))
3262 (const_string "<sseintvecmode2>")
3263 (eq_attr "alternative" "3")
3264 (const_string "<sseintvecmode2>")
3265 (and (match_test "<MODE_SIZE> == 16")
3266 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3267 (const_string "<ssePSmode>")
3268 (match_test "TARGET_AVX")
3269 (const_string "<MODE>")
3270 (match_test "optimize_function_for_size_p (cfun)")
3271 (const_string "V4SF")
3273 (const_string "<MODE>")))])
3275 (define_insn "*<code><mode>3<mask_name>"
3276 [(set (match_operand:VF_512 0 "register_operand" "=v")
3278 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
3279 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3280 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3282 static char buf[128];
3286 suffix = "<ssemodesuffix>";
3289 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
3290 if (!TARGET_AVX512DQ)
3292 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3296 snprintf (buf, sizeof (buf),
3297 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3301 [(set_attr "type" "sselog")
3302 (set_attr "prefix" "evex")
3304 (if_then_else (match_test "TARGET_AVX512DQ")
3305 (const_string "<sseinsnmode>")
3306 (const_string "XI")))])
3308 (define_expand "copysign<mode>3"
3311 (not:VF (match_dup 3))
3312 (match_operand:VF 1 "vector_operand")))
3314 (and:VF (match_dup 3)
3315 (match_operand:VF 2 "vector_operand")))
3316 (set (match_operand:VF 0 "register_operand")
3317 (ior:VF (match_dup 4) (match_dup 5)))]
3320 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3322 operands[4] = gen_reg_rtx (<MODE>mode);
3323 operands[5] = gen_reg_rtx (<MODE>mode);
3326 ;; Also define scalar versions. These are used for abs, neg, and
3327 ;; conditional move. Using subregs into vector modes causes register
3328 ;; allocation lossage. These patterns do not allow memory operands
3329 ;; because the native instructions read the full 128-bits.
3331 (define_insn "*andnot<mode>3"
3332 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3335 (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
3336 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3337 "SSE_FLOAT_MODE_P (<MODE>mode)"
3339 static char buf[128];
3342 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3344 switch (which_alternative)
3347 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3350 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3353 if (TARGET_AVX512DQ)
3354 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3357 suffix = <MODE>mode == DFmode ? "q" : "d";
3358 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3362 if (TARGET_AVX512DQ)
3363 ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3366 suffix = <MODE>mode == DFmode ? "q" : "d";
3367 ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3374 snprintf (buf, sizeof (buf), ops, suffix);
3377 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3378 (set_attr "type" "sselog")
3379 (set_attr "prefix" "orig,vex,evex,evex")
3381 (cond [(eq_attr "alternative" "2")
3382 (if_then_else (match_test "TARGET_AVX512DQ")
3383 (const_string "<ssevecmode>")
3384 (const_string "TI"))
3385 (eq_attr "alternative" "3")
3386 (if_then_else (match_test "TARGET_AVX512DQ")
3387 (const_string "<avx512fvecmode>")
3388 (const_string "XI"))
3389 (and (match_test "<MODE_SIZE> == 16")
3390 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3391 (const_string "V4SF")
3392 (match_test "TARGET_AVX")
3393 (const_string "<ssevecmode>")
3394 (match_test "optimize_function_for_size_p (cfun)")
3395 (const_string "V4SF")
3397 (const_string "<ssevecmode>")))])
3399 (define_insn "*andnottf3"
3400 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3402 (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
3403 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3406 static char buf[128];
3409 = (which_alternative >= 2 ? "pandnq"
3410 : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
3412 switch (which_alternative)
3415 ops = "%s\t{%%2, %%0|%%0, %%2}";
3419 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3422 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3428 snprintf (buf, sizeof (buf), ops, tmp);
3431 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3432 (set_attr "type" "sselog")
3433 (set (attr "prefix_data16")
3435 (and (eq_attr "alternative" "0")
3436 (eq_attr "mode" "TI"))
3438 (const_string "*")))
3439 (set_attr "prefix" "orig,vex,evex,evex")
3441 (cond [(eq_attr "alternative" "2")
3443 (eq_attr "alternative" "3")
3445 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3446 (const_string "V4SF")
3447 (match_test "TARGET_AVX")
3449 (ior (not (match_test "TARGET_SSE2"))
3450 (match_test "optimize_function_for_size_p (cfun)"))
3451 (const_string "V4SF")
3453 (const_string "TI")))])
3455 (define_insn "*<code><mode>3"
3456 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3458 (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
3459 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3460 "SSE_FLOAT_MODE_P (<MODE>mode)"
3462 static char buf[128];
3465 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3467 switch (which_alternative)
3470 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3473 if (!TARGET_AVX512DQ)
3475 suffix = <MODE>mode == DFmode ? "q" : "d";
3476 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3481 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3484 if (TARGET_AVX512DQ)
3485 ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3488 suffix = <MODE>mode == DFmode ? "q" : "d";
3489 ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3496 snprintf (buf, sizeof (buf), ops, suffix);
3499 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3500 (set_attr "type" "sselog")
3501 (set_attr "prefix" "orig,vex,evex,evex")
3503 (cond [(eq_attr "alternative" "2")
3504 (if_then_else (match_test "TARGET_AVX512DQ")
3505 (const_string "<ssevecmode>")
3506 (const_string "TI"))
3507 (eq_attr "alternative" "3")
3508 (if_then_else (match_test "TARGET_AVX512DQ")
3509 (const_string "<avx512fvecmode>")
3510 (const_string "XI"))
3511 (and (match_test "<MODE_SIZE> == 16")
3512 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3513 (const_string "V4SF")
3514 (match_test "TARGET_AVX")
3515 (const_string "<ssevecmode>")
3516 (match_test "optimize_function_for_size_p (cfun)")
3517 (const_string "V4SF")
3519 (const_string "<ssevecmode>")))])
3521 (define_expand "<code>tf3"
3522 [(set (match_operand:TF 0 "register_operand")
3524 (match_operand:TF 1 "vector_operand")
3525 (match_operand:TF 2 "vector_operand")))]
3527 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3529 (define_insn "*<code>tf3"
3530 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3532 (match_operand:TF 1 "vector_operand" "%0,x,v,v")
3533 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3535 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3537 static char buf[128];
3540 = (which_alternative >= 2 ? "p<logic>q"
3541 : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
3543 switch (which_alternative)
3546 ops = "%s\t{%%2, %%0|%%0, %%2}";
3550 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3553 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3559 snprintf (buf, sizeof (buf), ops, tmp);
3562 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3563 (set_attr "type" "sselog")
3564 (set (attr "prefix_data16")
3566 (and (eq_attr "alternative" "0")
3567 (eq_attr "mode" "TI"))
3569 (const_string "*")))
3570 (set_attr "prefix" "orig,vex,evex,evex")
3572 (cond [(eq_attr "alternative" "2")
3574 (eq_attr "alternative" "3")
3576 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3577 (const_string "V4SF")
3578 (match_test "TARGET_AVX")
3580 (ior (not (match_test "TARGET_SSE2"))
3581 (match_test "optimize_function_for_size_p (cfun)"))
3582 (const_string "V4SF")
3584 (const_string "TI")))])
3586 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3588 ;; FMA floating point multiply/accumulate instructions. These include
3589 ;; scalar versions of the instructions as well as vector versions.
3591 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3593 ;; The standard names for scalar FMA are only available with SSE math enabled.
3594 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
3595 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3596 ;; and TARGET_FMA4 are both false.
3597 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3598 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3599 ;; GAS to allow proper prefix selection. However, for the moment all hardware
3600 ;; that supports AVX512F also supports FMA so we can ignore this for now.
3601 (define_mode_iterator FMAMODEM
3602 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3603 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3604 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3605 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3606 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3607 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3608 (V16SF "TARGET_AVX512F")
3609 (V8DF "TARGET_AVX512F")])
3611 (define_expand "fma<mode>4"
3612 [(set (match_operand:FMAMODEM 0 "register_operand")
3614 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3615 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3616 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3618 (define_expand "fms<mode>4"
3619 [(set (match_operand:FMAMODEM 0 "register_operand")
3621 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3622 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3623 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3625 (define_expand "fnma<mode>4"
3626 [(set (match_operand:FMAMODEM 0 "register_operand")
3628 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3629 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3630 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3632 (define_expand "fnms<mode>4"
3633 [(set (match_operand:FMAMODEM 0 "register_operand")
3635 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3636 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3637 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3639 ;; The builtins for intrinsics are not constrained by SSE math enabled.
3640 (define_mode_iterator FMAMODE_AVX512
3641 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3642 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3643 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3644 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3645 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3646 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3647 (V16SF "TARGET_AVX512F")
3648 (V8DF "TARGET_AVX512F")])
3650 (define_mode_iterator FMAMODE
3651 [SF DF V4SF V2DF V8SF V4DF])
3653 (define_expand "fma4i_fmadd_<mode>"
3654 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3656 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3657 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3658 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3660 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3661 [(match_operand:VF_AVX512VL 0 "register_operand")
3662 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3663 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3664 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3665 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3666 "TARGET_AVX512F && <round_mode512bit_condition>"
3668 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3669 operands[0], operands[1], operands[2], operands[3],
3670 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3674 (define_insn "*fma_fmadd_<mode>"
3675 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3677 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3678 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3679 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3680 "TARGET_FMA || TARGET_FMA4"
3682 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3683 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3684 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3685 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3686 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3687 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3688 (set_attr "type" "ssemuladd")
3689 (set_attr "mode" "<MODE>")])
3691 ;; Suppose AVX-512F as baseline
3692 (define_mode_iterator VF_SF_AVX512VL
3693 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3694 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
3696 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3697 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3699 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3700 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3701 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3702 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3704 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3705 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3706 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3707 [(set_attr "type" "ssemuladd")
3708 (set_attr "mode" "<MODE>")])
3710 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3711 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3712 (vec_merge:VF_AVX512VL
3714 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3715 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3716 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3718 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3719 "TARGET_AVX512F && <round_mode512bit_condition>"
3721 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3722 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3723 [(set_attr "type" "ssemuladd")
3724 (set_attr "mode" "<MODE>")])
3726 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3727 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3728 (vec_merge:VF_AVX512VL
3730 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3731 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3732 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3734 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3736 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3737 [(set_attr "type" "ssemuladd")
3738 (set_attr "mode" "<MODE>")])
3740 (define_insn "*fma_fmsub_<mode>"
3741 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3743 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3744 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3746 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3747 "TARGET_FMA || TARGET_FMA4"
3749 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3750 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3751 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3752 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3753 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3754 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3755 (set_attr "type" "ssemuladd")
3756 (set_attr "mode" "<MODE>")])
3758 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3759 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3761 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3762 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3764 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3765 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3767 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3768 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3769 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3770 [(set_attr "type" "ssemuladd")
3771 (set_attr "mode" "<MODE>")])
3773 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3774 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3775 (vec_merge:VF_AVX512VL
3777 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3778 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3780 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3782 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3785 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3786 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3787 [(set_attr "type" "ssemuladd")
3788 (set_attr "mode" "<MODE>")])
3790 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
3791 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3792 (vec_merge:VF_AVX512VL
3794 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3795 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3797 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3799 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3800 "TARGET_AVX512F && <round_mode512bit_condition>"
3801 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3802 [(set_attr "type" "ssemuladd")
3803 (set_attr "mode" "<MODE>")])
3805 (define_insn "*fma_fnmadd_<mode>"
3806 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3809 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3810 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3811 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3812 "TARGET_FMA || TARGET_FMA4"
3814 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3815 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3816 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3817 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3818 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3819 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3820 (set_attr "type" "ssemuladd")
3821 (set_attr "mode" "<MODE>")])
3823 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3824 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3827 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3828 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3829 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3830 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3832 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3833 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3834 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3835 [(set_attr "type" "ssemuladd")
3836 (set_attr "mode" "<MODE>")])
3838 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
3839 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3840 (vec_merge:VF_AVX512VL
3843 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3844 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3845 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3847 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3848 "TARGET_AVX512F && <round_mode512bit_condition>"
3850 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3851 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3852 [(set_attr "type" "ssemuladd")
3853 (set_attr "mode" "<MODE>")])
3855 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
3856 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3857 (vec_merge:VF_AVX512VL
3860 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3861 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3862 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3864 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3865 "TARGET_AVX512F && <round_mode512bit_condition>"
3866 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3867 [(set_attr "type" "ssemuladd")
3868 (set_attr "mode" "<MODE>")])
3870 (define_insn "*fma_fnmsub_<mode>"
3871 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3874 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3875 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3877 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3878 "TARGET_FMA || TARGET_FMA4"
3880 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3881 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3882 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3883 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3884 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3885 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3886 (set_attr "type" "ssemuladd")
3887 (set_attr "mode" "<MODE>")])
3889 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3890 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3893 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3894 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3896 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3897 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3899 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3900 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3901 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3902 [(set_attr "type" "ssemuladd")
3903 (set_attr "mode" "<MODE>")])
3905 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
3906 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3907 (vec_merge:VF_AVX512VL
3910 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3911 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3913 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3915 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3916 "TARGET_AVX512F && <round_mode512bit_condition>"
3918 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3919 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3920 [(set_attr "type" "ssemuladd")
3921 (set_attr "mode" "<MODE>")])
3923 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
3924 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3925 (vec_merge:VF_AVX512VL
3928 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3929 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3931 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3933 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3935 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3936 [(set_attr "type" "ssemuladd")
3937 (set_attr "mode" "<MODE>")])
3939 ;; FMA parallel floating point multiply addsub and subadd operations.
3941 ;; It would be possible to represent these without the UNSPEC as
3944 ;; (fma op1 op2 op3)
3945 ;; (fma op1 op2 (neg op3))
3948 ;; But this doesn't seem useful in practice.
3950 (define_expand "fmaddsub_<mode>"
3951 [(set (match_operand:VF 0 "register_operand")
3953 [(match_operand:VF 1 "nonimmediate_operand")
3954 (match_operand:VF 2 "nonimmediate_operand")
3955 (match_operand:VF 3 "nonimmediate_operand")]
3957 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3959 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
3960 [(match_operand:VF_AVX512VL 0 "register_operand")
3961 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3962 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3963 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3964 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3967 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3968 operands[0], operands[1], operands[2], operands[3],
3969 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3973 (define_insn "*fma_fmaddsub_<mode>"
3974 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3976 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3977 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3978 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
3980 "TARGET_FMA || TARGET_FMA4"
3982 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3983 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3984 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3985 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3986 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3987 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3988 (set_attr "type" "ssemuladd")
3989 (set_attr "mode" "<MODE>")])
3991 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3992 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3993 (unspec:VF_SF_AVX512VL
3994 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3995 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3996 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
3998 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4000 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4001 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4002 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4003 [(set_attr "type" "ssemuladd")
4004 (set_attr "mode" "<MODE>")])
4006 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
4007 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4008 (vec_merge:VF_AVX512VL
4010 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4011 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4012 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
4015 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4018 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4019 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4020 [(set_attr "type" "ssemuladd")
4021 (set_attr "mode" "<MODE>")])
4023 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
4024 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4025 (vec_merge:VF_AVX512VL
4027 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4028 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4029 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
4032 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4034 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4035 [(set_attr "type" "ssemuladd")
4036 (set_attr "mode" "<MODE>")])
4038 (define_insn "*fma_fmsubadd_<mode>"
4039 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4041 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4042 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4044 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
4046 "TARGET_FMA || TARGET_FMA4"
4048 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4049 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4050 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4051 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4052 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4053 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4054 (set_attr "type" "ssemuladd")
4055 (set_attr "mode" "<MODE>")])
4057 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4058 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4059 (unspec:VF_SF_AVX512VL
4060 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4061 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4063 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
4065 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4067 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4068 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4069 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4070 [(set_attr "type" "ssemuladd")
4071 (set_attr "mode" "<MODE>")])
4073 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
4074 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4075 (vec_merge:VF_AVX512VL
4077 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4078 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4080 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
4083 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4086 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4087 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4088 [(set_attr "type" "ssemuladd")
4089 (set_attr "mode" "<MODE>")])
4091 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
4092 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4093 (vec_merge:VF_AVX512VL
4095 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4096 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4098 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
4101 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4103 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4104 [(set_attr "type" "ssemuladd")
4105 (set_attr "mode" "<MODE>")])
4107 ;; FMA3 floating point scalar intrinsics. These merge result with
4108 ;; high-order elements from the destination register.
4110 (define_expand "fmai_vmfmadd_<mode><round_name>"
4111 [(set (match_operand:VF_128 0 "register_operand")
4114 (match_operand:VF_128 1 "<round_nimm_predicate>")
4115 (match_operand:VF_128 2 "<round_nimm_predicate>")
4116 (match_operand:VF_128 3 "<round_nimm_predicate>"))
4121 (define_insn "*fmai_fmadd_<mode>"
4122 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4125 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
4126 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
4127 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
4130 "TARGET_FMA || TARGET_AVX512F"
4132 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4133 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4134 [(set_attr "type" "ssemuladd")
4135 (set_attr "mode" "<MODE>")])
4137 (define_insn "*fmai_fmsub_<mode>"
4138 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4141 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
4142 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4144 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
4147 "TARGET_FMA || TARGET_AVX512F"
4149 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4150 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4151 [(set_attr "type" "ssemuladd")
4152 (set_attr "mode" "<MODE>")])
4154 (define_insn "*fmai_fnmadd_<mode><round_name>"
4155 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4159 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4160 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
4161 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4164 "TARGET_FMA || TARGET_AVX512F"
4166 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4167 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4168 [(set_attr "type" "ssemuladd")
4169 (set_attr "mode" "<MODE>")])
4171 (define_insn "*fmai_fnmsub_<mode><round_name>"
4172 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4176 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
4177 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
4179 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
4182 "TARGET_FMA || TARGET_AVX512F"
4184 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4185 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4186 [(set_attr "type" "ssemuladd")
4187 (set_attr "mode" "<MODE>")])
4189 ;; FMA4 floating point scalar intrinsics. These write the
4190 ;; entire destination register, with the high-order elements zeroed.
4192 (define_expand "fma4i_vmfmadd_<mode>"
4193 [(set (match_operand:VF_128 0 "register_operand")
4196 (match_operand:VF_128 1 "nonimmediate_operand")
4197 (match_operand:VF_128 2 "nonimmediate_operand")
4198 (match_operand:VF_128 3 "nonimmediate_operand"))
4202 "operands[4] = CONST0_RTX (<MODE>mode);")
4204 (define_insn "*fma4i_vmfmadd_<mode>"
4205 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4208 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4209 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4210 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4211 (match_operand:VF_128 4 "const0_operand")
4214 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4215 [(set_attr "type" "ssemuladd")
4216 (set_attr "mode" "<MODE>")])
4218 (define_insn "*fma4i_vmfmsub_<mode>"
4219 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4222 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4223 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4225 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4226 (match_operand:VF_128 4 "const0_operand")
4229 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4230 [(set_attr "type" "ssemuladd")
4231 (set_attr "mode" "<MODE>")])
4233 (define_insn "*fma4i_vmfnmadd_<mode>"
4234 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4238 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4239 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4240 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4241 (match_operand:VF_128 4 "const0_operand")
4244 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4245 [(set_attr "type" "ssemuladd")
4246 (set_attr "mode" "<MODE>")])
4248 (define_insn "*fma4i_vmfnmsub_<mode>"
4249 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4253 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4254 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4256 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4257 (match_operand:VF_128 4 "const0_operand")
4260 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4261 [(set_attr "type" "ssemuladd")
4262 (set_attr "mode" "<MODE>")])
4264 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4266 ;; Parallel single-precision floating point conversion operations
4268 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4270 (define_insn "sse_cvtpi2ps"
4271 [(set (match_operand:V4SF 0 "register_operand" "=x")
4274 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
4275 (match_operand:V4SF 1 "register_operand" "0")
4278 "cvtpi2ps\t{%2, %0|%0, %2}"
4279 [(set_attr "type" "ssecvt")
4280 (set_attr "mode" "V4SF")])
4282 (define_insn "sse_cvtps2pi"
4283 [(set (match_operand:V2SI 0 "register_operand" "=y")
4285 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
4287 (parallel [(const_int 0) (const_int 1)])))]
4289 "cvtps2pi\t{%1, %0|%0, %q1}"
4290 [(set_attr "type" "ssecvt")
4291 (set_attr "unit" "mmx")
4292 (set_attr "mode" "DI")])
4294 (define_insn "sse_cvttps2pi"
4295 [(set (match_operand:V2SI 0 "register_operand" "=y")
4297 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
4298 (parallel [(const_int 0) (const_int 1)])))]
4300 "cvttps2pi\t{%1, %0|%0, %q1}"
4301 [(set_attr "type" "ssecvt")
4302 (set_attr "unit" "mmx")
4303 (set_attr "prefix_rep" "0")
4304 (set_attr "mode" "SF")])
4306 (define_insn "sse_cvtsi2ss<round_name>"
4307 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4310 (float:SF (match_operand:SI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4311 (match_operand:V4SF 1 "register_operand" "0,0,v")
4315 cvtsi2ss\t{%2, %0|%0, %2}
4316 cvtsi2ss\t{%2, %0|%0, %2}
4317 vcvtsi2ss\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4318 [(set_attr "isa" "noavx,noavx,avx")
4319 (set_attr "type" "sseicvt")
4320 (set_attr "athlon_decode" "vector,double,*")
4321 (set_attr "amdfam10_decode" "vector,double,*")
4322 (set_attr "bdver1_decode" "double,direct,*")
4323 (set_attr "btver2_decode" "double,double,double")
4324 (set_attr "znver1_decode" "double,double,double")
4325 (set_attr "prefix" "orig,orig,maybe_evex")
4326 (set_attr "mode" "SF")])
4328 (define_insn "sse_cvtsi2ssq<round_name>"
4329 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4332 (float:SF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4333 (match_operand:V4SF 1 "register_operand" "0,0,v")
4335 "TARGET_SSE && TARGET_64BIT"
4337 cvtsi2ssq\t{%2, %0|%0, %2}
4338 cvtsi2ssq\t{%2, %0|%0, %2}
4339 vcvtsi2ssq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4340 [(set_attr "isa" "noavx,noavx,avx")
4341 (set_attr "type" "sseicvt")
4342 (set_attr "athlon_decode" "vector,double,*")
4343 (set_attr "amdfam10_decode" "vector,double,*")
4344 (set_attr "bdver1_decode" "double,direct,*")
4345 (set_attr "btver2_decode" "double,double,double")
4346 (set_attr "length_vex" "*,*,4")
4347 (set_attr "prefix_rex" "1,1,*")
4348 (set_attr "prefix" "orig,orig,maybe_evex")
4349 (set_attr "mode" "SF")])
4351 (define_insn "sse_cvtss2si<round_name>"
4352 [(set (match_operand:SI 0 "register_operand" "=r,r")
4355 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4356 (parallel [(const_int 0)]))]
4357 UNSPEC_FIX_NOTRUNC))]
4359 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4360 [(set_attr "type" "sseicvt")
4361 (set_attr "athlon_decode" "double,vector")
4362 (set_attr "bdver1_decode" "double,double")
4363 (set_attr "prefix_rep" "1")
4364 (set_attr "prefix" "maybe_vex")
4365 (set_attr "mode" "SI")])
4367 (define_insn "sse_cvtss2si_2"
4368 [(set (match_operand:SI 0 "register_operand" "=r,r")
4369 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
4370 UNSPEC_FIX_NOTRUNC))]
4372 "%vcvtss2si\t{%1, %0|%0, %k1}"
4373 [(set_attr "type" "sseicvt")
4374 (set_attr "athlon_decode" "double,vector")
4375 (set_attr "amdfam10_decode" "double,double")
4376 (set_attr "bdver1_decode" "double,double")
4377 (set_attr "prefix_rep" "1")
4378 (set_attr "prefix" "maybe_vex")
4379 (set_attr "mode" "SI")])
4381 (define_insn "sse_cvtss2siq<round_name>"
4382 [(set (match_operand:DI 0 "register_operand" "=r,r")
4385 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4386 (parallel [(const_int 0)]))]
4387 UNSPEC_FIX_NOTRUNC))]
4388 "TARGET_SSE && TARGET_64BIT"
4389 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4390 [(set_attr "type" "sseicvt")
4391 (set_attr "athlon_decode" "double,vector")
4392 (set_attr "bdver1_decode" "double,double")
4393 (set_attr "prefix_rep" "1")
4394 (set_attr "prefix" "maybe_vex")
4395 (set_attr "mode" "DI")])
4397 (define_insn "sse_cvtss2siq_2"
4398 [(set (match_operand:DI 0 "register_operand" "=r,r")
4399 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
4400 UNSPEC_FIX_NOTRUNC))]
4401 "TARGET_SSE && TARGET_64BIT"
4402 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
4403 [(set_attr "type" "sseicvt")
4404 (set_attr "athlon_decode" "double,vector")
4405 (set_attr "amdfam10_decode" "double,double")
4406 (set_attr "bdver1_decode" "double,double")
4407 (set_attr "prefix_rep" "1")
4408 (set_attr "prefix" "maybe_vex")
4409 (set_attr "mode" "DI")])
4411 (define_insn "sse_cvttss2si<round_saeonly_name>"
4412 [(set (match_operand:SI 0 "register_operand" "=r,r")
4415 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4416 (parallel [(const_int 0)]))))]
4418 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4419 [(set_attr "type" "sseicvt")
4420 (set_attr "athlon_decode" "double,vector")
4421 (set_attr "amdfam10_decode" "double,double")
4422 (set_attr "bdver1_decode" "double,double")
4423 (set_attr "prefix_rep" "1")
4424 (set_attr "prefix" "maybe_vex")
4425 (set_attr "mode" "SI")])
4427 (define_insn "sse_cvttss2siq<round_saeonly_name>"
4428 [(set (match_operand:DI 0 "register_operand" "=r,r")
4431 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
4432 (parallel [(const_int 0)]))))]
4433 "TARGET_SSE && TARGET_64BIT"
4434 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4435 [(set_attr "type" "sseicvt")
4436 (set_attr "athlon_decode" "double,vector")
4437 (set_attr "amdfam10_decode" "double,double")
4438 (set_attr "bdver1_decode" "double,double")
4439 (set_attr "prefix_rep" "1")
4440 (set_attr "prefix" "maybe_vex")
4441 (set_attr "mode" "DI")])
4443 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
4444 [(set (match_operand:VF_128 0 "register_operand" "=v")
4446 (vec_duplicate:VF_128
4447 (unsigned_float:<ssescalarmode>
4448 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4449 (match_operand:VF_128 1 "register_operand" "v")
4451 "TARGET_AVX512F && <round_modev4sf_condition>"
4452 "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4453 [(set_attr "type" "sseicvt")
4454 (set_attr "prefix" "evex")
4455 (set_attr "mode" "<ssescalarmode>")])
4457 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
4458 [(set (match_operand:VF_128 0 "register_operand" "=v")
4460 (vec_duplicate:VF_128
4461 (unsigned_float:<ssescalarmode>
4462 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4463 (match_operand:VF_128 1 "register_operand" "v")
4465 "TARGET_AVX512F && TARGET_64BIT"
4466 "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4467 [(set_attr "type" "sseicvt")
4468 (set_attr "prefix" "evex")
4469 (set_attr "mode" "<ssescalarmode>")])
4471 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
4472 [(set (match_operand:VF1 0 "register_operand" "=x,v")
4474 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
4475 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
4477 cvtdq2ps\t{%1, %0|%0, %1}
4478 vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4479 [(set_attr "isa" "noavx,avx")
4480 (set_attr "type" "ssecvt")
4481 (set_attr "prefix" "maybe_vex")
4482 (set_attr "mode" "<sseinsnmode>")])
4484 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4485 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4486 (unsigned_float:VF1_AVX512VL
4487 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4489 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4490 [(set_attr "type" "ssecvt")
4491 (set_attr "prefix" "evex")
4492 (set_attr "mode" "<MODE>")])
4494 (define_expand "floatuns<sseintvecmodelower><mode>2"
4495 [(match_operand:VF1 0 "register_operand")
4496 (match_operand:<sseintvecmode> 1 "register_operand")]
4497 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
4499 if (<MODE>mode == V16SFmode)
4500 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
4502 if (TARGET_AVX512VL)
4504 if (<MODE>mode == V4SFmode)
4505 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4507 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4510 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4516 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
4517 (define_mode_attr sf2simodelower
4518 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
4520 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
4521 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4523 [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
4524 UNSPEC_FIX_NOTRUNC))]
4525 "TARGET_SSE2 && <mask_mode512bit_condition>"
4526 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4527 [(set_attr "type" "ssecvt")
4528 (set (attr "prefix_data16")
4530 (match_test "TARGET_AVX")
4532 (const_string "1")))
4533 (set_attr "prefix" "maybe_vex")
4534 (set_attr "mode" "<sseinsnmode>")])
4536 (define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
4537 [(set (match_operand:V16SI 0 "register_operand" "=v")
4539 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
4540 UNSPEC_FIX_NOTRUNC))]
4542 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4543 [(set_attr "type" "ssecvt")
4544 (set_attr "prefix" "evex")
4545 (set_attr "mode" "XI")])
4547 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4548 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4549 (unspec:VI4_AVX512VL
4550 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
4551 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4553 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4554 [(set_attr "type" "ssecvt")
4555 (set_attr "prefix" "evex")
4556 (set_attr "mode" "<sseinsnmode>")])
4558 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4559 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4560 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4561 UNSPEC_FIX_NOTRUNC))]
4562 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4563 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4564 [(set_attr "type" "ssecvt")
4565 (set_attr "prefix" "evex")
4566 (set_attr "mode" "<sseinsnmode>")])
4568 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4569 [(set (match_operand:V2DI 0 "register_operand" "=v")
4572 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4573 (parallel [(const_int 0) (const_int 1)]))]
4574 UNSPEC_FIX_NOTRUNC))]
4575 "TARGET_AVX512DQ && TARGET_AVX512VL"
4576 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4577 [(set_attr "type" "ssecvt")
4578 (set_attr "prefix" "evex")
4579 (set_attr "mode" "TI")])
4581 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4582 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4583 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4584 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4585 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4586 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4587 [(set_attr "type" "ssecvt")
4588 (set_attr "prefix" "evex")
4589 (set_attr "mode" "<sseinsnmode>")])
4591 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4592 [(set (match_operand:V2DI 0 "register_operand" "=v")
4595 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4596 (parallel [(const_int 0) (const_int 1)]))]
4597 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4598 "TARGET_AVX512DQ && TARGET_AVX512VL"
4599 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4600 [(set_attr "type" "ssecvt")
4601 (set_attr "prefix" "evex")
4602 (set_attr "mode" "TI")])
4604 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
4605 [(set (match_operand:V16SI 0 "register_operand" "=v")
4607 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4609 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4610 [(set_attr "type" "ssecvt")
4611 (set_attr "prefix" "evex")
4612 (set_attr "mode" "XI")])
4614 (define_insn "fix_truncv8sfv8si2<mask_name>"
4615 [(set (match_operand:V8SI 0 "register_operand" "=v")
4616 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4617 "TARGET_AVX && <mask_avx512vl_condition>"
4618 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4619 [(set_attr "type" "ssecvt")
4620 (set_attr "prefix" "<mask_prefix>")
4621 (set_attr "mode" "OI")])
4623 (define_insn "fix_truncv4sfv4si2<mask_name>"
4624 [(set (match_operand:V4SI 0 "register_operand" "=v")
4625 (fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
4626 "TARGET_SSE2 && <mask_avx512vl_condition>"
4627 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4628 [(set_attr "type" "ssecvt")
4629 (set (attr "prefix_rep")
4631 (match_test "TARGET_AVX")
4633 (const_string "1")))
4634 (set (attr "prefix_data16")
4636 (match_test "TARGET_AVX")
4638 (const_string "0")))
4639 (set_attr "prefix_data16" "0")
4640 (set_attr "prefix" "<mask_prefix2>")
4641 (set_attr "mode" "TI")])
4643 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
4644 [(match_operand:<sseintvecmode> 0 "register_operand")
4645 (match_operand:VF1 1 "register_operand")]
4648 if (<MODE>mode == V16SFmode)
4649 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4654 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4655 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4656 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4657 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4662 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4664 ;; Parallel double-precision floating point conversion operations
4666 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4668 (define_insn "sse2_cvtpi2pd"
4669 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4670 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
4672 "cvtpi2pd\t{%1, %0|%0, %1}"
4673 [(set_attr "type" "ssecvt")
4674 (set_attr "unit" "mmx,*")
4675 (set_attr "prefix_data16" "1,*")
4676 (set_attr "mode" "V2DF")])
4678 (define_insn "sse2_cvtpd2pi"
4679 [(set (match_operand:V2SI 0 "register_operand" "=y")
4680 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4681 UNSPEC_FIX_NOTRUNC))]
4683 "cvtpd2pi\t{%1, %0|%0, %1}"
4684 [(set_attr "type" "ssecvt")
4685 (set_attr "unit" "mmx")
4686 (set_attr "bdver1_decode" "double")
4687 (set_attr "btver2_decode" "direct")
4688 (set_attr "prefix_data16" "1")
4689 (set_attr "mode" "DI")])
4691 (define_insn "sse2_cvttpd2pi"
4692 [(set (match_operand:V2SI 0 "register_operand" "=y")
4693 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
4695 "cvttpd2pi\t{%1, %0|%0, %1}"
4696 [(set_attr "type" "ssecvt")
4697 (set_attr "unit" "mmx")
4698 (set_attr "bdver1_decode" "double")
4699 (set_attr "prefix_data16" "1")
4700 (set_attr "mode" "TI")])
4702 (define_insn "sse2_cvtsi2sd"
4703 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4706 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
4707 (match_operand:V2DF 1 "register_operand" "0,0,v")
4711 cvtsi2sd\t{%2, %0|%0, %2}
4712 cvtsi2sd\t{%2, %0|%0, %2}
4713 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4714 [(set_attr "isa" "noavx,noavx,avx")
4715 (set_attr "type" "sseicvt")
4716 (set_attr "athlon_decode" "double,direct,*")
4717 (set_attr "amdfam10_decode" "vector,double,*")
4718 (set_attr "bdver1_decode" "double,direct,*")
4719 (set_attr "btver2_decode" "double,double,double")
4720 (set_attr "znver1_decode" "double,double,double")
4721 (set_attr "prefix" "orig,orig,maybe_evex")
4722 (set_attr "mode" "DF")])
4724 (define_insn "sse2_cvtsi2sdq<round_name>"
4725 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4728 (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4729 (match_operand:V2DF 1 "register_operand" "0,0,v")
4731 "TARGET_SSE2 && TARGET_64BIT"
4733 cvtsi2sdq\t{%2, %0|%0, %2}
4734 cvtsi2sdq\t{%2, %0|%0, %2}
4735 vcvtsi2sdq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4736 [(set_attr "isa" "noavx,noavx,avx")
4737 (set_attr "type" "sseicvt")
4738 (set_attr "athlon_decode" "double,direct,*")
4739 (set_attr "amdfam10_decode" "vector,double,*")
4740 (set_attr "bdver1_decode" "double,direct,*")
4741 (set_attr "length_vex" "*,*,4")
4742 (set_attr "prefix_rex" "1,1,*")
4743 (set_attr "prefix" "orig,orig,maybe_evex")
4744 (set_attr "mode" "DF")])
4746 (define_insn "avx512f_vcvtss2usi<round_name>"
4747 [(set (match_operand:SI 0 "register_operand" "=r")
4750 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4751 (parallel [(const_int 0)]))]
4752 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4754 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4755 [(set_attr "type" "sseicvt")
4756 (set_attr "prefix" "evex")
4757 (set_attr "mode" "SI")])
4759 (define_insn "avx512f_vcvtss2usiq<round_name>"
4760 [(set (match_operand:DI 0 "register_operand" "=r")
4763 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4764 (parallel [(const_int 0)]))]
4765 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4766 "TARGET_AVX512F && TARGET_64BIT"
4767 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4768 [(set_attr "type" "sseicvt")
4769 (set_attr "prefix" "evex")
4770 (set_attr "mode" "DI")])
4772 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
4773 [(set (match_operand:SI 0 "register_operand" "=r")
4776 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4777 (parallel [(const_int 0)]))))]
4779 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4780 [(set_attr "type" "sseicvt")
4781 (set_attr "prefix" "evex")
4782 (set_attr "mode" "SI")])
4784 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
4785 [(set (match_operand:DI 0 "register_operand" "=r")
4788 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4789 (parallel [(const_int 0)]))))]
4790 "TARGET_AVX512F && TARGET_64BIT"
4791 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4792 [(set_attr "type" "sseicvt")
4793 (set_attr "prefix" "evex")
4794 (set_attr "mode" "DI")])
4796 (define_insn "avx512f_vcvtsd2usi<round_name>"
4797 [(set (match_operand:SI 0 "register_operand" "=r")
4800 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4801 (parallel [(const_int 0)]))]
4802 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4804 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4805 [(set_attr "type" "sseicvt")
4806 (set_attr "prefix" "evex")
4807 (set_attr "mode" "SI")])
4809 (define_insn "avx512f_vcvtsd2usiq<round_name>"
4810 [(set (match_operand:DI 0 "register_operand" "=r")
4813 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4814 (parallel [(const_int 0)]))]
4815 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4816 "TARGET_AVX512F && TARGET_64BIT"
4817 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4818 [(set_attr "type" "sseicvt")
4819 (set_attr "prefix" "evex")
4820 (set_attr "mode" "DI")])
4822 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
4823 [(set (match_operand:SI 0 "register_operand" "=r")
4826 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4827 (parallel [(const_int 0)]))))]
4829 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4830 [(set_attr "type" "sseicvt")
4831 (set_attr "prefix" "evex")
4832 (set_attr "mode" "SI")])
4834 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
4835 [(set (match_operand:DI 0 "register_operand" "=r")
4838 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4839 (parallel [(const_int 0)]))))]
4840 "TARGET_AVX512F && TARGET_64BIT"
4841 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4842 [(set_attr "type" "sseicvt")
4843 (set_attr "prefix" "evex")
4844 (set_attr "mode" "DI")])
4846 (define_insn "sse2_cvtsd2si<round_name>"
4847 [(set (match_operand:SI 0 "register_operand" "=r,r")
4850 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4851 (parallel [(const_int 0)]))]
4852 UNSPEC_FIX_NOTRUNC))]
4854 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4855 [(set_attr "type" "sseicvt")
4856 (set_attr "athlon_decode" "double,vector")
4857 (set_attr "bdver1_decode" "double,double")
4858 (set_attr "btver2_decode" "double,double")
4859 (set_attr "prefix_rep" "1")
4860 (set_attr "prefix" "maybe_vex")
4861 (set_attr "mode" "SI")])
4863 (define_insn "sse2_cvtsd2si_2"
4864 [(set (match_operand:SI 0 "register_operand" "=r,r")
4865 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4866 UNSPEC_FIX_NOTRUNC))]
4868 "%vcvtsd2si\t{%1, %0|%0, %q1}"
4869 [(set_attr "type" "sseicvt")
4870 (set_attr "athlon_decode" "double,vector")
4871 (set_attr "amdfam10_decode" "double,double")
4872 (set_attr "bdver1_decode" "double,double")
4873 (set_attr "prefix_rep" "1")
4874 (set_attr "prefix" "maybe_vex")
4875 (set_attr "mode" "SI")])
4877 (define_insn "sse2_cvtsd2siq<round_name>"
4878 [(set (match_operand:DI 0 "register_operand" "=r,r")
4881 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4882 (parallel [(const_int 0)]))]
4883 UNSPEC_FIX_NOTRUNC))]
4884 "TARGET_SSE2 && TARGET_64BIT"
4885 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4886 [(set_attr "type" "sseicvt")
4887 (set_attr "athlon_decode" "double,vector")
4888 (set_attr "bdver1_decode" "double,double")
4889 (set_attr "prefix_rep" "1")
4890 (set_attr "prefix" "maybe_vex")
4891 (set_attr "mode" "DI")])
4893 (define_insn "sse2_cvtsd2siq_2"
4894 [(set (match_operand:DI 0 "register_operand" "=r,r")
4895 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4896 UNSPEC_FIX_NOTRUNC))]
4897 "TARGET_SSE2 && TARGET_64BIT"
4898 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4899 [(set_attr "type" "sseicvt")
4900 (set_attr "athlon_decode" "double,vector")
4901 (set_attr "amdfam10_decode" "double,double")
4902 (set_attr "bdver1_decode" "double,double")
4903 (set_attr "prefix_rep" "1")
4904 (set_attr "prefix" "maybe_vex")
4905 (set_attr "mode" "DI")])
4907 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
4908 [(set (match_operand:SI 0 "register_operand" "=r,r")
4911 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4912 (parallel [(const_int 0)]))))]
4914 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4915 [(set_attr "type" "sseicvt")
4916 (set_attr "athlon_decode" "double,vector")
4917 (set_attr "amdfam10_decode" "double,double")
4918 (set_attr "bdver1_decode" "double,double")
4919 (set_attr "btver2_decode" "double,double")
4920 (set_attr "prefix_rep" "1")
4921 (set_attr "prefix" "maybe_vex")
4922 (set_attr "mode" "SI")])
4924 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4925 [(set (match_operand:DI 0 "register_operand" "=r,r")
4928 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4929 (parallel [(const_int 0)]))))]
4930 "TARGET_SSE2 && TARGET_64BIT"
4931 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4932 [(set_attr "type" "sseicvt")
4933 (set_attr "athlon_decode" "double,vector")
4934 (set_attr "amdfam10_decode" "double,double")
4935 (set_attr "bdver1_decode" "double,double")
4936 (set_attr "prefix_rep" "1")
4937 (set_attr "prefix" "maybe_vex")
4938 (set_attr "mode" "DI")])
4940 ;; For float<si2dfmode><mode>2 insn pattern
4941 (define_mode_attr si2dfmode
4942 [(V8DF "V8SI") (V4DF "V4SI")])
4943 (define_mode_attr si2dfmodelower
4944 [(V8DF "v8si") (V4DF "v4si")])
4946 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4947 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4948 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4949 "TARGET_AVX && <mask_mode512bit_condition>"
4950 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4951 [(set_attr "type" "ssecvt")
4952 (set_attr "prefix" "maybe_vex")
4953 (set_attr "mode" "<MODE>")])
4955 (define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
4956 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
4957 (any_float:VF2_AVX512VL
4958 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4960 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4961 [(set_attr "type" "ssecvt")
4962 (set_attr "prefix" "evex")
4963 (set_attr "mode" "<MODE>")])
4965 ;; For <floatsuffix>float<sselondveclower><mode> insn patterns
4966 (define_mode_attr qq2pssuff
4967 [(V8SF "") (V4SF "{y}")])
4969 (define_mode_attr sselongvecmode
4970 [(V8SF "V8DI") (V4SF "V4DI")])
4972 (define_mode_attr sselongvecmodelower
4973 [(V8SF "v8di") (V4SF "v4di")])
4975 (define_mode_attr sseintvecmode3
4976 [(V8SF "XI") (V4SF "OI")
4977 (V8DF "OI") (V4DF "TI")])
4979 (define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
4980 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
4981 (any_float:VF1_128_256VL
4982 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4983 "TARGET_AVX512DQ && <round_modev8sf_condition>"
4984 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4985 [(set_attr "type" "ssecvt")
4986 (set_attr "prefix" "evex")
4987 (set_attr "mode" "<MODE>")])
4989 (define_insn "*<floatsuffix>floatv2div2sf2"
4990 [(set (match_operand:V4SF 0 "register_operand" "=v")
4992 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4993 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4994 "TARGET_AVX512DQ && TARGET_AVX512VL"
4995 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
4996 [(set_attr "type" "ssecvt")
4997 (set_attr "prefix" "evex")
4998 (set_attr "mode" "V4SF")])
5000 (define_insn "<floatsuffix>floatv2div2sf2_mask"
5001 [(set (match_operand:V4SF 0 "register_operand" "=v")
5004 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5006 (match_operand:V4SF 2 "vector_move_operand" "0C")
5007 (parallel [(const_int 0) (const_int 1)]))
5008 (match_operand:QI 3 "register_operand" "Yk"))
5009 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
5010 "TARGET_AVX512DQ && TARGET_AVX512VL"
5011 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5012 [(set_attr "type" "ssecvt")
5013 (set_attr "prefix" "evex")
5014 (set_attr "mode" "V4SF")])
5016 (define_insn "*<floatsuffix>floatv2div2sf2_mask_1"
5017 [(set (match_operand:V4SF 0 "register_operand" "=v")
5020 (any_float:V2SF (match_operand:V2DI 1
5021 "nonimmediate_operand" "vm"))
5022 (const_vector:V2SF [(const_int 0) (const_int 0)])
5023 (match_operand:QI 2 "register_operand" "Yk"))
5024 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
5025 "TARGET_AVX512DQ && TARGET_AVX512VL"
5026 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
5027 [(set_attr "type" "ssecvt")
5028 (set_attr "prefix" "evex")
5029 (set_attr "mode" "V4SF")])
5031 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
5032 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
5033 (unsigned_float:VF2_512_256VL
5034 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5036 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5037 [(set_attr "type" "ssecvt")
5038 (set_attr "prefix" "evex")
5039 (set_attr "mode" "<MODE>")])
5041 (define_insn "ufloatv2siv2df2<mask_name>"
5042 [(set (match_operand:V2DF 0 "register_operand" "=v")
5043 (unsigned_float:V2DF
5045 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5046 (parallel [(const_int 0) (const_int 1)]))))]
5048 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5049 [(set_attr "type" "ssecvt")
5050 (set_attr "prefix" "evex")
5051 (set_attr "mode" "V2DF")])
5053 (define_insn "avx512f_cvtdq2pd512_2"
5054 [(set (match_operand:V8DF 0 "register_operand" "=v")
5057 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
5058 (parallel [(const_int 0) (const_int 1)
5059 (const_int 2) (const_int 3)
5060 (const_int 4) (const_int 5)
5061 (const_int 6) (const_int 7)]))))]
5063 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
5064 [(set_attr "type" "ssecvt")
5065 (set_attr "prefix" "evex")
5066 (set_attr "mode" "V8DF")])
5068 (define_insn "avx_cvtdq2pd256_2"
5069 [(set (match_operand:V4DF 0 "register_operand" "=v")
5072 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
5073 (parallel [(const_int 0) (const_int 1)
5074 (const_int 2) (const_int 3)]))))]
5076 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
5077 [(set_attr "type" "ssecvt")
5078 (set_attr "prefix" "maybe_evex")
5079 (set_attr "mode" "V4DF")])
5081 (define_insn "sse2_cvtdq2pd<mask_name>"
5082 [(set (match_operand:V2DF 0 "register_operand" "=v")
5085 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5086 (parallel [(const_int 0) (const_int 1)]))))]
5087 "TARGET_SSE2 && <mask_avx512vl_condition>"
5088 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5089 [(set_attr "type" "ssecvt")
5090 (set_attr "prefix" "maybe_vex")
5091 (set_attr "mode" "V2DF")])
5093 (define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
5094 [(set (match_operand:V8SI 0 "register_operand" "=v")
5096 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
5097 UNSPEC_FIX_NOTRUNC))]
5099 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5100 [(set_attr "type" "ssecvt")
5101 (set_attr "prefix" "evex")
5102 (set_attr "mode" "OI")])
5104 (define_insn "avx_cvtpd2dq256<mask_name>"
5105 [(set (match_operand:V4SI 0 "register_operand" "=v")
5106 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5107 UNSPEC_FIX_NOTRUNC))]
5108 "TARGET_AVX && <mask_avx512vl_condition>"
5109 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5110 [(set_attr "type" "ssecvt")
5111 (set_attr "prefix" "<mask_prefix>")
5112 (set_attr "mode" "OI")])
5114 (define_expand "avx_cvtpd2dq256_2"
5115 [(set (match_operand:V8SI 0 "register_operand")
5117 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
5121 "operands[2] = CONST0_RTX (V4SImode);")
5123 (define_insn "*avx_cvtpd2dq256_2"
5124 [(set (match_operand:V8SI 0 "register_operand" "=v")
5126 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5128 (match_operand:V4SI 2 "const0_operand")))]
5130 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
5131 [(set_attr "type" "ssecvt")
5132 (set_attr "prefix" "vex")
5133 (set_attr "btver2_decode" "vector")
5134 (set_attr "mode" "OI")])
5136 (define_insn "sse2_cvtpd2dq<mask_name>"
5137 [(set (match_operand:V4SI 0 "register_operand" "=v")
5139 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
5141 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5142 "TARGET_SSE2 && <mask_avx512vl_condition>"
5145 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5147 return "cvtpd2dq\t{%1, %0|%0, %1}";
5149 [(set_attr "type" "ssecvt")
5150 (set_attr "prefix_rep" "1")
5151 (set_attr "prefix_data16" "0")
5152 (set_attr "prefix" "maybe_vex")
5153 (set_attr "mode" "TI")
5154 (set_attr "amdfam10_decode" "double")
5155 (set_attr "athlon_decode" "vector")
5156 (set_attr "bdver1_decode" "double")])
5158 ;; For ufix_notrunc* insn patterns
5159 (define_mode_attr pd2udqsuff
5160 [(V8DF "") (V4DF "{y}")])
5162 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
5163 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
5165 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
5166 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5168 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5169 [(set_attr "type" "ssecvt")
5170 (set_attr "prefix" "evex")
5171 (set_attr "mode" "<sseinsnmode>")])
5173 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
5174 [(set (match_operand:V4SI 0 "register_operand" "=v")
5177 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5178 UNSPEC_UNSIGNED_FIX_NOTRUNC)
5179 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5181 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5182 [(set_attr "type" "ssecvt")
5183 (set_attr "prefix" "evex")
5184 (set_attr "mode" "TI")])
5186 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
5187 [(set (match_operand:V8SI 0 "register_operand" "=v")
5189 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5191 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5192 [(set_attr "type" "ssecvt")
5193 (set_attr "prefix" "evex")
5194 (set_attr "mode" "OI")])
5196 (define_insn "ufix_truncv2dfv2si2<mask_name>"
5197 [(set (match_operand:V4SI 0 "register_operand" "=v")
5199 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
5200 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5202 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5203 [(set_attr "type" "ssecvt")
5204 (set_attr "prefix" "evex")
5205 (set_attr "mode" "TI")])
5207 (define_insn "fix_truncv4dfv4si2<mask_name>"
5208 [(set (match_operand:V4SI 0 "register_operand" "=v")
5209 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5210 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
5211 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5212 [(set_attr "type" "ssecvt")
5213 (set_attr "prefix" "maybe_evex")
5214 (set_attr "mode" "OI")])
5216 (define_insn "ufix_truncv4dfv4si2<mask_name>"
5217 [(set (match_operand:V4SI 0 "register_operand" "=v")
5218 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5219 "TARGET_AVX512VL && TARGET_AVX512F"
5220 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5221 [(set_attr "type" "ssecvt")
5222 (set_attr "prefix" "maybe_evex")
5223 (set_attr "mode" "OI")])
5225 (define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
5226 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5227 (any_fix:<sseintvecmode>
5228 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5229 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
5230 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5231 [(set_attr "type" "ssecvt")
5232 (set_attr "prefix" "evex")
5233 (set_attr "mode" "<sseintvecmode2>")])
5235 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5236 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5237 (unspec:<sseintvecmode>
5238 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
5239 UNSPEC_FIX_NOTRUNC))]
5240 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5241 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5242 [(set_attr "type" "ssecvt")
5243 (set_attr "prefix" "evex")
5244 (set_attr "mode" "<sseintvecmode2>")])
5246 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5247 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5248 (unspec:<sseintvecmode>
5249 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
5250 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5251 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5252 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5253 [(set_attr "type" "ssecvt")
5254 (set_attr "prefix" "evex")
5255 (set_attr "mode" "<sseintvecmode2>")])
5257 (define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
5258 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
5259 (any_fix:<sselongvecmode>
5260 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5261 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
5262 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5263 [(set_attr "type" "ssecvt")
5264 (set_attr "prefix" "evex")
5265 (set_attr "mode" "<sseintvecmode3>")])
5267 (define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
5268 [(set (match_operand:V2DI 0 "register_operand" "=v")
5271 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5272 (parallel [(const_int 0) (const_int 1)]))))]
5273 "TARGET_AVX512DQ && TARGET_AVX512VL"
5274 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5275 [(set_attr "type" "ssecvt")
5276 (set_attr "prefix" "evex")
5277 (set_attr "mode" "TI")])
5279 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
5280 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5281 (unsigned_fix:<sseintvecmode>
5282 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
5284 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5285 [(set_attr "type" "ssecvt")
5286 (set_attr "prefix" "evex")
5287 (set_attr "mode" "<sseintvecmode2>")])
5289 (define_expand "avx_cvttpd2dq256_2"
5290 [(set (match_operand:V8SI 0 "register_operand")
5292 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
5295 "operands[2] = CONST0_RTX (V4SImode);")
5297 (define_insn "sse2_cvttpd2dq<mask_name>"
5298 [(set (match_operand:V4SI 0 "register_operand" "=v")
5300 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
5301 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5302 "TARGET_SSE2 && <mask_avx512vl_condition>"
5305 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5307 return "cvttpd2dq\t{%1, %0|%0, %1}";
5309 [(set_attr "type" "ssecvt")
5310 (set_attr "amdfam10_decode" "double")
5311 (set_attr "athlon_decode" "vector")
5312 (set_attr "bdver1_decode" "double")
5313 (set_attr "prefix" "maybe_vex")
5314 (set_attr "mode" "TI")])
5316 (define_insn "sse2_cvtsd2ss<round_name>"
5317 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5320 (float_truncate:V2SF
5321 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
5322 (match_operand:V4SF 1 "register_operand" "0,0,v")
5326 cvtsd2ss\t{%2, %0|%0, %2}
5327 cvtsd2ss\t{%2, %0|%0, %q2}
5328 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
5329 [(set_attr "isa" "noavx,noavx,avx")
5330 (set_attr "type" "ssecvt")
5331 (set_attr "athlon_decode" "vector,double,*")
5332 (set_attr "amdfam10_decode" "vector,double,*")
5333 (set_attr "bdver1_decode" "direct,direct,*")
5334 (set_attr "btver2_decode" "double,double,double")
5335 (set_attr "prefix" "orig,orig,<round_prefix>")
5336 (set_attr "mode" "SF")])
5338 (define_insn "*sse2_vd_cvtsd2ss"
5339 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5342 (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
5343 (match_operand:V4SF 1 "register_operand" "0,0,v")
5347 cvtsd2ss\t{%2, %0|%0, %2}
5348 cvtsd2ss\t{%2, %0|%0, %2}
5349 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
5350 [(set_attr "isa" "noavx,noavx,avx")
5351 (set_attr "type" "ssecvt")
5352 (set_attr "athlon_decode" "vector,double,*")
5353 (set_attr "amdfam10_decode" "vector,double,*")
5354 (set_attr "bdver1_decode" "direct,direct,*")
5355 (set_attr "btver2_decode" "double,double,double")
5356 (set_attr "prefix" "orig,orig,vex")
5357 (set_attr "mode" "SF")])
5359 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
5360 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5364 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
5365 (parallel [(const_int 0) (const_int 1)])))
5366 (match_operand:V2DF 1 "register_operand" "0,0,v")
5370 cvtss2sd\t{%2, %0|%0, %2}
5371 cvtss2sd\t{%2, %0|%0, %k2}
5372 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
5373 [(set_attr "isa" "noavx,noavx,avx")
5374 (set_attr "type" "ssecvt")
5375 (set_attr "amdfam10_decode" "vector,double,*")
5376 (set_attr "athlon_decode" "direct,direct,*")
5377 (set_attr "bdver1_decode" "direct,direct,*")
5378 (set_attr "btver2_decode" "double,double,double")
5379 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
5380 (set_attr "mode" "DF")])
5382 (define_insn "*sse2_vd_cvtss2sd"
5383 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5386 (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
5387 (match_operand:V2DF 1 "register_operand" "0,0,v")
5391 cvtss2sd\t{%2, %0|%0, %2}
5392 cvtss2sd\t{%2, %0|%0, %2}
5393 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
5394 [(set_attr "isa" "noavx,noavx,avx")
5395 (set_attr "type" "ssecvt")
5396 (set_attr "amdfam10_decode" "vector,double,*")
5397 (set_attr "athlon_decode" "direct,direct,*")
5398 (set_attr "bdver1_decode" "direct,direct,*")
5399 (set_attr "btver2_decode" "double,double,double")
5400 (set_attr "prefix" "orig,orig,vex")
5401 (set_attr "mode" "DF")])
5403 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
5404 [(set (match_operand:V8SF 0 "register_operand" "=v")
5405 (float_truncate:V8SF
5406 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
5408 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5409 [(set_attr "type" "ssecvt")
5410 (set_attr "prefix" "evex")
5411 (set_attr "mode" "V8SF")])
5413 (define_insn "avx_cvtpd2ps256<mask_name>"
5414 [(set (match_operand:V4SF 0 "register_operand" "=v")
5415 (float_truncate:V4SF
5416 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5417 "TARGET_AVX && <mask_avx512vl_condition>"
5418 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5419 [(set_attr "type" "ssecvt")
5420 (set_attr "prefix" "maybe_evex")
5421 (set_attr "btver2_decode" "vector")
5422 (set_attr "mode" "V4SF")])
5424 (define_expand "sse2_cvtpd2ps"
5425 [(set (match_operand:V4SF 0 "register_operand")
5427 (float_truncate:V2SF
5428 (match_operand:V2DF 1 "vector_operand"))
5431 "operands[2] = CONST0_RTX (V2SFmode);")
5433 (define_expand "sse2_cvtpd2ps_mask"
5434 [(set (match_operand:V4SF 0 "register_operand")
5437 (float_truncate:V2SF
5438 (match_operand:V2DF 1 "vector_operand"))
5440 (match_operand:V4SF 2 "register_operand")
5441 (match_operand:QI 3 "register_operand")))]
5443 "operands[4] = CONST0_RTX (V2SFmode);")
5445 (define_insn "*sse2_cvtpd2ps<mask_name>"
5446 [(set (match_operand:V4SF 0 "register_operand" "=v")
5448 (float_truncate:V2SF
5449 (match_operand:V2DF 1 "vector_operand" "vBm"))
5450 (match_operand:V2SF 2 "const0_operand")))]
5451 "TARGET_SSE2 && <mask_avx512vl_condition>"
5454 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
5456 return "cvtpd2ps\t{%1, %0|%0, %1}";
5458 [(set_attr "type" "ssecvt")
5459 (set_attr "amdfam10_decode" "double")
5460 (set_attr "athlon_decode" "vector")
5461 (set_attr "bdver1_decode" "double")
5462 (set_attr "prefix_data16" "1")
5463 (set_attr "prefix" "maybe_vex")
5464 (set_attr "mode" "V4SF")])
5466 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
5467 (define_mode_attr sf2dfmode
5468 [(V8DF "V8SF") (V4DF "V4SF")])
5470 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
5471 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5472 (float_extend:VF2_512_256
5473 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5474 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
5475 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5476 [(set_attr "type" "ssecvt")
5477 (set_attr "prefix" "maybe_vex")
5478 (set_attr "mode" "<MODE>")])
5480 (define_insn "*avx_cvtps2pd256_2"
5481 [(set (match_operand:V4DF 0 "register_operand" "=v")
5484 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5485 (parallel [(const_int 0) (const_int 1)
5486 (const_int 2) (const_int 3)]))))]
5488 "vcvtps2pd\t{%x1, %0|%0, %x1}"
5489 [(set_attr "type" "ssecvt")
5490 (set_attr "prefix" "vex")
5491 (set_attr "mode" "V4DF")])
5493 (define_insn "vec_unpacks_lo_v16sf"
5494 [(set (match_operand:V8DF 0 "register_operand" "=v")
5497 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5498 (parallel [(const_int 0) (const_int 1)
5499 (const_int 2) (const_int 3)
5500 (const_int 4) (const_int 5)
5501 (const_int 6) (const_int 7)]))))]
5503 "vcvtps2pd\t{%t1, %0|%0, %t1}"
5504 [(set_attr "type" "ssecvt")
5505 (set_attr "prefix" "evex")
5506 (set_attr "mode" "V8DF")])
5508 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5509 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5510 (unspec:<avx512fmaskmode>
5511 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
5512 UNSPEC_CVTINT2MASK))]
5514 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5515 [(set_attr "prefix" "evex")
5516 (set_attr "mode" "<sseinsnmode>")])
5518 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5519 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5520 (unspec:<avx512fmaskmode>
5521 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
5522 UNSPEC_CVTINT2MASK))]
5524 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5525 [(set_attr "prefix" "evex")
5526 (set_attr "mode" "<sseinsnmode>")])
5528 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5529 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
5530 (vec_merge:VI12_AVX512VL
5533 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5536 operands[2] = CONSTM1_RTX (<MODE>mode);
5537 operands[3] = CONST0_RTX (<MODE>mode);
5540 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5541 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
5542 (vec_merge:VI12_AVX512VL
5543 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
5544 (match_operand:VI12_AVX512VL 3 "const0_operand")
5545 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5547 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5548 [(set_attr "prefix" "evex")
5549 (set_attr "mode" "<sseinsnmode>")])
5551 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5552 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
5553 (vec_merge:VI48_AVX512VL
5556 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5559 operands[2] = CONSTM1_RTX (<MODE>mode);
5560 operands[3] = CONST0_RTX (<MODE>mode);
5563 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5564 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
5565 (vec_merge:VI48_AVX512VL
5566 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
5567 (match_operand:VI48_AVX512VL 3 "const0_operand")
5568 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5570 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5571 [(set_attr "prefix" "evex")
5572 (set_attr "mode" "<sseinsnmode>")])
5574 (define_insn "sse2_cvtps2pd<mask_name>"
5575 [(set (match_operand:V2DF 0 "register_operand" "=v")
5578 (match_operand:V4SF 1 "vector_operand" "vm")
5579 (parallel [(const_int 0) (const_int 1)]))))]
5580 "TARGET_SSE2 && <mask_avx512vl_condition>"
5581 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5582 [(set_attr "type" "ssecvt")
5583 (set_attr "amdfam10_decode" "direct")
5584 (set_attr "athlon_decode" "double")
5585 (set_attr "bdver1_decode" "double")
5586 (set_attr "prefix_data16" "0")
5587 (set_attr "prefix" "maybe_vex")
5588 (set_attr "mode" "V2DF")])
5590 (define_expand "vec_unpacks_hi_v4sf"
5595 (match_operand:V4SF 1 "vector_operand"))
5596 (parallel [(const_int 6) (const_int 7)
5597 (const_int 2) (const_int 3)])))
5598 (set (match_operand:V2DF 0 "register_operand")
5602 (parallel [(const_int 0) (const_int 1)]))))]
5604 "operands[2] = gen_reg_rtx (V4SFmode);")
5606 (define_expand "vec_unpacks_hi_v8sf"
5609 (match_operand:V8SF 1 "register_operand")
5610 (parallel [(const_int 4) (const_int 5)
5611 (const_int 6) (const_int 7)])))
5612 (set (match_operand:V4DF 0 "register_operand")
5616 "operands[2] = gen_reg_rtx (V4SFmode);")
5618 (define_expand "vec_unpacks_hi_v16sf"
5621 (match_operand:V16SF 1 "register_operand")
5622 (parallel [(const_int 8) (const_int 9)
5623 (const_int 10) (const_int 11)
5624 (const_int 12) (const_int 13)
5625 (const_int 14) (const_int 15)])))
5626 (set (match_operand:V8DF 0 "register_operand")
5630 "operands[2] = gen_reg_rtx (V8SFmode);")
5632 (define_expand "vec_unpacks_lo_v4sf"
5633 [(set (match_operand:V2DF 0 "register_operand")
5636 (match_operand:V4SF 1 "vector_operand")
5637 (parallel [(const_int 0) (const_int 1)]))))]
5640 (define_expand "vec_unpacks_lo_v8sf"
5641 [(set (match_operand:V4DF 0 "register_operand")
5644 (match_operand:V8SF 1 "nonimmediate_operand")
5645 (parallel [(const_int 0) (const_int 1)
5646 (const_int 2) (const_int 3)]))))]
5649 (define_mode_attr sseunpackfltmode
5650 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
5651 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
5653 (define_expand "vec_unpacks_float_hi_<mode>"
5654 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5655 (match_operand:VI2_AVX512F 1 "register_operand")]
5658 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5660 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
5661 emit_insn (gen_rtx_SET (operands[0],
5662 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5666 (define_expand "vec_unpacks_float_lo_<mode>"
5667 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5668 (match_operand:VI2_AVX512F 1 "register_operand")]
5671 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5673 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
5674 emit_insn (gen_rtx_SET (operands[0],
5675 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5679 (define_expand "vec_unpacku_float_hi_<mode>"
5680 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5681 (match_operand:VI2_AVX512F 1 "register_operand")]
5684 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5686 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
5687 emit_insn (gen_rtx_SET (operands[0],
5688 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5692 (define_expand "vec_unpacku_float_lo_<mode>"
5693 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5694 (match_operand:VI2_AVX512F 1 "register_operand")]
5697 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5699 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
5700 emit_insn (gen_rtx_SET (operands[0],
5701 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5705 (define_expand "vec_unpacks_float_hi_v4si"
5708 (match_operand:V4SI 1 "vector_operand")
5709 (parallel [(const_int 2) (const_int 3)
5710 (const_int 2) (const_int 3)])))
5711 (set (match_operand:V2DF 0 "register_operand")
5715 (parallel [(const_int 0) (const_int 1)]))))]
5717 "operands[2] = gen_reg_rtx (V4SImode);")
5719 (define_expand "vec_unpacks_float_lo_v4si"
5720 [(set (match_operand:V2DF 0 "register_operand")
5723 (match_operand:V4SI 1 "vector_operand")
5724 (parallel [(const_int 0) (const_int 1)]))))]
5727 (define_expand "vec_unpacks_float_hi_v8si"
5730 (match_operand:V8SI 1 "vector_operand")
5731 (parallel [(const_int 4) (const_int 5)
5732 (const_int 6) (const_int 7)])))
5733 (set (match_operand:V4DF 0 "register_operand")
5737 "operands[2] = gen_reg_rtx (V4SImode);")
5739 (define_expand "vec_unpacks_float_lo_v8si"
5740 [(set (match_operand:V4DF 0 "register_operand")
5743 (match_operand:V8SI 1 "nonimmediate_operand")
5744 (parallel [(const_int 0) (const_int 1)
5745 (const_int 2) (const_int 3)]))))]
5748 (define_expand "vec_unpacks_float_hi_v16si"
5751 (match_operand:V16SI 1 "nonimmediate_operand")
5752 (parallel [(const_int 8) (const_int 9)
5753 (const_int 10) (const_int 11)
5754 (const_int 12) (const_int 13)
5755 (const_int 14) (const_int 15)])))
5756 (set (match_operand:V8DF 0 "register_operand")
5760 "operands[2] = gen_reg_rtx (V8SImode);")
5762 (define_expand "vec_unpacks_float_lo_v16si"
5763 [(set (match_operand:V8DF 0 "register_operand")
5766 (match_operand:V16SI 1 "nonimmediate_operand")
5767 (parallel [(const_int 0) (const_int 1)
5768 (const_int 2) (const_int 3)
5769 (const_int 4) (const_int 5)
5770 (const_int 6) (const_int 7)]))))]
5773 (define_expand "vec_unpacku_float_hi_v4si"
5776 (match_operand:V4SI 1 "vector_operand")
5777 (parallel [(const_int 2) (const_int 3)
5778 (const_int 2) (const_int 3)])))
5783 (parallel [(const_int 0) (const_int 1)]))))
5785 (lt:V2DF (match_dup 6) (match_dup 3)))
5787 (and:V2DF (match_dup 7) (match_dup 4)))
5788 (set (match_operand:V2DF 0 "register_operand")
5789 (plus:V2DF (match_dup 6) (match_dup 8)))]
5792 REAL_VALUE_TYPE TWO32r;
5796 real_ldexp (&TWO32r, &dconst1, 32);
5797 x = const_double_from_real_value (TWO32r, DFmode);
5799 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5800 operands[4] = force_reg (V2DFmode,
5801 ix86_build_const_vector (V2DFmode, 1, x));
5803 operands[5] = gen_reg_rtx (V4SImode);
5805 for (i = 6; i < 9; i++)
5806 operands[i] = gen_reg_rtx (V2DFmode);
5809 (define_expand "vec_unpacku_float_lo_v4si"
5813 (match_operand:V4SI 1 "vector_operand")
5814 (parallel [(const_int 0) (const_int 1)]))))
5816 (lt:V2DF (match_dup 5) (match_dup 3)))
5818 (and:V2DF (match_dup 6) (match_dup 4)))
5819 (set (match_operand:V2DF 0 "register_operand")
5820 (plus:V2DF (match_dup 5) (match_dup 7)))]
5823 REAL_VALUE_TYPE TWO32r;
5827 real_ldexp (&TWO32r, &dconst1, 32);
5828 x = const_double_from_real_value (TWO32r, DFmode);
5830 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5831 operands[4] = force_reg (V2DFmode,
5832 ix86_build_const_vector (V2DFmode, 1, x));
5834 for (i = 5; i < 8; i++)
5835 operands[i] = gen_reg_rtx (V2DFmode);
5838 (define_expand "vec_unpacku_float_hi_v8si"
5839 [(match_operand:V4DF 0 "register_operand")
5840 (match_operand:V8SI 1 "register_operand")]
5843 REAL_VALUE_TYPE TWO32r;
5847 real_ldexp (&TWO32r, &dconst1, 32);
5848 x = const_double_from_real_value (TWO32r, DFmode);
5850 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5851 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5852 tmp[5] = gen_reg_rtx (V4SImode);
5854 for (i = 2; i < 5; i++)
5855 tmp[i] = gen_reg_rtx (V4DFmode);
5856 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5857 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
5858 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5859 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5860 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5864 (define_expand "vec_unpacku_float_hi_v16si"
5865 [(match_operand:V8DF 0 "register_operand")
5866 (match_operand:V16SI 1 "register_operand")]
5869 REAL_VALUE_TYPE TWO32r;
5872 real_ldexp (&TWO32r, &dconst1, 32);
5873 x = const_double_from_real_value (TWO32r, DFmode);
5875 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5876 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5877 tmp[2] = gen_reg_rtx (V8DFmode);
5878 tmp[3] = gen_reg_rtx (V8SImode);
5879 k = gen_reg_rtx (QImode);
5881 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5882 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
5883 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5884 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5885 emit_move_insn (operands[0], tmp[2]);
5889 (define_expand "vec_unpacku_float_lo_v8si"
5890 [(match_operand:V4DF 0 "register_operand")
5891 (match_operand:V8SI 1 "nonimmediate_operand")]
5894 REAL_VALUE_TYPE TWO32r;
5898 real_ldexp (&TWO32r, &dconst1, 32);
5899 x = const_double_from_real_value (TWO32r, DFmode);
5901 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5902 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5904 for (i = 2; i < 5; i++)
5905 tmp[i] = gen_reg_rtx (V4DFmode);
5906 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
5907 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5908 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5909 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5913 (define_expand "vec_unpacku_float_lo_v16si"
5914 [(match_operand:V8DF 0 "register_operand")
5915 (match_operand:V16SI 1 "nonimmediate_operand")]
5918 REAL_VALUE_TYPE TWO32r;
5921 real_ldexp (&TWO32r, &dconst1, 32);
5922 x = const_double_from_real_value (TWO32r, DFmode);
5924 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5925 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5926 tmp[2] = gen_reg_rtx (V8DFmode);
5927 k = gen_reg_rtx (QImode);
5929 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
5930 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5931 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5932 emit_move_insn (operands[0], tmp[2]);
5936 (define_expand "vec_pack_trunc_<mode>"
5938 (float_truncate:<sf2dfmode>
5939 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
5941 (float_truncate:<sf2dfmode>
5942 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5943 (set (match_operand:<ssePSmode> 0 "register_operand")
5944 (vec_concat:<ssePSmode>
5949 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
5950 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
5953 (define_expand "vec_pack_trunc_v2df"
5954 [(match_operand:V4SF 0 "register_operand")
5955 (match_operand:V2DF 1 "vector_operand")
5956 (match_operand:V2DF 2 "vector_operand")]
5961 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5963 tmp0 = gen_reg_rtx (V4DFmode);
5964 tmp1 = force_reg (V2DFmode, operands[1]);
5966 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5967 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
5971 tmp0 = gen_reg_rtx (V4SFmode);
5972 tmp1 = gen_reg_rtx (V4SFmode);
5974 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
5975 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
5976 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
5981 (define_expand "vec_pack_sfix_trunc_v8df"
5982 [(match_operand:V16SI 0 "register_operand")
5983 (match_operand:V8DF 1 "nonimmediate_operand")
5984 (match_operand:V8DF 2 "nonimmediate_operand")]
5989 r1 = gen_reg_rtx (V8SImode);
5990 r2 = gen_reg_rtx (V8SImode);
5992 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
5993 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
5994 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5998 (define_expand "vec_pack_sfix_trunc_v4df"
5999 [(match_operand:V8SI 0 "register_operand")
6000 (match_operand:V4DF 1 "nonimmediate_operand")
6001 (match_operand:V4DF 2 "nonimmediate_operand")]
6006 r1 = gen_reg_rtx (V4SImode);
6007 r2 = gen_reg_rtx (V4SImode);
6009 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
6010 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
6011 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
6015 (define_expand "vec_pack_sfix_trunc_v2df"
6016 [(match_operand:V4SI 0 "register_operand")
6017 (match_operand:V2DF 1 "vector_operand")
6018 (match_operand:V2DF 2 "vector_operand")]
6021 rtx tmp0, tmp1, tmp2;
6023 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6025 tmp0 = gen_reg_rtx (V4DFmode);
6026 tmp1 = force_reg (V2DFmode, operands[1]);
6028 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6029 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
6033 tmp0 = gen_reg_rtx (V4SImode);
6034 tmp1 = gen_reg_rtx (V4SImode);
6035 tmp2 = gen_reg_rtx (V2DImode);
6037 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
6038 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
6039 emit_insn (gen_vec_interleave_lowv2di (tmp2,
6040 gen_lowpart (V2DImode, tmp0),
6041 gen_lowpart (V2DImode, tmp1)));
6042 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
6047 (define_mode_attr ssepackfltmode
6048 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
6050 (define_expand "vec_pack_ufix_trunc_<mode>"
6051 [(match_operand:<ssepackfltmode> 0 "register_operand")
6052 (match_operand:VF2 1 "register_operand")
6053 (match_operand:VF2 2 "register_operand")]
6056 if (<MODE>mode == V8DFmode)
6060 r1 = gen_reg_rtx (V8SImode);
6061 r2 = gen_reg_rtx (V8SImode);
6063 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
6064 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
6065 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6070 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
6071 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
6072 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
6073 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
6074 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
6076 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
6077 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
6081 tmp[5] = gen_reg_rtx (V8SFmode);
6082 ix86_expand_vec_extract_even_odd (tmp[5],
6083 gen_lowpart (V8SFmode, tmp[2]),
6084 gen_lowpart (V8SFmode, tmp[3]), 0);
6085 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
6087 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
6088 operands[0], 0, OPTAB_DIRECT);
6089 if (tmp[6] != operands[0])
6090 emit_move_insn (operands[0], tmp[6]);
6096 (define_expand "avx512f_vec_pack_sfix_v8df"
6097 [(match_operand:V16SI 0 "register_operand")
6098 (match_operand:V8DF 1 "nonimmediate_operand")
6099 (match_operand:V8DF 2 "nonimmediate_operand")]
6104 r1 = gen_reg_rtx (V8SImode);
6105 r2 = gen_reg_rtx (V8SImode);
6107 emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
6108 emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
6109 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6113 (define_expand "vec_pack_sfix_v4df"
6114 [(match_operand:V8SI 0 "register_operand")
6115 (match_operand:V4DF 1 "nonimmediate_operand")
6116 (match_operand:V4DF 2 "nonimmediate_operand")]
6121 r1 = gen_reg_rtx (V4SImode);
6122 r2 = gen_reg_rtx (V4SImode);
6124 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
6125 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
6126 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
6130 (define_expand "vec_pack_sfix_v2df"
6131 [(match_operand:V4SI 0 "register_operand")
6132 (match_operand:V2DF 1 "vector_operand")
6133 (match_operand:V2DF 2 "vector_operand")]
6136 rtx tmp0, tmp1, tmp2;
6138 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6140 tmp0 = gen_reg_rtx (V4DFmode);
6141 tmp1 = force_reg (V2DFmode, operands[1]);
6143 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6144 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
6148 tmp0 = gen_reg_rtx (V4SImode);
6149 tmp1 = gen_reg_rtx (V4SImode);
6150 tmp2 = gen_reg_rtx (V2DImode);
6152 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
6153 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
6154 emit_insn (gen_vec_interleave_lowv2di (tmp2,
6155 gen_lowpart (V2DImode, tmp0),
6156 gen_lowpart (V2DImode, tmp1)));
6157 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
6162 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6164 ;; Parallel single-precision floating point element swizzling
6166 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6168 (define_expand "sse_movhlps_exp"
6169 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6172 (match_operand:V4SF 1 "nonimmediate_operand")
6173 (match_operand:V4SF 2 "nonimmediate_operand"))
6174 (parallel [(const_int 6)
6180 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6182 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
6184 /* Fix up the destination if needed. */
6185 if (dst != operands[0])
6186 emit_move_insn (operands[0], dst);
6191 (define_insn "sse_movhlps"
6192 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
6195 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6196 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
6197 (parallel [(const_int 6)
6201 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6203 movhlps\t{%2, %0|%0, %2}
6204 vmovhlps\t{%2, %1, %0|%0, %1, %2}
6205 movlps\t{%H2, %0|%0, %H2}
6206 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
6207 %vmovhps\t{%2, %0|%q0, %2}"
6208 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6209 (set_attr "type" "ssemov")
6210 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6211 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6213 (define_expand "sse_movlhps_exp"
6214 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6217 (match_operand:V4SF 1 "nonimmediate_operand")
6218 (match_operand:V4SF 2 "nonimmediate_operand"))
6219 (parallel [(const_int 0)
6225 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6227 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
6229 /* Fix up the destination if needed. */
6230 if (dst != operands[0])
6231 emit_move_insn (operands[0], dst);
6236 (define_insn "sse_movlhps"
6237 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
6240 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6241 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
6242 (parallel [(const_int 0)
6246 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
6248 movlhps\t{%2, %0|%0, %2}
6249 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6250 movhps\t{%2, %0|%0, %q2}
6251 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6252 %vmovlps\t{%2, %H0|%H0, %2}"
6253 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6254 (set_attr "type" "ssemov")
6255 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6256 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6258 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
6259 [(set (match_operand:V16SF 0 "register_operand" "=v")
6262 (match_operand:V16SF 1 "register_operand" "v")
6263 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6264 (parallel [(const_int 2) (const_int 18)
6265 (const_int 3) (const_int 19)
6266 (const_int 6) (const_int 22)
6267 (const_int 7) (const_int 23)
6268 (const_int 10) (const_int 26)
6269 (const_int 11) (const_int 27)
6270 (const_int 14) (const_int 30)
6271 (const_int 15) (const_int 31)])))]
6273 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6274 [(set_attr "type" "sselog")
6275 (set_attr "prefix" "evex")
6276 (set_attr "mode" "V16SF")])
6278 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
6279 (define_insn "avx_unpckhps256<mask_name>"
6280 [(set (match_operand:V8SF 0 "register_operand" "=v")
6283 (match_operand:V8SF 1 "register_operand" "v")
6284 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6285 (parallel [(const_int 2) (const_int 10)
6286 (const_int 3) (const_int 11)
6287 (const_int 6) (const_int 14)
6288 (const_int 7) (const_int 15)])))]
6289 "TARGET_AVX && <mask_avx512vl_condition>"
6290 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6291 [(set_attr "type" "sselog")
6292 (set_attr "prefix" "vex")
6293 (set_attr "mode" "V8SF")])
6295 (define_expand "vec_interleave_highv8sf"
6299 (match_operand:V8SF 1 "register_operand")
6300 (match_operand:V8SF 2 "nonimmediate_operand"))
6301 (parallel [(const_int 0) (const_int 8)
6302 (const_int 1) (const_int 9)
6303 (const_int 4) (const_int 12)
6304 (const_int 5) (const_int 13)])))
6310 (parallel [(const_int 2) (const_int 10)
6311 (const_int 3) (const_int 11)
6312 (const_int 6) (const_int 14)
6313 (const_int 7) (const_int 15)])))
6314 (set (match_operand:V8SF 0 "register_operand")
6319 (parallel [(const_int 4) (const_int 5)
6320 (const_int 6) (const_int 7)
6321 (const_int 12) (const_int 13)
6322 (const_int 14) (const_int 15)])))]
6325 operands[3] = gen_reg_rtx (V8SFmode);
6326 operands[4] = gen_reg_rtx (V8SFmode);
6329 (define_insn "vec_interleave_highv4sf<mask_name>"
6330 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6333 (match_operand:V4SF 1 "register_operand" "0,v")
6334 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
6335 (parallel [(const_int 2) (const_int 6)
6336 (const_int 3) (const_int 7)])))]
6337 "TARGET_SSE && <mask_avx512vl_condition>"
6339 unpckhps\t{%2, %0|%0, %2}
6340 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6341 [(set_attr "isa" "noavx,avx")
6342 (set_attr "type" "sselog")
6343 (set_attr "prefix" "orig,vex")
6344 (set_attr "mode" "V4SF")])
6346 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
6347 [(set (match_operand:V16SF 0 "register_operand" "=v")
6350 (match_operand:V16SF 1 "register_operand" "v")
6351 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6352 (parallel [(const_int 0) (const_int 16)
6353 (const_int 1) (const_int 17)
6354 (const_int 4) (const_int 20)
6355 (const_int 5) (const_int 21)
6356 (const_int 8) (const_int 24)
6357 (const_int 9) (const_int 25)
6358 (const_int 12) (const_int 28)
6359 (const_int 13) (const_int 29)])))]
6361 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6362 [(set_attr "type" "sselog")
6363 (set_attr "prefix" "evex")
6364 (set_attr "mode" "V16SF")])
6366 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
6367 (define_insn "avx_unpcklps256<mask_name>"
6368 [(set (match_operand:V8SF 0 "register_operand" "=v")
6371 (match_operand:V8SF 1 "register_operand" "v")
6372 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6373 (parallel [(const_int 0) (const_int 8)
6374 (const_int 1) (const_int 9)
6375 (const_int 4) (const_int 12)
6376 (const_int 5) (const_int 13)])))]
6377 "TARGET_AVX && <mask_avx512vl_condition>"
6378 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6379 [(set_attr "type" "sselog")
6380 (set_attr "prefix" "vex")
6381 (set_attr "mode" "V8SF")])
6383 (define_insn "unpcklps128_mask"
6384 [(set (match_operand:V4SF 0 "register_operand" "=v")
6388 (match_operand:V4SF 1 "register_operand" "v")
6389 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6390 (parallel [(const_int 0) (const_int 4)
6391 (const_int 1) (const_int 5)]))
6392 (match_operand:V4SF 3 "vector_move_operand" "0C")
6393 (match_operand:QI 4 "register_operand" "Yk")))]
6395 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
6396 [(set_attr "type" "sselog")
6397 (set_attr "prefix" "evex")
6398 (set_attr "mode" "V4SF")])
6400 (define_expand "vec_interleave_lowv8sf"
6404 (match_operand:V8SF 1 "register_operand")
6405 (match_operand:V8SF 2 "nonimmediate_operand"))
6406 (parallel [(const_int 0) (const_int 8)
6407 (const_int 1) (const_int 9)
6408 (const_int 4) (const_int 12)
6409 (const_int 5) (const_int 13)])))
6415 (parallel [(const_int 2) (const_int 10)
6416 (const_int 3) (const_int 11)
6417 (const_int 6) (const_int 14)
6418 (const_int 7) (const_int 15)])))
6419 (set (match_operand:V8SF 0 "register_operand")
6424 (parallel [(const_int 0) (const_int 1)
6425 (const_int 2) (const_int 3)
6426 (const_int 8) (const_int 9)
6427 (const_int 10) (const_int 11)])))]
6430 operands[3] = gen_reg_rtx (V8SFmode);
6431 operands[4] = gen_reg_rtx (V8SFmode);
6434 (define_insn "vec_interleave_lowv4sf"
6435 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6438 (match_operand:V4SF 1 "register_operand" "0,v")
6439 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
6440 (parallel [(const_int 0) (const_int 4)
6441 (const_int 1) (const_int 5)])))]
6444 unpcklps\t{%2, %0|%0, %2}
6445 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
6446 [(set_attr "isa" "noavx,avx")
6447 (set_attr "type" "sselog")
6448 (set_attr "prefix" "orig,maybe_evex")
6449 (set_attr "mode" "V4SF")])
6451 ;; These are modeled with the same vec_concat as the others so that we
6452 ;; capture users of shufps that can use the new instructions
6453 (define_insn "avx_movshdup256<mask_name>"
6454 [(set (match_operand:V8SF 0 "register_operand" "=v")
6457 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6459 (parallel [(const_int 1) (const_int 1)
6460 (const_int 3) (const_int 3)
6461 (const_int 5) (const_int 5)
6462 (const_int 7) (const_int 7)])))]
6463 "TARGET_AVX && <mask_avx512vl_condition>"
6464 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6465 [(set_attr "type" "sse")
6466 (set_attr "prefix" "vex")
6467 (set_attr "mode" "V8SF")])
6469 (define_insn "sse3_movshdup<mask_name>"
6470 [(set (match_operand:V4SF 0 "register_operand" "=v")
6473 (match_operand:V4SF 1 "vector_operand" "vBm")
6475 (parallel [(const_int 1)
6479 "TARGET_SSE3 && <mask_avx512vl_condition>"
6480 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6481 [(set_attr "type" "sse")
6482 (set_attr "prefix_rep" "1")
6483 (set_attr "prefix" "maybe_vex")
6484 (set_attr "mode" "V4SF")])
6486 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
6487 [(set (match_operand:V16SF 0 "register_operand" "=v")
6490 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6492 (parallel [(const_int 1) (const_int 1)
6493 (const_int 3) (const_int 3)
6494 (const_int 5) (const_int 5)
6495 (const_int 7) (const_int 7)
6496 (const_int 9) (const_int 9)
6497 (const_int 11) (const_int 11)
6498 (const_int 13) (const_int 13)
6499 (const_int 15) (const_int 15)])))]
6501 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6502 [(set_attr "type" "sse")
6503 (set_attr "prefix" "evex")
6504 (set_attr "mode" "V16SF")])
6506 (define_insn "avx_movsldup256<mask_name>"
6507 [(set (match_operand:V8SF 0 "register_operand" "=v")
6510 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6512 (parallel [(const_int 0) (const_int 0)
6513 (const_int 2) (const_int 2)
6514 (const_int 4) (const_int 4)
6515 (const_int 6) (const_int 6)])))]
6516 "TARGET_AVX && <mask_avx512vl_condition>"
6517 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6518 [(set_attr "type" "sse")
6519 (set_attr "prefix" "vex")
6520 (set_attr "mode" "V8SF")])
6522 (define_insn "sse3_movsldup<mask_name>"
6523 [(set (match_operand:V4SF 0 "register_operand" "=v")
6526 (match_operand:V4SF 1 "vector_operand" "vBm")
6528 (parallel [(const_int 0)
6532 "TARGET_SSE3 && <mask_avx512vl_condition>"
6533 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6534 [(set_attr "type" "sse")
6535 (set_attr "prefix_rep" "1")
6536 (set_attr "prefix" "maybe_vex")
6537 (set_attr "mode" "V4SF")])
6539 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
6540 [(set (match_operand:V16SF 0 "register_operand" "=v")
6543 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6545 (parallel [(const_int 0) (const_int 0)
6546 (const_int 2) (const_int 2)
6547 (const_int 4) (const_int 4)
6548 (const_int 6) (const_int 6)
6549 (const_int 8) (const_int 8)
6550 (const_int 10) (const_int 10)
6551 (const_int 12) (const_int 12)
6552 (const_int 14) (const_int 14)])))]
6554 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6555 [(set_attr "type" "sse")
6556 (set_attr "prefix" "evex")
6557 (set_attr "mode" "V16SF")])
6559 (define_expand "avx_shufps256<mask_expand4_name>"
6560 [(match_operand:V8SF 0 "register_operand")
6561 (match_operand:V8SF 1 "register_operand")
6562 (match_operand:V8SF 2 "nonimmediate_operand")
6563 (match_operand:SI 3 "const_int_operand")]
6566 int mask = INTVAL (operands[3]);
6567 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
6570 GEN_INT ((mask >> 0) & 3),
6571 GEN_INT ((mask >> 2) & 3),
6572 GEN_INT (((mask >> 4) & 3) + 8),
6573 GEN_INT (((mask >> 6) & 3) + 8),
6574 GEN_INT (((mask >> 0) & 3) + 4),
6575 GEN_INT (((mask >> 2) & 3) + 4),
6576 GEN_INT (((mask >> 4) & 3) + 12),
6577 GEN_INT (((mask >> 6) & 3) + 12)
6578 <mask_expand4_args>));
6582 ;; One bit in mask selects 2 elements.
6583 (define_insn "avx_shufps256_1<mask_name>"
6584 [(set (match_operand:V8SF 0 "register_operand" "=v")
6587 (match_operand:V8SF 1 "register_operand" "v")
6588 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6589 (parallel [(match_operand 3 "const_0_to_3_operand" )
6590 (match_operand 4 "const_0_to_3_operand" )
6591 (match_operand 5 "const_8_to_11_operand" )
6592 (match_operand 6 "const_8_to_11_operand" )
6593 (match_operand 7 "const_4_to_7_operand" )
6594 (match_operand 8 "const_4_to_7_operand" )
6595 (match_operand 9 "const_12_to_15_operand")
6596 (match_operand 10 "const_12_to_15_operand")])))]
6598 && <mask_avx512vl_condition>
6599 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6600 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6601 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6602 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
6605 mask = INTVAL (operands[3]);
6606 mask |= INTVAL (operands[4]) << 2;
6607 mask |= (INTVAL (operands[5]) - 8) << 4;
6608 mask |= (INTVAL (operands[6]) - 8) << 6;
6609 operands[3] = GEN_INT (mask);
6611 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6613 [(set_attr "type" "sseshuf")
6614 (set_attr "length_immediate" "1")
6615 (set_attr "prefix" "<mask_prefix>")
6616 (set_attr "mode" "V8SF")])
6618 (define_expand "sse_shufps<mask_expand4_name>"
6619 [(match_operand:V4SF 0 "register_operand")
6620 (match_operand:V4SF 1 "register_operand")
6621 (match_operand:V4SF 2 "vector_operand")
6622 (match_operand:SI 3 "const_int_operand")]
6625 int mask = INTVAL (operands[3]);
6626 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
6629 GEN_INT ((mask >> 0) & 3),
6630 GEN_INT ((mask >> 2) & 3),
6631 GEN_INT (((mask >> 4) & 3) + 4),
6632 GEN_INT (((mask >> 6) & 3) + 4)
6633 <mask_expand4_args>));
6637 (define_insn "sse_shufps_v4sf_mask"
6638 [(set (match_operand:V4SF 0 "register_operand" "=v")
6642 (match_operand:V4SF 1 "register_operand" "v")
6643 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6644 (parallel [(match_operand 3 "const_0_to_3_operand")
6645 (match_operand 4 "const_0_to_3_operand")
6646 (match_operand 5 "const_4_to_7_operand")
6647 (match_operand 6 "const_4_to_7_operand")]))
6648 (match_operand:V4SF 7 "vector_move_operand" "0C")
6649 (match_operand:QI 8 "register_operand" "Yk")))]
6653 mask |= INTVAL (operands[3]) << 0;
6654 mask |= INTVAL (operands[4]) << 2;
6655 mask |= (INTVAL (operands[5]) - 4) << 4;
6656 mask |= (INTVAL (operands[6]) - 4) << 6;
6657 operands[3] = GEN_INT (mask);
6659 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
6661 [(set_attr "type" "sseshuf")
6662 (set_attr "length_immediate" "1")
6663 (set_attr "prefix" "evex")
6664 (set_attr "mode" "V4SF")])
6666 (define_insn "sse_shufps_<mode>"
6667 [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
6668 (vec_select:VI4F_128
6669 (vec_concat:<ssedoublevecmode>
6670 (match_operand:VI4F_128 1 "register_operand" "0,v")
6671 (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
6672 (parallel [(match_operand 3 "const_0_to_3_operand")
6673 (match_operand 4 "const_0_to_3_operand")
6674 (match_operand 5 "const_4_to_7_operand")
6675 (match_operand 6 "const_4_to_7_operand")])))]
6679 mask |= INTVAL (operands[3]) << 0;
6680 mask |= INTVAL (operands[4]) << 2;
6681 mask |= (INTVAL (operands[5]) - 4) << 4;
6682 mask |= (INTVAL (operands[6]) - 4) << 6;
6683 operands[3] = GEN_INT (mask);
6685 switch (which_alternative)
6688 return "shufps\t{%3, %2, %0|%0, %2, %3}";
6690 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6695 [(set_attr "isa" "noavx,avx")
6696 (set_attr "type" "sseshuf")
6697 (set_attr "length_immediate" "1")
6698 (set_attr "prefix" "orig,maybe_evex")
6699 (set_attr "mode" "V4SF")])
6701 (define_insn "sse_storehps"
6702 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
6704 (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
6705 (parallel [(const_int 2) (const_int 3)])))]
6706 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6708 %vmovhps\t{%1, %0|%q0, %1}
6709 %vmovhlps\t{%1, %d0|%d0, %1}
6710 %vmovlps\t{%H1, %d0|%d0, %H1}"
6711 [(set_attr "type" "ssemov")
6712 (set_attr "prefix" "maybe_vex")
6713 (set_attr "mode" "V2SF,V4SF,V2SF")])
6715 (define_expand "sse_loadhps_exp"
6716 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6719 (match_operand:V4SF 1 "nonimmediate_operand")
6720 (parallel [(const_int 0) (const_int 1)]))
6721 (match_operand:V2SF 2 "nonimmediate_operand")))]
6724 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6726 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
6728 /* Fix up the destination if needed. */
6729 if (dst != operands[0])
6730 emit_move_insn (operands[0], dst);
6735 (define_insn "sse_loadhps"
6736 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
6739 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6740 (parallel [(const_int 0) (const_int 1)]))
6741 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
6744 movhps\t{%2, %0|%0, %q2}
6745 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6746 movlhps\t{%2, %0|%0, %2}
6747 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6748 %vmovlps\t{%2, %H0|%H0, %2}"
6749 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6750 (set_attr "type" "ssemov")
6751 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6752 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
6754 (define_insn "sse_storelps"
6755 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
6757 (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
6758 (parallel [(const_int 0) (const_int 1)])))]
6759 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6761 %vmovlps\t{%1, %0|%q0, %1}
6762 %vmovaps\t{%1, %0|%0, %1}
6763 %vmovlps\t{%1, %d0|%d0, %q1}"
6764 [(set_attr "type" "ssemov")
6765 (set_attr "prefix" "maybe_vex")
6766 (set_attr "mode" "V2SF,V4SF,V2SF")])
6768 (define_expand "sse_loadlps_exp"
6769 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6771 (match_operand:V2SF 2 "nonimmediate_operand")
6773 (match_operand:V4SF 1 "nonimmediate_operand")
6774 (parallel [(const_int 2) (const_int 3)]))))]
6777 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6779 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
6781 /* Fix up the destination if needed. */
6782 if (dst != operands[0])
6783 emit_move_insn (operands[0], dst);
6788 (define_insn "sse_loadlps"
6789 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
6791 (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
6793 (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
6794 (parallel [(const_int 2) (const_int 3)]))))]
6797 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
6798 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
6799 movlps\t{%2, %0|%0, %q2}
6800 vmovlps\t{%2, %1, %0|%0, %1, %q2}
6801 %vmovlps\t{%2, %0|%q0, %2}"
6802 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6803 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
6804 (set (attr "length_immediate")
6805 (if_then_else (eq_attr "alternative" "0,1")
6807 (const_string "*")))
6808 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6809 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6811 (define_insn "sse_movss"
6812 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6814 (match_operand:V4SF 2 "register_operand" " x,v")
6815 (match_operand:V4SF 1 "register_operand" " 0,v")
6819 movss\t{%2, %0|%0, %2}
6820 vmovss\t{%2, %1, %0|%0, %1, %2}"
6821 [(set_attr "isa" "noavx,avx")
6822 (set_attr "type" "ssemov")
6823 (set_attr "prefix" "orig,maybe_evex")
6824 (set_attr "mode" "SF")])
6826 (define_insn "avx2_vec_dup<mode>"
6827 [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
6828 (vec_duplicate:VF1_128_256
6830 (match_operand:V4SF 1 "register_operand" "v")
6831 (parallel [(const_int 0)]))))]
6833 "vbroadcastss\t{%1, %0|%0, %1}"
6834 [(set_attr "type" "sselog1")
6835 (set_attr "prefix" "maybe_evex")
6836 (set_attr "mode" "<MODE>")])
6838 (define_insn "avx2_vec_dupv8sf_1"
6839 [(set (match_operand:V8SF 0 "register_operand" "=v")
6842 (match_operand:V8SF 1 "register_operand" "v")
6843 (parallel [(const_int 0)]))))]
6845 "vbroadcastss\t{%x1, %0|%0, %x1}"
6846 [(set_attr "type" "sselog1")
6847 (set_attr "prefix" "maybe_evex")
6848 (set_attr "mode" "V8SF")])
6850 (define_insn "avx512f_vec_dup<mode>_1"
6851 [(set (match_operand:VF_512 0 "register_operand" "=v")
6852 (vec_duplicate:VF_512
6853 (vec_select:<ssescalarmode>
6854 (match_operand:VF_512 1 "register_operand" "v")
6855 (parallel [(const_int 0)]))))]
6857 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
6858 [(set_attr "type" "sselog1")
6859 (set_attr "prefix" "evex")
6860 (set_attr "mode" "<MODE>")])
6862 ;; Although insertps takes register source, we prefer
6863 ;; unpcklps with register source since it is shorter.
6864 (define_insn "*vec_concatv2sf_sse4_1"
6865 [(set (match_operand:V2SF 0 "register_operand"
6866 "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
6868 (match_operand:SF 1 "nonimmediate_operand"
6869 " 0, 0,Yv, 0,0, v,m, 0 , m")
6870 (match_operand:SF 2 "vector_move_operand"
6871 " Yr,*x,Yv, m,m, m,C,*ym, C")))]
6872 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6874 unpcklps\t{%2, %0|%0, %2}
6875 unpcklps\t{%2, %0|%0, %2}
6876 vunpcklps\t{%2, %1, %0|%0, %1, %2}
6877 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6878 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6879 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6880 %vmovss\t{%1, %0|%0, %1}
6881 punpckldq\t{%2, %0|%0, %2}
6882 movd\t{%1, %0|%0, %1}"
6884 (cond [(eq_attr "alternative" "0,1,3,4")
6885 (const_string "noavx")
6886 (eq_attr "alternative" "2,5")
6887 (const_string "avx")
6889 (const_string "*")))
6891 (cond [(eq_attr "alternative" "6")
6892 (const_string "ssemov")
6893 (eq_attr "alternative" "7")
6894 (const_string "mmxcvt")
6895 (eq_attr "alternative" "8")
6896 (const_string "mmxmov")
6898 (const_string "sselog")))
6899 (set (attr "prefix_data16")
6900 (if_then_else (eq_attr "alternative" "3,4")
6902 (const_string "*")))
6903 (set (attr "prefix_extra")
6904 (if_then_else (eq_attr "alternative" "3,4,5")
6906 (const_string "*")))
6907 (set (attr "length_immediate")
6908 (if_then_else (eq_attr "alternative" "3,4,5")
6910 (const_string "*")))
6911 (set (attr "prefix")
6912 (cond [(eq_attr "alternative" "2,5")
6913 (const_string "maybe_evex")
6914 (eq_attr "alternative" "6")
6915 (const_string "maybe_vex")
6917 (const_string "orig")))
6918 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
6920 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6921 ;; vector_operand for operand 2 and *not* allowing memory for the SSE
6922 ;; alternatives pretty much forces the MMX alternative to be chosen.
6923 (define_insn "*vec_concatv2sf_sse"
6924 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
6926 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6927 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
6930 unpcklps\t{%2, %0|%0, %2}
6931 movss\t{%1, %0|%0, %1}
6932 punpckldq\t{%2, %0|%0, %2}
6933 movd\t{%1, %0|%0, %1}"
6934 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6935 (set_attr "mode" "V4SF,SF,DI,DI")])
6937 (define_insn "*vec_concatv4sf"
6938 [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v")
6940 (match_operand:V2SF 1 "register_operand" " 0,v,0,v")
6941 (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
6944 movlhps\t{%2, %0|%0, %2}
6945 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6946 movhps\t{%2, %0|%0, %q2}
6947 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
6948 [(set_attr "isa" "noavx,avx,noavx,avx")
6949 (set_attr "type" "ssemov")
6950 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
6951 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
6953 ;; Avoid combining registers from different units in a single alternative,
6954 ;; see comment above inline_secondary_memory_needed function in i386.c
6955 (define_insn "vec_set<mode>_0"
6956 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
6957 "=Yr,*x,v,v,Yi,x,x,v,Yr ,*x ,x ,m ,m ,m")
6959 (vec_duplicate:VI4F_128
6960 (match_operand:<ssescalarmode> 2 "general_operand"
6961 " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
6962 (match_operand:VI4F_128 1 "vector_move_operand"
6963 " C , C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
6967 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
6968 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
6969 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
6970 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
6971 %vmovd\t{%2, %0|%0, %2}
6972 movss\t{%2, %0|%0, %2}
6973 movss\t{%2, %0|%0, %2}
6974 vmovss\t{%2, %1, %0|%0, %1, %2}
6975 pinsrd\t{$0, %2, %0|%0, %2, 0}
6976 pinsrd\t{$0, %2, %0|%0, %2, 0}
6977 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
6982 (cond [(eq_attr "alternative" "0,1,8,9")
6983 (const_string "sse4_noavx")
6984 (eq_attr "alternative" "2,7,10")
6985 (const_string "avx")
6986 (eq_attr "alternative" "3,4")
6987 (const_string "sse2")
6988 (eq_attr "alternative" "5,6")
6989 (const_string "noavx")
6991 (const_string "*")))
6993 (cond [(eq_attr "alternative" "0,1,2,8,9,10")
6994 (const_string "sselog")
6995 (eq_attr "alternative" "12")
6996 (const_string "imov")
6997 (eq_attr "alternative" "13")
6998 (const_string "fmov")
7000 (const_string "ssemov")))
7001 (set (attr "prefix_extra")
7002 (if_then_else (eq_attr "alternative" "8,9,10")
7004 (const_string "*")))
7005 (set (attr "length_immediate")
7006 (if_then_else (eq_attr "alternative" "8,9,10")
7008 (const_string "*")))
7009 (set (attr "prefix")
7010 (cond [(eq_attr "alternative" "0,1,5,6,8,9")
7011 (const_string "orig")
7012 (eq_attr "alternative" "2")
7013 (const_string "maybe_evex")
7014 (eq_attr "alternative" "3,4")
7015 (const_string "maybe_vex")
7016 (eq_attr "alternative" "7,10")
7017 (const_string "vex")
7019 (const_string "*")))
7020 (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")])
7022 ;; A subset is vec_setv4sf.
7023 (define_insn "*vec_setv4sf_sse4_1"
7024 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7027 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
7028 (match_operand:V4SF 1 "register_operand" "0,0,v")
7029 (match_operand:SI 3 "const_int_operand")))]
7031 && ((unsigned) exact_log2 (INTVAL (operands[3]))
7032 < GET_MODE_NUNITS (V4SFmode))"
7034 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
7035 switch (which_alternative)
7039 return "insertps\t{%3, %2, %0|%0, %2, %3}";
7041 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7046 [(set_attr "isa" "noavx,noavx,avx")
7047 (set_attr "type" "sselog")
7048 (set_attr "prefix_data16" "1,1,*")
7049 (set_attr "prefix_extra" "1")
7050 (set_attr "length_immediate" "1")
7051 (set_attr "prefix" "orig,orig,maybe_evex")
7052 (set_attr "mode" "V4SF")])
7054 (define_insn "sse4_1_insertps"
7055 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7056 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
7057 (match_operand:V4SF 1 "register_operand" "0,0,v")
7058 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
7062 if (MEM_P (operands[2]))
7064 unsigned count_s = INTVAL (operands[3]) >> 6;
7066 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
7067 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
7069 switch (which_alternative)
7073 return "insertps\t{%3, %2, %0|%0, %2, %3}";
7075 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7080 [(set_attr "isa" "noavx,noavx,avx")
7081 (set_attr "type" "sselog")
7082 (set_attr "prefix_data16" "1,1,*")
7083 (set_attr "prefix_extra" "1")
7084 (set_attr "length_immediate" "1")
7085 (set_attr "prefix" "orig,orig,maybe_evex")
7086 (set_attr "mode" "V4SF")])
7089 [(set (match_operand:VI4F_128 0 "memory_operand")
7091 (vec_duplicate:VI4F_128
7092 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
7095 "TARGET_SSE && reload_completed"
7096 [(set (match_dup 0) (match_dup 1))]
7097 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
7099 (define_expand "vec_set<mode>"
7100 [(match_operand:V 0 "register_operand")
7101 (match_operand:<ssescalarmode> 1 "register_operand")
7102 (match_operand 2 "const_int_operand")]
7105 ix86_expand_vector_set (false, operands[0], operands[1],
7106 INTVAL (operands[2]));
7110 (define_insn_and_split "*vec_extractv4sf_0"
7111 [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
7113 (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
7114 (parallel [(const_int 0)])))]
7115 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7117 "&& reload_completed"
7118 [(set (match_dup 0) (match_dup 1))]
7119 "operands[1] = gen_lowpart (SFmode, operands[1]);")
7121 (define_insn_and_split "*sse4_1_extractps"
7122 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
7124 (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
7125 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
7128 extractps\t{%2, %1, %0|%0, %1, %2}
7129 extractps\t{%2, %1, %0|%0, %1, %2}
7130 vextractps\t{%2, %1, %0|%0, %1, %2}
7133 "&& reload_completed && SSE_REG_P (operands[0])"
7136 rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
7137 switch (INTVAL (operands[2]))
7141 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
7142 operands[2], operands[2],
7143 GEN_INT (INTVAL (operands[2]) + 4),
7144 GEN_INT (INTVAL (operands[2]) + 4)));
7147 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
7150 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
7155 [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
7156 (set_attr "type" "sselog,sselog,sselog,*,*")
7157 (set_attr "prefix_data16" "1,1,1,*,*")
7158 (set_attr "prefix_extra" "1,1,1,*,*")
7159 (set_attr "length_immediate" "1,1,1,*,*")
7160 (set_attr "prefix" "orig,orig,maybe_evex,*,*")
7161 (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
7163 (define_insn_and_split "*vec_extractv4sf_mem"
7164 [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
7166 (match_operand:V4SF 1 "memory_operand" "o,o,o")
7167 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
7170 "&& reload_completed"
7171 [(set (match_dup 0) (match_dup 1))]
7173 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
7176 (define_mode_attr extract_type
7177 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
7179 (define_mode_attr extract_suf
7180 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
7182 (define_mode_iterator AVX512_VEC
7183 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
7185 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
7186 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
7187 (match_operand:AVX512_VEC 1 "register_operand")
7188 (match_operand:SI 2 "const_0_to_3_operand")
7189 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
7190 (match_operand:QI 4 "register_operand")]
7194 mask = INTVAL (operands[2]);
7195 rtx dest = operands[0];
7197 if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
7198 dest = gen_reg_rtx (<ssequartermode>mode);
7200 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
7201 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
7202 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
7203 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
7206 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
7207 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
7209 if (dest != operands[0])
7210 emit_move_insn (operands[0], dest);
7214 (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
7215 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
7216 (vec_merge:<ssequartermode>
7217 (vec_select:<ssequartermode>
7218 (match_operand:V8FI 1 "register_operand" "v")
7219 (parallel [(match_operand 2 "const_0_to_7_operand")
7220 (match_operand 3 "const_0_to_7_operand")]))
7221 (match_operand:<ssequartermode> 4 "memory_operand" "0")
7222 (match_operand:QI 5 "register_operand" "Yk")))]
7224 && INTVAL (operands[2]) % 2 == 0
7225 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7226 && rtx_equal_p (operands[4], operands[0])"
7228 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
7229 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
7231 [(set_attr "type" "sselog")
7232 (set_attr "prefix_extra" "1")
7233 (set_attr "length_immediate" "1")
7234 (set_attr "memory" "store")
7235 (set_attr "prefix" "evex")
7236 (set_attr "mode" "<sseinsnmode>")])
7238 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
7239 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
7240 (vec_merge:<ssequartermode>
7241 (vec_select:<ssequartermode>
7242 (match_operand:V16FI 1 "register_operand" "v")
7243 (parallel [(match_operand 2 "const_0_to_15_operand")
7244 (match_operand 3 "const_0_to_15_operand")
7245 (match_operand 4 "const_0_to_15_operand")
7246 (match_operand 5 "const_0_to_15_operand")]))
7247 (match_operand:<ssequartermode> 6 "memory_operand" "0")
7248 (match_operand:QI 7 "register_operand" "Yk")))]
7250 && INTVAL (operands[2]) % 4 == 0
7251 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7252 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
7253 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
7254 && rtx_equal_p (operands[6], operands[0])"
7256 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
7257 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
7259 [(set_attr "type" "sselog")
7260 (set_attr "prefix_extra" "1")
7261 (set_attr "length_immediate" "1")
7262 (set_attr "memory" "store")
7263 (set_attr "prefix" "evex")
7264 (set_attr "mode" "<sseinsnmode>")])
7266 (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
7267 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7268 (vec_select:<ssequartermode>
7269 (match_operand:V8FI 1 "register_operand" "v")
7270 (parallel [(match_operand 2 "const_0_to_7_operand")
7271 (match_operand 3 "const_0_to_7_operand")])))]
7273 && INTVAL (operands[2]) % 2 == 0
7274 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
7276 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
7277 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
7279 [(set_attr "type" "sselog1")
7280 (set_attr "prefix_extra" "1")
7281 (set_attr "length_immediate" "1")
7282 (set_attr "prefix" "evex")
7283 (set_attr "mode" "<sseinsnmode>")])
7285 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
7286 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7287 (vec_select:<ssequartermode>
7288 (match_operand:V16FI 1 "register_operand" "v")
7289 (parallel [(match_operand 2 "const_0_to_15_operand")
7290 (match_operand 3 "const_0_to_15_operand")
7291 (match_operand 4 "const_0_to_15_operand")
7292 (match_operand 5 "const_0_to_15_operand")])))]
7294 && INTVAL (operands[2]) % 4 == 0
7295 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7296 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
7297 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
7299 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
7300 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
7302 [(set_attr "type" "sselog1")
7303 (set_attr "prefix_extra" "1")
7304 (set_attr "length_immediate" "1")
7305 (set_attr "prefix" "evex")
7306 (set_attr "mode" "<sseinsnmode>")])
7308 (define_mode_attr extract_type_2
7309 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
7311 (define_mode_attr extract_suf_2
7312 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
7314 (define_mode_iterator AVX512_VEC_2
7315 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
7317 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
7318 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7319 (match_operand:AVX512_VEC_2 1 "register_operand")
7320 (match_operand:SI 2 "const_0_to_1_operand")
7321 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
7322 (match_operand:QI 4 "register_operand")]
7325 rtx (*insn)(rtx, rtx, rtx, rtx);
7326 rtx dest = operands[0];
7328 if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
7329 dest = gen_reg_rtx (<ssehalfvecmode>mode);
7331 switch (INTVAL (operands[2]))
7334 insn = gen_vec_extract_lo_<mode>_mask;
7337 insn = gen_vec_extract_hi_<mode>_mask;
7343 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
7344 if (dest != operands[0])
7345 emit_move_insn (operands[0], dest);
7350 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7351 (vec_select:<ssehalfvecmode>
7352 (match_operand:V8FI 1 "nonimmediate_operand")
7353 (parallel [(const_int 0) (const_int 1)
7354 (const_int 2) (const_int 3)])))]
7355 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7358 || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
7359 [(set (match_dup 0) (match_dup 1))]
7360 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7362 (define_insn "vec_extract_lo_<mode>_maskm"
7363 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7364 (vec_merge:<ssehalfvecmode>
7365 (vec_select:<ssehalfvecmode>
7366 (match_operand:V8FI 1 "register_operand" "v")
7367 (parallel [(const_int 0) (const_int 1)
7368 (const_int 2) (const_int 3)]))
7369 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7370 (match_operand:QI 3 "register_operand" "Yk")))]
7372 && rtx_equal_p (operands[2], operands[0])"
7373 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7374 [(set_attr "type" "sselog1")
7375 (set_attr "prefix_extra" "1")
7376 (set_attr "length_immediate" "1")
7377 (set_attr "prefix" "evex")
7378 (set_attr "mode" "<sseinsnmode>")])
7380 (define_insn "vec_extract_lo_<mode><mask_name>"
7381 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
7382 (vec_select:<ssehalfvecmode>
7383 (match_operand:V8FI 1 "<store_mask_predicate>" "v,<store_mask_constraint>")
7384 (parallel [(const_int 0) (const_int 1)
7385 (const_int 2) (const_int 3)])))]
7387 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7389 if (<mask_applied> || (!TARGET_AVX512VL && !MEM_P (operands[1])))
7390 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7394 [(set_attr "type" "sselog1")
7395 (set_attr "prefix_extra" "1")
7396 (set_attr "length_immediate" "1")
7397 (set_attr "prefix" "evex")
7398 (set_attr "mode" "<sseinsnmode>")])
7400 (define_insn "vec_extract_hi_<mode>_maskm"
7401 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7402 (vec_merge:<ssehalfvecmode>
7403 (vec_select:<ssehalfvecmode>
7404 (match_operand:V8FI 1 "register_operand" "v")
7405 (parallel [(const_int 4) (const_int 5)
7406 (const_int 6) (const_int 7)]))
7407 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7408 (match_operand:QI 3 "register_operand" "Yk")))]
7410 && rtx_equal_p (operands[2], operands[0])"
7411 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7412 [(set_attr "type" "sselog")
7413 (set_attr "prefix_extra" "1")
7414 (set_attr "length_immediate" "1")
7415 (set_attr "memory" "store")
7416 (set_attr "prefix" "evex")
7417 (set_attr "mode" "<sseinsnmode>")])
7419 (define_insn "vec_extract_hi_<mode><mask_name>"
7420 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7421 (vec_select:<ssehalfvecmode>
7422 (match_operand:V8FI 1 "register_operand" "v")
7423 (parallel [(const_int 4) (const_int 5)
7424 (const_int 6) (const_int 7)])))]
7426 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
7427 [(set_attr "type" "sselog1")
7428 (set_attr "prefix_extra" "1")
7429 (set_attr "length_immediate" "1")
7430 (set_attr "prefix" "evex")
7431 (set_attr "mode" "<sseinsnmode>")])
7433 (define_insn "vec_extract_hi_<mode>_maskm"
7434 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7435 (vec_merge:<ssehalfvecmode>
7436 (vec_select:<ssehalfvecmode>
7437 (match_operand:V16FI 1 "register_operand" "v")
7438 (parallel [(const_int 8) (const_int 9)
7439 (const_int 10) (const_int 11)
7440 (const_int 12) (const_int 13)
7441 (const_int 14) (const_int 15)]))
7442 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7443 (match_operand:QI 3 "register_operand" "Yk")))]
7445 && rtx_equal_p (operands[2], operands[0])"
7446 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7447 [(set_attr "type" "sselog1")
7448 (set_attr "prefix_extra" "1")
7449 (set_attr "length_immediate" "1")
7450 (set_attr "prefix" "evex")
7451 (set_attr "mode" "<sseinsnmode>")])
7453 (define_insn "vec_extract_hi_<mode><mask_name>"
7454 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
7455 (vec_select:<ssehalfvecmode>
7456 (match_operand:V16FI 1 "register_operand" "v,v")
7457 (parallel [(const_int 8) (const_int 9)
7458 (const_int 10) (const_int 11)
7459 (const_int 12) (const_int 13)
7460 (const_int 14) (const_int 15)])))]
7461 "TARGET_AVX512F && <mask_avx512dq_condition>"
7463 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
7464 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7465 [(set_attr "type" "sselog1")
7466 (set_attr "prefix_extra" "1")
7467 (set_attr "isa" "avx512dq,noavx512dq")
7468 (set_attr "length_immediate" "1")
7469 (set_attr "prefix" "evex")
7470 (set_attr "mode" "<sseinsnmode>")])
7472 (define_expand "avx512vl_vextractf128<mode>"
7473 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7474 (match_operand:VI48F_256 1 "register_operand")
7475 (match_operand:SI 2 "const_0_to_1_operand")
7476 (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
7477 (match_operand:QI 4 "register_operand")]
7478 "TARGET_AVX512DQ && TARGET_AVX512VL"
7480 rtx (*insn)(rtx, rtx, rtx, rtx);
7481 rtx dest = operands[0];
7484 && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
7485 /* For V8S[IF]mode there are maskm insns with =m and 0
7487 ? !rtx_equal_p (dest, operands[3])
7488 /* For V4D[IF]mode, hi insns don't allow memory, and
7489 lo insns have =m and 0C constraints. */
7490 : (operands[2] != const0_rtx
7491 || (!rtx_equal_p (dest, operands[3])
7492 && GET_CODE (operands[3]) != CONST_VECTOR))))
7493 dest = gen_reg_rtx (<ssehalfvecmode>mode);
7494 switch (INTVAL (operands[2]))
7497 insn = gen_vec_extract_lo_<mode>_mask;
7500 insn = gen_vec_extract_hi_<mode>_mask;
7506 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
7507 if (dest != operands[0])
7508 emit_move_insn (operands[0], dest);
7512 (define_expand "avx_vextractf128<mode>"
7513 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7514 (match_operand:V_256 1 "register_operand")
7515 (match_operand:SI 2 "const_0_to_1_operand")]
7518 rtx (*insn)(rtx, rtx);
7520 switch (INTVAL (operands[2]))
7523 insn = gen_vec_extract_lo_<mode>;
7526 insn = gen_vec_extract_hi_<mode>;
7532 emit_insn (insn (operands[0], operands[1]));
7536 (define_insn "vec_extract_lo_<mode><mask_name>"
7537 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
7538 (vec_select:<ssehalfvecmode>
7539 (match_operand:V16FI 1 "<store_mask_predicate>"
7540 "<store_mask_constraint>,v")
7541 (parallel [(const_int 0) (const_int 1)
7542 (const_int 2) (const_int 3)
7543 (const_int 4) (const_int 5)
7544 (const_int 6) (const_int 7)])))]
7546 && <mask_mode512bit_condition>
7547 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7550 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7556 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7557 (vec_select:<ssehalfvecmode>
7558 (match_operand:V16FI 1 "nonimmediate_operand")
7559 (parallel [(const_int 0) (const_int 1)
7560 (const_int 2) (const_int 3)
7561 (const_int 4) (const_int 5)
7562 (const_int 6) (const_int 7)])))]
7563 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7564 && reload_completed"
7565 [(set (match_dup 0) (match_dup 1))]
7566 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7568 (define_insn "vec_extract_lo_<mode><mask_name>"
7569 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
7570 (vec_select:<ssehalfvecmode>
7571 (match_operand:VI8F_256 1 "<store_mask_predicate>"
7572 "<store_mask_constraint>,v")
7573 (parallel [(const_int 0) (const_int 1)])))]
7575 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
7576 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7579 return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
7583 [(set_attr "type" "sselog")
7584 (set_attr "prefix_extra" "1")
7585 (set_attr "length_immediate" "1")
7586 (set_attr "memory" "none,store")
7587 (set_attr "prefix" "evex")
7588 (set_attr "mode" "XI")])
7591 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7592 (vec_select:<ssehalfvecmode>
7593 (match_operand:VI8F_256 1 "nonimmediate_operand")
7594 (parallel [(const_int 0) (const_int 1)])))]
7595 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7596 && reload_completed"
7597 [(set (match_dup 0) (match_dup 1))]
7598 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7600 (define_insn "vec_extract_hi_<mode><mask_name>"
7601 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
7602 (vec_select:<ssehalfvecmode>
7603 (match_operand:VI8F_256 1 "register_operand" "v,v")
7604 (parallel [(const_int 2) (const_int 3)])))]
7605 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7607 if (TARGET_AVX512VL)
7609 if (TARGET_AVX512DQ)
7610 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7612 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
7615 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7617 [(set_attr "type" "sselog")
7618 (set_attr "prefix_extra" "1")
7619 (set_attr "length_immediate" "1")
7620 (set_attr "memory" "none,store")
7621 (set_attr "prefix" "vex")
7622 (set_attr "mode" "<sseinsnmode>")])
7625 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7626 (vec_select:<ssehalfvecmode>
7627 (match_operand:VI4F_256 1 "nonimmediate_operand")
7628 (parallel [(const_int 0) (const_int 1)
7629 (const_int 2) (const_int 3)])))]
7630 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7631 && reload_completed"
7632 [(set (match_dup 0) (match_dup 1))]
7633 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7635 (define_insn "vec_extract_lo_<mode><mask_name>"
7636 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
7637 "=<store_mask_constraint>,v")
7638 (vec_select:<ssehalfvecmode>
7639 (match_operand:VI4F_256 1 "<store_mask_predicate>"
7640 "v,<store_mask_constraint>")
7641 (parallel [(const_int 0) (const_int 1)
7642 (const_int 2) (const_int 3)])))]
7644 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
7645 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7648 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7652 [(set_attr "type" "sselog1")
7653 (set_attr "prefix_extra" "1")
7654 (set_attr "length_immediate" "1")
7655 (set_attr "prefix" "evex")
7656 (set_attr "mode" "<sseinsnmode>")])
7658 (define_insn "vec_extract_lo_<mode>_maskm"
7659 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7660 (vec_merge:<ssehalfvecmode>
7661 (vec_select:<ssehalfvecmode>
7662 (match_operand:VI4F_256 1 "register_operand" "v")
7663 (parallel [(const_int 0) (const_int 1)
7664 (const_int 2) (const_int 3)]))
7665 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7666 (match_operand:QI 3 "register_operand" "Yk")))]
7667 "TARGET_AVX512VL && TARGET_AVX512F
7668 && rtx_equal_p (operands[2], operands[0])"
7669 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7670 [(set_attr "type" "sselog1")
7671 (set_attr "prefix_extra" "1")
7672 (set_attr "length_immediate" "1")
7673 (set_attr "prefix" "evex")
7674 (set_attr "mode" "<sseinsnmode>")])
7676 (define_insn "vec_extract_hi_<mode>_maskm"
7677 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7678 (vec_merge:<ssehalfvecmode>
7679 (vec_select:<ssehalfvecmode>
7680 (match_operand:VI4F_256 1 "register_operand" "v")
7681 (parallel [(const_int 4) (const_int 5)
7682 (const_int 6) (const_int 7)]))
7683 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7684 (match_operand:<ssehalfvecmode> 3 "register_operand" "Yk")))]
7685 "TARGET_AVX512F && TARGET_AVX512VL
7686 && rtx_equal_p (operands[2], operands[0])"
7687 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7688 [(set_attr "type" "sselog1")
7689 (set_attr "length_immediate" "1")
7690 (set_attr "prefix" "evex")
7691 (set_attr "mode" "<sseinsnmode>")])
7693 (define_insn "vec_extract_hi_<mode>_mask"
7694 [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v")
7695 (vec_merge:<ssehalfvecmode>
7696 (vec_select:<ssehalfvecmode>
7697 (match_operand:VI4F_256 1 "register_operand" "v")
7698 (parallel [(const_int 4) (const_int 5)
7699 (const_int 6) (const_int 7)]))
7700 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "0C")
7701 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
7703 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
7704 [(set_attr "type" "sselog1")
7705 (set_attr "length_immediate" "1")
7706 (set_attr "prefix" "evex")
7707 (set_attr "mode" "<sseinsnmode>")])
7709 (define_insn "vec_extract_hi_<mode>"
7710 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
7711 (vec_select:<ssehalfvecmode>
7712 (match_operand:VI4F_256 1 "register_operand" "x, v")
7713 (parallel [(const_int 4) (const_int 5)
7714 (const_int 6) (const_int 7)])))]
7717 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
7718 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7719 [(set_attr "isa" "*, avx512vl")
7720 (set_attr "prefix" "vex, evex")
7721 (set_attr "type" "sselog1")
7722 (set_attr "length_immediate" "1")
7723 (set_attr "mode" "<sseinsnmode>")])
7725 (define_insn_and_split "vec_extract_lo_v32hi"
7726 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7728 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
7729 (parallel [(const_int 0) (const_int 1)
7730 (const_int 2) (const_int 3)
7731 (const_int 4) (const_int 5)
7732 (const_int 6) (const_int 7)
7733 (const_int 8) (const_int 9)
7734 (const_int 10) (const_int 11)
7735 (const_int 12) (const_int 13)
7736 (const_int 14) (const_int 15)])))]
7737 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7739 "&& reload_completed"
7740 [(set (match_dup 0) (match_dup 1))]
7741 "operands[1] = gen_lowpart (V16HImode, operands[1]);")
7743 (define_insn "vec_extract_hi_v32hi"
7744 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7746 (match_operand:V32HI 1 "register_operand" "v,v")
7747 (parallel [(const_int 16) (const_int 17)
7748 (const_int 18) (const_int 19)
7749 (const_int 20) (const_int 21)
7750 (const_int 22) (const_int 23)
7751 (const_int 24) (const_int 25)
7752 (const_int 26) (const_int 27)
7753 (const_int 28) (const_int 29)
7754 (const_int 30) (const_int 31)])))]
7756 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7757 [(set_attr "type" "sselog")
7758 (set_attr "prefix_extra" "1")
7759 (set_attr "length_immediate" "1")
7760 (set_attr "memory" "none,store")
7761 (set_attr "prefix" "evex")
7762 (set_attr "mode" "XI")])
7764 (define_insn_and_split "vec_extract_lo_v16hi"
7765 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
7767 (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
7768 (parallel [(const_int 0) (const_int 1)
7769 (const_int 2) (const_int 3)
7770 (const_int 4) (const_int 5)
7771 (const_int 6) (const_int 7)])))]
7772 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7774 "&& reload_completed"
7775 [(set (match_dup 0) (match_dup 1))]
7776 "operands[1] = gen_lowpart (V8HImode, operands[1]);")
7778 (define_insn "vec_extract_hi_v16hi"
7779 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
7781 (match_operand:V16HI 1 "register_operand" "x,x,v,v,v,v")
7782 (parallel [(const_int 8) (const_int 9)
7783 (const_int 10) (const_int 11)
7784 (const_int 12) (const_int 13)
7785 (const_int 14) (const_int 15)])))]
7788 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7789 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7790 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7791 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7792 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
7793 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
7794 [(set_attr "type" "sselog")
7795 (set_attr "prefix_extra" "1")
7796 (set_attr "length_immediate" "1")
7797 (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
7798 (set_attr "memory" "none,store,none,store,none,store")
7799 (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
7800 (set_attr "mode" "OI")])
7802 (define_insn_and_split "vec_extract_lo_v64qi"
7803 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7805 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
7806 (parallel [(const_int 0) (const_int 1)
7807 (const_int 2) (const_int 3)
7808 (const_int 4) (const_int 5)
7809 (const_int 6) (const_int 7)
7810 (const_int 8) (const_int 9)
7811 (const_int 10) (const_int 11)
7812 (const_int 12) (const_int 13)
7813 (const_int 14) (const_int 15)
7814 (const_int 16) (const_int 17)
7815 (const_int 18) (const_int 19)
7816 (const_int 20) (const_int 21)
7817 (const_int 22) (const_int 23)
7818 (const_int 24) (const_int 25)
7819 (const_int 26) (const_int 27)
7820 (const_int 28) (const_int 29)
7821 (const_int 30) (const_int 31)])))]
7822 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7824 "&& reload_completed"
7825 [(set (match_dup 0) (match_dup 1))]
7826 "operands[1] = gen_lowpart (V32QImode, operands[1]);")
7828 (define_insn "vec_extract_hi_v64qi"
7829 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7831 (match_operand:V64QI 1 "register_operand" "v,v")
7832 (parallel [(const_int 32) (const_int 33)
7833 (const_int 34) (const_int 35)
7834 (const_int 36) (const_int 37)
7835 (const_int 38) (const_int 39)
7836 (const_int 40) (const_int 41)
7837 (const_int 42) (const_int 43)
7838 (const_int 44) (const_int 45)
7839 (const_int 46) (const_int 47)
7840 (const_int 48) (const_int 49)
7841 (const_int 50) (const_int 51)
7842 (const_int 52) (const_int 53)
7843 (const_int 54) (const_int 55)
7844 (const_int 56) (const_int 57)
7845 (const_int 58) (const_int 59)
7846 (const_int 60) (const_int 61)
7847 (const_int 62) (const_int 63)])))]
7849 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7850 [(set_attr "type" "sselog")
7851 (set_attr "prefix_extra" "1")
7852 (set_attr "length_immediate" "1")
7853 (set_attr "memory" "none,store")
7854 (set_attr "prefix" "evex")
7855 (set_attr "mode" "XI")])
7857 (define_insn_and_split "vec_extract_lo_v32qi"
7858 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
7860 (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
7861 (parallel [(const_int 0) (const_int 1)
7862 (const_int 2) (const_int 3)
7863 (const_int 4) (const_int 5)
7864 (const_int 6) (const_int 7)
7865 (const_int 8) (const_int 9)
7866 (const_int 10) (const_int 11)
7867 (const_int 12) (const_int 13)
7868 (const_int 14) (const_int 15)])))]
7869 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7871 "&& reload_completed"
7872 [(set (match_dup 0) (match_dup 1))]
7873 "operands[1] = gen_lowpart (V16QImode, operands[1]);")
7875 (define_insn "vec_extract_hi_v32qi"
7876 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
7878 (match_operand:V32QI 1 "register_operand" "x,x,v,v,v,v")
7879 (parallel [(const_int 16) (const_int 17)
7880 (const_int 18) (const_int 19)
7881 (const_int 20) (const_int 21)
7882 (const_int 22) (const_int 23)
7883 (const_int 24) (const_int 25)
7884 (const_int 26) (const_int 27)
7885 (const_int 28) (const_int 29)
7886 (const_int 30) (const_int 31)])))]
7889 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7890 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7891 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7892 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7893 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
7894 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
7895 [(set_attr "type" "sselog")
7896 (set_attr "prefix_extra" "1")
7897 (set_attr "length_immediate" "1")
7898 (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
7899 (set_attr "memory" "none,store,none,store,none,store")
7900 (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
7901 (set_attr "mode" "OI")])
7903 ;; Modes handled by vec_extract patterns.
7904 (define_mode_iterator VEC_EXTRACT_MODE
7905 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
7906 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
7907 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
7908 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
7909 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
7910 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
7911 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
7913 (define_expand "vec_extract<mode><ssescalarmodelower>"
7914 [(match_operand:<ssescalarmode> 0 "register_operand")
7915 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
7916 (match_operand 2 "const_int_operand")]
7919 ix86_expand_vector_extract (false, operands[0], operands[1],
7920 INTVAL (operands[2]));
7924 (define_expand "vec_extract<mode><ssehalfvecmodelower>"
7925 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7926 (match_operand:V_512 1 "register_operand")
7927 (match_operand 2 "const_0_to_1_operand")]
7930 if (INTVAL (operands[2]))
7931 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
7933 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
7937 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7939 ;; Parallel double-precision floating point element swizzling
7941 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7943 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
7944 [(set (match_operand:V8DF 0 "register_operand" "=v")
7947 (match_operand:V8DF 1 "register_operand" "v")
7948 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7949 (parallel [(const_int 1) (const_int 9)
7950 (const_int 3) (const_int 11)
7951 (const_int 5) (const_int 13)
7952 (const_int 7) (const_int 15)])))]
7954 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7955 [(set_attr "type" "sselog")
7956 (set_attr "prefix" "evex")
7957 (set_attr "mode" "V8DF")])
7959 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7960 (define_insn "avx_unpckhpd256<mask_name>"
7961 [(set (match_operand:V4DF 0 "register_operand" "=v")
7964 (match_operand:V4DF 1 "register_operand" "v")
7965 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
7966 (parallel [(const_int 1) (const_int 5)
7967 (const_int 3) (const_int 7)])))]
7968 "TARGET_AVX && <mask_avx512vl_condition>"
7969 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7970 [(set_attr "type" "sselog")
7971 (set_attr "prefix" "vex")
7972 (set_attr "mode" "V4DF")])
7974 (define_expand "vec_interleave_highv4df"
7978 (match_operand:V4DF 1 "register_operand")
7979 (match_operand:V4DF 2 "nonimmediate_operand"))
7980 (parallel [(const_int 0) (const_int 4)
7981 (const_int 2) (const_int 6)])))
7987 (parallel [(const_int 1) (const_int 5)
7988 (const_int 3) (const_int 7)])))
7989 (set (match_operand:V4DF 0 "register_operand")
7994 (parallel [(const_int 2) (const_int 3)
7995 (const_int 6) (const_int 7)])))]
7998 operands[3] = gen_reg_rtx (V4DFmode);
7999 operands[4] = gen_reg_rtx (V4DFmode);
8003 (define_insn "avx512vl_unpckhpd128_mask"
8004 [(set (match_operand:V2DF 0 "register_operand" "=v")
8008 (match_operand:V2DF 1 "register_operand" "v")
8009 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8010 (parallel [(const_int 1) (const_int 3)]))
8011 (match_operand:V2DF 3 "vector_move_operand" "0C")
8012 (match_operand:QI 4 "register_operand" "Yk")))]
8014 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
8015 [(set_attr "type" "sselog")
8016 (set_attr "prefix" "evex")
8017 (set_attr "mode" "V2DF")])
8019 (define_expand "vec_interleave_highv2df"
8020 [(set (match_operand:V2DF 0 "register_operand")
8023 (match_operand:V2DF 1 "nonimmediate_operand")
8024 (match_operand:V2DF 2 "nonimmediate_operand"))
8025 (parallel [(const_int 1)
8029 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
8030 operands[2] = force_reg (V2DFmode, operands[2]);
8033 (define_insn "*vec_interleave_highv2df"
8034 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
8037 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
8038 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
8039 (parallel [(const_int 1)
8041 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
8043 unpckhpd\t{%2, %0|%0, %2}
8044 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
8045 %vmovddup\t{%H1, %0|%0, %H1}
8046 movlpd\t{%H1, %0|%0, %H1}
8047 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
8048 %vmovhpd\t{%1, %0|%q0, %1}"
8049 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
8050 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
8051 (set (attr "prefix_data16")
8052 (if_then_else (eq_attr "alternative" "3,5")
8054 (const_string "*")))
8055 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
8056 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
8058 (define_expand "avx512f_movddup512<mask_name>"
8059 [(set (match_operand:V8DF 0 "register_operand")
8062 (match_operand:V8DF 1 "nonimmediate_operand")
8064 (parallel [(const_int 0) (const_int 8)
8065 (const_int 2) (const_int 10)
8066 (const_int 4) (const_int 12)
8067 (const_int 6) (const_int 14)])))]
8070 (define_expand "avx512f_unpcklpd512<mask_name>"
8071 [(set (match_operand:V8DF 0 "register_operand")
8074 (match_operand:V8DF 1 "register_operand")
8075 (match_operand:V8DF 2 "nonimmediate_operand"))
8076 (parallel [(const_int 0) (const_int 8)
8077 (const_int 2) (const_int 10)
8078 (const_int 4) (const_int 12)
8079 (const_int 6) (const_int 14)])))]
8082 (define_insn "*avx512f_unpcklpd512<mask_name>"
8083 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
8086 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
8087 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
8088 (parallel [(const_int 0) (const_int 8)
8089 (const_int 2) (const_int 10)
8090 (const_int 4) (const_int 12)
8091 (const_int 6) (const_int 14)])))]
8094 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
8095 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8096 [(set_attr "type" "sselog")
8097 (set_attr "prefix" "evex")
8098 (set_attr "mode" "V8DF")])
8100 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
8101 (define_expand "avx_movddup256<mask_name>"
8102 [(set (match_operand:V4DF 0 "register_operand")
8105 (match_operand:V4DF 1 "nonimmediate_operand")
8107 (parallel [(const_int 0) (const_int 4)
8108 (const_int 2) (const_int 6)])))]
8109 "TARGET_AVX && <mask_avx512vl_condition>")
8111 (define_expand "avx_unpcklpd256<mask_name>"
8112 [(set (match_operand:V4DF 0 "register_operand")
8115 (match_operand:V4DF 1 "register_operand")
8116 (match_operand:V4DF 2 "nonimmediate_operand"))
8117 (parallel [(const_int 0) (const_int 4)
8118 (const_int 2) (const_int 6)])))]
8119 "TARGET_AVX && <mask_avx512vl_condition>")
8121 (define_insn "*avx_unpcklpd256<mask_name>"
8122 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
8125 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
8126 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
8127 (parallel [(const_int 0) (const_int 4)
8128 (const_int 2) (const_int 6)])))]
8129 "TARGET_AVX && <mask_avx512vl_condition>"
8131 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
8132 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
8133 [(set_attr "type" "sselog")
8134 (set_attr "prefix" "vex")
8135 (set_attr "mode" "V4DF")])
8137 (define_expand "vec_interleave_lowv4df"
8141 (match_operand:V4DF 1 "register_operand")
8142 (match_operand:V4DF 2 "nonimmediate_operand"))
8143 (parallel [(const_int 0) (const_int 4)
8144 (const_int 2) (const_int 6)])))
8150 (parallel [(const_int 1) (const_int 5)
8151 (const_int 3) (const_int 7)])))
8152 (set (match_operand:V4DF 0 "register_operand")
8157 (parallel [(const_int 0) (const_int 1)
8158 (const_int 4) (const_int 5)])))]
8161 operands[3] = gen_reg_rtx (V4DFmode);
8162 operands[4] = gen_reg_rtx (V4DFmode);
8165 (define_insn "avx512vl_unpcklpd128_mask"
8166 [(set (match_operand:V2DF 0 "register_operand" "=v")
8170 (match_operand:V2DF 1 "register_operand" "v")
8171 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8172 (parallel [(const_int 0) (const_int 2)]))
8173 (match_operand:V2DF 3 "vector_move_operand" "0C")
8174 (match_operand:QI 4 "register_operand" "Yk")))]
8176 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
8177 [(set_attr "type" "sselog")
8178 (set_attr "prefix" "evex")
8179 (set_attr "mode" "V2DF")])
8181 (define_expand "vec_interleave_lowv2df"
8182 [(set (match_operand:V2DF 0 "register_operand")
8185 (match_operand:V2DF 1 "nonimmediate_operand")
8186 (match_operand:V2DF 2 "nonimmediate_operand"))
8187 (parallel [(const_int 0)
8191 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
8192 operands[1] = force_reg (V2DFmode, operands[1]);
8195 (define_insn "*vec_interleave_lowv2df"
8196 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
8199 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
8200 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
8201 (parallel [(const_int 0)
8203 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
8205 unpcklpd\t{%2, %0|%0, %2}
8206 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8207 %vmovddup\t{%1, %0|%0, %q1}
8208 movhpd\t{%2, %0|%0, %q2}
8209 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
8210 %vmovlpd\t{%2, %H0|%H0, %2}"
8211 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
8212 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
8213 (set (attr "prefix_data16")
8214 (if_then_else (eq_attr "alternative" "3,5")
8216 (const_string "*")))
8217 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
8218 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
8221 [(set (match_operand:V2DF 0 "memory_operand")
8224 (match_operand:V2DF 1 "register_operand")
8226 (parallel [(const_int 0)
8228 "TARGET_SSE3 && reload_completed"
8231 rtx low = gen_lowpart (DFmode, operands[1]);
8233 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
8234 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
8239 [(set (match_operand:V2DF 0 "register_operand")
8242 (match_operand:V2DF 1 "memory_operand")
8244 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
8245 (match_operand:SI 3 "const_int_operand")])))]
8246 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
8247 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
8249 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
8252 (define_insn "avx512f_vmscalef<mode><round_name>"
8253 [(set (match_operand:VF_128 0 "register_operand" "=v")
8256 [(match_operand:VF_128 1 "register_operand" "v")
8257 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
8262 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
8263 [(set_attr "prefix" "evex")
8264 (set_attr "mode" "<ssescalarmode>")])
8266 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
8267 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8269 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
8270 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
8273 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
8274 [(set_attr "prefix" "evex")
8275 (set_attr "mode" "<MODE>")])
8277 (define_expand "<avx512>_vternlog<mode>_maskz"
8278 [(match_operand:VI48_AVX512VL 0 "register_operand")
8279 (match_operand:VI48_AVX512VL 1 "register_operand")
8280 (match_operand:VI48_AVX512VL 2 "register_operand")
8281 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
8282 (match_operand:SI 4 "const_0_to_255_operand")
8283 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8286 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
8287 operands[0], operands[1], operands[2], operands[3],
8288 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
8292 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
8293 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8294 (unspec:VI48_AVX512VL
8295 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
8296 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
8297 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
8298 (match_operand:SI 4 "const_0_to_255_operand")]
8301 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
8302 [(set_attr "type" "sselog")
8303 (set_attr "prefix" "evex")
8304 (set_attr "mode" "<sseinsnmode>")])
8306 (define_insn "<avx512>_vternlog<mode>_mask"
8307 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8308 (vec_merge:VI48_AVX512VL
8309 (unspec:VI48_AVX512VL
8310 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
8311 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
8312 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
8313 (match_operand:SI 4 "const_0_to_255_operand")]
8316 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8318 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
8319 [(set_attr "type" "sselog")
8320 (set_attr "prefix" "evex")
8321 (set_attr "mode" "<sseinsnmode>")])
8323 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
8324 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8325 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
8328 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
8329 [(set_attr "prefix" "evex")
8330 (set_attr "mode" "<MODE>")])
8332 (define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
8333 [(set (match_operand:VF_128 0 "register_operand" "=v")
8336 [(match_operand:VF_128 1 "register_operand" "v")
8337 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
8342 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_saeonly_scalar_mask_op3>}";
8343 [(set_attr "prefix" "evex")
8344 (set_attr "mode" "<ssescalarmode>")])
8346 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
8347 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8348 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
8349 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
8350 (match_operand:SI 3 "const_0_to_255_operand")]
8353 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
8354 [(set_attr "prefix" "evex")
8355 (set_attr "mode" "<sseinsnmode>")])
8357 (define_expand "avx512f_shufps512_mask"
8358 [(match_operand:V16SF 0 "register_operand")
8359 (match_operand:V16SF 1 "register_operand")
8360 (match_operand:V16SF 2 "nonimmediate_operand")
8361 (match_operand:SI 3 "const_0_to_255_operand")
8362 (match_operand:V16SF 4 "register_operand")
8363 (match_operand:HI 5 "register_operand")]
8366 int mask = INTVAL (operands[3]);
8367 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
8368 GEN_INT ((mask >> 0) & 3),
8369 GEN_INT ((mask >> 2) & 3),
8370 GEN_INT (((mask >> 4) & 3) + 16),
8371 GEN_INT (((mask >> 6) & 3) + 16),
8372 GEN_INT (((mask >> 0) & 3) + 4),
8373 GEN_INT (((mask >> 2) & 3) + 4),
8374 GEN_INT (((mask >> 4) & 3) + 20),
8375 GEN_INT (((mask >> 6) & 3) + 20),
8376 GEN_INT (((mask >> 0) & 3) + 8),
8377 GEN_INT (((mask >> 2) & 3) + 8),
8378 GEN_INT (((mask >> 4) & 3) + 24),
8379 GEN_INT (((mask >> 6) & 3) + 24),
8380 GEN_INT (((mask >> 0) & 3) + 12),
8381 GEN_INT (((mask >> 2) & 3) + 12),
8382 GEN_INT (((mask >> 4) & 3) + 28),
8383 GEN_INT (((mask >> 6) & 3) + 28),
8384 operands[4], operands[5]));
8389 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
8390 [(match_operand:VF_AVX512VL 0 "register_operand")
8391 (match_operand:VF_AVX512VL 1 "register_operand")
8392 (match_operand:VF_AVX512VL 2 "register_operand")
8393 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
8394 (match_operand:SI 4 "const_0_to_255_operand")
8395 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8398 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
8399 operands[0], operands[1], operands[2], operands[3],
8400 operands[4], CONST0_RTX (<MODE>mode), operands[5]
8401 <round_saeonly_expand_operand6>));
8405 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
8406 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8408 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
8409 (match_operand:VF_AVX512VL 2 "register_operand" "v")
8410 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
8411 (match_operand:SI 4 "const_0_to_255_operand")]
8414 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
8415 [(set_attr "prefix" "evex")
8416 (set_attr "mode" "<MODE>")])
8418 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
8419 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8420 (vec_merge:VF_AVX512VL
8422 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
8423 (match_operand:VF_AVX512VL 2 "register_operand" "v")
8424 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
8425 (match_operand:SI 4 "const_0_to_255_operand")]
8428 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8430 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
8431 [(set_attr "prefix" "evex")
8432 (set_attr "mode" "<MODE>")])
8434 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
8435 [(match_operand:VF_128 0 "register_operand")
8436 (match_operand:VF_128 1 "register_operand")
8437 (match_operand:VF_128 2 "register_operand")
8438 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
8439 (match_operand:SI 4 "const_0_to_255_operand")
8440 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8443 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
8444 operands[0], operands[1], operands[2], operands[3],
8445 operands[4], CONST0_RTX (<MODE>mode), operands[5]
8446 <round_saeonly_expand_operand6>));
8450 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
8451 [(set (match_operand:VF_128 0 "register_operand" "=v")
8454 [(match_operand:VF_128 1 "register_operand" "0")
8455 (match_operand:VF_128 2 "register_operand" "v")
8456 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8457 (match_operand:SI 4 "const_0_to_255_operand")]
8462 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
8463 [(set_attr "prefix" "evex")
8464 (set_attr "mode" "<ssescalarmode>")])
8466 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
8467 [(set (match_operand:VF_128 0 "register_operand" "=v")
8471 [(match_operand:VF_128 1 "register_operand" "0")
8472 (match_operand:VF_128 2 "register_operand" "v")
8473 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8474 (match_operand:SI 4 "const_0_to_255_operand")]
8479 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8481 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
8482 [(set_attr "prefix" "evex")
8483 (set_attr "mode" "<ssescalarmode>")])
8485 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
8486 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8488 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
8489 (match_operand:SI 2 "const_0_to_255_operand")]
8492 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
8493 [(set_attr "length_immediate" "1")
8494 (set_attr "prefix" "evex")
8495 (set_attr "mode" "<MODE>")])
8497 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
8498 [(set (match_operand:VF_128 0 "register_operand" "=v")
8501 [(match_operand:VF_128 1 "register_operand" "v")
8502 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8503 (match_operand:SI 3 "const_0_to_255_operand")]
8508 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
8509 [(set_attr "length_immediate" "1")
8510 (set_attr "prefix" "evex")
8511 (set_attr "mode" "<MODE>")])
8513 ;; One bit in mask selects 2 elements.
8514 (define_insn "avx512f_shufps512_1<mask_name>"
8515 [(set (match_operand:V16SF 0 "register_operand" "=v")
8518 (match_operand:V16SF 1 "register_operand" "v")
8519 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
8520 (parallel [(match_operand 3 "const_0_to_3_operand")
8521 (match_operand 4 "const_0_to_3_operand")
8522 (match_operand 5 "const_16_to_19_operand")
8523 (match_operand 6 "const_16_to_19_operand")
8524 (match_operand 7 "const_4_to_7_operand")
8525 (match_operand 8 "const_4_to_7_operand")
8526 (match_operand 9 "const_20_to_23_operand")
8527 (match_operand 10 "const_20_to_23_operand")
8528 (match_operand 11 "const_8_to_11_operand")
8529 (match_operand 12 "const_8_to_11_operand")
8530 (match_operand 13 "const_24_to_27_operand")
8531 (match_operand 14 "const_24_to_27_operand")
8532 (match_operand 15 "const_12_to_15_operand")
8533 (match_operand 16 "const_12_to_15_operand")
8534 (match_operand 17 "const_28_to_31_operand")
8535 (match_operand 18 "const_28_to_31_operand")])))]
8537 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
8538 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
8539 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
8540 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
8541 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
8542 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
8543 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
8544 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
8545 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
8546 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
8547 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
8548 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
8551 mask = INTVAL (operands[3]);
8552 mask |= INTVAL (operands[4]) << 2;
8553 mask |= (INTVAL (operands[5]) - 16) << 4;
8554 mask |= (INTVAL (operands[6]) - 16) << 6;
8555 operands[3] = GEN_INT (mask);
8557 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
8559 [(set_attr "type" "sselog")
8560 (set_attr "length_immediate" "1")
8561 (set_attr "prefix" "evex")
8562 (set_attr "mode" "V16SF")])
8564 (define_expand "avx512f_shufpd512_mask"
8565 [(match_operand:V8DF 0 "register_operand")
8566 (match_operand:V8DF 1 "register_operand")
8567 (match_operand:V8DF 2 "nonimmediate_operand")
8568 (match_operand:SI 3 "const_0_to_255_operand")
8569 (match_operand:V8DF 4 "register_operand")
8570 (match_operand:QI 5 "register_operand")]
8573 int mask = INTVAL (operands[3]);
8574 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
8576 GEN_INT (mask & 2 ? 9 : 8),
8577 GEN_INT (mask & 4 ? 3 : 2),
8578 GEN_INT (mask & 8 ? 11 : 10),
8579 GEN_INT (mask & 16 ? 5 : 4),
8580 GEN_INT (mask & 32 ? 13 : 12),
8581 GEN_INT (mask & 64 ? 7 : 6),
8582 GEN_INT (mask & 128 ? 15 : 14),
8583 operands[4], operands[5]));
8587 (define_insn "avx512f_shufpd512_1<mask_name>"
8588 [(set (match_operand:V8DF 0 "register_operand" "=v")
8591 (match_operand:V8DF 1 "register_operand" "v")
8592 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
8593 (parallel [(match_operand 3 "const_0_to_1_operand")
8594 (match_operand 4 "const_8_to_9_operand")
8595 (match_operand 5 "const_2_to_3_operand")
8596 (match_operand 6 "const_10_to_11_operand")
8597 (match_operand 7 "const_4_to_5_operand")
8598 (match_operand 8 "const_12_to_13_operand")
8599 (match_operand 9 "const_6_to_7_operand")
8600 (match_operand 10 "const_14_to_15_operand")])))]
8604 mask = INTVAL (operands[3]);
8605 mask |= (INTVAL (operands[4]) - 8) << 1;
8606 mask |= (INTVAL (operands[5]) - 2) << 2;
8607 mask |= (INTVAL (operands[6]) - 10) << 3;
8608 mask |= (INTVAL (operands[7]) - 4) << 4;
8609 mask |= (INTVAL (operands[8]) - 12) << 5;
8610 mask |= (INTVAL (operands[9]) - 6) << 6;
8611 mask |= (INTVAL (operands[10]) - 14) << 7;
8612 operands[3] = GEN_INT (mask);
8614 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
8616 [(set_attr "type" "sselog")
8617 (set_attr "length_immediate" "1")
8618 (set_attr "prefix" "evex")
8619 (set_attr "mode" "V8DF")])
8621 (define_expand "avx_shufpd256<mask_expand4_name>"
8622 [(match_operand:V4DF 0 "register_operand")
8623 (match_operand:V4DF 1 "register_operand")
8624 (match_operand:V4DF 2 "nonimmediate_operand")
8625 (match_operand:SI 3 "const_int_operand")]
8628 int mask = INTVAL (operands[3]);
8629 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
8633 GEN_INT (mask & 2 ? 5 : 4),
8634 GEN_INT (mask & 4 ? 3 : 2),
8635 GEN_INT (mask & 8 ? 7 : 6)
8636 <mask_expand4_args>));
8640 (define_insn "avx_shufpd256_1<mask_name>"
8641 [(set (match_operand:V4DF 0 "register_operand" "=v")
8644 (match_operand:V4DF 1 "register_operand" "v")
8645 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8646 (parallel [(match_operand 3 "const_0_to_1_operand")
8647 (match_operand 4 "const_4_to_5_operand")
8648 (match_operand 5 "const_2_to_3_operand")
8649 (match_operand 6 "const_6_to_7_operand")])))]
8650 "TARGET_AVX && <mask_avx512vl_condition>"
8653 mask = INTVAL (operands[3]);
8654 mask |= (INTVAL (operands[4]) - 4) << 1;
8655 mask |= (INTVAL (operands[5]) - 2) << 2;
8656 mask |= (INTVAL (operands[6]) - 6) << 3;
8657 operands[3] = GEN_INT (mask);
8659 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
8661 [(set_attr "type" "sseshuf")
8662 (set_attr "length_immediate" "1")
8663 (set_attr "prefix" "vex")
8664 (set_attr "mode" "V4DF")])
8666 (define_expand "sse2_shufpd<mask_expand4_name>"
8667 [(match_operand:V2DF 0 "register_operand")
8668 (match_operand:V2DF 1 "register_operand")
8669 (match_operand:V2DF 2 "vector_operand")
8670 (match_operand:SI 3 "const_int_operand")]
8673 int mask = INTVAL (operands[3]);
8674 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
8675 operands[2], GEN_INT (mask & 1),
8676 GEN_INT (mask & 2 ? 3 : 2)
8677 <mask_expand4_args>));
8681 (define_insn "sse2_shufpd_v2df_mask"
8682 [(set (match_operand:V2DF 0 "register_operand" "=v")
8686 (match_operand:V2DF 1 "register_operand" "v")
8687 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8688 (parallel [(match_operand 3 "const_0_to_1_operand")
8689 (match_operand 4 "const_2_to_3_operand")]))
8690 (match_operand:V2DF 5 "vector_move_operand" "0C")
8691 (match_operand:QI 6 "register_operand" "Yk")))]
8695 mask = INTVAL (operands[3]);
8696 mask |= (INTVAL (operands[4]) - 2) << 1;
8697 operands[3] = GEN_INT (mask);
8699 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{6%}%N5, %1, %2, %3}";
8701 [(set_attr "type" "sseshuf")
8702 (set_attr "length_immediate" "1")
8703 (set_attr "prefix" "evex")
8704 (set_attr "mode" "V2DF")])
8706 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
8707 (define_insn "avx2_interleave_highv4di<mask_name>"
8708 [(set (match_operand:V4DI 0 "register_operand" "=v")
8711 (match_operand:V4DI 1 "register_operand" "v")
8712 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8713 (parallel [(const_int 1)
8717 "TARGET_AVX2 && <mask_avx512vl_condition>"
8718 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8719 [(set_attr "type" "sselog")
8720 (set_attr "prefix" "vex")
8721 (set_attr "mode" "OI")])
8723 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
8724 [(set (match_operand:V8DI 0 "register_operand" "=v")
8727 (match_operand:V8DI 1 "register_operand" "v")
8728 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8729 (parallel [(const_int 1) (const_int 9)
8730 (const_int 3) (const_int 11)
8731 (const_int 5) (const_int 13)
8732 (const_int 7) (const_int 15)])))]
8734 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8735 [(set_attr "type" "sselog")
8736 (set_attr "prefix" "evex")
8737 (set_attr "mode" "XI")])
8739 (define_insn "vec_interleave_highv2di<mask_name>"
8740 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8743 (match_operand:V2DI 1 "register_operand" "0,v")
8744 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
8745 (parallel [(const_int 1)
8747 "TARGET_SSE2 && <mask_avx512vl_condition>"
8749 punpckhqdq\t{%2, %0|%0, %2}
8750 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8751 [(set_attr "isa" "noavx,avx")
8752 (set_attr "type" "sselog")
8753 (set_attr "prefix_data16" "1,*")
8754 (set_attr "prefix" "orig,<mask_prefix>")
8755 (set_attr "mode" "TI")])
8757 (define_insn "avx2_interleave_lowv4di<mask_name>"
8758 [(set (match_operand:V4DI 0 "register_operand" "=v")
8761 (match_operand:V4DI 1 "register_operand" "v")
8762 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8763 (parallel [(const_int 0)
8767 "TARGET_AVX2 && <mask_avx512vl_condition>"
8768 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8769 [(set_attr "type" "sselog")
8770 (set_attr "prefix" "vex")
8771 (set_attr "mode" "OI")])
8773 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
8774 [(set (match_operand:V8DI 0 "register_operand" "=v")
8777 (match_operand:V8DI 1 "register_operand" "v")
8778 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8779 (parallel [(const_int 0) (const_int 8)
8780 (const_int 2) (const_int 10)
8781 (const_int 4) (const_int 12)
8782 (const_int 6) (const_int 14)])))]
8784 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8785 [(set_attr "type" "sselog")
8786 (set_attr "prefix" "evex")
8787 (set_attr "mode" "XI")])
8789 (define_insn "vec_interleave_lowv2di<mask_name>"
8790 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8793 (match_operand:V2DI 1 "register_operand" "0,v")
8794 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
8795 (parallel [(const_int 0)
8797 "TARGET_SSE2 && <mask_avx512vl_condition>"
8799 punpcklqdq\t{%2, %0|%0, %2}
8800 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8801 [(set_attr "isa" "noavx,avx")
8802 (set_attr "type" "sselog")
8803 (set_attr "prefix_data16" "1,*")
8804 (set_attr "prefix" "orig,vex")
8805 (set_attr "mode" "TI")])
8807 (define_insn "sse2_shufpd_<mode>"
8808 [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
8809 (vec_select:VI8F_128
8810 (vec_concat:<ssedoublevecmode>
8811 (match_operand:VI8F_128 1 "register_operand" "0,v")
8812 (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
8813 (parallel [(match_operand 3 "const_0_to_1_operand")
8814 (match_operand 4 "const_2_to_3_operand")])))]
8818 mask = INTVAL (operands[3]);
8819 mask |= (INTVAL (operands[4]) - 2) << 1;
8820 operands[3] = GEN_INT (mask);
8822 switch (which_alternative)
8825 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
8827 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8832 [(set_attr "isa" "noavx,avx")
8833 (set_attr "type" "sseshuf")
8834 (set_attr "length_immediate" "1")
8835 (set_attr "prefix" "orig,maybe_evex")
8836 (set_attr "mode" "V2DF")])
8838 ;; Avoid combining registers from different units in a single alternative,
8839 ;; see comment above inline_secondary_memory_needed function in i386.c
8840 (define_insn "sse2_storehpd"
8841 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
8843 (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
8844 (parallel [(const_int 1)])))]
8845 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8847 %vmovhpd\t{%1, %0|%0, %1}
8849 vunpckhpd\t{%d1, %0|%0, %d1}
8853 [(set_attr "isa" "*,noavx,avx,*,*,*")
8854 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
8855 (set (attr "prefix_data16")
8857 (and (eq_attr "alternative" "0")
8858 (not (match_test "TARGET_AVX")))
8860 (const_string "*")))
8861 (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
8862 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
8865 [(set (match_operand:DF 0 "register_operand")
8867 (match_operand:V2DF 1 "memory_operand")
8868 (parallel [(const_int 1)])))]
8869 "TARGET_SSE2 && reload_completed"
8870 [(set (match_dup 0) (match_dup 1))]
8871 "operands[1] = adjust_address (operands[1], DFmode, 8);")
8873 (define_insn "*vec_extractv2df_1_sse"
8874 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8876 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
8877 (parallel [(const_int 1)])))]
8878 "!TARGET_SSE2 && TARGET_SSE
8879 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8881 movhps\t{%1, %0|%q0, %1}
8882 movhlps\t{%1, %0|%0, %1}
8883 movlps\t{%H1, %0|%0, %H1}"
8884 [(set_attr "type" "ssemov")
8885 (set_attr "mode" "V2SF,V4SF,V2SF")])
8887 ;; Avoid combining registers from different units in a single alternative,
8888 ;; see comment above inline_secondary_memory_needed function in i386.c
8889 (define_insn "sse2_storelpd"
8890 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
8892 (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
8893 (parallel [(const_int 0)])))]
8894 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8896 %vmovlpd\t{%1, %0|%0, %1}
8901 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
8902 (set (attr "prefix_data16")
8903 (if_then_else (eq_attr "alternative" "0")
8905 (const_string "*")))
8906 (set_attr "prefix" "maybe_vex")
8907 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
8910 [(set (match_operand:DF 0 "register_operand")
8912 (match_operand:V2DF 1 "nonimmediate_operand")
8913 (parallel [(const_int 0)])))]
8914 "TARGET_SSE2 && reload_completed"
8915 [(set (match_dup 0) (match_dup 1))]
8916 "operands[1] = gen_lowpart (DFmode, operands[1]);")
8918 (define_insn "*vec_extractv2df_0_sse"
8919 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8921 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
8922 (parallel [(const_int 0)])))]
8923 "!TARGET_SSE2 && TARGET_SSE
8924 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8926 movlps\t{%1, %0|%0, %1}
8927 movaps\t{%1, %0|%0, %1}
8928 movlps\t{%1, %0|%0, %q1}"
8929 [(set_attr "type" "ssemov")
8930 (set_attr "mode" "V2SF,V4SF,V2SF")])
8932 (define_expand "sse2_loadhpd_exp"
8933 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8936 (match_operand:V2DF 1 "nonimmediate_operand")
8937 (parallel [(const_int 0)]))
8938 (match_operand:DF 2 "nonimmediate_operand")))]
8941 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8943 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
8945 /* Fix up the destination if needed. */
8946 if (dst != operands[0])
8947 emit_move_insn (operands[0], dst);
8952 ;; Avoid combining registers from different units in a single alternative,
8953 ;; see comment above inline_secondary_memory_needed function in i386.c
8954 (define_insn "sse2_loadhpd"
8955 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8959 (match_operand:V2DF 1 "nonimmediate_operand"
8961 (parallel [(const_int 0)]))
8962 (match_operand:DF 2 "nonimmediate_operand"
8963 " m,m,x,v,x,*f,r")))]
8964 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8966 movhpd\t{%2, %0|%0, %2}
8967 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8968 unpcklpd\t{%2, %0|%0, %2}
8969 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8973 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
8974 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
8975 (set (attr "prefix_data16")
8976 (if_then_else (eq_attr "alternative" "0")
8978 (const_string "*")))
8979 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
8980 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
8983 [(set (match_operand:V2DF 0 "memory_operand")
8985 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
8986 (match_operand:DF 1 "register_operand")))]
8987 "TARGET_SSE2 && reload_completed"
8988 [(set (match_dup 0) (match_dup 1))]
8989 "operands[0] = adjust_address (operands[0], DFmode, 8);")
8991 (define_expand "sse2_loadlpd_exp"
8992 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8994 (match_operand:DF 2 "nonimmediate_operand")
8996 (match_operand:V2DF 1 "nonimmediate_operand")
8997 (parallel [(const_int 1)]))))]
9000 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
9002 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
9004 /* Fix up the destination if needed. */
9005 if (dst != operands[0])
9006 emit_move_insn (operands[0], dst);
9011 ;; Avoid combining registers from different units in a single alternative,
9012 ;; see comment above inline_secondary_memory_needed function in i386.c
9013 (define_insn "sse2_loadlpd"
9014 [(set (match_operand:V2DF 0 "nonimmediate_operand"
9015 "=v,x,v,x,v,x,x,v,m,m ,m")
9017 (match_operand:DF 2 "nonimmediate_operand"
9018 "vm,m,m,x,v,0,0,v,x,*f,r")
9020 (match_operand:V2DF 1 "vector_move_operand"
9021 " C,0,v,0,v,x,o,o,0,0 ,0")
9022 (parallel [(const_int 1)]))))]
9023 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9025 %vmovq\t{%2, %0|%0, %2}
9026 movlpd\t{%2, %0|%0, %2}
9027 vmovlpd\t{%2, %1, %0|%0, %1, %2}
9028 movsd\t{%2, %0|%0, %2}
9029 vmovsd\t{%2, %1, %0|%0, %1, %2}
9030 shufpd\t{$2, %1, %0|%0, %1, 2}
9031 movhpd\t{%H1, %0|%0, %H1}
9032 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
9036 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
9038 (cond [(eq_attr "alternative" "5")
9039 (const_string "sselog")
9040 (eq_attr "alternative" "9")
9041 (const_string "fmov")
9042 (eq_attr "alternative" "10")
9043 (const_string "imov")
9045 (const_string "ssemov")))
9046 (set (attr "prefix_data16")
9047 (if_then_else (eq_attr "alternative" "1,6")
9049 (const_string "*")))
9050 (set (attr "length_immediate")
9051 (if_then_else (eq_attr "alternative" "5")
9053 (const_string "*")))
9054 (set (attr "prefix")
9055 (cond [(eq_attr "alternative" "0")
9056 (const_string "maybe_vex")
9057 (eq_attr "alternative" "1,3,5,6")
9058 (const_string "orig")
9059 (eq_attr "alternative" "2,4,7")
9060 (const_string "maybe_evex")
9062 (const_string "*")))
9063 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
9066 [(set (match_operand:V2DF 0 "memory_operand")
9068 (match_operand:DF 1 "register_operand")
9069 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
9070 "TARGET_SSE2 && reload_completed"
9071 [(set (match_dup 0) (match_dup 1))]
9072 "operands[0] = adjust_address (operands[0], DFmode, 0);")
9074 (define_insn "sse2_movsd"
9075 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
9077 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
9078 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
9082 movsd\t{%2, %0|%0, %2}
9083 vmovsd\t{%2, %1, %0|%0, %1, %2}
9084 movlpd\t{%2, %0|%0, %q2}
9085 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
9086 %vmovlpd\t{%2, %0|%q0, %2}
9087 shufpd\t{$2, %1, %0|%0, %1, 2}
9088 movhps\t{%H1, %0|%0, %H1}
9089 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
9090 %vmovhps\t{%1, %H0|%H0, %1}"
9091 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
9094 (eq_attr "alternative" "5")
9095 (const_string "sselog")
9096 (const_string "ssemov")))
9097 (set (attr "prefix_data16")
9099 (and (eq_attr "alternative" "2,4")
9100 (not (match_test "TARGET_AVX")))
9102 (const_string "*")))
9103 (set (attr "length_immediate")
9104 (if_then_else (eq_attr "alternative" "5")
9106 (const_string "*")))
9107 (set (attr "prefix")
9108 (cond [(eq_attr "alternative" "1,3,7")
9109 (const_string "maybe_evex")
9110 (eq_attr "alternative" "4,8")
9111 (const_string "maybe_vex")
9113 (const_string "orig")))
9114 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
9116 (define_insn "vec_dupv2df<mask_name>"
9117 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
9119 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
9120 "TARGET_SSE2 && <mask_avx512vl_condition>"
9123 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
9124 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
9125 [(set_attr "isa" "noavx,sse3,avx512vl")
9126 (set_attr "type" "sselog1")
9127 (set_attr "prefix" "orig,maybe_vex,evex")
9128 (set_attr "mode" "V2DF,DF,DF")])
9130 (define_insn "vec_concatv2df"
9131 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x, v,x,x")
9133 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,xm,0,0")
9134 (match_operand:DF 2 "vector_move_operand" " x,x,v,1,1,m,m, C,x,m")))]
9136 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
9137 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
9139 unpcklpd\t{%2, %0|%0, %2}
9140 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9141 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9142 %vmovddup\t{%1, %0|%0, %1}
9143 vmovddup\t{%1, %0|%0, %1}
9144 movhpd\t{%2, %0|%0, %2}
9145 vmovhpd\t{%2, %1, %0|%0, %1, %2}
9146 %vmovq\t{%1, %0|%0, %1}
9147 movlhps\t{%2, %0|%0, %2}
9148 movhps\t{%2, %0|%0, %2}"
9150 (cond [(eq_attr "alternative" "0,5")
9151 (const_string "sse2_noavx")
9152 (eq_attr "alternative" "1,6")
9153 (const_string "avx")
9154 (eq_attr "alternative" "2,4")
9155 (const_string "avx512vl")
9156 (eq_attr "alternative" "3")
9157 (const_string "sse3")
9158 (eq_attr "alternative" "7")
9159 (const_string "sse2")
9161 (const_string "noavx")))
9164 (eq_attr "alternative" "0,1,2,3,4")
9165 (const_string "sselog")
9166 (const_string "ssemov")))
9167 (set (attr "prefix_data16")
9168 (if_then_else (eq_attr "alternative" "5")
9170 (const_string "*")))
9171 (set (attr "prefix")
9172 (cond [(eq_attr "alternative" "1,6")
9173 (const_string "vex")
9174 (eq_attr "alternative" "2,4")
9175 (const_string "evex")
9176 (eq_attr "alternative" "3,7")
9177 (const_string "maybe_vex")
9179 (const_string "orig")))
9180 (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
9182 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9184 ;; Parallel integer down-conversion operations
9186 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9188 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
9189 (define_mode_attr pmov_src_mode
9190 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
9191 (define_mode_attr pmov_src_lower
9192 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
9193 (define_mode_attr pmov_suff_1
9194 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
9196 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
9197 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
9198 (any_truncate:PMOV_DST_MODE_1
9199 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
9201 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
9202 [(set_attr "type" "ssemov")
9203 (set_attr "memory" "none,store")
9204 (set_attr "prefix" "evex")
9205 (set_attr "mode" "<sseinsnmode>")])
9207 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
9208 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
9209 (vec_merge:PMOV_DST_MODE_1
9210 (any_truncate:PMOV_DST_MODE_1
9211 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
9212 (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
9213 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9215 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9216 [(set_attr "type" "ssemov")
9217 (set_attr "memory" "none,store")
9218 (set_attr "prefix" "evex")
9219 (set_attr "mode" "<sseinsnmode>")])
9221 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
9222 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
9223 (vec_merge:PMOV_DST_MODE_1
9224 (any_truncate:PMOV_DST_MODE_1
9225 (match_operand:<pmov_src_mode> 1 "register_operand"))
9227 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
9230 (define_insn "avx512bw_<code>v32hiv32qi2"
9231 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
9233 (match_operand:V32HI 1 "register_operand" "v,v")))]
9235 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
9236 [(set_attr "type" "ssemov")
9237 (set_attr "memory" "none,store")
9238 (set_attr "prefix" "evex")
9239 (set_attr "mode" "XI")])
9241 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
9242 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
9245 (match_operand:V32HI 1 "register_operand" "v,v"))
9246 (match_operand:V32QI 2 "vector_move_operand" "0C,0")
9247 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
9249 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9250 [(set_attr "type" "ssemov")
9251 (set_attr "memory" "none,store")
9252 (set_attr "prefix" "evex")
9253 (set_attr "mode" "XI")])
9255 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
9256 [(set (match_operand:V32QI 0 "nonimmediate_operand")
9259 (match_operand:V32HI 1 "register_operand"))
9261 (match_operand:SI 2 "register_operand")))]
9264 (define_mode_iterator PMOV_DST_MODE_2
9265 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
9266 (define_mode_attr pmov_suff_2
9267 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
9269 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
9270 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
9271 (any_truncate:PMOV_DST_MODE_2
9272 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
9274 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
9275 [(set_attr "type" "ssemov")
9276 (set_attr "memory" "none,store")
9277 (set_attr "prefix" "evex")
9278 (set_attr "mode" "<sseinsnmode>")])
9280 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
9281 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
9282 (vec_merge:PMOV_DST_MODE_2
9283 (any_truncate:PMOV_DST_MODE_2
9284 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
9285 (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
9286 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9288 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9289 [(set_attr "type" "ssemov")
9290 (set_attr "memory" "none,store")
9291 (set_attr "prefix" "evex")
9292 (set_attr "mode" "<sseinsnmode>")])
9294 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
9295 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
9296 (vec_merge:PMOV_DST_MODE_2
9297 (any_truncate:PMOV_DST_MODE_2
9298 (match_operand:<ssedoublemode> 1 "register_operand"))
9300 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
9303 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
9304 (define_mode_attr pmov_dst_3
9305 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
9306 (define_mode_attr pmov_dst_zeroed_3
9307 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
9308 (define_mode_attr pmov_suff_3
9309 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
9311 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
9312 [(set (match_operand:V16QI 0 "register_operand" "=v")
9314 (any_truncate:<pmov_dst_3>
9315 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
9316 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
9318 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9319 [(set_attr "type" "ssemov")
9320 (set_attr "prefix" "evex")
9321 (set_attr "mode" "TI")])
9323 (define_insn "*avx512vl_<code>v2div2qi2_store"
9324 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9327 (match_operand:V2DI 1 "register_operand" "v"))
9330 (parallel [(const_int 2) (const_int 3)
9331 (const_int 4) (const_int 5)
9332 (const_int 6) (const_int 7)
9333 (const_int 8) (const_int 9)
9334 (const_int 10) (const_int 11)
9335 (const_int 12) (const_int 13)
9336 (const_int 14) (const_int 15)]))))]
9338 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9339 [(set_attr "type" "ssemov")
9340 (set_attr "memory" "store")
9341 (set_attr "prefix" "evex")
9342 (set_attr "mode" "TI")])
9344 (define_insn "avx512vl_<code>v2div2qi2_mask"
9345 [(set (match_operand:V16QI 0 "register_operand" "=v")
9349 (match_operand:V2DI 1 "register_operand" "v"))
9351 (match_operand:V16QI 2 "vector_move_operand" "0C")
9352 (parallel [(const_int 0) (const_int 1)]))
9353 (match_operand:QI 3 "register_operand" "Yk"))
9354 (const_vector:V14QI [(const_int 0) (const_int 0)
9355 (const_int 0) (const_int 0)
9356 (const_int 0) (const_int 0)
9357 (const_int 0) (const_int 0)
9358 (const_int 0) (const_int 0)
9359 (const_int 0) (const_int 0)
9360 (const_int 0) (const_int 0)])))]
9362 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9363 [(set_attr "type" "ssemov")
9364 (set_attr "prefix" "evex")
9365 (set_attr "mode" "TI")])
9367 (define_insn "*avx512vl_<code>v2div2qi2_mask_1"
9368 [(set (match_operand:V16QI 0 "register_operand" "=v")
9372 (match_operand:V2DI 1 "register_operand" "v"))
9373 (const_vector:V2QI [(const_int 0) (const_int 0)])
9374 (match_operand:QI 2 "register_operand" "Yk"))
9375 (const_vector:V14QI [(const_int 0) (const_int 0)
9376 (const_int 0) (const_int 0)
9377 (const_int 0) (const_int 0)
9378 (const_int 0) (const_int 0)
9379 (const_int 0) (const_int 0)
9380 (const_int 0) (const_int 0)
9381 (const_int 0) (const_int 0)])))]
9383 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9384 [(set_attr "type" "ssemov")
9385 (set_attr "prefix" "evex")
9386 (set_attr "mode" "TI")])
9388 (define_insn "avx512vl_<code>v2div2qi2_mask_store"
9389 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9393 (match_operand:V2DI 1 "register_operand" "v"))
9396 (parallel [(const_int 0) (const_int 1)]))
9397 (match_operand:QI 2 "register_operand" "Yk"))
9400 (parallel [(const_int 2) (const_int 3)
9401 (const_int 4) (const_int 5)
9402 (const_int 6) (const_int 7)
9403 (const_int 8) (const_int 9)
9404 (const_int 10) (const_int 11)
9405 (const_int 12) (const_int 13)
9406 (const_int 14) (const_int 15)]))))]
9408 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%w0%{%2%}, %1}"
9409 [(set_attr "type" "ssemov")
9410 (set_attr "memory" "store")
9411 (set_attr "prefix" "evex")
9412 (set_attr "mode" "TI")])
9414 (define_insn "*avx512vl_<code><mode>v4qi2_store"
9415 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9418 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9421 (parallel [(const_int 4) (const_int 5)
9422 (const_int 6) (const_int 7)
9423 (const_int 8) (const_int 9)
9424 (const_int 10) (const_int 11)
9425 (const_int 12) (const_int 13)
9426 (const_int 14) (const_int 15)]))))]
9428 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9429 [(set_attr "type" "ssemov")
9430 (set_attr "memory" "store")
9431 (set_attr "prefix" "evex")
9432 (set_attr "mode" "TI")])
9434 (define_insn "avx512vl_<code><mode>v4qi2_mask"
9435 [(set (match_operand:V16QI 0 "register_operand" "=v")
9439 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9441 (match_operand:V16QI 2 "vector_move_operand" "0C")
9442 (parallel [(const_int 0) (const_int 1)
9443 (const_int 2) (const_int 3)]))
9444 (match_operand:QI 3 "register_operand" "Yk"))
9445 (const_vector:V12QI [(const_int 0) (const_int 0)
9446 (const_int 0) (const_int 0)
9447 (const_int 0) (const_int 0)
9448 (const_int 0) (const_int 0)
9449 (const_int 0) (const_int 0)
9450 (const_int 0) (const_int 0)])))]
9452 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9453 [(set_attr "type" "ssemov")
9454 (set_attr "prefix" "evex")
9455 (set_attr "mode" "TI")])
9457 (define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
9458 [(set (match_operand:V16QI 0 "register_operand" "=v")
9462 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9463 (const_vector:V4QI [(const_int 0) (const_int 0)
9464 (const_int 0) (const_int 0)])
9465 (match_operand:QI 2 "register_operand" "Yk"))
9466 (const_vector:V12QI [(const_int 0) (const_int 0)
9467 (const_int 0) (const_int 0)
9468 (const_int 0) (const_int 0)
9469 (const_int 0) (const_int 0)
9470 (const_int 0) (const_int 0)
9471 (const_int 0) (const_int 0)])))]
9473 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9474 [(set_attr "type" "ssemov")
9475 (set_attr "prefix" "evex")
9476 (set_attr "mode" "TI")])
9478 (define_insn "avx512vl_<code><mode>v4qi2_mask_store"
9479 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9483 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9486 (parallel [(const_int 0) (const_int 1)
9487 (const_int 2) (const_int 3)]))
9488 (match_operand:QI 2 "register_operand" "Yk"))
9491 (parallel [(const_int 4) (const_int 5)
9492 (const_int 6) (const_int 7)
9493 (const_int 8) (const_int 9)
9494 (const_int 10) (const_int 11)
9495 (const_int 12) (const_int 13)
9496 (const_int 14) (const_int 15)]))))]
9499 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 8)
9500 return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%k0%{%2%}, %1}";
9501 return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
9503 [(set_attr "type" "ssemov")
9504 (set_attr "memory" "store")
9505 (set_attr "prefix" "evex")
9506 (set_attr "mode" "TI")])
9508 (define_mode_iterator VI2_128_BW_4_256
9509 [(V8HI "TARGET_AVX512BW") V8SI])
9511 (define_insn "*avx512vl_<code><mode>v8qi2_store"
9512 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9515 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9518 (parallel [(const_int 8) (const_int 9)
9519 (const_int 10) (const_int 11)
9520 (const_int 12) (const_int 13)
9521 (const_int 14) (const_int 15)]))))]
9523 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9524 [(set_attr "type" "ssemov")
9525 (set_attr "memory" "store")
9526 (set_attr "prefix" "evex")
9527 (set_attr "mode" "TI")])
9529 (define_insn "avx512vl_<code><mode>v8qi2_mask"
9530 [(set (match_operand:V16QI 0 "register_operand" "=v")
9534 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9536 (match_operand:V16QI 2 "vector_move_operand" "0C")
9537 (parallel [(const_int 0) (const_int 1)
9538 (const_int 2) (const_int 3)
9539 (const_int 4) (const_int 5)
9540 (const_int 6) (const_int 7)]))
9541 (match_operand:QI 3 "register_operand" "Yk"))
9542 (const_vector:V8QI [(const_int 0) (const_int 0)
9543 (const_int 0) (const_int 0)
9544 (const_int 0) (const_int 0)
9545 (const_int 0) (const_int 0)])))]
9547 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9548 [(set_attr "type" "ssemov")
9549 (set_attr "prefix" "evex")
9550 (set_attr "mode" "TI")])
9552 (define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
9553 [(set (match_operand:V16QI 0 "register_operand" "=v")
9557 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9558 (const_vector:V8QI [(const_int 0) (const_int 0)
9559 (const_int 0) (const_int 0)
9560 (const_int 0) (const_int 0)
9561 (const_int 0) (const_int 0)])
9562 (match_operand:QI 2 "register_operand" "Yk"))
9563 (const_vector:V8QI [(const_int 0) (const_int 0)
9564 (const_int 0) (const_int 0)
9565 (const_int 0) (const_int 0)
9566 (const_int 0) (const_int 0)])))]
9568 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9569 [(set_attr "type" "ssemov")
9570 (set_attr "prefix" "evex")
9571 (set_attr "mode" "TI")])
9573 (define_insn "avx512vl_<code><mode>v8qi2_mask_store"
9574 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9578 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9581 (parallel [(const_int 0) (const_int 1)
9582 (const_int 2) (const_int 3)
9583 (const_int 4) (const_int 5)
9584 (const_int 6) (const_int 7)]))
9585 (match_operand:QI 2 "register_operand" "Yk"))
9588 (parallel [(const_int 8) (const_int 9)
9589 (const_int 10) (const_int 11)
9590 (const_int 12) (const_int 13)
9591 (const_int 14) (const_int 15)]))))]
9594 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
9595 return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
9596 return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
9598 [(set_attr "type" "ssemov")
9599 (set_attr "memory" "store")
9600 (set_attr "prefix" "evex")
9601 (set_attr "mode" "TI")])
9603 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
9604 (define_mode_attr pmov_dst_4
9605 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
9606 (define_mode_attr pmov_dst_zeroed_4
9607 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
9608 (define_mode_attr pmov_suff_4
9609 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
9611 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
9612 [(set (match_operand:V8HI 0 "register_operand" "=v")
9614 (any_truncate:<pmov_dst_4>
9615 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
9616 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
9618 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9619 [(set_attr "type" "ssemov")
9620 (set_attr "prefix" "evex")
9621 (set_attr "mode" "TI")])
9623 (define_insn "*avx512vl_<code><mode>v4hi2_store"
9624 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9627 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9630 (parallel [(const_int 4) (const_int 5)
9631 (const_int 6) (const_int 7)]))))]
9633 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9634 [(set_attr "type" "ssemov")
9635 (set_attr "memory" "store")
9636 (set_attr "prefix" "evex")
9637 (set_attr "mode" "TI")])
9639 (define_insn "avx512vl_<code><mode>v4hi2_mask"
9640 [(set (match_operand:V8HI 0 "register_operand" "=v")
9644 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9646 (match_operand:V8HI 2 "vector_move_operand" "0C")
9647 (parallel [(const_int 0) (const_int 1)
9648 (const_int 2) (const_int 3)]))
9649 (match_operand:QI 3 "register_operand" "Yk"))
9650 (const_vector:V4HI [(const_int 0) (const_int 0)
9651 (const_int 0) (const_int 0)])))]
9653 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9654 [(set_attr "type" "ssemov")
9655 (set_attr "prefix" "evex")
9656 (set_attr "mode" "TI")])
9658 (define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
9659 [(set (match_operand:V8HI 0 "register_operand" "=v")
9663 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9664 (const_vector:V4HI [(const_int 0) (const_int 0)
9665 (const_int 0) (const_int 0)])
9666 (match_operand:QI 2 "register_operand" "Yk"))
9667 (const_vector:V4HI [(const_int 0) (const_int 0)
9668 (const_int 0) (const_int 0)])))]
9670 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9671 [(set_attr "type" "ssemov")
9672 (set_attr "prefix" "evex")
9673 (set_attr "mode" "TI")])
9675 (define_insn "avx512vl_<code><mode>v4hi2_mask_store"
9676 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9680 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9683 (parallel [(const_int 0) (const_int 1)
9684 (const_int 2) (const_int 3)]))
9685 (match_operand:QI 2 "register_operand" "Yk"))
9688 (parallel [(const_int 4) (const_int 5)
9689 (const_int 6) (const_int 7)]))))]
9692 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
9693 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
9694 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
9696 [(set_attr "type" "ssemov")
9697 (set_attr "memory" "store")
9698 (set_attr "prefix" "evex")
9699 (set_attr "mode" "TI")])
9701 (define_insn "*avx512vl_<code>v2div2hi2_store"
9702 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9705 (match_operand:V2DI 1 "register_operand" "v"))
9708 (parallel [(const_int 2) (const_int 3)
9709 (const_int 4) (const_int 5)
9710 (const_int 6) (const_int 7)]))))]
9712 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
9713 [(set_attr "type" "ssemov")
9714 (set_attr "memory" "store")
9715 (set_attr "prefix" "evex")
9716 (set_attr "mode" "TI")])
9718 (define_insn "avx512vl_<code>v2div2hi2_mask"
9719 [(set (match_operand:V8HI 0 "register_operand" "=v")
9723 (match_operand:V2DI 1 "register_operand" "v"))
9725 (match_operand:V8HI 2 "vector_move_operand" "0C")
9726 (parallel [(const_int 0) (const_int 1)]))
9727 (match_operand:QI 3 "register_operand" "Yk"))
9728 (const_vector:V6HI [(const_int 0) (const_int 0)
9729 (const_int 0) (const_int 0)
9730 (const_int 0) (const_int 0)])))]
9732 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9733 [(set_attr "type" "ssemov")
9734 (set_attr "prefix" "evex")
9735 (set_attr "mode" "TI")])
9737 (define_insn "*avx512vl_<code>v2div2hi2_mask_1"
9738 [(set (match_operand:V8HI 0 "register_operand" "=v")
9742 (match_operand:V2DI 1 "register_operand" "v"))
9743 (const_vector:V2HI [(const_int 0) (const_int 0)])
9744 (match_operand:QI 2 "register_operand" "Yk"))
9745 (const_vector:V6HI [(const_int 0) (const_int 0)
9746 (const_int 0) (const_int 0)
9747 (const_int 0) (const_int 0)])))]
9749 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9750 [(set_attr "type" "ssemov")
9751 (set_attr "prefix" "evex")
9752 (set_attr "mode" "TI")])
9754 (define_insn "avx512vl_<code>v2div2hi2_mask_store"
9755 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9759 (match_operand:V2DI 1 "register_operand" "v"))
9762 (parallel [(const_int 0) (const_int 1)]))
9763 (match_operand:QI 2 "register_operand" "Yk"))
9766 (parallel [(const_int 2) (const_int 3)
9767 (const_int 4) (const_int 5)
9768 (const_int 6) (const_int 7)]))))]
9770 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
9771 [(set_attr "type" "ssemov")
9772 (set_attr "memory" "store")
9773 (set_attr "prefix" "evex")
9774 (set_attr "mode" "TI")])
9776 (define_insn "*avx512vl_<code>v2div2si2"
9777 [(set (match_operand:V4SI 0 "register_operand" "=v")
9780 (match_operand:V2DI 1 "register_operand" "v"))
9781 (match_operand:V2SI 2 "const0_operand")))]
9783 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9784 [(set_attr "type" "ssemov")
9785 (set_attr "prefix" "evex")
9786 (set_attr "mode" "TI")])
9788 (define_insn "*avx512vl_<code>v2div2si2_store"
9789 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9792 (match_operand:V2DI 1 "register_operand" "v"))
9795 (parallel [(const_int 2) (const_int 3)]))))]
9797 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9798 [(set_attr "type" "ssemov")
9799 (set_attr "memory" "store")
9800 (set_attr "prefix" "evex")
9801 (set_attr "mode" "TI")])
9803 (define_insn "avx512vl_<code>v2div2si2_mask"
9804 [(set (match_operand:V4SI 0 "register_operand" "=v")
9808 (match_operand:V2DI 1 "register_operand" "v"))
9810 (match_operand:V4SI 2 "vector_move_operand" "0C")
9811 (parallel [(const_int 0) (const_int 1)]))
9812 (match_operand:QI 3 "register_operand" "Yk"))
9813 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9815 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9816 [(set_attr "type" "ssemov")
9817 (set_attr "prefix" "evex")
9818 (set_attr "mode" "TI")])
9820 (define_insn "*avx512vl_<code>v2div2si2_mask_1"
9821 [(set (match_operand:V4SI 0 "register_operand" "=v")
9825 (match_operand:V2DI 1 "register_operand" "v"))
9826 (const_vector:V2SI [(const_int 0) (const_int 0)])
9827 (match_operand:QI 2 "register_operand" "Yk"))
9828 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9830 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9831 [(set_attr "type" "ssemov")
9832 (set_attr "prefix" "evex")
9833 (set_attr "mode" "TI")])
9835 (define_insn "avx512vl_<code>v2div2si2_mask_store"
9836 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9840 (match_operand:V2DI 1 "register_operand" "v"))
9843 (parallel [(const_int 0) (const_int 1)]))
9844 (match_operand:QI 2 "register_operand" "Yk"))
9847 (parallel [(const_int 2) (const_int 3)]))))]
9849 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %t1}"
9850 [(set_attr "type" "ssemov")
9851 (set_attr "memory" "store")
9852 (set_attr "prefix" "evex")
9853 (set_attr "mode" "TI")])
9855 (define_insn "*avx512f_<code>v8div16qi2"
9856 [(set (match_operand:V16QI 0 "register_operand" "=v")
9859 (match_operand:V8DI 1 "register_operand" "v"))
9860 (const_vector:V8QI [(const_int 0) (const_int 0)
9861 (const_int 0) (const_int 0)
9862 (const_int 0) (const_int 0)
9863 (const_int 0) (const_int 0)])))]
9865 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9866 [(set_attr "type" "ssemov")
9867 (set_attr "prefix" "evex")
9868 (set_attr "mode" "TI")])
9870 (define_insn "*avx512f_<code>v8div16qi2_store"
9871 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9874 (match_operand:V8DI 1 "register_operand" "v"))
9877 (parallel [(const_int 8) (const_int 9)
9878 (const_int 10) (const_int 11)
9879 (const_int 12) (const_int 13)
9880 (const_int 14) (const_int 15)]))))]
9882 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9883 [(set_attr "type" "ssemov")
9884 (set_attr "memory" "store")
9885 (set_attr "prefix" "evex")
9886 (set_attr "mode" "TI")])
9888 (define_insn "avx512f_<code>v8div16qi2_mask"
9889 [(set (match_operand:V16QI 0 "register_operand" "=v")
9893 (match_operand:V8DI 1 "register_operand" "v"))
9895 (match_operand:V16QI 2 "vector_move_operand" "0C")
9896 (parallel [(const_int 0) (const_int 1)
9897 (const_int 2) (const_int 3)
9898 (const_int 4) (const_int 5)
9899 (const_int 6) (const_int 7)]))
9900 (match_operand:QI 3 "register_operand" "Yk"))
9901 (const_vector:V8QI [(const_int 0) (const_int 0)
9902 (const_int 0) (const_int 0)
9903 (const_int 0) (const_int 0)
9904 (const_int 0) (const_int 0)])))]
9906 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9907 [(set_attr "type" "ssemov")
9908 (set_attr "prefix" "evex")
9909 (set_attr "mode" "TI")])
9911 (define_insn "*avx512f_<code>v8div16qi2_mask_1"
9912 [(set (match_operand:V16QI 0 "register_operand" "=v")
9916 (match_operand:V8DI 1 "register_operand" "v"))
9917 (const_vector:V8QI [(const_int 0) (const_int 0)
9918 (const_int 0) (const_int 0)
9919 (const_int 0) (const_int 0)
9920 (const_int 0) (const_int 0)])
9921 (match_operand:QI 2 "register_operand" "Yk"))
9922 (const_vector:V8QI [(const_int 0) (const_int 0)
9923 (const_int 0) (const_int 0)
9924 (const_int 0) (const_int 0)
9925 (const_int 0) (const_int 0)])))]
9927 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9928 [(set_attr "type" "ssemov")
9929 (set_attr "prefix" "evex")
9930 (set_attr "mode" "TI")])
9932 (define_insn "avx512f_<code>v8div16qi2_mask_store"
9933 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9937 (match_operand:V8DI 1 "register_operand" "v"))
9940 (parallel [(const_int 0) (const_int 1)
9941 (const_int 2) (const_int 3)
9942 (const_int 4) (const_int 5)
9943 (const_int 6) (const_int 7)]))
9944 (match_operand:QI 2 "register_operand" "Yk"))
9947 (parallel [(const_int 8) (const_int 9)
9948 (const_int 10) (const_int 11)
9949 (const_int 12) (const_int 13)
9950 (const_int 14) (const_int 15)]))))]
9952 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
9953 [(set_attr "type" "ssemov")
9954 (set_attr "memory" "store")
9955 (set_attr "prefix" "evex")
9956 (set_attr "mode" "TI")])
9958 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9960 ;; Parallel integral arithmetic
9962 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9964 (define_expand "neg<mode>2"
9965 [(set (match_operand:VI_AVX2 0 "register_operand")
9968 (match_operand:VI_AVX2 1 "vector_operand")))]
9970 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
9972 (define_expand "<plusminus_insn><mode>3"
9973 [(set (match_operand:VI_AVX2 0 "register_operand")
9975 (match_operand:VI_AVX2 1 "vector_operand")
9976 (match_operand:VI_AVX2 2 "vector_operand")))]
9978 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9980 (define_expand "<plusminus_insn><mode>3_mask"
9981 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
9982 (vec_merge:VI48_AVX512VL
9983 (plusminus:VI48_AVX512VL
9984 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
9985 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
9986 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
9987 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9989 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9991 (define_expand "<plusminus_insn><mode>3_mask"
9992 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
9993 (vec_merge:VI12_AVX512VL
9994 (plusminus:VI12_AVX512VL
9995 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
9996 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
9997 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
9998 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10000 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10002 (define_insn "*<plusminus_insn><mode>3"
10003 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
10005 (match_operand:VI_AVX2 1 "vector_operand" "<comm>0,v")
10006 (match_operand:VI_AVX2 2 "vector_operand" "xBm,vm")))]
10008 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10010 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10011 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10012 [(set_attr "isa" "noavx,avx")
10013 (set_attr "type" "sseiadd")
10014 (set_attr "prefix_data16" "1,*")
10015 (set_attr "prefix" "<mask_prefix3>")
10016 (set_attr "mode" "<sseinsnmode>")])
10018 (define_insn "*<plusminus_insn><mode>3_mask"
10019 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10020 (vec_merge:VI48_AVX512VL
10021 (plusminus:VI48_AVX512VL
10022 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10023 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
10024 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
10025 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10027 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10028 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10029 [(set_attr "type" "sseiadd")
10030 (set_attr "prefix" "evex")
10031 (set_attr "mode" "<sseinsnmode>")])
10033 (define_insn "*<plusminus_insn><mode>3_mask"
10034 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10035 (vec_merge:VI12_AVX512VL
10036 (plusminus:VI12_AVX512VL
10037 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10038 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
10039 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
10040 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10041 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10042 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10043 [(set_attr "type" "sseiadd")
10044 (set_attr "prefix" "evex")
10045 (set_attr "mode" "<sseinsnmode>")])
10047 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10048 [(set (match_operand:VI12_AVX2 0 "register_operand")
10049 (sat_plusminus:VI12_AVX2
10050 (match_operand:VI12_AVX2 1 "vector_operand")
10051 (match_operand:VI12_AVX2 2 "vector_operand")))]
10052 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10053 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10055 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10056 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
10057 (sat_plusminus:VI12_AVX2
10058 (match_operand:VI12_AVX2 1 "vector_operand" "<comm>0,v")
10059 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))]
10060 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
10061 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10063 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10064 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10065 [(set_attr "isa" "noavx,avx")
10066 (set_attr "type" "sseiadd")
10067 (set_attr "prefix_data16" "1,*")
10068 (set_attr "prefix" "orig,maybe_evex")
10069 (set_attr "mode" "TI")])
10071 (define_expand "mul<mode>3<mask_name>"
10072 [(set (match_operand:VI1_AVX512 0 "register_operand")
10073 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
10074 (match_operand:VI1_AVX512 2 "register_operand")))]
10075 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10077 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
10081 (define_expand "mul<mode>3<mask_name>"
10082 [(set (match_operand:VI2_AVX2 0 "register_operand")
10083 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
10084 (match_operand:VI2_AVX2 2 "vector_operand")))]
10085 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10086 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
10088 (define_insn "*mul<mode>3<mask_name>"
10089 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
10090 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
10091 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
10093 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
10094 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10096 pmullw\t{%2, %0|%0, %2}
10097 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10098 [(set_attr "isa" "noavx,avx")
10099 (set_attr "type" "sseimul")
10100 (set_attr "prefix_data16" "1,*")
10101 (set_attr "prefix" "orig,vex")
10102 (set_attr "mode" "<sseinsnmode>")])
10104 (define_expand "<s>mul<mode>3_highpart<mask_name>"
10105 [(set (match_operand:VI2_AVX2 0 "register_operand")
10107 (lshiftrt:<ssedoublemode>
10108 (mult:<ssedoublemode>
10109 (any_extend:<ssedoublemode>
10110 (match_operand:VI2_AVX2 1 "vector_operand"))
10111 (any_extend:<ssedoublemode>
10112 (match_operand:VI2_AVX2 2 "vector_operand")))
10115 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10116 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
10118 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
10119 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
10121 (lshiftrt:<ssedoublemode>
10122 (mult:<ssedoublemode>
10123 (any_extend:<ssedoublemode>
10124 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
10125 (any_extend:<ssedoublemode>
10126 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
10129 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
10130 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10132 pmulh<u>w\t{%2, %0|%0, %2}
10133 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10134 [(set_attr "isa" "noavx,avx")
10135 (set_attr "type" "sseimul")
10136 (set_attr "prefix_data16" "1,*")
10137 (set_attr "prefix" "orig,vex")
10138 (set_attr "mode" "<sseinsnmode>")])
10140 (define_expand "vec_widen_umult_even_v16si<mask_name>"
10141 [(set (match_operand:V8DI 0 "register_operand")
10145 (match_operand:V16SI 1 "nonimmediate_operand")
10146 (parallel [(const_int 0) (const_int 2)
10147 (const_int 4) (const_int 6)
10148 (const_int 8) (const_int 10)
10149 (const_int 12) (const_int 14)])))
10152 (match_operand:V16SI 2 "nonimmediate_operand")
10153 (parallel [(const_int 0) (const_int 2)
10154 (const_int 4) (const_int 6)
10155 (const_int 8) (const_int 10)
10156 (const_int 12) (const_int 14)])))))]
10158 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
10160 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
10161 [(set (match_operand:V8DI 0 "register_operand" "=v")
10165 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
10166 (parallel [(const_int 0) (const_int 2)
10167 (const_int 4) (const_int 6)
10168 (const_int 8) (const_int 10)
10169 (const_int 12) (const_int 14)])))
10172 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
10173 (parallel [(const_int 0) (const_int 2)
10174 (const_int 4) (const_int 6)
10175 (const_int 8) (const_int 10)
10176 (const_int 12) (const_int 14)])))))]
10177 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
10178 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10179 [(set_attr "type" "sseimul")
10180 (set_attr "prefix_extra" "1")
10181 (set_attr "prefix" "evex")
10182 (set_attr "mode" "XI")])
10184 (define_expand "vec_widen_umult_even_v8si<mask_name>"
10185 [(set (match_operand:V4DI 0 "register_operand")
10189 (match_operand:V8SI 1 "nonimmediate_operand")
10190 (parallel [(const_int 0) (const_int 2)
10191 (const_int 4) (const_int 6)])))
10194 (match_operand:V8SI 2 "nonimmediate_operand")
10195 (parallel [(const_int 0) (const_int 2)
10196 (const_int 4) (const_int 6)])))))]
10197 "TARGET_AVX2 && <mask_avx512vl_condition>"
10198 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
10200 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
10201 [(set (match_operand:V4DI 0 "register_operand" "=v")
10205 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
10206 (parallel [(const_int 0) (const_int 2)
10207 (const_int 4) (const_int 6)])))
10210 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
10211 (parallel [(const_int 0) (const_int 2)
10212 (const_int 4) (const_int 6)])))))]
10213 "TARGET_AVX2 && <mask_avx512vl_condition>
10214 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
10215 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10216 [(set_attr "type" "sseimul")
10217 (set_attr "prefix" "maybe_evex")
10218 (set_attr "mode" "OI")])
10220 (define_expand "vec_widen_umult_even_v4si<mask_name>"
10221 [(set (match_operand:V2DI 0 "register_operand")
10225 (match_operand:V4SI 1 "vector_operand")
10226 (parallel [(const_int 0) (const_int 2)])))
10229 (match_operand:V4SI 2 "vector_operand")
10230 (parallel [(const_int 0) (const_int 2)])))))]
10231 "TARGET_SSE2 && <mask_avx512vl_condition>"
10232 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
10234 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
10235 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10239 (match_operand:V4SI 1 "vector_operand" "%0,v")
10240 (parallel [(const_int 0) (const_int 2)])))
10243 (match_operand:V4SI 2 "vector_operand" "xBm,vm")
10244 (parallel [(const_int 0) (const_int 2)])))))]
10245 "TARGET_SSE2 && <mask_avx512vl_condition>
10246 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
10248 pmuludq\t{%2, %0|%0, %2}
10249 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10250 [(set_attr "isa" "noavx,avx")
10251 (set_attr "type" "sseimul")
10252 (set_attr "prefix_data16" "1,*")
10253 (set_attr "prefix" "orig,maybe_evex")
10254 (set_attr "mode" "TI")])
10256 (define_expand "vec_widen_smult_even_v16si<mask_name>"
10257 [(set (match_operand:V8DI 0 "register_operand")
10261 (match_operand:V16SI 1 "nonimmediate_operand")
10262 (parallel [(const_int 0) (const_int 2)
10263 (const_int 4) (const_int 6)
10264 (const_int 8) (const_int 10)
10265 (const_int 12) (const_int 14)])))
10268 (match_operand:V16SI 2 "nonimmediate_operand")
10269 (parallel [(const_int 0) (const_int 2)
10270 (const_int 4) (const_int 6)
10271 (const_int 8) (const_int 10)
10272 (const_int 12) (const_int 14)])))))]
10274 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
10276 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
10277 [(set (match_operand:V8DI 0 "register_operand" "=v")
10281 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
10282 (parallel [(const_int 0) (const_int 2)
10283 (const_int 4) (const_int 6)
10284 (const_int 8) (const_int 10)
10285 (const_int 12) (const_int 14)])))
10288 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
10289 (parallel [(const_int 0) (const_int 2)
10290 (const_int 4) (const_int 6)
10291 (const_int 8) (const_int 10)
10292 (const_int 12) (const_int 14)])))))]
10293 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
10294 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10295 [(set_attr "type" "sseimul")
10296 (set_attr "prefix_extra" "1")
10297 (set_attr "prefix" "evex")
10298 (set_attr "mode" "XI")])
10300 (define_expand "vec_widen_smult_even_v8si<mask_name>"
10301 [(set (match_operand:V4DI 0 "register_operand")
10305 (match_operand:V8SI 1 "nonimmediate_operand")
10306 (parallel [(const_int 0) (const_int 2)
10307 (const_int 4) (const_int 6)])))
10310 (match_operand:V8SI 2 "nonimmediate_operand")
10311 (parallel [(const_int 0) (const_int 2)
10312 (const_int 4) (const_int 6)])))))]
10313 "TARGET_AVX2 && <mask_avx512vl_condition>"
10314 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
10316 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
10317 [(set (match_operand:V4DI 0 "register_operand" "=v")
10321 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
10322 (parallel [(const_int 0) (const_int 2)
10323 (const_int 4) (const_int 6)])))
10326 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
10327 (parallel [(const_int 0) (const_int 2)
10328 (const_int 4) (const_int 6)])))))]
10330 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
10331 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10332 [(set_attr "type" "sseimul")
10333 (set_attr "prefix_extra" "1")
10334 (set_attr "prefix" "vex")
10335 (set_attr "mode" "OI")])
10337 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
10338 [(set (match_operand:V2DI 0 "register_operand")
10342 (match_operand:V4SI 1 "vector_operand")
10343 (parallel [(const_int 0) (const_int 2)])))
10346 (match_operand:V4SI 2 "vector_operand")
10347 (parallel [(const_int 0) (const_int 2)])))))]
10348 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
10349 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
10351 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
10352 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
10356 (match_operand:V4SI 1 "vector_operand" "%0,0,v")
10357 (parallel [(const_int 0) (const_int 2)])))
10360 (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
10361 (parallel [(const_int 0) (const_int 2)])))))]
10362 "TARGET_SSE4_1 && <mask_avx512vl_condition>
10363 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
10365 pmuldq\t{%2, %0|%0, %2}
10366 pmuldq\t{%2, %0|%0, %2}
10367 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10368 [(set_attr "isa" "noavx,noavx,avx")
10369 (set_attr "type" "sseimul")
10370 (set_attr "prefix_data16" "1,1,*")
10371 (set_attr "prefix_extra" "1")
10372 (set_attr "prefix" "orig,orig,vex")
10373 (set_attr "mode" "TI")])
10375 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
10376 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
10377 (unspec:<sseunpackmode>
10378 [(match_operand:VI2_AVX2 1 "register_operand" "v")
10379 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
10380 UNSPEC_PMADDWD512))]
10381 "TARGET_AVX512BW && <mask_mode512bit_condition>"
10382 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
10383 [(set_attr "type" "sseiadd")
10384 (set_attr "prefix" "evex")
10385 (set_attr "mode" "XI")])
10387 (define_expand "avx2_pmaddwd"
10388 [(set (match_operand:V8SI 0 "register_operand")
10393 (match_operand:V16HI 1 "nonimmediate_operand")
10394 (parallel [(const_int 0) (const_int 2)
10395 (const_int 4) (const_int 6)
10396 (const_int 8) (const_int 10)
10397 (const_int 12) (const_int 14)])))
10400 (match_operand:V16HI 2 "nonimmediate_operand")
10401 (parallel [(const_int 0) (const_int 2)
10402 (const_int 4) (const_int 6)
10403 (const_int 8) (const_int 10)
10404 (const_int 12) (const_int 14)]))))
10407 (vec_select:V8HI (match_dup 1)
10408 (parallel [(const_int 1) (const_int 3)
10409 (const_int 5) (const_int 7)
10410 (const_int 9) (const_int 11)
10411 (const_int 13) (const_int 15)])))
10413 (vec_select:V8HI (match_dup 2)
10414 (parallel [(const_int 1) (const_int 3)
10415 (const_int 5) (const_int 7)
10416 (const_int 9) (const_int 11)
10417 (const_int 13) (const_int 15)]))))))]
10419 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
10421 (define_insn "*avx2_pmaddwd"
10422 [(set (match_operand:V8SI 0 "register_operand" "=x,v")
10427 (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
10428 (parallel [(const_int 0) (const_int 2)
10429 (const_int 4) (const_int 6)
10430 (const_int 8) (const_int 10)
10431 (const_int 12) (const_int 14)])))
10434 (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
10435 (parallel [(const_int 0) (const_int 2)
10436 (const_int 4) (const_int 6)
10437 (const_int 8) (const_int 10)
10438 (const_int 12) (const_int 14)]))))
10441 (vec_select:V8HI (match_dup 1)
10442 (parallel [(const_int 1) (const_int 3)
10443 (const_int 5) (const_int 7)
10444 (const_int 9) (const_int 11)
10445 (const_int 13) (const_int 15)])))
10447 (vec_select:V8HI (match_dup 2)
10448 (parallel [(const_int 1) (const_int 3)
10449 (const_int 5) (const_int 7)
10450 (const_int 9) (const_int 11)
10451 (const_int 13) (const_int 15)]))))))]
10452 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
10453 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
10454 [(set_attr "type" "sseiadd")
10455 (set_attr "isa" "*,avx512bw")
10456 (set_attr "prefix" "vex,evex")
10457 (set_attr "mode" "OI")])
10459 (define_expand "sse2_pmaddwd"
10460 [(set (match_operand:V4SI 0 "register_operand")
10465 (match_operand:V8HI 1 "vector_operand")
10466 (parallel [(const_int 0) (const_int 2)
10467 (const_int 4) (const_int 6)])))
10470 (match_operand:V8HI 2 "vector_operand")
10471 (parallel [(const_int 0) (const_int 2)
10472 (const_int 4) (const_int 6)]))))
10475 (vec_select:V4HI (match_dup 1)
10476 (parallel [(const_int 1) (const_int 3)
10477 (const_int 5) (const_int 7)])))
10479 (vec_select:V4HI (match_dup 2)
10480 (parallel [(const_int 1) (const_int 3)
10481 (const_int 5) (const_int 7)]))))))]
10483 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
10485 (define_insn "*sse2_pmaddwd"
10486 [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
10491 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
10492 (parallel [(const_int 0) (const_int 2)
10493 (const_int 4) (const_int 6)])))
10496 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
10497 (parallel [(const_int 0) (const_int 2)
10498 (const_int 4) (const_int 6)]))))
10501 (vec_select:V4HI (match_dup 1)
10502 (parallel [(const_int 1) (const_int 3)
10503 (const_int 5) (const_int 7)])))
10505 (vec_select:V4HI (match_dup 2)
10506 (parallel [(const_int 1) (const_int 3)
10507 (const_int 5) (const_int 7)]))))))]
10508 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
10510 pmaddwd\t{%2, %0|%0, %2}
10511 vpmaddwd\t{%2, %1, %0|%0, %1, %2}
10512 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
10513 [(set_attr "isa" "noavx,avx,avx512bw")
10514 (set_attr "type" "sseiadd")
10515 (set_attr "atom_unit" "simul")
10516 (set_attr "prefix_data16" "1,*,*")
10517 (set_attr "prefix" "orig,vex,evex")
10518 (set_attr "mode" "TI")])
10520 (define_insn "avx512dq_mul<mode>3<mask_name>"
10521 [(set (match_operand:VI8 0 "register_operand" "=v")
10523 (match_operand:VI8 1 "register_operand" "v")
10524 (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
10525 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
10526 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10527 [(set_attr "type" "sseimul")
10528 (set_attr "prefix" "evex")
10529 (set_attr "mode" "<sseinsnmode>")])
10531 (define_expand "mul<mode>3<mask_name>"
10532 [(set (match_operand:VI4_AVX512F 0 "register_operand")
10534 (match_operand:VI4_AVX512F 1 "general_vector_operand")
10535 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
10536 "TARGET_SSE2 && <mask_mode512bit_condition>"
10540 if (!vector_operand (operands[1], <MODE>mode))
10541 operands[1] = force_reg (<MODE>mode, operands[1]);
10542 if (!vector_operand (operands[2], <MODE>mode))
10543 operands[2] = force_reg (<MODE>mode, operands[2]);
10544 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
10548 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
10553 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
10554 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
10556 (match_operand:VI4_AVX512F 1 "vector_operand" "%0,0,v")
10557 (match_operand:VI4_AVX512F 2 "vector_operand" "YrBm,*xBm,vm")))]
10558 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
10560 pmulld\t{%2, %0|%0, %2}
10561 pmulld\t{%2, %0|%0, %2}
10562 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10563 [(set_attr "isa" "noavx,noavx,avx")
10564 (set_attr "type" "sseimul")
10565 (set_attr "prefix_extra" "1")
10566 (set_attr "prefix" "<mask_prefix4>")
10567 (set_attr "btver2_decode" "vector,vector,vector")
10568 (set_attr "mode" "<sseinsnmode>")])
10570 (define_expand "mul<mode>3"
10571 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
10572 (mult:VI8_AVX2_AVX512F
10573 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
10574 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
10577 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
10581 (define_expand "vec_widen_<s>mult_hi_<mode>"
10582 [(match_operand:<sseunpackmode> 0 "register_operand")
10583 (any_extend:<sseunpackmode>
10584 (match_operand:VI124_AVX2 1 "register_operand"))
10585 (match_operand:VI124_AVX2 2 "register_operand")]
10588 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
10593 (define_expand "vec_widen_<s>mult_lo_<mode>"
10594 [(match_operand:<sseunpackmode> 0 "register_operand")
10595 (any_extend:<sseunpackmode>
10596 (match_operand:VI124_AVX2 1 "register_operand"))
10597 (match_operand:VI124_AVX2 2 "register_operand")]
10600 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
10605 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
10606 ;; named patterns, but signed V4SI needs special help for plain SSE2.
10607 (define_expand "vec_widen_smult_even_v4si"
10608 [(match_operand:V2DI 0 "register_operand")
10609 (match_operand:V4SI 1 "vector_operand")
10610 (match_operand:V4SI 2 "vector_operand")]
10613 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
10618 (define_expand "vec_widen_<s>mult_odd_<mode>"
10619 [(match_operand:<sseunpackmode> 0 "register_operand")
10620 (any_extend:<sseunpackmode>
10621 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
10622 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
10625 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
10630 (define_mode_attr SDOT_PMADD_SUF
10631 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
10633 (define_expand "sdot_prod<mode>"
10634 [(match_operand:<sseunpackmode> 0 "register_operand")
10635 (match_operand:VI2_AVX2 1 "register_operand")
10636 (match_operand:VI2_AVX2 2 "register_operand")
10637 (match_operand:<sseunpackmode> 3 "register_operand")]
10640 rtx t = gen_reg_rtx (<sseunpackmode>mode);
10641 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
10642 emit_insn (gen_rtx_SET (operands[0],
10643 gen_rtx_PLUS (<sseunpackmode>mode,
10648 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
10649 ;; back together when madd is available.
10650 (define_expand "sdot_prodv4si"
10651 [(match_operand:V2DI 0 "register_operand")
10652 (match_operand:V4SI 1 "register_operand")
10653 (match_operand:V4SI 2 "register_operand")
10654 (match_operand:V2DI 3 "register_operand")]
10657 rtx t = gen_reg_rtx (V2DImode);
10658 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
10659 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
10663 (define_expand "usadv16qi"
10664 [(match_operand:V4SI 0 "register_operand")
10665 (match_operand:V16QI 1 "register_operand")
10666 (match_operand:V16QI 2 "vector_operand")
10667 (match_operand:V4SI 3 "vector_operand")]
10670 rtx t1 = gen_reg_rtx (V2DImode);
10671 rtx t2 = gen_reg_rtx (V4SImode);
10672 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
10673 convert_move (t2, t1, 0);
10674 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
10678 (define_expand "usadv32qi"
10679 [(match_operand:V8SI 0 "register_operand")
10680 (match_operand:V32QI 1 "register_operand")
10681 (match_operand:V32QI 2 "nonimmediate_operand")
10682 (match_operand:V8SI 3 "nonimmediate_operand")]
10685 rtx t1 = gen_reg_rtx (V4DImode);
10686 rtx t2 = gen_reg_rtx (V8SImode);
10687 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
10688 convert_move (t2, t1, 0);
10689 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
10693 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
10694 [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
10695 (ashiftrt:VI248_AVX512BW_1
10696 (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
10697 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10699 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10700 [(set_attr "type" "sseishft")
10701 (set (attr "length_immediate")
10702 (if_then_else (match_operand 2 "const_int_operand")
10704 (const_string "0")))
10705 (set_attr "mode" "<sseinsnmode>")])
10707 (define_insn "ashr<mode>3"
10708 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
10709 (ashiftrt:VI24_AVX2
10710 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
10711 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
10714 psra<ssemodesuffix>\t{%2, %0|%0, %2}
10715 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10716 [(set_attr "isa" "noavx,avx")
10717 (set_attr "type" "sseishft")
10718 (set (attr "length_immediate")
10719 (if_then_else (match_operand 2 "const_int_operand")
10721 (const_string "0")))
10722 (set_attr "prefix_data16" "1,*")
10723 (set_attr "prefix" "orig,vex")
10724 (set_attr "mode" "<sseinsnmode>")])
10726 (define_insn "ashr<mode>3<mask_name>"
10727 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
10728 (ashiftrt:VI248_AVX512BW_AVX512VL
10729 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
10730 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10732 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10733 [(set_attr "type" "sseishft")
10734 (set (attr "length_immediate")
10735 (if_then_else (match_operand 2 "const_int_operand")
10737 (const_string "0")))
10738 (set_attr "mode" "<sseinsnmode>")])
10740 (define_insn "<mask_codefor><shift_insn><mode>3<mask_name>"
10741 [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
10742 (any_lshift:VI248_AVX512BW_2
10743 (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
10744 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10746 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10747 [(set_attr "type" "sseishft")
10748 (set (attr "length_immediate")
10749 (if_then_else (match_operand 2 "const_int_operand")
10751 (const_string "0")))
10752 (set_attr "mode" "<sseinsnmode>")])
10754 (define_insn "<shift_insn><mode>3"
10755 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
10756 (any_lshift:VI248_AVX2
10757 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
10758 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
10761 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
10762 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10763 [(set_attr "isa" "noavx,avx")
10764 (set_attr "type" "sseishft")
10765 (set (attr "length_immediate")
10766 (if_then_else (match_operand 2 "const_int_operand")
10768 (const_string "0")))
10769 (set_attr "prefix_data16" "1,*")
10770 (set_attr "prefix" "orig,vex")
10771 (set_attr "mode" "<sseinsnmode>")])
10773 (define_insn "<shift_insn><mode>3<mask_name>"
10774 [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
10775 (any_lshift:VI248_AVX512BW
10776 (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
10777 (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
10779 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10780 [(set_attr "type" "sseishft")
10781 (set (attr "length_immediate")
10782 (if_then_else (match_operand 2 "const_int_operand")
10784 (const_string "0")))
10785 (set_attr "mode" "<sseinsnmode>")])
10788 (define_expand "vec_shr_<mode>"
10789 [(set (match_dup 3)
10791 (match_operand:VI_128 1 "register_operand")
10792 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10793 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10796 operands[1] = gen_lowpart (V1TImode, operands[1]);
10797 operands[3] = gen_reg_rtx (V1TImode);
10798 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
10801 (define_insn "avx512bw_<shift_insn><mode>3"
10802 [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
10803 (any_lshift:VIMAX_AVX512VL
10804 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
10805 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
10808 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10809 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
10811 [(set_attr "type" "sseishft")
10812 (set_attr "length_immediate" "1")
10813 (set_attr "prefix" "maybe_evex")
10814 (set_attr "mode" "<sseinsnmode>")])
10816 (define_insn "<sse2_avx2>_<shift_insn><mode>3"
10817 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10818 (any_lshift:VIMAX_AVX2
10819 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10820 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10823 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10825 switch (which_alternative)
10828 return "p<vshift>dq\t{%2, %0|%0, %2}";
10830 return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
10832 gcc_unreachable ();
10835 [(set_attr "isa" "noavx,avx")
10836 (set_attr "type" "sseishft")
10837 (set_attr "length_immediate" "1")
10838 (set_attr "atom_unit" "sishuf")
10839 (set_attr "prefix_data16" "1,*")
10840 (set_attr "prefix" "orig,vex")
10841 (set_attr "mode" "<sseinsnmode>")])
10843 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
10844 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10845 (any_rotate:VI48_AVX512VL
10846 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
10847 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10849 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10850 [(set_attr "prefix" "evex")
10851 (set_attr "mode" "<sseinsnmode>")])
10853 (define_insn "<avx512>_<rotate><mode><mask_name>"
10854 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10855 (any_rotate:VI48_AVX512VL
10856 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
10857 (match_operand:SI 2 "const_0_to_255_operand")))]
10859 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10860 [(set_attr "prefix" "evex")
10861 (set_attr "mode" "<sseinsnmode>")])
10863 (define_expand "<code><mode>3"
10864 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
10865 (maxmin:VI124_256_AVX512F_AVX512BW
10866 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
10867 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
10869 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10871 (define_insn "*avx2_<code><mode>3"
10872 [(set (match_operand:VI124_256 0 "register_operand" "=v")
10874 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
10875 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
10876 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10877 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10878 [(set_attr "type" "sseiadd")
10879 (set_attr "prefix_extra" "1")
10880 (set_attr "prefix" "vex")
10881 (set_attr "mode" "OI")])
10883 (define_expand "<code><mode>3_mask"
10884 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10885 (vec_merge:VI48_AVX512VL
10886 (maxmin:VI48_AVX512VL
10887 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10888 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10889 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10890 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10892 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10894 (define_insn "*avx512f_<code><mode>3<mask_name>"
10895 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10896 (maxmin:VI48_AVX512VL
10897 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
10898 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10899 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10900 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10901 [(set_attr "type" "sseiadd")
10902 (set_attr "prefix_extra" "1")
10903 (set_attr "prefix" "maybe_evex")
10904 (set_attr "mode" "<sseinsnmode>")])
10906 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10907 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10908 (maxmin:VI12_AVX512VL
10909 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
10910 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
10912 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10913 [(set_attr "type" "sseiadd")
10914 (set_attr "prefix" "evex")
10915 (set_attr "mode" "<sseinsnmode>")])
10917 (define_expand "<code><mode>3"
10918 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
10919 (maxmin:VI8_AVX2_AVX512F
10920 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
10921 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
10925 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
10926 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10929 enum rtx_code code;
10934 xops[0] = operands[0];
10936 if (<CODE> == SMAX || <CODE> == UMAX)
10938 xops[1] = operands[1];
10939 xops[2] = operands[2];
10943 xops[1] = operands[2];
10944 xops[2] = operands[1];
10947 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
10949 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
10950 xops[4] = operands[1];
10951 xops[5] = operands[2];
10953 ok = ix86_expand_int_vcond (xops);
10959 (define_expand "<code><mode>3"
10960 [(set (match_operand:VI124_128 0 "register_operand")
10962 (match_operand:VI124_128 1 "vector_operand")
10963 (match_operand:VI124_128 2 "vector_operand")))]
10966 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
10967 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10973 xops[0] = operands[0];
10974 operands[1] = force_reg (<MODE>mode, operands[1]);
10975 operands[2] = force_reg (<MODE>mode, operands[2]);
10977 if (<CODE> == SMAX)
10979 xops[1] = operands[1];
10980 xops[2] = operands[2];
10984 xops[1] = operands[2];
10985 xops[2] = operands[1];
10988 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
10989 xops[4] = operands[1];
10990 xops[5] = operands[2];
10992 ok = ix86_expand_int_vcond (xops);
10998 (define_insn "*sse4_1_<code><mode>3<mask_name>"
10999 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
11001 (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
11002 (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
11004 && <mask_mode512bit_condition>
11005 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11007 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11008 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11009 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11010 [(set_attr "isa" "noavx,noavx,avx")
11011 (set_attr "type" "sseiadd")
11012 (set_attr "prefix_extra" "1,1,*")
11013 (set_attr "prefix" "orig,orig,vex")
11014 (set_attr "mode" "TI")])
11016 (define_insn "*<code>v8hi3"
11017 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
11019 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
11020 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
11021 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
11023 p<maxmin_int>w\t{%2, %0|%0, %2}
11024 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
11025 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
11026 [(set_attr "isa" "noavx,avx,avx512bw")
11027 (set_attr "type" "sseiadd")
11028 (set_attr "prefix_data16" "1,*,*")
11029 (set_attr "prefix_extra" "*,1,1")
11030 (set_attr "prefix" "orig,vex,evex")
11031 (set_attr "mode" "TI")])
11033 (define_expand "<code><mode>3"
11034 [(set (match_operand:VI124_128 0 "register_operand")
11036 (match_operand:VI124_128 1 "vector_operand")
11037 (match_operand:VI124_128 2 "vector_operand")))]
11040 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
11041 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11042 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
11044 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
11045 operands[1] = force_reg (<MODE>mode, operands[1]);
11046 if (rtx_equal_p (op3, op2))
11047 op3 = gen_reg_rtx (V8HImode);
11048 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
11049 emit_insn (gen_addv8hi3 (op0, op3, op2));
11057 operands[1] = force_reg (<MODE>mode, operands[1]);
11058 operands[2] = force_reg (<MODE>mode, operands[2]);
11060 xops[0] = operands[0];
11062 if (<CODE> == UMAX)
11064 xops[1] = operands[1];
11065 xops[2] = operands[2];
11069 xops[1] = operands[2];
11070 xops[2] = operands[1];
11073 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
11074 xops[4] = operands[1];
11075 xops[5] = operands[2];
11077 ok = ix86_expand_int_vcond (xops);
11083 (define_insn "*sse4_1_<code><mode>3<mask_name>"
11084 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
11086 (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
11087 (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
11089 && <mask_mode512bit_condition>
11090 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11092 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11093 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11094 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11095 [(set_attr "isa" "noavx,noavx,avx")
11096 (set_attr "type" "sseiadd")
11097 (set_attr "prefix_extra" "1,1,*")
11098 (set_attr "prefix" "orig,orig,vex")
11099 (set_attr "mode" "TI")])
11101 (define_insn "*<code>v16qi3"
11102 [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
11104 (match_operand:V16QI 1 "vector_operand" "%0,x,v")
11105 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
11106 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
11108 p<maxmin_int>b\t{%2, %0|%0, %2}
11109 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
11110 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
11111 [(set_attr "isa" "noavx,avx,avx512bw")
11112 (set_attr "type" "sseiadd")
11113 (set_attr "prefix_data16" "1,*,*")
11114 (set_attr "prefix_extra" "*,1,1")
11115 (set_attr "prefix" "orig,vex,evex")
11116 (set_attr "mode" "TI")])
11118 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11120 ;; Parallel integral comparisons
11122 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11124 (define_expand "avx2_eq<mode>3"
11125 [(set (match_operand:VI_256 0 "register_operand")
11127 (match_operand:VI_256 1 "nonimmediate_operand")
11128 (match_operand:VI_256 2 "nonimmediate_operand")))]
11130 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11132 (define_insn "*avx2_eq<mode>3"
11133 [(set (match_operand:VI_256 0 "register_operand" "=x")
11135 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
11136 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
11137 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
11138 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11139 [(set_attr "type" "ssecmp")
11140 (set_attr "prefix_extra" "1")
11141 (set_attr "prefix" "vex")
11142 (set_attr "mode" "OI")])
11144 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
11145 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
11146 (unspec:<avx512fmaskmode>
11147 [(match_operand:VI12_AVX512VL 1 "register_operand")
11148 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
11149 UNSPEC_MASKED_EQ))]
11151 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11153 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
11154 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
11155 (unspec:<avx512fmaskmode>
11156 [(match_operand:VI48_AVX512VL 1 "register_operand")
11157 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
11158 UNSPEC_MASKED_EQ))]
11160 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11162 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
11163 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11164 (unspec:<avx512fmaskmode>
11165 [(match_operand:VI12_AVX512VL 1 "register_operand" "%v")
11166 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11167 UNSPEC_MASKED_EQ))]
11168 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
11169 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11170 [(set_attr "type" "ssecmp")
11171 (set_attr "prefix_extra" "1")
11172 (set_attr "prefix" "evex")
11173 (set_attr "mode" "<sseinsnmode>")])
11175 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
11176 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11177 (unspec:<avx512fmaskmode>
11178 [(match_operand:VI48_AVX512VL 1 "register_operand" "%v")
11179 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11180 UNSPEC_MASKED_EQ))]
11181 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
11182 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11183 [(set_attr "type" "ssecmp")
11184 (set_attr "prefix_extra" "1")
11185 (set_attr "prefix" "evex")
11186 (set_attr "mode" "<sseinsnmode>")])
11188 (define_insn "*sse4_1_eqv2di3"
11189 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
11191 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
11192 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
11193 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
11195 pcmpeqq\t{%2, %0|%0, %2}
11196 pcmpeqq\t{%2, %0|%0, %2}
11197 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
11198 [(set_attr "isa" "noavx,noavx,avx")
11199 (set_attr "type" "ssecmp")
11200 (set_attr "prefix_extra" "1")
11201 (set_attr "prefix" "orig,orig,vex")
11202 (set_attr "mode" "TI")])
11204 (define_insn "*sse2_eq<mode>3"
11205 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
11207 (match_operand:VI124_128 1 "vector_operand" "%0,x")
11208 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
11209 "TARGET_SSE2 && !TARGET_XOP
11210 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
11212 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
11213 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11214 [(set_attr "isa" "noavx,avx")
11215 (set_attr "type" "ssecmp")
11216 (set_attr "prefix_data16" "1,*")
11217 (set_attr "prefix" "orig,vex")
11218 (set_attr "mode" "TI")])
11220 (define_expand "sse2_eq<mode>3"
11221 [(set (match_operand:VI124_128 0 "register_operand")
11223 (match_operand:VI124_128 1 "vector_operand")
11224 (match_operand:VI124_128 2 "vector_operand")))]
11225 "TARGET_SSE2 && !TARGET_XOP "
11226 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11228 (define_expand "sse4_1_eqv2di3"
11229 [(set (match_operand:V2DI 0 "register_operand")
11231 (match_operand:V2DI 1 "vector_operand")
11232 (match_operand:V2DI 2 "vector_operand")))]
11234 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
11236 (define_insn "sse4_2_gtv2di3"
11237 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
11239 (match_operand:V2DI 1 "register_operand" "0,0,x")
11240 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
11243 pcmpgtq\t{%2, %0|%0, %2}
11244 pcmpgtq\t{%2, %0|%0, %2}
11245 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
11246 [(set_attr "isa" "noavx,noavx,avx")
11247 (set_attr "type" "ssecmp")
11248 (set_attr "prefix_extra" "1")
11249 (set_attr "prefix" "orig,orig,vex")
11250 (set_attr "mode" "TI")])
11252 (define_insn "avx2_gt<mode>3"
11253 [(set (match_operand:VI_256 0 "register_operand" "=x")
11255 (match_operand:VI_256 1 "register_operand" "x")
11256 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
11258 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11259 [(set_attr "type" "ssecmp")
11260 (set_attr "prefix_extra" "1")
11261 (set_attr "prefix" "vex")
11262 (set_attr "mode" "OI")])
11264 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
11265 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11266 (unspec:<avx512fmaskmode>
11267 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11268 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
11270 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11271 [(set_attr "type" "ssecmp")
11272 (set_attr "prefix_extra" "1")
11273 (set_attr "prefix" "evex")
11274 (set_attr "mode" "<sseinsnmode>")])
11276 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
11277 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11278 (unspec:<avx512fmaskmode>
11279 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11280 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
11282 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11283 [(set_attr "type" "ssecmp")
11284 (set_attr "prefix_extra" "1")
11285 (set_attr "prefix" "evex")
11286 (set_attr "mode" "<sseinsnmode>")])
11288 (define_insn "sse2_gt<mode>3"
11289 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
11291 (match_operand:VI124_128 1 "register_operand" "0,x")
11292 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
11293 "TARGET_SSE2 && !TARGET_XOP"
11295 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
11296 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11297 [(set_attr "isa" "noavx,avx")
11298 (set_attr "type" "ssecmp")
11299 (set_attr "prefix_data16" "1,*")
11300 (set_attr "prefix" "orig,vex")
11301 (set_attr "mode" "TI")])
11303 (define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
11304 [(set (match_operand:V_512 0 "register_operand")
11305 (if_then_else:V_512
11306 (match_operator 3 ""
11307 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
11308 (match_operand:VI_AVX512BW 5 "general_operand")])
11309 (match_operand:V_512 1)
11310 (match_operand:V_512 2)))]
11312 && (GET_MODE_NUNITS (<V_512:MODE>mode)
11313 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
11315 bool ok = ix86_expand_int_vcond (operands);
11320 (define_expand "vcond<V_256:mode><VI_256:mode>"
11321 [(set (match_operand:V_256 0 "register_operand")
11322 (if_then_else:V_256
11323 (match_operator 3 ""
11324 [(match_operand:VI_256 4 "nonimmediate_operand")
11325 (match_operand:VI_256 5 "general_operand")])
11326 (match_operand:V_256 1)
11327 (match_operand:V_256 2)))]
11329 && (GET_MODE_NUNITS (<V_256:MODE>mode)
11330 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
11332 bool ok = ix86_expand_int_vcond (operands);
11337 (define_expand "vcond<V_128:mode><VI124_128:mode>"
11338 [(set (match_operand:V_128 0 "register_operand")
11339 (if_then_else:V_128
11340 (match_operator 3 ""
11341 [(match_operand:VI124_128 4 "vector_operand")
11342 (match_operand:VI124_128 5 "general_operand")])
11343 (match_operand:V_128 1)
11344 (match_operand:V_128 2)))]
11346 && (GET_MODE_NUNITS (<V_128:MODE>mode)
11347 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
11349 bool ok = ix86_expand_int_vcond (operands);
11354 (define_expand "vcond<VI8F_128:mode>v2di"
11355 [(set (match_operand:VI8F_128 0 "register_operand")
11356 (if_then_else:VI8F_128
11357 (match_operator 3 ""
11358 [(match_operand:V2DI 4 "vector_operand")
11359 (match_operand:V2DI 5 "general_operand")])
11360 (match_operand:VI8F_128 1)
11361 (match_operand:VI8F_128 2)))]
11364 bool ok = ix86_expand_int_vcond (operands);
11369 (define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
11370 [(set (match_operand:V_512 0 "register_operand")
11371 (if_then_else:V_512
11372 (match_operator 3 ""
11373 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
11374 (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
11375 (match_operand:V_512 1 "general_operand")
11376 (match_operand:V_512 2 "general_operand")))]
11378 && (GET_MODE_NUNITS (<V_512:MODE>mode)
11379 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
11381 bool ok = ix86_expand_int_vcond (operands);
11386 (define_expand "vcondu<V_256:mode><VI_256:mode>"
11387 [(set (match_operand:V_256 0 "register_operand")
11388 (if_then_else:V_256
11389 (match_operator 3 ""
11390 [(match_operand:VI_256 4 "nonimmediate_operand")
11391 (match_operand:VI_256 5 "nonimmediate_operand")])
11392 (match_operand:V_256 1 "general_operand")
11393 (match_operand:V_256 2 "general_operand")))]
11395 && (GET_MODE_NUNITS (<V_256:MODE>mode)
11396 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
11398 bool ok = ix86_expand_int_vcond (operands);
11403 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
11404 [(set (match_operand:V_128 0 "register_operand")
11405 (if_then_else:V_128
11406 (match_operator 3 ""
11407 [(match_operand:VI124_128 4 "vector_operand")
11408 (match_operand:VI124_128 5 "vector_operand")])
11409 (match_operand:V_128 1 "general_operand")
11410 (match_operand:V_128 2 "general_operand")))]
11412 && (GET_MODE_NUNITS (<V_128:MODE>mode)
11413 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
11415 bool ok = ix86_expand_int_vcond (operands);
11420 (define_expand "vcondu<VI8F_128:mode>v2di"
11421 [(set (match_operand:VI8F_128 0 "register_operand")
11422 (if_then_else:VI8F_128
11423 (match_operator 3 ""
11424 [(match_operand:V2DI 4 "vector_operand")
11425 (match_operand:V2DI 5 "vector_operand")])
11426 (match_operand:VI8F_128 1 "general_operand")
11427 (match_operand:VI8F_128 2 "general_operand")))]
11430 bool ok = ix86_expand_int_vcond (operands);
11435 (define_expand "vcondeq<VI8F_128:mode>v2di"
11436 [(set (match_operand:VI8F_128 0 "register_operand")
11437 (if_then_else:VI8F_128
11438 (match_operator 3 ""
11439 [(match_operand:V2DI 4 "vector_operand")
11440 (match_operand:V2DI 5 "general_operand")])
11441 (match_operand:VI8F_128 1)
11442 (match_operand:VI8F_128 2)))]
11445 bool ok = ix86_expand_int_vcond (operands);
11450 (define_mode_iterator VEC_PERM_AVX2
11451 [V16QI V8HI V4SI V2DI V4SF V2DF
11452 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
11453 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
11454 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
11455 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
11456 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
11457 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
11459 (define_expand "vec_perm<mode>"
11460 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
11461 (match_operand:VEC_PERM_AVX2 1 "register_operand")
11462 (match_operand:VEC_PERM_AVX2 2 "register_operand")
11463 (match_operand:<sseintvecmode> 3 "register_operand")]
11464 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
11466 ix86_expand_vec_perm (operands);
11470 (define_mode_iterator VEC_PERM_CONST
11471 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
11472 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
11473 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
11474 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
11475 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
11476 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
11477 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
11478 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
11479 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
11481 (define_expand "vec_perm_const<mode>"
11482 [(match_operand:VEC_PERM_CONST 0 "register_operand")
11483 (match_operand:VEC_PERM_CONST 1 "register_operand")
11484 (match_operand:VEC_PERM_CONST 2 "register_operand")
11485 (match_operand:<sseintvecmode> 3)]
11488 if (ix86_expand_vec_perm_const (operands))
11494 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11496 ;; Parallel bitwise logical operations
11498 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11500 (define_expand "one_cmpl<mode>2"
11501 [(set (match_operand:VI 0 "register_operand")
11502 (xor:VI (match_operand:VI 1 "vector_operand")
11506 int i, n = GET_MODE_NUNITS (<MODE>mode);
11507 rtvec v = rtvec_alloc (n);
11509 for (i = 0; i < n; ++i)
11510 RTVEC_ELT (v, i) = constm1_rtx;
11512 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
11515 (define_expand "<sse2_avx2>_andnot<mode>3"
11516 [(set (match_operand:VI_AVX2 0 "register_operand")
11518 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
11519 (match_operand:VI_AVX2 2 "vector_operand")))]
11522 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
11523 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11524 (vec_merge:VI48_AVX512VL
11527 (match_operand:VI48_AVX512VL 1 "register_operand"))
11528 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11529 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
11530 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11533 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
11534 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
11535 (vec_merge:VI12_AVX512VL
11538 (match_operand:VI12_AVX512VL 1 "register_operand"))
11539 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
11540 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
11541 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11544 (define_insn "*andnot<mode>3"
11545 [(set (match_operand:VI 0 "register_operand" "=x,x,v")
11547 (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
11548 (match_operand:VI 2 "vector_operand" "xBm,xm,vm")))]
11551 static char buf[64];
11554 const char *ssesuffix;
11556 switch (get_attr_mode (insn))
11559 gcc_assert (TARGET_AVX512F);
11562 gcc_assert (TARGET_AVX2);
11565 gcc_assert (TARGET_SSE2);
11567 switch (<MODE>mode)
11571 /* There is no vpandnb or vpandnw instruction, nor vpandn for
11572 512-bit vectors. Use vpandnq instead. */
11577 ssesuffix = "<ssemodesuffix>";
11583 ssesuffix = (TARGET_AVX512VL && which_alternative == 2
11584 ? "<ssemodesuffix>" : "");
11587 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
11592 gcc_assert (TARGET_AVX512F);
11595 gcc_assert (TARGET_AVX);
11598 gcc_assert (TARGET_SSE);
11604 gcc_unreachable ();
11607 switch (which_alternative)
11610 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11614 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
11617 gcc_unreachable ();
11620 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11623 [(set_attr "isa" "noavx,avx,avx")
11624 (set_attr "type" "sselog")
11625 (set (attr "prefix_data16")
11627 (and (eq_attr "alternative" "0")
11628 (eq_attr "mode" "TI"))
11630 (const_string "*")))
11631 (set_attr "prefix" "orig,vex,evex")
11633 (cond [(and (match_test "<MODE_SIZE> == 16")
11634 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11635 (const_string "<ssePSmode>")
11636 (match_test "TARGET_AVX2")
11637 (const_string "<sseinsnmode>")
11638 (match_test "TARGET_AVX")
11640 (match_test "<MODE_SIZE> > 16")
11641 (const_string "V8SF")
11642 (const_string "<sseinsnmode>"))
11643 (ior (not (match_test "TARGET_SSE2"))
11644 (match_test "optimize_function_for_size_p (cfun)"))
11645 (const_string "V4SF")
11647 (const_string "<sseinsnmode>")))])
11649 (define_insn "*andnot<mode>3_mask"
11650 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11651 (vec_merge:VI48_AVX512VL
11654 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
11655 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
11656 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
11657 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11659 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
11660 [(set_attr "type" "sselog")
11661 (set_attr "prefix" "evex")
11662 (set_attr "mode" "<sseinsnmode>")])
11664 (define_expand "<code><mode>3"
11665 [(set (match_operand:VI 0 "register_operand")
11667 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
11668 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
11671 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
11675 (define_insn "<mask_codefor><code><mode>3<mask_name>"
11676 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
11677 (any_logic:VI48_AVX_AVX512F
11678 (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,x,v")
11679 (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
11680 "TARGET_SSE && <mask_mode512bit_condition>
11681 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11683 static char buf[64];
11686 const char *ssesuffix;
11688 switch (get_attr_mode (insn))
11691 gcc_assert (TARGET_AVX512F);
11694 gcc_assert (TARGET_AVX2);
11697 gcc_assert (TARGET_SSE2);
11699 switch (<MODE>mode)
11703 ssesuffix = "<ssemodesuffix>";
11709 ssesuffix = (TARGET_AVX512VL
11710 && (<mask_applied> || which_alternative == 2)
11711 ? "<ssemodesuffix>" : "");
11714 gcc_unreachable ();
11719 gcc_assert (TARGET_AVX);
11722 gcc_assert (TARGET_SSE);
11728 gcc_unreachable ();
11731 switch (which_alternative)
11734 if (<mask_applied>)
11735 ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
11737 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11741 ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
11744 gcc_unreachable ();
11747 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11750 [(set_attr "isa" "noavx,avx,avx")
11751 (set_attr "type" "sselog")
11752 (set (attr "prefix_data16")
11754 (and (eq_attr "alternative" "0")
11755 (eq_attr "mode" "TI"))
11757 (const_string "*")))
11758 (set_attr "prefix" "<mask_prefix3>,evex")
11760 (cond [(and (match_test "<MODE_SIZE> == 16")
11761 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11762 (const_string "<ssePSmode>")
11763 (match_test "TARGET_AVX2")
11764 (const_string "<sseinsnmode>")
11765 (match_test "TARGET_AVX")
11767 (match_test "<MODE_SIZE> > 16")
11768 (const_string "V8SF")
11769 (const_string "<sseinsnmode>"))
11770 (ior (not (match_test "TARGET_SSE2"))
11771 (match_test "optimize_function_for_size_p (cfun)"))
11772 (const_string "V4SF")
11774 (const_string "<sseinsnmode>")))])
11776 (define_insn "*<code><mode>3"
11777 [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
11778 (any_logic: VI12_AVX_AVX512F
11779 (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
11780 (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
11781 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11783 static char buf[64];
11786 const char *ssesuffix;
11788 switch (get_attr_mode (insn))
11791 gcc_assert (TARGET_AVX512F);
11794 gcc_assert (TARGET_AVX2);
11797 gcc_assert (TARGET_SSE2);
11799 switch (<MODE>mode)
11809 ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
11812 gcc_unreachable ();
11817 gcc_assert (TARGET_AVX);
11820 gcc_assert (TARGET_SSE);
11826 gcc_unreachable ();
11829 switch (which_alternative)
11832 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11836 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
11839 gcc_unreachable ();
11842 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11845 [(set_attr "isa" "noavx,avx,avx")
11846 (set_attr "type" "sselog")
11847 (set (attr "prefix_data16")
11849 (and (eq_attr "alternative" "0")
11850 (eq_attr "mode" "TI"))
11852 (const_string "*")))
11853 (set_attr "prefix" "<mask_prefix3>,evex")
11855 (cond [(and (match_test "<MODE_SIZE> == 16")
11856 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11857 (const_string "<ssePSmode>")
11858 (match_test "TARGET_AVX2")
11859 (const_string "<sseinsnmode>")
11860 (match_test "TARGET_AVX")
11862 (match_test "<MODE_SIZE> > 16")
11863 (const_string "V8SF")
11864 (const_string "<sseinsnmode>"))
11865 (ior (not (match_test "TARGET_SSE2"))
11866 (match_test "optimize_function_for_size_p (cfun)"))
11867 (const_string "V4SF")
11869 (const_string "<sseinsnmode>")))])
11871 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11872 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11873 (unspec:<avx512fmaskmode>
11874 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11875 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11878 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11879 [(set_attr "prefix" "evex")
11880 (set_attr "mode" "<sseinsnmode>")])
11882 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11883 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11884 (unspec:<avx512fmaskmode>
11885 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11886 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11889 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11890 [(set_attr "prefix" "evex")
11891 (set_attr "mode" "<sseinsnmode>")])
11893 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11894 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11895 (unspec:<avx512fmaskmode>
11896 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11897 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11900 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11901 [(set_attr "prefix" "evex")
11902 (set_attr "mode" "<sseinsnmode>")])
11904 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11905 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11906 (unspec:<avx512fmaskmode>
11907 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11908 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11911 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11912 [(set_attr "prefix" "evex")
11913 (set_attr "mode" "<sseinsnmode>")])
11915 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11917 ;; Parallel integral element swizzling
11919 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11921 (define_expand "vec_pack_trunc_<mode>"
11922 [(match_operand:<ssepackmode> 0 "register_operand")
11923 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
11924 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
11927 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
11928 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
11929 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
11933 (define_expand "vec_pack_trunc_qi"
11934 [(set (match_operand:HI 0 ("register_operand"))
11935 (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 ("register_operand")))
11937 (zero_extend:HI (match_operand:QI 1 ("register_operand")))))]
11940 (define_expand "vec_pack_trunc_<mode>"
11941 [(set (match_operand:<DOUBLEMASKMODE> 0 ("register_operand"))
11942 (ior:<DOUBLEMASKMODE> (ashift:<DOUBLEMASKMODE> (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 2 ("register_operand")))
11944 (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 1 ("register_operand")))))]
11947 operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
11950 (define_insn "<sse2_avx2>_packsswb<mask_name>"
11951 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
11952 (vec_concat:VI1_AVX512
11953 (ss_truncate:<ssehalfvecmode>
11954 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
11955 (ss_truncate:<ssehalfvecmode>
11956 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
11957 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11959 packsswb\t{%2, %0|%0, %2}
11960 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
11961 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11962 [(set_attr "isa" "noavx,avx,avx512bw")
11963 (set_attr "type" "sselog")
11964 (set_attr "prefix_data16" "1,*,*")
11965 (set_attr "prefix" "orig,<mask_prefix>,evex")
11966 (set_attr "mode" "<sseinsnmode>")])
11968 (define_insn "<sse2_avx2>_packssdw<mask_name>"
11969 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
11970 (vec_concat:VI2_AVX2
11971 (ss_truncate:<ssehalfvecmode>
11972 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
11973 (ss_truncate:<ssehalfvecmode>
11974 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
11975 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11977 packssdw\t{%2, %0|%0, %2}
11978 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
11979 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11980 [(set_attr "isa" "noavx,avx,avx512bw")
11981 (set_attr "type" "sselog")
11982 (set_attr "prefix_data16" "1,*,*")
11983 (set_attr "prefix" "orig,<mask_prefix>,evex")
11984 (set_attr "mode" "<sseinsnmode>")])
11986 (define_insn "<sse2_avx2>_packuswb<mask_name>"
11987 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
11988 (vec_concat:VI1_AVX512
11989 (us_truncate:<ssehalfvecmode>
11990 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
11991 (us_truncate:<ssehalfvecmode>
11992 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
11993 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11995 packuswb\t{%2, %0|%0, %2}
11996 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
11997 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11998 [(set_attr "isa" "noavx,avx,avx512bw")
11999 (set_attr "type" "sselog")
12000 (set_attr "prefix_data16" "1,*,*")
12001 (set_attr "prefix" "orig,<mask_prefix>,evex")
12002 (set_attr "mode" "<sseinsnmode>")])
12004 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
12005 [(set (match_operand:V64QI 0 "register_operand" "=v")
12008 (match_operand:V64QI 1 "register_operand" "v")
12009 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
12010 (parallel [(const_int 8) (const_int 72)
12011 (const_int 9) (const_int 73)
12012 (const_int 10) (const_int 74)
12013 (const_int 11) (const_int 75)
12014 (const_int 12) (const_int 76)
12015 (const_int 13) (const_int 77)
12016 (const_int 14) (const_int 78)
12017 (const_int 15) (const_int 79)
12018 (const_int 24) (const_int 88)
12019 (const_int 25) (const_int 89)
12020 (const_int 26) (const_int 90)
12021 (const_int 27) (const_int 91)
12022 (const_int 28) (const_int 92)
12023 (const_int 29) (const_int 93)
12024 (const_int 30) (const_int 94)
12025 (const_int 31) (const_int 95)
12026 (const_int 40) (const_int 104)
12027 (const_int 41) (const_int 105)
12028 (const_int 42) (const_int 106)
12029 (const_int 43) (const_int 107)
12030 (const_int 44) (const_int 108)
12031 (const_int 45) (const_int 109)
12032 (const_int 46) (const_int 110)
12033 (const_int 47) (const_int 111)
12034 (const_int 56) (const_int 120)
12035 (const_int 57) (const_int 121)
12036 (const_int 58) (const_int 122)
12037 (const_int 59) (const_int 123)
12038 (const_int 60) (const_int 124)
12039 (const_int 61) (const_int 125)
12040 (const_int 62) (const_int 126)
12041 (const_int 63) (const_int 127)])))]
12043 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12044 [(set_attr "type" "sselog")
12045 (set_attr "prefix" "evex")
12046 (set_attr "mode" "XI")])
12048 (define_insn "avx2_interleave_highv32qi<mask_name>"
12049 [(set (match_operand:V32QI 0 "register_operand" "=v")
12052 (match_operand:V32QI 1 "register_operand" "v")
12053 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
12054 (parallel [(const_int 8) (const_int 40)
12055 (const_int 9) (const_int 41)
12056 (const_int 10) (const_int 42)
12057 (const_int 11) (const_int 43)
12058 (const_int 12) (const_int 44)
12059 (const_int 13) (const_int 45)
12060 (const_int 14) (const_int 46)
12061 (const_int 15) (const_int 47)
12062 (const_int 24) (const_int 56)
12063 (const_int 25) (const_int 57)
12064 (const_int 26) (const_int 58)
12065 (const_int 27) (const_int 59)
12066 (const_int 28) (const_int 60)
12067 (const_int 29) (const_int 61)
12068 (const_int 30) (const_int 62)
12069 (const_int 31) (const_int 63)])))]
12070 "TARGET_AVX2 && <mask_avx512vl_condition>"
12071 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12072 [(set_attr "type" "sselog")
12073 (set_attr "prefix" "<mask_prefix>")
12074 (set_attr "mode" "OI")])
12076 (define_insn "vec_interleave_highv16qi<mask_name>"
12077 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
12080 (match_operand:V16QI 1 "register_operand" "0,v")
12081 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
12082 (parallel [(const_int 8) (const_int 24)
12083 (const_int 9) (const_int 25)
12084 (const_int 10) (const_int 26)
12085 (const_int 11) (const_int 27)
12086 (const_int 12) (const_int 28)
12087 (const_int 13) (const_int 29)
12088 (const_int 14) (const_int 30)
12089 (const_int 15) (const_int 31)])))]
12090 "TARGET_SSE2 && <mask_avx512vl_condition>"
12092 punpckhbw\t{%2, %0|%0, %2}
12093 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12094 [(set_attr "isa" "noavx,avx")
12095 (set_attr "type" "sselog")
12096 (set_attr "prefix_data16" "1,*")
12097 (set_attr "prefix" "orig,<mask_prefix>")
12098 (set_attr "mode" "TI")])
12100 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
12101 [(set (match_operand:V64QI 0 "register_operand" "=v")
12104 (match_operand:V64QI 1 "register_operand" "v")
12105 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
12106 (parallel [(const_int 0) (const_int 64)
12107 (const_int 1) (const_int 65)
12108 (const_int 2) (const_int 66)
12109 (const_int 3) (const_int 67)
12110 (const_int 4) (const_int 68)
12111 (const_int 5) (const_int 69)
12112 (const_int 6) (const_int 70)
12113 (const_int 7) (const_int 71)
12114 (const_int 16) (const_int 80)
12115 (const_int 17) (const_int 81)
12116 (const_int 18) (const_int 82)
12117 (const_int 19) (const_int 83)
12118 (const_int 20) (const_int 84)
12119 (const_int 21) (const_int 85)
12120 (const_int 22) (const_int 86)
12121 (const_int 23) (const_int 87)
12122 (const_int 32) (const_int 96)
12123 (const_int 33) (const_int 97)
12124 (const_int 34) (const_int 98)
12125 (const_int 35) (const_int 99)
12126 (const_int 36) (const_int 100)
12127 (const_int 37) (const_int 101)
12128 (const_int 38) (const_int 102)
12129 (const_int 39) (const_int 103)
12130 (const_int 48) (const_int 112)
12131 (const_int 49) (const_int 113)
12132 (const_int 50) (const_int 114)
12133 (const_int 51) (const_int 115)
12134 (const_int 52) (const_int 116)
12135 (const_int 53) (const_int 117)
12136 (const_int 54) (const_int 118)
12137 (const_int 55) (const_int 119)])))]
12139 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12140 [(set_attr "type" "sselog")
12141 (set_attr "prefix" "evex")
12142 (set_attr "mode" "XI")])
12144 (define_insn "avx2_interleave_lowv32qi<mask_name>"
12145 [(set (match_operand:V32QI 0 "register_operand" "=v")
12148 (match_operand:V32QI 1 "register_operand" "v")
12149 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
12150 (parallel [(const_int 0) (const_int 32)
12151 (const_int 1) (const_int 33)
12152 (const_int 2) (const_int 34)
12153 (const_int 3) (const_int 35)
12154 (const_int 4) (const_int 36)
12155 (const_int 5) (const_int 37)
12156 (const_int 6) (const_int 38)
12157 (const_int 7) (const_int 39)
12158 (const_int 16) (const_int 48)
12159 (const_int 17) (const_int 49)
12160 (const_int 18) (const_int 50)
12161 (const_int 19) (const_int 51)
12162 (const_int 20) (const_int 52)
12163 (const_int 21) (const_int 53)
12164 (const_int 22) (const_int 54)
12165 (const_int 23) (const_int 55)])))]
12166 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12167 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12168 [(set_attr "type" "sselog")
12169 (set_attr "prefix" "maybe_vex")
12170 (set_attr "mode" "OI")])
12172 (define_insn "vec_interleave_lowv16qi<mask_name>"
12173 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
12176 (match_operand:V16QI 1 "register_operand" "0,v")
12177 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
12178 (parallel [(const_int 0) (const_int 16)
12179 (const_int 1) (const_int 17)
12180 (const_int 2) (const_int 18)
12181 (const_int 3) (const_int 19)
12182 (const_int 4) (const_int 20)
12183 (const_int 5) (const_int 21)
12184 (const_int 6) (const_int 22)
12185 (const_int 7) (const_int 23)])))]
12186 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12188 punpcklbw\t{%2, %0|%0, %2}
12189 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12190 [(set_attr "isa" "noavx,avx")
12191 (set_attr "type" "sselog")
12192 (set_attr "prefix_data16" "1,*")
12193 (set_attr "prefix" "orig,vex")
12194 (set_attr "mode" "TI")])
12196 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
12197 [(set (match_operand:V32HI 0 "register_operand" "=v")
12200 (match_operand:V32HI 1 "register_operand" "v")
12201 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
12202 (parallel [(const_int 4) (const_int 36)
12203 (const_int 5) (const_int 37)
12204 (const_int 6) (const_int 38)
12205 (const_int 7) (const_int 39)
12206 (const_int 12) (const_int 44)
12207 (const_int 13) (const_int 45)
12208 (const_int 14) (const_int 46)
12209 (const_int 15) (const_int 47)
12210 (const_int 20) (const_int 52)
12211 (const_int 21) (const_int 53)
12212 (const_int 22) (const_int 54)
12213 (const_int 23) (const_int 55)
12214 (const_int 28) (const_int 60)
12215 (const_int 29) (const_int 61)
12216 (const_int 30) (const_int 62)
12217 (const_int 31) (const_int 63)])))]
12219 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12220 [(set_attr "type" "sselog")
12221 (set_attr "prefix" "evex")
12222 (set_attr "mode" "XI")])
12224 (define_insn "avx2_interleave_highv16hi<mask_name>"
12225 [(set (match_operand:V16HI 0 "register_operand" "=v")
12228 (match_operand:V16HI 1 "register_operand" "v")
12229 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
12230 (parallel [(const_int 4) (const_int 20)
12231 (const_int 5) (const_int 21)
12232 (const_int 6) (const_int 22)
12233 (const_int 7) (const_int 23)
12234 (const_int 12) (const_int 28)
12235 (const_int 13) (const_int 29)
12236 (const_int 14) (const_int 30)
12237 (const_int 15) (const_int 31)])))]
12238 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12239 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12240 [(set_attr "type" "sselog")
12241 (set_attr "prefix" "maybe_evex")
12242 (set_attr "mode" "OI")])
12244 (define_insn "vec_interleave_highv8hi<mask_name>"
12245 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
12248 (match_operand:V8HI 1 "register_operand" "0,v")
12249 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
12250 (parallel [(const_int 4) (const_int 12)
12251 (const_int 5) (const_int 13)
12252 (const_int 6) (const_int 14)
12253 (const_int 7) (const_int 15)])))]
12254 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12256 punpckhwd\t{%2, %0|%0, %2}
12257 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12258 [(set_attr "isa" "noavx,avx")
12259 (set_attr "type" "sselog")
12260 (set_attr "prefix_data16" "1,*")
12261 (set_attr "prefix" "orig,maybe_vex")
12262 (set_attr "mode" "TI")])
12264 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
12265 [(set (match_operand:V32HI 0 "register_operand" "=v")
12268 (match_operand:V32HI 1 "register_operand" "v")
12269 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
12270 (parallel [(const_int 0) (const_int 32)
12271 (const_int 1) (const_int 33)
12272 (const_int 2) (const_int 34)
12273 (const_int 3) (const_int 35)
12274 (const_int 8) (const_int 40)
12275 (const_int 9) (const_int 41)
12276 (const_int 10) (const_int 42)
12277 (const_int 11) (const_int 43)
12278 (const_int 16) (const_int 48)
12279 (const_int 17) (const_int 49)
12280 (const_int 18) (const_int 50)
12281 (const_int 19) (const_int 51)
12282 (const_int 24) (const_int 56)
12283 (const_int 25) (const_int 57)
12284 (const_int 26) (const_int 58)
12285 (const_int 27) (const_int 59)])))]
12287 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12288 [(set_attr "type" "sselog")
12289 (set_attr "prefix" "evex")
12290 (set_attr "mode" "XI")])
12292 (define_insn "avx2_interleave_lowv16hi<mask_name>"
12293 [(set (match_operand:V16HI 0 "register_operand" "=v")
12296 (match_operand:V16HI 1 "register_operand" "v")
12297 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
12298 (parallel [(const_int 0) (const_int 16)
12299 (const_int 1) (const_int 17)
12300 (const_int 2) (const_int 18)
12301 (const_int 3) (const_int 19)
12302 (const_int 8) (const_int 24)
12303 (const_int 9) (const_int 25)
12304 (const_int 10) (const_int 26)
12305 (const_int 11) (const_int 27)])))]
12306 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12307 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12308 [(set_attr "type" "sselog")
12309 (set_attr "prefix" "maybe_evex")
12310 (set_attr "mode" "OI")])
12312 (define_insn "vec_interleave_lowv8hi<mask_name>"
12313 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
12316 (match_operand:V8HI 1 "register_operand" "0,v")
12317 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
12318 (parallel [(const_int 0) (const_int 8)
12319 (const_int 1) (const_int 9)
12320 (const_int 2) (const_int 10)
12321 (const_int 3) (const_int 11)])))]
12322 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12324 punpcklwd\t{%2, %0|%0, %2}
12325 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12326 [(set_attr "isa" "noavx,avx")
12327 (set_attr "type" "sselog")
12328 (set_attr "prefix_data16" "1,*")
12329 (set_attr "prefix" "orig,maybe_evex")
12330 (set_attr "mode" "TI")])
12332 (define_insn "avx2_interleave_highv8si<mask_name>"
12333 [(set (match_operand:V8SI 0 "register_operand" "=v")
12336 (match_operand:V8SI 1 "register_operand" "v")
12337 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
12338 (parallel [(const_int 2) (const_int 10)
12339 (const_int 3) (const_int 11)
12340 (const_int 6) (const_int 14)
12341 (const_int 7) (const_int 15)])))]
12342 "TARGET_AVX2 && <mask_avx512vl_condition>"
12343 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12344 [(set_attr "type" "sselog")
12345 (set_attr "prefix" "maybe_evex")
12346 (set_attr "mode" "OI")])
12348 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
12349 [(set (match_operand:V16SI 0 "register_operand" "=v")
12352 (match_operand:V16SI 1 "register_operand" "v")
12353 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
12354 (parallel [(const_int 2) (const_int 18)
12355 (const_int 3) (const_int 19)
12356 (const_int 6) (const_int 22)
12357 (const_int 7) (const_int 23)
12358 (const_int 10) (const_int 26)
12359 (const_int 11) (const_int 27)
12360 (const_int 14) (const_int 30)
12361 (const_int 15) (const_int 31)])))]
12363 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12364 [(set_attr "type" "sselog")
12365 (set_attr "prefix" "evex")
12366 (set_attr "mode" "XI")])
12369 (define_insn "vec_interleave_highv4si<mask_name>"
12370 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
12373 (match_operand:V4SI 1 "register_operand" "0,v")
12374 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
12375 (parallel [(const_int 2) (const_int 6)
12376 (const_int 3) (const_int 7)])))]
12377 "TARGET_SSE2 && <mask_avx512vl_condition>"
12379 punpckhdq\t{%2, %0|%0, %2}
12380 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12381 [(set_attr "isa" "noavx,avx")
12382 (set_attr "type" "sselog")
12383 (set_attr "prefix_data16" "1,*")
12384 (set_attr "prefix" "orig,maybe_vex")
12385 (set_attr "mode" "TI")])
12387 (define_insn "avx2_interleave_lowv8si<mask_name>"
12388 [(set (match_operand:V8SI 0 "register_operand" "=v")
12391 (match_operand:V8SI 1 "register_operand" "v")
12392 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
12393 (parallel [(const_int 0) (const_int 8)
12394 (const_int 1) (const_int 9)
12395 (const_int 4) (const_int 12)
12396 (const_int 5) (const_int 13)])))]
12397 "TARGET_AVX2 && <mask_avx512vl_condition>"
12398 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12399 [(set_attr "type" "sselog")
12400 (set_attr "prefix" "maybe_evex")
12401 (set_attr "mode" "OI")])
12403 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
12404 [(set (match_operand:V16SI 0 "register_operand" "=v")
12407 (match_operand:V16SI 1 "register_operand" "v")
12408 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
12409 (parallel [(const_int 0) (const_int 16)
12410 (const_int 1) (const_int 17)
12411 (const_int 4) (const_int 20)
12412 (const_int 5) (const_int 21)
12413 (const_int 8) (const_int 24)
12414 (const_int 9) (const_int 25)
12415 (const_int 12) (const_int 28)
12416 (const_int 13) (const_int 29)])))]
12418 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12419 [(set_attr "type" "sselog")
12420 (set_attr "prefix" "evex")
12421 (set_attr "mode" "XI")])
12423 (define_insn "vec_interleave_lowv4si<mask_name>"
12424 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
12427 (match_operand:V4SI 1 "register_operand" "0,v")
12428 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
12429 (parallel [(const_int 0) (const_int 4)
12430 (const_int 1) (const_int 5)])))]
12431 "TARGET_SSE2 && <mask_avx512vl_condition>"
12433 punpckldq\t{%2, %0|%0, %2}
12434 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12435 [(set_attr "isa" "noavx,avx")
12436 (set_attr "type" "sselog")
12437 (set_attr "prefix_data16" "1,*")
12438 (set_attr "prefix" "orig,vex")
12439 (set_attr "mode" "TI")])
12441 (define_expand "vec_interleave_high<mode>"
12442 [(match_operand:VI_256 0 "register_operand")
12443 (match_operand:VI_256 1 "register_operand")
12444 (match_operand:VI_256 2 "nonimmediate_operand")]
12447 rtx t1 = gen_reg_rtx (<MODE>mode);
12448 rtx t2 = gen_reg_rtx (<MODE>mode);
12449 rtx t3 = gen_reg_rtx (V4DImode);
12450 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
12451 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
12452 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
12453 gen_lowpart (V4DImode, t2),
12454 GEN_INT (1 + (3 << 4))));
12455 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
12459 (define_expand "vec_interleave_low<mode>"
12460 [(match_operand:VI_256 0 "register_operand")
12461 (match_operand:VI_256 1 "register_operand")
12462 (match_operand:VI_256 2 "nonimmediate_operand")]
12465 rtx t1 = gen_reg_rtx (<MODE>mode);
12466 rtx t2 = gen_reg_rtx (<MODE>mode);
12467 rtx t3 = gen_reg_rtx (V4DImode);
12468 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
12469 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
12470 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
12471 gen_lowpart (V4DImode, t2),
12472 GEN_INT (0 + (2 << 4))));
12473 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
12477 ;; Modes handled by pinsr patterns.
12478 (define_mode_iterator PINSR_MODE
12479 [(V16QI "TARGET_SSE4_1") V8HI
12480 (V4SI "TARGET_SSE4_1")
12481 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
12483 (define_mode_attr sse2p4_1
12484 [(V16QI "sse4_1") (V8HI "sse2")
12485 (V4SI "sse4_1") (V2DI "sse4_1")])
12487 (define_mode_attr pinsr_evex_isa
12488 [(V16QI "avx512bw") (V8HI "avx512bw")
12489 (V4SI "avx512dq") (V2DI "avx512dq")])
12491 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
12492 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
12493 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
12494 (vec_merge:PINSR_MODE
12495 (vec_duplicate:PINSR_MODE
12496 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
12497 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
12498 (match_operand:SI 3 "const_int_operand")))]
12500 && ((unsigned) exact_log2 (INTVAL (operands[3]))
12501 < GET_MODE_NUNITS (<MODE>mode))"
12503 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
12505 switch (which_alternative)
12508 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
12509 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
12512 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
12515 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
12516 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
12520 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12522 gcc_unreachable ();
12525 [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
12526 (set_attr "type" "sselog")
12527 (set (attr "prefix_rex")
12529 (and (not (match_test "TARGET_AVX"))
12530 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
12532 (const_string "*")))
12533 (set (attr "prefix_data16")
12535 (and (not (match_test "TARGET_AVX"))
12536 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12538 (const_string "*")))
12539 (set (attr "prefix_extra")
12541 (and (not (match_test "TARGET_AVX"))
12542 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12544 (const_string "1")))
12545 (set_attr "length_immediate" "1")
12546 (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
12547 (set_attr "mode" "TI")])
12549 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
12550 [(match_operand:AVX512_VEC 0 "register_operand")
12551 (match_operand:AVX512_VEC 1 "register_operand")
12552 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
12553 (match_operand:SI 3 "const_0_to_3_operand")
12554 (match_operand:AVX512_VEC 4 "register_operand")
12555 (match_operand:<avx512fmaskmode> 5 "register_operand")]
12559 mask = INTVAL (operands[3]);
12560 selector = GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ?
12561 0xFFFF ^ (0xF000 >> mask * 4)
12562 : 0xFF ^ (0xC0 >> mask * 2);
12563 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
12564 (operands[0], operands[1], operands[2], GEN_INT (selector),
12565 operands[4], operands[5]));
12569 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
12570 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
12571 (vec_merge:AVX512_VEC
12572 (match_operand:AVX512_VEC 1 "register_operand" "v")
12573 (vec_duplicate:AVX512_VEC
12574 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
12575 (match_operand:SI 3 "const_int_operand" "n")))]
12579 int selector = INTVAL (operands[3]);
12581 if (selector == 0xFFF || selector == 0x3F)
12583 else if ( selector == 0xF0FF || selector == 0xCF)
12585 else if ( selector == 0xFF0F || selector == 0xF3)
12587 else if ( selector == 0xFFF0 || selector == 0xFC)
12590 gcc_unreachable ();
12592 operands[3] = GEN_INT (mask);
12594 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
12596 [(set_attr "type" "sselog")
12597 (set_attr "length_immediate" "1")
12598 (set_attr "prefix" "evex")
12599 (set_attr "mode" "<sseinsnmode>")])
12601 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
12602 [(match_operand:AVX512_VEC_2 0 "register_operand")
12603 (match_operand:AVX512_VEC_2 1 "register_operand")
12604 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
12605 (match_operand:SI 3 "const_0_to_1_operand")
12606 (match_operand:AVX512_VEC_2 4 "register_operand")
12607 (match_operand:<avx512fmaskmode> 5 "register_operand")]
12610 int mask = INTVAL (operands[3]);
12612 emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
12613 operands[2], operands[4],
12616 emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
12617 operands[2], operands[4],
12622 (define_insn "vec_set_lo_<mode><mask_name>"
12623 [(set (match_operand:V16FI 0 "register_operand" "=v")
12625 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
12626 (vec_select:<ssehalfvecmode>
12627 (match_operand:V16FI 1 "register_operand" "v")
12628 (parallel [(const_int 8) (const_int 9)
12629 (const_int 10) (const_int 11)
12630 (const_int 12) (const_int 13)
12631 (const_int 14) (const_int 15)]))))]
12633 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
12634 [(set_attr "type" "sselog")
12635 (set_attr "length_immediate" "1")
12636 (set_attr "prefix" "evex")
12637 (set_attr "mode" "<sseinsnmode>")])
12639 (define_insn "vec_set_hi_<mode><mask_name>"
12640 [(set (match_operand:V16FI 0 "register_operand" "=v")
12642 (vec_select:<ssehalfvecmode>
12643 (match_operand:V16FI 1 "register_operand" "v")
12644 (parallel [(const_int 0) (const_int 1)
12645 (const_int 2) (const_int 3)
12646 (const_int 4) (const_int 5)
12647 (const_int 6) (const_int 7)]))
12648 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
12650 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
12651 [(set_attr "type" "sselog")
12652 (set_attr "length_immediate" "1")
12653 (set_attr "prefix" "evex")
12654 (set_attr "mode" "<sseinsnmode>")])
12656 (define_insn "vec_set_lo_<mode><mask_name>"
12657 [(set (match_operand:V8FI 0 "register_operand" "=v")
12659 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
12660 (vec_select:<ssehalfvecmode>
12661 (match_operand:V8FI 1 "register_operand" "v")
12662 (parallel [(const_int 4) (const_int 5)
12663 (const_int 6) (const_int 7)]))))]
12665 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
12666 [(set_attr "type" "sselog")
12667 (set_attr "length_immediate" "1")
12668 (set_attr "prefix" "evex")
12669 (set_attr "mode" "XI")])
12671 (define_insn "vec_set_hi_<mode><mask_name>"
12672 [(set (match_operand:V8FI 0 "register_operand" "=v")
12674 (vec_select:<ssehalfvecmode>
12675 (match_operand:V8FI 1 "register_operand" "v")
12676 (parallel [(const_int 0) (const_int 1)
12677 (const_int 2) (const_int 3)]))
12678 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
12680 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
12681 [(set_attr "type" "sselog")
12682 (set_attr "length_immediate" "1")
12683 (set_attr "prefix" "evex")
12684 (set_attr "mode" "XI")])
12686 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
12687 [(match_operand:VI8F_256 0 "register_operand")
12688 (match_operand:VI8F_256 1 "register_operand")
12689 (match_operand:VI8F_256 2 "nonimmediate_operand")
12690 (match_operand:SI 3 "const_0_to_3_operand")
12691 (match_operand:VI8F_256 4 "register_operand")
12692 (match_operand:QI 5 "register_operand")]
12695 int mask = INTVAL (operands[3]);
12696 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
12697 (operands[0], operands[1], operands[2],
12698 GEN_INT (((mask >> 0) & 1) * 2 + 0),
12699 GEN_INT (((mask >> 0) & 1) * 2 + 1),
12700 GEN_INT (((mask >> 1) & 1) * 2 + 4),
12701 GEN_INT (((mask >> 1) & 1) * 2 + 5),
12702 operands[4], operands[5]));
12706 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
12707 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
12708 (vec_select:VI8F_256
12709 (vec_concat:<ssedoublemode>
12710 (match_operand:VI8F_256 1 "register_operand" "v")
12711 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
12712 (parallel [(match_operand 3 "const_0_to_3_operand")
12713 (match_operand 4 "const_0_to_3_operand")
12714 (match_operand 5 "const_4_to_7_operand")
12715 (match_operand 6 "const_4_to_7_operand")])))]
12717 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12718 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
12721 mask = INTVAL (operands[3]) / 2;
12722 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
12723 operands[3] = GEN_INT (mask);
12724 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
12726 [(set_attr "type" "sselog")
12727 (set_attr "length_immediate" "1")
12728 (set_attr "prefix" "evex")
12729 (set_attr "mode" "XI")])
12731 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
12732 [(match_operand:V8FI 0 "register_operand")
12733 (match_operand:V8FI 1 "register_operand")
12734 (match_operand:V8FI 2 "nonimmediate_operand")
12735 (match_operand:SI 3 "const_0_to_255_operand")
12736 (match_operand:V8FI 4 "register_operand")
12737 (match_operand:QI 5 "register_operand")]
12740 int mask = INTVAL (operands[3]);
12741 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
12742 (operands[0], operands[1], operands[2],
12743 GEN_INT (((mask >> 0) & 3) * 2),
12744 GEN_INT (((mask >> 0) & 3) * 2 + 1),
12745 GEN_INT (((mask >> 2) & 3) * 2),
12746 GEN_INT (((mask >> 2) & 3) * 2 + 1),
12747 GEN_INT (((mask >> 4) & 3) * 2 + 8),
12748 GEN_INT (((mask >> 4) & 3) * 2 + 9),
12749 GEN_INT (((mask >> 6) & 3) * 2 + 8),
12750 GEN_INT (((mask >> 6) & 3) * 2 + 9),
12751 operands[4], operands[5]));
12755 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
12756 [(set (match_operand:V8FI 0 "register_operand" "=v")
12758 (vec_concat:<ssedoublemode>
12759 (match_operand:V8FI 1 "register_operand" "v")
12760 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
12761 (parallel [(match_operand 3 "const_0_to_7_operand")
12762 (match_operand 4 "const_0_to_7_operand")
12763 (match_operand 5 "const_0_to_7_operand")
12764 (match_operand 6 "const_0_to_7_operand")
12765 (match_operand 7 "const_8_to_15_operand")
12766 (match_operand 8 "const_8_to_15_operand")
12767 (match_operand 9 "const_8_to_15_operand")
12768 (match_operand 10 "const_8_to_15_operand")])))]
12770 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12771 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
12772 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12773 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
12776 mask = INTVAL (operands[3]) / 2;
12777 mask |= INTVAL (operands[5]) / 2 << 2;
12778 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
12779 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
12780 operands[3] = GEN_INT (mask);
12782 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
12784 [(set_attr "type" "sselog")
12785 (set_attr "length_immediate" "1")
12786 (set_attr "prefix" "evex")
12787 (set_attr "mode" "<sseinsnmode>")])
12789 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
12790 [(match_operand:VI4F_256 0 "register_operand")
12791 (match_operand:VI4F_256 1 "register_operand")
12792 (match_operand:VI4F_256 2 "nonimmediate_operand")
12793 (match_operand:SI 3 "const_0_to_3_operand")
12794 (match_operand:VI4F_256 4 "register_operand")
12795 (match_operand:QI 5 "register_operand")]
12798 int mask = INTVAL (operands[3]);
12799 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
12800 (operands[0], operands[1], operands[2],
12801 GEN_INT (((mask >> 0) & 1) * 4 + 0),
12802 GEN_INT (((mask >> 0) & 1) * 4 + 1),
12803 GEN_INT (((mask >> 0) & 1) * 4 + 2),
12804 GEN_INT (((mask >> 0) & 1) * 4 + 3),
12805 GEN_INT (((mask >> 1) & 1) * 4 + 8),
12806 GEN_INT (((mask >> 1) & 1) * 4 + 9),
12807 GEN_INT (((mask >> 1) & 1) * 4 + 10),
12808 GEN_INT (((mask >> 1) & 1) * 4 + 11),
12809 operands[4], operands[5]));
12813 (define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
12814 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
12815 (vec_select:VI4F_256
12816 (vec_concat:<ssedoublemode>
12817 (match_operand:VI4F_256 1 "register_operand" "v")
12818 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
12819 (parallel [(match_operand 3 "const_0_to_7_operand")
12820 (match_operand 4 "const_0_to_7_operand")
12821 (match_operand 5 "const_0_to_7_operand")
12822 (match_operand 6 "const_0_to_7_operand")
12823 (match_operand 7 "const_8_to_15_operand")
12824 (match_operand 8 "const_8_to_15_operand")
12825 (match_operand 9 "const_8_to_15_operand")
12826 (match_operand 10 "const_8_to_15_operand")])))]
12828 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12829 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12830 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12831 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12832 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12833 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
12836 mask = INTVAL (operands[3]) / 4;
12837 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
12838 operands[3] = GEN_INT (mask);
12840 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
12842 [(set_attr "type" "sselog")
12843 (set_attr "length_immediate" "1")
12844 (set_attr "prefix" "evex")
12845 (set_attr "mode" "<sseinsnmode>")])
12847 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
12848 [(match_operand:V16FI 0 "register_operand")
12849 (match_operand:V16FI 1 "register_operand")
12850 (match_operand:V16FI 2 "nonimmediate_operand")
12851 (match_operand:SI 3 "const_0_to_255_operand")
12852 (match_operand:V16FI 4 "register_operand")
12853 (match_operand:HI 5 "register_operand")]
12856 int mask = INTVAL (operands[3]);
12857 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
12858 (operands[0], operands[1], operands[2],
12859 GEN_INT (((mask >> 0) & 3) * 4),
12860 GEN_INT (((mask >> 0) & 3) * 4 + 1),
12861 GEN_INT (((mask >> 0) & 3) * 4 + 2),
12862 GEN_INT (((mask >> 0) & 3) * 4 + 3),
12863 GEN_INT (((mask >> 2) & 3) * 4),
12864 GEN_INT (((mask >> 2) & 3) * 4 + 1),
12865 GEN_INT (((mask >> 2) & 3) * 4 + 2),
12866 GEN_INT (((mask >> 2) & 3) * 4 + 3),
12867 GEN_INT (((mask >> 4) & 3) * 4 + 16),
12868 GEN_INT (((mask >> 4) & 3) * 4 + 17),
12869 GEN_INT (((mask >> 4) & 3) * 4 + 18),
12870 GEN_INT (((mask >> 4) & 3) * 4 + 19),
12871 GEN_INT (((mask >> 6) & 3) * 4 + 16),
12872 GEN_INT (((mask >> 6) & 3) * 4 + 17),
12873 GEN_INT (((mask >> 6) & 3) * 4 + 18),
12874 GEN_INT (((mask >> 6) & 3) * 4 + 19),
12875 operands[4], operands[5]));
12879 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
12880 [(set (match_operand:V16FI 0 "register_operand" "=v")
12882 (vec_concat:<ssedoublemode>
12883 (match_operand:V16FI 1 "register_operand" "v")
12884 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
12885 (parallel [(match_operand 3 "const_0_to_15_operand")
12886 (match_operand 4 "const_0_to_15_operand")
12887 (match_operand 5 "const_0_to_15_operand")
12888 (match_operand 6 "const_0_to_15_operand")
12889 (match_operand 7 "const_0_to_15_operand")
12890 (match_operand 8 "const_0_to_15_operand")
12891 (match_operand 9 "const_0_to_15_operand")
12892 (match_operand 10 "const_0_to_15_operand")
12893 (match_operand 11 "const_16_to_31_operand")
12894 (match_operand 12 "const_16_to_31_operand")
12895 (match_operand 13 "const_16_to_31_operand")
12896 (match_operand 14 "const_16_to_31_operand")
12897 (match_operand 15 "const_16_to_31_operand")
12898 (match_operand 16 "const_16_to_31_operand")
12899 (match_operand 17 "const_16_to_31_operand")
12900 (match_operand 18 "const_16_to_31_operand")])))]
12902 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12903 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12904 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12905 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12906 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12907 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
12908 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
12909 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
12910 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
12911 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
12912 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
12913 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
12916 mask = INTVAL (operands[3]) / 4;
12917 mask |= INTVAL (operands[7]) / 4 << 2;
12918 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
12919 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
12920 operands[3] = GEN_INT (mask);
12922 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
12924 [(set_attr "type" "sselog")
12925 (set_attr "length_immediate" "1")
12926 (set_attr "prefix" "evex")
12927 (set_attr "mode" "<sseinsnmode>")])
12929 (define_expand "avx512f_pshufdv3_mask"
12930 [(match_operand:V16SI 0 "register_operand")
12931 (match_operand:V16SI 1 "nonimmediate_operand")
12932 (match_operand:SI 2 "const_0_to_255_operand")
12933 (match_operand:V16SI 3 "register_operand")
12934 (match_operand:HI 4 "register_operand")]
12937 int mask = INTVAL (operands[2]);
12938 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
12939 GEN_INT ((mask >> 0) & 3),
12940 GEN_INT ((mask >> 2) & 3),
12941 GEN_INT ((mask >> 4) & 3),
12942 GEN_INT ((mask >> 6) & 3),
12943 GEN_INT (((mask >> 0) & 3) + 4),
12944 GEN_INT (((mask >> 2) & 3) + 4),
12945 GEN_INT (((mask >> 4) & 3) + 4),
12946 GEN_INT (((mask >> 6) & 3) + 4),
12947 GEN_INT (((mask >> 0) & 3) + 8),
12948 GEN_INT (((mask >> 2) & 3) + 8),
12949 GEN_INT (((mask >> 4) & 3) + 8),
12950 GEN_INT (((mask >> 6) & 3) + 8),
12951 GEN_INT (((mask >> 0) & 3) + 12),
12952 GEN_INT (((mask >> 2) & 3) + 12),
12953 GEN_INT (((mask >> 4) & 3) + 12),
12954 GEN_INT (((mask >> 6) & 3) + 12),
12955 operands[3], operands[4]));
12959 (define_insn "avx512f_pshufd_1<mask_name>"
12960 [(set (match_operand:V16SI 0 "register_operand" "=v")
12962 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
12963 (parallel [(match_operand 2 "const_0_to_3_operand")
12964 (match_operand 3 "const_0_to_3_operand")
12965 (match_operand 4 "const_0_to_3_operand")
12966 (match_operand 5 "const_0_to_3_operand")
12967 (match_operand 6 "const_4_to_7_operand")
12968 (match_operand 7 "const_4_to_7_operand")
12969 (match_operand 8 "const_4_to_7_operand")
12970 (match_operand 9 "const_4_to_7_operand")
12971 (match_operand 10 "const_8_to_11_operand")
12972 (match_operand 11 "const_8_to_11_operand")
12973 (match_operand 12 "const_8_to_11_operand")
12974 (match_operand 13 "const_8_to_11_operand")
12975 (match_operand 14 "const_12_to_15_operand")
12976 (match_operand 15 "const_12_to_15_operand")
12977 (match_operand 16 "const_12_to_15_operand")
12978 (match_operand 17 "const_12_to_15_operand")])))]
12980 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12981 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12982 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12983 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
12984 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
12985 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
12986 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
12987 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
12988 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
12989 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
12990 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
12991 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
12994 mask |= INTVAL (operands[2]) << 0;
12995 mask |= INTVAL (operands[3]) << 2;
12996 mask |= INTVAL (operands[4]) << 4;
12997 mask |= INTVAL (operands[5]) << 6;
12998 operands[2] = GEN_INT (mask);
13000 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
13002 [(set_attr "type" "sselog1")
13003 (set_attr "prefix" "evex")
13004 (set_attr "length_immediate" "1")
13005 (set_attr "mode" "XI")])
13007 (define_expand "avx512vl_pshufdv3_mask"
13008 [(match_operand:V8SI 0 "register_operand")
13009 (match_operand:V8SI 1 "nonimmediate_operand")
13010 (match_operand:SI 2 "const_0_to_255_operand")
13011 (match_operand:V8SI 3 "register_operand")
13012 (match_operand:QI 4 "register_operand")]
13015 int mask = INTVAL (operands[2]);
13016 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
13017 GEN_INT ((mask >> 0) & 3),
13018 GEN_INT ((mask >> 2) & 3),
13019 GEN_INT ((mask >> 4) & 3),
13020 GEN_INT ((mask >> 6) & 3),
13021 GEN_INT (((mask >> 0) & 3) + 4),
13022 GEN_INT (((mask >> 2) & 3) + 4),
13023 GEN_INT (((mask >> 4) & 3) + 4),
13024 GEN_INT (((mask >> 6) & 3) + 4),
13025 operands[3], operands[4]));
13029 (define_expand "avx2_pshufdv3"
13030 [(match_operand:V8SI 0 "register_operand")
13031 (match_operand:V8SI 1 "nonimmediate_operand")
13032 (match_operand:SI 2 "const_0_to_255_operand")]
13035 int mask = INTVAL (operands[2]);
13036 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
13037 GEN_INT ((mask >> 0) & 3),
13038 GEN_INT ((mask >> 2) & 3),
13039 GEN_INT ((mask >> 4) & 3),
13040 GEN_INT ((mask >> 6) & 3),
13041 GEN_INT (((mask >> 0) & 3) + 4),
13042 GEN_INT (((mask >> 2) & 3) + 4),
13043 GEN_INT (((mask >> 4) & 3) + 4),
13044 GEN_INT (((mask >> 6) & 3) + 4)));
13048 (define_insn "avx2_pshufd_1<mask_name>"
13049 [(set (match_operand:V8SI 0 "register_operand" "=v")
13051 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
13052 (parallel [(match_operand 2 "const_0_to_3_operand")
13053 (match_operand 3 "const_0_to_3_operand")
13054 (match_operand 4 "const_0_to_3_operand")
13055 (match_operand 5 "const_0_to_3_operand")
13056 (match_operand 6 "const_4_to_7_operand")
13057 (match_operand 7 "const_4_to_7_operand")
13058 (match_operand 8 "const_4_to_7_operand")
13059 (match_operand 9 "const_4_to_7_operand")])))]
13061 && <mask_avx512vl_condition>
13062 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
13063 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
13064 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
13065 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
13068 mask |= INTVAL (operands[2]) << 0;
13069 mask |= INTVAL (operands[3]) << 2;
13070 mask |= INTVAL (operands[4]) << 4;
13071 mask |= INTVAL (operands[5]) << 6;
13072 operands[2] = GEN_INT (mask);
13074 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13076 [(set_attr "type" "sselog1")
13077 (set_attr "prefix" "maybe_evex")
13078 (set_attr "length_immediate" "1")
13079 (set_attr "mode" "OI")])
13081 (define_expand "avx512vl_pshufd_mask"
13082 [(match_operand:V4SI 0 "register_operand")
13083 (match_operand:V4SI 1 "nonimmediate_operand")
13084 (match_operand:SI 2 "const_0_to_255_operand")
13085 (match_operand:V4SI 3 "register_operand")
13086 (match_operand:QI 4 "register_operand")]
13089 int mask = INTVAL (operands[2]);
13090 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
13091 GEN_INT ((mask >> 0) & 3),
13092 GEN_INT ((mask >> 2) & 3),
13093 GEN_INT ((mask >> 4) & 3),
13094 GEN_INT ((mask >> 6) & 3),
13095 operands[3], operands[4]));
13099 (define_expand "sse2_pshufd"
13100 [(match_operand:V4SI 0 "register_operand")
13101 (match_operand:V4SI 1 "vector_operand")
13102 (match_operand:SI 2 "const_int_operand")]
13105 int mask = INTVAL (operands[2]);
13106 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
13107 GEN_INT ((mask >> 0) & 3),
13108 GEN_INT ((mask >> 2) & 3),
13109 GEN_INT ((mask >> 4) & 3),
13110 GEN_INT ((mask >> 6) & 3)));
13114 (define_insn "sse2_pshufd_1<mask_name>"
13115 [(set (match_operand:V4SI 0 "register_operand" "=v")
13117 (match_operand:V4SI 1 "vector_operand" "vBm")
13118 (parallel [(match_operand 2 "const_0_to_3_operand")
13119 (match_operand 3 "const_0_to_3_operand")
13120 (match_operand 4 "const_0_to_3_operand")
13121 (match_operand 5 "const_0_to_3_operand")])))]
13122 "TARGET_SSE2 && <mask_avx512vl_condition>"
13125 mask |= INTVAL (operands[2]) << 0;
13126 mask |= INTVAL (operands[3]) << 2;
13127 mask |= INTVAL (operands[4]) << 4;
13128 mask |= INTVAL (operands[5]) << 6;
13129 operands[2] = GEN_INT (mask);
13131 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13133 [(set_attr "type" "sselog1")
13134 (set_attr "prefix_data16" "1")
13135 (set_attr "prefix" "<mask_prefix2>")
13136 (set_attr "length_immediate" "1")
13137 (set_attr "mode" "TI")])
13139 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
13140 [(set (match_operand:V32HI 0 "register_operand" "=v")
13142 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
13143 (match_operand:SI 2 "const_0_to_255_operand" "n")]
13146 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13147 [(set_attr "type" "sselog")
13148 (set_attr "prefix" "evex")
13149 (set_attr "mode" "XI")])
13151 (define_expand "avx512vl_pshuflwv3_mask"
13152 [(match_operand:V16HI 0 "register_operand")
13153 (match_operand:V16HI 1 "nonimmediate_operand")
13154 (match_operand:SI 2 "const_0_to_255_operand")
13155 (match_operand:V16HI 3 "register_operand")
13156 (match_operand:HI 4 "register_operand")]
13157 "TARGET_AVX512VL && TARGET_AVX512BW"
13159 int mask = INTVAL (operands[2]);
13160 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
13161 GEN_INT ((mask >> 0) & 3),
13162 GEN_INT ((mask >> 2) & 3),
13163 GEN_INT ((mask >> 4) & 3),
13164 GEN_INT ((mask >> 6) & 3),
13165 GEN_INT (((mask >> 0) & 3) + 8),
13166 GEN_INT (((mask >> 2) & 3) + 8),
13167 GEN_INT (((mask >> 4) & 3) + 8),
13168 GEN_INT (((mask >> 6) & 3) + 8),
13169 operands[3], operands[4]));
13173 (define_expand "avx2_pshuflwv3"
13174 [(match_operand:V16HI 0 "register_operand")
13175 (match_operand:V16HI 1 "nonimmediate_operand")
13176 (match_operand:SI 2 "const_0_to_255_operand")]
13179 int mask = INTVAL (operands[2]);
13180 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
13181 GEN_INT ((mask >> 0) & 3),
13182 GEN_INT ((mask >> 2) & 3),
13183 GEN_INT ((mask >> 4) & 3),
13184 GEN_INT ((mask >> 6) & 3),
13185 GEN_INT (((mask >> 0) & 3) + 8),
13186 GEN_INT (((mask >> 2) & 3) + 8),
13187 GEN_INT (((mask >> 4) & 3) + 8),
13188 GEN_INT (((mask >> 6) & 3) + 8)));
13192 (define_insn "avx2_pshuflw_1<mask_name>"
13193 [(set (match_operand:V16HI 0 "register_operand" "=v")
13195 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
13196 (parallel [(match_operand 2 "const_0_to_3_operand")
13197 (match_operand 3 "const_0_to_3_operand")
13198 (match_operand 4 "const_0_to_3_operand")
13199 (match_operand 5 "const_0_to_3_operand")
13204 (match_operand 6 "const_8_to_11_operand")
13205 (match_operand 7 "const_8_to_11_operand")
13206 (match_operand 8 "const_8_to_11_operand")
13207 (match_operand 9 "const_8_to_11_operand")
13211 (const_int 15)])))]
13213 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
13214 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
13215 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
13216 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
13217 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
13220 mask |= INTVAL (operands[2]) << 0;
13221 mask |= INTVAL (operands[3]) << 2;
13222 mask |= INTVAL (operands[4]) << 4;
13223 mask |= INTVAL (operands[5]) << 6;
13224 operands[2] = GEN_INT (mask);
13226 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13228 [(set_attr "type" "sselog")
13229 (set_attr "prefix" "maybe_evex")
13230 (set_attr "length_immediate" "1")
13231 (set_attr "mode" "OI")])
13233 (define_expand "avx512vl_pshuflw_mask"
13234 [(match_operand:V8HI 0 "register_operand")
13235 (match_operand:V8HI 1 "nonimmediate_operand")
13236 (match_operand:SI 2 "const_0_to_255_operand")
13237 (match_operand:V8HI 3 "register_operand")
13238 (match_operand:QI 4 "register_operand")]
13239 "TARGET_AVX512VL && TARGET_AVX512BW"
13241 int mask = INTVAL (operands[2]);
13242 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
13243 GEN_INT ((mask >> 0) & 3),
13244 GEN_INT ((mask >> 2) & 3),
13245 GEN_INT ((mask >> 4) & 3),
13246 GEN_INT ((mask >> 6) & 3),
13247 operands[3], operands[4]));
13251 (define_expand "sse2_pshuflw"
13252 [(match_operand:V8HI 0 "register_operand")
13253 (match_operand:V8HI 1 "vector_operand")
13254 (match_operand:SI 2 "const_int_operand")]
13257 int mask = INTVAL (operands[2]);
13258 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
13259 GEN_INT ((mask >> 0) & 3),
13260 GEN_INT ((mask >> 2) & 3),
13261 GEN_INT ((mask >> 4) & 3),
13262 GEN_INT ((mask >> 6) & 3)));
13266 (define_insn "sse2_pshuflw_1<mask_name>"
13267 [(set (match_operand:V8HI 0 "register_operand" "=v")
13269 (match_operand:V8HI 1 "vector_operand" "vBm")
13270 (parallel [(match_operand 2 "const_0_to_3_operand")
13271 (match_operand 3 "const_0_to_3_operand")
13272 (match_operand 4 "const_0_to_3_operand")
13273 (match_operand 5 "const_0_to_3_operand")
13278 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
13281 mask |= INTVAL (operands[2]) << 0;
13282 mask |= INTVAL (operands[3]) << 2;
13283 mask |= INTVAL (operands[4]) << 4;
13284 mask |= INTVAL (operands[5]) << 6;
13285 operands[2] = GEN_INT (mask);
13287 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13289 [(set_attr "type" "sselog")
13290 (set_attr "prefix_data16" "0")
13291 (set_attr "prefix_rep" "1")
13292 (set_attr "prefix" "maybe_vex")
13293 (set_attr "length_immediate" "1")
13294 (set_attr "mode" "TI")])
13296 (define_expand "avx2_pshufhwv3"
13297 [(match_operand:V16HI 0 "register_operand")
13298 (match_operand:V16HI 1 "nonimmediate_operand")
13299 (match_operand:SI 2 "const_0_to_255_operand")]
13302 int mask = INTVAL (operands[2]);
13303 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
13304 GEN_INT (((mask >> 0) & 3) + 4),
13305 GEN_INT (((mask >> 2) & 3) + 4),
13306 GEN_INT (((mask >> 4) & 3) + 4),
13307 GEN_INT (((mask >> 6) & 3) + 4),
13308 GEN_INT (((mask >> 0) & 3) + 12),
13309 GEN_INT (((mask >> 2) & 3) + 12),
13310 GEN_INT (((mask >> 4) & 3) + 12),
13311 GEN_INT (((mask >> 6) & 3) + 12)));
13315 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
13316 [(set (match_operand:V32HI 0 "register_operand" "=v")
13318 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
13319 (match_operand:SI 2 "const_0_to_255_operand" "n")]
13322 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13323 [(set_attr "type" "sselog")
13324 (set_attr "prefix" "evex")
13325 (set_attr "mode" "XI")])
13327 (define_expand "avx512vl_pshufhwv3_mask"
13328 [(match_operand:V16HI 0 "register_operand")
13329 (match_operand:V16HI 1 "nonimmediate_operand")
13330 (match_operand:SI 2 "const_0_to_255_operand")
13331 (match_operand:V16HI 3 "register_operand")
13332 (match_operand:HI 4 "register_operand")]
13333 "TARGET_AVX512VL && TARGET_AVX512BW"
13335 int mask = INTVAL (operands[2]);
13336 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
13337 GEN_INT (((mask >> 0) & 3) + 4),
13338 GEN_INT (((mask >> 2) & 3) + 4),
13339 GEN_INT (((mask >> 4) & 3) + 4),
13340 GEN_INT (((mask >> 6) & 3) + 4),
13341 GEN_INT (((mask >> 0) & 3) + 12),
13342 GEN_INT (((mask >> 2) & 3) + 12),
13343 GEN_INT (((mask >> 4) & 3) + 12),
13344 GEN_INT (((mask >> 6) & 3) + 12),
13345 operands[3], operands[4]));
13349 (define_insn "avx2_pshufhw_1<mask_name>"
13350 [(set (match_operand:V16HI 0 "register_operand" "=v")
13352 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
13353 (parallel [(const_int 0)
13357 (match_operand 2 "const_4_to_7_operand")
13358 (match_operand 3 "const_4_to_7_operand")
13359 (match_operand 4 "const_4_to_7_operand")
13360 (match_operand 5 "const_4_to_7_operand")
13365 (match_operand 6 "const_12_to_15_operand")
13366 (match_operand 7 "const_12_to_15_operand")
13367 (match_operand 8 "const_12_to_15_operand")
13368 (match_operand 9 "const_12_to_15_operand")])))]
13370 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
13371 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
13372 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
13373 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
13374 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
13377 mask |= (INTVAL (operands[2]) - 4) << 0;
13378 mask |= (INTVAL (operands[3]) - 4) << 2;
13379 mask |= (INTVAL (operands[4]) - 4) << 4;
13380 mask |= (INTVAL (operands[5]) - 4) << 6;
13381 operands[2] = GEN_INT (mask);
13383 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13385 [(set_attr "type" "sselog")
13386 (set_attr "prefix" "maybe_evex")
13387 (set_attr "length_immediate" "1")
13388 (set_attr "mode" "OI")])
13390 (define_expand "avx512vl_pshufhw_mask"
13391 [(match_operand:V8HI 0 "register_operand")
13392 (match_operand:V8HI 1 "nonimmediate_operand")
13393 (match_operand:SI 2 "const_0_to_255_operand")
13394 (match_operand:V8HI 3 "register_operand")
13395 (match_operand:QI 4 "register_operand")]
13396 "TARGET_AVX512VL && TARGET_AVX512BW"
13398 int mask = INTVAL (operands[2]);
13399 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
13400 GEN_INT (((mask >> 0) & 3) + 4),
13401 GEN_INT (((mask >> 2) & 3) + 4),
13402 GEN_INT (((mask >> 4) & 3) + 4),
13403 GEN_INT (((mask >> 6) & 3) + 4),
13404 operands[3], operands[4]));
13408 (define_expand "sse2_pshufhw"
13409 [(match_operand:V8HI 0 "register_operand")
13410 (match_operand:V8HI 1 "vector_operand")
13411 (match_operand:SI 2 "const_int_operand")]
13414 int mask = INTVAL (operands[2]);
13415 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
13416 GEN_INT (((mask >> 0) & 3) + 4),
13417 GEN_INT (((mask >> 2) & 3) + 4),
13418 GEN_INT (((mask >> 4) & 3) + 4),
13419 GEN_INT (((mask >> 6) & 3) + 4)));
13423 (define_insn "sse2_pshufhw_1<mask_name>"
13424 [(set (match_operand:V8HI 0 "register_operand" "=v")
13426 (match_operand:V8HI 1 "vector_operand" "vBm")
13427 (parallel [(const_int 0)
13431 (match_operand 2 "const_4_to_7_operand")
13432 (match_operand 3 "const_4_to_7_operand")
13433 (match_operand 4 "const_4_to_7_operand")
13434 (match_operand 5 "const_4_to_7_operand")])))]
13435 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
13438 mask |= (INTVAL (operands[2]) - 4) << 0;
13439 mask |= (INTVAL (operands[3]) - 4) << 2;
13440 mask |= (INTVAL (operands[4]) - 4) << 4;
13441 mask |= (INTVAL (operands[5]) - 4) << 6;
13442 operands[2] = GEN_INT (mask);
13444 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13446 [(set_attr "type" "sselog")
13447 (set_attr "prefix_rep" "1")
13448 (set_attr "prefix_data16" "0")
13449 (set_attr "prefix" "maybe_vex")
13450 (set_attr "length_immediate" "1")
13451 (set_attr "mode" "TI")])
13453 (define_expand "sse2_loadd"
13454 [(set (match_operand:V4SI 0 "register_operand")
13456 (vec_duplicate:V4SI
13457 (match_operand:SI 1 "nonimmediate_operand"))
13461 "operands[2] = CONST0_RTX (V4SImode);")
13463 (define_insn "sse2_loadld"
13464 [(set (match_operand:V4SI 0 "register_operand" "=v,Yi,x,x,v")
13466 (vec_duplicate:V4SI
13467 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
13468 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,v")
13472 %vmovd\t{%2, %0|%0, %2}
13473 %vmovd\t{%2, %0|%0, %2}
13474 movss\t{%2, %0|%0, %2}
13475 movss\t{%2, %0|%0, %2}
13476 vmovss\t{%2, %1, %0|%0, %1, %2}"
13477 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
13478 (set_attr "type" "ssemov")
13479 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
13480 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
13482 ;; QI and HI modes handled by pextr patterns.
13483 (define_mode_iterator PEXTR_MODE12
13484 [(V16QI "TARGET_SSE4_1") V8HI])
13486 (define_insn "*vec_extract<mode>"
13487 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
13488 (vec_select:<ssescalarmode>
13489 (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
13491 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
13494 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13495 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
13496 vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13497 vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13498 [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
13499 (set_attr "type" "sselog1")
13500 (set_attr "prefix_data16" "1")
13501 (set (attr "prefix_extra")
13503 (and (eq_attr "alternative" "0,2")
13504 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13506 (const_string "1")))
13507 (set_attr "length_immediate" "1")
13508 (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
13509 (set_attr "mode" "TI")])
13511 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
13512 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
13514 (vec_select:<PEXTR_MODE12:ssescalarmode>
13515 (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
13517 [(match_operand:SI 2
13518 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
13521 %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13522 vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
13523 [(set_attr "isa" "*,avx512bw")
13524 (set_attr "type" "sselog1")
13525 (set_attr "prefix_data16" "1")
13526 (set (attr "prefix_extra")
13528 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
13530 (const_string "1")))
13531 (set_attr "length_immediate" "1")
13532 (set_attr "prefix" "maybe_vex")
13533 (set_attr "mode" "TI")])
13535 (define_insn "*vec_extract<mode>_mem"
13536 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
13537 (vec_select:<ssescalarmode>
13538 (match_operand:VI12_128 1 "memory_operand" "o")
13540 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
13544 (define_insn "*vec_extract<ssevecmodelower>_0"
13545 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,v ,m")
13547 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,vm,v")
13548 (parallel [(const_int 0)])))]
13549 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13552 (define_insn "*vec_extractv2di_0_sse"
13553 [(set (match_operand:DI 0 "nonimmediate_operand" "=v,m")
13555 (match_operand:V2DI 1 "nonimmediate_operand" "vm,v")
13556 (parallel [(const_int 0)])))]
13557 "TARGET_SSE && !TARGET_64BIT
13558 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13562 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
13564 (match_operand:<ssevecmode> 1 "register_operand")
13565 (parallel [(const_int 0)])))]
13566 "TARGET_SSE && reload_completed"
13567 [(set (match_dup 0) (match_dup 1))]
13568 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
13570 (define_insn "*vec_extractv4si_0_zext_sse4"
13571 [(set (match_operand:DI 0 "register_operand" "=r,x,v")
13574 (match_operand:V4SI 1 "register_operand" "Yj,x,v")
13575 (parallel [(const_int 0)]))))]
13578 [(set_attr "isa" "x64,*,avx512f")])
13580 (define_insn "*vec_extractv4si_0_zext"
13581 [(set (match_operand:DI 0 "register_operand" "=r")
13584 (match_operand:V4SI 1 "register_operand" "x")
13585 (parallel [(const_int 0)]))))]
13586 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
13590 [(set (match_operand:DI 0 "register_operand")
13593 (match_operand:V4SI 1 "register_operand")
13594 (parallel [(const_int 0)]))))]
13595 "TARGET_SSE2 && reload_completed"
13596 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
13597 "operands[1] = gen_lowpart (SImode, operands[1]);")
13599 (define_insn "*vec_extractv4si"
13600 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
13602 (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
13603 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
13606 switch (which_alternative)
13610 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
13614 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
13615 return "psrldq\t{%2, %0|%0, %2}";
13619 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
13620 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
13623 gcc_unreachable ();
13626 [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
13627 (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
13628 (set (attr "prefix_extra")
13629 (if_then_else (eq_attr "alternative" "0,1")
13631 (const_string "*")))
13632 (set_attr "length_immediate" "1")
13633 (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
13634 (set_attr "mode" "TI")])
13636 (define_insn "*vec_extractv4si_zext"
13637 [(set (match_operand:DI 0 "register_operand" "=r,r")
13640 (match_operand:V4SI 1 "register_operand" "x,v")
13641 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13642 "TARGET_64BIT && TARGET_SSE4_1"
13643 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
13644 [(set_attr "isa" "*,avx512dq")
13645 (set_attr "type" "sselog1")
13646 (set_attr "prefix_extra" "1")
13647 (set_attr "length_immediate" "1")
13648 (set_attr "prefix" "maybe_vex")
13649 (set_attr "mode" "TI")])
13651 (define_insn "*vec_extractv4si_mem"
13652 [(set (match_operand:SI 0 "register_operand" "=x,r")
13654 (match_operand:V4SI 1 "memory_operand" "o,o")
13655 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
13659 (define_insn_and_split "*vec_extractv4si_zext_mem"
13660 [(set (match_operand:DI 0 "register_operand" "=x,r")
13663 (match_operand:V4SI 1 "memory_operand" "o,o")
13664 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13665 "TARGET_64BIT && TARGET_SSE"
13667 "&& reload_completed"
13668 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
13670 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
13673 (define_insn "*vec_extractv2di_1"
13674 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm,m,x,x,Yv,x,v,r")
13676 (match_operand:V2DI 1 "nonimmediate_operand" "x ,v ,v,0,x, v,x,o,o")
13677 (parallel [(const_int 1)])))]
13678 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13680 %vpextrq\t{$1, %1, %0|%0, %1, 1}
13681 vpextrq\t{$1, %1, %0|%0, %1, 1}
13682 %vmovhps\t{%1, %0|%0, %1}
13683 psrldq\t{$8, %0|%0, 8}
13684 vpsrldq\t{$8, %1, %0|%0, %1, 8}
13685 vpsrldq\t{$8, %1, %0|%0, %1, 8}
13686 movhlps\t{%1, %0|%0, %1}
13690 (cond [(eq_attr "alternative" "0")
13691 (const_string "x64_sse4")
13692 (eq_attr "alternative" "1")
13693 (const_string "x64_avx512dq")
13694 (eq_attr "alternative" "3")
13695 (const_string "sse2_noavx")
13696 (eq_attr "alternative" "4")
13697 (const_string "avx")
13698 (eq_attr "alternative" "5")
13699 (const_string "avx512bw")
13700 (eq_attr "alternative" "6")
13701 (const_string "noavx")
13702 (eq_attr "alternative" "8")
13703 (const_string "x64")
13705 (const_string "*")))
13707 (cond [(eq_attr "alternative" "2,6,7")
13708 (const_string "ssemov")
13709 (eq_attr "alternative" "3,4,5")
13710 (const_string "sseishft1")
13711 (eq_attr "alternative" "8")
13712 (const_string "imov")
13714 (const_string "sselog1")))
13715 (set (attr "length_immediate")
13716 (if_then_else (eq_attr "alternative" "0,1,3,4,5")
13718 (const_string "*")))
13719 (set (attr "prefix_rex")
13720 (if_then_else (eq_attr "alternative" "0,1")
13722 (const_string "*")))
13723 (set (attr "prefix_extra")
13724 (if_then_else (eq_attr "alternative" "0,1")
13726 (const_string "*")))
13727 (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
13728 (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
13731 [(set (match_operand:<ssescalarmode> 0 "register_operand")
13732 (vec_select:<ssescalarmode>
13733 (match_operand:VI_128 1 "memory_operand")
13735 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
13736 "TARGET_SSE && reload_completed"
13737 [(set (match_dup 0) (match_dup 1))]
13739 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
13741 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
13744 (define_insn "*vec_extractv2ti"
13745 [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
13747 (match_operand:V2TI 1 "register_operand" "x,v")
13749 [(match_operand:SI 2 "const_0_to_1_operand")])))]
13752 vextract%~128\t{%2, %1, %0|%0, %1, %2}
13753 vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
13754 [(set_attr "type" "sselog")
13755 (set_attr "prefix_extra" "1")
13756 (set_attr "length_immediate" "1")
13757 (set_attr "prefix" "vex,evex")
13758 (set_attr "mode" "OI")])
13760 (define_insn "*vec_extractv4ti"
13761 [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
13763 (match_operand:V4TI 1 "register_operand" "v")
13765 [(match_operand:SI 2 "const_0_to_3_operand")])))]
13767 "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
13768 [(set_attr "type" "sselog")
13769 (set_attr "prefix_extra" "1")
13770 (set_attr "length_immediate" "1")
13771 (set_attr "prefix" "evex")
13772 (set_attr "mode" "XI")])
13774 (define_mode_iterator VEXTRACTI128_MODE
13775 [(V4TI "TARGET_AVX512F") V2TI])
13778 [(set (match_operand:TI 0 "nonimmediate_operand")
13780 (match_operand:VEXTRACTI128_MODE 1 "register_operand")
13781 (parallel [(const_int 0)])))]
13783 && reload_completed
13784 && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
13785 [(set (match_dup 0) (match_dup 1))]
13786 "operands[1] = gen_lowpart (TImode, operands[1]);")
13788 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
13789 ;; vector modes into vec_extract*.
13791 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
13792 (subreg:SWI48x (match_operand 1 "register_operand") 0))]
13793 "can_create_pseudo_p ()
13794 && REG_P (operands[1])
13795 && VECTOR_MODE_P (GET_MODE (operands[1]))
13796 && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
13797 || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
13798 || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
13799 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
13800 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
13801 (parallel [(const_int 0)])))]
13805 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
13808 if (<MODE>mode == SImode)
13810 tmp = gen_reg_rtx (V8SImode);
13811 emit_insn (gen_vec_extract_lo_v16si (tmp,
13812 gen_lowpart (V16SImode,
13817 tmp = gen_reg_rtx (V4DImode);
13818 emit_insn (gen_vec_extract_lo_v8di (tmp,
13819 gen_lowpart (V8DImode,
13825 tmp = gen_reg_rtx (<ssevecmode>mode);
13826 if (<MODE>mode == SImode)
13827 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
13830 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
13835 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
13840 (define_insn "*vec_concatv2si_sse4_1"
13841 [(set (match_operand:V2SI 0 "register_operand"
13842 "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
13844 (match_operand:SI 1 "nonimmediate_operand"
13845 " 0, 0, x,Yv, 0, 0,Yv,rm, 0,rm")
13846 (match_operand:SI 2 "vector_move_operand"
13847 " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
13848 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13850 pinsrd\t{$1, %2, %0|%0, %2, 1}
13851 pinsrd\t{$1, %2, %0|%0, %2, 1}
13852 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
13853 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
13854 punpckldq\t{%2, %0|%0, %2}
13855 punpckldq\t{%2, %0|%0, %2}
13856 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
13857 %vmovd\t{%1, %0|%0, %1}
13858 punpckldq\t{%2, %0|%0, %2}
13859 movd\t{%1, %0|%0, %1}"
13860 [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
13862 (cond [(eq_attr "alternative" "7")
13863 (const_string "ssemov")
13864 (eq_attr "alternative" "8")
13865 (const_string "mmxcvt")
13866 (eq_attr "alternative" "9")
13867 (const_string "mmxmov")
13869 (const_string "sselog")))
13870 (set (attr "prefix_extra")
13871 (if_then_else (eq_attr "alternative" "0,1,2,3")
13873 (const_string "*")))
13874 (set (attr "length_immediate")
13875 (if_then_else (eq_attr "alternative" "0,1,2,3")
13877 (const_string "*")))
13878 (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
13879 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
13881 ;; ??? In theory we can match memory for the MMX alternative, but allowing
13882 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
13883 ;; alternatives pretty much forces the MMX alternative to be chosen.
13884 (define_insn "*vec_concatv2si"
13885 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
13887 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
13888 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
13889 "TARGET_SSE && !TARGET_SSE4_1"
13891 punpckldq\t{%2, %0|%0, %2}
13892 movd\t{%1, %0|%0, %1}
13893 movd\t{%1, %0|%0, %1}
13894 unpcklps\t{%2, %0|%0, %2}
13895 movss\t{%1, %0|%0, %1}
13896 punpckldq\t{%2, %0|%0, %2}
13897 movd\t{%1, %0|%0, %1}"
13898 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
13899 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
13900 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
13902 (define_insn "*vec_concatv4si"
13903 [(set (match_operand:V4SI 0 "register_operand" "=x,v,x,x,v")
13905 (match_operand:V2SI 1 "register_operand" " 0,v,0,0,v")
13906 (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
13909 punpcklqdq\t{%2, %0|%0, %2}
13910 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
13911 movlhps\t{%2, %0|%0, %2}
13912 movhps\t{%2, %0|%0, %q2}
13913 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
13914 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
13915 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
13916 (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
13917 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
13919 ;; movd instead of movq is required to handle broken assemblers.
13920 (define_insn "vec_concatv2di"
13921 [(set (match_operand:V2DI 0 "register_operand"
13922 "=Yr,*x,x ,v ,Yi,v ,x ,x,v ,x,x,v")
13924 (match_operand:DI 1 "nonimmediate_operand"
13925 " 0, 0,x ,Yv,r ,vm,?!*Yn,0,Yv,0,0,v")
13926 (match_operand:DI 2 "vector_move_operand"
13927 "*rm,rm,rm,rm,C ,C ,C ,x,Yv,x,m,m")))]
13930 pinsrq\t{$1, %2, %0|%0, %2, 1}
13931 pinsrq\t{$1, %2, %0|%0, %2, 1}
13932 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
13933 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
13934 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
13935 %vmovq\t{%1, %0|%0, %1}
13936 movq2dq\t{%1, %0|%0, %1}
13937 punpcklqdq\t{%2, %0|%0, %2}
13938 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
13939 movlhps\t{%2, %0|%0, %2}
13940 movhps\t{%2, %0|%0, %2}
13941 vmovhps\t{%2, %1, %0|%0, %1, %2}"
13943 (cond [(eq_attr "alternative" "0,1")
13944 (const_string "x64_sse4_noavx")
13945 (eq_attr "alternative" "2")
13946 (const_string "x64_avx")
13947 (eq_attr "alternative" "3")
13948 (const_string "x64_avx512dq")
13949 (eq_attr "alternative" "4")
13950 (const_string "x64")
13951 (eq_attr "alternative" "5,6")
13952 (const_string "sse2")
13953 (eq_attr "alternative" "7")
13954 (const_string "sse2_noavx")
13955 (eq_attr "alternative" "8,11")
13956 (const_string "avx")
13958 (const_string "noavx")))
13961 (eq_attr "alternative" "0,1,2,3,7,8")
13962 (const_string "sselog")
13963 (const_string "ssemov")))
13964 (set (attr "prefix_rex")
13965 (if_then_else (eq_attr "alternative" "0,1,2,3,4")
13967 (const_string "*")))
13968 (set (attr "prefix_extra")
13969 (if_then_else (eq_attr "alternative" "0,1,2,3")
13971 (const_string "*")))
13972 (set (attr "length_immediate")
13973 (if_then_else (eq_attr "alternative" "0,1,2,3")
13975 (const_string "*")))
13976 (set (attr "prefix")
13977 (cond [(eq_attr "alternative" "2")
13978 (const_string "vex")
13979 (eq_attr "alternative" "3")
13980 (const_string "evex")
13981 (eq_attr "alternative" "4,5")
13982 (const_string "maybe_vex")
13983 (eq_attr "alternative" "8,11")
13984 (const_string "maybe_evex")
13986 (const_string "orig")))
13987 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
13989 (define_expand "vec_unpacks_lo_<mode>"
13990 [(match_operand:<sseunpackmode> 0 "register_operand")
13991 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13993 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
13995 (define_expand "vec_unpacks_hi_<mode>"
13996 [(match_operand:<sseunpackmode> 0 "register_operand")
13997 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
13999 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
14001 (define_expand "vec_unpacku_lo_<mode>"
14002 [(match_operand:<sseunpackmode> 0 "register_operand")
14003 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14005 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
14007 (define_expand "vec_unpacks_lo_hi"
14008 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
14009 (match_operand:HI 1 "register_operand"))]
14012 (define_expand "vec_unpacks_lo_si"
14013 [(set (match_operand:HI 0 "register_operand")
14014 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
14017 (define_expand "vec_unpacks_lo_di"
14018 [(set (match_operand:SI 0 "register_operand")
14019 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
14022 (define_expand "vec_unpacku_hi_<mode>"
14023 [(match_operand:<sseunpackmode> 0 "register_operand")
14024 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14026 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
14028 (define_expand "vec_unpacks_hi_hi"
14030 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
14031 (lshiftrt:HI (match_operand:HI 1 "register_operand")
14033 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
14036 (define_expand "vec_unpacks_hi_<mode>"
14038 [(set (subreg:SWI48x
14039 (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
14040 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
14042 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
14044 "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
14046 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14050 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14052 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
14053 [(set (match_operand:VI12_AVX2 0 "register_operand")
14054 (truncate:VI12_AVX2
14055 (lshiftrt:<ssedoublemode>
14056 (plus:<ssedoublemode>
14057 (plus:<ssedoublemode>
14058 (zero_extend:<ssedoublemode>
14059 (match_operand:VI12_AVX2 1 "vector_operand"))
14060 (zero_extend:<ssedoublemode>
14061 (match_operand:VI12_AVX2 2 "vector_operand")))
14062 (match_dup <mask_expand_op3>))
14064 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14067 if (<mask_applied>)
14069 operands[3] = CONST1_RTX(<MODE>mode);
14070 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
14072 if (<mask_applied>)
14074 operands[5] = operands[3];
14079 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
14080 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
14081 (truncate:VI12_AVX2
14082 (lshiftrt:<ssedoublemode>
14083 (plus:<ssedoublemode>
14084 (plus:<ssedoublemode>
14085 (zero_extend:<ssedoublemode>
14086 (match_operand:VI12_AVX2 1 "vector_operand" "%0,v"))
14087 (zero_extend:<ssedoublemode>
14088 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))
14089 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
14091 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
14092 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
14094 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
14095 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14096 [(set_attr "isa" "noavx,avx")
14097 (set_attr "type" "sseiadd")
14098 (set_attr "prefix_data16" "1,*")
14099 (set_attr "prefix" "orig,<mask_prefix>")
14100 (set_attr "mode" "<sseinsnmode>")])
14102 ;; The correct representation for this is absolutely enormous, and
14103 ;; surely not generally useful.
14104 (define_insn "<sse2_avx2>_psadbw"
14105 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
14106 (unspec:VI8_AVX2_AVX512BW
14107 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
14108 (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
14112 psadbw\t{%2, %0|%0, %2}
14113 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
14114 [(set_attr "isa" "noavx,avx")
14115 (set_attr "type" "sseiadd")
14116 (set_attr "atom_unit" "simul")
14117 (set_attr "prefix_data16" "1,*")
14118 (set_attr "prefix" "orig,maybe_evex")
14119 (set_attr "mode" "<sseinsnmode>")])
14121 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
14122 [(set (match_operand:SI 0 "register_operand" "=r")
14124 [(match_operand:VF_128_256 1 "register_operand" "x")]
14127 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
14128 [(set_attr "type" "ssemov")
14129 (set_attr "prefix" "maybe_vex")
14130 (set_attr "mode" "<MODE>")])
14132 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
14133 [(set (match_operand:DI 0 "register_operand" "=r")
14136 [(match_operand:VF_128_256 1 "register_operand" "x")]
14138 "TARGET_64BIT && TARGET_SSE"
14139 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
14140 [(set_attr "type" "ssemov")
14141 (set_attr "prefix" "maybe_vex")
14142 (set_attr "mode" "<MODE>")])
14144 (define_insn "<sse2_avx2>_pmovmskb"
14145 [(set (match_operand:SI 0 "register_operand" "=r")
14147 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
14150 "%vpmovmskb\t{%1, %0|%0, %1}"
14151 [(set_attr "type" "ssemov")
14152 (set (attr "prefix_data16")
14154 (match_test "TARGET_AVX")
14156 (const_string "1")))
14157 (set_attr "prefix" "maybe_vex")
14158 (set_attr "mode" "SI")])
14160 (define_insn "*<sse2_avx2>_pmovmskb_zext"
14161 [(set (match_operand:DI 0 "register_operand" "=r")
14164 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
14166 "TARGET_64BIT && TARGET_SSE2"
14167 "%vpmovmskb\t{%1, %k0|%k0, %1}"
14168 [(set_attr "type" "ssemov")
14169 (set (attr "prefix_data16")
14171 (match_test "TARGET_AVX")
14173 (const_string "1")))
14174 (set_attr "prefix" "maybe_vex")
14175 (set_attr "mode" "SI")])
14177 (define_expand "sse2_maskmovdqu"
14178 [(set (match_operand:V16QI 0 "memory_operand")
14179 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
14180 (match_operand:V16QI 2 "register_operand")
14185 (define_insn "*sse2_maskmovdqu"
14186 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
14187 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
14188 (match_operand:V16QI 2 "register_operand" "x")
14189 (mem:V16QI (match_dup 0))]
14193 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
14194 that requires %v to be at the beginning of the opcode name. */
14195 if (Pmode != word_mode)
14196 fputs ("\taddr32", asm_out_file);
14197 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
14199 [(set_attr "type" "ssemov")
14200 (set_attr "prefix_data16" "1")
14201 (set (attr "length_address")
14202 (symbol_ref ("Pmode != word_mode")))
14203 ;; The implicit %rdi operand confuses default length_vex computation.
14204 (set (attr "length_vex")
14205 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
14206 (set_attr "prefix" "maybe_vex")
14207 (set_attr "znver1_decode" "vector")
14208 (set_attr "mode" "TI")])
14210 (define_insn "sse_ldmxcsr"
14211 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
14215 [(set_attr "type" "sse")
14216 (set_attr "atom_sse_attr" "mxcsr")
14217 (set_attr "prefix" "maybe_vex")
14218 (set_attr "memory" "load")])
14220 (define_insn "sse_stmxcsr"
14221 [(set (match_operand:SI 0 "memory_operand" "=m")
14222 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
14225 [(set_attr "type" "sse")
14226 (set_attr "atom_sse_attr" "mxcsr")
14227 (set_attr "prefix" "maybe_vex")
14228 (set_attr "memory" "store")])
14230 (define_insn "sse2_clflush"
14231 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
14235 [(set_attr "type" "sse")
14236 (set_attr "atom_sse_attr" "fence")
14237 (set_attr "memory" "unknown")])
14239 ;; As per AMD and Intel ISA manuals, the first operand is extensions
14240 ;; and it goes to %ecx. The second operand received is hints and it goes
14242 (define_insn "sse3_mwait"
14243 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
14244 (match_operand:SI 1 "register_operand" "a")]
14247 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
14248 ;; Since 32bit register operands are implicitly zero extended to 64bit,
14249 ;; we only need to set up 32bit registers.
14251 [(set_attr "length" "3")])
14253 (define_insn "sse3_monitor_<mode>"
14254 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
14255 (match_operand:SI 1 "register_operand" "c")
14256 (match_operand:SI 2 "register_operand" "d")]
14259 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
14260 ;; RCX and RDX are used. Since 32bit register operands are implicitly
14261 ;; zero extended to 64bit, we only need to set up 32bit registers.
14263 [(set (attr "length")
14264 (symbol_ref ("(Pmode != word_mode) + 3")))])
14266 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14268 ;; SSSE3 instructions
14270 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14272 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
14274 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
14275 [(set (match_operand:V16HI 0 "register_operand" "=x")
14280 (ssse3_plusminus:HI
14282 (match_operand:V16HI 1 "register_operand" "x")
14283 (parallel [(const_int 0)]))
14284 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14285 (ssse3_plusminus:HI
14286 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14287 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14289 (ssse3_plusminus:HI
14290 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
14291 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
14292 (ssse3_plusminus:HI
14293 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
14294 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
14297 (ssse3_plusminus:HI
14298 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
14299 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
14300 (ssse3_plusminus:HI
14301 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
14302 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
14304 (ssse3_plusminus:HI
14305 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
14306 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
14307 (ssse3_plusminus:HI
14308 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
14309 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
14313 (ssse3_plusminus:HI
14315 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
14316 (parallel [(const_int 0)]))
14317 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14318 (ssse3_plusminus:HI
14319 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14320 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
14322 (ssse3_plusminus:HI
14323 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
14324 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
14325 (ssse3_plusminus:HI
14326 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
14327 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
14330 (ssse3_plusminus:HI
14331 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
14332 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
14333 (ssse3_plusminus:HI
14334 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
14335 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
14337 (ssse3_plusminus:HI
14338 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
14339 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
14340 (ssse3_plusminus:HI
14341 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
14342 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
14344 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
14345 [(set_attr "type" "sseiadd")
14346 (set_attr "prefix_extra" "1")
14347 (set_attr "prefix" "vex")
14348 (set_attr "mode" "OI")])
14350 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
14351 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
14355 (ssse3_plusminus:HI
14357 (match_operand:V8HI 1 "register_operand" "0,x")
14358 (parallel [(const_int 0)]))
14359 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14360 (ssse3_plusminus:HI
14361 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14362 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14364 (ssse3_plusminus:HI
14365 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
14366 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
14367 (ssse3_plusminus:HI
14368 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
14369 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
14372 (ssse3_plusminus:HI
14374 (match_operand:V8HI 2 "vector_operand" "xBm,xm")
14375 (parallel [(const_int 0)]))
14376 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14377 (ssse3_plusminus:HI
14378 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14379 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
14381 (ssse3_plusminus:HI
14382 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
14383 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
14384 (ssse3_plusminus:HI
14385 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
14386 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
14389 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
14390 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
14391 [(set_attr "isa" "noavx,avx")
14392 (set_attr "type" "sseiadd")
14393 (set_attr "atom_unit" "complex")
14394 (set_attr "prefix_data16" "1,*")
14395 (set_attr "prefix_extra" "1")
14396 (set_attr "prefix" "orig,vex")
14397 (set_attr "mode" "TI")])
14399 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
14400 [(set (match_operand:V4HI 0 "register_operand" "=y")
14403 (ssse3_plusminus:HI
14405 (match_operand:V4HI 1 "register_operand" "0")
14406 (parallel [(const_int 0)]))
14407 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14408 (ssse3_plusminus:HI
14409 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14410 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14412 (ssse3_plusminus:HI
14414 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
14415 (parallel [(const_int 0)]))
14416 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14417 (ssse3_plusminus:HI
14418 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14419 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
14421 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
14422 [(set_attr "type" "sseiadd")
14423 (set_attr "atom_unit" "complex")
14424 (set_attr "prefix_extra" "1")
14425 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14426 (set_attr "mode" "DI")])
14428 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
14429 [(set (match_operand:V8SI 0 "register_operand" "=x")
14435 (match_operand:V8SI 1 "register_operand" "x")
14436 (parallel [(const_int 0)]))
14437 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14439 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
14440 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
14443 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
14444 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
14446 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
14447 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
14452 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
14453 (parallel [(const_int 0)]))
14454 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
14456 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
14457 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
14460 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
14461 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
14463 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
14464 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
14466 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
14467 [(set_attr "type" "sseiadd")
14468 (set_attr "prefix_extra" "1")
14469 (set_attr "prefix" "vex")
14470 (set_attr "mode" "OI")])
14472 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
14473 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
14478 (match_operand:V4SI 1 "register_operand" "0,x")
14479 (parallel [(const_int 0)]))
14480 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14482 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
14483 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
14487 (match_operand:V4SI 2 "vector_operand" "xBm,xm")
14488 (parallel [(const_int 0)]))
14489 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
14491 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
14492 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
14495 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
14496 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
14497 [(set_attr "isa" "noavx,avx")
14498 (set_attr "type" "sseiadd")
14499 (set_attr "atom_unit" "complex")
14500 (set_attr "prefix_data16" "1,*")
14501 (set_attr "prefix_extra" "1")
14502 (set_attr "prefix" "orig,vex")
14503 (set_attr "mode" "TI")])
14505 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
14506 [(set (match_operand:V2SI 0 "register_operand" "=y")
14510 (match_operand:V2SI 1 "register_operand" "0")
14511 (parallel [(const_int 0)]))
14512 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14515 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
14516 (parallel [(const_int 0)]))
14517 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
14519 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
14520 [(set_attr "type" "sseiadd")
14521 (set_attr "atom_unit" "complex")
14522 (set_attr "prefix_extra" "1")
14523 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14524 (set_attr "mode" "DI")])
14526 (define_insn "avx2_pmaddubsw256"
14527 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
14532 (match_operand:V32QI 1 "register_operand" "x,v")
14533 (parallel [(const_int 0) (const_int 2)
14534 (const_int 4) (const_int 6)
14535 (const_int 8) (const_int 10)
14536 (const_int 12) (const_int 14)
14537 (const_int 16) (const_int 18)
14538 (const_int 20) (const_int 22)
14539 (const_int 24) (const_int 26)
14540 (const_int 28) (const_int 30)])))
14543 (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
14544 (parallel [(const_int 0) (const_int 2)
14545 (const_int 4) (const_int 6)
14546 (const_int 8) (const_int 10)
14547 (const_int 12) (const_int 14)
14548 (const_int 16) (const_int 18)
14549 (const_int 20) (const_int 22)
14550 (const_int 24) (const_int 26)
14551 (const_int 28) (const_int 30)]))))
14554 (vec_select:V16QI (match_dup 1)
14555 (parallel [(const_int 1) (const_int 3)
14556 (const_int 5) (const_int 7)
14557 (const_int 9) (const_int 11)
14558 (const_int 13) (const_int 15)
14559 (const_int 17) (const_int 19)
14560 (const_int 21) (const_int 23)
14561 (const_int 25) (const_int 27)
14562 (const_int 29) (const_int 31)])))
14564 (vec_select:V16QI (match_dup 2)
14565 (parallel [(const_int 1) (const_int 3)
14566 (const_int 5) (const_int 7)
14567 (const_int 9) (const_int 11)
14568 (const_int 13) (const_int 15)
14569 (const_int 17) (const_int 19)
14570 (const_int 21) (const_int 23)
14571 (const_int 25) (const_int 27)
14572 (const_int 29) (const_int 31)]))))))]
14574 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
14575 [(set_attr "isa" "*,avx512bw")
14576 (set_attr "type" "sseiadd")
14577 (set_attr "prefix_extra" "1")
14578 (set_attr "prefix" "vex,evex")
14579 (set_attr "mode" "OI")])
14581 ;; The correct representation for this is absolutely enormous, and
14582 ;; surely not generally useful.
14583 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
14584 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
14585 (unspec:VI2_AVX512VL
14586 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
14587 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
14588 UNSPEC_PMADDUBSW512))]
14590 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
14591 [(set_attr "type" "sseiadd")
14592 (set_attr "prefix" "evex")
14593 (set_attr "mode" "XI")])
14595 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
14596 [(set (match_operand:V32HI 0 "register_operand" "=v")
14603 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
14605 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
14607 (const_vector:V32HI [(const_int 1) (const_int 1)
14608 (const_int 1) (const_int 1)
14609 (const_int 1) (const_int 1)
14610 (const_int 1) (const_int 1)
14611 (const_int 1) (const_int 1)
14612 (const_int 1) (const_int 1)
14613 (const_int 1) (const_int 1)
14614 (const_int 1) (const_int 1)
14615 (const_int 1) (const_int 1)
14616 (const_int 1) (const_int 1)
14617 (const_int 1) (const_int 1)
14618 (const_int 1) (const_int 1)
14619 (const_int 1) (const_int 1)
14620 (const_int 1) (const_int 1)
14621 (const_int 1) (const_int 1)
14622 (const_int 1) (const_int 1)]))
14625 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14626 [(set_attr "type" "sseimul")
14627 (set_attr "prefix" "evex")
14628 (set_attr "mode" "XI")])
14630 (define_insn "ssse3_pmaddubsw128"
14631 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
14636 (match_operand:V16QI 1 "register_operand" "0,x,v")
14637 (parallel [(const_int 0) (const_int 2)
14638 (const_int 4) (const_int 6)
14639 (const_int 8) (const_int 10)
14640 (const_int 12) (const_int 14)])))
14643 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
14644 (parallel [(const_int 0) (const_int 2)
14645 (const_int 4) (const_int 6)
14646 (const_int 8) (const_int 10)
14647 (const_int 12) (const_int 14)]))))
14650 (vec_select:V8QI (match_dup 1)
14651 (parallel [(const_int 1) (const_int 3)
14652 (const_int 5) (const_int 7)
14653 (const_int 9) (const_int 11)
14654 (const_int 13) (const_int 15)])))
14656 (vec_select:V8QI (match_dup 2)
14657 (parallel [(const_int 1) (const_int 3)
14658 (const_int 5) (const_int 7)
14659 (const_int 9) (const_int 11)
14660 (const_int 13) (const_int 15)]))))))]
14663 pmaddubsw\t{%2, %0|%0, %2}
14664 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
14665 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
14666 [(set_attr "isa" "noavx,avx,avx512bw")
14667 (set_attr "type" "sseiadd")
14668 (set_attr "atom_unit" "simul")
14669 (set_attr "prefix_data16" "1,*,*")
14670 (set_attr "prefix_extra" "1")
14671 (set_attr "prefix" "orig,vex,evex")
14672 (set_attr "mode" "TI")])
14674 (define_insn "ssse3_pmaddubsw"
14675 [(set (match_operand:V4HI 0 "register_operand" "=y")
14680 (match_operand:V8QI 1 "register_operand" "0")
14681 (parallel [(const_int 0) (const_int 2)
14682 (const_int 4) (const_int 6)])))
14685 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
14686 (parallel [(const_int 0) (const_int 2)
14687 (const_int 4) (const_int 6)]))))
14690 (vec_select:V4QI (match_dup 1)
14691 (parallel [(const_int 1) (const_int 3)
14692 (const_int 5) (const_int 7)])))
14694 (vec_select:V4QI (match_dup 2)
14695 (parallel [(const_int 1) (const_int 3)
14696 (const_int 5) (const_int 7)]))))))]
14698 "pmaddubsw\t{%2, %0|%0, %2}"
14699 [(set_attr "type" "sseiadd")
14700 (set_attr "atom_unit" "simul")
14701 (set_attr "prefix_extra" "1")
14702 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14703 (set_attr "mode" "DI")])
14705 (define_mode_iterator PMULHRSW
14706 [V4HI V8HI (V16HI "TARGET_AVX2")])
14708 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
14709 [(set (match_operand:PMULHRSW 0 "register_operand")
14710 (vec_merge:PMULHRSW
14712 (lshiftrt:<ssedoublemode>
14713 (plus:<ssedoublemode>
14714 (lshiftrt:<ssedoublemode>
14715 (mult:<ssedoublemode>
14716 (sign_extend:<ssedoublemode>
14717 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
14718 (sign_extend:<ssedoublemode>
14719 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
14723 (match_operand:PMULHRSW 3 "register_operand")
14724 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
14725 "TARGET_AVX512BW && TARGET_AVX512VL"
14727 operands[5] = CONST1_RTX(<MODE>mode);
14728 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
14731 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
14732 [(set (match_operand:PMULHRSW 0 "register_operand")
14734 (lshiftrt:<ssedoublemode>
14735 (plus:<ssedoublemode>
14736 (lshiftrt:<ssedoublemode>
14737 (mult:<ssedoublemode>
14738 (sign_extend:<ssedoublemode>
14739 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
14740 (sign_extend:<ssedoublemode>
14741 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
14747 operands[3] = CONST1_RTX(<MODE>mode);
14748 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
14751 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
14752 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
14754 (lshiftrt:<ssedoublemode>
14755 (plus:<ssedoublemode>
14756 (lshiftrt:<ssedoublemode>
14757 (mult:<ssedoublemode>
14758 (sign_extend:<ssedoublemode>
14759 (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
14760 (sign_extend:<ssedoublemode>
14761 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
14763 (match_operand:VI2_AVX2 3 "const1_operand"))
14765 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
14766 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
14768 pmulhrsw\t{%2, %0|%0, %2}
14769 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
14770 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
14771 [(set_attr "isa" "noavx,avx,avx512bw")
14772 (set_attr "type" "sseimul")
14773 (set_attr "prefix_data16" "1,*,*")
14774 (set_attr "prefix_extra" "1")
14775 (set_attr "prefix" "orig,maybe_evex,evex")
14776 (set_attr "mode" "<sseinsnmode>")])
14778 (define_insn "*ssse3_pmulhrswv4hi3"
14779 [(set (match_operand:V4HI 0 "register_operand" "=y")
14786 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
14788 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
14790 (match_operand:V4HI 3 "const1_operand"))
14792 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
14793 "pmulhrsw\t{%2, %0|%0, %2}"
14794 [(set_attr "type" "sseimul")
14795 (set_attr "prefix_extra" "1")
14796 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14797 (set_attr "mode" "DI")])
14799 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
14800 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
14802 [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
14803 (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
14805 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14807 pshufb\t{%2, %0|%0, %2}
14808 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
14809 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14810 [(set_attr "isa" "noavx,avx,avx512bw")
14811 (set_attr "type" "sselog1")
14812 (set_attr "prefix_data16" "1,*,*")
14813 (set_attr "prefix_extra" "1")
14814 (set_attr "prefix" "orig,maybe_evex,evex")
14815 (set_attr "btver2_decode" "vector")
14816 (set_attr "mode" "<sseinsnmode>")])
14818 (define_insn "ssse3_pshufbv8qi3"
14819 [(set (match_operand:V8QI 0 "register_operand" "=y")
14820 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
14821 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
14824 "pshufb\t{%2, %0|%0, %2}";
14825 [(set_attr "type" "sselog1")
14826 (set_attr "prefix_extra" "1")
14827 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14828 (set_attr "mode" "DI")])
14830 (define_insn "<ssse3_avx2>_psign<mode>3"
14831 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
14833 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
14834 (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
14838 psign<ssemodesuffix>\t{%2, %0|%0, %2}
14839 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14840 [(set_attr "isa" "noavx,avx")
14841 (set_attr "type" "sselog1")
14842 (set_attr "prefix_data16" "1,*")
14843 (set_attr "prefix_extra" "1")
14844 (set_attr "prefix" "orig,vex")
14845 (set_attr "mode" "<sseinsnmode>")])
14847 (define_insn "ssse3_psign<mode>3"
14848 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
14850 [(match_operand:MMXMODEI 1 "register_operand" "0")
14851 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
14854 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
14855 [(set_attr "type" "sselog1")
14856 (set_attr "prefix_extra" "1")
14857 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14858 (set_attr "mode" "DI")])
14860 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
14861 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
14862 (vec_merge:VI1_AVX512
14864 [(match_operand:VI1_AVX512 1 "register_operand" "v")
14865 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
14866 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
14868 (match_operand:VI1_AVX512 4 "vector_move_operand" "0C")
14869 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
14870 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
14872 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14873 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
14875 [(set_attr "type" "sseishft")
14876 (set_attr "atom_unit" "sishuf")
14877 (set_attr "prefix_extra" "1")
14878 (set_attr "length_immediate" "1")
14879 (set_attr "prefix" "evex")
14880 (set_attr "mode" "<sseinsnmode>")])
14882 (define_insn "<ssse3_avx2>_palignr<mode>"
14883 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
14884 (unspec:SSESCALARMODE
14885 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
14886 (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
14887 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
14891 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14893 switch (which_alternative)
14896 return "palignr\t{%3, %2, %0|%0, %2, %3}";
14899 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14901 gcc_unreachable ();
14904 [(set_attr "isa" "noavx,avx,avx512bw")
14905 (set_attr "type" "sseishft")
14906 (set_attr "atom_unit" "sishuf")
14907 (set_attr "prefix_data16" "1,*,*")
14908 (set_attr "prefix_extra" "1")
14909 (set_attr "length_immediate" "1")
14910 (set_attr "prefix" "orig,vex,evex")
14911 (set_attr "mode" "<sseinsnmode>")])
14913 (define_insn "ssse3_palignrdi"
14914 [(set (match_operand:DI 0 "register_operand" "=y")
14915 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
14916 (match_operand:DI 2 "nonimmediate_operand" "ym")
14917 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
14921 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14922 return "palignr\t{%3, %2, %0|%0, %2, %3}";
14924 [(set_attr "type" "sseishft")
14925 (set_attr "atom_unit" "sishuf")
14926 (set_attr "prefix_extra" "1")
14927 (set_attr "length_immediate" "1")
14928 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14929 (set_attr "mode" "DI")])
14931 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
14932 ;; modes for abs instruction on pre AVX-512 targets.
14933 (define_mode_iterator VI1248_AVX512VL_AVX512BW
14934 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
14935 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
14936 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
14937 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
14939 (define_insn "*abs<mode>2"
14940 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
14941 (abs:VI1248_AVX512VL_AVX512BW
14942 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
14944 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
14945 [(set_attr "type" "sselog1")
14946 (set_attr "prefix_data16" "1")
14947 (set_attr "prefix_extra" "1")
14948 (set_attr "prefix" "maybe_vex")
14949 (set_attr "mode" "<sseinsnmode>")])
14951 (define_insn "abs<mode>2_mask"
14952 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
14953 (vec_merge:VI48_AVX512VL
14955 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
14956 (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C")
14957 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
14959 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14960 [(set_attr "type" "sselog1")
14961 (set_attr "prefix" "evex")
14962 (set_attr "mode" "<sseinsnmode>")])
14964 (define_insn "abs<mode>2_mask"
14965 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
14966 (vec_merge:VI12_AVX512VL
14968 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
14969 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C")
14970 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
14972 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14973 [(set_attr "type" "sselog1")
14974 (set_attr "prefix" "evex")
14975 (set_attr "mode" "<sseinsnmode>")])
14977 (define_expand "abs<mode>2"
14978 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
14979 (abs:VI1248_AVX512VL_AVX512BW
14980 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand")))]
14985 ix86_expand_sse2_abs (operands[0], operands[1]);
14990 (define_insn "abs<mode>2"
14991 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
14993 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
14995 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
14996 [(set_attr "type" "sselog1")
14997 (set_attr "prefix_rep" "0")
14998 (set_attr "prefix_extra" "1")
14999 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
15000 (set_attr "mode" "DI")])
15002 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15004 ;; AMD SSE4A instructions
15006 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15008 (define_insn "sse4a_movnt<mode>"
15009 [(set (match_operand:MODEF 0 "memory_operand" "=m")
15011 [(match_operand:MODEF 1 "register_operand" "x")]
15014 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
15015 [(set_attr "type" "ssemov")
15016 (set_attr "mode" "<MODE>")])
15018 (define_insn "sse4a_vmmovnt<mode>"
15019 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
15020 (unspec:<ssescalarmode>
15021 [(vec_select:<ssescalarmode>
15022 (match_operand:VF_128 1 "register_operand" "x")
15023 (parallel [(const_int 0)]))]
15026 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
15027 [(set_attr "type" "ssemov")
15028 (set_attr "mode" "<ssescalarmode>")])
15030 (define_insn "sse4a_extrqi"
15031 [(set (match_operand:V2DI 0 "register_operand" "=x")
15032 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15033 (match_operand 2 "const_0_to_255_operand")
15034 (match_operand 3 "const_0_to_255_operand")]
15037 "extrq\t{%3, %2, %0|%0, %2, %3}"
15038 [(set_attr "type" "sse")
15039 (set_attr "prefix_data16" "1")
15040 (set_attr "length_immediate" "2")
15041 (set_attr "mode" "TI")])
15043 (define_insn "sse4a_extrq"
15044 [(set (match_operand:V2DI 0 "register_operand" "=x")
15045 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15046 (match_operand:V16QI 2 "register_operand" "x")]
15049 "extrq\t{%2, %0|%0, %2}"
15050 [(set_attr "type" "sse")
15051 (set_attr "prefix_data16" "1")
15052 (set_attr "mode" "TI")])
15054 (define_insn "sse4a_insertqi"
15055 [(set (match_operand:V2DI 0 "register_operand" "=x")
15056 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15057 (match_operand:V2DI 2 "register_operand" "x")
15058 (match_operand 3 "const_0_to_255_operand")
15059 (match_operand 4 "const_0_to_255_operand")]
15062 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
15063 [(set_attr "type" "sseins")
15064 (set_attr "prefix_data16" "0")
15065 (set_attr "prefix_rep" "1")
15066 (set_attr "length_immediate" "2")
15067 (set_attr "mode" "TI")])
15069 (define_insn "sse4a_insertq"
15070 [(set (match_operand:V2DI 0 "register_operand" "=x")
15071 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15072 (match_operand:V2DI 2 "register_operand" "x")]
15075 "insertq\t{%2, %0|%0, %2}"
15076 [(set_attr "type" "sseins")
15077 (set_attr "prefix_data16" "0")
15078 (set_attr "prefix_rep" "1")
15079 (set_attr "mode" "TI")])
15081 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15083 ;; Intel SSE4.1 instructions
15085 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15087 ;; Mapping of immediate bits for blend instructions
15088 (define_mode_attr blendbits
15089 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
15091 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
15092 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15093 (vec_merge:VF_128_256
15094 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15095 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
15096 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
15099 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15100 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15101 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15102 [(set_attr "isa" "noavx,noavx,avx")
15103 (set_attr "type" "ssemov")
15104 (set_attr "length_immediate" "1")
15105 (set_attr "prefix_data16" "1,1,*")
15106 (set_attr "prefix_extra" "1")
15107 (set_attr "prefix" "orig,orig,vex")
15108 (set_attr "mode" "<MODE>")])
15110 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
15111 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15113 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
15114 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15115 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
15119 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15120 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15121 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15122 [(set_attr "isa" "noavx,noavx,avx")
15123 (set_attr "type" "ssemov")
15124 (set_attr "length_immediate" "1")
15125 (set_attr "prefix_data16" "1,1,*")
15126 (set_attr "prefix_extra" "1")
15127 (set_attr "prefix" "orig,orig,vex")
15128 (set_attr "btver2_decode" "vector,vector,vector")
15129 (set_attr "mode" "<MODE>")])
15131 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
15132 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15134 [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
15135 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15136 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
15140 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15141 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15142 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15143 [(set_attr "isa" "noavx,noavx,avx")
15144 (set_attr "type" "ssemul")
15145 (set_attr "length_immediate" "1")
15146 (set_attr "prefix_data16" "1,1,*")
15147 (set_attr "prefix_extra" "1")
15148 (set_attr "prefix" "orig,orig,vex")
15149 (set_attr "btver2_decode" "vector,vector,vector")
15150 (set_attr "znver1_decode" "vector,vector,vector")
15151 (set_attr "mode" "<MODE>")])
15153 ;; Mode attribute used by `vmovntdqa' pattern
15154 (define_mode_attr vi8_sse4_1_avx2_avx512
15155 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
15157 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
15158 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
15159 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
15162 "%vmovntdqa\t{%1, %0|%0, %1}"
15163 [(set_attr "isa" "noavx,noavx,avx")
15164 (set_attr "type" "ssemov")
15165 (set_attr "prefix_extra" "1,1,*")
15166 (set_attr "prefix" "orig,orig,maybe_evex")
15167 (set_attr "mode" "<sseinsnmode>")])
15169 (define_insn "<sse4_1_avx2>_mpsadbw"
15170 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
15172 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
15173 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
15174 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
15178 mpsadbw\t{%3, %2, %0|%0, %2, %3}
15179 mpsadbw\t{%3, %2, %0|%0, %2, %3}
15180 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15181 [(set_attr "isa" "noavx,noavx,avx")
15182 (set_attr "type" "sselog1")
15183 (set_attr "length_immediate" "1")
15184 (set_attr "prefix_extra" "1")
15185 (set_attr "prefix" "orig,orig,vex")
15186 (set_attr "btver2_decode" "vector,vector,vector")
15187 (set_attr "znver1_decode" "vector,vector,vector")
15188 (set_attr "mode" "<sseinsnmode>")])
15190 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
15191 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
15192 (vec_concat:VI2_AVX2
15193 (us_truncate:<ssehalfvecmode>
15194 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
15195 (us_truncate:<ssehalfvecmode>
15196 (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
15197 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
15199 packusdw\t{%2, %0|%0, %2}
15200 packusdw\t{%2, %0|%0, %2}
15201 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
15202 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15203 [(set_attr "isa" "noavx,noavx,avx,avx512bw")
15204 (set_attr "type" "sselog")
15205 (set_attr "prefix_extra" "1")
15206 (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
15207 (set_attr "mode" "<sseinsnmode>")])
15209 (define_insn "<sse4_1_avx2>_pblendvb"
15210 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
15212 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
15213 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
15214 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
15218 pblendvb\t{%3, %2, %0|%0, %2, %3}
15219 pblendvb\t{%3, %2, %0|%0, %2, %3}
15220 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15221 [(set_attr "isa" "noavx,noavx,avx")
15222 (set_attr "type" "ssemov")
15223 (set_attr "prefix_extra" "1")
15224 (set_attr "length_immediate" "*,*,1")
15225 (set_attr "prefix" "orig,orig,vex")
15226 (set_attr "btver2_decode" "vector,vector,vector")
15227 (set_attr "mode" "<sseinsnmode>")])
15229 (define_insn "sse4_1_pblendw"
15230 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
15232 (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
15233 (match_operand:V8HI 1 "register_operand" "0,0,x")
15234 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
15237 pblendw\t{%3, %2, %0|%0, %2, %3}
15238 pblendw\t{%3, %2, %0|%0, %2, %3}
15239 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15240 [(set_attr "isa" "noavx,noavx,avx")
15241 (set_attr "type" "ssemov")
15242 (set_attr "prefix_extra" "1")
15243 (set_attr "length_immediate" "1")
15244 (set_attr "prefix" "orig,orig,vex")
15245 (set_attr "mode" "TI")])
15247 ;; The builtin uses an 8-bit immediate. Expand that.
15248 (define_expand "avx2_pblendw"
15249 [(set (match_operand:V16HI 0 "register_operand")
15251 (match_operand:V16HI 2 "nonimmediate_operand")
15252 (match_operand:V16HI 1 "register_operand")
15253 (match_operand:SI 3 "const_0_to_255_operand")))]
15256 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
15257 operands[3] = GEN_INT (val << 8 | val);
15260 (define_insn "*avx2_pblendw"
15261 [(set (match_operand:V16HI 0 "register_operand" "=x")
15263 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
15264 (match_operand:V16HI 1 "register_operand" "x")
15265 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
15268 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
15269 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
15271 [(set_attr "type" "ssemov")
15272 (set_attr "prefix_extra" "1")
15273 (set_attr "length_immediate" "1")
15274 (set_attr "prefix" "vex")
15275 (set_attr "mode" "OI")])
15277 (define_insn "avx2_pblendd<mode>"
15278 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
15279 (vec_merge:VI4_AVX2
15280 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
15281 (match_operand:VI4_AVX2 1 "register_operand" "x")
15282 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
15284 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15285 [(set_attr "type" "ssemov")
15286 (set_attr "prefix_extra" "1")
15287 (set_attr "length_immediate" "1")
15288 (set_attr "prefix" "vex")
15289 (set_attr "mode" "<sseinsnmode>")])
15291 (define_insn "sse4_1_phminposuw"
15292 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
15293 (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
15294 UNSPEC_PHMINPOSUW))]
15296 "%vphminposuw\t{%1, %0|%0, %1}"
15297 [(set_attr "isa" "noavx,noavx,avx")
15298 (set_attr "type" "sselog1")
15299 (set_attr "prefix_extra" "1")
15300 (set_attr "prefix" "orig,orig,vex")
15301 (set_attr "mode" "TI")])
15303 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
15304 [(set (match_operand:V16HI 0 "register_operand" "=v")
15306 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
15307 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15308 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15309 [(set_attr "type" "ssemov")
15310 (set_attr "prefix_extra" "1")
15311 (set_attr "prefix" "maybe_evex")
15312 (set_attr "mode" "OI")])
15314 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
15315 [(set (match_operand:V32HI 0 "register_operand" "=v")
15317 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
15319 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15320 [(set_attr "type" "ssemov")
15321 (set_attr "prefix_extra" "1")
15322 (set_attr "prefix" "evex")
15323 (set_attr "mode" "XI")])
15325 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
15326 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
15329 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15330 (parallel [(const_int 0) (const_int 1)
15331 (const_int 2) (const_int 3)
15332 (const_int 4) (const_int 5)
15333 (const_int 6) (const_int 7)]))))]
15334 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15335 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15336 [(set_attr "isa" "noavx,noavx,avx")
15337 (set_attr "type" "ssemov")
15338 (set_attr "prefix_extra" "1")
15339 (set_attr "prefix" "orig,orig,maybe_evex")
15340 (set_attr "mode" "TI")])
15342 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
15343 [(set (match_operand:V16SI 0 "register_operand" "=v")
15345 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
15347 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15348 [(set_attr "type" "ssemov")
15349 (set_attr "prefix" "evex")
15350 (set_attr "mode" "XI")])
15352 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
15353 [(set (match_operand:V8SI 0 "register_operand" "=v")
15356 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15357 (parallel [(const_int 0) (const_int 1)
15358 (const_int 2) (const_int 3)
15359 (const_int 4) (const_int 5)
15360 (const_int 6) (const_int 7)]))))]
15361 "TARGET_AVX2 && <mask_avx512vl_condition>"
15362 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15363 [(set_attr "type" "ssemov")
15364 (set_attr "prefix_extra" "1")
15365 (set_attr "prefix" "maybe_evex")
15366 (set_attr "mode" "OI")])
15368 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
15369 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
15372 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15373 (parallel [(const_int 0) (const_int 1)
15374 (const_int 2) (const_int 3)]))))]
15375 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15376 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15377 [(set_attr "isa" "noavx,noavx,avx")
15378 (set_attr "type" "ssemov")
15379 (set_attr "prefix_extra" "1")
15380 (set_attr "prefix" "orig,orig,maybe_evex")
15381 (set_attr "mode" "TI")])
15383 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
15384 [(set (match_operand:V16SI 0 "register_operand" "=v")
15386 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
15388 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15389 [(set_attr "type" "ssemov")
15390 (set_attr "prefix" "evex")
15391 (set_attr "mode" "XI")])
15393 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
15394 [(set (match_operand:V8SI 0 "register_operand" "=v")
15396 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
15397 "TARGET_AVX2 && <mask_avx512vl_condition>"
15398 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15399 [(set_attr "type" "ssemov")
15400 (set_attr "prefix_extra" "1")
15401 (set_attr "prefix" "maybe_evex")
15402 (set_attr "mode" "OI")])
15404 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
15405 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
15408 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15409 (parallel [(const_int 0) (const_int 1)
15410 (const_int 2) (const_int 3)]))))]
15411 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15412 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15413 [(set_attr "isa" "noavx,noavx,avx")
15414 (set_attr "type" "ssemov")
15415 (set_attr "prefix_extra" "1")
15416 (set_attr "prefix" "orig,orig,maybe_evex")
15417 (set_attr "mode" "TI")])
15419 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
15420 [(set (match_operand:V8DI 0 "register_operand" "=v")
15423 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15424 (parallel [(const_int 0) (const_int 1)
15425 (const_int 2) (const_int 3)
15426 (const_int 4) (const_int 5)
15427 (const_int 6) (const_int 7)]))))]
15429 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15430 [(set_attr "type" "ssemov")
15431 (set_attr "prefix" "evex")
15432 (set_attr "mode" "XI")])
15434 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
15435 [(set (match_operand:V4DI 0 "register_operand" "=v")
15438 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15439 (parallel [(const_int 0) (const_int 1)
15440 (const_int 2) (const_int 3)]))))]
15441 "TARGET_AVX2 && <mask_avx512vl_condition>"
15442 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15443 [(set_attr "type" "ssemov")
15444 (set_attr "prefix_extra" "1")
15445 (set_attr "prefix" "maybe_evex")
15446 (set_attr "mode" "OI")])
15448 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
15449 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15452 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15453 (parallel [(const_int 0) (const_int 1)]))))]
15454 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15455 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
15456 [(set_attr "isa" "noavx,noavx,avx")
15457 (set_attr "type" "ssemov")
15458 (set_attr "prefix_extra" "1")
15459 (set_attr "prefix" "orig,orig,maybe_evex")
15460 (set_attr "mode" "TI")])
15462 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
15463 [(set (match_operand:V8DI 0 "register_operand" "=v")
15465 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
15467 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15468 [(set_attr "type" "ssemov")
15469 (set_attr "prefix" "evex")
15470 (set_attr "mode" "XI")])
15472 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
15473 [(set (match_operand:V4DI 0 "register_operand" "=v")
15476 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
15477 (parallel [(const_int 0) (const_int 1)
15478 (const_int 2) (const_int 3)]))))]
15479 "TARGET_AVX2 && <mask_avx512vl_condition>"
15480 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15481 [(set_attr "type" "ssemov")
15482 (set_attr "prefix_extra" "1")
15483 (set_attr "prefix" "maybe_evex")
15484 (set_attr "mode" "OI")])
15486 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
15487 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15490 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15491 (parallel [(const_int 0) (const_int 1)]))))]
15492 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15493 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15494 [(set_attr "isa" "noavx,noavx,avx")
15495 (set_attr "type" "ssemov")
15496 (set_attr "prefix_extra" "1")
15497 (set_attr "prefix" "orig,orig,maybe_evex")
15498 (set_attr "mode" "TI")])
15500 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
15501 [(set (match_operand:V8DI 0 "register_operand" "=v")
15503 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
15505 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15506 [(set_attr "type" "ssemov")
15507 (set_attr "prefix" "evex")
15508 (set_attr "mode" "XI")])
15510 (define_insn "avx2_<code>v4siv4di2<mask_name>"
15511 [(set (match_operand:V4DI 0 "register_operand" "=v")
15513 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
15514 "TARGET_AVX2 && <mask_avx512vl_condition>"
15515 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15516 [(set_attr "type" "ssemov")
15517 (set_attr "prefix" "maybe_evex")
15518 (set_attr "prefix_extra" "1")
15519 (set_attr "mode" "OI")])
15521 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
15522 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15525 (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15526 (parallel [(const_int 0) (const_int 1)]))))]
15527 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15528 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15529 [(set_attr "isa" "noavx,noavx,avx")
15530 (set_attr "type" "ssemov")
15531 (set_attr "prefix_extra" "1")
15532 (set_attr "prefix" "orig,orig,maybe_evex")
15533 (set_attr "mode" "TI")])
15535 ;; ptestps/ptestpd are very similar to comiss and ucomiss when
15536 ;; setting FLAGS_REG. But it is not a really compare instruction.
15537 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
15538 [(set (reg:CC FLAGS_REG)
15539 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
15540 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
15543 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
15544 [(set_attr "type" "ssecomi")
15545 (set_attr "prefix_extra" "1")
15546 (set_attr "prefix" "vex")
15547 (set_attr "mode" "<MODE>")])
15549 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
15550 ;; But it is not a really compare instruction.
15551 (define_insn "<sse4_1>_ptest<mode>"
15552 [(set (reg:CC FLAGS_REG)
15553 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
15554 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
15557 "%vptest\t{%1, %0|%0, %1}"
15558 [(set_attr "isa" "noavx,noavx,avx")
15559 (set_attr "type" "ssecomi")
15560 (set_attr "prefix_extra" "1")
15561 (set_attr "prefix" "orig,orig,vex")
15562 (set (attr "btver2_decode")
15564 (match_test "<sseinsnmode>mode==OImode")
15565 (const_string "vector")
15566 (const_string "*")))
15567 (set_attr "mode" "<sseinsnmode>")])
15569 (define_insn "ptesttf2"
15570 [(set (reg:CC FLAGS_REG)
15571 (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
15572 (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
15575 "%vptest\t{%1, %0|%0, %1}"
15576 [(set_attr "isa" "noavx,noavx,avx")
15577 (set_attr "type" "ssecomi")
15578 (set_attr "prefix_extra" "1")
15579 (set_attr "prefix" "orig,orig,vex")
15580 (set_attr "mode" "TI")])
15582 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
15583 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15585 [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
15586 (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
15589 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15590 [(set_attr "isa" "noavx,noavx,avx")
15591 (set_attr "type" "ssecvt")
15592 (set_attr "prefix_data16" "1,1,*")
15593 (set_attr "prefix_extra" "1")
15594 (set_attr "length_immediate" "1")
15595 (set_attr "prefix" "orig,orig,vex")
15596 (set_attr "mode" "<MODE>")])
15598 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
15599 [(match_operand:<sseintvecmode> 0 "register_operand")
15600 (match_operand:VF1_128_256 1 "vector_operand")
15601 (match_operand:SI 2 "const_0_to_15_operand")]
15604 rtx tmp = gen_reg_rtx (<MODE>mode);
15607 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
15610 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
15614 (define_expand "avx512f_round<castmode>512"
15615 [(match_operand:VF_512 0 "register_operand")
15616 (match_operand:VF_512 1 "nonimmediate_operand")
15617 (match_operand:SI 2 "const_0_to_15_operand")]
15620 emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
15624 (define_expand "avx512f_roundps512_sfix"
15625 [(match_operand:V16SI 0 "register_operand")
15626 (match_operand:V16SF 1 "nonimmediate_operand")
15627 (match_operand:SI 2 "const_0_to_15_operand")]
15630 rtx tmp = gen_reg_rtx (V16SFmode);
15631 emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
15632 emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
15636 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
15637 [(match_operand:<ssepackfltmode> 0 "register_operand")
15638 (match_operand:VF2 1 "vector_operand")
15639 (match_operand:VF2 2 "vector_operand")
15640 (match_operand:SI 3 "const_0_to_15_operand")]
15645 if (<MODE>mode == V2DFmode
15646 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
15648 rtx tmp2 = gen_reg_rtx (V4DFmode);
15650 tmp0 = gen_reg_rtx (V4DFmode);
15651 tmp1 = force_reg (V2DFmode, operands[1]);
15653 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
15654 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
15655 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
15659 tmp0 = gen_reg_rtx (<MODE>mode);
15660 tmp1 = gen_reg_rtx (<MODE>mode);
15663 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
15666 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
15669 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
15674 (define_insn "sse4_1_round<ssescalarmodesuffix>"
15675 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
15678 [(match_operand:VF_128 2 "register_operand" "Yr,*x,x,v")
15679 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
15681 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
15685 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
15686 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
15687 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
15688 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15689 [(set_attr "isa" "noavx,noavx,avx,avx512f")
15690 (set_attr "type" "ssecvt")
15691 (set_attr "length_immediate" "1")
15692 (set_attr "prefix_data16" "1,1,*,*")
15693 (set_attr "prefix_extra" "1")
15694 (set_attr "prefix" "orig,orig,vex,evex")
15695 (set_attr "mode" "<MODE>")])
15697 (define_expand "round<mode>2"
15698 [(set (match_dup 3)
15700 (match_operand:VF 1 "register_operand")
15702 (set (match_operand:VF 0 "register_operand")
15704 [(match_dup 3) (match_dup 4)]
15706 "TARGET_SSE4_1 && !flag_trapping_math"
15708 machine_mode scalar_mode;
15709 const struct real_format *fmt;
15710 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
15711 rtx half, vec_half;
15713 scalar_mode = GET_MODE_INNER (<MODE>mode);
15715 /* load nextafter (0.5, 0.0) */
15716 fmt = REAL_MODE_FORMAT (scalar_mode);
15717 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
15718 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
15719 half = const_double_from_real_value (pred_half, scalar_mode);
15721 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
15722 vec_half = force_reg (<MODE>mode, vec_half);
15724 operands[2] = gen_reg_rtx (<MODE>mode);
15725 emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]));
15727 operands[3] = gen_reg_rtx (<MODE>mode);
15728 operands[4] = GEN_INT (ROUND_TRUNC);
15731 (define_expand "round<mode>2_sfix"
15732 [(match_operand:<sseintvecmode> 0 "register_operand")
15733 (match_operand:VF1 1 "register_operand")]
15734 "TARGET_SSE4_1 && !flag_trapping_math"
15736 rtx tmp = gen_reg_rtx (<MODE>mode);
15738 emit_insn (gen_round<mode>2 (tmp, operands[1]));
15741 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
15745 (define_expand "round<mode>2_vec_pack_sfix"
15746 [(match_operand:<ssepackfltmode> 0 "register_operand")
15747 (match_operand:VF2 1 "register_operand")
15748 (match_operand:VF2 2 "register_operand")]
15749 "TARGET_SSE4_1 && !flag_trapping_math"
15753 if (<MODE>mode == V2DFmode
15754 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
15756 rtx tmp2 = gen_reg_rtx (V4DFmode);
15758 tmp0 = gen_reg_rtx (V4DFmode);
15759 tmp1 = force_reg (V2DFmode, operands[1]);
15761 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
15762 emit_insn (gen_roundv4df2 (tmp2, tmp0));
15763 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
15767 tmp0 = gen_reg_rtx (<MODE>mode);
15768 tmp1 = gen_reg_rtx (<MODE>mode);
15770 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
15771 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
15774 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
15779 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15781 ;; Intel SSE4.2 string/text processing instructions
15783 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15785 (define_insn_and_split "sse4_2_pcmpestr"
15786 [(set (match_operand:SI 0 "register_operand" "=c,c")
15788 [(match_operand:V16QI 2 "register_operand" "x,x")
15789 (match_operand:SI 3 "register_operand" "a,a")
15790 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
15791 (match_operand:SI 5 "register_operand" "d,d")
15792 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
15794 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
15802 (set (reg:CC FLAGS_REG)
15811 && can_create_pseudo_p ()"
15816 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15817 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15818 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15821 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
15822 operands[3], operands[4],
15823 operands[5], operands[6]));
15825 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
15826 operands[3], operands[4],
15827 operands[5], operands[6]));
15828 if (flags && !(ecx || xmm0))
15829 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
15830 operands[2], operands[3],
15831 operands[4], operands[5],
15833 if (!(flags || ecx || xmm0))
15834 emit_note (NOTE_INSN_DELETED);
15838 [(set_attr "type" "sselog")
15839 (set_attr "prefix_data16" "1")
15840 (set_attr "prefix_extra" "1")
15841 (set_attr "length_immediate" "1")
15842 (set_attr "memory" "none,load")
15843 (set_attr "mode" "TI")])
15845 (define_insn "sse4_2_pcmpestri"
15846 [(set (match_operand:SI 0 "register_operand" "=c,c")
15848 [(match_operand:V16QI 1 "register_operand" "x,x")
15849 (match_operand:SI 2 "register_operand" "a,a")
15850 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15851 (match_operand:SI 4 "register_operand" "d,d")
15852 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
15854 (set (reg:CC FLAGS_REG)
15863 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
15864 [(set_attr "type" "sselog")
15865 (set_attr "prefix_data16" "1")
15866 (set_attr "prefix_extra" "1")
15867 (set_attr "prefix" "maybe_vex")
15868 (set_attr "length_immediate" "1")
15869 (set_attr "btver2_decode" "vector")
15870 (set_attr "memory" "none,load")
15871 (set_attr "mode" "TI")])
15873 (define_insn "sse4_2_pcmpestrm"
15874 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
15876 [(match_operand:V16QI 1 "register_operand" "x,x")
15877 (match_operand:SI 2 "register_operand" "a,a")
15878 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15879 (match_operand:SI 4 "register_operand" "d,d")
15880 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
15882 (set (reg:CC FLAGS_REG)
15891 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
15892 [(set_attr "type" "sselog")
15893 (set_attr "prefix_data16" "1")
15894 (set_attr "prefix_extra" "1")
15895 (set_attr "length_immediate" "1")
15896 (set_attr "prefix" "maybe_vex")
15897 (set_attr "btver2_decode" "vector")
15898 (set_attr "memory" "none,load")
15899 (set_attr "mode" "TI")])
15901 (define_insn "sse4_2_pcmpestr_cconly"
15902 [(set (reg:CC FLAGS_REG)
15904 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15905 (match_operand:SI 3 "register_operand" "a,a,a,a")
15906 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
15907 (match_operand:SI 5 "register_operand" "d,d,d,d")
15908 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
15910 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15911 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
15914 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15915 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15916 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
15917 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
15918 [(set_attr "type" "sselog")
15919 (set_attr "prefix_data16" "1")
15920 (set_attr "prefix_extra" "1")
15921 (set_attr "length_immediate" "1")
15922 (set_attr "memory" "none,load,none,load")
15923 (set_attr "btver2_decode" "vector,vector,vector,vector")
15924 (set_attr "prefix" "maybe_vex")
15925 (set_attr "mode" "TI")])
15927 (define_insn_and_split "sse4_2_pcmpistr"
15928 [(set (match_operand:SI 0 "register_operand" "=c,c")
15930 [(match_operand:V16QI 2 "register_operand" "x,x")
15931 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15932 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
15934 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
15940 (set (reg:CC FLAGS_REG)
15947 && can_create_pseudo_p ()"
15952 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15953 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15954 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15957 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
15958 operands[3], operands[4]));
15960 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
15961 operands[3], operands[4]));
15962 if (flags && !(ecx || xmm0))
15963 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
15964 operands[2], operands[3],
15966 if (!(flags || ecx || xmm0))
15967 emit_note (NOTE_INSN_DELETED);
15971 [(set_attr "type" "sselog")
15972 (set_attr "prefix_data16" "1")
15973 (set_attr "prefix_extra" "1")
15974 (set_attr "length_immediate" "1")
15975 (set_attr "memory" "none,load")
15976 (set_attr "mode" "TI")])
15978 (define_insn "sse4_2_pcmpistri"
15979 [(set (match_operand:SI 0 "register_operand" "=c,c")
15981 [(match_operand:V16QI 1 "register_operand" "x,x")
15982 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15983 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15985 (set (reg:CC FLAGS_REG)
15992 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
15993 [(set_attr "type" "sselog")
15994 (set_attr "prefix_data16" "1")
15995 (set_attr "prefix_extra" "1")
15996 (set_attr "length_immediate" "1")
15997 (set_attr "prefix" "maybe_vex")
15998 (set_attr "memory" "none,load")
15999 (set_attr "btver2_decode" "vector")
16000 (set_attr "mode" "TI")])
16002 (define_insn "sse4_2_pcmpistrm"
16003 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
16005 [(match_operand:V16QI 1 "register_operand" "x,x")
16006 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
16007 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
16009 (set (reg:CC FLAGS_REG)
16016 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
16017 [(set_attr "type" "sselog")
16018 (set_attr "prefix_data16" "1")
16019 (set_attr "prefix_extra" "1")
16020 (set_attr "length_immediate" "1")
16021 (set_attr "prefix" "maybe_vex")
16022 (set_attr "memory" "none,load")
16023 (set_attr "btver2_decode" "vector")
16024 (set_attr "mode" "TI")])
16026 (define_insn "sse4_2_pcmpistr_cconly"
16027 [(set (reg:CC FLAGS_REG)
16029 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
16030 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
16031 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
16033 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
16034 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
16037 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
16038 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
16039 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
16040 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
16041 [(set_attr "type" "sselog")
16042 (set_attr "prefix_data16" "1")
16043 (set_attr "prefix_extra" "1")
16044 (set_attr "length_immediate" "1")
16045 (set_attr "memory" "none,load,none,load")
16046 (set_attr "prefix" "maybe_vex")
16047 (set_attr "btver2_decode" "vector,vector,vector,vector")
16048 (set_attr "mode" "TI")])
16050 ;; Packed float variants
16051 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
16052 [(V8DI "V8SF") (V16SI "V16SF")])
16054 (define_expand "avx512pf_gatherpf<mode>sf"
16056 [(match_operand:<avx512fmaskmode> 0 "register_operand")
16057 (mem:<GATHER_SCATTER_SF_MEM_MODE>
16059 [(match_operand 2 "vsib_address_operand")
16060 (match_operand:VI48_512 1 "register_operand")
16061 (match_operand:SI 3 "const1248_operand")]))
16062 (match_operand:SI 4 "const_2_to_3_operand")]
16063 UNSPEC_GATHER_PREFETCH)]
16067 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16068 operands[3]), UNSPEC_VSIBADDR);
16071 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
16073 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16074 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
16076 [(match_operand:P 2 "vsib_address_operand" "Tv")
16077 (match_operand:VI48_512 1 "register_operand" "v")
16078 (match_operand:SI 3 "const1248_operand" "n")]
16080 (match_operand:SI 4 "const_2_to_3_operand" "n")]
16081 UNSPEC_GATHER_PREFETCH)]
16084 switch (INTVAL (operands[4]))
16087 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
16089 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
16091 gcc_unreachable ();
16094 [(set_attr "type" "sse")
16095 (set_attr "prefix" "evex")
16096 (set_attr "mode" "XI")])
16098 ;; Packed double variants
16099 (define_expand "avx512pf_gatherpf<mode>df"
16101 [(match_operand:<avx512fmaskmode> 0 "register_operand")
16104 [(match_operand 2 "vsib_address_operand")
16105 (match_operand:VI4_256_8_512 1 "register_operand")
16106 (match_operand:SI 3 "const1248_operand")]))
16107 (match_operand:SI 4 "const_2_to_3_operand")]
16108 UNSPEC_GATHER_PREFETCH)]
16112 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16113 operands[3]), UNSPEC_VSIBADDR);
16116 (define_insn "*avx512pf_gatherpf<mode>df_mask"
16118 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16119 (match_operator:V8DF 5 "vsib_mem_operator"
16121 [(match_operand:P 2 "vsib_address_operand" "Tv")
16122 (match_operand:VI4_256_8_512 1 "register_operand" "v")
16123 (match_operand:SI 3 "const1248_operand" "n")]
16125 (match_operand:SI 4 "const_2_to_3_operand" "n")]
16126 UNSPEC_GATHER_PREFETCH)]
16129 switch (INTVAL (operands[4]))
16132 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
16134 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
16136 gcc_unreachable ();
16139 [(set_attr "type" "sse")
16140 (set_attr "prefix" "evex")
16141 (set_attr "mode" "XI")])
16143 ;; Packed float variants
16144 (define_expand "avx512pf_scatterpf<mode>sf"
16146 [(match_operand:<avx512fmaskmode> 0 "register_operand")
16147 (mem:<GATHER_SCATTER_SF_MEM_MODE>
16149 [(match_operand 2 "vsib_address_operand")
16150 (match_operand:VI48_512 1 "register_operand")
16151 (match_operand:SI 3 "const1248_operand")]))
16152 (match_operand:SI 4 "const2367_operand")]
16153 UNSPEC_SCATTER_PREFETCH)]
16157 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16158 operands[3]), UNSPEC_VSIBADDR);
16161 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
16163 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16164 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
16166 [(match_operand:P 2 "vsib_address_operand" "Tv")
16167 (match_operand:VI48_512 1 "register_operand" "v")
16168 (match_operand:SI 3 "const1248_operand" "n")]
16170 (match_operand:SI 4 "const2367_operand" "n")]
16171 UNSPEC_SCATTER_PREFETCH)]
16174 switch (INTVAL (operands[4]))
16178 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
16181 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
16183 gcc_unreachable ();
16186 [(set_attr "type" "sse")
16187 (set_attr "prefix" "evex")
16188 (set_attr "mode" "XI")])
16190 ;; Packed double variants
16191 (define_expand "avx512pf_scatterpf<mode>df"
16193 [(match_operand:<avx512fmaskmode> 0 "register_operand")
16196 [(match_operand 2 "vsib_address_operand")
16197 (match_operand:VI4_256_8_512 1 "register_operand")
16198 (match_operand:SI 3 "const1248_operand")]))
16199 (match_operand:SI 4 "const2367_operand")]
16200 UNSPEC_SCATTER_PREFETCH)]
16204 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16205 operands[3]), UNSPEC_VSIBADDR);
16208 (define_insn "*avx512pf_scatterpf<mode>df_mask"
16210 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16211 (match_operator:V8DF 5 "vsib_mem_operator"
16213 [(match_operand:P 2 "vsib_address_operand" "Tv")
16214 (match_operand:VI4_256_8_512 1 "register_operand" "v")
16215 (match_operand:SI 3 "const1248_operand" "n")]
16217 (match_operand:SI 4 "const2367_operand" "n")]
16218 UNSPEC_SCATTER_PREFETCH)]
16221 switch (INTVAL (operands[4]))
16225 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
16228 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
16230 gcc_unreachable ();
16233 [(set_attr "type" "sse")
16234 (set_attr "prefix" "evex")
16235 (set_attr "mode" "XI")])
16237 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
16238 [(set (match_operand:VF_512 0 "register_operand" "=v")
16240 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16243 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
16244 [(set_attr "prefix" "evex")
16245 (set_attr "type" "sse")
16246 (set_attr "mode" "<MODE>")])
16248 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
16249 [(set (match_operand:VF_512 0 "register_operand" "=v")
16251 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16254 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
16255 [(set_attr "prefix" "evex")
16256 (set_attr "type" "sse")
16257 (set_attr "mode" "<MODE>")])
16259 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
16260 [(set (match_operand:VF_128 0 "register_operand" "=v")
16263 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16265 (match_operand:VF_128 2 "register_operand" "v")
16268 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
16269 [(set_attr "length_immediate" "1")
16270 (set_attr "prefix" "evex")
16271 (set_attr "type" "sse")
16272 (set_attr "mode" "<MODE>")])
16274 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
16275 [(set (match_operand:VF_512 0 "register_operand" "=v")
16277 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16280 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
16281 [(set_attr "prefix" "evex")
16282 (set_attr "type" "sse")
16283 (set_attr "mode" "<MODE>")])
16285 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
16286 [(set (match_operand:VF_128 0 "register_operand" "=v")
16289 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16291 (match_operand:VF_128 2 "register_operand" "v")
16294 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
16295 [(set_attr "length_immediate" "1")
16296 (set_attr "type" "sse")
16297 (set_attr "prefix" "evex")
16298 (set_attr "mode" "<MODE>")])
16300 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16302 ;; XOP instructions
16304 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16306 (define_code_iterator xop_plus [plus ss_plus])
16308 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
16309 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
16311 ;; XOP parallel integer multiply/add instructions.
16313 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
16314 [(set (match_operand:VI24_128 0 "register_operand" "=x")
16317 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
16318 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
16319 (match_operand:VI24_128 3 "register_operand" "x")))]
16321 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16322 [(set_attr "type" "ssemuladd")
16323 (set_attr "mode" "TI")])
16325 (define_insn "xop_p<macs>dql"
16326 [(set (match_operand:V2DI 0 "register_operand" "=x")
16331 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
16332 (parallel [(const_int 0) (const_int 2)])))
16335 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
16336 (parallel [(const_int 0) (const_int 2)]))))
16337 (match_operand:V2DI 3 "register_operand" "x")))]
16339 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16340 [(set_attr "type" "ssemuladd")
16341 (set_attr "mode" "TI")])
16343 (define_insn "xop_p<macs>dqh"
16344 [(set (match_operand:V2DI 0 "register_operand" "=x")
16349 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
16350 (parallel [(const_int 1) (const_int 3)])))
16353 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
16354 (parallel [(const_int 1) (const_int 3)]))))
16355 (match_operand:V2DI 3 "register_operand" "x")))]
16357 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16358 [(set_attr "type" "ssemuladd")
16359 (set_attr "mode" "TI")])
16361 ;; XOP parallel integer multiply/add instructions for the intrinisics
16362 (define_insn "xop_p<macs>wd"
16363 [(set (match_operand:V4SI 0 "register_operand" "=x")
16368 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
16369 (parallel [(const_int 1) (const_int 3)
16370 (const_int 5) (const_int 7)])))
16373 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
16374 (parallel [(const_int 1) (const_int 3)
16375 (const_int 5) (const_int 7)]))))
16376 (match_operand:V4SI 3 "register_operand" "x")))]
16378 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16379 [(set_attr "type" "ssemuladd")
16380 (set_attr "mode" "TI")])
16382 (define_insn "xop_p<madcs>wd"
16383 [(set (match_operand:V4SI 0 "register_operand" "=x")
16389 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
16390 (parallel [(const_int 0) (const_int 2)
16391 (const_int 4) (const_int 6)])))
16394 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
16395 (parallel [(const_int 0) (const_int 2)
16396 (const_int 4) (const_int 6)]))))
16401 (parallel [(const_int 1) (const_int 3)
16402 (const_int 5) (const_int 7)])))
16406 (parallel [(const_int 1) (const_int 3)
16407 (const_int 5) (const_int 7)])))))
16408 (match_operand:V4SI 3 "register_operand" "x")))]
16410 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16411 [(set_attr "type" "ssemuladd")
16412 (set_attr "mode" "TI")])
16414 ;; XOP parallel XMM conditional moves
16415 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
16416 [(set (match_operand:V 0 "register_operand" "=x,x")
16418 (match_operand:V 3 "nonimmediate_operand" "x,m")
16419 (match_operand:V 1 "register_operand" "x,x")
16420 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
16422 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16423 [(set_attr "type" "sse4arg")])
16425 ;; XOP horizontal add/subtract instructions
16426 (define_insn "xop_phadd<u>bw"
16427 [(set (match_operand:V8HI 0 "register_operand" "=x")
16431 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16432 (parallel [(const_int 0) (const_int 2)
16433 (const_int 4) (const_int 6)
16434 (const_int 8) (const_int 10)
16435 (const_int 12) (const_int 14)])))
16439 (parallel [(const_int 1) (const_int 3)
16440 (const_int 5) (const_int 7)
16441 (const_int 9) (const_int 11)
16442 (const_int 13) (const_int 15)])))))]
16444 "vphadd<u>bw\t{%1, %0|%0, %1}"
16445 [(set_attr "type" "sseiadd1")])
16447 (define_insn "xop_phadd<u>bd"
16448 [(set (match_operand:V4SI 0 "register_operand" "=x")
16453 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16454 (parallel [(const_int 0) (const_int 4)
16455 (const_int 8) (const_int 12)])))
16459 (parallel [(const_int 1) (const_int 5)
16460 (const_int 9) (const_int 13)]))))
16465 (parallel [(const_int 2) (const_int 6)
16466 (const_int 10) (const_int 14)])))
16470 (parallel [(const_int 3) (const_int 7)
16471 (const_int 11) (const_int 15)]))))))]
16473 "vphadd<u>bd\t{%1, %0|%0, %1}"
16474 [(set_attr "type" "sseiadd1")])
16476 (define_insn "xop_phadd<u>bq"
16477 [(set (match_operand:V2DI 0 "register_operand" "=x")
16483 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16484 (parallel [(const_int 0) (const_int 8)])))
16488 (parallel [(const_int 1) (const_int 9)]))))
16493 (parallel [(const_int 2) (const_int 10)])))
16497 (parallel [(const_int 3) (const_int 11)])))))
16503 (parallel [(const_int 4) (const_int 12)])))
16507 (parallel [(const_int 5) (const_int 13)]))))
16512 (parallel [(const_int 6) (const_int 14)])))
16516 (parallel [(const_int 7) (const_int 15)])))))))]
16518 "vphadd<u>bq\t{%1, %0|%0, %1}"
16519 [(set_attr "type" "sseiadd1")])
16521 (define_insn "xop_phadd<u>wd"
16522 [(set (match_operand:V4SI 0 "register_operand" "=x")
16526 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16527 (parallel [(const_int 0) (const_int 2)
16528 (const_int 4) (const_int 6)])))
16532 (parallel [(const_int 1) (const_int 3)
16533 (const_int 5) (const_int 7)])))))]
16535 "vphadd<u>wd\t{%1, %0|%0, %1}"
16536 [(set_attr "type" "sseiadd1")])
16538 (define_insn "xop_phadd<u>wq"
16539 [(set (match_operand:V2DI 0 "register_operand" "=x")
16544 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16545 (parallel [(const_int 0) (const_int 4)])))
16549 (parallel [(const_int 1) (const_int 5)]))))
16554 (parallel [(const_int 2) (const_int 6)])))
16558 (parallel [(const_int 3) (const_int 7)]))))))]
16560 "vphadd<u>wq\t{%1, %0|%0, %1}"
16561 [(set_attr "type" "sseiadd1")])
16563 (define_insn "xop_phadd<u>dq"
16564 [(set (match_operand:V2DI 0 "register_operand" "=x")
16568 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
16569 (parallel [(const_int 0) (const_int 2)])))
16573 (parallel [(const_int 1) (const_int 3)])))))]
16575 "vphadd<u>dq\t{%1, %0|%0, %1}"
16576 [(set_attr "type" "sseiadd1")])
16578 (define_insn "xop_phsubbw"
16579 [(set (match_operand:V8HI 0 "register_operand" "=x")
16583 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16584 (parallel [(const_int 0) (const_int 2)
16585 (const_int 4) (const_int 6)
16586 (const_int 8) (const_int 10)
16587 (const_int 12) (const_int 14)])))
16591 (parallel [(const_int 1) (const_int 3)
16592 (const_int 5) (const_int 7)
16593 (const_int 9) (const_int 11)
16594 (const_int 13) (const_int 15)])))))]
16596 "vphsubbw\t{%1, %0|%0, %1}"
16597 [(set_attr "type" "sseiadd1")])
16599 (define_insn "xop_phsubwd"
16600 [(set (match_operand:V4SI 0 "register_operand" "=x")
16604 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16605 (parallel [(const_int 0) (const_int 2)
16606 (const_int 4) (const_int 6)])))
16610 (parallel [(const_int 1) (const_int 3)
16611 (const_int 5) (const_int 7)])))))]
16613 "vphsubwd\t{%1, %0|%0, %1}"
16614 [(set_attr "type" "sseiadd1")])
16616 (define_insn "xop_phsubdq"
16617 [(set (match_operand:V2DI 0 "register_operand" "=x")
16621 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
16622 (parallel [(const_int 0) (const_int 2)])))
16626 (parallel [(const_int 1) (const_int 3)])))))]
16628 "vphsubdq\t{%1, %0|%0, %1}"
16629 [(set_attr "type" "sseiadd1")])
16631 ;; XOP permute instructions
16632 (define_insn "xop_pperm"
16633 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
16635 [(match_operand:V16QI 1 "register_operand" "x,x")
16636 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
16637 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
16638 UNSPEC_XOP_PERMUTE))]
16639 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16640 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16641 [(set_attr "type" "sse4arg")
16642 (set_attr "mode" "TI")])
16644 ;; XOP pack instructions that combine two vectors into a smaller vector
16645 (define_insn "xop_pperm_pack_v2di_v4si"
16646 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
16649 (match_operand:V2DI 1 "register_operand" "x,x"))
16651 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
16652 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16653 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16654 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16655 [(set_attr "type" "sse4arg")
16656 (set_attr "mode" "TI")])
16658 (define_insn "xop_pperm_pack_v4si_v8hi"
16659 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
16662 (match_operand:V4SI 1 "register_operand" "x,x"))
16664 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
16665 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16666 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16667 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16668 [(set_attr "type" "sse4arg")
16669 (set_attr "mode" "TI")])
16671 (define_insn "xop_pperm_pack_v8hi_v16qi"
16672 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
16675 (match_operand:V8HI 1 "register_operand" "x,x"))
16677 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
16678 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16679 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16680 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16681 [(set_attr "type" "sse4arg")
16682 (set_attr "mode" "TI")])
16684 ;; XOP packed rotate instructions
16685 (define_expand "rotl<mode>3"
16686 [(set (match_operand:VI_128 0 "register_operand")
16688 (match_operand:VI_128 1 "nonimmediate_operand")
16689 (match_operand:SI 2 "general_operand")))]
16692 /* If we were given a scalar, convert it to parallel */
16693 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
16695 rtvec vs = rtvec_alloc (<ssescalarnum>);
16696 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
16697 rtx reg = gen_reg_rtx (<MODE>mode);
16698 rtx op2 = operands[2];
16701 if (GET_MODE (op2) != <ssescalarmode>mode)
16703 op2 = gen_reg_rtx (<ssescalarmode>mode);
16704 convert_move (op2, operands[2], false);
16707 for (i = 0; i < <ssescalarnum>; i++)
16708 RTVEC_ELT (vs, i) = op2;
16710 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
16711 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
16716 (define_expand "rotr<mode>3"
16717 [(set (match_operand:VI_128 0 "register_operand")
16719 (match_operand:VI_128 1 "nonimmediate_operand")
16720 (match_operand:SI 2 "general_operand")))]
16723 /* If we were given a scalar, convert it to parallel */
16724 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
16726 rtvec vs = rtvec_alloc (<ssescalarnum>);
16727 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
16728 rtx neg = gen_reg_rtx (<MODE>mode);
16729 rtx reg = gen_reg_rtx (<MODE>mode);
16730 rtx op2 = operands[2];
16733 if (GET_MODE (op2) != <ssescalarmode>mode)
16735 op2 = gen_reg_rtx (<ssescalarmode>mode);
16736 convert_move (op2, operands[2], false);
16739 for (i = 0; i < <ssescalarnum>; i++)
16740 RTVEC_ELT (vs, i) = op2;
16742 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
16743 emit_insn (gen_neg<mode>2 (neg, reg));
16744 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
16749 (define_insn "xop_rotl<mode>3"
16750 [(set (match_operand:VI_128 0 "register_operand" "=x")
16752 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
16753 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
16755 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16756 [(set_attr "type" "sseishft")
16757 (set_attr "length_immediate" "1")
16758 (set_attr "mode" "TI")])
16760 (define_insn "xop_rotr<mode>3"
16761 [(set (match_operand:VI_128 0 "register_operand" "=x")
16763 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
16764 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
16768 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
16769 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
16771 [(set_attr "type" "sseishft")
16772 (set_attr "length_immediate" "1")
16773 (set_attr "mode" "TI")])
16775 (define_expand "vrotr<mode>3"
16776 [(match_operand:VI_128 0 "register_operand")
16777 (match_operand:VI_128 1 "register_operand")
16778 (match_operand:VI_128 2 "register_operand")]
16781 rtx reg = gen_reg_rtx (<MODE>mode);
16782 emit_insn (gen_neg<mode>2 (reg, operands[2]));
16783 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
16787 (define_expand "vrotl<mode>3"
16788 [(match_operand:VI_128 0 "register_operand")
16789 (match_operand:VI_128 1 "register_operand")
16790 (match_operand:VI_128 2 "register_operand")]
16793 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
16797 (define_insn "xop_vrotl<mode>3"
16798 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16799 (if_then_else:VI_128
16801 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16804 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16808 (neg:VI_128 (match_dup 2)))))]
16809 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16810 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16811 [(set_attr "type" "sseishft")
16812 (set_attr "prefix_data16" "0")
16813 (set_attr "prefix_extra" "2")
16814 (set_attr "mode" "TI")])
16816 ;; XOP packed shift instructions.
16817 (define_expand "vlshr<mode>3"
16818 [(set (match_operand:VI12_128 0 "register_operand")
16820 (match_operand:VI12_128 1 "register_operand")
16821 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16824 rtx neg = gen_reg_rtx (<MODE>mode);
16825 emit_insn (gen_neg<mode>2 (neg, operands[2]));
16826 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
16830 (define_expand "vlshr<mode>3"
16831 [(set (match_operand:VI48_128 0 "register_operand")
16833 (match_operand:VI48_128 1 "register_operand")
16834 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16835 "TARGET_AVX2 || TARGET_XOP"
16839 rtx neg = gen_reg_rtx (<MODE>mode);
16840 emit_insn (gen_neg<mode>2 (neg, operands[2]));
16841 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
16846 (define_expand "vlshr<mode>3"
16847 [(set (match_operand:VI48_512 0 "register_operand")
16849 (match_operand:VI48_512 1 "register_operand")
16850 (match_operand:VI48_512 2 "nonimmediate_operand")))]
16853 (define_expand "vlshr<mode>3"
16854 [(set (match_operand:VI48_256 0 "register_operand")
16856 (match_operand:VI48_256 1 "register_operand")
16857 (match_operand:VI48_256 2 "nonimmediate_operand")))]
16860 (define_expand "vashrv8hi3<mask_name>"
16861 [(set (match_operand:V8HI 0 "register_operand")
16863 (match_operand:V8HI 1 "register_operand")
16864 (match_operand:V8HI 2 "nonimmediate_operand")))]
16865 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
16869 rtx neg = gen_reg_rtx (V8HImode);
16870 emit_insn (gen_negv8hi2 (neg, operands[2]));
16871 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
16876 (define_expand "vashrv16qi3"
16877 [(set (match_operand:V16QI 0 "register_operand")
16879 (match_operand:V16QI 1 "register_operand")
16880 (match_operand:V16QI 2 "nonimmediate_operand")))]
16883 rtx neg = gen_reg_rtx (V16QImode);
16884 emit_insn (gen_negv16qi2 (neg, operands[2]));
16885 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
16889 (define_expand "vashrv2di3<mask_name>"
16890 [(set (match_operand:V2DI 0 "register_operand")
16892 (match_operand:V2DI 1 "register_operand")
16893 (match_operand:V2DI 2 "nonimmediate_operand")))]
16894 "TARGET_XOP || TARGET_AVX512VL"
16898 rtx neg = gen_reg_rtx (V2DImode);
16899 emit_insn (gen_negv2di2 (neg, operands[2]));
16900 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
16905 (define_expand "vashrv4si3"
16906 [(set (match_operand:V4SI 0 "register_operand")
16907 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
16908 (match_operand:V4SI 2 "nonimmediate_operand")))]
16909 "TARGET_AVX2 || TARGET_XOP"
16913 rtx neg = gen_reg_rtx (V4SImode);
16914 emit_insn (gen_negv4si2 (neg, operands[2]));
16915 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
16920 (define_expand "vashrv16si3"
16921 [(set (match_operand:V16SI 0 "register_operand")
16922 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
16923 (match_operand:V16SI 2 "nonimmediate_operand")))]
16926 (define_expand "vashrv8si3"
16927 [(set (match_operand:V8SI 0 "register_operand")
16928 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
16929 (match_operand:V8SI 2 "nonimmediate_operand")))]
16932 (define_expand "vashl<mode>3"
16933 [(set (match_operand:VI12_128 0 "register_operand")
16935 (match_operand:VI12_128 1 "register_operand")
16936 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16939 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
16943 (define_expand "vashl<mode>3"
16944 [(set (match_operand:VI48_128 0 "register_operand")
16946 (match_operand:VI48_128 1 "register_operand")
16947 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16948 "TARGET_AVX2 || TARGET_XOP"
16952 operands[2] = force_reg (<MODE>mode, operands[2]);
16953 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
16958 (define_expand "vashl<mode>3"
16959 [(set (match_operand:VI48_512 0 "register_operand")
16961 (match_operand:VI48_512 1 "register_operand")
16962 (match_operand:VI48_512 2 "nonimmediate_operand")))]
16965 (define_expand "vashl<mode>3"
16966 [(set (match_operand:VI48_256 0 "register_operand")
16968 (match_operand:VI48_256 1 "register_operand")
16969 (match_operand:VI48_256 2 "nonimmediate_operand")))]
16972 (define_insn "xop_sha<mode>3"
16973 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16974 (if_then_else:VI_128
16976 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16979 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16983 (neg:VI_128 (match_dup 2)))))]
16984 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16985 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16986 [(set_attr "type" "sseishft")
16987 (set_attr "prefix_data16" "0")
16988 (set_attr "prefix_extra" "2")
16989 (set_attr "mode" "TI")])
16991 (define_insn "xop_shl<mode>3"
16992 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16993 (if_then_else:VI_128
16995 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16998 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
17002 (neg:VI_128 (match_dup 2)))))]
17003 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
17004 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17005 [(set_attr "type" "sseishft")
17006 (set_attr "prefix_data16" "0")
17007 (set_attr "prefix_extra" "2")
17008 (set_attr "mode" "TI")])
17010 (define_expand "<shift_insn><mode>3"
17011 [(set (match_operand:VI1_AVX512 0 "register_operand")
17012 (any_shift:VI1_AVX512
17013 (match_operand:VI1_AVX512 1 "register_operand")
17014 (match_operand:SI 2 "nonmemory_operand")))]
17017 if (TARGET_XOP && <MODE>mode == V16QImode)
17019 bool negate = false;
17020 rtx (*gen) (rtx, rtx, rtx);
17024 if (<CODE> != ASHIFT)
17026 if (CONST_INT_P (operands[2]))
17027 operands[2] = GEN_INT (-INTVAL (operands[2]));
17031 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
17032 for (i = 0; i < 16; i++)
17033 XVECEXP (par, 0, i) = operands[2];
17035 tmp = gen_reg_rtx (V16QImode);
17036 emit_insn (gen_vec_initv16qiqi (tmp, par));
17039 emit_insn (gen_negv16qi2 (tmp, tmp));
17041 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
17042 emit_insn (gen (operands[0], operands[1], tmp));
17045 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
17049 (define_expand "ashrv2di3"
17050 [(set (match_operand:V2DI 0 "register_operand")
17052 (match_operand:V2DI 1 "register_operand")
17053 (match_operand:DI 2 "nonmemory_operand")))]
17054 "TARGET_XOP || TARGET_AVX512VL"
17056 if (!TARGET_AVX512VL)
17058 rtx reg = gen_reg_rtx (V2DImode);
17060 bool negate = false;
17063 if (CONST_INT_P (operands[2]))
17064 operands[2] = GEN_INT (-INTVAL (operands[2]));
17068 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
17069 for (i = 0; i < 2; i++)
17070 XVECEXP (par, 0, i) = operands[2];
17072 emit_insn (gen_vec_initv2didi (reg, par));
17075 emit_insn (gen_negv2di2 (reg, reg));
17077 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
17082 ;; XOP FRCZ support
17083 (define_insn "xop_frcz<mode>2"
17084 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
17086 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
17089 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
17090 [(set_attr "type" "ssecvt1")
17091 (set_attr "mode" "<MODE>")])
17093 (define_expand "xop_vmfrcz<mode>2"
17094 [(set (match_operand:VF_128 0 "register_operand")
17097 [(match_operand:VF_128 1 "nonimmediate_operand")]
17102 "operands[2] = CONST0_RTX (<MODE>mode);")
17104 (define_insn "*xop_vmfrcz<mode>2"
17105 [(set (match_operand:VF_128 0 "register_operand" "=x")
17108 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
17110 (match_operand:VF_128 2 "const0_operand")
17113 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
17114 [(set_attr "type" "ssecvt1")
17115 (set_attr "mode" "<MODE>")])
17117 (define_insn "xop_maskcmp<mode>3"
17118 [(set (match_operand:VI_128 0 "register_operand" "=x")
17119 (match_operator:VI_128 1 "ix86_comparison_int_operator"
17120 [(match_operand:VI_128 2 "register_operand" "x")
17121 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
17123 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17124 [(set_attr "type" "sse4arg")
17125 (set_attr "prefix_data16" "0")
17126 (set_attr "prefix_rep" "0")
17127 (set_attr "prefix_extra" "2")
17128 (set_attr "length_immediate" "1")
17129 (set_attr "mode" "TI")])
17131 (define_insn "xop_maskcmp_uns<mode>3"
17132 [(set (match_operand:VI_128 0 "register_operand" "=x")
17133 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
17134 [(match_operand:VI_128 2 "register_operand" "x")
17135 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
17137 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17138 [(set_attr "type" "ssecmp")
17139 (set_attr "prefix_data16" "0")
17140 (set_attr "prefix_rep" "0")
17141 (set_attr "prefix_extra" "2")
17142 (set_attr "length_immediate" "1")
17143 (set_attr "mode" "TI")])
17145 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
17146 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
17147 ;; the exact instruction generated for the intrinsic.
17148 (define_insn "xop_maskcmp_uns2<mode>3"
17149 [(set (match_operand:VI_128 0 "register_operand" "=x")
17151 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
17152 [(match_operand:VI_128 2 "register_operand" "x")
17153 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
17154 UNSPEC_XOP_UNSIGNED_CMP))]
17156 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17157 [(set_attr "type" "ssecmp")
17158 (set_attr "prefix_data16" "0")
17159 (set_attr "prefix_extra" "2")
17160 (set_attr "length_immediate" "1")
17161 (set_attr "mode" "TI")])
17163 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
17164 ;; being added here to be complete.
17165 (define_insn "xop_pcom_tf<mode>3"
17166 [(set (match_operand:VI_128 0 "register_operand" "=x")
17168 [(match_operand:VI_128 1 "register_operand" "x")
17169 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
17170 (match_operand:SI 3 "const_int_operand" "n")]
17171 UNSPEC_XOP_TRUEFALSE))]
17174 return ((INTVAL (operands[3]) != 0)
17175 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17176 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
17178 [(set_attr "type" "ssecmp")
17179 (set_attr "prefix_data16" "0")
17180 (set_attr "prefix_extra" "2")
17181 (set_attr "length_immediate" "1")
17182 (set_attr "mode" "TI")])
17184 (define_insn "xop_vpermil2<mode>3"
17185 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
17187 [(match_operand:VF_128_256 1 "register_operand" "x,x")
17188 (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
17189 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
17190 (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
17193 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
17194 [(set_attr "type" "sse4arg")
17195 (set_attr "length_immediate" "1")
17196 (set_attr "mode" "<MODE>")])
17198 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17200 (define_insn "aesenc"
17201 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17202 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17203 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17207 aesenc\t{%2, %0|%0, %2}
17208 vaesenc\t{%2, %1, %0|%0, %1, %2}"
17209 [(set_attr "isa" "noavx,avx")
17210 (set_attr "type" "sselog1")
17211 (set_attr "prefix_extra" "1")
17212 (set_attr "prefix" "orig,vex")
17213 (set_attr "btver2_decode" "double,double")
17214 (set_attr "mode" "TI")])
17216 (define_insn "aesenclast"
17217 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17218 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17219 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17220 UNSPEC_AESENCLAST))]
17223 aesenclast\t{%2, %0|%0, %2}
17224 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
17225 [(set_attr "isa" "noavx,avx")
17226 (set_attr "type" "sselog1")
17227 (set_attr "prefix_extra" "1")
17228 (set_attr "prefix" "orig,vex")
17229 (set_attr "btver2_decode" "double,double")
17230 (set_attr "mode" "TI")])
17232 (define_insn "aesdec"
17233 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17234 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17235 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17239 aesdec\t{%2, %0|%0, %2}
17240 vaesdec\t{%2, %1, %0|%0, %1, %2}"
17241 [(set_attr "isa" "noavx,avx")
17242 (set_attr "type" "sselog1")
17243 (set_attr "prefix_extra" "1")
17244 (set_attr "prefix" "orig,vex")
17245 (set_attr "btver2_decode" "double,double")
17246 (set_attr "mode" "TI")])
17248 (define_insn "aesdeclast"
17249 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17250 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17251 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17252 UNSPEC_AESDECLAST))]
17255 aesdeclast\t{%2, %0|%0, %2}
17256 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
17257 [(set_attr "isa" "noavx,avx")
17258 (set_attr "type" "sselog1")
17259 (set_attr "prefix_extra" "1")
17260 (set_attr "prefix" "orig,vex")
17261 (set_attr "btver2_decode" "double,double")
17262 (set_attr "mode" "TI")])
17264 (define_insn "aesimc"
17265 [(set (match_operand:V2DI 0 "register_operand" "=x")
17266 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
17269 "%vaesimc\t{%1, %0|%0, %1}"
17270 [(set_attr "type" "sselog1")
17271 (set_attr "prefix_extra" "1")
17272 (set_attr "prefix" "maybe_vex")
17273 (set_attr "mode" "TI")])
17275 (define_insn "aeskeygenassist"
17276 [(set (match_operand:V2DI 0 "register_operand" "=x")
17277 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
17278 (match_operand:SI 2 "const_0_to_255_operand" "n")]
17279 UNSPEC_AESKEYGENASSIST))]
17281 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
17282 [(set_attr "type" "sselog1")
17283 (set_attr "prefix_extra" "1")
17284 (set_attr "length_immediate" "1")
17285 (set_attr "prefix" "maybe_vex")
17286 (set_attr "mode" "TI")])
17288 (define_insn "pclmulqdq"
17289 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17290 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17291 (match_operand:V2DI 2 "vector_operand" "xBm,xm")
17292 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
17296 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
17297 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17298 [(set_attr "isa" "noavx,avx")
17299 (set_attr "type" "sselog1")
17300 (set_attr "prefix_extra" "1")
17301 (set_attr "length_immediate" "1")
17302 (set_attr "prefix" "orig,vex")
17303 (set_attr "mode" "TI")])
17305 (define_expand "avx_vzeroall"
17306 [(match_par_dup 0 [(const_int 0)])]
17309 int nregs = TARGET_64BIT ? 16 : 8;
17312 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
17314 XVECEXP (operands[0], 0, 0)
17315 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
17318 for (regno = 0; regno < nregs; regno++)
17319 XVECEXP (operands[0], 0, regno + 1)
17320 = gen_rtx_SET (gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
17321 CONST0_RTX (V8SImode));
17324 (define_insn "*avx_vzeroall"
17325 [(match_parallel 0 "vzeroall_operation"
17326 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
17329 [(set_attr "type" "sse")
17330 (set_attr "modrm" "0")
17331 (set_attr "memory" "none")
17332 (set_attr "prefix" "vex")
17333 (set_attr "btver2_decode" "vector")
17334 (set_attr "mode" "OI")])
17336 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
17337 ;; if the upper 128bits are unused.
17338 (define_insn "avx_vzeroupper"
17339 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
17342 [(set_attr "type" "sse")
17343 (set_attr "modrm" "0")
17344 (set_attr "memory" "none")
17345 (set_attr "prefix" "vex")
17346 (set_attr "btver2_decode" "vector")
17347 (set_attr "mode" "OI")])
17349 (define_mode_attr pbroadcast_evex_isa
17350 [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
17351 (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
17352 (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
17353 (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
17355 (define_insn "avx2_pbroadcast<mode>"
17356 [(set (match_operand:VI 0 "register_operand" "=x,v")
17358 (vec_select:<ssescalarmode>
17359 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
17360 (parallel [(const_int 0)]))))]
17362 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
17363 [(set_attr "isa" "*,<pbroadcast_evex_isa>")
17364 (set_attr "type" "ssemov")
17365 (set_attr "prefix_extra" "1")
17366 (set_attr "prefix" "vex,evex")
17367 (set_attr "mode" "<sseinsnmode>")])
17369 (define_insn "avx2_pbroadcast<mode>_1"
17370 [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
17371 (vec_duplicate:VI_256
17372 (vec_select:<ssescalarmode>
17373 (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
17374 (parallel [(const_int 0)]))))]
17377 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
17378 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
17379 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
17380 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
17381 [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
17382 (set_attr "type" "ssemov")
17383 (set_attr "prefix_extra" "1")
17384 (set_attr "prefix" "vex")
17385 (set_attr "mode" "<sseinsnmode>")])
17387 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
17388 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
17389 (unspec:VI48F_256_512
17390 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
17391 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17393 "TARGET_AVX2 && <mask_mode512bit_condition>"
17394 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17395 [(set_attr "type" "sselog")
17396 (set_attr "prefix" "<mask_prefix2>")
17397 (set_attr "mode" "<sseinsnmode>")])
17399 (define_insn "<avx512>_permvar<mode><mask_name>"
17400 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17401 (unspec:VI1_AVX512VL
17402 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
17403 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17405 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
17406 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17407 [(set_attr "type" "sselog")
17408 (set_attr "prefix" "<mask_prefix2>")
17409 (set_attr "mode" "<sseinsnmode>")])
17411 (define_insn "<avx512>_permvar<mode><mask_name>"
17412 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17413 (unspec:VI2_AVX512VL
17414 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
17415 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17417 "TARGET_AVX512BW && <mask_mode512bit_condition>"
17418 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17419 [(set_attr "type" "sselog")
17420 (set_attr "prefix" "<mask_prefix2>")
17421 (set_attr "mode" "<sseinsnmode>")])
17423 (define_expand "avx2_perm<mode>"
17424 [(match_operand:VI8F_256 0 "register_operand")
17425 (match_operand:VI8F_256 1 "nonimmediate_operand")
17426 (match_operand:SI 2 "const_0_to_255_operand")]
17429 int mask = INTVAL (operands[2]);
17430 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
17431 GEN_INT ((mask >> 0) & 3),
17432 GEN_INT ((mask >> 2) & 3),
17433 GEN_INT ((mask >> 4) & 3),
17434 GEN_INT ((mask >> 6) & 3)));
17438 (define_expand "avx512vl_perm<mode>_mask"
17439 [(match_operand:VI8F_256 0 "register_operand")
17440 (match_operand:VI8F_256 1 "nonimmediate_operand")
17441 (match_operand:SI 2 "const_0_to_255_operand")
17442 (match_operand:VI8F_256 3 "vector_move_operand")
17443 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17446 int mask = INTVAL (operands[2]);
17447 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
17448 GEN_INT ((mask >> 0) & 3),
17449 GEN_INT ((mask >> 2) & 3),
17450 GEN_INT ((mask >> 4) & 3),
17451 GEN_INT ((mask >> 6) & 3),
17452 operands[3], operands[4]));
17456 (define_insn "avx2_perm<mode>_1<mask_name>"
17457 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17458 (vec_select:VI8F_256
17459 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
17460 (parallel [(match_operand 2 "const_0_to_3_operand")
17461 (match_operand 3 "const_0_to_3_operand")
17462 (match_operand 4 "const_0_to_3_operand")
17463 (match_operand 5 "const_0_to_3_operand")])))]
17464 "TARGET_AVX2 && <mask_mode512bit_condition>"
17467 mask |= INTVAL (operands[2]) << 0;
17468 mask |= INTVAL (operands[3]) << 2;
17469 mask |= INTVAL (operands[4]) << 4;
17470 mask |= INTVAL (operands[5]) << 6;
17471 operands[2] = GEN_INT (mask);
17472 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
17474 [(set_attr "type" "sselog")
17475 (set_attr "prefix" "<mask_prefix2>")
17476 (set_attr "mode" "<sseinsnmode>")])
17478 (define_expand "avx512f_perm<mode>"
17479 [(match_operand:V8FI 0 "register_operand")
17480 (match_operand:V8FI 1 "nonimmediate_operand")
17481 (match_operand:SI 2 "const_0_to_255_operand")]
17484 int mask = INTVAL (operands[2]);
17485 emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
17486 GEN_INT ((mask >> 0) & 3),
17487 GEN_INT ((mask >> 2) & 3),
17488 GEN_INT ((mask >> 4) & 3),
17489 GEN_INT ((mask >> 6) & 3),
17490 GEN_INT (((mask >> 0) & 3) + 4),
17491 GEN_INT (((mask >> 2) & 3) + 4),
17492 GEN_INT (((mask >> 4) & 3) + 4),
17493 GEN_INT (((mask >> 6) & 3) + 4)));
17497 (define_expand "avx512f_perm<mode>_mask"
17498 [(match_operand:V8FI 0 "register_operand")
17499 (match_operand:V8FI 1 "nonimmediate_operand")
17500 (match_operand:SI 2 "const_0_to_255_operand")
17501 (match_operand:V8FI 3 "vector_move_operand")
17502 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17505 int mask = INTVAL (operands[2]);
17506 emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
17507 GEN_INT ((mask >> 0) & 3),
17508 GEN_INT ((mask >> 2) & 3),
17509 GEN_INT ((mask >> 4) & 3),
17510 GEN_INT ((mask >> 6) & 3),
17511 GEN_INT (((mask >> 0) & 3) + 4),
17512 GEN_INT (((mask >> 2) & 3) + 4),
17513 GEN_INT (((mask >> 4) & 3) + 4),
17514 GEN_INT (((mask >> 6) & 3) + 4),
17515 operands[3], operands[4]));
17519 (define_insn "avx512f_perm<mode>_1<mask_name>"
17520 [(set (match_operand:V8FI 0 "register_operand" "=v")
17522 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
17523 (parallel [(match_operand 2 "const_0_to_3_operand")
17524 (match_operand 3 "const_0_to_3_operand")
17525 (match_operand 4 "const_0_to_3_operand")
17526 (match_operand 5 "const_0_to_3_operand")
17527 (match_operand 6 "const_4_to_7_operand")
17528 (match_operand 7 "const_4_to_7_operand")
17529 (match_operand 8 "const_4_to_7_operand")
17530 (match_operand 9 "const_4_to_7_operand")])))]
17531 "TARGET_AVX512F && <mask_mode512bit_condition>
17532 && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
17533 && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
17534 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
17535 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
17538 mask |= INTVAL (operands[2]) << 0;
17539 mask |= INTVAL (operands[3]) << 2;
17540 mask |= INTVAL (operands[4]) << 4;
17541 mask |= INTVAL (operands[5]) << 6;
17542 operands[2] = GEN_INT (mask);
17543 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
17545 [(set_attr "type" "sselog")
17546 (set_attr "prefix" "<mask_prefix2>")
17547 (set_attr "mode" "<sseinsnmode>")])
17549 (define_insn "avx2_permv2ti"
17550 [(set (match_operand:V4DI 0 "register_operand" "=x")
17552 [(match_operand:V4DI 1 "register_operand" "x")
17553 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
17554 (match_operand:SI 3 "const_0_to_255_operand" "n")]
17557 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17558 [(set_attr "type" "sselog")
17559 (set_attr "prefix" "vex")
17560 (set_attr "mode" "OI")])
17562 (define_insn "avx2_vec_dupv4df"
17563 [(set (match_operand:V4DF 0 "register_operand" "=v")
17564 (vec_duplicate:V4DF
17566 (match_operand:V2DF 1 "register_operand" "v")
17567 (parallel [(const_int 0)]))))]
17569 "vbroadcastsd\t{%1, %0|%0, %1}"
17570 [(set_attr "type" "sselog1")
17571 (set_attr "prefix" "maybe_evex")
17572 (set_attr "mode" "V4DF")])
17574 (define_insn "<avx512>_vec_dup<mode>_1"
17575 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
17576 (vec_duplicate:VI_AVX512BW
17577 (vec_select:<ssescalarmode>
17578 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
17579 (parallel [(const_int 0)]))))]
17582 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
17583 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
17584 [(set_attr "type" "ssemov")
17585 (set_attr "prefix" "evex")
17586 (set_attr "mode" "<sseinsnmode>")])
17588 (define_insn "<avx512>_vec_dup<mode><mask_name>"
17589 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
17590 (vec_duplicate:V48_AVX512VL
17591 (vec_select:<ssescalarmode>
17592 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17593 (parallel [(const_int 0)]))))]
17596 /* There is no DF broadcast (in AVX-512*) to 128b register.
17597 Mimic it with integer variant. */
17598 if (<MODE>mode == V2DFmode)
17599 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
17601 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
17602 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}";
17604 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
17606 [(set_attr "type" "ssemov")
17607 (set_attr "prefix" "evex")
17608 (set_attr "mode" "<sseinsnmode>")])
17610 (define_insn "<avx512>_vec_dup<mode><mask_name>"
17611 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
17612 (vec_duplicate:VI12_AVX512VL
17613 (vec_select:<ssescalarmode>
17614 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17615 (parallel [(const_int 0)]))))]
17617 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17618 [(set_attr "type" "ssemov")
17619 (set_attr "prefix" "evex")
17620 (set_attr "mode" "<sseinsnmode>")])
17622 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
17623 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
17624 (vec_duplicate:V16FI
17625 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
17628 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
17629 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17630 [(set_attr "type" "ssemov")
17631 (set_attr "prefix" "evex")
17632 (set_attr "mode" "<sseinsnmode>")])
17634 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
17635 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
17636 (vec_duplicate:V8FI
17637 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
17640 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
17641 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17642 [(set_attr "type" "ssemov")
17643 (set_attr "prefix" "evex")
17644 (set_attr "mode" "<sseinsnmode>")])
17646 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
17647 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
17648 (vec_duplicate:VI12_AVX512VL
17649 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
17652 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
17653 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
17654 [(set_attr "type" "ssemov")
17655 (set_attr "prefix" "evex")
17656 (set_attr "mode" "<sseinsnmode>")])
17658 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
17659 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
17660 (vec_duplicate:V48_AVX512VL
17661 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
17663 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17664 [(set_attr "type" "ssemov")
17665 (set_attr "prefix" "evex")
17666 (set_attr "mode" "<sseinsnmode>")
17667 (set (attr "enabled")
17668 (if_then_else (eq_attr "alternative" "1")
17669 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
17670 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
17673 (define_insn "vec_dupv4sf"
17674 [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
17675 (vec_duplicate:V4SF
17676 (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
17679 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
17680 vbroadcastss\t{%1, %0|%0, %1}
17681 shufps\t{$0, %0, %0|%0, %0, 0}"
17682 [(set_attr "isa" "avx,avx,noavx")
17683 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
17684 (set_attr "length_immediate" "1,0,1")
17685 (set_attr "prefix_extra" "0,1,*")
17686 (set_attr "prefix" "maybe_evex,maybe_evex,orig")
17687 (set_attr "mode" "V4SF")])
17689 (define_insn "*vec_dupv4si"
17690 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
17691 (vec_duplicate:V4SI
17692 (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
17695 %vpshufd\t{$0, %1, %0|%0, %1, 0}
17696 vbroadcastss\t{%1, %0|%0, %1}
17697 shufps\t{$0, %0, %0|%0, %0, 0}"
17698 [(set_attr "isa" "sse2,avx,noavx")
17699 (set_attr "type" "sselog1,ssemov,sselog1")
17700 (set_attr "length_immediate" "1,0,1")
17701 (set_attr "prefix_extra" "0,1,*")
17702 (set_attr "prefix" "maybe_vex,maybe_evex,orig")
17703 (set_attr "mode" "TI,V4SF,V4SF")])
17705 (define_insn "*vec_dupv2di"
17706 [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
17707 (vec_duplicate:V2DI
17708 (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,m,0")))]
17712 vpunpcklqdq\t{%d1, %0|%0, %d1}
17713 %vmovddup\t{%1, %0|%0, %1}
17715 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
17716 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
17717 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
17718 (set_attr "mode" "TI,TI,DF,V4SF")])
17720 (define_insn "avx2_vbroadcasti128_<mode>"
17721 [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
17723 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
17727 vbroadcasti128\t{%1, %0|%0, %1}
17728 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
17729 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
17730 [(set_attr "isa" "*,avx512dq,avx512vl")
17731 (set_attr "type" "ssemov")
17732 (set_attr "prefix_extra" "1")
17733 (set_attr "prefix" "vex,evex,evex")
17734 (set_attr "mode" "OI")])
17736 ;; Modes handled by AVX vec_dup patterns.
17737 (define_mode_iterator AVX_VEC_DUP_MODE
17738 [V8SI V8SF V4DI V4DF])
17739 ;; Modes handled by AVX2 vec_dup patterns.
17740 (define_mode_iterator AVX2_VEC_DUP_MODE
17741 [V32QI V16QI V16HI V8HI V8SI V4SI])
17743 (define_insn "*vec_dup<mode>"
17744 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi")
17745 (vec_duplicate:AVX2_VEC_DUP_MODE
17746 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
17749 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
17750 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
17752 [(set_attr "isa" "*,*,noavx512vl")
17753 (set_attr "type" "ssemov")
17754 (set_attr "prefix_extra" "1")
17755 (set_attr "prefix" "maybe_evex")
17756 (set_attr "mode" "<sseinsnmode>")])
17758 (define_insn "vec_dup<mode>"
17759 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
17760 (vec_duplicate:AVX_VEC_DUP_MODE
17761 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
17764 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
17765 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
17766 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
17767 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
17769 [(set_attr "type" "ssemov")
17770 (set_attr "prefix_extra" "1")
17771 (set_attr "prefix" "maybe_evex")
17772 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
17773 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
17776 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
17777 (vec_duplicate:AVX2_VEC_DUP_MODE
17778 (match_operand:<ssescalarmode> 1 "register_operand")))]
17780 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
17781 available, because then we can broadcast from GPRs directly.
17782 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
17783 for V*SI mode it requires just -mavx512vl. */
17784 && !(TARGET_AVX512VL
17785 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
17786 && reload_completed && GENERAL_REG_P (operands[1])"
17789 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
17790 CONST0_RTX (V4SImode),
17791 gen_lowpart (SImode, operands[1])));
17792 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
17793 gen_lowpart (<ssexmmmode>mode,
17799 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
17800 (vec_duplicate:AVX_VEC_DUP_MODE
17801 (match_operand:<ssescalarmode> 1 "register_operand")))]
17802 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
17803 [(set (match_dup 2)
17804 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
17806 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
17807 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
17809 (define_insn "avx_vbroadcastf128_<mode>"
17810 [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
17812 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
17816 vbroadcast<i128>\t{%1, %0|%0, %1}
17817 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
17818 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
17819 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
17820 vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
17821 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
17822 vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
17823 [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
17824 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
17825 (set_attr "prefix_extra" "1")
17826 (set_attr "length_immediate" "0,1,1,0,1,0,1")
17827 (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
17828 (set_attr "mode" "<sseinsnmode>")])
17830 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
17831 (define_mode_iterator VI4F_BRCST32x2
17832 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
17833 V16SF (V8SF "TARGET_AVX512VL")])
17835 (define_mode_attr 64x2mode
17836 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
17838 (define_mode_attr 32x2mode
17839 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
17840 (V8SF "V2SF") (V4SI "V2SI")])
17842 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
17843 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
17844 (vec_duplicate:VI4F_BRCST32x2
17845 (vec_select:<32x2mode>
17846 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17847 (parallel [(const_int 0) (const_int 1)]))))]
17849 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
17850 [(set_attr "type" "ssemov")
17851 (set_attr "prefix_extra" "1")
17852 (set_attr "prefix" "evex")
17853 (set_attr "mode" "<sseinsnmode>")])
17855 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
17856 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
17857 (vec_duplicate:VI4F_256
17858 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
17861 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
17862 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17863 [(set_attr "type" "ssemov")
17864 (set_attr "prefix_extra" "1")
17865 (set_attr "prefix" "evex")
17866 (set_attr "mode" "<sseinsnmode>")])
17868 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
17869 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
17870 (vec_duplicate:V16FI
17871 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
17874 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
17875 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17876 [(set_attr "type" "ssemov")
17877 (set_attr "prefix_extra" "1")
17878 (set_attr "prefix" "evex")
17879 (set_attr "mode" "<sseinsnmode>")])
17881 ;; For broadcast[i|f]64x2
17882 (define_mode_iterator VI8F_BRCST64x2
17883 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
17885 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
17886 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
17887 (vec_duplicate:VI8F_BRCST64x2
17888 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
17891 vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
17892 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17893 [(set_attr "type" "ssemov")
17894 (set_attr "prefix_extra" "1")
17895 (set_attr "prefix" "evex")
17896 (set_attr "mode" "<sseinsnmode>")])
17898 (define_insn "avx512cd_maskb_vec_dup<mode>"
17899 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
17900 (vec_duplicate:VI8_AVX512VL
17902 (match_operand:QI 1 "register_operand" "Yk"))))]
17904 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
17905 [(set_attr "type" "mskmov")
17906 (set_attr "prefix" "evex")
17907 (set_attr "mode" "XI")])
17909 (define_insn "avx512cd_maskw_vec_dup<mode>"
17910 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
17911 (vec_duplicate:VI4_AVX512VL
17913 (match_operand:HI 1 "register_operand" "Yk"))))]
17915 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
17916 [(set_attr "type" "mskmov")
17917 (set_attr "prefix" "evex")
17918 (set_attr "mode" "XI")])
17920 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
17921 ;; If it so happens that the input is in memory, use vbroadcast.
17922 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
17923 (define_insn "*avx_vperm_broadcast_v4sf"
17924 [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
17926 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
17927 (match_parallel 2 "avx_vbroadcast_operand"
17928 [(match_operand 3 "const_int_operand" "C,n,n")])))]
17931 int elt = INTVAL (operands[3]);
17932 switch (which_alternative)
17936 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
17937 return "vbroadcastss\t{%1, %0|%0, %k1}";
17939 operands[2] = GEN_INT (elt * 0x55);
17940 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
17942 gcc_unreachable ();
17945 [(set_attr "type" "ssemov,ssemov,sselog1")
17946 (set_attr "prefix_extra" "1")
17947 (set_attr "length_immediate" "0,0,1")
17948 (set_attr "prefix" "maybe_evex")
17949 (set_attr "mode" "SF,SF,V4SF")])
17951 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
17952 [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
17954 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
17955 (match_parallel 2 "avx_vbroadcast_operand"
17956 [(match_operand 3 "const_int_operand" "C,n,n")])))]
17959 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
17960 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
17962 rtx op0 = operands[0], op1 = operands[1];
17963 int elt = INTVAL (operands[3]);
17969 if (TARGET_AVX2 && elt == 0)
17971 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
17976 /* Shuffle element we care about into all elements of the 128-bit lane.
17977 The other lane gets shuffled too, but we don't care. */
17978 if (<MODE>mode == V4DFmode)
17979 mask = (elt & 1 ? 15 : 0);
17981 mask = (elt & 3) * 0x55;
17982 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
17984 /* Shuffle the lane we care about into both lanes of the dest. */
17985 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
17986 if (EXT_REX_SSE_REG_P (op0))
17988 /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
17990 gcc_assert (<MODE>mode == V8SFmode);
17991 if ((mask & 1) == 0)
17992 emit_insn (gen_avx2_vec_dupv8sf (op0,
17993 gen_lowpart (V4SFmode, op0)));
17995 emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
17996 GEN_INT (4), GEN_INT (5),
17997 GEN_INT (6), GEN_INT (7),
17998 GEN_INT (12), GEN_INT (13),
17999 GEN_INT (14), GEN_INT (15)));
18003 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
18007 operands[1] = adjust_address (op1, <ssescalarmode>mode,
18008 elt * GET_MODE_SIZE (<ssescalarmode>mode));
18011 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
18012 [(set (match_operand:VF2 0 "register_operand")
18014 (match_operand:VF2 1 "nonimmediate_operand")
18015 (match_operand:SI 2 "const_0_to_255_operand")))]
18016 "TARGET_AVX && <mask_mode512bit_condition>"
18018 int mask = INTVAL (operands[2]);
18019 rtx perm[<ssescalarnum>];
18022 for (i = 0; i < <ssescalarnum>; i = i + 2)
18024 perm[i] = GEN_INT (((mask >> i) & 1) + i);
18025 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
18029 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
18032 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
18033 [(set (match_operand:VF1 0 "register_operand")
18035 (match_operand:VF1 1 "nonimmediate_operand")
18036 (match_operand:SI 2 "const_0_to_255_operand")))]
18037 "TARGET_AVX && <mask_mode512bit_condition>"
18039 int mask = INTVAL (operands[2]);
18040 rtx perm[<ssescalarnum>];
18043 for (i = 0; i < <ssescalarnum>; i = i + 4)
18045 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
18046 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
18047 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
18048 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
18052 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
18055 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
18056 [(set (match_operand:VF 0 "register_operand" "=v")
18058 (match_operand:VF 1 "nonimmediate_operand" "vm")
18059 (match_parallel 2 ""
18060 [(match_operand 3 "const_int_operand")])))]
18061 "TARGET_AVX && <mask_mode512bit_condition>
18062 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
18064 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
18065 operands[2] = GEN_INT (mask);
18066 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
18068 [(set_attr "type" "sselog")
18069 (set_attr "prefix_extra" "1")
18070 (set_attr "length_immediate" "1")
18071 (set_attr "prefix" "<mask_prefix>")
18072 (set_attr "mode" "<sseinsnmode>")])
18074 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
18075 [(set (match_operand:VF 0 "register_operand" "=v")
18077 [(match_operand:VF 1 "register_operand" "v")
18078 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
18080 "TARGET_AVX && <mask_mode512bit_condition>"
18081 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18082 [(set_attr "type" "sselog")
18083 (set_attr "prefix_extra" "1")
18084 (set_attr "btver2_decode" "vector")
18085 (set_attr "prefix" "<mask_prefix>")
18086 (set_attr "mode" "<sseinsnmode>")])
18088 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
18089 [(match_operand:VI48F 0 "register_operand")
18090 (match_operand:VI48F 1 "register_operand")
18091 (match_operand:<sseintvecmode> 2 "register_operand")
18092 (match_operand:VI48F 3 "nonimmediate_operand")
18093 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18096 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
18097 operands[0], operands[1], operands[2], operands[3],
18098 CONST0_RTX (<MODE>mode), operands[4]));
18102 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
18103 [(match_operand:VI1_AVX512VL 0 "register_operand")
18104 (match_operand:VI1_AVX512VL 1 "register_operand")
18105 (match_operand:<sseintvecmode> 2 "register_operand")
18106 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
18107 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18108 "TARGET_AVX512VBMI"
18110 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
18111 operands[0], operands[1], operands[2], operands[3],
18112 CONST0_RTX (<MODE>mode), operands[4]));
18116 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
18117 [(match_operand:VI2_AVX512VL 0 "register_operand")
18118 (match_operand:VI2_AVX512VL 1 "register_operand")
18119 (match_operand:<sseintvecmode> 2 "register_operand")
18120 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand")
18121 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18124 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
18125 operands[0], operands[1], operands[2], operands[3],
18126 CONST0_RTX (<MODE>mode), operands[4]));
18130 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
18131 [(set (match_operand:VI48F 0 "register_operand" "=v")
18133 [(match_operand:VI48F 1 "register_operand" "v")
18134 (match_operand:<sseintvecmode> 2 "register_operand" "0")
18135 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
18138 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
18139 [(set_attr "type" "sselog")
18140 (set_attr "prefix" "evex")
18141 (set_attr "mode" "<sseinsnmode>")])
18143 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
18144 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
18145 (unspec:VI1_AVX512VL
18146 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
18147 (match_operand:<sseintvecmode> 2 "register_operand" "0")
18148 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
18150 "TARGET_AVX512VBMI"
18151 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
18152 [(set_attr "type" "sselog")
18153 (set_attr "prefix" "evex")
18154 (set_attr "mode" "<sseinsnmode>")])
18156 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
18157 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18158 (unspec:VI2_AVX512VL
18159 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
18160 (match_operand:<sseintvecmode> 2 "register_operand" "0")
18161 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
18164 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
18165 [(set_attr "type" "sselog")
18166 (set_attr "prefix" "evex")
18167 (set_attr "mode" "<sseinsnmode>")])
18169 (define_insn "<avx512>_vpermi2var<mode>3_mask"
18170 [(set (match_operand:VI48F 0 "register_operand" "=v")
18173 [(match_operand:VI48F 1 "register_operand" "v")
18174 (match_operand:<sseintvecmode> 2 "register_operand" "0")
18175 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
18176 UNSPEC_VPERMI2_MASK)
18178 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18180 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18181 [(set_attr "type" "sselog")
18182 (set_attr "prefix" "evex")
18183 (set_attr "mode" "<sseinsnmode>")])
18185 (define_insn "<avx512>_vpermi2var<mode>3_mask"
18186 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
18187 (vec_merge:VI1_AVX512VL
18188 (unspec:VI1_AVX512VL
18189 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
18190 (match_operand:<sseintvecmode> 2 "register_operand" "0")
18191 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
18192 UNSPEC_VPERMI2_MASK)
18194 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18195 "TARGET_AVX512VBMI"
18196 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18197 [(set_attr "type" "sselog")
18198 (set_attr "prefix" "evex")
18199 (set_attr "mode" "<sseinsnmode>")])
18201 (define_insn "<avx512>_vpermi2var<mode>3_mask"
18202 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18203 (vec_merge:VI2_AVX512VL
18204 (unspec:VI2_AVX512VL
18205 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
18206 (match_operand:<sseintvecmode> 2 "register_operand" "0")
18207 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
18208 UNSPEC_VPERMI2_MASK)
18210 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18212 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18213 [(set_attr "type" "sselog")
18214 (set_attr "prefix" "evex")
18215 (set_attr "mode" "<sseinsnmode>")])
18217 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
18218 [(match_operand:VI48F 0 "register_operand")
18219 (match_operand:<sseintvecmode> 1 "register_operand")
18220 (match_operand:VI48F 2 "register_operand")
18221 (match_operand:VI48F 3 "nonimmediate_operand")
18222 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18225 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
18226 operands[0], operands[1], operands[2], operands[3],
18227 CONST0_RTX (<MODE>mode), operands[4]));
18231 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
18232 [(match_operand:VI1_AVX512VL 0 "register_operand")
18233 (match_operand:<sseintvecmode> 1 "register_operand")
18234 (match_operand:VI1_AVX512VL 2 "register_operand")
18235 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
18236 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18237 "TARGET_AVX512VBMI"
18239 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
18240 operands[0], operands[1], operands[2], operands[3],
18241 CONST0_RTX (<MODE>mode), operands[4]));
18245 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
18246 [(match_operand:VI2_AVX512VL 0 "register_operand")
18247 (match_operand:<sseintvecmode> 1 "register_operand")
18248 (match_operand:VI2_AVX512VL 2 "register_operand")
18249 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand")
18250 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18253 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
18254 operands[0], operands[1], operands[2], operands[3],
18255 CONST0_RTX (<MODE>mode), operands[4]));
18259 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
18260 [(set (match_operand:VI48F 0 "register_operand" "=v")
18262 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
18263 (match_operand:VI48F 2 "register_operand" "0")
18264 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
18267 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
18268 [(set_attr "type" "sselog")
18269 (set_attr "prefix" "evex")
18270 (set_attr "mode" "<sseinsnmode>")])
18272 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
18273 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
18274 (unspec:VI1_AVX512VL
18275 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
18276 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
18277 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
18279 "TARGET_AVX512VBMI"
18280 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
18281 [(set_attr "type" "sselog")
18282 (set_attr "prefix" "evex")
18283 (set_attr "mode" "<sseinsnmode>")])
18285 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
18286 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18287 (unspec:VI2_AVX512VL
18288 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
18289 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
18290 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
18293 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
18294 [(set_attr "type" "sselog")
18295 (set_attr "prefix" "evex")
18296 (set_attr "mode" "<sseinsnmode>")])
18298 (define_insn "<avx512>_vpermt2var<mode>3_mask"
18299 [(set (match_operand:VI48F 0 "register_operand" "=v")
18302 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
18303 (match_operand:VI48F 2 "register_operand" "0")
18304 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
18307 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18309 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18310 [(set_attr "type" "sselog")
18311 (set_attr "prefix" "evex")
18312 (set_attr "mode" "<sseinsnmode>")])
18314 (define_insn "<avx512>_vpermt2var<mode>3_mask"
18315 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
18316 (vec_merge:VI1_AVX512VL
18317 (unspec:VI1_AVX512VL
18318 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
18319 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
18320 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
18323 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18324 "TARGET_AVX512VBMI"
18325 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18326 [(set_attr "type" "sselog")
18327 (set_attr "prefix" "evex")
18328 (set_attr "mode" "<sseinsnmode>")])
18330 (define_insn "<avx512>_vpermt2var<mode>3_mask"
18331 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18332 (vec_merge:VI2_AVX512VL
18333 (unspec:VI2_AVX512VL
18334 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
18335 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
18336 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
18339 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18341 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18342 [(set_attr "type" "sselog")
18343 (set_attr "prefix" "evex")
18344 (set_attr "mode" "<sseinsnmode>")])
18346 (define_expand "avx_vperm2f128<mode>3"
18347 [(set (match_operand:AVX256MODE2P 0 "register_operand")
18348 (unspec:AVX256MODE2P
18349 [(match_operand:AVX256MODE2P 1 "register_operand")
18350 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
18351 (match_operand:SI 3 "const_0_to_255_operand")]
18352 UNSPEC_VPERMIL2F128))]
18355 int mask = INTVAL (operands[3]);
18356 if ((mask & 0x88) == 0)
18358 rtx perm[<ssescalarnum>], t1, t2;
18359 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
18361 base = (mask & 3) * nelt2;
18362 for (i = 0; i < nelt2; ++i)
18363 perm[i] = GEN_INT (base + i);
18365 base = ((mask >> 4) & 3) * nelt2;
18366 for (i = 0; i < nelt2; ++i)
18367 perm[i + nelt2] = GEN_INT (base + i);
18369 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
18370 operands[1], operands[2]);
18371 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
18372 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
18373 t2 = gen_rtx_SET (operands[0], t2);
18379 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
18380 ;; means that in order to represent this properly in rtl we'd have to
18381 ;; nest *another* vec_concat with a zero operand and do the select from
18382 ;; a 4x wide vector. That doesn't seem very nice.
18383 (define_insn "*avx_vperm2f128<mode>_full"
18384 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
18385 (unspec:AVX256MODE2P
18386 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
18387 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
18388 (match_operand:SI 3 "const_0_to_255_operand" "n")]
18389 UNSPEC_VPERMIL2F128))]
18391 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18392 [(set_attr "type" "sselog")
18393 (set_attr "prefix_extra" "1")
18394 (set_attr "length_immediate" "1")
18395 (set_attr "prefix" "vex")
18396 (set_attr "mode" "<sseinsnmode>")])
18398 (define_insn "*avx_vperm2f128<mode>_nozero"
18399 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
18400 (vec_select:AVX256MODE2P
18401 (vec_concat:<ssedoublevecmode>
18402 (match_operand:AVX256MODE2P 1 "register_operand" "x")
18403 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
18404 (match_parallel 3 ""
18405 [(match_operand 4 "const_int_operand")])))]
18407 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
18409 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
18411 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
18413 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
18414 operands[3] = GEN_INT (mask);
18415 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
18417 [(set_attr "type" "sselog")
18418 (set_attr "prefix_extra" "1")
18419 (set_attr "length_immediate" "1")
18420 (set_attr "prefix" "vex")
18421 (set_attr "mode" "<sseinsnmode>")])
18423 (define_insn "*ssse3_palignr<mode>_perm"
18424 [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
18426 (match_operand:V_128 1 "register_operand" "0,x,v")
18427 (match_parallel 2 "palignr_operand"
18428 [(match_operand 3 "const_int_operand" "n,n,n")])))]
18431 operands[2] = (GEN_INT (INTVAL (operands[3])
18432 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
18434 switch (which_alternative)
18437 return "palignr\t{%2, %1, %0|%0, %1, %2}";
18440 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
18442 gcc_unreachable ();
18445 [(set_attr "isa" "noavx,avx,avx512bw")
18446 (set_attr "type" "sseishft")
18447 (set_attr "atom_unit" "sishuf")
18448 (set_attr "prefix_data16" "1,*,*")
18449 (set_attr "prefix_extra" "1")
18450 (set_attr "length_immediate" "1")
18451 (set_attr "prefix" "orig,vex,evex")])
18453 (define_expand "avx512vl_vinsert<mode>"
18454 [(match_operand:VI48F_256 0 "register_operand")
18455 (match_operand:VI48F_256 1 "register_operand")
18456 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
18457 (match_operand:SI 3 "const_0_to_1_operand")
18458 (match_operand:VI48F_256 4 "register_operand")
18459 (match_operand:<avx512fmaskmode> 5 "register_operand")]
18462 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18464 switch (INTVAL (operands[3]))
18467 insn = gen_vec_set_lo_<mode>_mask;
18470 insn = gen_vec_set_hi_<mode>_mask;
18473 gcc_unreachable ();
18476 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
18481 (define_expand "avx_vinsertf128<mode>"
18482 [(match_operand:V_256 0 "register_operand")
18483 (match_operand:V_256 1 "register_operand")
18484 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
18485 (match_operand:SI 3 "const_0_to_1_operand")]
18488 rtx (*insn)(rtx, rtx, rtx);
18490 switch (INTVAL (operands[3]))
18493 insn = gen_vec_set_lo_<mode>;
18496 insn = gen_vec_set_hi_<mode>;
18499 gcc_unreachable ();
18502 emit_insn (insn (operands[0], operands[1], operands[2]));
18506 (define_insn "vec_set_lo_<mode><mask_name>"
18507 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
18508 (vec_concat:VI8F_256
18509 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
18510 (vec_select:<ssehalfvecmode>
18511 (match_operand:VI8F_256 1 "register_operand" "v")
18512 (parallel [(const_int 2) (const_int 3)]))))]
18513 "TARGET_AVX && <mask_avx512dq_condition>"
18515 if (TARGET_AVX512DQ)
18516 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18517 else if (TARGET_AVX512VL)
18518 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18520 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
18522 [(set_attr "type" "sselog")
18523 (set_attr "prefix_extra" "1")
18524 (set_attr "length_immediate" "1")
18525 (set_attr "prefix" "vex")
18526 (set_attr "mode" "<sseinsnmode>")])
18528 (define_insn "vec_set_hi_<mode><mask_name>"
18529 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
18530 (vec_concat:VI8F_256
18531 (vec_select:<ssehalfvecmode>
18532 (match_operand:VI8F_256 1 "register_operand" "v")
18533 (parallel [(const_int 0) (const_int 1)]))
18534 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
18535 "TARGET_AVX && <mask_avx512dq_condition>"
18537 if (TARGET_AVX512DQ)
18538 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18539 else if (TARGET_AVX512VL)
18540 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18542 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
18544 [(set_attr "type" "sselog")
18545 (set_attr "prefix_extra" "1")
18546 (set_attr "length_immediate" "1")
18547 (set_attr "prefix" "vex")
18548 (set_attr "mode" "<sseinsnmode>")])
18550 (define_insn "vec_set_lo_<mode><mask_name>"
18551 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
18552 (vec_concat:VI4F_256
18553 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
18554 (vec_select:<ssehalfvecmode>
18555 (match_operand:VI4F_256 1 "register_operand" "v")
18556 (parallel [(const_int 4) (const_int 5)
18557 (const_int 6) (const_int 7)]))))]
18560 if (TARGET_AVX512VL)
18561 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18563 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
18565 [(set_attr "type" "sselog")
18566 (set_attr "prefix_extra" "1")
18567 (set_attr "length_immediate" "1")
18568 (set_attr "prefix" "vex")
18569 (set_attr "mode" "<sseinsnmode>")])
18571 (define_insn "vec_set_hi_<mode><mask_name>"
18572 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
18573 (vec_concat:VI4F_256
18574 (vec_select:<ssehalfvecmode>
18575 (match_operand:VI4F_256 1 "register_operand" "v")
18576 (parallel [(const_int 0) (const_int 1)
18577 (const_int 2) (const_int 3)]))
18578 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
18581 if (TARGET_AVX512VL)
18582 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18584 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
18586 [(set_attr "type" "sselog")
18587 (set_attr "prefix_extra" "1")
18588 (set_attr "length_immediate" "1")
18589 (set_attr "prefix" "vex")
18590 (set_attr "mode" "<sseinsnmode>")])
18592 (define_insn "vec_set_lo_v16hi"
18593 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
18595 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
18597 (match_operand:V16HI 1 "register_operand" "x,v")
18598 (parallel [(const_int 8) (const_int 9)
18599 (const_int 10) (const_int 11)
18600 (const_int 12) (const_int 13)
18601 (const_int 14) (const_int 15)]))))]
18604 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
18605 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
18606 [(set_attr "type" "sselog")
18607 (set_attr "prefix_extra" "1")
18608 (set_attr "length_immediate" "1")
18609 (set_attr "prefix" "vex,evex")
18610 (set_attr "mode" "OI")])
18612 (define_insn "vec_set_hi_v16hi"
18613 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
18616 (match_operand:V16HI 1 "register_operand" "x,v")
18617 (parallel [(const_int 0) (const_int 1)
18618 (const_int 2) (const_int 3)
18619 (const_int 4) (const_int 5)
18620 (const_int 6) (const_int 7)]))
18621 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
18624 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
18625 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
18626 [(set_attr "type" "sselog")
18627 (set_attr "prefix_extra" "1")
18628 (set_attr "length_immediate" "1")
18629 (set_attr "prefix" "vex,evex")
18630 (set_attr "mode" "OI")])
18632 (define_insn "vec_set_lo_v32qi"
18633 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
18635 (match_operand:V16QI 2 "nonimmediate_operand" "xm,v")
18637 (match_operand:V32QI 1 "register_operand" "x,v")
18638 (parallel [(const_int 16) (const_int 17)
18639 (const_int 18) (const_int 19)
18640 (const_int 20) (const_int 21)
18641 (const_int 22) (const_int 23)
18642 (const_int 24) (const_int 25)
18643 (const_int 26) (const_int 27)
18644 (const_int 28) (const_int 29)
18645 (const_int 30) (const_int 31)]))))]
18648 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
18649 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
18650 [(set_attr "type" "sselog")
18651 (set_attr "prefix_extra" "1")
18652 (set_attr "length_immediate" "1")
18653 (set_attr "prefix" "vex,evex")
18654 (set_attr "mode" "OI")])
18656 (define_insn "vec_set_hi_v32qi"
18657 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
18660 (match_operand:V32QI 1 "register_operand" "x,v")
18661 (parallel [(const_int 0) (const_int 1)
18662 (const_int 2) (const_int 3)
18663 (const_int 4) (const_int 5)
18664 (const_int 6) (const_int 7)
18665 (const_int 8) (const_int 9)
18666 (const_int 10) (const_int 11)
18667 (const_int 12) (const_int 13)
18668 (const_int 14) (const_int 15)]))
18669 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
18672 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
18673 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
18674 [(set_attr "type" "sselog")
18675 (set_attr "prefix_extra" "1")
18676 (set_attr "length_immediate" "1")
18677 (set_attr "prefix" "vex,evex")
18678 (set_attr "mode" "OI")])
18680 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
18681 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
18683 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
18684 (match_operand:V48_AVX2 1 "memory_operand" "m")]
18687 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
18688 [(set_attr "type" "sselog1")
18689 (set_attr "prefix_extra" "1")
18690 (set_attr "prefix" "vex")
18691 (set_attr "btver2_decode" "vector")
18692 (set_attr "mode" "<sseinsnmode>")])
18694 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
18695 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
18697 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
18698 (match_operand:V48_AVX2 2 "register_operand" "x")
18702 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18703 [(set_attr "type" "sselog1")
18704 (set_attr "prefix_extra" "1")
18705 (set_attr "prefix" "vex")
18706 (set_attr "btver2_decode" "vector")
18707 (set_attr "mode" "<sseinsnmode>")])
18709 (define_expand "maskload<mode><sseintvecmodelower>"
18710 [(set (match_operand:V48_AVX2 0 "register_operand")
18712 [(match_operand:<sseintvecmode> 2 "register_operand")
18713 (match_operand:V48_AVX2 1 "memory_operand")]
18717 (define_expand "maskload<mode><avx512fmaskmodelower>"
18718 [(set (match_operand:V48_AVX512VL 0 "register_operand")
18719 (vec_merge:V48_AVX512VL
18720 (match_operand:V48_AVX512VL 1 "memory_operand")
18722 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18725 (define_expand "maskload<mode><avx512fmaskmodelower>"
18726 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
18727 (vec_merge:VI12_AVX512VL
18728 (match_operand:VI12_AVX512VL 1 "memory_operand")
18730 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18733 (define_expand "maskstore<mode><sseintvecmodelower>"
18734 [(set (match_operand:V48_AVX2 0 "memory_operand")
18736 [(match_operand:<sseintvecmode> 2 "register_operand")
18737 (match_operand:V48_AVX2 1 "register_operand")
18742 (define_expand "maskstore<mode><avx512fmaskmodelower>"
18743 [(set (match_operand:V48_AVX512VL 0 "memory_operand")
18744 (vec_merge:V48_AVX512VL
18745 (match_operand:V48_AVX512VL 1 "register_operand")
18747 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18750 (define_expand "maskstore<mode><avx512fmaskmodelower>"
18751 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
18752 (vec_merge:VI12_AVX512VL
18753 (match_operand:VI12_AVX512VL 1 "register_operand")
18755 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18758 (define_expand "cbranch<mode>4"
18759 [(set (reg:CC FLAGS_REG)
18760 (compare:CC (match_operand:VI48_AVX 1 "register_operand")
18761 (match_operand:VI48_AVX 2 "nonimmediate_operand")))
18762 (set (pc) (if_then_else
18763 (match_operator 0 "bt_comparison_operator"
18764 [(reg:CC FLAGS_REG) (const_int 0)])
18765 (label_ref (match_operand 3))
18769 ix86_expand_branch (GET_CODE (operands[0]),
18770 operands[1], operands[2], operands[3]);
18775 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
18776 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
18777 (unspec:AVX256MODE2P
18778 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
18780 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
18782 "&& reload_completed"
18783 [(set (match_dup 0) (match_dup 1))]
18785 if (REG_P (operands[0]))
18786 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
18788 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
18789 <ssehalfvecmode>mode);
18792 ;; Modes handled by vec_init expanders.
18793 (define_mode_iterator VEC_INIT_MODE
18794 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
18795 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
18796 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
18797 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
18798 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
18799 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
18800 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
18802 ;; Likewise, but for initialization from half sized vectors.
18803 ;; Thus, these are all VEC_INIT_MODE modes except V2??.
18804 (define_mode_iterator VEC_INIT_HALF_MODE
18805 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
18806 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
18807 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
18808 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
18809 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
18810 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
18811 (V4TI "TARGET_AVX512F")])
18813 (define_expand "vec_init<mode><ssescalarmodelower>"
18814 [(match_operand:VEC_INIT_MODE 0 "register_operand")
18818 ix86_expand_vector_init (false, operands[0], operands[1]);
18822 (define_expand "vec_init<mode><ssehalfvecmodelower>"
18823 [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
18827 ix86_expand_vector_init (false, operands[0], operands[1]);
18831 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
18832 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
18833 (ashiftrt:VI48_AVX512F_AVX512VL
18834 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
18835 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
18836 "TARGET_AVX2 && <mask_mode512bit_condition>"
18837 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18838 [(set_attr "type" "sseishft")
18839 (set_attr "prefix" "maybe_evex")
18840 (set_attr "mode" "<sseinsnmode>")])
18842 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
18843 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18844 (ashiftrt:VI2_AVX512VL
18845 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
18846 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
18848 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18849 [(set_attr "type" "sseishft")
18850 (set_attr "prefix" "maybe_evex")
18851 (set_attr "mode" "<sseinsnmode>")])
18853 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
18854 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
18855 (any_lshift:VI48_AVX512F
18856 (match_operand:VI48_AVX512F 1 "register_operand" "v")
18857 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
18858 "TARGET_AVX2 && <mask_mode512bit_condition>"
18859 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18860 [(set_attr "type" "sseishft")
18861 (set_attr "prefix" "maybe_evex")
18862 (set_attr "mode" "<sseinsnmode>")])
18864 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
18865 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18866 (any_lshift:VI2_AVX512VL
18867 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
18868 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
18870 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18871 [(set_attr "type" "sseishft")
18872 (set_attr "prefix" "maybe_evex")
18873 (set_attr "mode" "<sseinsnmode>")])
18875 (define_insn "avx_vec_concat<mode>"
18876 [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
18877 (vec_concat:V_256_512
18878 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,v,x,v")
18879 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,vm,C,C")))]
18882 switch (which_alternative)
18885 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18887 if (<MODE_SIZE> == 64)
18889 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
18890 return "vinsert<shuffletype>32x8\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18892 return "vinsert<shuffletype>64x4\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18896 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18897 return "vinsert<shuffletype>64x2\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18899 return "vinsert<shuffletype>32x4\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18903 switch (get_attr_mode (insn))
18906 return "vmovaps\t{%1, %t0|%t0, %1}";
18908 return "vmovapd\t{%1, %t0|%t0, %1}";
18910 return "vmovaps\t{%1, %x0|%x0, %1}";
18912 return "vmovapd\t{%1, %x0|%x0, %1}";
18914 if (which_alternative == 2)
18915 return "vmovdqa\t{%1, %t0|%t0, %1}";
18916 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18917 return "vmovdqa64\t{%1, %t0|%t0, %1}";
18919 return "vmovdqa32\t{%1, %t0|%t0, %1}";
18921 if (which_alternative == 2)
18922 return "vmovdqa\t{%1, %x0|%x0, %1}";
18923 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18924 return "vmovdqa64\t{%1, %x0|%x0, %1}";
18926 return "vmovdqa32\t{%1, %x0|%x0, %1}";
18928 gcc_unreachable ();
18931 gcc_unreachable ();
18934 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
18935 (set_attr "prefix_extra" "1,1,*,*")
18936 (set_attr "length_immediate" "1,1,*,*")
18937 (set_attr "prefix" "maybe_evex")
18938 (set_attr "mode" "<sseinsnmode>")])
18940 (define_insn "vcvtph2ps<mask_name>"
18941 [(set (match_operand:V4SF 0 "register_operand" "=v")
18943 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
18945 (parallel [(const_int 0) (const_int 1)
18946 (const_int 2) (const_int 3)])))]
18947 "TARGET_F16C || TARGET_AVX512VL"
18948 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18949 [(set_attr "type" "ssecvt")
18950 (set_attr "prefix" "maybe_evex")
18951 (set_attr "mode" "V4SF")])
18953 (define_insn "*vcvtph2ps_load<mask_name>"
18954 [(set (match_operand:V4SF 0 "register_operand" "=v")
18955 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
18956 UNSPEC_VCVTPH2PS))]
18957 "TARGET_F16C || TARGET_AVX512VL"
18958 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18959 [(set_attr "type" "ssecvt")
18960 (set_attr "prefix" "vex")
18961 (set_attr "mode" "V8SF")])
18963 (define_insn "vcvtph2ps256<mask_name>"
18964 [(set (match_operand:V8SF 0 "register_operand" "=v")
18965 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
18966 UNSPEC_VCVTPH2PS))]
18967 "TARGET_F16C || TARGET_AVX512VL"
18968 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18969 [(set_attr "type" "ssecvt")
18970 (set_attr "prefix" "vex")
18971 (set_attr "btver2_decode" "double")
18972 (set_attr "mode" "V8SF")])
18974 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
18975 [(set (match_operand:V16SF 0 "register_operand" "=v")
18977 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18978 UNSPEC_VCVTPH2PS))]
18980 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18981 [(set_attr "type" "ssecvt")
18982 (set_attr "prefix" "evex")
18983 (set_attr "mode" "V16SF")])
18985 (define_expand "vcvtps2ph_mask"
18986 [(set (match_operand:V8HI 0 "register_operand")
18989 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
18990 (match_operand:SI 2 "const_0_to_255_operand")]
18993 (match_operand:V8HI 3 "vector_move_operand")
18994 (match_operand:QI 4 "register_operand")))]
18996 "operands[5] = CONST0_RTX (V4HImode);")
18998 (define_expand "vcvtps2ph"
18999 [(set (match_operand:V8HI 0 "register_operand")
19001 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
19002 (match_operand:SI 2 "const_0_to_255_operand")]
19006 "operands[3] = CONST0_RTX (V4HImode);")
19008 (define_insn "*vcvtps2ph<mask_name>"
19009 [(set (match_operand:V8HI 0 "register_operand" "=v")
19011 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
19012 (match_operand:SI 2 "const_0_to_255_operand" "N")]
19014 (match_operand:V4HI 3 "const0_operand")))]
19015 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
19016 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
19017 [(set_attr "type" "ssecvt")
19018 (set_attr "prefix" "maybe_evex")
19019 (set_attr "mode" "V4SF")])
19021 (define_insn "*vcvtps2ph_store<mask_name>"
19022 [(set (match_operand:V4HI 0 "memory_operand" "=m")
19023 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
19024 (match_operand:SI 2 "const_0_to_255_operand" "N")]
19025 UNSPEC_VCVTPS2PH))]
19026 "TARGET_F16C || TARGET_AVX512VL"
19027 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19028 [(set_attr "type" "ssecvt")
19029 (set_attr "prefix" "maybe_evex")
19030 (set_attr "mode" "V4SF")])
19032 (define_insn "vcvtps2ph256<mask_name>"
19033 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm")
19034 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
19035 (match_operand:SI 2 "const_0_to_255_operand" "N")]
19036 UNSPEC_VCVTPS2PH))]
19037 "TARGET_F16C || TARGET_AVX512VL"
19038 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19039 [(set_attr "type" "ssecvt")
19040 (set_attr "prefix" "maybe_evex")
19041 (set_attr "btver2_decode" "vector")
19042 (set_attr "mode" "V8SF")])
19044 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
19045 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
19047 [(match_operand:V16SF 1 "register_operand" "v")
19048 (match_operand:SI 2 "const_0_to_255_operand" "N")]
19049 UNSPEC_VCVTPS2PH))]
19051 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19052 [(set_attr "type" "ssecvt")
19053 (set_attr "prefix" "evex")
19054 (set_attr "mode" "V16SF")])
19056 ;; For gather* insn patterns
19057 (define_mode_iterator VEC_GATHER_MODE
19058 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
19059 (define_mode_attr VEC_GATHER_IDXSI
19060 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
19061 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
19062 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
19063 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
19065 (define_mode_attr VEC_GATHER_IDXDI
19066 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
19067 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
19068 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
19069 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
19071 (define_mode_attr VEC_GATHER_SRCDI
19072 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
19073 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
19074 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
19075 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
19077 (define_expand "avx2_gathersi<mode>"
19078 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
19079 (unspec:VEC_GATHER_MODE
19080 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
19081 (mem:<ssescalarmode>
19083 [(match_operand 2 "vsib_address_operand")
19084 (match_operand:<VEC_GATHER_IDXSI>
19085 3 "register_operand")
19086 (match_operand:SI 5 "const1248_operand ")]))
19087 (mem:BLK (scratch))
19088 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
19090 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
19094 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19095 operands[5]), UNSPEC_VSIBADDR);
19098 (define_insn "*avx2_gathersi<mode>"
19099 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
19100 (unspec:VEC_GATHER_MODE
19101 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
19102 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
19104 [(match_operand:P 3 "vsib_address_operand" "Tv")
19105 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
19106 (match_operand:SI 6 "const1248_operand" "n")]
19108 (mem:BLK (scratch))
19109 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
19111 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
19113 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
19114 [(set_attr "type" "ssemov")
19115 (set_attr "prefix" "vex")
19116 (set_attr "mode" "<sseinsnmode>")])
19118 (define_insn "*avx2_gathersi<mode>_2"
19119 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
19120 (unspec:VEC_GATHER_MODE
19122 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19124 [(match_operand:P 2 "vsib_address_operand" "Tv")
19125 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
19126 (match_operand:SI 5 "const1248_operand" "n")]
19128 (mem:BLK (scratch))
19129 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
19131 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
19133 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
19134 [(set_attr "type" "ssemov")
19135 (set_attr "prefix" "vex")
19136 (set_attr "mode" "<sseinsnmode>")])
19138 (define_expand "avx2_gatherdi<mode>"
19139 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
19140 (unspec:VEC_GATHER_MODE
19141 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
19142 (mem:<ssescalarmode>
19144 [(match_operand 2 "vsib_address_operand")
19145 (match_operand:<VEC_GATHER_IDXDI>
19146 3 "register_operand")
19147 (match_operand:SI 5 "const1248_operand ")]))
19148 (mem:BLK (scratch))
19149 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
19151 (clobber (match_scratch:VEC_GATHER_MODE 7))])]
19155 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19156 operands[5]), UNSPEC_VSIBADDR);
19159 (define_insn "*avx2_gatherdi<mode>"
19160 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
19161 (unspec:VEC_GATHER_MODE
19162 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
19163 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
19165 [(match_operand:P 3 "vsib_address_operand" "Tv")
19166 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
19167 (match_operand:SI 6 "const1248_operand" "n")]
19169 (mem:BLK (scratch))
19170 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
19172 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
19174 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
19175 [(set_attr "type" "ssemov")
19176 (set_attr "prefix" "vex")
19177 (set_attr "mode" "<sseinsnmode>")])
19179 (define_insn "*avx2_gatherdi<mode>_2"
19180 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
19181 (unspec:VEC_GATHER_MODE
19183 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19185 [(match_operand:P 2 "vsib_address_operand" "Tv")
19186 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
19187 (match_operand:SI 5 "const1248_operand" "n")]
19189 (mem:BLK (scratch))
19190 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
19192 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
19195 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
19196 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
19197 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
19199 [(set_attr "type" "ssemov")
19200 (set_attr "prefix" "vex")
19201 (set_attr "mode" "<sseinsnmode>")])
19203 (define_insn "*avx2_gatherdi<mode>_3"
19204 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
19205 (vec_select:<VEC_GATHER_SRCDI>
19207 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
19208 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
19210 [(match_operand:P 3 "vsib_address_operand" "Tv")
19211 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
19212 (match_operand:SI 6 "const1248_operand" "n")]
19214 (mem:BLK (scratch))
19215 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
19217 (parallel [(const_int 0) (const_int 1)
19218 (const_int 2) (const_int 3)])))
19219 (clobber (match_scratch:VI4F_256 1 "=&x"))]
19221 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
19222 [(set_attr "type" "ssemov")
19223 (set_attr "prefix" "vex")
19224 (set_attr "mode" "<sseinsnmode>")])
19226 (define_insn "*avx2_gatherdi<mode>_4"
19227 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
19228 (vec_select:<VEC_GATHER_SRCDI>
19231 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19233 [(match_operand:P 2 "vsib_address_operand" "Tv")
19234 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
19235 (match_operand:SI 5 "const1248_operand" "n")]
19237 (mem:BLK (scratch))
19238 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
19240 (parallel [(const_int 0) (const_int 1)
19241 (const_int 2) (const_int 3)])))
19242 (clobber (match_scratch:VI4F_256 1 "=&x"))]
19244 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
19245 [(set_attr "type" "ssemov")
19246 (set_attr "prefix" "vex")
19247 (set_attr "mode" "<sseinsnmode>")])
19249 ;; Memory operand override for -masm=intel of the v*gatherq* patterns.
19250 (define_mode_attr gatherq_mode
19251 [(V4SI "q") (V2DI "x") (V4SF "q") (V2DF "x")
19252 (V8SI "x") (V4DI "t") (V8SF "x") (V4DF "t")
19253 (V16SI "t") (V8DI "g") (V16SF "t") (V8DF "g")])
19255 (define_expand "<avx512>_gathersi<mode>"
19256 [(parallel [(set (match_operand:VI48F 0 "register_operand")
19258 [(match_operand:VI48F 1 "register_operand")
19259 (match_operand:<avx512fmaskmode> 4 "register_operand")
19260 (mem:<ssescalarmode>
19262 [(match_operand 2 "vsib_address_operand")
19263 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
19264 (match_operand:SI 5 "const1248_operand")]))]
19266 (clobber (match_scratch:<avx512fmaskmode> 7))])]
19270 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19271 operands[5]), UNSPEC_VSIBADDR);
19274 (define_insn "*avx512f_gathersi<mode>"
19275 [(set (match_operand:VI48F 0 "register_operand" "=&v")
19277 [(match_operand:VI48F 1 "register_operand" "0")
19278 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
19279 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19281 [(match_operand:P 4 "vsib_address_operand" "Tv")
19282 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
19283 (match_operand:SI 5 "const1248_operand" "n")]
19284 UNSPEC_VSIBADDR)])]
19286 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
19288 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %<xtg_mode>6}"
19289 [(set_attr "type" "ssemov")
19290 (set_attr "prefix" "evex")
19291 (set_attr "mode" "<sseinsnmode>")])
19293 (define_insn "*avx512f_gathersi<mode>_2"
19294 [(set (match_operand:VI48F 0 "register_operand" "=&v")
19297 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
19298 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
19300 [(match_operand:P 3 "vsib_address_operand" "Tv")
19301 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
19302 (match_operand:SI 4 "const1248_operand" "n")]
19303 UNSPEC_VSIBADDR)])]
19305 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
19307 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<xtg_mode>5}"
19308 [(set_attr "type" "ssemov")
19309 (set_attr "prefix" "evex")
19310 (set_attr "mode" "<sseinsnmode>")])
19313 (define_expand "<avx512>_gatherdi<mode>"
19314 [(parallel [(set (match_operand:VI48F 0 "register_operand")
19316 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
19317 (match_operand:QI 4 "register_operand")
19318 (mem:<ssescalarmode>
19320 [(match_operand 2 "vsib_address_operand")
19321 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
19322 (match_operand:SI 5 "const1248_operand")]))]
19324 (clobber (match_scratch:QI 7))])]
19328 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
19329 operands[5]), UNSPEC_VSIBADDR);
19332 (define_insn "*avx512f_gatherdi<mode>"
19333 [(set (match_operand:VI48F 0 "register_operand" "=&v")
19335 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
19336 (match_operand:QI 7 "register_operand" "2")
19337 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
19339 [(match_operand:P 4 "vsib_address_operand" "Tv")
19340 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
19341 (match_operand:SI 5 "const1248_operand" "n")]
19342 UNSPEC_VSIBADDR)])]
19344 (clobber (match_scratch:QI 2 "=&Yk"))]
19347 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %<gatherq_mode>6}";
19349 [(set_attr "type" "ssemov")
19350 (set_attr "prefix" "evex")
19351 (set_attr "mode" "<sseinsnmode>")])
19353 (define_insn "*avx512f_gatherdi<mode>_2"
19354 [(set (match_operand:VI48F 0 "register_operand" "=&v")
19357 (match_operand:QI 6 "register_operand" "1")
19358 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
19360 [(match_operand:P 3 "vsib_address_operand" "Tv")
19361 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
19362 (match_operand:SI 4 "const1248_operand" "n")]
19363 UNSPEC_VSIBADDR)])]
19365 (clobber (match_scratch:QI 1 "=&Yk"))]
19368 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
19370 if (<MODE_SIZE> != 64)
19371 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %<gatherq_mode>5}";
19373 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %t5}";
19375 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<gatherq_mode>5}";
19377 [(set_attr "type" "ssemov")
19378 (set_attr "prefix" "evex")
19379 (set_attr "mode" "<sseinsnmode>")])
19381 (define_expand "<avx512>_scattersi<mode>"
19382 [(parallel [(set (mem:VI48F
19384 [(match_operand 0 "vsib_address_operand")
19385 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
19386 (match_operand:SI 4 "const1248_operand")]))
19388 [(match_operand:<avx512fmaskmode> 1 "register_operand")
19389 (match_operand:VI48F 3 "register_operand")]
19391 (clobber (match_scratch:<avx512fmaskmode> 6))])]
19395 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
19396 operands[4]), UNSPEC_VSIBADDR);
19399 (define_insn "*avx512f_scattersi<mode>"
19400 [(set (match_operator:VI48F 5 "vsib_mem_operator"
19402 [(match_operand:P 0 "vsib_address_operand" "Tv")
19403 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
19404 (match_operand:SI 4 "const1248_operand" "n")]
19407 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
19408 (match_operand:VI48F 3 "register_operand" "v")]
19410 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
19412 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
19413 [(set_attr "type" "ssemov")
19414 (set_attr "prefix" "evex")
19415 (set_attr "mode" "<sseinsnmode>")])
19417 (define_expand "<avx512>_scatterdi<mode>"
19418 [(parallel [(set (mem:VI48F
19420 [(match_operand 0 "vsib_address_operand")
19421 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
19422 (match_operand:SI 4 "const1248_operand")]))
19424 [(match_operand:QI 1 "register_operand")
19425 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
19427 (clobber (match_scratch:QI 6))])]
19431 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
19432 operands[4]), UNSPEC_VSIBADDR);
19435 (define_insn "*avx512f_scatterdi<mode>"
19436 [(set (match_operator:VI48F 5 "vsib_mem_operator"
19438 [(match_operand:P 0 "vsib_address_operand" "Tv")
19439 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
19440 (match_operand:SI 4 "const1248_operand" "n")]
19443 [(match_operand:QI 6 "register_operand" "1")
19444 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
19446 (clobber (match_scratch:QI 1 "=&Yk"))]
19449 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 8)
19450 return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}";
19451 return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%t5%{%1%}, %3}";
19453 [(set_attr "type" "ssemov")
19454 (set_attr "prefix" "evex")
19455 (set_attr "mode" "<sseinsnmode>")])
19457 (define_insn "<avx512>_compress<mode>_mask"
19458 [(set (match_operand:VI48F 0 "register_operand" "=v")
19460 [(match_operand:VI48F 1 "register_operand" "v")
19461 (match_operand:VI48F 2 "vector_move_operand" "0C")
19462 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
19465 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
19466 [(set_attr "type" "ssemov")
19467 (set_attr "prefix" "evex")
19468 (set_attr "mode" "<sseinsnmode>")])
19470 (define_insn "<avx512>_compressstore<mode>_mask"
19471 [(set (match_operand:VI48F 0 "memory_operand" "=m")
19473 [(match_operand:VI48F 1 "register_operand" "x")
19475 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
19476 UNSPEC_COMPRESS_STORE))]
19478 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
19479 [(set_attr "type" "ssemov")
19480 (set_attr "prefix" "evex")
19481 (set_attr "memory" "store")
19482 (set_attr "mode" "<sseinsnmode>")])
19484 (define_expand "<avx512>_expand<mode>_maskz"
19485 [(set (match_operand:VI48F 0 "register_operand")
19487 [(match_operand:VI48F 1 "nonimmediate_operand")
19488 (match_operand:VI48F 2 "vector_move_operand")
19489 (match_operand:<avx512fmaskmode> 3 "register_operand")]
19492 "operands[2] = CONST0_RTX (<MODE>mode);")
19494 (define_insn "<avx512>_expand<mode>_mask"
19495 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
19497 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
19498 (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
19499 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
19502 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
19503 [(set_attr "type" "ssemov")
19504 (set_attr "prefix" "evex")
19505 (set_attr "memory" "none,load")
19506 (set_attr "mode" "<sseinsnmode>")])
19508 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
19509 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
19510 (unspec:VF_AVX512VL
19511 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
19512 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
19513 (match_operand:SI 3 "const_0_to_15_operand")]
19515 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
19516 "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
19517 [(set_attr "type" "sse")
19518 (set_attr "prefix" "evex")
19519 (set_attr "mode" "<MODE>")])
19521 (define_insn "avx512dq_ranges<mode><round_saeonly_name>"
19522 [(set (match_operand:VF_128 0 "register_operand" "=v")
19525 [(match_operand:VF_128 1 "register_operand" "v")
19526 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
19527 (match_operand:SI 3 "const_0_to_15_operand")]
19532 "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
19533 [(set_attr "type" "sse")
19534 (set_attr "prefix" "evex")
19535 (set_attr "mode" "<MODE>")])
19537 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
19538 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
19539 (unspec:<avx512fmaskmode>
19540 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
19541 (match_operand:QI 2 "const_0_to_255_operand" "n")]
19544 "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
19545 [(set_attr "type" "sse")
19546 (set_attr "length_immediate" "1")
19547 (set_attr "prefix" "evex")
19548 (set_attr "mode" "<MODE>")])
19550 (define_insn "avx512dq_vmfpclass<mode>"
19551 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
19552 (and:<avx512fmaskmode>
19553 (unspec:<avx512fmaskmode>
19554 [(match_operand:VF_128 1 "register_operand" "v")
19555 (match_operand:QI 2 "const_0_to_255_operand" "n")]
19559 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
19560 [(set_attr "type" "sse")
19561 (set_attr "length_immediate" "1")
19562 (set_attr "prefix" "evex")
19563 (set_attr "mode" "<MODE>")])
19565 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
19566 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
19567 (unspec:VF_AVX512VL
19568 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
19569 (match_operand:SI 2 "const_0_to_15_operand")]
19572 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
19573 [(set_attr "prefix" "evex")
19574 (set_attr "mode" "<MODE>")])
19576 (define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
19577 [(set (match_operand:VF_128 0 "register_operand" "=v")
19580 [(match_operand:VF_128 1 "register_operand" "v")
19581 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
19582 (match_operand:SI 3 "const_0_to_15_operand")]
19587 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2<round_saeonly_scalar_mask_op4>, %3}";
19588 [(set_attr "prefix" "evex")
19589 (set_attr "mode" "<ssescalarmode>")])
19591 ;; The correct representation for this is absolutely enormous, and
19592 ;; surely not generally useful.
19593 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
19594 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
19595 (unspec:VI2_AVX512VL
19596 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
19597 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
19598 (match_operand:SI 3 "const_0_to_255_operand")]
19601 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
19602 [(set_attr "type" "sselog1")
19603 (set_attr "length_immediate" "1")
19604 (set_attr "prefix" "evex")
19605 (set_attr "mode" "<sseinsnmode>")])
19607 (define_insn "clz<mode>2<mask_name>"
19608 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
19610 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
19612 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19613 [(set_attr "type" "sse")
19614 (set_attr "prefix" "evex")
19615 (set_attr "mode" "<sseinsnmode>")])
19617 (define_insn "<mask_codefor>conflict<mode><mask_name>"
19618 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
19619 (unspec:VI48_AVX512VL
19620 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
19623 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
19624 [(set_attr "type" "sse")
19625 (set_attr "prefix" "evex")
19626 (set_attr "mode" "<sseinsnmode>")])
19628 (define_insn "sha1msg1"
19629 [(set (match_operand:V4SI 0 "register_operand" "=x")
19631 [(match_operand:V4SI 1 "register_operand" "0")
19632 (match_operand:V4SI 2 "vector_operand" "xBm")]
19635 "sha1msg1\t{%2, %0|%0, %2}"
19636 [(set_attr "type" "sselog1")
19637 (set_attr "mode" "TI")])
19639 (define_insn "sha1msg2"
19640 [(set (match_operand:V4SI 0 "register_operand" "=x")
19642 [(match_operand:V4SI 1 "register_operand" "0")
19643 (match_operand:V4SI 2 "vector_operand" "xBm")]
19646 "sha1msg2\t{%2, %0|%0, %2}"
19647 [(set_attr "type" "sselog1")
19648 (set_attr "mode" "TI")])
19650 (define_insn "sha1nexte"
19651 [(set (match_operand:V4SI 0 "register_operand" "=x")
19653 [(match_operand:V4SI 1 "register_operand" "0")
19654 (match_operand:V4SI 2 "vector_operand" "xBm")]
19655 UNSPEC_SHA1NEXTE))]
19657 "sha1nexte\t{%2, %0|%0, %2}"
19658 [(set_attr "type" "sselog1")
19659 (set_attr "mode" "TI")])
19661 (define_insn "sha1rnds4"
19662 [(set (match_operand:V4SI 0 "register_operand" "=x")
19664 [(match_operand:V4SI 1 "register_operand" "0")
19665 (match_operand:V4SI 2 "vector_operand" "xBm")
19666 (match_operand:SI 3 "const_0_to_3_operand" "n")]
19667 UNSPEC_SHA1RNDS4))]
19669 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
19670 [(set_attr "type" "sselog1")
19671 (set_attr "length_immediate" "1")
19672 (set_attr "mode" "TI")])
19674 (define_insn "sha256msg1"
19675 [(set (match_operand:V4SI 0 "register_operand" "=x")
19677 [(match_operand:V4SI 1 "register_operand" "0")
19678 (match_operand:V4SI 2 "vector_operand" "xBm")]
19679 UNSPEC_SHA256MSG1))]
19681 "sha256msg1\t{%2, %0|%0, %2}"
19682 [(set_attr "type" "sselog1")
19683 (set_attr "mode" "TI")])
19685 (define_insn "sha256msg2"
19686 [(set (match_operand:V4SI 0 "register_operand" "=x")
19688 [(match_operand:V4SI 1 "register_operand" "0")
19689 (match_operand:V4SI 2 "vector_operand" "xBm")]
19690 UNSPEC_SHA256MSG2))]
19692 "sha256msg2\t{%2, %0|%0, %2}"
19693 [(set_attr "type" "sselog1")
19694 (set_attr "mode" "TI")])
19696 (define_insn "sha256rnds2"
19697 [(set (match_operand:V4SI 0 "register_operand" "=x")
19699 [(match_operand:V4SI 1 "register_operand" "0")
19700 (match_operand:V4SI 2 "vector_operand" "xBm")
19701 (match_operand:V4SI 3 "register_operand" "Yz")]
19702 UNSPEC_SHA256RNDS2))]
19704 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
19705 [(set_attr "type" "sselog1")
19706 (set_attr "length_immediate" "1")
19707 (set_attr "mode" "TI")])
19709 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
19710 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
19711 (unspec:AVX512MODE2P
19712 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
19714 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
19716 "&& reload_completed"
19717 [(set (match_dup 0) (match_dup 1))]
19719 if (REG_P (operands[0]))
19720 operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
19722 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
19723 <ssequartermode>mode);
19726 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
19727 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
19728 (unspec:AVX512MODE2P
19729 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
19731 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
19733 "&& reload_completed"
19734 [(set (match_dup 0) (match_dup 1))]
19736 if (REG_P (operands[0]))
19737 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
19739 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
19740 <ssehalfvecmode>mode);
19743 (define_int_iterator VPMADD52
19744 [UNSPEC_VPMADD52LUQ
19745 UNSPEC_VPMADD52HUQ])
19747 (define_int_attr vpmadd52type
19748 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
19750 (define_expand "vpamdd52huq<mode>_maskz"
19751 [(match_operand:VI8_AVX512VL 0 "register_operand")
19752 (match_operand:VI8_AVX512VL 1 "register_operand")
19753 (match_operand:VI8_AVX512VL 2 "register_operand")
19754 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
19755 (match_operand:<avx512fmaskmode> 4 "register_operand")]
19756 "TARGET_AVX512IFMA"
19758 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
19759 operands[0], operands[1], operands[2], operands[3],
19760 CONST0_RTX (<MODE>mode), operands[4]));
19764 (define_expand "vpamdd52luq<mode>_maskz"
19765 [(match_operand:VI8_AVX512VL 0 "register_operand")
19766 (match_operand:VI8_AVX512VL 1 "register_operand")
19767 (match_operand:VI8_AVX512VL 2 "register_operand")
19768 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
19769 (match_operand:<avx512fmaskmode> 4 "register_operand")]
19770 "TARGET_AVX512IFMA"
19772 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
19773 operands[0], operands[1], operands[2], operands[3],
19774 CONST0_RTX (<MODE>mode), operands[4]));
19778 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
19779 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
19780 (unspec:VI8_AVX512VL
19781 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
19782 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
19783 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
19785 "TARGET_AVX512IFMA"
19786 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
19787 [(set_attr "type" "ssemuladd")
19788 (set_attr "prefix" "evex")
19789 (set_attr "mode" "<sseinsnmode>")])
19791 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
19792 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
19793 (vec_merge:VI8_AVX512VL
19794 (unspec:VI8_AVX512VL
19795 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
19796 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
19797 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
19800 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
19801 "TARGET_AVX512IFMA"
19802 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
19803 [(set_attr "type" "ssemuladd")
19804 (set_attr "prefix" "evex")
19805 (set_attr "mode" "<sseinsnmode>")])
19807 (define_insn "vpmultishiftqb<mode><mask_name>"
19808 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
19809 (unspec:VI1_AVX512VL
19810 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
19811 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
19812 UNSPEC_VPMULTISHIFT))]
19813 "TARGET_AVX512VBMI"
19814 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
19815 [(set_attr "type" "sselog")
19816 (set_attr "prefix" "evex")
19817 (set_attr "mode" "<sseinsnmode>")])
19819 (define_mode_iterator IMOD4
19820 [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])
19822 (define_mode_attr imod4_narrow
19823 [(V64SF "V16SF") (V64SI "V16SI")])
19825 (define_expand "mov<mode>"
19826 [(set (match_operand:IMOD4 0 "nonimmediate_operand")
19827 (match_operand:IMOD4 1 "vector_move_operand"))]
19830 ix86_expand_vector_move (<MODE>mode, operands);
19834 (define_insn_and_split "*mov<mode>_internal"
19835 [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
19836 (match_operand:IMOD4 1 "vector_move_operand" " C,vm,v"))]
19838 && (register_operand (operands[0], <MODE>mode)
19839 || register_operand (operands[1], <MODE>mode))"
19841 "&& reload_completed"
19847 for (i = 0; i < 4; i++)
19849 op0 = simplify_subreg
19850 (<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
19851 op1 = simplify_subreg
19852 (<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
19853 emit_move_insn (op0, op1);
19858 (define_insn "avx5124fmaddps_4fmaddps"
19859 [(set (match_operand:V16SF 0 "register_operand" "=v")
19861 [(match_operand:V16SF 1 "register_operand" "0")
19862 (match_operand:V64SF 2 "register_operand" "Yh")
19863 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
19864 "TARGET_AVX5124FMAPS"
19865 "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
19866 [(set_attr ("type") ("ssemuladd"))
19867 (set_attr ("prefix") ("evex"))
19868 (set_attr ("mode") ("V16SF"))])
19870 (define_insn "avx5124fmaddps_4fmaddps_mask"
19871 [(set (match_operand:V16SF 0 "register_operand" "=v")
19874 [(match_operand:V64SF 1 "register_operand" "Yh")
19875 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
19876 (match_operand:V16SF 3 "register_operand" "0")
19877 (match_operand:HI 4 "register_operand" "Yk")))]
19878 "TARGET_AVX5124FMAPS"
19879 "v4fmaddps\t{%2, %g1, %0%{%4%}|%{%4%}%0, %g1, %2}"
19880 [(set_attr ("type") ("ssemuladd"))
19881 (set_attr ("prefix") ("evex"))
19882 (set_attr ("mode") ("V16SF"))])
19884 (define_insn "avx5124fmaddps_4fmaddps_maskz"
19885 [(set (match_operand:V16SF 0 "register_operand" "=v")
19888 [(match_operand:V16SF 1 "register_operand" "0")
19889 (match_operand:V64SF 2 "register_operand" "Yh")
19890 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
19891 (match_operand:V16SF 4 "const0_operand" "C")
19892 (match_operand:HI 5 "register_operand" "Yk")))]
19893 "TARGET_AVX5124FMAPS"
19894 "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %g2, %3}"
19895 [(set_attr ("type") ("ssemuladd"))
19896 (set_attr ("prefix") ("evex"))
19897 (set_attr ("mode") ("V16SF"))])
19899 (define_insn "avx5124fmaddps_4fmaddss"
19900 [(set (match_operand:V4SF 0 "register_operand" "=v")
19902 [(match_operand:V4SF 1 "register_operand" "0")
19903 (match_operand:V64SF 2 "register_operand" "Yh")
19904 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
19905 "TARGET_AVX5124FMAPS"
19906 "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
19907 [(set_attr ("type") ("ssemuladd"))
19908 (set_attr ("prefix") ("evex"))
19909 (set_attr ("mode") ("SF"))])
19911 (define_insn "avx5124fmaddps_4fmaddss_mask"
19912 [(set (match_operand:V4SF 0 "register_operand" "=v")
19915 [(match_operand:V64SF 1 "register_operand" "Yh")
19916 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
19917 (match_operand:V4SF 3 "register_operand" "0")
19918 (match_operand:QI 4 "register_operand" "Yk")))]
19919 "TARGET_AVX5124FMAPS"
19920 "v4fmaddss\t{%2, %x1, %0%{%4%}|%{%4%}%0, %x1, %2}"
19921 [(set_attr ("type") ("ssemuladd"))
19922 (set_attr ("prefix") ("evex"))
19923 (set_attr ("mode") ("SF"))])
19925 (define_insn "avx5124fmaddps_4fmaddss_maskz"
19926 [(set (match_operand:V4SF 0 "register_operand" "=v")
19929 [(match_operand:V4SF 1 "register_operand" "0")
19930 (match_operand:V64SF 2 "register_operand" "Yh")
19931 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
19932 (match_operand:V4SF 4 "const0_operand" "C")
19933 (match_operand:QI 5 "register_operand" "Yk")))]
19934 "TARGET_AVX5124FMAPS"
19935 "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %x2, %3}"
19936 [(set_attr ("type") ("ssemuladd"))
19937 (set_attr ("prefix") ("evex"))
19938 (set_attr ("mode") ("SF"))])
19940 (define_insn "avx5124fmaddps_4fnmaddps"
19941 [(set (match_operand:V16SF 0 "register_operand" "=v")
19943 [(match_operand:V16SF 1 "register_operand" "0")
19944 (match_operand:V64SF 2 "register_operand" "Yh")
19945 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
19946 "TARGET_AVX5124FMAPS"
19947 "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
19948 [(set_attr ("type") ("ssemuladd"))
19949 (set_attr ("prefix") ("evex"))
19950 (set_attr ("mode") ("V16SF"))])
19952 (define_insn "avx5124fmaddps_4fnmaddps_mask"
19953 [(set (match_operand:V16SF 0 "register_operand" "=v")
19956 [(match_operand:V64SF 1 "register_operand" "Yh")
19957 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
19958 (match_operand:V16SF 3 "register_operand" "0")
19959 (match_operand:HI 4 "register_operand" "Yk")))]
19960 "TARGET_AVX5124FMAPS"
19961 "v4fnmaddps\t{%2, %g1, %0%{%4%}|%{%4%}%0, %g1, %2}"
19962 [(set_attr ("type") ("ssemuladd"))
19963 (set_attr ("prefix") ("evex"))
19964 (set_attr ("mode") ("V16SF"))])
19966 (define_insn "avx5124fmaddps_4fnmaddps_maskz"
19967 [(set (match_operand:V16SF 0 "register_operand" "=v")
19970 [(match_operand:V16SF 1 "register_operand" "0")
19971 (match_operand:V64SF 2 "register_operand" "Yh")
19972 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
19973 (match_operand:V16SF 4 "const0_operand" "C")
19974 (match_operand:HI 5 "register_operand" "Yk")))]
19975 "TARGET_AVX5124FMAPS"
19976 "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %g2, %3}"
19977 [(set_attr ("type") ("ssemuladd"))
19978 (set_attr ("prefix") ("evex"))
19979 (set_attr ("mode") ("V16SF"))])
19981 (define_insn "avx5124fmaddps_4fnmaddss"
19982 [(set (match_operand:V4SF 0 "register_operand" "=v")
19984 [(match_operand:V4SF 1 "register_operand" "0")
19985 (match_operand:V64SF 2 "register_operand" "Yh")
19986 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
19987 "TARGET_AVX5124FMAPS"
19988 "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
19989 [(set_attr ("type") ("ssemuladd"))
19990 (set_attr ("prefix") ("evex"))
19991 (set_attr ("mode") ("SF"))])
19993 (define_insn "avx5124fmaddps_4fnmaddss_mask"
19994 [(set (match_operand:V4SF 0 "register_operand" "=v")
19997 [(match_operand:V64SF 1 "register_operand" "Yh")
19998 (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
19999 (match_operand:V4SF 3 "register_operand" "0")
20000 (match_operand:QI 4 "register_operand" "Yk")))]
20001 "TARGET_AVX5124FMAPS"
20002 "v4fnmaddss\t{%2, %x1, %0%{%4%}|%{%4%}%0, %x1, %2}"
20003 [(set_attr ("type") ("ssemuladd"))
20004 (set_attr ("prefix") ("evex"))
20005 (set_attr ("mode") ("SF"))])
20007 (define_insn "avx5124fmaddps_4fnmaddss_maskz"
20008 [(set (match_operand:V4SF 0 "register_operand" "=v")
20011 [(match_operand:V4SF 1 "register_operand" "0")
20012 (match_operand:V64SF 2 "register_operand" "Yh")
20013 (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
20014 (match_operand:V4SF 4 "const0_operand" "C")
20015 (match_operand:QI 5 "register_operand" "Yk")))]
20016 "TARGET_AVX5124FMAPS"
20017 "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %x2, %3}"
20018 [(set_attr ("type") ("ssemuladd"))
20019 (set_attr ("prefix") ("evex"))
20020 (set_attr ("mode") ("SF"))])
20022 (define_insn "avx5124vnniw_vp4dpwssd"
20023 [(set (match_operand:V16SI 0 "register_operand" "=v")
20025 [(match_operand:V16SI 1 "register_operand" "0")
20026 (match_operand:V64SI 2 "register_operand" "Yh")
20027 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
20028 "TARGET_AVX5124VNNIW"
20029 "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
20030 [(set_attr ("type") ("ssemuladd"))
20031 (set_attr ("prefix") ("evex"))
20032 (set_attr ("mode") ("TI"))])
20034 (define_insn "avx5124vnniw_vp4dpwssd_mask"
20035 [(set (match_operand:V16SI 0 "register_operand" "=v")
20038 [(match_operand:V64SI 1 "register_operand" "Yh")
20039 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
20040 (match_operand:V16SI 3 "register_operand" "0")
20041 (match_operand:HI 4 "register_operand" "Yk")))]
20042 "TARGET_AVX5124VNNIW"
20043 "vp4dpwssd\t{%2, %g1, %0%{%4%}|%{%4%}%0, %g1, %2}"
20044 [(set_attr ("type") ("ssemuladd"))
20045 (set_attr ("prefix") ("evex"))
20046 (set_attr ("mode") ("TI"))])
20048 (define_insn "avx5124vnniw_vp4dpwssd_maskz"
20049 [(set (match_operand:V16SI 0 "register_operand" "=v")
20052 [(match_operand:V16SI 1 "register_operand" "0")
20053 (match_operand:V64SI 2 "register_operand" "Yh")
20054 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
20055 (match_operand:V16SI 4 "const0_operand" "C")
20056 (match_operand:HI 5 "register_operand" "Yk")))]
20057 "TARGET_AVX5124VNNIW"
20058 "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %g2, %3}"
20059 [(set_attr ("type") ("ssemuladd"))
20060 (set_attr ("prefix") ("evex"))
20061 (set_attr ("mode") ("TI"))])
20063 (define_insn "avx5124vnniw_vp4dpwssds"
20064 [(set (match_operand:V16SI 0 "register_operand" "=v")
20066 [(match_operand:V16SI 1 "register_operand" "0")
20067 (match_operand:V64SI 2 "register_operand" "Yh")
20068 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
20069 "TARGET_AVX5124VNNIW"
20070 "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
20071 [(set_attr ("type") ("ssemuladd"))
20072 (set_attr ("prefix") ("evex"))
20073 (set_attr ("mode") ("TI"))])
20075 (define_insn "avx5124vnniw_vp4dpwssds_mask"
20076 [(set (match_operand:V16SI 0 "register_operand" "=v")
20079 [(match_operand:V64SI 1 "register_operand" "Yh")
20080 (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
20081 (match_operand:V16SI 3 "register_operand" "0")
20082 (match_operand:HI 4 "register_operand" "Yk")))]
20083 "TARGET_AVX5124VNNIW"
20084 "vp4dpwssds\t{%2, %g1, %0%{%4%}|%{%4%}%0, %g1, %2}"
20085 [(set_attr ("type") ("ssemuladd"))
20086 (set_attr ("prefix") ("evex"))
20087 (set_attr ("mode") ("TI"))])
20089 (define_insn "avx5124vnniw_vp4dpwssds_maskz"
20090 [(set (match_operand:V16SI 0 "register_operand" "=v")
20093 [(match_operand:V16SI 1 "register_operand" "0")
20094 (match_operand:V64SI 2 "register_operand" "Yh")
20095 (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
20096 (match_operand:V16SI 4 "const0_operand" "C")
20097 (match_operand:HI 5 "register_operand" "Yk")))]
20098 "TARGET_AVX5124VNNIW"
20099 "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %g2, %3}"
20100 [(set_attr ("type") ("ssemuladd"))
20101 (set_attr ("prefix") ("evex"))
20102 (set_attr ("mode") ("TI"))])
20104 (define_insn "vpopcount<mode><mask_name>"
20105 [(set (match_operand:VI48_512 0 "register_operand" "=v")
20107 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
20108 "TARGET_AVX512VPOPCNTDQ"
20109 "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
20111 ;; Save multiple registers out-of-line.
20112 (define_insn "save_multiple<mode>"
20113 [(match_parallel 0 "save_multiple"
20114 [(use (match_operand:P 1 "symbol_operand"))])]
20115 "TARGET_SSE && TARGET_64BIT"
20118 ;; Restore multiple registers out-of-line.
20119 (define_insn "restore_multiple<mode>"
20120 [(match_parallel 0 "restore_multiple"
20121 [(use (match_operand:P 1 "symbol_operand"))])]
20122 "TARGET_SSE && TARGET_64BIT"
20125 ;; Restore multiple registers out-of-line and return.
20126 (define_insn "restore_multiple_and_return<mode>"
20127 [(match_parallel 0 "restore_multiple"
20129 (use (match_operand:P 1 "symbol_operand"))
20130 (set (reg:DI SP_REG) (reg:DI R10_REG))
20132 "TARGET_SSE && TARGET_64BIT"
20135 ;; Restore multiple registers out-of-line when hard frame pointer is used,
20136 ;; perform the leave operation prior to returning (from the function).
20137 (define_insn "restore_multiple_leave_return<mode>"
20138 [(match_parallel 0 "restore_multiple"
20140 (use (match_operand:P 1 "symbol_operand"))
20141 (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
20142 (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
20143 (clobber (mem:BLK (scratch)))
20145 "TARGET_SSE && TARGET_64BIT"