1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2017 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
53 UNSPEC_XOP_UNSIGNED_CMP
64 UNSPEC_AESKEYGENASSIST
85 ;; For AVX512F support
89 UNSPEC_UNSIGNED_FIX_NOTRUNC
104 UNSPEC_COMPRESS_STORE
114 ;; For embedded rounding feature
115 UNSPEC_EMBEDDED_ROUNDING
117 ;; For AVX512PF support
118 UNSPEC_GATHER_PREFETCH
119 UNSPEC_SCATTER_PREFETCH
121 ;; For AVX512ER support
135 ;; For AVX512BW support
143 ;; For AVX512DQ support
148 ;; For AVX512IFMA support
152 ;; For AVX512VBMI support
155 ;; For AVX5124FMAPS/AVX5124VNNIW support
162 (define_c_enum "unspecv" [
172 ;; All vector modes including V?TImode, used in move patterns.
173 (define_mode_iterator VMOVE
174 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
175 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
176 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
177 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
178 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
179 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
180 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
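;; For example, a pattern written against VMOVE, such as the "mov<mode>"
;; expander further below, is instantiated once per listed mode (movv16qi,
;; movv8hi, ..., movv8df), and each instance additionally requires the
;; TARGET_* condition attached to its mode, so movv64qi is only available
;; with TARGET_AVX512F.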
182 ;; All AVX-512{F,VL} vector modes.  Assumes TARGET_AVX512F as the baseline.
183 (define_mode_iterator V48_AVX512VL
184 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
185 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
186 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
187 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
189 ;; 1,2 byte AVX-512{BW,VL} vector modes.  Assumes TARGET_AVX512BW as the baseline.
190 (define_mode_iterator VI12_AVX512VL
191 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
192 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
194 (define_mode_iterator VI1_AVX512VL
195 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
198 (define_mode_iterator V
199 [(V32QI "TARGET_AVX") V16QI
200 (V16HI "TARGET_AVX") V8HI
201 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
202 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
203 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
204 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
206 ;; All 128bit vector modes
207 (define_mode_iterator V_128
208 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
210 ;; All 256bit vector modes
211 (define_mode_iterator V_256
212 [V32QI V16HI V8SI V4DI V8SF V4DF])
214 ;; All 512bit vector modes
215 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
217 ;; All 256bit and 512bit vector modes
218 (define_mode_iterator V_256_512
219 [V32QI V16HI V8SI V4DI V8SF V4DF
220 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
221 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
223 ;; All vector float modes
224 (define_mode_iterator VF
225 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
226 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
228 ;; 128- and 256-bit float vector modes
229 (define_mode_iterator VF_128_256
230 [(V8SF "TARGET_AVX") V4SF
231 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
233 ;; All SFmode vector float modes
234 (define_mode_iterator VF1
235 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
237 ;; 128- and 256-bit SF vector modes
238 (define_mode_iterator VF1_128_256
239 [(V8SF "TARGET_AVX") V4SF])
241 (define_mode_iterator VF1_128_256VL
242 [V8SF (V4SF "TARGET_AVX512VL")])
244 ;; All DFmode vector float modes
245 (define_mode_iterator VF2
246 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
248 ;; 128- and 256-bit DF vector modes
249 (define_mode_iterator VF2_128_256
250 [(V4DF "TARGET_AVX") V2DF])
252 (define_mode_iterator VF2_512_256
253 [(V8DF "TARGET_AVX512F") V4DF])
255 (define_mode_iterator VF2_512_256VL
256 [V8DF (V4DF "TARGET_AVX512VL")])
258 ;; All 128bit vector float modes
259 (define_mode_iterator VF_128
260 [V4SF (V2DF "TARGET_SSE2")])
262 ;; All 256bit vector float modes
263 (define_mode_iterator VF_256
266 ;; All 512bit vector float modes
267 (define_mode_iterator VF_512
270 (define_mode_iterator VI48_AVX512VL
271 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
272 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
274 (define_mode_iterator VF_AVX512VL
275 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
276 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
278 (define_mode_iterator VF2_AVX512VL
279 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
281 (define_mode_iterator VF1_AVX512VL
282 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
284 ;; All vector integer modes
285 (define_mode_iterator VI
286 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
287 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
288 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
289 (V8SI "TARGET_AVX") V4SI
290 (V4DI "TARGET_AVX") V2DI])
292 (define_mode_iterator VI_AVX2
293 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
294 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
295 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
296 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
298 ;; All QImode vector integer modes
299 (define_mode_iterator VI1
300 [(V32QI "TARGET_AVX") V16QI])
302 ;; All 128-bit vector modes, plus their 256-bit counterparts for TARGET_AVX
303 (define_mode_iterator V_AVX
304 [V16QI V8HI V4SI V2DI V4SF V2DF
305 (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
306 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
307 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")])
309 (define_mode_iterator VI48_AVX
311 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
313 (define_mode_iterator VI8
314 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
316 (define_mode_iterator VI8_AVX512VL
317 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
319 (define_mode_iterator VI8_256_512
320 [V8DI (V4DI "TARGET_AVX512VL")])
322 (define_mode_iterator VI1_AVX2
323 [(V32QI "TARGET_AVX2") V16QI])
325 (define_mode_iterator VI1_AVX512
326 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
328 (define_mode_iterator VI2_AVX2
329 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
331 (define_mode_iterator VI2_AVX512F
332 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
334 (define_mode_iterator VI4_AVX
335 [(V8SI "TARGET_AVX") V4SI])
337 (define_mode_iterator VI4_AVX2
338 [(V8SI "TARGET_AVX2") V4SI])
340 (define_mode_iterator VI4_AVX512F
341 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
343 (define_mode_iterator VI4_AVX512VL
344 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
346 (define_mode_iterator VI48_AVX512F_AVX512VL
347 [V4SI V8SI (V16SI "TARGET_AVX512F")
348 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
350 (define_mode_iterator VI2_AVX512VL
351 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
353 (define_mode_iterator VI8_AVX2_AVX512BW
354 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
356 (define_mode_iterator VI8_AVX2
357 [(V4DI "TARGET_AVX2") V2DI])
359 (define_mode_iterator VI8_AVX2_AVX512F
360 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
362 (define_mode_iterator VI4_128_8_256
366 (define_mode_iterator V8FI
370 (define_mode_iterator V16FI
373 ;; ??? We should probably use TImode instead.
374 (define_mode_iterator VIMAX_AVX2
375 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
377 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
378 (define_mode_iterator SSESCALARMODE
379 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
381 (define_mode_iterator VI12_AVX2
382 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
383 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
385 (define_mode_iterator VI24_AVX2
386 [(V16HI "TARGET_AVX2") V8HI
387 (V8SI "TARGET_AVX2") V4SI])
389 (define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
390 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
391 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
392 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
394 (define_mode_iterator VI124_AVX2
395 [(V32QI "TARGET_AVX2") V16QI
396 (V16HI "TARGET_AVX2") V8HI
397 (V8SI "TARGET_AVX2") V4SI])
399 (define_mode_iterator VI2_AVX2_AVX512BW
400 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
402 (define_mode_iterator VI48_AVX2
403 [(V8SI "TARGET_AVX2") V4SI
404 (V4DI "TARGET_AVX2") V2DI])
406 (define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
407 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
408 (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
409 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
411 (define_mode_iterator VI248_AVX512BW_AVX512VL
412 [(V32HI "TARGET_AVX512BW")
413 (V4DI "TARGET_AVX512VL") V16SI V8DI])
415 ;; Assumes TARGET_AVX512VL as the baseline.
416 (define_mode_iterator VI248_AVX512BW_1
417 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
421 (define_mode_iterator VI48_AVX512F
422 [(V16SI "TARGET_AVX512F") V8SI V4SI
423 (V8DI "TARGET_AVX512F") V4DI V2DI])
425 (define_mode_iterator VI48_AVX_AVX512F
426 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
427 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
429 (define_mode_iterator VI12_AVX_AVX512F
430 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
431 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
433 (define_mode_iterator V48_AVX2
436 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
437 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
439 (define_mode_attr avx512
440 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
441 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
442 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
443 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
444 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
445 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
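;; For example, the "<avx512>_load<mode>_mask" patterns further below use
;; this attribute in their names, yielding avx512vl_loadv8si_mask for V8SI
;; and avx512f_loadv16sf_mask for V16SF, i.e. the minimum ISA level that
;; provides the masked operation for each mode.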
447 (define_mode_attr sse2_avx_avx512f
448 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
449 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
450 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
451 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
452 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
453 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
455 (define_mode_attr sse2_avx2
456 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
457 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
458 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
459 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
460 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
462 (define_mode_attr ssse3_avx2
463 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
464 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
465 (V4SI "ssse3") (V8SI "avx2")
466 (V2DI "ssse3") (V4DI "avx2")
467 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
469 (define_mode_attr sse4_1_avx2
470 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
471 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
472 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
473 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
475 (define_mode_attr avx_avx2
476 [(V4SF "avx") (V2DF "avx")
477 (V8SF "avx") (V4DF "avx")
478 (V4SI "avx2") (V2DI "avx2")
479 (V8SI "avx2") (V4DI "avx2")])
481 (define_mode_attr vec_avx2
482 [(V16QI "vec") (V32QI "avx2")
483 (V8HI "vec") (V16HI "avx2")
484 (V4SI "vec") (V8SI "avx2")
485 (V2DI "vec") (V4DI "avx2")])
487 (define_mode_attr avx2_avx512
488 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
489 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
490 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
491 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
492 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
494 (define_mode_attr shuffletype
495 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
496 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
497 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
498 (V32HI "i") (V16HI "i") (V8HI "i")
499 (V64QI "i") (V32QI "i") (V16QI "i")
500 (V4TI "i") (V2TI "i") (V1TI "i")])
502 (define_mode_attr ssequartermode
503 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
505 (define_mode_attr ssedoublemodelower
506 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
507 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
508 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
510 (define_mode_attr ssedoublemode
511 [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
512 (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
513 (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
514 (V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
515 (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
516 (V4DI "V8DI") (V8DI "V16DI")])
518 (define_mode_attr ssebytemode
519 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
521 ;; All 128bit vector integer modes
522 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
524 ;; All 256bit vector integer modes
525 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
527 ;; Various 128bit vector integer mode combinations
528 (define_mode_iterator VI12_128 [V16QI V8HI])
529 (define_mode_iterator VI14_128 [V16QI V4SI])
530 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
531 (define_mode_iterator VI24_128 [V8HI V4SI])
532 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
533 (define_mode_iterator VI48_128 [V4SI V2DI])
535 ;; Various 256bit and 512bit vector integer mode combinations
536 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
537 (define_mode_iterator VI124_256_AVX512F_AVX512BW
539 (V64QI "TARGET_AVX512BW")
540 (V32HI "TARGET_AVX512BW")
541 (V16SI "TARGET_AVX512F")])
542 (define_mode_iterator VI48_256 [V8SI V4DI])
543 (define_mode_iterator VI48_512 [V16SI V8DI])
544 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
545 (define_mode_iterator VI_AVX512BW
546 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
548 ;; Int-float size matches
549 (define_mode_iterator VI4F_128 [V4SI V4SF])
550 (define_mode_iterator VI8F_128 [V2DI V2DF])
551 (define_mode_iterator VI4F_256 [V8SI V8SF])
552 (define_mode_iterator VI8F_256 [V4DI V4DF])
553 (define_mode_iterator VI48F_256_512
555 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
556 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
557 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
558 (define_mode_iterator VF48_I1248
559 [V16SI V16SF V8DI V8DF V32HI V64QI])
560 (define_mode_iterator VI48F
561 [V16SI V16SF V8DI V8DF
562 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
563 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
564 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
565 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
566 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
568 ;; Mapping from float mode to required SSE level
569 (define_mode_attr sse
570 [(SF "sse") (DF "sse2")
571 (V4SF "sse") (V2DF "sse2")
572 (V16SF "avx512f") (V8SF "avx")
573 (V8DF "avx512f") (V4DF "avx")])
575 (define_mode_attr sse2
576 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
577 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
579 (define_mode_attr sse3
580 [(V16QI "sse3") (V32QI "avx")])
582 (define_mode_attr sse4_1
583 [(V4SF "sse4_1") (V2DF "sse4_1")
584 (V8SF "avx") (V4DF "avx")
586 (V4DI "avx") (V2DI "sse4_1")
587 (V8SI "avx") (V4SI "sse4_1")
588 (V16QI "sse4_1") (V32QI "avx")
589 (V8HI "sse4_1") (V16HI "avx")])
591 (define_mode_attr avxsizesuffix
592 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
593 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
594 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
595 (V16SF "512") (V8DF "512")
596 (V8SF "256") (V4DF "256")
597 (V4SF "") (V2DF "")])
599 ;; SSE instruction mode
600 (define_mode_attr sseinsnmode
601 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
602 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
603 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
604 (V16SF "V16SF") (V8DF "V8DF")
605 (V8SF "V8SF") (V4DF "V4DF")
606 (V4SF "V4SF") (V2DF "V2DF")
609 ;; Mapping of vector modes to the corresponding mask mode
610 (define_mode_attr avx512fmaskmode
611 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
612 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
613 (V16SI "HI") (V8SI "QI") (V4SI "QI")
614 (V8DI "QI") (V4DI "QI") (V2DI "QI")
615 (V16SF "HI") (V8SF "QI") (V4SF "QI")
616 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
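;; For example, V16SF and V16SI have 16 elements, so their write-mask lives
;; in an HImode (16-bit) mask register, while V8DF and V8DI only need a
;; QImode (8-bit) mask.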
618 ;; Mapping of vector modes to the corresponding mask mode, in lowercase
619 (define_mode_attr avx512fmaskmodelower
620 [(V64QI "di") (V32QI "si") (V16QI "hi")
621 (V32HI "si") (V16HI "hi") (V8HI "qi") (V4HI "qi")
622 (V16SI "hi") (V8SI "qi") (V4SI "qi")
623 (V8DI "qi") (V4DI "qi") (V2DI "qi")
624 (V16SF "hi") (V8SF "qi") (V4SF "qi")
625 (V8DF "qi") (V4DF "qi") (V2DF "qi")])
627 ;; Mapping of vector float modes to an integer mode of the same size
628 (define_mode_attr sseintvecmode
629 [(V16SF "V16SI") (V8DF "V8DI")
630 (V8SF "V8SI") (V4DF "V4DI")
631 (V4SF "V4SI") (V2DF "V2DI")
632 (V16SI "V16SI") (V8DI "V8DI")
633 (V8SI "V8SI") (V4DI "V4DI")
634 (V4SI "V4SI") (V2DI "V2DI")
635 (V16HI "V16HI") (V8HI "V8HI")
636 (V32HI "V32HI") (V64QI "V64QI")
637 (V32QI "V32QI") (V16QI "V16QI")])
639 (define_mode_attr sseintvecmode2
640 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
641 (V8SF "OI") (V4SF "TI")])
643 (define_mode_attr sseintvecmodelower
644 [(V16SF "v16si") (V8DF "v8di")
645 (V8SF "v8si") (V4DF "v4di")
646 (V4SF "v4si") (V2DF "v2di")
647 (V8SI "v8si") (V4DI "v4di")
648 (V4SI "v4si") (V2DI "v2di")
649 (V16HI "v16hi") (V8HI "v8hi")
650 (V32QI "v32qi") (V16QI "v16qi")])
652 ;; Mapping of vector modes to a vector mode of double size
653 (define_mode_attr ssedoublevecmode
654 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
655 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
656 (V8SF "V16SF") (V4DF "V8DF")
657 (V4SF "V8SF") (V2DF "V4DF")])
659 ;; Mapping of vector modes to a vector mode of half size
660 (define_mode_attr ssehalfvecmode
661 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
662 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
663 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
664 (V16SF "V8SF") (V8DF "V4DF")
665 (V8SF "V4SF") (V4DF "V2DF")
668 (define_mode_attr ssehalfvecmodelower
669 [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
670 (V32QI "v16qi") (V16HI "v8hi") (V8SI "v4si") (V4DI "v2di")
671 (V16QI "v8qi") (V8HI "v4hi") (V4SI "v2si")
672 (V16SF "v8sf") (V8DF "v4df")
673 (V8SF "v4sf") (V4DF "v2df")
676 ;; Mapping of vector modes to the packed single mode of the same size
677 (define_mode_attr ssePSmode
678 [(V16SI "V16SF") (V8DF "V16SF")
679 (V16SF "V16SF") (V8DI "V16SF")
680 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
681 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
682 (V8SI "V8SF") (V4SI "V4SF")
683 (V4DI "V8SF") (V2DI "V4SF")
684 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
685 (V8SF "V8SF") (V4SF "V4SF")
686 (V4DF "V8SF") (V2DF "V4SF")])
688 (define_mode_attr ssePSmode2
689 [(V8DI "V8SF") (V4DI "V4SF")])
691 ;; Mapping of vector modes back to the scalar modes
692 (define_mode_attr ssescalarmode
693 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
694 (V32HI "HI") (V16HI "HI") (V8HI "HI")
695 (V16SI "SI") (V8SI "SI") (V4SI "SI")
696 (V8DI "DI") (V4DI "DI") (V2DI "DI")
697 (V16SF "SF") (V8SF "SF") (V4SF "SF")
698 (V8DF "DF") (V4DF "DF") (V2DF "DF")
699 (V4TI "TI") (V2TI "TI")])
701 ;; Mapping of vector modes back to the scalar modes, in lowercase
702 (define_mode_attr ssescalarmodelower
703 [(V64QI "qi") (V32QI "qi") (V16QI "qi")
704 (V32HI "hi") (V16HI "hi") (V8HI "hi")
705 (V16SI "si") (V8SI "si") (V4SI "si")
706 (V8DI "di") (V4DI "di") (V2DI "di")
707 (V16SF "sf") (V8SF "sf") (V4SF "sf")
708 (V8DF "df") (V4DF "df") (V2DF "df")
709 (V4TI "ti") (V2TI "ti")])
711 ;; Mapping of vector modes to the 128bit modes
712 (define_mode_attr ssexmmmode
713 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
714 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
715 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
716 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
717 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
718 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
720 ;; Pointer size override for scalar modes (Intel asm dialect)
721 (define_mode_attr iptr
722 [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
723 (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
724 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
725 (V8SF "k") (V4DF "q")
726 (V4SF "k") (V2DF "q")
729 ;; Number of scalar elements in each vector type
730 (define_mode_attr ssescalarnum
731 [(V64QI "64") (V16SI "16") (V8DI "8")
732 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
733 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
734 (V16SF "16") (V8DF "8")
735 (V8SF "8") (V4DF "4")
736 (V4SF "4") (V2DF "2")])
738 ;; Mask of scalar elements in each vector type
739 (define_mode_attr ssescalarnummask
740 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
741 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
742 (V8SF "7") (V4DF "3")
743 (V4SF "3") (V2DF "1")])
745 (define_mode_attr ssescalarsize
746 [(V4TI "64") (V2TI "64") (V1TI "64")
747 (V8DI "64") (V4DI "64") (V2DI "64")
748 (V64QI "8") (V32QI "8") (V16QI "8")
749 (V32HI "16") (V16HI "16") (V8HI "16")
750 (V16SI "32") (V8SI "32") (V4SI "32")
751 (V16SF "32") (V8SF "32") (V4SF "32")
752 (V8DF "64") (V4DF "64") (V2DF "64")])
754 ;; SSE prefix for integer vector modes
755 (define_mode_attr sseintprefix
756 [(V2DI "p") (V2DF "")
761 (V16SI "p") (V16SF "")
762 (V16QI "p") (V8HI "p")
763 (V32QI "p") (V16HI "p")
764 (V64QI "p") (V32HI "p")])
766 ;; SSE scalar suffix for vector modes
767 (define_mode_attr ssescalarmodesuffix
769 (V8SF "ss") (V4DF "sd")
770 (V4SF "ss") (V2DF "sd")
771 (V8SI "ss") (V4DI "sd")
774 ;; Pack/unpack vector modes
775 (define_mode_attr sseunpackmode
776 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
777 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
778 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
780 (define_mode_attr ssepackmode
781 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
782 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
783 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
785 ;; Mapping to the maximum rotate count allowed by the XOP rotate immediate constraint
786 (define_mode_attr sserotatemax
787 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
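;; For example, V8HI elements are 16 bits wide, so the immediate rotate
;; count for the XOP word rotate (vprotw) is limited to the range 0..15.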
789 ;; Mapping of mode to cast intrinsic name
790 (define_mode_attr castmode
791 [(V8SI "si") (V8SF "ps") (V4DF "pd")
792 (V16SI "si") (V16SF "ps") (V8DF "pd")])
794 ;; Instruction suffix for sign and zero extensions.
795 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
797 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
798 ;; i64x4 or f64x4 for 512bit modes.
799 (define_mode_attr i128
800 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
801 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
802 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
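;; For example, with a V8SI operand "%~128" prints i128 under TARGET_AVX2
;; and f128 otherwise, selecting vinserti128/vextracti128 versus
;; vinsertf128/vextractf128 in the templates that use <i128>, while the
;; 512-bit modes always use the AVX-512 64x4 forms (e.g. vextracti64x4).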
804 ;; For 256-bit modes under TARGET_AVX512VL && TARGET_AVX512DQ, use the
805 ;; i32x4, f32x4, i64x2 or f64x2 suffixes.
806 (define_mode_attr i128vldq
807 [(V8SF "f32x4") (V4DF "f64x2")
808 (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
811 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
812 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
814 ;; Mapping for dbpsadbw modes
815 (define_mode_attr dbpsadbwmode
816 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
818 ;; Mapping suffixes for broadcast
819 (define_mode_attr bcstscalarsuff
820 [(V64QI "b") (V32QI "b") (V16QI "b")
821 (V32HI "w") (V16HI "w") (V8HI "w")
822 (V16SI "d") (V8SI "d") (V4SI "d")
823 (V8DI "q") (V4DI "q") (V2DI "q")
824 (V16SF "ss") (V8SF "ss") (V4SF "ss")
825 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
827 ;; Tie mode of assembler operand to mode iterator
828 (define_mode_attr concat_tg_mode
829 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
830 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
832 ;; Tie mode of assembler operand to mode iterator
833 (define_mode_attr xtg_mode
834 [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
835 (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
836 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
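;; For example, the "x", "t" and "g" operand modifiers print an SSE register
;; under its 128-bit (xmm), 256-bit (ymm) or 512-bit (zmm) name, so a
;; template operand like %<xtg_mode>1 is printed with the register width
;; matching the iterator's current mode.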
838 ;; Half mask mode for unpacks
839 (define_mode_attr HALFMASKMODE
840 [(DI "SI") (SI "HI")])
842 ;; Double mask mode for packs
843 (define_mode_attr DOUBLEMASKMODE
844 [(HI "SI") (SI "DI")])
847 ;; Include define_subst patterns for instructions with mask
850 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
852 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
856 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
858 ;; All of these patterns are enabled for SSE1 as well as SSE2.
859 ;; This is essential for maintaining stable calling conventions.
861 (define_expand "mov<mode>"
862 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
863 (match_operand:VMOVE 1 "nonimmediate_operand"))]
866 ix86_expand_vector_move (<MODE>mode, operands);
870 (define_insn "mov<mode>_internal"
871 [(set (match_operand:VMOVE 0 "nonimmediate_operand"
873 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
876 && (register_operand (operands[0], <MODE>mode)
877 || register_operand (operands[1], <MODE>mode))"
879 switch (get_attr_type (insn))
882 return standard_sse_constant_opcode (insn, operands[1]);
885 /* There is no EVEX-encoded vmov* for sizes smaller than 64 bytes
886 in AVX512F, so we need workarounds to access SSE registers
887 16-31, which are EVEX-only.  With AVX512VL no workarounds are needed.  */
888 if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
889 && (EXT_REX_SSE_REG_P (operands[0])
890 || EXT_REX_SSE_REG_P (operands[1])))
892 if (memory_operand (operands[0], <MODE>mode))
894 if (<MODE_SIZE> == 32)
895 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
896 else if (<MODE_SIZE> == 16)
897 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
901 else if (memory_operand (operands[1], <MODE>mode))
903 if (<MODE_SIZE> == 32)
904 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
905 else if (<MODE_SIZE> == 16)
906 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
911 /* Reg -> reg move is always aligned. Just use wider move. */
912 switch (get_attr_mode (insn))
916 return "vmovaps\t{%g1, %g0|%g0, %g1}";
919 return "vmovapd\t{%g1, %g0|%g0, %g1}";
922 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
928 switch (get_attr_mode (insn))
933 if (misaligned_operand (operands[0], <MODE>mode)
934 || misaligned_operand (operands[1], <MODE>mode))
935 return "%vmovups\t{%1, %0|%0, %1}";
937 return "%vmovaps\t{%1, %0|%0, %1}";
942 if (misaligned_operand (operands[0], <MODE>mode)
943 || misaligned_operand (operands[1], <MODE>mode))
944 return "%vmovupd\t{%1, %0|%0, %1}";
946 return "%vmovapd\t{%1, %0|%0, %1}";
950 if (misaligned_operand (operands[0], <MODE>mode)
951 || misaligned_operand (operands[1], <MODE>mode))
952 return TARGET_AVX512VL ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
953 : "%vmovdqu\t{%1, %0|%0, %1}";
955 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
956 : "%vmovdqa\t{%1, %0|%0, %1}";
958 if (misaligned_operand (operands[0], <MODE>mode)
959 || misaligned_operand (operands[1], <MODE>mode))
960 return (<MODE>mode == V16SImode
961 || <MODE>mode == V8DImode
963 ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
964 : "vmovdqu64\t{%1, %0|%0, %1}";
966 return "vmovdqa64\t{%1, %0|%0, %1}";
976 [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
977 (set_attr "prefix" "maybe_vex")
979 (cond [(and (eq_attr "alternative" "1")
980 (match_test "TARGET_AVX512VL"))
981 (const_string "<sseinsnmode>")
982 (and (match_test "<MODE_SIZE> == 16")
983 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
984 (and (eq_attr "alternative" "3")
985 (match_test "TARGET_SSE_TYPELESS_STORES"))))
986 (const_string "<ssePSmode>")
987 (match_test "TARGET_AVX")
988 (const_string "<sseinsnmode>")
989 (ior (not (match_test "TARGET_SSE2"))
990 (match_test "optimize_function_for_size_p (cfun)"))
991 (const_string "V4SF")
992 (and (eq_attr "alternative" "0")
993 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
996 (const_string "<sseinsnmode>")))
997 (set (attr "enabled")
998 (cond [(and (match_test "<MODE_SIZE> == 16")
999 (eq_attr "alternative" "1"))
1000 (symbol_ref "TARGET_SSE2")
1001 (and (match_test "<MODE_SIZE> == 32")
1002 (eq_attr "alternative" "1"))
1003 (symbol_ref "TARGET_AVX2")
1005 (symbol_ref "true")))])
1007 (define_insn "<avx512>_load<mode>_mask"
1008 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1009 (vec_merge:V48_AVX512VL
1010 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
1011 (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
1012 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1015 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1017 if (misaligned_operand (operands[1], <MODE>mode))
1018 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1020 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1024 if (misaligned_operand (operands[1], <MODE>mode))
1025 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1027 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1030 [(set_attr "type" "ssemov")
1031 (set_attr "prefix" "evex")
1032 (set_attr "memory" "none,load")
1033 (set_attr "mode" "<sseinsnmode>")])
1035 (define_insn "<avx512>_load<mode>_mask"
1036 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1037 (vec_merge:VI12_AVX512VL
1038 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
1039 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
1040 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1042 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1043 [(set_attr "type" "ssemov")
1044 (set_attr "prefix" "evex")
1045 (set_attr "memory" "none,load")
1046 (set_attr "mode" "<sseinsnmode>")])
1048 (define_insn "<avx512>_blendm<mode>"
1049 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1050 (vec_merge:V48_AVX512VL
1051 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
1052 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1053 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1055 "vblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1056 [(set_attr "type" "ssemov")
1057 (set_attr "prefix" "evex")
1058 (set_attr "mode" "<sseinsnmode>")])
1060 (define_insn "<avx512>_blendm<mode>"
1061 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1062 (vec_merge:VI12_AVX512VL
1063 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
1064 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1065 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1067 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1068 [(set_attr "type" "ssemov")
1069 (set_attr "prefix" "evex")
1070 (set_attr "mode" "<sseinsnmode>")])
1072 (define_insn "<avx512>_store<mode>_mask"
1073 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1074 (vec_merge:V48_AVX512VL
1075 (match_operand:V48_AVX512VL 1 "register_operand" "v")
1077 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1080 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1082 if (misaligned_operand (operands[0], <MODE>mode))
1083 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1085 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1089 if (misaligned_operand (operands[0], <MODE>mode))
1090 return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1092 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1095 [(set_attr "type" "ssemov")
1096 (set_attr "prefix" "evex")
1097 (set_attr "memory" "store")
1098 (set_attr "mode" "<sseinsnmode>")])
1100 (define_insn "<avx512>_store<mode>_mask"
1101 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1102 (vec_merge:VI12_AVX512VL
1103 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1105 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1107 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1108 [(set_attr "type" "ssemov")
1109 (set_attr "prefix" "evex")
1110 (set_attr "memory" "store")
1111 (set_attr "mode" "<sseinsnmode>")])
1113 (define_insn "sse2_movq128"
1114 [(set (match_operand:V2DI 0 "register_operand" "=v")
1117 (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1118 (parallel [(const_int 0)]))
1121 "%vmovq\t{%1, %0|%0, %q1}"
1122 [(set_attr "type" "ssemov")
1123 (set_attr "prefix" "maybe_vex")
1124 (set_attr "mode" "TI")])
1126 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1127 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1128 ;; from memory, we'd prefer to load the memory directly into the %xmm
1129 ;; register. To facilitate this happy circumstance, this pattern won't
1130 ;; split until after register allocation. If the 64-bit value didn't
1131 ;; come from memory, this is the best we can do. This is much better
1132 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1135 (define_insn_and_split "movdi_to_sse"
1137 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1138 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
1139 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
1140 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1142 "&& reload_completed"
1145 if (register_operand (operands[1], DImode))
1147 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1148 Assemble the 64-bit DImode value in an xmm register. */
1149 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1150 gen_lowpart (SImode, operands[1])));
1151 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1152 gen_highpart (SImode, operands[1])));
1153 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1156 else if (memory_operand (operands[1], DImode))
1158 rtx tmp = gen_reg_rtx (V2DImode);
1159 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
1160 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
1168 [(set (match_operand:V4SF 0 "register_operand")
1169 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1170 "TARGET_SSE && reload_completed"
1173 (vec_duplicate:V4SF (match_dup 1))
1177 operands[1] = gen_lowpart (SFmode, operands[1]);
1178 operands[2] = CONST0_RTX (V4SFmode);
1182 [(set (match_operand:V2DF 0 "register_operand")
1183 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1184 "TARGET_SSE2 && reload_completed"
1185 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1187 operands[1] = gen_lowpart (DFmode, operands[1]);
1188 operands[2] = CONST0_RTX (DFmode);
1191 (define_expand "movmisalign<mode>"
1192 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1193 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1196 ix86_expand_vector_move_misalign (<MODE>mode, operands);
1200 ;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
1202 [(set (match_operand:V2DF 0 "sse_reg_operand")
1203 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1204 (match_operand:DF 4 "const0_operand")))
1205 (set (match_operand:V2DF 2 "sse_reg_operand")
1206 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1207 (parallel [(const_int 0)]))
1208 (match_operand:DF 3 "memory_operand")))]
1209 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1210 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1211 [(set (match_dup 2) (match_dup 5))]
1212 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1215 [(set (match_operand:DF 0 "sse_reg_operand")
1216 (match_operand:DF 1 "memory_operand"))
1217 (set (match_operand:V2DF 2 "sse_reg_operand")
1218 (vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1219 (match_operand:DF 3 "memory_operand")))]
1220 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1221 && REGNO (operands[4]) == REGNO (operands[2])
1222 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1223 [(set (match_dup 2) (match_dup 5))]
1224 "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1226 ;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
1228 [(set (match_operand:DF 0 "memory_operand")
1229 (vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1230 (parallel [(const_int 0)])))
1231 (set (match_operand:DF 2 "memory_operand")
1232 (vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1233 (parallel [(const_int 1)])))]
1234 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1235 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1236 [(set (match_dup 4) (match_dup 1))]
1237 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1239 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1240 [(set (match_operand:VI1 0 "register_operand" "=x")
1241 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1244 "%vlddqu\t{%1, %0|%0, %1}"
1245 [(set_attr "type" "ssemov")
1246 (set_attr "movu" "1")
1247 (set (attr "prefix_data16")
1249 (match_test "TARGET_AVX")
1251 (const_string "0")))
1252 (set (attr "prefix_rep")
1254 (match_test "TARGET_AVX")
1256 (const_string "1")))
1257 (set_attr "prefix" "maybe_vex")
1258 (set_attr "mode" "<sseinsnmode>")])
1260 (define_insn "sse2_movnti<mode>"
1261 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1262 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1265 "movnti\t{%1, %0|%0, %1}"
1266 [(set_attr "type" "ssemov")
1267 (set_attr "prefix_data16" "0")
1268 (set_attr "mode" "<MODE>")])
1270 (define_insn "<sse>_movnt<mode>"
1271 [(set (match_operand:VF 0 "memory_operand" "=m")
1273 [(match_operand:VF 1 "register_operand" "v")]
1276 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1277 [(set_attr "type" "ssemov")
1278 (set_attr "prefix" "maybe_vex")
1279 (set_attr "mode" "<MODE>")])
1281 (define_insn "<sse2>_movnt<mode>"
1282 [(set (match_operand:VI8 0 "memory_operand" "=m")
1283 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1286 "%vmovntdq\t{%1, %0|%0, %1}"
1287 [(set_attr "type" "ssecvt")
1288 (set (attr "prefix_data16")
1290 (match_test "TARGET_AVX")
1292 (const_string "1")))
1293 (set_attr "prefix" "maybe_vex")
1294 (set_attr "mode" "<sseinsnmode>")])
1296 ; Expand patterns for non-temporal stores. At the moment, only those
1297 ; that directly map to insns are defined; it would be possible to
1298 ; define patterns for other modes that would expand to several insns.
1300 ;; Modes handled by storent patterns.
1301 (define_mode_iterator STORENT_MODE
1302 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1303 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1304 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1305 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1306 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
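;; For example, storentv4sf wraps its source in UNSPEC_MOVNT, which the
;; "<sse>_movnt<mode>" insn above matches and emits as movntps (vmovntps
;; under AVX); likewise storentdi on 64-bit targets maps to movnti.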
1308 (define_expand "storent<mode>"
1309 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1310 (unspec:STORENT_MODE
1311 [(match_operand:STORENT_MODE 1 "register_operand")]
1315 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1319 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1321 ;; All integer modes with AVX512BW/DQ.
1322 (define_mode_iterator SWI1248_AVX512BWDQ
1323 [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1325 ;; All integer modes with AVX512BW, where HImode operation
1326 ;; can be used instead of QImode.
1327 (define_mode_iterator SWI1248_AVX512BW
1328 [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
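;; For example, without AVX512DQ there is no kandb, so a QImode mask AND is
;; emitted as kandw by the k<code><mode> pattern below; the extra bits of
;; the wider HImode operation are irrelevant for an 8-bit mask result.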
1330 ;; All integer modes with AVX512BW/DQ, where even HImode requires DQ.
1331 (define_mode_iterator SWI1248_AVX512BWDQ2
1332 [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1333 (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1335 (define_expand "kmov<mskmodesuffix>"
1336 [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1337 (match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1339 && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
1341 (define_insn "k<code><mode>"
1342 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1343 (any_logic:SWI1248_AVX512BW
1344 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1345 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1346 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1349 if (get_attr_mode (insn) == MODE_HI)
1350 return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1352 return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1354 [(set_attr "type" "msklog")
1355 (set_attr "prefix" "vex")
1357 (cond [(and (match_test "<MODE>mode == QImode")
1358 (not (match_test "TARGET_AVX512DQ")))
1361 (const_string "<MODE>")))])
1363 (define_insn "kandn<mode>"
1364 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1365 (and:SWI1248_AVX512BW
1366 (not:SWI1248_AVX512BW
1367 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1368 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1369 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1372 if (get_attr_mode (insn) == MODE_HI)
1373 return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1375 return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1377 [(set_attr "type" "msklog")
1378 (set_attr "prefix" "vex")
1380 (cond [(and (match_test "<MODE>mode == QImode")
1381 (not (match_test "TARGET_AVX512DQ")))
1384 (const_string "<MODE>")))])
1386 (define_insn "kxnor<mode>"
1387 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1388 (not:SWI1248_AVX512BW
1389 (xor:SWI1248_AVX512BW
1390 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1391 (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1392 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1395 if (get_attr_mode (insn) == MODE_HI)
1396 return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1398 return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1400 [(set_attr "type" "msklog")
1401 (set_attr "prefix" "vex")
1403 (cond [(and (match_test "<MODE>mode == QImode")
1404 (not (match_test "TARGET_AVX512DQ")))
1407 (const_string "<MODE>")))])
1409 (define_insn "knot<mode>"
1410 [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1411 (not:SWI1248_AVX512BW
1412 (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1413 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1416 if (get_attr_mode (insn) == MODE_HI)
1417 return "knotw\t{%1, %0|%0, %1}";
1419 return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1421 [(set_attr "type" "msklog")
1422 (set_attr "prefix" "vex")
1424 (cond [(and (match_test "<MODE>mode == QImode")
1425 (not (match_test "TARGET_AVX512DQ")))
1428 (const_string "<MODE>")))])
1430 (define_insn "kadd<mode>"
1431 [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
1432 (plus:SWI1248_AVX512BWDQ2
1433 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
1434 (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
1435 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1437 "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1438 [(set_attr "type" "msklog")
1439 (set_attr "prefix" "vex")
1440 (set_attr "mode" "<MODE>")])
1442 ;; Mask variant shift mnemonics
1443 (define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
1445 (define_insn "k<code><mode>"
1446 [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
1447 (any_lshift:SWI1248_AVX512BWDQ
1448 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
1449 (match_operand:QI 2 "immediate_operand" "n")))
1450 (unspec [(const_int 0)] UNSPEC_MASKOP)]
1452 "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1453 [(set_attr "type" "msklog")
1454 (set_attr "prefix" "vex")
1455 (set_attr "mode" "<MODE>")])
1457 (define_insn "ktest<mode>"
1458 [(set (reg:CC FLAGS_REG)
1460 [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
1461 (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
1464 "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
1465 [(set_attr "mode" "<MODE>")
1466 (set_attr "type" "msklog")
1467 (set_attr "prefix" "vex")])
1469 (define_insn "kortest<mode>"
1470 [(set (reg:CC FLAGS_REG)
1472 [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
1473 (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
1476 "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
1477 [(set_attr "mode" "<MODE>")
1478 (set_attr "type" "msklog")
1479 (set_attr "prefix" "vex")])
1481 (define_insn "kunpckhi"
1482 [(set (match_operand:HI 0 "register_operand" "=k")
1485 (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
1487 (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
1489 "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
1490 [(set_attr "mode" "HI")
1491 (set_attr "type" "msklog")
1492 (set_attr "prefix" "vex")])
1494 (define_insn "kunpcksi"
1495 [(set (match_operand:SI 0 "register_operand" "=k")
1498 (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
1500 (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
1502 "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
1503 [(set_attr "mode" "SI")])
1505 (define_insn "kunpckdi"
1506 [(set (match_operand:DI 0 "register_operand" "=k")
1509 (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
1511 (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
1513 "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
1514 [(set_attr "mode" "DI")])
1517 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1519 ;; Parallel floating point arithmetic
1521 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1523 (define_expand "<code><mode>2"
1524 [(set (match_operand:VF 0 "register_operand")
1526 (match_operand:VF 1 "register_operand")))]
1528 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1530 (define_insn_and_split "*absneg<mode>2"
1531 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1532 (match_operator:VF 3 "absneg_operator"
1533 [(match_operand:VF 1 "vector_operand" "0, xBm,v, m")]))
1534 (use (match_operand:VF 2 "vector_operand" "xBm,0, vm,v"))]
1537 "&& reload_completed"
1540 enum rtx_code absneg_op;
1546 if (MEM_P (operands[1]))
1547 op1 = operands[2], op2 = operands[1];
1549 op1 = operands[1], op2 = operands[2];
1554 if (rtx_equal_p (operands[0], operands[1]))
1560 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1561 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1562 t = gen_rtx_SET (operands[0], t);
1566 [(set_attr "isa" "noavx,noavx,avx,avx")])
1568 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1569 [(set (match_operand:VF 0 "register_operand")
1571 (match_operand:VF 1 "<round_nimm_predicate>")
1572 (match_operand:VF 2 "<round_nimm_predicate>")))]
1573 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1574 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1576 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1577 [(set (match_operand:VF 0 "register_operand" "=x,v")
1579 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1580 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1581 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1583 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1584 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1585 [(set_attr "isa" "noavx,avx")
1586 (set_attr "type" "sseadd")
1587 (set_attr "prefix" "<mask_prefix3>")
1588 (set_attr "mode" "<MODE>")])
1590 (define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name>"
1591 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1594 (match_operand:VF_128 1 "register_operand" "0,v")
1595 (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1600 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1601 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1602 [(set_attr "isa" "noavx,avx")
1603 (set_attr "type" "sseadd")
1604 (set_attr "prefix" "<round_scalar_prefix>")
1605 (set_attr "mode" "<ssescalarmode>")])
1607 (define_expand "mul<mode>3<mask_name><round_name>"
1608 [(set (match_operand:VF 0 "register_operand")
1610 (match_operand:VF 1 "<round_nimm_predicate>")
1611 (match_operand:VF 2 "<round_nimm_predicate>")))]
1612 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1613 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1615 (define_insn "*mul<mode>3<mask_name><round_name>"
1616 [(set (match_operand:VF 0 "register_operand" "=x,v")
1618 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1619 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1620 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1622 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1623 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1624 [(set_attr "isa" "noavx,avx")
1625 (set_attr "type" "ssemul")
1626 (set_attr "prefix" "<mask_prefix3>")
1627 (set_attr "btver2_decode" "direct,double")
1628 (set_attr "mode" "<MODE>")])
1630 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
1631 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1634 (match_operand:VF_128 1 "register_operand" "0,v")
1635 (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
1640 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1641 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1642 [(set_attr "isa" "noavx,avx")
1643 (set_attr "type" "sse<multdiv_mnemonic>")
1644 (set_attr "prefix" "<round_scalar_prefix>")
1645 (set_attr "btver2_decode" "direct,double")
1646 (set_attr "mode" "<ssescalarmode>")])
1648 (define_expand "div<mode>3"
1649 [(set (match_operand:VF2 0 "register_operand")
1650 (div:VF2 (match_operand:VF2 1 "register_operand")
1651 (match_operand:VF2 2 "vector_operand")))]
1653 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1655 (define_expand "div<mode>3"
1656 [(set (match_operand:VF1 0 "register_operand")
1657 (div:VF1 (match_operand:VF1 1 "register_operand")
1658 (match_operand:VF1 2 "vector_operand")))]
1661 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1664 && TARGET_RECIP_VEC_DIV
1665 && !optimize_insn_for_size_p ()
1666 && flag_finite_math_only && !flag_trapping_math
1667 && flag_unsafe_math_optimizations)
1669 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
1674 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1675 [(set (match_operand:VF 0 "register_operand" "=x,v")
1677 (match_operand:VF 1 "register_operand" "0,v")
1678 (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1679 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1681 div<ssemodesuffix>\t{%2, %0|%0, %2}
1682 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1683 [(set_attr "isa" "noavx,avx")
1684 (set_attr "type" "ssediv")
1685 (set_attr "prefix" "<mask_prefix3>")
1686 (set_attr "mode" "<MODE>")])
1688 (define_insn "<sse>_rcp<mode>2"
1689 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1691 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
1693 "%vrcpps\t{%1, %0|%0, %1}"
1694 [(set_attr "type" "sse")
1695 (set_attr "atom_sse_attr" "rcp")
1696 (set_attr "btver2_sse_attr" "rcp")
1697 (set_attr "prefix" "maybe_vex")
1698 (set_attr "mode" "<MODE>")])
1700 (define_insn "sse_vmrcpv4sf2"
1701 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1703 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1705 (match_operand:V4SF 2 "register_operand" "0,x")
1709 rcpss\t{%1, %0|%0, %k1}
1710 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1711 [(set_attr "isa" "noavx,avx")
1712 (set_attr "type" "sse")
1713 (set_attr "atom_sse_attr" "rcp")
1714 (set_attr "btver2_sse_attr" "rcp")
1715 (set_attr "prefix" "orig,vex")
1716 (set_attr "mode" "SF")])
1718 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1719 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1721 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1724 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1725 [(set_attr "type" "sse")
1726 (set_attr "prefix" "evex")
1727 (set_attr "mode" "<MODE>")])
1729 (define_insn "srcp14<mode>"
1730 [(set (match_operand:VF_128 0 "register_operand" "=v")
1733 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1735 (match_operand:VF_128 2 "register_operand" "v")
1738 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1739 [(set_attr "type" "sse")
1740 (set_attr "prefix" "evex")
1741 (set_attr "mode" "<MODE>")])
1743 (define_insn "srcp14<mode>_mask"
1744 [(set (match_operand:VF_128 0 "register_operand" "=v")
1748 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1750 (match_operand:VF_128 3 "vector_move_operand" "0C")
1751 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1752 (match_operand:VF_128 2 "register_operand" "v")
1755 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
1756 [(set_attr "type" "sse")
1757 (set_attr "prefix" "evex")
1758 (set_attr "mode" "<MODE>")])
1760 (define_expand "sqrt<mode>2"
1761 [(set (match_operand:VF2 0 "register_operand")
1762 (sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
1765 (define_expand "sqrt<mode>2"
1766 [(set (match_operand:VF1 0 "register_operand")
1767 (sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
1771 && TARGET_RECIP_VEC_SQRT
1772 && !optimize_insn_for_size_p ()
1773 && flag_finite_math_only && !flag_trapping_math
1774 && flag_unsafe_math_optimizations)
1776 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
1781 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1782 [(set (match_operand:VF 0 "register_operand" "=x,v")
1783 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1784 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1786 sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
1787 vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1788 [(set_attr "isa" "noavx,avx")
1789 (set_attr "type" "sse")
1790 (set_attr "atom_sse_attr" "sqrt")
1791 (set_attr "btver2_sse_attr" "sqrt")
1792 (set_attr "prefix" "maybe_vex")
1793 (set_attr "mode" "<MODE>")])
1795 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1796 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1799 (match_operand:VF_128 1 "vector_operand" "xBm,<round_constraint>"))
1800 (match_operand:VF_128 2 "register_operand" "0,v")
1804 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1805 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1806 [(set_attr "isa" "noavx,avx")
1807 (set_attr "type" "sse")
1808 (set_attr "atom_sse_attr" "sqrt")
1809 (set_attr "prefix" "<round_prefix>")
1810 (set_attr "btver2_sse_attr" "sqrt")
1811 (set_attr "mode" "<ssescalarmode>")])
1813 (define_expand "rsqrt<mode>2"
1814 [(set (match_operand:VF1_128_256 0 "register_operand")
1816 [(match_operand:VF1_128_256 1 "vector_operand")] UNSPEC_RSQRT))]
1819 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
1823 (define_expand "rsqrtv16sf2"
1824 [(set (match_operand:V16SF 0 "register_operand")
1826 [(match_operand:V16SF 1 "vector_operand")]
1828 "TARGET_SSE_MATH && TARGET_AVX512ER"
1830 ix86_emit_swsqrtsf (operands[0], operands[1], V16SFmode, true);
1834 (define_insn "<sse>_rsqrt<mode>2"
1835 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1837 [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
1839 "%vrsqrtps\t{%1, %0|%0, %1}"
1840 [(set_attr "type" "sse")
1841 (set_attr "prefix" "maybe_vex")
1842 (set_attr "mode" "<MODE>")])
1844 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1845 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1847 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1850 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1851 [(set_attr "type" "sse")
1852 (set_attr "prefix" "evex")
1853 (set_attr "mode" "<MODE>")])
1855 (define_insn "rsqrt14<mode>"
1856 [(set (match_operand:VF_128 0 "register_operand" "=v")
1859 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1861 (match_operand:VF_128 2 "register_operand" "v")
1864 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1865 [(set_attr "type" "sse")
1866 (set_attr "prefix" "evex")
1867 (set_attr "mode" "<MODE>")])
1869 (define_insn "rsqrt14_<mode>_mask"
1870 [(set (match_operand:VF_128 0 "register_operand" "=v")
1874 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1876 (match_operand:VF_128 3 "vector_move_operand" "0C")
1877 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1878 (match_operand:VF_128 2 "register_operand" "v")
1881 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
1882 [(set_attr "type" "sse")
1883 (set_attr "prefix" "evex")
1884 (set_attr "mode" "<MODE>")])
1886 (define_insn "sse_vmrsqrtv4sf2"
1887 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1889 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1891 (match_operand:V4SF 2 "register_operand" "0,x")
1895 rsqrtss\t{%1, %0|%0, %k1}
1896 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1897 [(set_attr "isa" "noavx,avx")
1898 (set_attr "type" "sse")
1899 (set_attr "prefix" "orig,vex")
1900 (set_attr "mode" "SF")])
1902 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1903 [(set (match_operand:VF 0 "register_operand")
1905 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1906 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1907 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1909 if (!flag_finite_math_only || flag_signed_zeros)
1911 operands[1] = force_reg (<MODE>mode, operands[1]);
1912 emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
1913 (operands[0], operands[1], operands[2]
1914 <mask_operand_arg34>
1915 <round_saeonly_mask_arg3>));
1919 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1922 ;; These versions of the min/max patterns are intentionally ignorant of
1923 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
1924 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
1925 ;; are undefined in this condition, we're certain this is correct.
1927 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1928 [(set (match_operand:VF 0 "register_operand" "=x,v")
1930 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1931 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
1932 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1933 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1935 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1936 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1937 [(set_attr "isa" "noavx,avx")
1938 (set_attr "type" "sseadd")
1939 (set_attr "btver2_sse_attr" "maxmin")
1940 (set_attr "prefix" "<mask_prefix3>")
1941 (set_attr "mode" "<MODE>")])
1943 ;; These versions of the min/max patterns implement exactly the operations
1944 ;; min = (op1 < op2 ? op1 : op2)
1945 ;; max = (!(op1 < op2) ? op1 : op2)
1946 ;; Their operands are not commutative, and thus they may be used in the
1947 ;; presence of -0.0 and NaN.
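;; Illustrative sketch (assumption, not from this file): per-element C
;; equivalents of the non-commutative forms above; when the comparison
;; does not hold (e.g. for NaN) the second operand is returned, so the
;; operand order must be preserved:
;;
;;   float ieee_min (float op1, float op2) { return op1 < op2 ? op1 : op2; }
;;   float ieee_max (float op1, float op2) { return !(op1 < op2) ? op1 : op2; }
;;
;;   /* ieee_min (NAN, 1.0f) == 1.0f, while ieee_min (1.0f, NAN) is NaN.  */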
1949 (define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
1950 [(set (match_operand:VF 0 "register_operand" "=x,v")
1952 [(match_operand:VF 1 "register_operand" "0,v")
1953 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
1956 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1958 <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
1959 v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1960 [(set_attr "isa" "noavx,avx")
1961 (set_attr "type" "sseadd")
1962 (set_attr "btver2_sse_attr" "maxmin")
1963 (set_attr "prefix" "<mask_prefix3>")
1964 (set_attr "mode" "<MODE>")])
1966 (define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
1967 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1970 (match_operand:VF_128 1 "register_operand" "0,v")
1971 (match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_scalar_constraint>"))
1976 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1977 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
1978 [(set_attr "isa" "noavx,avx")
1979 (set_attr "type" "sse")
1980 (set_attr "btver2_sse_attr" "maxmin")
1981 (set_attr "prefix" "<round_saeonly_scalar_prefix>")
1982 (set_attr "mode" "<ssescalarmode>")])
1984 (define_insn "avx_addsubv4df3"
1985 [(set (match_operand:V4DF 0 "register_operand" "=x")
1988 (match_operand:V4DF 1 "register_operand" "x")
1989 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1990 (plus:V4DF (match_dup 1) (match_dup 2))
1993 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1994 [(set_attr "type" "sseadd")
1995 (set_attr "prefix" "vex")
1996 (set_attr "mode" "V4DF")])
1998 (define_insn "sse3_addsubv2df3"
1999 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2002 (match_operand:V2DF 1 "register_operand" "0,x")
2003 (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
2004 (plus:V2DF (match_dup 1) (match_dup 2))
2008 addsubpd\t{%2, %0|%0, %2}
2009 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2010 [(set_attr "isa" "noavx,avx")
2011 (set_attr "type" "sseadd")
2012 (set_attr "atom_unit" "complex")
2013 (set_attr "prefix" "orig,vex")
2014 (set_attr "mode" "V2DF")])
2016 (define_insn "avx_addsubv8sf3"
2017 [(set (match_operand:V8SF 0 "register_operand" "=x")
2020 (match_operand:V8SF 1 "register_operand" "x")
2021 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2022 (plus:V8SF (match_dup 1) (match_dup 2))
2025 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2026 [(set_attr "type" "sseadd")
2027 (set_attr "prefix" "vex")
2028 (set_attr "mode" "V8SF")])
2030 (define_insn "sse3_addsubv4sf3"
2031 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2034 (match_operand:V4SF 1 "register_operand" "0,x")
2035 (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
2036 (plus:V4SF (match_dup 1) (match_dup 2))
2040 addsubps\t{%2, %0|%0, %2}
2041 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2042 [(set_attr "isa" "noavx,avx")
2043 (set_attr "type" "sseadd")
2044 (set_attr "prefix" "orig,vex")
2045 (set_attr "prefix_rep" "1,*")
2046 (set_attr "mode" "V4SF")])
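;; Illustrative sketch (assumption): element-wise effect of the ADDSUB
;; patterns above, shown for the V4SF case:
;;
;;   void addsubps (float d[4], const float a[4], const float b[4])
;;   {
;;     d[0] = a[0] - b[0];   /* even elements subtract */
;;     d[1] = a[1] + b[1];   /* odd elements add       */
;;     d[2] = a[2] - b[2];
;;     d[3] = a[3] + b[3];
;;   }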
2049 [(set (match_operand:VF_128_256 0 "register_operand")
2050 (match_operator:VF_128_256 6 "addsub_vm_operator"
2052 (match_operand:VF_128_256 1 "register_operand")
2053 (match_operand:VF_128_256 2 "vector_operand"))
2055 (match_operand:VF_128_256 3 "vector_operand")
2056 (match_operand:VF_128_256 4 "vector_operand"))
2057 (match_operand 5 "const_int_operand")]))]
2059 && can_create_pseudo_p ()
2060 && ((rtx_equal_p (operands[1], operands[3])
2061 && rtx_equal_p (operands[2], operands[4]))
2062 || (rtx_equal_p (operands[1], operands[4])
2063 && rtx_equal_p (operands[2], operands[3])))"
2065 (vec_merge:VF_128_256
2066 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2067 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2071 [(set (match_operand:VF_128_256 0 "register_operand")
2072 (match_operator:VF_128_256 6 "addsub_vm_operator"
2074 (match_operand:VF_128_256 1 "vector_operand")
2075 (match_operand:VF_128_256 2 "vector_operand"))
2077 (match_operand:VF_128_256 3 "register_operand")
2078 (match_operand:VF_128_256 4 "vector_operand"))
2079 (match_operand 5 "const_int_operand")]))]
2081 && can_create_pseudo_p ()
2082 && ((rtx_equal_p (operands[1], operands[3])
2083 && rtx_equal_p (operands[2], operands[4]))
2084 || (rtx_equal_p (operands[1], operands[4])
2085 && rtx_equal_p (operands[2], operands[3])))"
2087 (vec_merge:VF_128_256
2088 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2089 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2092 /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes. */
2094 = GEN_INT (~INTVAL (operands[5])
2095 & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
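;; Illustrative sketch (assumption): in the vec_merge above, mask bit i
;; set selects element i from the first (minus) arm, so once the plus and
;; minus arms are swapped the low GET_MODE_NUNITS bits of the mask must
;; be complemented, as in:
;;
;;   unsigned negate_addsub_mask (unsigned mask, unsigned nunits)
;;   {
;;     return ~mask & ((1u << nunits) - 1);
;;   }
;;
;;   /* V4SF example: 0b0101 (minus in lanes 0 and 2) becomes 0b1010.  */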
2099 [(set (match_operand:VF_128_256 0 "register_operand")
2100 (match_operator:VF_128_256 7 "addsub_vs_operator"
2101 [(vec_concat:<ssedoublemode>
2103 (match_operand:VF_128_256 1 "register_operand")
2104 (match_operand:VF_128_256 2 "vector_operand"))
2106 (match_operand:VF_128_256 3 "vector_operand")
2107 (match_operand:VF_128_256 4 "vector_operand")))
2108 (match_parallel 5 "addsub_vs_parallel"
2109 [(match_operand 6 "const_int_operand")])]))]
2111 && can_create_pseudo_p ()
2112 && ((rtx_equal_p (operands[1], operands[3])
2113 && rtx_equal_p (operands[2], operands[4]))
2114 || (rtx_equal_p (operands[1], operands[4])
2115 && rtx_equal_p (operands[2], operands[3])))"
2117 (vec_merge:VF_128_256
2118 (minus:VF_128_256 (match_dup 1) (match_dup 2))
2119 (plus:VF_128_256 (match_dup 1) (match_dup 2))
2122 int i, nelt = XVECLEN (operands[5], 0);
2123 HOST_WIDE_INT ival = 0;
2125 for (i = 0; i < nelt; i++)
2126 if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2127 ival |= HOST_WIDE_INT_1 << i;
2129 operands[5] = GEN_INT (ival);
2133 [(set (match_operand:VF_128_256 0 "register_operand")
2134 (match_operator:VF_128_256 7 "addsub_vs_operator"
2135 [(vec_concat:<ssedoublemode>
2137 (match_operand:VF_128_256 1 "vector_operand")
2138 (match_operand:VF_128_256 2 "vector_operand"))
2140 (match_operand:VF_128_256 3 "register_operand")
2141 (match_operand:VF_128_256 4 "vector_operand")))
2142 (match_parallel 5 "addsub_vs_parallel"
2143 [(match_operand 6 "const_int_operand")])]))]
2145 && can_create_pseudo_p ()
2146 && ((rtx_equal_p (operands[1], operands[3])
2147 && rtx_equal_p (operands[2], operands[4]))
2148 || (rtx_equal_p (operands[1], operands[4])
2149 && rtx_equal_p (operands[2], operands[3])))"
2151 (vec_merge:VF_128_256
2152 (minus:VF_128_256 (match_dup 3) (match_dup 4))
2153 (plus:VF_128_256 (match_dup 3) (match_dup 4))
2156 int i, nelt = XVECLEN (operands[5], 0);
2157 HOST_WIDE_INT ival = 0;
2159 for (i = 0; i < nelt; i++)
2160 if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2161 ival |= HOST_WIDE_INT_1 << i;
2163 operands[5] = GEN_INT (ival);
2166 (define_insn "avx_h<plusminus_insn>v4df3"
2167 [(set (match_operand:V4DF 0 "register_operand" "=x")
2172 (match_operand:V4DF 1 "register_operand" "x")
2173 (parallel [(const_int 0)]))
2174 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2177 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2178 (parallel [(const_int 0)]))
2179 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2182 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2183 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2185 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2186 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2188 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2189 [(set_attr "type" "sseadd")
2190 (set_attr "prefix" "vex")
2191 (set_attr "mode" "V4DF")])
2193 (define_expand "sse3_haddv2df3"
2194 [(set (match_operand:V2DF 0 "register_operand")
2198 (match_operand:V2DF 1 "register_operand")
2199 (parallel [(const_int 0)]))
2200 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2203 (match_operand:V2DF 2 "vector_operand")
2204 (parallel [(const_int 0)]))
2205 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2208 (define_insn "*sse3_haddv2df3"
2209 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2213 (match_operand:V2DF 1 "register_operand" "0,x")
2214 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2217 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2220 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2221 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2224 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2226 && INTVAL (operands[3]) != INTVAL (operands[4])
2227 && INTVAL (operands[5]) != INTVAL (operands[6])"
2229 haddpd\t{%2, %0|%0, %2}
2230 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2231 [(set_attr "isa" "noavx,avx")
2232 (set_attr "type" "sseadd")
2233 (set_attr "prefix" "orig,vex")
2234 (set_attr "mode" "V2DF")])
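;; Illustrative sketch (assumption): scalar semantics of the V2DF
;; horizontal-add pattern above:
;;
;;   void haddpd (double d[2], const double a[2], const double b[2])
;;   {
;;     d[0] = a[0] + a[1];
;;     d[1] = b[0] + b[1];
;;   }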
2236 (define_insn "sse3_hsubv2df3"
2237 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2241 (match_operand:V2DF 1 "register_operand" "0,x")
2242 (parallel [(const_int 0)]))
2243 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2246 (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2247 (parallel [(const_int 0)]))
2248 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2251 hsubpd\t{%2, %0|%0, %2}
2252 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2253 [(set_attr "isa" "noavx,avx")
2254 (set_attr "type" "sseadd")
2255 (set_attr "prefix" "orig,vex")
2256 (set_attr "mode" "V2DF")])
2258 (define_insn "*sse3_haddv2df3_low"
2259 [(set (match_operand:DF 0 "register_operand" "=x,x")
2262 (match_operand:V2DF 1 "register_operand" "0,x")
2263 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2266 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2268 && INTVAL (operands[2]) != INTVAL (operands[3])"
2270 haddpd\t{%0, %0|%0, %0}
2271 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2272 [(set_attr "isa" "noavx,avx")
2273 (set_attr "type" "sseadd1")
2274 (set_attr "prefix" "orig,vex")
2275 (set_attr "mode" "V2DF")])
2277 (define_insn "*sse3_hsubv2df3_low"
2278 [(set (match_operand:DF 0 "register_operand" "=x,x")
2281 (match_operand:V2DF 1 "register_operand" "0,x")
2282 (parallel [(const_int 0)]))
2285 (parallel [(const_int 1)]))))]
2288 hsubpd\t{%0, %0|%0, %0}
2289 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2290 [(set_attr "isa" "noavx,avx")
2291 (set_attr "type" "sseadd1")
2292 (set_attr "prefix" "orig,vex")
2293 (set_attr "mode" "V2DF")])
2295 (define_insn "avx_h<plusminus_insn>v8sf3"
2296 [(set (match_operand:V8SF 0 "register_operand" "=x")
2302 (match_operand:V8SF 1 "register_operand" "x")
2303 (parallel [(const_int 0)]))
2304 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2306 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2307 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2311 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2312 (parallel [(const_int 0)]))
2313 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2315 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2316 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2320 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2321 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2323 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2324 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2327 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2328 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2330 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2331 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2333 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2334 [(set_attr "type" "sseadd")
2335 (set_attr "prefix" "vex")
2336 (set_attr "mode" "V8SF")])
2338 (define_insn "sse3_h<plusminus_insn>v4sf3"
2339 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2344 (match_operand:V4SF 1 "register_operand" "0,x")
2345 (parallel [(const_int 0)]))
2346 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2348 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2349 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2353 (match_operand:V4SF 2 "vector_operand" "xBm,xm")
2354 (parallel [(const_int 0)]))
2355 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2357 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2358 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2361 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2362 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2363 [(set_attr "isa" "noavx,avx")
2364 (set_attr "type" "sseadd")
2365 (set_attr "atom_unit" "complex")
2366 (set_attr "prefix" "orig,vex")
2367 (set_attr "prefix_rep" "1,*")
2368 (set_attr "mode" "V4SF")])
2370 (define_expand "reduc_plus_scal_v8df"
2371 [(match_operand:DF 0 "register_operand")
2372 (match_operand:V8DF 1 "register_operand")]
2375 rtx tmp = gen_reg_rtx (V8DFmode);
2376 ix86_expand_reduc (gen_addv8df3, tmp, operands[1]);
2377 emit_insn (gen_vec_extractv8dfdf (operands[0], tmp, const0_rtx));
2381 (define_expand "reduc_plus_scal_v4df"
2382 [(match_operand:DF 0 "register_operand")
2383 (match_operand:V4DF 1 "register_operand")]
2386 rtx tmp = gen_reg_rtx (V4DFmode);
2387 rtx tmp2 = gen_reg_rtx (V4DFmode);
2388 rtx vec_res = gen_reg_rtx (V4DFmode);
2389 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2390 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2391 emit_insn (gen_addv4df3 (vec_res, tmp, tmp2));
2392 emit_insn (gen_vec_extractv4dfdf (operands[0], vec_res, const0_rtx));
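;; Illustrative trace (assumption): data flow of the V4DF reduction above
;; for operands[1] = { x0, x1, x2, x3 }:
;;
;;   tmp     = vhaddpd (v, v)      = { x0+x1, x0+x1, x2+x3, x2+x3 }
;;   tmp2    = vperm2f128 (tmp, 1) = { x2+x3, x2+x3, x0+x1, x0+x1 }
;;   vec_res = tmp + tmp2          = { sum,   sum,   sum,   sum   }
;;   result  = vec_res[0]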
2396 (define_expand "reduc_plus_scal_v2df"
2397 [(match_operand:DF 0 "register_operand")
2398 (match_operand:V2DF 1 "register_operand")]
2401 rtx tmp = gen_reg_rtx (V2DFmode);
2402 emit_insn (gen_sse3_haddv2df3 (tmp, operands[1], operands[1]));
2403 emit_insn (gen_vec_extractv2dfdf (operands[0], tmp, const0_rtx));
2407 (define_expand "reduc_plus_scal_v16sf"
2408 [(match_operand:SF 0 "register_operand")
2409 (match_operand:V16SF 1 "register_operand")]
2412 rtx tmp = gen_reg_rtx (V16SFmode);
2413 ix86_expand_reduc (gen_addv16sf3, tmp, operands[1]);
2414 emit_insn (gen_vec_extractv16sfsf (operands[0], tmp, const0_rtx));
2418 (define_expand "reduc_plus_scal_v8sf"
2419 [(match_operand:SF 0 "register_operand")
2420 (match_operand:V8SF 1 "register_operand")]
2423 rtx tmp = gen_reg_rtx (V8SFmode);
2424 rtx tmp2 = gen_reg_rtx (V8SFmode);
2425 rtx vec_res = gen_reg_rtx (V8SFmode);
2426 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2427 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2428 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2429 emit_insn (gen_addv8sf3 (vec_res, tmp, tmp2));
2430 emit_insn (gen_vec_extractv8sfsf (operands[0], vec_res, const0_rtx));
2434 (define_expand "reduc_plus_scal_v4sf"
2435 [(match_operand:SF 0 "register_operand")
2436 (match_operand:V4SF 1 "register_operand")]
2439 rtx vec_res = gen_reg_rtx (V4SFmode);
2442 rtx tmp = gen_reg_rtx (V4SFmode);
2443 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2444 emit_insn (gen_sse3_haddv4sf3 (vec_res, tmp, tmp));
2447 ix86_expand_reduc (gen_addv4sf3, vec_res, operands[1]);
2448 emit_insn (gen_vec_extractv4sfsf (operands[0], vec_res, const0_rtx));
2452 ;; Modes handled by reduc_sm{in,ax}* patterns.
2453 (define_mode_iterator REDUC_SMINMAX_MODE
2454 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2455 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2456 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2457 (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
2458 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2459 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2460 (V8DF "TARGET_AVX512F")])
2462 (define_expand "reduc_<code>_scal_<mode>"
2463 [(smaxmin:REDUC_SMINMAX_MODE
2464 (match_operand:<ssescalarmode> 0 "register_operand")
2465 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2468 rtx tmp = gen_reg_rtx (<MODE>mode);
2469 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2470 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2475 (define_expand "reduc_<code>_scal_<mode>"
2476 [(umaxmin:VI_AVX512BW
2477 (match_operand:<ssescalarmode> 0 "register_operand")
2478 (match_operand:VI_AVX512BW 1 "register_operand"))]
2481 rtx tmp = gen_reg_rtx (<MODE>mode);
2482 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2483 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2488 (define_expand "reduc_<code>_scal_<mode>"
2490 (match_operand:<ssescalarmode> 0 "register_operand")
2491 (match_operand:VI_256 1 "register_operand"))]
2494 rtx tmp = gen_reg_rtx (<MODE>mode);
2495 ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2496 emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2501 (define_expand "reduc_umin_scal_v8hi"
2503 (match_operand:HI 0 "register_operand")
2504 (match_operand:V8HI 1 "register_operand"))]
2507 rtx tmp = gen_reg_rtx (V8HImode);
2508 ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2509 emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
2513 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2514 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2516 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2517 (match_operand:SI 2 "const_0_to_255_operand")]
2520 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2521 [(set_attr "type" "sse")
2522 (set_attr "prefix" "evex")
2523 (set_attr "mode" "<MODE>")])
2525 (define_insn "reduces<mode>"
2526 [(set (match_operand:VF_128 0 "register_operand" "=v")
2529 [(match_operand:VF_128 1 "register_operand" "v")
2530 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2531 (match_operand:SI 3 "const_0_to_255_operand")]
2536 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2537 [(set_attr "type" "sse")
2538 (set_attr "prefix" "evex")
2539 (set_attr "mode" "<MODE>")])
2541 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2543 ;; Parallel floating point comparisons
2545 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2547 (define_insn "avx_cmp<mode>3"
2548 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2550 [(match_operand:VF_128_256 1 "register_operand" "x")
2551 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2552 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2555 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2556 [(set_attr "type" "ssecmp")
2557 (set_attr "length_immediate" "1")
2558 (set_attr "prefix" "vex")
2559 (set_attr "mode" "<MODE>")])
2561 (define_insn "avx_vmcmp<mode>3"
2562 [(set (match_operand:VF_128 0 "register_operand" "=x")
2565 [(match_operand:VF_128 1 "register_operand" "x")
2566 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2567 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2572 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2573 [(set_attr "type" "ssecmp")
2574 (set_attr "length_immediate" "1")
2575 (set_attr "prefix" "vex")
2576 (set_attr "mode" "<ssescalarmode>")])
2578 (define_insn "*<sse>_maskcmp<mode>3_comm"
2579 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2580 (match_operator:VF_128_256 3 "sse_comparison_operator"
2581 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2582 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2584 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2586 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2587 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2588 [(set_attr "isa" "noavx,avx")
2589 (set_attr "type" "ssecmp")
2590 (set_attr "length_immediate" "1")
2591 (set_attr "prefix" "orig,vex")
2592 (set_attr "mode" "<MODE>")])
2594 (define_insn "<sse>_maskcmp<mode>3"
2595 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2596 (match_operator:VF_128_256 3 "sse_comparison_operator"
2597 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2598 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2601 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2602 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2603 [(set_attr "isa" "noavx,avx")
2604 (set_attr "type" "ssecmp")
2605 (set_attr "length_immediate" "1")
2606 (set_attr "prefix" "orig,vex")
2607 (set_attr "mode" "<MODE>")])
2609 (define_insn "<sse>_vmmaskcmp<mode>3"
2610 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2612 (match_operator:VF_128 3 "sse_comparison_operator"
2613 [(match_operand:VF_128 1 "register_operand" "0,x")
2614 (match_operand:VF_128 2 "vector_operand" "xBm,xm")])
2619 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2620 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2621 [(set_attr "isa" "noavx,avx")
2622 (set_attr "type" "ssecmp")
2623 (set_attr "length_immediate" "1,*")
2624 (set_attr "prefix" "orig,vex")
2625 (set_attr "mode" "<ssescalarmode>")])
2627 (define_mode_attr cmp_imm_predicate
2628 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2629 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2630 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2631 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2632 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2633 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2634 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2635 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2636 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
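;; Illustrative note (assumption): the const_0_to_31 predicates above
;; correspond to the extended VEX/EVEX floating-point compare immediates,
;; while const_0_to_7 covers the 3-bit integer VPCMP/VPCMPU immediates.
;; Typical C-level uses through intrinsics:
;;
;;   __m256 m    = _mm256_cmp_ps (a, b, _CMP_LT_OS);       /* imm8 = 1 */
;;   __mmask16 k = _mm512_cmp_epi32_mask (x, y, _MM_CMPINT_LT);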
2638 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2639 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2640 (unspec:<avx512fmaskmode>
2641 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2642 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2643 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2645 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2646 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2647 [(set_attr "type" "ssecmp")
2648 (set_attr "length_immediate" "1")
2649 (set_attr "prefix" "evex")
2650 (set_attr "mode" "<sseinsnmode>")])
2652 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2653 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2654 (unspec:<avx512fmaskmode>
2655 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2656 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2657 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2660 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2661 [(set_attr "type" "ssecmp")
2662 (set_attr "length_immediate" "1")
2663 (set_attr "prefix" "evex")
2664 (set_attr "mode" "<sseinsnmode>")])
2666 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2667 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2668 (unspec:<avx512fmaskmode>
2669 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2670 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2671 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2672 UNSPEC_UNSIGNED_PCMP))]
2674 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2675 [(set_attr "type" "ssecmp")
2676 (set_attr "length_immediate" "1")
2677 (set_attr "prefix" "evex")
2678 (set_attr "mode" "<sseinsnmode>")])
2680 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2681 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2682 (unspec:<avx512fmaskmode>
2683 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2684 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2685 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2686 UNSPEC_UNSIGNED_PCMP))]
2688 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2689 [(set_attr "type" "ssecmp")
2690 (set_attr "length_immediate" "1")
2691 (set_attr "prefix" "evex")
2692 (set_attr "mode" "<sseinsnmode>")])
2694 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2695 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2696 (and:<avx512fmaskmode>
2697 (unspec:<avx512fmaskmode>
2698 [(match_operand:VF_128 1 "register_operand" "v")
2699 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2700 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2704 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2705 [(set_attr "type" "ssecmp")
2706 (set_attr "length_immediate" "1")
2707 (set_attr "prefix" "evex")
2708 (set_attr "mode" "<ssescalarmode>")])
2710 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2711 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2712 (and:<avx512fmaskmode>
2713 (unspec:<avx512fmaskmode>
2714 [(match_operand:VF_128 1 "register_operand" "v")
2715 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2716 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2718 (and:<avx512fmaskmode>
2719 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2722 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2723 [(set_attr "type" "ssecmp")
2724 (set_attr "length_immediate" "1")
2725 (set_attr "prefix" "evex")
2726 (set_attr "mode" "<ssescalarmode>")])
2728 (define_insn "avx512f_maskcmp<mode>3"
2729 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2730 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2731 [(match_operand:VF 1 "register_operand" "v")
2732 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2734 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2735 [(set_attr "type" "ssecmp")
2736 (set_attr "length_immediate" "1")
2737 (set_attr "prefix" "evex")
2738 (set_attr "mode" "<sseinsnmode>")])
2740 (define_insn "<sse>_comi<round_saeonly_name>"
2741 [(set (reg:CCFP FLAGS_REG)
2744 (match_operand:<ssevecmode> 0 "register_operand" "v")
2745 (parallel [(const_int 0)]))
2747 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
2748 (parallel [(const_int 0)]))))]
2749 "SSE_FLOAT_MODE_P (<MODE>mode)"
2750 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2751 [(set_attr "type" "ssecomi")
2752 (set_attr "prefix" "maybe_vex")
2753 (set_attr "prefix_rep" "0")
2754 (set (attr "prefix_data16")
2755 (if_then_else (eq_attr "mode" "DF")
2757 (const_string "0")))
2758 (set_attr "mode" "<MODE>")])
2760 (define_insn "<sse>_ucomi<round_saeonly_name>"
2761 [(set (reg:CCFPU FLAGS_REG)
2764 (match_operand:<ssevecmode> 0 "register_operand" "v")
2765 (parallel [(const_int 0)]))
2767 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
2768 (parallel [(const_int 0)]))))]
2769 "SSE_FLOAT_MODE_P (<MODE>mode)"
2770 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2771 [(set_attr "type" "ssecomi")
2772 (set_attr "prefix" "maybe_vex")
2773 (set_attr "prefix_rep" "0")
2774 (set (attr "prefix_data16")
2775 (if_then_else (eq_attr "mode" "DF")
2777 (const_string "0")))
2778 (set_attr "mode" "<MODE>")])
2780 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2781 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2782 (match_operator:<avx512fmaskmode> 1 ""
2783 [(match_operand:V48_AVX512VL 2 "register_operand")
2784 (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
2787 bool ok = ix86_expand_mask_vec_cmp (operands);
2792 (define_expand "vec_cmp<mode><avx512fmaskmodelower>"
2793 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2794 (match_operator:<avx512fmaskmode> 1 ""
2795 [(match_operand:VI12_AVX512VL 2 "register_operand")
2796 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2799 bool ok = ix86_expand_mask_vec_cmp (operands);
2804 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2805 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2806 (match_operator:<sseintvecmode> 1 ""
2807 [(match_operand:VI_256 2 "register_operand")
2808 (match_operand:VI_256 3 "nonimmediate_operand")]))]
2811 bool ok = ix86_expand_int_vec_cmp (operands);
2816 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2817 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2818 (match_operator:<sseintvecmode> 1 ""
2819 [(match_operand:VI124_128 2 "register_operand")
2820 (match_operand:VI124_128 3 "vector_operand")]))]
2823 bool ok = ix86_expand_int_vec_cmp (operands);
2828 (define_expand "vec_cmpv2div2di"
2829 [(set (match_operand:V2DI 0 "register_operand")
2830 (match_operator:V2DI 1 ""
2831 [(match_operand:V2DI 2 "register_operand")
2832 (match_operand:V2DI 3 "vector_operand")]))]
2835 bool ok = ix86_expand_int_vec_cmp (operands);
2840 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2841 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2842 (match_operator:<sseintvecmode> 1 ""
2843 [(match_operand:VF_256 2 "register_operand")
2844 (match_operand:VF_256 3 "nonimmediate_operand")]))]
2847 bool ok = ix86_expand_fp_vec_cmp (operands);
2852 (define_expand "vec_cmp<mode><sseintvecmodelower>"
2853 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2854 (match_operator:<sseintvecmode> 1 ""
2855 [(match_operand:VF_128 2 "register_operand")
2856 (match_operand:VF_128 3 "vector_operand")]))]
2859 bool ok = ix86_expand_fp_vec_cmp (operands);
2864 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2865 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2866 (match_operator:<avx512fmaskmode> 1 ""
2867 [(match_operand:VI48_AVX512VL 2 "register_operand")
2868 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
2871 bool ok = ix86_expand_mask_vec_cmp (operands);
2876 (define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
2877 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
2878 (match_operator:<avx512fmaskmode> 1 ""
2879 [(match_operand:VI12_AVX512VL 2 "register_operand")
2880 (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
2883 bool ok = ix86_expand_mask_vec_cmp (operands);
2888 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
2889 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2890 (match_operator:<sseintvecmode> 1 ""
2891 [(match_operand:VI_256 2 "register_operand")
2892 (match_operand:VI_256 3 "nonimmediate_operand")]))]
2895 bool ok = ix86_expand_int_vec_cmp (operands);
2900 (define_expand "vec_cmpu<mode><sseintvecmodelower>"
2901 [(set (match_operand:<sseintvecmode> 0 "register_operand")
2902 (match_operator:<sseintvecmode> 1 ""
2903 [(match_operand:VI124_128 2 "register_operand")
2904 (match_operand:VI124_128 3 "vector_operand")]))]
2907 bool ok = ix86_expand_int_vec_cmp (operands);
2912 (define_expand "vec_cmpuv2div2di"
2913 [(set (match_operand:V2DI 0 "register_operand")
2914 (match_operator:V2DI 1 ""
2915 [(match_operand:V2DI 2 "register_operand")
2916 (match_operand:V2DI 3 "vector_operand")]))]
2919 bool ok = ix86_expand_int_vec_cmp (operands);
2924 (define_expand "vec_cmpeqv2div2di"
2925 [(set (match_operand:V2DI 0 "register_operand")
2926 (match_operator:V2DI 1 ""
2927 [(match_operand:V2DI 2 "register_operand")
2928 (match_operand:V2DI 3 "vector_operand")]))]
2931 bool ok = ix86_expand_int_vec_cmp (operands);
2936 (define_expand "vcond<V_512:mode><VF_512:mode>"
2937 [(set (match_operand:V_512 0 "register_operand")
2939 (match_operator 3 ""
2940 [(match_operand:VF_512 4 "nonimmediate_operand")
2941 (match_operand:VF_512 5 "nonimmediate_operand")])
2942 (match_operand:V_512 1 "general_operand")
2943 (match_operand:V_512 2 "general_operand")))]
2945 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2946 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2948 bool ok = ix86_expand_fp_vcond (operands);
2953 (define_expand "vcond<V_256:mode><VF_256:mode>"
2954 [(set (match_operand:V_256 0 "register_operand")
2956 (match_operator 3 ""
2957 [(match_operand:VF_256 4 "nonimmediate_operand")
2958 (match_operand:VF_256 5 "nonimmediate_operand")])
2959 (match_operand:V_256 1 "general_operand")
2960 (match_operand:V_256 2 "general_operand")))]
2962 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2963 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2965 bool ok = ix86_expand_fp_vcond (operands);
2970 (define_expand "vcond<V_128:mode><VF_128:mode>"
2971 [(set (match_operand:V_128 0 "register_operand")
2973 (match_operator 3 ""
2974 [(match_operand:VF_128 4 "vector_operand")
2975 (match_operand:VF_128 5 "vector_operand")])
2976 (match_operand:V_128 1 "general_operand")
2977 (match_operand:V_128 2 "general_operand")))]
2979 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2980 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2982 bool ok = ix86_expand_fp_vcond (operands);
2987 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
2988 [(set (match_operand:V48_AVX512VL 0 "register_operand")
2989 (vec_merge:V48_AVX512VL
2990 (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
2991 (match_operand:V48_AVX512VL 2 "vector_move_operand")
2992 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
2995 (define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
2996 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
2997 (vec_merge:VI12_AVX512VL
2998 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
2999 (match_operand:VI12_AVX512VL 2 "vector_move_operand")
3000 (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3003 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3004 [(set (match_operand:VI_256 0 "register_operand")
3006 (match_operand:VI_256 1 "nonimmediate_operand")
3007 (match_operand:VI_256 2 "vector_move_operand")
3008 (match_operand:<sseintvecmode> 3 "register_operand")))]
3011 ix86_expand_sse_movcc (operands[0], operands[3],
3012 operands[1], operands[2]);
3016 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3017 [(set (match_operand:VI124_128 0 "register_operand")
3018 (vec_merge:VI124_128
3019 (match_operand:VI124_128 1 "vector_operand")
3020 (match_operand:VI124_128 2 "vector_move_operand")
3021 (match_operand:<sseintvecmode> 3 "register_operand")))]
3024 ix86_expand_sse_movcc (operands[0], operands[3],
3025 operands[1], operands[2]);
3029 (define_expand "vcond_mask_v2div2di"
3030 [(set (match_operand:V2DI 0 "register_operand")
3032 (match_operand:V2DI 1 "vector_operand")
3033 (match_operand:V2DI 2 "vector_move_operand")
3034 (match_operand:V2DI 3 "register_operand")))]
3037 ix86_expand_sse_movcc (operands[0], operands[3],
3038 operands[1], operands[2]);
3042 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3043 [(set (match_operand:VF_256 0 "register_operand")
3045 (match_operand:VF_256 1 "nonimmediate_operand")
3046 (match_operand:VF_256 2 "vector_move_operand")
3047 (match_operand:<sseintvecmode> 3 "register_operand")))]
3050 ix86_expand_sse_movcc (operands[0], operands[3],
3051 operands[1], operands[2]);
3055 (define_expand "vcond_mask_<mode><sseintvecmodelower>"
3056 [(set (match_operand:VF_128 0 "register_operand")
3058 (match_operand:VF_128 1 "vector_operand")
3059 (match_operand:VF_128 2 "vector_move_operand")
3060 (match_operand:<sseintvecmode> 3 "register_operand")))]
3063 ix86_expand_sse_movcc (operands[0], operands[3],
3064 operands[1], operands[2]);
3068 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3070 ;; Parallel floating point logical operations
3072 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3074 (define_insn "<sse>_andnot<mode>3<mask_name>"
3075 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3078 (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
3079 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3080 "TARGET_SSE && <mask_avx512vl_condition>"
3082 static char buf[128];
3086 switch (which_alternative)
3089 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3094 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3100 switch (get_attr_mode (insn))
3108 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3109 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3110 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3113 suffix = "<ssemodesuffix>";
3116 snprintf (buf, sizeof (buf), ops, suffix);
3119 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3120 (set_attr "type" "sselog")
3121 (set_attr "prefix" "orig,maybe_vex,evex,evex")
3123 (cond [(and (match_test "<mask_applied>")
3124 (and (eq_attr "alternative" "1")
3125 (match_test "!TARGET_AVX512DQ")))
3126 (const_string "<sseintvecmode2>")
3127 (eq_attr "alternative" "3")
3128 (const_string "<sseintvecmode2>")
3129 (and (match_test "<MODE_SIZE> == 16")
3130 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3131 (const_string "<ssePSmode>")
3132 (match_test "TARGET_AVX")
3133 (const_string "<MODE>")
3134 (match_test "optimize_function_for_size_p (cfun)")
3135 (const_string "V4SF")
3137 (const_string "<MODE>")))])
3140 (define_insn "<sse>_andnot<mode>3<mask_name>"
3141 [(set (match_operand:VF_512 0 "register_operand" "=v")
3144 (match_operand:VF_512 1 "register_operand" "v"))
3145 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3148 static char buf[128];
3152 suffix = "<ssemodesuffix>";
3155 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
3156 if (!TARGET_AVX512DQ)
3158 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3162 snprintf (buf, sizeof (buf),
3163 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3167 [(set_attr "type" "sselog")
3168 (set_attr "prefix" "evex")
3170 (if_then_else (match_test "TARGET_AVX512DQ")
3171 (const_string "<sseinsnmode>")
3172 (const_string "XI")))])
3174 (define_expand "<code><mode>3<mask_name>"
3175 [(set (match_operand:VF_128_256 0 "register_operand")
3176 (any_logic:VF_128_256
3177 (match_operand:VF_128_256 1 "vector_operand")
3178 (match_operand:VF_128_256 2 "vector_operand")))]
3179 "TARGET_SSE && <mask_avx512vl_condition>"
3180 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3182 (define_expand "<code><mode>3<mask_name>"
3183 [(set (match_operand:VF_512 0 "register_operand")
3185 (match_operand:VF_512 1 "nonimmediate_operand")
3186 (match_operand:VF_512 2 "nonimmediate_operand")))]
3188 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3190 (define_insn "*<code><mode>3<mask_name>"
3191 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3192 (any_logic:VF_128_256
3193 (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
3194 (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3195 "TARGET_SSE && <mask_avx512vl_condition>
3196 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3198 static char buf[128];
3202 switch (which_alternative)
3205 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3210 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3216 switch (get_attr_mode (insn))
3224 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[qd]. */
3225 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3226 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3229 suffix = "<ssemodesuffix>";
3232 snprintf (buf, sizeof (buf), ops, suffix);
3235 [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3236 (set_attr "type" "sselog")
3237 (set_attr "prefix" "orig,maybe_evex,evex,evex")
3239 (cond [(and (match_test "<mask_applied>")
3240 (and (eq_attr "alternative" "1")
3241 (match_test "!TARGET_AVX512DQ")))
3242 (const_string "<sseintvecmode2>")
3243 (eq_attr "alternative" "3")
3244 (const_string "<sseintvecmode2>")
3245 (and (match_test "<MODE_SIZE> == 16")
3246 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3247 (const_string "<ssePSmode>")
3248 (match_test "TARGET_AVX")
3249 (const_string "<MODE>")
3250 (match_test "optimize_function_for_size_p (cfun)")
3251 (const_string "V4SF")
3253 (const_string "<MODE>")))])
3255 (define_insn "*<code><mode>3<mask_name>"
3256 [(set (match_operand:VF_512 0 "register_operand" "=v")
3258 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
3259 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3260 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
3262 static char buf[128];
3266 suffix = "<ssemodesuffix>";
3269 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
3270 if (!TARGET_AVX512DQ)
3272 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3276 snprintf (buf, sizeof (buf),
3277 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3281 [(set_attr "type" "sselog")
3282 (set_attr "prefix" "evex")
3284 (if_then_else (match_test "TARGET_AVX512DQ")
3285 (const_string "<sseinsnmode>")
3286 (const_string "XI")))])
3288 (define_expand "copysign<mode>3"
3291 (not:VF (match_dup 3))
3292 (match_operand:VF 1 "vector_operand")))
3294 (and:VF (match_dup 3)
3295 (match_operand:VF 2 "vector_operand")))
3296 (set (match_operand:VF 0 "register_operand")
3297 (ior:VF (match_dup 4) (match_dup 5)))]
3300 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3302 operands[4] = gen_reg_rtx (<MODE>mode);
3303 operands[5] = gen_reg_rtx (<MODE>mode);
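;; Illustrative sketch (assumption): the expansion above computes
;; copysign (x, y) as (x & ~signmask) | (y & signmask), i.e. the
;; magnitude of operand 1 with the sign of operand 2; the same bit
;; manipulation for a single float (needs <stdint.h> and <string.h>):
;;
;;   float copysignf_bits (float x, float y)
;;   {
;;     uint32_t xi, yi, sign = 0x80000000u;
;;     memcpy (&xi, &x, sizeof xi);
;;     memcpy (&yi, &y, sizeof yi);
;;     xi = (xi & ~sign) | (yi & sign);
;;     memcpy (&x, &xi, sizeof x);
;;     return x;
;;   }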
3306 ;; Also define scalar versions. These are used for abs, neg, and
3307 ;; conditional move. Using subregs into vector modes causes register
3308 ;; allocation lossage. These patterns do not allow memory operands
3309 ;; because the native instructions read the full 128 bits.
3311 (define_insn "*andnot<mode>3"
3312 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3315 (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
3316 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3317 "SSE_FLOAT_MODE_P (<MODE>mode)"
3319 static char buf[128];
3322 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3324 switch (which_alternative)
3327 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3330 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3333 if (TARGET_AVX512DQ)
3334 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3337 suffix = <MODE>mode == DFmode ? "q" : "d";
3338 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3342 if (TARGET_AVX512DQ)
3343 ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3346 suffix = <MODE>mode == DFmode ? "q" : "d";
3347 ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3354 snprintf (buf, sizeof (buf), ops, suffix);
3357 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3358 (set_attr "type" "sselog")
3359 (set_attr "prefix" "orig,vex,evex,evex")
3361 (cond [(eq_attr "alternative" "2")
3362 (if_then_else (match_test "TARGET_AVX512DQ")
3363 (const_string "<ssevecmode>")
3364 (const_string "TI"))
3365 (eq_attr "alternative" "3")
3366 (if_then_else (match_test "TARGET_AVX512DQ")
3367 (const_string "<avx512fvecmode>")
3368 (const_string "XI"))
3369 (and (match_test "<MODE_SIZE> == 16")
3370 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3371 (const_string "V4SF")
3372 (match_test "TARGET_AVX")
3373 (const_string "<ssevecmode>")
3374 (match_test "optimize_function_for_size_p (cfun)")
3375 (const_string "V4SF")
3377 (const_string "<ssevecmode>")))])
3379 (define_insn "*andnottf3"
3380 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3382 (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
3383 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3386 static char buf[128];
3389 = (which_alternative >= 2 ? "pandnq"
3390 : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
3392 switch (which_alternative)
3395 ops = "%s\t{%%2, %%0|%%0, %%2}";
3399 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3402 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3408 snprintf (buf, sizeof (buf), ops, tmp);
3411 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3412 (set_attr "type" "sselog")
3413 (set (attr "prefix_data16")
3415 (and (eq_attr "alternative" "0")
3416 (eq_attr "mode" "TI"))
3418 (const_string "*")))
3419 (set_attr "prefix" "orig,vex,evex,evex")
3421 (cond [(eq_attr "alternative" "2")
3423 (eq_attr "alternative" "3")
3425 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3426 (const_string "V4SF")
3427 (match_test "TARGET_AVX")
3429 (ior (not (match_test "TARGET_SSE2"))
3430 (match_test "optimize_function_for_size_p (cfun)"))
3431 (const_string "V4SF")
3433 (const_string "TI")))])
3435 (define_insn "*<code><mode>3"
3436 [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3438 (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
3439 (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3440 "SSE_FLOAT_MODE_P (<MODE>mode)"
3442 static char buf[128];
3445 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3447 switch (which_alternative)
3450 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3453 if (!TARGET_AVX512DQ)
3455 suffix = <MODE>mode == DFmode ? "q" : "d";
3456 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3461 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3464 if (TARGET_AVX512DQ)
3465 ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3468 suffix = <MODE>mode == DFmode ? "q" : "d";
3469 ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3476 snprintf (buf, sizeof (buf), ops, suffix);
3479 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3480 (set_attr "type" "sselog")
3481 (set_attr "prefix" "orig,vex,evex,evex")
3483 (cond [(eq_attr "alternative" "2")
3484 (if_then_else (match_test "TARGET_AVX512DQ")
3485 (const_string "<ssevecmode>")
3486 (const_string "TI"))
3487 (eq_attr "alternative" "3")
3488 (if_then_else (match_test "TARGET_AVX512DQ")
3489 (const_string "<avx512fvecmode>")
3490 (const_string "XI"))
3491 (and (match_test "<MODE_SIZE> == 16")
3492 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3493 (const_string "V4SF")
3494 (match_test "TARGET_AVX")
3495 (const_string "<ssevecmode>")
3496 (match_test "optimize_function_for_size_p (cfun)")
3497 (const_string "V4SF")
3499 (const_string "<ssevecmode>")))])
3501 (define_expand "<code>tf3"
3502 [(set (match_operand:TF 0 "register_operand")
3504 (match_operand:TF 1 "vector_operand")
3505 (match_operand:TF 2 "vector_operand")))]
3507 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3509 (define_insn "*<code>tf3"
3510 [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3512 (match_operand:TF 1 "vector_operand" "%0,x,v,v")
3513 (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3515 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3517 static char buf[128];
3520 = (which_alternative >= 2 ? "p<logic>q"
3521 : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
3523 switch (which_alternative)
3526 ops = "%s\t{%%2, %%0|%%0, %%2}";
3530 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3533 ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3539 snprintf (buf, sizeof (buf), ops, tmp);
3542 [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3543 (set_attr "type" "sselog")
3544 (set (attr "prefix_data16")
3546 (and (eq_attr "alternative" "0")
3547 (eq_attr "mode" "TI"))
3549 (const_string "*")))
3550 (set_attr "prefix" "orig,vex,evex,evex")
3552 (cond [(eq_attr "alternative" "2")
3554 (eq_attr "alternative" "3")
3556 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3557 (const_string "V4SF")
3558 (match_test "TARGET_AVX")
3560 (ior (not (match_test "TARGET_SSE2"))
3561 (match_test "optimize_function_for_size_p (cfun)"))
3562 (const_string "V4SF")
3564 (const_string "TI")))])
3566 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3568 ;; FMA floating point multiply/accumulate instructions. These include
3569 ;; scalar versions of the instructions as well as vector versions.
3571 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3573 ;; The standard names for scalar FMA are only available with SSE math enabled.
3574 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
3575 ;; care about the FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3576 ;; and TARGET_FMA4 are both false.
3577 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3578 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3579 ;; GAS to allow proper prefix selection. However, for the moment all hardware
3580 ;; that supports AVX512F also supports FMA, so we can ignore this for now.
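;; Illustrative sketch (assumption): the fma/fms/fnma/fnms expanders below
;; map to the usual fused operations, each performed with a single
;; rounding of the exact product-and-sum:
;;
;;   fma  (a, b, c) =   a * b + c
;;   fms  (a, b, c) =   a * b - c
;;   fnma (a, b, c) = -(a * b) + c
;;   fnms (a, b, c) = -(a * b) - c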
3581 (define_mode_iterator FMAMODEM
3582 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3583 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3584 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3585 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3586 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3587 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3588 (V16SF "TARGET_AVX512F")
3589 (V8DF "TARGET_AVX512F")])
3591 (define_expand "fma<mode>4"
3592 [(set (match_operand:FMAMODEM 0 "register_operand")
3594 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3595 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3596 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3598 (define_expand "fms<mode>4"
3599 [(set (match_operand:FMAMODEM 0 "register_operand")
3601 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3602 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3603 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3605 (define_expand "fnma<mode>4"
3606 [(set (match_operand:FMAMODEM 0 "register_operand")
3608 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3609 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3610 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3612 (define_expand "fnms<mode>4"
3613 [(set (match_operand:FMAMODEM 0 "register_operand")
3615 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3616 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3617 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3619 ;; The builtins for intrinsics are not constrained by SSE math being enabled.
3620 (define_mode_iterator FMAMODE_AVX512
3621 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3622 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3623 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3624 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3625 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3626 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3627 (V16SF "TARGET_AVX512F")
3628 (V8DF "TARGET_AVX512F")])
3630 (define_mode_iterator FMAMODE
3631 [SF DF V4SF V2DF V8SF V4DF])
3633 (define_expand "fma4i_fmadd_<mode>"
3634 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3636 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3637 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3638 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3640 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3641 [(match_operand:VF_AVX512VL 0 "register_operand")
3642 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3643 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3644 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3645 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3646 "TARGET_AVX512F && <round_mode512bit_condition>"
3648 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3649 operands[0], operands[1], operands[2], operands[3],
3650 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3654 (define_insn "*fma_fmadd_<mode>"
3655 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3657 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3658 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3659 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3660 "TARGET_FMA || TARGET_FMA4"
3662 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3663 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3664 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3665 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3666 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3667 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3668 (set_attr "type" "ssemuladd")
3669 (set_attr "mode" "<MODE>")])
3671 ;; Assume AVX-512F as the baseline.
3672 (define_mode_iterator VF_SF_AVX512VL
3673 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3674 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
3676 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3677 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3679 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3680 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3681 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3682 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3684 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3685 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3686 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3687 [(set_attr "type" "ssemuladd")
3688 (set_attr "mode" "<MODE>")])
3690 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3691 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3692 (vec_merge:VF_AVX512VL
3694 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3695 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3696 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3698 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3699 "TARGET_AVX512F && <round_mode512bit_condition>"
3701 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3702 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3703 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3704 (set_attr "type" "ssemuladd")
3705 (set_attr "mode" "<MODE>")])
3707 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3708 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3709 (vec_merge:VF_AVX512VL
3711 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3712 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3713 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3715 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3717 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3718 [(set_attr "isa" "fma_avx512f")
3719 (set_attr "type" "ssemuladd")
3720 (set_attr "mode" "<MODE>")])
3722 (define_insn "*fma_fmsub_<mode>"
3723 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3725 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3726 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3728 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3729 "TARGET_FMA || TARGET_FMA4"
3731 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3732 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3733 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3734 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3735 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3736 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3737 (set_attr "type" "ssemuladd")
3738 (set_attr "mode" "<MODE>")])
3740 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3741 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3743 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3744 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3746 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3747 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3749 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3750 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3751 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3752 [(set_attr "type" "ssemuladd")
3753 (set_attr "mode" "<MODE>")])
3755 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3756 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3757 (vec_merge:VF_AVX512VL
3759 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3760 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3762 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3764 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3767 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3768 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3769 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3770 (set_attr "type" "ssemuladd")
3771 (set_attr "mode" "<MODE>")])
3773 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
3774 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3775 (vec_merge:VF_AVX512VL
3777 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3778 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3780 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3782 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3783 "TARGET_AVX512F && <round_mode512bit_condition>"
3784 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3785 [(set_attr "isa" "fma_avx512f")
3786 (set_attr "type" "ssemuladd")
3787 (set_attr "mode" "<MODE>")])
3789 (define_insn "*fma_fnmadd_<mode>"
3790 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3793 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3794 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3795 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3796 "TARGET_FMA || TARGET_FMA4"
3798 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3799 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3800 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3801 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3802 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3803 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3804 (set_attr "type" "ssemuladd")
3805 (set_attr "mode" "<MODE>")])
3807 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3808 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3811 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3812 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3813 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3814 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3816 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3817 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3818 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3819 [(set_attr "type" "ssemuladd")
3820 (set_attr "mode" "<MODE>")])
3822 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
3823 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3824 (vec_merge:VF_AVX512VL
3827 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3828 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3829 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3831 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3832 "TARGET_AVX512F && <round_mode512bit_condition>"
3834 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3835 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3836 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3837 (set_attr "type" "ssemuladd")
3838 (set_attr "mode" "<MODE>")])
3840 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
3841 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3842 (vec_merge:VF_AVX512VL
3845 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3846 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3847 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3849 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3850 "TARGET_AVX512F && <round_mode512bit_condition>"
3851 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3852 [(set_attr "isa" "fma_avx512f")
3853 (set_attr "type" "ssemuladd")
3854 (set_attr "mode" "<MODE>")])
3856 (define_insn "*fma_fnmsub_<mode>"
3857 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3860 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3861 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3863 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3864 "TARGET_FMA || TARGET_FMA4"
3866 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3867 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3868 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3869 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3870 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3871 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3872 (set_attr "type" "ssemuladd")
3873 (set_attr "mode" "<MODE>")])
3875 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3876 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3879 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3880 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3882 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3883 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3885 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3886 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3887 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3888 [(set_attr "type" "ssemuladd")
3889 (set_attr "mode" "<MODE>")])
3891 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
3892 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3893 (vec_merge:VF_AVX512VL
3896 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3897 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3899 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3901 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3902 "TARGET_AVX512F && <round_mode512bit_condition>"
3904 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3905 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3906 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3907 (set_attr "type" "ssemuladd")
3908 (set_attr "mode" "<MODE>")])
3910 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
3911 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3912 (vec_merge:VF_AVX512VL
3915 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3916 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3918 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3920 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3922 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3923 [(set_attr "isa" "fma_avx512f")
3924 (set_attr "type" "ssemuladd")
3925 (set_attr "mode" "<MODE>")])
3927 ;; FMA parallel floating point multiply addsub and subadd operations.
3929 ;; It would be possible to represent these without the UNSPEC as
3930 ;;
3931 ;; (vec_merge
3932 ;;   (fma op1 op2 op3)
3933 ;;   (fma op1 op2 (neg op3))
3934 ;;   (merge-const))
3935 ;;
3936 ;; But this doesn't seem useful in practice.
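;; Illustrative element-wise behaviour (a sketch of the addsub semantics,
;; not taken verbatim from the patterns below): fmaddsub adds op3 in the
;; odd-numbered elements and subtracts it in the even-numbered ones, and
;; fmsubadd does the opposite:
;;
;;   /* fmaddsub: dst[i] = op1[i] * op2[i] + ((i & 1) ? op3[i] : -op3[i]);  */
;;   /* fmsubadd: dst[i] = op1[i] * op2[i] + ((i & 1) ? -op3[i] : op3[i]);  */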
3938 (define_expand "fmaddsub_<mode>"
3939 [(set (match_operand:VF 0 "register_operand")
3941 [(match_operand:VF 1 "nonimmediate_operand")
3942 (match_operand:VF 2 "nonimmediate_operand")
3943 (match_operand:VF 3 "nonimmediate_operand")]
3945 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3947 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
3948 [(match_operand:VF_AVX512VL 0 "register_operand")
3949 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3950 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3951 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3952 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3955 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3956 operands[0], operands[1], operands[2], operands[3],
3957 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3961 (define_insn "*fma_fmaddsub_<mode>"
3962 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3964 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3965 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3966 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
3968 "TARGET_FMA || TARGET_FMA4"
3970 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3971 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3972 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3973 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3974 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3975 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3976 (set_attr "type" "ssemuladd")
3977 (set_attr "mode" "<MODE>")])
3979 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3980 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3981 (unspec:VF_SF_AVX512VL
3982 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3983 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3984 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
3986 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3988 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3989 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3990 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3991 [(set_attr "type" "ssemuladd")
3992 (set_attr "mode" "<MODE>")])
3994 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
3995 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3996 (vec_merge:VF_AVX512VL
3998 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3999 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4000 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
4003 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4006 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4007 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4008 [(set_attr "isa" "fma_avx512f,fma_avx512f")
4009 (set_attr "type" "ssemuladd")
4010 (set_attr "mode" "<MODE>")])
4012 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
4013 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4014 (vec_merge:VF_AVX512VL
4016 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4017 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4018 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
4021 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4023 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4024 [(set_attr "isa" "fma_avx512f")
4025 (set_attr "type" "ssemuladd")
4026 (set_attr "mode" "<MODE>")])
4028 (define_insn "*fma_fmsubadd_<mode>"
4029 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
4031 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
4032 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
4034 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
4036 "TARGET_FMA || TARGET_FMA4"
4038 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4039 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4040 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4041 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4042 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4043 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4044 (set_attr "type" "ssemuladd")
4045 (set_attr "mode" "<MODE>")])
4047 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
4048 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4049 (unspec:VF_SF_AVX512VL
4050 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4051 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4053 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
4055 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4057 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4058 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4059 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4060 [(set_attr "type" "ssemuladd")
4061 (set_attr "mode" "<MODE>")])
4063 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
4064 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4065 (vec_merge:VF_AVX512VL
4067 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4068 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
4070 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
4073 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4076 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4077 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4078 [(set_attr "isa" "fma_avx512f,fma_avx512f")
4079 (set_attr "type" "ssemuladd")
4080 (set_attr "mode" "<MODE>")])
4082 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
4083 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4084 (vec_merge:VF_AVX512VL
4086 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
4087 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
4089 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
4092 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4094 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4095 [(set_attr "isa" "fma_avx512f")
4096 (set_attr "type" "ssemuladd")
4097 (set_attr "mode" "<MODE>")])
4099 ;; FMA3 floating point scalar intrinsics.  These merge the result with
4100 ;; the high-order elements of the destination register.
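;; For example, for the V4SF forms (an illustrative sketch):
;;
;;   /* dst[0] = fma (op1[0], op2[0], op3[0]);   element 0 only          */
;;   /* dst[i] = op1[i] for i > 0;               upper elements kept     */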
4102 (define_expand "fmai_vmfmadd_<mode><round_name>"
4103 [(set (match_operand:VF_128 0 "register_operand")
4106 (match_operand:VF_128 1 "<round_nimm_predicate>")
4107 (match_operand:VF_128 2 "<round_nimm_predicate>")
4108 (match_operand:VF_128 3 "<round_nimm_predicate>"))
4113 (define_insn "*fmai_fmadd_<mode>"
4114 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4117 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
4118 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
4119 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
4122 "TARGET_FMA || TARGET_AVX512F"
4124 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4125 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4126 [(set_attr "type" "ssemuladd")
4127 (set_attr "mode" "<MODE>")])
4129 (define_insn "*fmai_fmsub_<mode>"
4130 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4133 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
4134 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
4136 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
4139 "TARGET_FMA || TARGET_AVX512F"
4141 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4142 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4143 [(set_attr "type" "ssemuladd")
4144 (set_attr "mode" "<MODE>")])
4146 (define_insn "*fmai_fnmadd_<mode><round_name>"
4147 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4151 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
4152 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
4153 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4156 "TARGET_FMA || TARGET_AVX512F"
4158 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4159 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4160 [(set_attr "type" "ssemuladd")
4161 (set_attr "mode" "<MODE>")])
4163 (define_insn "*fmai_fnmsub_<mode><round_name>"
4164 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
4168 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
4169 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
4171 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
4174 "TARGET_FMA || TARGET_AVX512F"
4176 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
4177 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
4178 [(set_attr "type" "ssemuladd")
4179 (set_attr "mode" "<MODE>")])
4181 ;; FMA4 floating point scalar intrinsics. These write the
4182 ;; entire destination register, with the high-order elements zeroed.
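;; For example (an illustrative sketch):
;;
;;   /* dst[0] = fma (op1[0], op2[0], op3[0]);                           */
;;   /* dst[i] = 0.0 for i > 0;                  upper elements zeroed   */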
4184 (define_expand "fma4i_vmfmadd_<mode>"
4185 [(set (match_operand:VF_128 0 "register_operand")
4188 (match_operand:VF_128 1 "nonimmediate_operand")
4189 (match_operand:VF_128 2 "nonimmediate_operand")
4190 (match_operand:VF_128 3 "nonimmediate_operand"))
4194 "operands[4] = CONST0_RTX (<MODE>mode);")
4196 (define_insn "*fma4i_vmfmadd_<mode>"
4197 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4200 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4201 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4202 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4203 (match_operand:VF_128 4 "const0_operand")
4206 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4207 [(set_attr "type" "ssemuladd")
4208 (set_attr "mode" "<MODE>")])
4210 (define_insn "*fma4i_vmfmsub_<mode>"
4211 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4214 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
4215 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4217 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4218 (match_operand:VF_128 4 "const0_operand")
4221 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4222 [(set_attr "type" "ssemuladd")
4223 (set_attr "mode" "<MODE>")])
4225 (define_insn "*fma4i_vmfnmadd_<mode>"
4226 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4230 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4231 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4232 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
4233 (match_operand:VF_128 4 "const0_operand")
4236 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4237 [(set_attr "type" "ssemuladd")
4238 (set_attr "mode" "<MODE>")])
4240 (define_insn "*fma4i_vmfnmsub_<mode>"
4241 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
4245 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
4246 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
4248 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
4249 (match_operand:VF_128 4 "const0_operand")
4252 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
4253 [(set_attr "type" "ssemuladd")
4254 (set_attr "mode" "<MODE>")])
4256 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4258 ;; Parallel single-precision floating point conversion operations
4260 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
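;; Throughout the conversion sections, the cvt* patterns (UNSPEC_FIX_NOTRUNC)
;; convert with rounding controlled by MXCSR, while the cvtt* patterns
;; (plain fix:) truncate toward zero.  Roughly, in C terms (illustrative
;; analogy only):
;;
;;   /* cvtss2si:  (int) rintf (x)   -- rounds per the current mode  */
;;   /* cvttss2si: (int) x           -- truncates toward zero        */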
4262 (define_insn "sse_cvtpi2ps"
4263 [(set (match_operand:V4SF 0 "register_operand" "=x")
4266 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
4267 (match_operand:V4SF 1 "register_operand" "0")
4270 "cvtpi2ps\t{%2, %0|%0, %2}"
4271 [(set_attr "type" "ssecvt")
4272 (set_attr "mode" "V4SF")])
4274 (define_insn "sse_cvtps2pi"
4275 [(set (match_operand:V2SI 0 "register_operand" "=y")
4277 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
4279 (parallel [(const_int 0) (const_int 1)])))]
4281 "cvtps2pi\t{%1, %0|%0, %q1}"
4282 [(set_attr "type" "ssecvt")
4283 (set_attr "unit" "mmx")
4284 (set_attr "mode" "DI")])
4286 (define_insn "sse_cvttps2pi"
4287 [(set (match_operand:V2SI 0 "register_operand" "=y")
4289 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
4290 (parallel [(const_int 0) (const_int 1)])))]
4292 "cvttps2pi\t{%1, %0|%0, %q1}"
4293 [(set_attr "type" "ssecvt")
4294 (set_attr "unit" "mmx")
4295 (set_attr "prefix_rep" "0")
4296 (set_attr "mode" "SF")])
4298 (define_insn "sse_cvtsi2ss<round_name>"
4299 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4302 (float:SF (match_operand:SI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4303 (match_operand:V4SF 1 "register_operand" "0,0,v")
4307 cvtsi2ss\t{%2, %0|%0, %2}
4308 cvtsi2ss\t{%2, %0|%0, %2}
4309 vcvtsi2ss\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4310 [(set_attr "isa" "noavx,noavx,avx")
4311 (set_attr "type" "sseicvt")
4312 (set_attr "athlon_decode" "vector,double,*")
4313 (set_attr "amdfam10_decode" "vector,double,*")
4314 (set_attr "bdver1_decode" "double,direct,*")
4315 (set_attr "btver2_decode" "double,double,double")
4316 (set_attr "znver1_decode" "double,double,double")
4317 (set_attr "prefix" "orig,orig,maybe_evex")
4318 (set_attr "mode" "SF")])
4320 (define_insn "sse_cvtsi2ssq<round_name>"
4321 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4324 (float:SF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4325 (match_operand:V4SF 1 "register_operand" "0,0,v")
4327 "TARGET_SSE && TARGET_64BIT"
4329 cvtsi2ssq\t{%2, %0|%0, %2}
4330 cvtsi2ssq\t{%2, %0|%0, %2}
4331 vcvtsi2ssq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4332 [(set_attr "isa" "noavx,noavx,avx")
4333 (set_attr "type" "sseicvt")
4334 (set_attr "athlon_decode" "vector,double,*")
4335 (set_attr "amdfam10_decode" "vector,double,*")
4336 (set_attr "bdver1_decode" "double,direct,*")
4337 (set_attr "btver2_decode" "double,double,double")
4338 (set_attr "length_vex" "*,*,4")
4339 (set_attr "prefix_rex" "1,1,*")
4340 (set_attr "prefix" "orig,orig,maybe_evex")
4341 (set_attr "mode" "SF")])
4343 (define_insn "sse_cvtss2si<round_name>"
4344 [(set (match_operand:SI 0 "register_operand" "=r,r")
4347 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4348 (parallel [(const_int 0)]))]
4349 UNSPEC_FIX_NOTRUNC))]
4351 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4352 [(set_attr "type" "sseicvt")
4353 (set_attr "athlon_decode" "double,vector")
4354 (set_attr "bdver1_decode" "double,double")
4355 (set_attr "prefix_rep" "1")
4356 (set_attr "prefix" "maybe_vex")
4357 (set_attr "mode" "SI")])
4359 (define_insn "sse_cvtss2si_2"
4360 [(set (match_operand:SI 0 "register_operand" "=r,r")
4361 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
4362 UNSPEC_FIX_NOTRUNC))]
4364 "%vcvtss2si\t{%1, %0|%0, %k1}"
4365 [(set_attr "type" "sseicvt")
4366 (set_attr "athlon_decode" "double,vector")
4367 (set_attr "amdfam10_decode" "double,double")
4368 (set_attr "bdver1_decode" "double,double")
4369 (set_attr "prefix_rep" "1")
4370 (set_attr "prefix" "maybe_vex")
4371 (set_attr "mode" "SI")])
4373 (define_insn "sse_cvtss2siq<round_name>"
4374 [(set (match_operand:DI 0 "register_operand" "=r,r")
4377 (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4378 (parallel [(const_int 0)]))]
4379 UNSPEC_FIX_NOTRUNC))]
4380 "TARGET_SSE && TARGET_64BIT"
4381 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
4382 [(set_attr "type" "sseicvt")
4383 (set_attr "athlon_decode" "double,vector")
4384 (set_attr "bdver1_decode" "double,double")
4385 (set_attr "prefix_rep" "1")
4386 (set_attr "prefix" "maybe_vex")
4387 (set_attr "mode" "DI")])
4389 (define_insn "sse_cvtss2siq_2"
4390 [(set (match_operand:DI 0 "register_operand" "=r,r")
4391 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
4392 UNSPEC_FIX_NOTRUNC))]
4393 "TARGET_SSE && TARGET_64BIT"
4394 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
4395 [(set_attr "type" "sseicvt")
4396 (set_attr "athlon_decode" "double,vector")
4397 (set_attr "amdfam10_decode" "double,double")
4398 (set_attr "bdver1_decode" "double,double")
4399 (set_attr "prefix_rep" "1")
4400 (set_attr "prefix" "maybe_vex")
4401 (set_attr "mode" "DI")])
4403 (define_insn "sse_cvttss2si<round_saeonly_name>"
4404 [(set (match_operand:SI 0 "register_operand" "=r,r")
4407 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4408 (parallel [(const_int 0)]))))]
4410 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4411 [(set_attr "type" "sseicvt")
4412 (set_attr "athlon_decode" "double,vector")
4413 (set_attr "amdfam10_decode" "double,double")
4414 (set_attr "bdver1_decode" "double,double")
4415 (set_attr "prefix_rep" "1")
4416 (set_attr "prefix" "maybe_vex")
4417 (set_attr "mode" "SI")])
4419 (define_insn "sse_cvttss2siq<round_saeonly_name>"
4420 [(set (match_operand:DI 0 "register_operand" "=r,r")
4423 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
4424 (parallel [(const_int 0)]))))]
4425 "TARGET_SSE && TARGET_64BIT"
4426 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
4427 [(set_attr "type" "sseicvt")
4428 (set_attr "athlon_decode" "double,vector")
4429 (set_attr "amdfam10_decode" "double,double")
4430 (set_attr "bdver1_decode" "double,double")
4431 (set_attr "prefix_rep" "1")
4432 (set_attr "prefix" "maybe_vex")
4433 (set_attr "mode" "DI")])
4435 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
4436 [(set (match_operand:VF_128 0 "register_operand" "=v")
4438 (vec_duplicate:VF_128
4439 (unsigned_float:<ssescalarmode>
4440 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4441 (match_operand:VF_128 1 "register_operand" "v")
4443 "TARGET_AVX512F && <round_modev4sf_condition>"
4444 "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4445 [(set_attr "type" "sseicvt")
4446 (set_attr "prefix" "evex")
4447 (set_attr "mode" "<ssescalarmode>")])
4449 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
4450 [(set (match_operand:VF_128 0 "register_operand" "=v")
4452 (vec_duplicate:VF_128
4453 (unsigned_float:<ssescalarmode>
4454 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4455 (match_operand:VF_128 1 "register_operand" "v")
4457 "TARGET_AVX512F && TARGET_64BIT"
4458 "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4459 [(set_attr "type" "sseicvt")
4460 (set_attr "prefix" "evex")
4461 (set_attr "mode" "<ssescalarmode>")])
4463 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
4464 [(set (match_operand:VF1 0 "register_operand" "=x,v")
4466 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
4467 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
4469 cvtdq2ps\t{%1, %0|%0, %1}
4470 vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4471 [(set_attr "isa" "noavx,avx")
4472 (set_attr "type" "ssecvt")
4473 (set_attr "prefix" "maybe_vex")
4474 (set_attr "mode" "<sseinsnmode>")])
4476 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4477 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4478 (unsigned_float:VF1_AVX512VL
4479 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4481 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4482 [(set_attr "type" "ssecvt")
4483 (set_attr "prefix" "evex")
4484 (set_attr "mode" "<MODE>")])
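;; The floatuns expander below uses the native vcvtudq2ps forms when
;; AVX-512{F,VL} provides them for the given mode and otherwise falls back
;; to the helper ix86_expand_vector_convert_uns_vsivsf, which synthesises
;; the unsigned int->float conversion from signed operations.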
4486 (define_expand "floatuns<sseintvecmodelower><mode>2"
4487 [(match_operand:VF1 0 "register_operand")
4488 (match_operand:<sseintvecmode> 1 "register_operand")]
4489 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
4491 if (<MODE>mode == V16SFmode)
4492 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
4494 if (TARGET_AVX512VL)
4496 if (<MODE>mode == V4SFmode)
4497 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4499 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4502 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4508 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
4509 (define_mode_attr sf2simodelower
4510 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
4512 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
4513 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4515 [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
4516 UNSPEC_FIX_NOTRUNC))]
4517 "TARGET_SSE2 && <mask_mode512bit_condition>"
4518 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4519 [(set_attr "type" "ssecvt")
4520 (set (attr "prefix_data16")
4522 (match_test "TARGET_AVX")
4524 (const_string "1")))
4525 (set_attr "prefix" "maybe_vex")
4526 (set_attr "mode" "<sseinsnmode>")])
4528 (define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
4529 [(set (match_operand:V16SI 0 "register_operand" "=v")
4531 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
4532 UNSPEC_FIX_NOTRUNC))]
4534 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4535 [(set_attr "type" "ssecvt")
4536 (set_attr "prefix" "evex")
4537 (set_attr "mode" "XI")])
4539 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4540 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4541 (unspec:VI4_AVX512VL
4542 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
4543 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4545 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4546 [(set_attr "type" "ssecvt")
4547 (set_attr "prefix" "evex")
4548 (set_attr "mode" "<sseinsnmode>")])
4550 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4551 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4552 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4553 UNSPEC_FIX_NOTRUNC))]
4554 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4555 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4556 [(set_attr "type" "ssecvt")
4557 (set_attr "prefix" "evex")
4558 (set_attr "mode" "<sseinsnmode>")])
4560 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4561 [(set (match_operand:V2DI 0 "register_operand" "=v")
4564 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4565 (parallel [(const_int 0) (const_int 1)]))]
4566 UNSPEC_FIX_NOTRUNC))]
4567 "TARGET_AVX512DQ && TARGET_AVX512VL"
4568 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4569 [(set_attr "type" "ssecvt")
4570 (set_attr "prefix" "evex")
4571 (set_attr "mode" "TI")])
4573 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4574 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4575 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4576 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4577 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4578 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4579 [(set_attr "type" "ssecvt")
4580 (set_attr "prefix" "evex")
4581 (set_attr "mode" "<sseinsnmode>")])
4583 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4584 [(set (match_operand:V2DI 0 "register_operand" "=v")
4587 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4588 (parallel [(const_int 0) (const_int 1)]))]
4589 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4590 "TARGET_AVX512DQ && TARGET_AVX512VL"
4591 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4592 [(set_attr "type" "ssecvt")
4593 (set_attr "prefix" "evex")
4594 (set_attr "mode" "TI")])
4596 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
4597 [(set (match_operand:V16SI 0 "register_operand" "=v")
4599 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4601 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4602 [(set_attr "type" "ssecvt")
4603 (set_attr "prefix" "evex")
4604 (set_attr "mode" "XI")])
4606 (define_insn "fix_truncv8sfv8si2<mask_name>"
4607 [(set (match_operand:V8SI 0 "register_operand" "=v")
4608 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4609 "TARGET_AVX && <mask_avx512vl_condition>"
4610 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4611 [(set_attr "type" "ssecvt")
4612 (set_attr "prefix" "<mask_prefix>")
4613 (set_attr "mode" "OI")])
4615 (define_insn "fix_truncv4sfv4si2<mask_name>"
4616 [(set (match_operand:V4SI 0 "register_operand" "=v")
4617 (fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
4618 "TARGET_SSE2 && <mask_avx512vl_condition>"
4619 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4620 [(set_attr "type" "ssecvt")
4621 (set (attr "prefix_rep")
4623 (match_test "TARGET_AVX")
4625 (const_string "1")))
4626 (set (attr "prefix_data16")
4628 (match_test "TARGET_AVX")
4630 (const_string "0")))
4631 (set_attr "prefix_data16" "0")
4632 (set_attr "prefix" "<mask_prefix2>")
4633 (set_attr "mode" "TI")])
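;; Without an AVX-512 unsigned form, the expander below lowers the unsigned
;; float->int truncation onto the signed path: the input is pre-adjusted by
;; ix86_expand_adjust_ufix_to_sfix_si, converted with the signed truncating
;; pattern, and the result is then corrected with an XOR of the bias value
;; the helper hands back.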
4635 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
4636 [(match_operand:<sseintvecmode> 0 "register_operand")
4637 (match_operand:VF1 1 "register_operand")]
4640 if (<MODE>mode == V16SFmode)
4641 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4646 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4647 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4648 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4649 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4654 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4656 ;; Parallel double-precision floating point conversion operations
4658 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
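;; Several V2DF->V2SI conversions below are written as a vec_concat with a
;; zero V2SI vector: the hardware instruction writes a full 128-bit
;; destination whose upper half is zeroed, and the RTL makes that explicit.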
4660 (define_insn "sse2_cvtpi2pd"
4661 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4662 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
4664 "cvtpi2pd\t{%1, %0|%0, %1}"
4665 [(set_attr "type" "ssecvt")
4666 (set_attr "unit" "mmx,*")
4667 (set_attr "prefix_data16" "1,*")
4668 (set_attr "mode" "V2DF")])
4670 (define_insn "sse2_cvtpd2pi"
4671 [(set (match_operand:V2SI 0 "register_operand" "=y")
4672 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4673 UNSPEC_FIX_NOTRUNC))]
4675 "cvtpd2pi\t{%1, %0|%0, %1}"
4676 [(set_attr "type" "ssecvt")
4677 (set_attr "unit" "mmx")
4678 (set_attr "bdver1_decode" "double")
4679 (set_attr "btver2_decode" "direct")
4680 (set_attr "prefix_data16" "1")
4681 (set_attr "mode" "DI")])
4683 (define_insn "sse2_cvttpd2pi"
4684 [(set (match_operand:V2SI 0 "register_operand" "=y")
4685 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
4687 "cvttpd2pi\t{%1, %0|%0, %1}"
4688 [(set_attr "type" "ssecvt")
4689 (set_attr "unit" "mmx")
4690 (set_attr "bdver1_decode" "double")
4691 (set_attr "prefix_data16" "1")
4692 (set_attr "mode" "TI")])
4694 (define_insn "sse2_cvtsi2sd"
4695 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4698 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
4699 (match_operand:V2DF 1 "register_operand" "0,0,v")
4703 cvtsi2sd\t{%2, %0|%0, %2}
4704 cvtsi2sd\t{%2, %0|%0, %2}
4705 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4706 [(set_attr "isa" "noavx,noavx,avx")
4707 (set_attr "type" "sseicvt")
4708 (set_attr "athlon_decode" "double,direct,*")
4709 (set_attr "amdfam10_decode" "vector,double,*")
4710 (set_attr "bdver1_decode" "double,direct,*")
4711 (set_attr "btver2_decode" "double,double,double")
4712 (set_attr "znver1_decode" "double,double,double")
4713 (set_attr "prefix" "orig,orig,maybe_evex")
4714 (set_attr "mode" "DF")])
4716 (define_insn "sse2_cvtsi2sdq<round_name>"
4717 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4720 (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
4721 (match_operand:V2DF 1 "register_operand" "0,0,v")
4723 "TARGET_SSE2 && TARGET_64BIT"
4725 cvtsi2sdq\t{%2, %0|%0, %2}
4726 cvtsi2sdq\t{%2, %0|%0, %2}
4727 vcvtsi2sdq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4728 [(set_attr "isa" "noavx,noavx,avx")
4729 (set_attr "type" "sseicvt")
4730 (set_attr "athlon_decode" "double,direct,*")
4731 (set_attr "amdfam10_decode" "vector,double,*")
4732 (set_attr "bdver1_decode" "double,direct,*")
4733 (set_attr "length_vex" "*,*,4")
4734 (set_attr "prefix_rex" "1,1,*")
4735 (set_attr "prefix" "orig,orig,maybe_evex")
4736 (set_attr "mode" "DF")])
4738 (define_insn "avx512f_vcvtss2usi<round_name>"
4739 [(set (match_operand:SI 0 "register_operand" "=r")
4742 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4743 (parallel [(const_int 0)]))]
4744 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4746 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4747 [(set_attr "type" "sseicvt")
4748 (set_attr "prefix" "evex")
4749 (set_attr "mode" "SI")])
4751 (define_insn "avx512f_vcvtss2usiq<round_name>"
4752 [(set (match_operand:DI 0 "register_operand" "=r")
4755 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4756 (parallel [(const_int 0)]))]
4757 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4758 "TARGET_AVX512F && TARGET_64BIT"
4759 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4760 [(set_attr "type" "sseicvt")
4761 (set_attr "prefix" "evex")
4762 (set_attr "mode" "DI")])
4764 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
4765 [(set (match_operand:SI 0 "register_operand" "=r")
4768 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4769 (parallel [(const_int 0)]))))]
4771 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4772 [(set_attr "type" "sseicvt")
4773 (set_attr "prefix" "evex")
4774 (set_attr "mode" "SI")])
4776 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
4777 [(set (match_operand:DI 0 "register_operand" "=r")
4780 (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4781 (parallel [(const_int 0)]))))]
4782 "TARGET_AVX512F && TARGET_64BIT"
4783 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4784 [(set_attr "type" "sseicvt")
4785 (set_attr "prefix" "evex")
4786 (set_attr "mode" "DI")])
4788 (define_insn "avx512f_vcvtsd2usi<round_name>"
4789 [(set (match_operand:SI 0 "register_operand" "=r")
4792 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4793 (parallel [(const_int 0)]))]
4794 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4796 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4797 [(set_attr "type" "sseicvt")
4798 (set_attr "prefix" "evex")
4799 (set_attr "mode" "SI")])
4801 (define_insn "avx512f_vcvtsd2usiq<round_name>"
4802 [(set (match_operand:DI 0 "register_operand" "=r")
4805 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4806 (parallel [(const_int 0)]))]
4807 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4808 "TARGET_AVX512F && TARGET_64BIT"
4809 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4810 [(set_attr "type" "sseicvt")
4811 (set_attr "prefix" "evex")
4812 (set_attr "mode" "DI")])
4814 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
4815 [(set (match_operand:SI 0 "register_operand" "=r")
4818 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4819 (parallel [(const_int 0)]))))]
4821 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4822 [(set_attr "type" "sseicvt")
4823 (set_attr "prefix" "evex")
4824 (set_attr "mode" "SI")])
4826 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
4827 [(set (match_operand:DI 0 "register_operand" "=r")
4830 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
4831 (parallel [(const_int 0)]))))]
4832 "TARGET_AVX512F && TARGET_64BIT"
4833 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4834 [(set_attr "type" "sseicvt")
4835 (set_attr "prefix" "evex")
4836 (set_attr "mode" "DI")])
4838 (define_insn "sse2_cvtsd2si<round_name>"
4839 [(set (match_operand:SI 0 "register_operand" "=r,r")
4842 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4843 (parallel [(const_int 0)]))]
4844 UNSPEC_FIX_NOTRUNC))]
4846 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4847 [(set_attr "type" "sseicvt")
4848 (set_attr "athlon_decode" "double,vector")
4849 (set_attr "bdver1_decode" "double,double")
4850 (set_attr "btver2_decode" "double,double")
4851 (set_attr "prefix_rep" "1")
4852 (set_attr "prefix" "maybe_vex")
4853 (set_attr "mode" "SI")])
4855 (define_insn "sse2_cvtsd2si_2"
4856 [(set (match_operand:SI 0 "register_operand" "=r,r")
4857 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4858 UNSPEC_FIX_NOTRUNC))]
4860 "%vcvtsd2si\t{%1, %0|%0, %q1}"
4861 [(set_attr "type" "sseicvt")
4862 (set_attr "athlon_decode" "double,vector")
4863 (set_attr "amdfam10_decode" "double,double")
4864 (set_attr "bdver1_decode" "double,double")
4865 (set_attr "prefix_rep" "1")
4866 (set_attr "prefix" "maybe_vex")
4867 (set_attr "mode" "SI")])
4869 (define_insn "sse2_cvtsd2siq<round_name>"
4870 [(set (match_operand:DI 0 "register_operand" "=r,r")
4873 (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
4874 (parallel [(const_int 0)]))]
4875 UNSPEC_FIX_NOTRUNC))]
4876 "TARGET_SSE2 && TARGET_64BIT"
4877 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4878 [(set_attr "type" "sseicvt")
4879 (set_attr "athlon_decode" "double,vector")
4880 (set_attr "bdver1_decode" "double,double")
4881 (set_attr "prefix_rep" "1")
4882 (set_attr "prefix" "maybe_vex")
4883 (set_attr "mode" "DI")])
4885 (define_insn "sse2_cvtsd2siq_2"
4886 [(set (match_operand:DI 0 "register_operand" "=r,r")
4887 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4888 UNSPEC_FIX_NOTRUNC))]
4889 "TARGET_SSE2 && TARGET_64BIT"
4890 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4891 [(set_attr "type" "sseicvt")
4892 (set_attr "athlon_decode" "double,vector")
4893 (set_attr "amdfam10_decode" "double,double")
4894 (set_attr "bdver1_decode" "double,double")
4895 (set_attr "prefix_rep" "1")
4896 (set_attr "prefix" "maybe_vex")
4897 (set_attr "mode" "DI")])
4899 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
4900 [(set (match_operand:SI 0 "register_operand" "=r,r")
4903 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4904 (parallel [(const_int 0)]))))]
4906 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4907 [(set_attr "type" "sseicvt")
4908 (set_attr "athlon_decode" "double,vector")
4909 (set_attr "amdfam10_decode" "double,double")
4910 (set_attr "bdver1_decode" "double,double")
4911 (set_attr "btver2_decode" "double,double")
4912 (set_attr "prefix_rep" "1")
4913 (set_attr "prefix" "maybe_vex")
4914 (set_attr "mode" "SI")])
4916 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4917 [(set (match_operand:DI 0 "register_operand" "=r,r")
4920 (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
4921 (parallel [(const_int 0)]))))]
4922 "TARGET_SSE2 && TARGET_64BIT"
4923 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4924 [(set_attr "type" "sseicvt")
4925 (set_attr "athlon_decode" "double,vector")
4926 (set_attr "amdfam10_decode" "double,double")
4927 (set_attr "bdver1_decode" "double,double")
4928 (set_attr "prefix_rep" "1")
4929 (set_attr "prefix" "maybe_vex")
4930 (set_attr "mode" "DI")])
4932 ;; For float<si2dfmode><mode>2 insn pattern
4933 (define_mode_attr si2dfmode
4934 [(V8DF "V8SI") (V4DF "V4SI")])
4935 (define_mode_attr si2dfmodelower
4936 [(V8DF "v8si") (V4DF "v4si")])
4938 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4939 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4940 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4941 "TARGET_AVX && <mask_mode512bit_condition>"
4942 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4943 [(set_attr "type" "ssecvt")
4944 (set_attr "prefix" "maybe_vex")
4945 (set_attr "mode" "<MODE>")])
4947 (define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
4948 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
4949 (any_float:VF2_AVX512VL
4950 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4952 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4953 [(set_attr "type" "ssecvt")
4954 (set_attr "prefix" "evex")
4955 (set_attr "mode" "<MODE>")])
4957 ;; For <floatsuffix>float<sselongvecmodelower><mode> insn patterns
4958 (define_mode_attr qq2pssuff
4959 [(V8SF "") (V4SF "{y}")])
4961 (define_mode_attr sselongvecmode
4962 [(V8SF "V8DI") (V4SF "V4DI")])
4964 (define_mode_attr sselongvecmodelower
4965 [(V8SF "v8di") (V4SF "v4di")])
4967 (define_mode_attr sseintvecmode3
4968 [(V8SF "XI") (V4SF "OI")
4969 (V8DF "OI") (V4DF "TI")])
4971 (define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
4972 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
4973 (any_float:VF1_128_256VL
4974 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4975 "TARGET_AVX512DQ && <round_modev8sf_condition>"
4976 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4977 [(set_attr "type" "ssecvt")
4978 (set_attr "prefix" "evex")
4979 (set_attr "mode" "<MODE>")])
4981 (define_insn "*<floatsuffix>floatv2div2sf2"
4982 [(set (match_operand:V4SF 0 "register_operand" "=v")
4984 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4985 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4986 "TARGET_AVX512DQ && TARGET_AVX512VL"
4987 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
4988 [(set_attr "type" "ssecvt")
4989 (set_attr "prefix" "evex")
4990 (set_attr "mode" "V4SF")])
4992 (define_insn "<floatsuffix>floatv2div2sf2_mask"
4993 [(set (match_operand:V4SF 0 "register_operand" "=v")
4996 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4998 (match_operand:V4SF 2 "vector_move_operand" "0C")
4999 (parallel [(const_int 0) (const_int 1)]))
5000 (match_operand:QI 3 "register_operand" "Yk"))
5001 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
5002 "TARGET_AVX512DQ && TARGET_AVX512VL"
5003 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5004 [(set_attr "type" "ssecvt")
5005 (set_attr "prefix" "evex")
5006 (set_attr "mode" "V4SF")])
5008 (define_insn "*<floatsuffix>floatv2div2sf2_mask_1"
5009 [(set (match_operand:V4SF 0 "register_operand" "=v")
5012 (any_float:V2SF (match_operand:V2DI 1
5013 "nonimmediate_operand" "vm"))
5014 (const_vector:V2SF [(const_int 0) (const_int 0)])
5015 (match_operand:QI 2 "register_operand" "Yk"))
5016 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
5017 "TARGET_AVX512DQ && TARGET_AVX512VL"
5018 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
5019 [(set_attr "type" "ssecvt")
5020 (set_attr "prefix" "evex")
5021 (set_attr "mode" "V4SF")])
5023 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
5024 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
5025 (unsigned_float:VF2_512_256VL
5026 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5028 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5029 [(set_attr "type" "ssecvt")
5030 (set_attr "prefix" "evex")
5031 (set_attr "mode" "<MODE>")])
5033 (define_insn "ufloatv2siv2df2<mask_name>"
5034 [(set (match_operand:V2DF 0 "register_operand" "=v")
5035 (unsigned_float:V2DF
5037 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5038 (parallel [(const_int 0) (const_int 1)]))))]
5040 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5041 [(set_attr "type" "ssecvt")
5042 (set_attr "prefix" "evex")
5043 (set_attr "mode" "V2DF")])
5045 (define_insn "avx512f_cvtdq2pd512_2"
5046 [(set (match_operand:V8DF 0 "register_operand" "=v")
5049 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
5050 (parallel [(const_int 0) (const_int 1)
5051 (const_int 2) (const_int 3)
5052 (const_int 4) (const_int 5)
5053 (const_int 6) (const_int 7)]))))]
5055 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
5056 [(set_attr "type" "ssecvt")
5057 (set_attr "prefix" "evex")
5058 (set_attr "mode" "V8DF")])
5060 (define_insn "avx_cvtdq2pd256_2"
5061 [(set (match_operand:V4DF 0 "register_operand" "=v")
5064 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
5065 (parallel [(const_int 0) (const_int 1)
5066 (const_int 2) (const_int 3)]))))]
5068 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
5069 [(set_attr "type" "ssecvt")
5070 (set_attr "prefix" "maybe_evex")
5071 (set_attr "mode" "V4DF")])
5073 (define_insn "sse2_cvtdq2pd<mask_name>"
5074 [(set (match_operand:V2DF 0 "register_operand" "=v")
5077 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5078 (parallel [(const_int 0) (const_int 1)]))))]
5079 "TARGET_SSE2 && <mask_avx512vl_condition>"
5080 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5081 [(set_attr "type" "ssecvt")
5082 (set_attr "prefix" "maybe_vex")
5083 (set_attr "mode" "V2DF")])
5085 (define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
5086 [(set (match_operand:V8SI 0 "register_operand" "=v")
5088 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
5089 UNSPEC_FIX_NOTRUNC))]
5091 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5092 [(set_attr "type" "ssecvt")
5093 (set_attr "prefix" "evex")
5094 (set_attr "mode" "OI")])
5096 (define_insn "avx_cvtpd2dq256<mask_name>"
5097 [(set (match_operand:V4SI 0 "register_operand" "=v")
5098 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5099 UNSPEC_FIX_NOTRUNC))]
5100 "TARGET_AVX && <mask_avx512vl_condition>"
5101 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5102 [(set_attr "type" "ssecvt")
5103 (set_attr "prefix" "<mask_prefix>")
5104 (set_attr "mode" "OI")])
5106 (define_expand "avx_cvtpd2dq256_2"
5107 [(set (match_operand:V8SI 0 "register_operand")
5109 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
5113 "operands[2] = CONST0_RTX (V4SImode);")
5115 (define_insn "*avx_cvtpd2dq256_2"
5116 [(set (match_operand:V8SI 0 "register_operand" "=v")
5118 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5120 (match_operand:V4SI 2 "const0_operand")))]
5122 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
5123 [(set_attr "type" "ssecvt")
5124 (set_attr "prefix" "vex")
5125 (set_attr "btver2_decode" "vector")
5126 (set_attr "mode" "OI")])
5128 (define_insn "sse2_cvtpd2dq<mask_name>"
5129 [(set (match_operand:V4SI 0 "register_operand" "=v")
5131 (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
5133 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5134 "TARGET_SSE2 && <mask_avx512vl_condition>"
5137 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5139 return "cvtpd2dq\t{%1, %0|%0, %1}";
5141 [(set_attr "type" "ssecvt")
5142 (set_attr "prefix_rep" "1")
5143 (set_attr "prefix_data16" "0")
5144 (set_attr "prefix" "maybe_vex")
5145 (set_attr "mode" "TI")
5146 (set_attr "amdfam10_decode" "double")
5147 (set_attr "athlon_decode" "vector")
5148 (set_attr "bdver1_decode" "double")])
5150 ;; For ufix_notrunc* insn patterns
5151 (define_mode_attr pd2udqsuff
5152 [(V8DF "") (V4DF "{y}")])
5154 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
5155 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
5157 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
5158 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5160 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5161 [(set_attr "type" "ssecvt")
5162 (set_attr "prefix" "evex")
5163 (set_attr "mode" "<sseinsnmode>")])
5165 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
5166 [(set (match_operand:V4SI 0 "register_operand" "=v")
5169 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5170 UNSPEC_UNSIGNED_FIX_NOTRUNC)
5171 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5173 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5174 [(set_attr "type" "ssecvt")
5175 (set_attr "prefix" "evex")
5176 (set_attr "mode" "TI")])
5178 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
5179 [(set (match_operand:V8SI 0 "register_operand" "=v")
5181 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5183 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5184 [(set_attr "type" "ssecvt")
5185 (set_attr "prefix" "evex")
5186 (set_attr "mode" "OI")])
5188 (define_insn "ufix_truncv2dfv2si2<mask_name>"
5189 [(set (match_operand:V4SI 0 "register_operand" "=v")
5191 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
5192 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5194 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5195 [(set_attr "type" "ssecvt")
5196 (set_attr "prefix" "evex")
5197 (set_attr "mode" "TI")])
5199 (define_insn "fix_truncv4dfv4si2<mask_name>"
5200 [(set (match_operand:V4SI 0 "register_operand" "=v")
5201 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5202 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
5203 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5204 [(set_attr "type" "ssecvt")
5205 (set_attr "prefix" "maybe_evex")
5206 (set_attr "mode" "OI")])
5208 (define_insn "ufix_truncv4dfv4si2<mask_name>"
5209 [(set (match_operand:V4SI 0 "register_operand" "=v")
5210 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5211 "TARGET_AVX512VL && TARGET_AVX512F"
5212 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5213 [(set_attr "type" "ssecvt")
5214 (set_attr "prefix" "maybe_evex")
5215 (set_attr "mode" "OI")])
5217 (define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
5218 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5219 (any_fix:<sseintvecmode>
5220 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5221 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
5222 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5223 [(set_attr "type" "ssecvt")
5224 (set_attr "prefix" "evex")
5225 (set_attr "mode" "<sseintvecmode2>")])
5227 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5228 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5229 (unspec:<sseintvecmode>
5230 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
5231 UNSPEC_FIX_NOTRUNC))]
5232 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5233 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5234 [(set_attr "type" "ssecvt")
5235 (set_attr "prefix" "evex")
5236 (set_attr "mode" "<sseintvecmode2>")])
5238 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
5239 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5240 (unspec:<sseintvecmode>
5241 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
5242 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
5243 "TARGET_AVX512DQ && <round_mode512bit_condition>"
5244 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5245 [(set_attr "type" "ssecvt")
5246 (set_attr "prefix" "evex")
5247 (set_attr "mode" "<sseintvecmode2>")])
5249 (define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
5250 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
5251 (any_fix:<sselongvecmode>
5252 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5253 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
5254 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5255 [(set_attr "type" "ssecvt")
5256 (set_attr "prefix" "evex")
5257 (set_attr "mode" "<sseintvecmode3>")])
5259 (define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
5260 [(set (match_operand:V2DI 0 "register_operand" "=v")
5263 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5264 (parallel [(const_int 0) (const_int 1)]))))]
5265 "TARGET_AVX512DQ && TARGET_AVX512VL"
5266 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5267 [(set_attr "type" "ssecvt")
5268 (set_attr "prefix" "evex")
5269 (set_attr "mode" "TI")])
5271 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
5272 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
5273 (unsigned_fix:<sseintvecmode>
5274 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
5276 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5277 [(set_attr "type" "ssecvt")
5278 (set_attr "prefix" "evex")
5279 (set_attr "mode" "<sseintvecmode2>")])
5281 (define_expand "avx_cvttpd2dq256_2"
5282 [(set (match_operand:V8SI 0 "register_operand")
5284 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
5287 "operands[2] = CONST0_RTX (V4SImode);")
5289 (define_insn "sse2_cvttpd2dq<mask_name>"
5290 [(set (match_operand:V4SI 0 "register_operand" "=v")
5292 (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
5293 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5294 "TARGET_SSE2 && <mask_avx512vl_condition>"
5297 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
5299 return "cvttpd2dq\t{%1, %0|%0, %1}";
5301 [(set_attr "type" "ssecvt")
5302 (set_attr "amdfam10_decode" "double")
5303 (set_attr "athlon_decode" "vector")
5304 (set_attr "bdver1_decode" "double")
5305 (set_attr "prefix" "maybe_vex")
5306 (set_attr "mode" "TI")])
5308 (define_insn "sse2_cvtsd2ss<round_name>"
5309 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5312 (float_truncate:V2SF
5313 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
5314 (match_operand:V4SF 1 "register_operand" "0,0,v")
5318 cvtsd2ss\t{%2, %0|%0, %2}
5319 cvtsd2ss\t{%2, %0|%0, %q2}
5320 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
5321 [(set_attr "isa" "noavx,noavx,avx")
5322 (set_attr "type" "ssecvt")
5323 (set_attr "athlon_decode" "vector,double,*")
5324 (set_attr "amdfam10_decode" "vector,double,*")
5325 (set_attr "bdver1_decode" "direct,direct,*")
5326 (set_attr "btver2_decode" "double,double,double")
5327 (set_attr "prefix" "orig,orig,<round_prefix>")
5328 (set_attr "mode" "SF")])
5330 (define_insn "*sse2_vd_cvtsd2ss"
5331 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
5334 (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
5335 (match_operand:V4SF 1 "register_operand" "0,0,v")
5339 cvtsd2ss\t{%2, %0|%0, %2}
5340 cvtsd2ss\t{%2, %0|%0, %2}
5341 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
5342 [(set_attr "isa" "noavx,noavx,avx")
5343 (set_attr "type" "ssecvt")
5344 (set_attr "athlon_decode" "vector,double,*")
5345 (set_attr "amdfam10_decode" "vector,double,*")
5346 (set_attr "bdver1_decode" "direct,direct,*")
5347 (set_attr "btver2_decode" "double,double,double")
5348 (set_attr "prefix" "orig,orig,vex")
5349 (set_attr "mode" "SF")])
5351 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
5352 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5356 (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
5357 (parallel [(const_int 0) (const_int 1)])))
5358 (match_operand:V2DF 1 "register_operand" "0,0,v")
5362 cvtss2sd\t{%2, %0|%0, %2}
5363 cvtss2sd\t{%2, %0|%0, %k2}
5364 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
5365 [(set_attr "isa" "noavx,noavx,avx")
5366 (set_attr "type" "ssecvt")
5367 (set_attr "amdfam10_decode" "vector,double,*")
5368 (set_attr "athlon_decode" "direct,direct,*")
5369 (set_attr "bdver1_decode" "direct,direct,*")
5370 (set_attr "btver2_decode" "double,double,double")
5371 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
5372 (set_attr "mode" "DF")])
5374 (define_insn "*sse2_vd_cvtss2sd"
5375 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
5378 (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
5379 (match_operand:V2DF 1 "register_operand" "0,0,v")
5383 cvtss2sd\t{%2, %0|%0, %2}
5384 cvtss2sd\t{%2, %0|%0, %2}
5385 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
5386 [(set_attr "isa" "noavx,noavx,avx")
5387 (set_attr "type" "ssecvt")
5388 (set_attr "amdfam10_decode" "vector,double,*")
5389 (set_attr "athlon_decode" "direct,direct,*")
5390 (set_attr "bdver1_decode" "direct,direct,*")
5391 (set_attr "btver2_decode" "double,double,double")
5392 (set_attr "prefix" "orig,orig,vex")
5393 (set_attr "mode" "DF")])
5395 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
5396 [(set (match_operand:V8SF 0 "register_operand" "=v")
5397 (float_truncate:V8SF
5398 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
5400 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5401 [(set_attr "type" "ssecvt")
5402 (set_attr "prefix" "evex")
5403 (set_attr "mode" "V8SF")])
5405 (define_insn "avx_cvtpd2ps256<mask_name>"
5406 [(set (match_operand:V4SF 0 "register_operand" "=v")
5407 (float_truncate:V4SF
5408 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
5409 "TARGET_AVX && <mask_avx512vl_condition>"
5410 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5411 [(set_attr "type" "ssecvt")
5412 (set_attr "prefix" "maybe_evex")
5413 (set_attr "btver2_decode" "vector")
5414 (set_attr "mode" "V4SF")])
5416 (define_expand "sse2_cvtpd2ps"
5417 [(set (match_operand:V4SF 0 "register_operand")
5419 (float_truncate:V2SF
5420 (match_operand:V2DF 1 "vector_operand"))
5423 "operands[2] = CONST0_RTX (V2SFmode);")
5425 (define_expand "sse2_cvtpd2ps_mask"
5426 [(set (match_operand:V4SF 0 "register_operand")
5429 (float_truncate:V2SF
5430 (match_operand:V2DF 1 "vector_operand"))
5432 (match_operand:V4SF 2 "register_operand")
5433 (match_operand:QI 3 "register_operand")))]
5435 "operands[4] = CONST0_RTX (V2SFmode);")
5437 (define_insn "*sse2_cvtpd2ps<mask_name>"
5438 [(set (match_operand:V4SF 0 "register_operand" "=v")
5440 (float_truncate:V2SF
5441 (match_operand:V2DF 1 "vector_operand" "vBm"))
5442 (match_operand:V2SF 2 "const0_operand")))]
5443 "TARGET_SSE2 && <mask_avx512vl_condition>"
5446 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
5448 return "cvtpd2ps\t{%1, %0|%0, %1}";
5450 [(set_attr "type" "ssecvt")
5451 (set_attr "amdfam10_decode" "double")
5452 (set_attr "athlon_decode" "vector")
5453 (set_attr "bdver1_decode" "double")
5454 (set_attr "prefix_data16" "1")
5455 (set_attr "prefix" "maybe_vex")
5456 (set_attr "mode" "V4SF")])
5458 ;; Single-precision source mode for the <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern below
5459 (define_mode_attr sf2dfmode
5460 [(V8DF "V8SF") (V4DF "V4SF")])
5462 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
5463 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
5464 (float_extend:VF2_512_256
5465 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
5466 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
5467 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
5468 [(set_attr "type" "ssecvt")
5469 (set_attr "prefix" "maybe_vex")
5470 (set_attr "mode" "<MODE>")])
5472 (define_insn "*avx_cvtps2pd256_2"
5473 [(set (match_operand:V4DF 0 "register_operand" "=v")
5476 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5477 (parallel [(const_int 0) (const_int 1)
5478 (const_int 2) (const_int 3)]))))]
5480 "vcvtps2pd\t{%x1, %0|%0, %x1}"
5481 [(set_attr "type" "ssecvt")
5482 (set_attr "prefix" "vex")
5483 (set_attr "mode" "V4DF")])
5485 (define_insn "vec_unpacks_lo_v16sf"
5486 [(set (match_operand:V8DF 0 "register_operand" "=v")
5489 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5490 (parallel [(const_int 0) (const_int 1)
5491 (const_int 2) (const_int 3)
5492 (const_int 4) (const_int 5)
5493 (const_int 6) (const_int 7)]))))]
5495 "vcvtps2pd\t{%t1, %0|%0, %t1}"
5496 [(set_attr "type" "ssecvt")
5497 (set_attr "prefix" "evex")
5498 (set_attr "mode" "V8DF")])
5500 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5501 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5502 (unspec:<avx512fmaskmode>
5503 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
5504 UNSPEC_CVTINT2MASK))]
5506 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5507 [(set_attr "prefix" "evex")
5508 (set_attr "mode" "<sseinsnmode>")])
5510 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5511 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5512 (unspec:<avx512fmaskmode>
5513 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
5514 UNSPEC_CVTINT2MASK))]
5516 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5517 [(set_attr "prefix" "evex")
5518 (set_attr "mode" "<sseinsnmode>")])
5520 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5521 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
5522 (vec_merge:VI12_AVX512VL
5525 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5528 operands[2] = CONSTM1_RTX (<MODE>mode);
5529 operands[3] = CONST0_RTX (<MODE>mode);
5532 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5533 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
5534 (vec_merge:VI12_AVX512VL
5535 (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
5536 (match_operand:VI12_AVX512VL 3 "const0_operand")
5537 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5539 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5540 [(set_attr "prefix" "evex")
5541 (set_attr "mode" "<sseinsnmode>")])
5543 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5544 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
5545 (vec_merge:VI48_AVX512VL
5548 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5551 operands[2] = CONSTM1_RTX (<MODE>mode);
5552 operands[3] = CONST0_RTX (<MODE>mode);
5555 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5556 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
5557 (vec_merge:VI48_AVX512VL
5558 (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
5559 (match_operand:VI48_AVX512VL 3 "const0_operand")
5560 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5562 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5563 [(set_attr "prefix" "evex")
5564 (set_attr "mode" "<sseinsnmode>")])
5566 (define_insn "sse2_cvtps2pd<mask_name>"
5567 [(set (match_operand:V2DF 0 "register_operand" "=v")
5570 (match_operand:V4SF 1 "vector_operand" "vm")
5571 (parallel [(const_int 0) (const_int 1)]))))]
5572 "TARGET_SSE2 && <mask_avx512vl_condition>"
5573 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5574 [(set_attr "type" "ssecvt")
5575 (set_attr "amdfam10_decode" "direct")
5576 (set_attr "athlon_decode" "double")
5577 (set_attr "bdver1_decode" "double")
5578 (set_attr "prefix_data16" "0")
5579 (set_attr "prefix" "maybe_vex")
5580 (set_attr "mode" "V2DF")])
5582 (define_expand "vec_unpacks_hi_v4sf"
5587 (match_operand:V4SF 1 "vector_operand"))
5588 (parallel [(const_int 6) (const_int 7)
5589 (const_int 2) (const_int 3)])))
5590 (set (match_operand:V2DF 0 "register_operand")
5594 (parallel [(const_int 0) (const_int 1)]))))]
5596 "operands[2] = gen_reg_rtx (V4SFmode);")
5598 (define_expand "vec_unpacks_hi_v8sf"
5601 (match_operand:V8SF 1 "register_operand")
5602 (parallel [(const_int 4) (const_int 5)
5603 (const_int 6) (const_int 7)])))
5604 (set (match_operand:V4DF 0 "register_operand")
5608 "operands[2] = gen_reg_rtx (V4SFmode);")
5610 (define_expand "vec_unpacks_hi_v16sf"
5613 (match_operand:V16SF 1 "register_operand")
5614 (parallel [(const_int 8) (const_int 9)
5615 (const_int 10) (const_int 11)
5616 (const_int 12) (const_int 13)
5617 (const_int 14) (const_int 15)])))
5618 (set (match_operand:V8DF 0 "register_operand")
5622 "operands[2] = gen_reg_rtx (V8SFmode);")
5624 (define_expand "vec_unpacks_lo_v4sf"
5625 [(set (match_operand:V2DF 0 "register_operand")
5628 (match_operand:V4SF 1 "vector_operand")
5629 (parallel [(const_int 0) (const_int 1)]))))]
5632 (define_expand "vec_unpacks_lo_v8sf"
5633 [(set (match_operand:V4DF 0 "register_operand")
5636 (match_operand:V8SF 1 "nonimmediate_operand")
5637 (parallel [(const_int 0) (const_int 1)
5638 (const_int 2) (const_int 3)]))))]
5641 (define_mode_attr sseunpackfltmode
5642 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
5643 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
5645 (define_expand "vec_unpacks_float_hi_<mode>"
5646 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5647 (match_operand:VI2_AVX512F 1 "register_operand")]
5650 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5652 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
5653 emit_insn (gen_rtx_SET (operands[0],
5654 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5658 (define_expand "vec_unpacks_float_lo_<mode>"
5659 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5660 (match_operand:VI2_AVX512F 1 "register_operand")]
5663 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5665 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
5666 emit_insn (gen_rtx_SET (operands[0],
5667 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5671 (define_expand "vec_unpacku_float_hi_<mode>"
5672 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5673 (match_operand:VI2_AVX512F 1 "register_operand")]
5676 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5678 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
5679 emit_insn (gen_rtx_SET (operands[0],
5680 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5684 (define_expand "vec_unpacku_float_lo_<mode>"
5685 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5686 (match_operand:VI2_AVX512F 1 "register_operand")]
5689 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5691 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
5692 emit_insn (gen_rtx_SET (operands[0],
5693 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
5697 (define_expand "vec_unpacks_float_hi_v4si"
5700 (match_operand:V4SI 1 "vector_operand")
5701 (parallel [(const_int 2) (const_int 3)
5702 (const_int 2) (const_int 3)])))
5703 (set (match_operand:V2DF 0 "register_operand")
5707 (parallel [(const_int 0) (const_int 1)]))))]
5709 "operands[2] = gen_reg_rtx (V4SImode);")
5711 (define_expand "vec_unpacks_float_lo_v4si"
5712 [(set (match_operand:V2DF 0 "register_operand")
5715 (match_operand:V4SI 1 "vector_operand")
5716 (parallel [(const_int 0) (const_int 1)]))))]
5719 (define_expand "vec_unpacks_float_hi_v8si"
5722 (match_operand:V8SI 1 "vector_operand")
5723 (parallel [(const_int 4) (const_int 5)
5724 (const_int 6) (const_int 7)])))
5725 (set (match_operand:V4DF 0 "register_operand")
5729 "operands[2] = gen_reg_rtx (V4SImode);")
5731 (define_expand "vec_unpacks_float_lo_v8si"
5732 [(set (match_operand:V4DF 0 "register_operand")
5735 (match_operand:V8SI 1 "nonimmediate_operand")
5736 (parallel [(const_int 0) (const_int 1)
5737 (const_int 2) (const_int 3)]))))]
5740 (define_expand "vec_unpacks_float_hi_v16si"
5743 (match_operand:V16SI 1 "nonimmediate_operand")
5744 (parallel [(const_int 8) (const_int 9)
5745 (const_int 10) (const_int 11)
5746 (const_int 12) (const_int 13)
5747 (const_int 14) (const_int 15)])))
5748 (set (match_operand:V8DF 0 "register_operand")
5752 "operands[2] = gen_reg_rtx (V8SImode);")
5754 (define_expand "vec_unpacks_float_lo_v16si"
5755 [(set (match_operand:V8DF 0 "register_operand")
5758 (match_operand:V16SI 1 "nonimmediate_operand")
5759 (parallel [(const_int 0) (const_int 1)
5760 (const_int 2) (const_int 3)
5761 (const_int 4) (const_int 5)
5762 (const_int 6) (const_int 7)]))))]
5765 (define_expand "vec_unpacku_float_hi_v4si"
5768 (match_operand:V4SI 1 "vector_operand")
5769 (parallel [(const_int 2) (const_int 3)
5770 (const_int 2) (const_int 3)])))
5775 (parallel [(const_int 0) (const_int 1)]))))
5777 (lt:V2DF (match_dup 6) (match_dup 3)))
5779 (and:V2DF (match_dup 7) (match_dup 4)))
5780 (set (match_operand:V2DF 0 "register_operand")
5781 (plus:V2DF (match_dup 6) (match_dup 8)))]
5784 REAL_VALUE_TYPE TWO32r;
5788 real_ldexp (&TWO32r, &dconst1, 32);
5789 x = const_double_from_real_value (TWO32r, DFmode);
5791 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5792 operands[4] = force_reg (V2DFmode,
5793 ix86_build_const_vector (V2DFmode, 1, x));
5795 operands[5] = gen_reg_rtx (V4SImode);
5797 for (i = 6; i < 9; i++)
5798 operands[i] = gen_reg_rtx (V2DFmode);
5801 (define_expand "vec_unpacku_float_lo_v4si"
5805 (match_operand:V4SI 1 "vector_operand")
5806 (parallel [(const_int 0) (const_int 1)]))))
5808 (lt:V2DF (match_dup 5) (match_dup 3)))
5810 (and:V2DF (match_dup 6) (match_dup 4)))
5811 (set (match_operand:V2DF 0 "register_operand")
5812 (plus:V2DF (match_dup 5) (match_dup 7)))]
5815 REAL_VALUE_TYPE TWO32r;
5819 real_ldexp (&TWO32r, &dconst1, 32);
5820 x = const_double_from_real_value (TWO32r, DFmode);
5822 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5823 operands[4] = force_reg (V2DFmode,
5824 ix86_build_const_vector (V2DFmode, 1, x));
5826 for (i = 5; i < 8; i++)
5827 operands[i] = gen_reg_rtx (V2DFmode);
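;; The two expanders above implement unsigned SImode -> DFmode conversion
;; without a native unsigned-convert instruction: the input is converted as
;; if signed, and 2^32 (the TWO32r constant) is added back in the lanes that
;; came out negative.  A minimal scalar C sketch of the same idea, purely
;; illustrative (the function name is made up, and the signed cast assumes
;; the usual x86 two's-complement reinterpretation):
;;
;;   double u32_to_double (unsigned int x)
;;   {
;;     double d = (double) (int) x;   /* signed cvtdq2pd-style convert   */
;;     if (d < 0.0)                   /* value had its top bit set       */
;;       d += 4294967296.0;           /* add 2^32 to recover the value   */
;;     return d;
;;   }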
5830 (define_expand "vec_unpacku_float_hi_v8si"
5831 [(match_operand:V4DF 0 "register_operand")
5832 (match_operand:V8SI 1 "register_operand")]
5835 REAL_VALUE_TYPE TWO32r;
5839 real_ldexp (&TWO32r, &dconst1, 32);
5840 x = const_double_from_real_value (TWO32r, DFmode);
5842 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5843 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5844 tmp[5] = gen_reg_rtx (V4SImode);
5846 for (i = 2; i < 5; i++)
5847 tmp[i] = gen_reg_rtx (V4DFmode);
5848 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5849 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
5850 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5851 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5852 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5856 (define_expand "vec_unpacku_float_hi_v16si"
5857 [(match_operand:V8DF 0 "register_operand")
5858 (match_operand:V16SI 1 "register_operand")]
5861 REAL_VALUE_TYPE TWO32r;
5864 real_ldexp (&TWO32r, &dconst1, 32);
5865 x = const_double_from_real_value (TWO32r, DFmode);
5867 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5868 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5869 tmp[2] = gen_reg_rtx (V8DFmode);
5870 tmp[3] = gen_reg_rtx (V8SImode);
5871 k = gen_reg_rtx (QImode);
5873 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5874 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
5875 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5876 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5877 emit_move_insn (operands[0], tmp[2]);
5881 (define_expand "vec_unpacku_float_lo_v8si"
5882 [(match_operand:V4DF 0 "register_operand")
5883 (match_operand:V8SI 1 "nonimmediate_operand")]
5886 REAL_VALUE_TYPE TWO32r;
5890 real_ldexp (&TWO32r, &dconst1, 32);
5891 x = const_double_from_real_value (TWO32r, DFmode);
5893 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5894 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5896 for (i = 2; i < 5; i++)
5897 tmp[i] = gen_reg_rtx (V4DFmode);
5898 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
5899 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5900 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5901 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
5905 (define_expand "vec_unpacku_float_lo_v16si"
5906 [(match_operand:V8DF 0 "register_operand")
5907 (match_operand:V16SI 1 "nonimmediate_operand")]
5910 REAL_VALUE_TYPE TWO32r;
5913 real_ldexp (&TWO32r, &dconst1, 32);
5914 x = const_double_from_real_value (TWO32r, DFmode);
5916 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5917 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5918 tmp[2] = gen_reg_rtx (V8DFmode);
5919 k = gen_reg_rtx (QImode);
5921 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
5922 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5923 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5924 emit_move_insn (operands[0], tmp[2]);
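;; The 512-bit expanders above use the same signed-convert-then-fix-up idea,
;; but route the comparison through a mask register and apply the 2^32
;; addition as a masked add.  A hedged AVX-512F intrinsics sketch of that
;; shape (function name invented for illustration):
;;
;;   #include <immintrin.h>
;;
;;   __m512d u32x8_to_double (__m256i x)
;;   {
;;     __m512d d  = _mm512_cvtepi32_pd (x);            /* signed convert    */
;;     __mmask8 k = _mm512_cmp_pd_mask (d, _mm512_setzero_pd (),
;;                                      _CMP_LT_OS);   /* lanes < 0         */
;;     return _mm512_mask_add_pd (d, k, d,             /* +2^32 only there  */
;;                                _mm512_set1_pd (4294967296.0));
;;   }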
5928 (define_expand "vec_pack_trunc_<mode>"
5930 (float_truncate:<sf2dfmode>
5931 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
5933 (float_truncate:<sf2dfmode>
5934 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5935 (set (match_operand:<ssePSmode> 0 "register_operand")
5936 (vec_concat:<ssePSmode>
5941 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
5942 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
5945 (define_expand "vec_pack_trunc_v2df"
5946 [(match_operand:V4SF 0 "register_operand")
5947 (match_operand:V2DF 1 "vector_operand")
5948 (match_operand:V2DF 2 "vector_operand")]
5953 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5955 tmp0 = gen_reg_rtx (V4DFmode);
5956 tmp1 = force_reg (V2DFmode, operands[1]);
5958 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5959 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
5963 tmp0 = gen_reg_rtx (V4SFmode);
5964 tmp1 = gen_reg_rtx (V4SFmode);
5966 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
5967 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
5968 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
5973 (define_expand "vec_pack_sfix_trunc_v8df"
5974 [(match_operand:V16SI 0 "register_operand")
5975 (match_operand:V8DF 1 "nonimmediate_operand")
5976 (match_operand:V8DF 2 "nonimmediate_operand")]
5981 r1 = gen_reg_rtx (V8SImode);
5982 r2 = gen_reg_rtx (V8SImode);
5984 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
5985 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
5986 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5990 (define_expand "vec_pack_sfix_trunc_v4df"
5991 [(match_operand:V8SI 0 "register_operand")
5992 (match_operand:V4DF 1 "nonimmediate_operand")
5993 (match_operand:V4DF 2 "nonimmediate_operand")]
5998 r1 = gen_reg_rtx (V4SImode);
5999 r2 = gen_reg_rtx (V4SImode);
6001 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
6002 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
6003 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
6007 (define_expand "vec_pack_sfix_trunc_v2df"
6008 [(match_operand:V4SI 0 "register_operand")
6009 (match_operand:V2DF 1 "vector_operand")
6010 (match_operand:V2DF 2 "vector_operand")]
6013 rtx tmp0, tmp1, tmp2;
6015 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6017 tmp0 = gen_reg_rtx (V4DFmode);
6018 tmp1 = force_reg (V2DFmode, operands[1]);
6020 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6021 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
6025 tmp0 = gen_reg_rtx (V4SImode);
6026 tmp1 = gen_reg_rtx (V4SImode);
6027 tmp2 = gen_reg_rtx (V2DImode);
6029 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
6030 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
6031 emit_insn (gen_vec_interleave_lowv2di (tmp2,
6032 gen_lowpart (V2DImode, tmp0),
6033 gen_lowpart (V2DImode, tmp1)));
6034 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
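;; A hedged intrinsics sketch of the non-AVX path above: each cvttpd2dq
;; leaves its two truncated results in the low half of the destination with
;; the upper half zeroed, so interleaving the low quadwords yields the packed
;; V4SI result.  Function name invented for illustration:
;;
;;   #include <emmintrin.h>
;;
;;   __m128i pack_sfix_trunc_v2df (__m128d a, __m128d b)
;;   {
;;     __m128i lo = _mm_cvttpd_epi32 (a);    /* { (int)a0, (int)a1, 0, 0 } */
;;     __m128i hi = _mm_cvttpd_epi32 (b);    /* { (int)b0, (int)b1, 0, 0 } */
;;     return _mm_unpacklo_epi64 (lo, hi);   /* { a0, a1, b0, b1 }         */
;;   }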
6039 (define_mode_attr ssepackfltmode
6040 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
6042 (define_expand "vec_pack_ufix_trunc_<mode>"
6043 [(match_operand:<ssepackfltmode> 0 "register_operand")
6044 (match_operand:VF2 1 "register_operand")
6045 (match_operand:VF2 2 "register_operand")]
6048 if (<MODE>mode == V8DFmode)
6052 r1 = gen_reg_rtx (V8SImode);
6053 r2 = gen_reg_rtx (V8SImode);
6055 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
6056 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
6057 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6062 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
6063 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
6064 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
6065 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
6066 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
6068 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
6069 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
6073 tmp[5] = gen_reg_rtx (V8SFmode);
6074 ix86_expand_vec_extract_even_odd (tmp[5],
6075 gen_lowpart (V8SFmode, tmp[2]),
6076 gen_lowpart (V8SFmode, tmp[3]), 0);
6077 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
6079 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
6080 operands[0], 0, OPTAB_DIRECT);
6081 if (tmp[6] != operands[0])
6082 emit_move_insn (operands[0], tmp[6]);
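;; ix86_expand_adjust_ufix_to_sfix_si is a GCC-internal helper not shown
;; here; the expander relies on the classic trick for unsigned truncation
;; when only a signed cvttpd2dq is available: values >= 2^31 are shifted
;; into signed range first, and the sign bit is XORed back in afterwards.
;; A scalar C sketch of that idea, illustrative only (function name made up,
;; out-of-range inputs left undefined as for the real instruction):
;;
;;   unsigned int double_to_u32 (double x)
;;   {
;;     unsigned int fixup = 0;
;;     if (x >= 2147483648.0)     /* x >= 2^31: outside signed range   */
;;       {
;;         x -= 2147483648.0;     /* adjust into signed range          */
;;         fixup = 0x80000000u;   /* bit to XOR back in afterwards     */
;;       }
;;     return (unsigned int) (int) x ^ fixup;  /* signed cvtt + pxor   */
;;   }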
6088 (define_expand "avx512f_vec_pack_sfix_v8df"
6089 [(match_operand:V16SI 0 "register_operand")
6090 (match_operand:V8DF 1 "nonimmediate_operand")
6091 (match_operand:V8DF 2 "nonimmediate_operand")]
6096 r1 = gen_reg_rtx (V8SImode);
6097 r2 = gen_reg_rtx (V8SImode);
6099 emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
6100 emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
6101 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
6105 (define_expand "vec_pack_sfix_v4df"
6106 [(match_operand:V8SI 0 "register_operand")
6107 (match_operand:V4DF 1 "nonimmediate_operand")
6108 (match_operand:V4DF 2 "nonimmediate_operand")]
6113 r1 = gen_reg_rtx (V4SImode);
6114 r2 = gen_reg_rtx (V4SImode);
6116 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
6117 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
6118 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
6122 (define_expand "vec_pack_sfix_v2df"
6123 [(match_operand:V4SI 0 "register_operand")
6124 (match_operand:V2DF 1 "vector_operand")
6125 (match_operand:V2DF 2 "vector_operand")]
6128 rtx tmp0, tmp1, tmp2;
6130 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6132 tmp0 = gen_reg_rtx (V4DFmode);
6133 tmp1 = force_reg (V2DFmode, operands[1]);
6135 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6136 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
6140 tmp0 = gen_reg_rtx (V4SImode);
6141 tmp1 = gen_reg_rtx (V4SImode);
6142 tmp2 = gen_reg_rtx (V2DImode);
6144 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
6145 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
6146 emit_insn (gen_vec_interleave_lowv2di (tmp2,
6147 gen_lowpart (V2DImode, tmp0),
6148 gen_lowpart (V2DImode, tmp1)));
6149 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
6154 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6156 ;; Parallel single-precision floating point element swizzling
6158 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6160 (define_expand "sse_movhlps_exp"
6161 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6164 (match_operand:V4SF 1 "nonimmediate_operand")
6165 (match_operand:V4SF 2 "nonimmediate_operand"))
6166 (parallel [(const_int 6)
6172 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6174 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
6176 /* Fix up the destination if needed. */
6177 if (dst != operands[0])
6178 emit_move_insn (operands[0], dst);
6183 (define_insn "sse_movhlps"
6184 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
6187 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6188 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
6189 (parallel [(const_int 6)
6193 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6195 movhlps\t{%2, %0|%0, %2}
6196 vmovhlps\t{%2, %1, %0|%0, %1, %2}
6197 movlps\t{%H2, %0|%0, %H2}
6198 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
6199 %vmovhps\t{%2, %0|%q0, %2}"
6200 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6201 (set_attr "type" "ssemov")
6202 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6203 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6205 (define_expand "sse_movlhps_exp"
6206 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6209 (match_operand:V4SF 1 "nonimmediate_operand")
6210 (match_operand:V4SF 2 "nonimmediate_operand"))
6211 (parallel [(const_int 0)
6217 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6219 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
6221 /* Fix up the destination if needed. */
6222 if (dst != operands[0])
6223 emit_move_insn (operands[0], dst);
6228 (define_insn "sse_movlhps"
6229 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
6232 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6233 (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
6234 (parallel [(const_int 0)
6238 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
6240 movlhps\t{%2, %0|%0, %2}
6241 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6242 movhps\t{%2, %0|%0, %q2}
6243 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6244 %vmovlps\t{%2, %H0|%H0, %2}"
6245 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6246 (set_attr "type" "ssemov")
6247 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6248 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6250 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
6251 [(set (match_operand:V16SF 0 "register_operand" "=v")
6254 (match_operand:V16SF 1 "register_operand" "v")
6255 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6256 (parallel [(const_int 2) (const_int 18)
6257 (const_int 3) (const_int 19)
6258 (const_int 6) (const_int 22)
6259 (const_int 7) (const_int 23)
6260 (const_int 10) (const_int 26)
6261 (const_int 11) (const_int 27)
6262 (const_int 14) (const_int 30)
6263 (const_int 15) (const_int 31)])))]
6265 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6266 [(set_attr "type" "sselog")
6267 (set_attr "prefix" "evex")
6268 (set_attr "mode" "V16SF")])
6270 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
6271 (define_insn "avx_unpckhps256<mask_name>"
6272 [(set (match_operand:V8SF 0 "register_operand" "=v")
6275 (match_operand:V8SF 1 "register_operand" "v")
6276 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6277 (parallel [(const_int 2) (const_int 10)
6278 (const_int 3) (const_int 11)
6279 (const_int 6) (const_int 14)
6280 (const_int 7) (const_int 15)])))]
6281 "TARGET_AVX && <mask_avx512vl_condition>"
6282 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6283 [(set_attr "type" "sselog")
6284 (set_attr "prefix" "vex")
6285 (set_attr "mode" "V8SF")])
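;; An AVX intrinsics illustration of the within-lane behaviour noted above:
;; vunpckhps on 256-bit operands interleaves the high half of each 128-bit
;; lane separately and never moves data across lanes (sketch only):
;;
;;   #include <immintrin.h>
;;
;;   __m256 unpckhi_ps256 (__m256 a, __m256 b)
;;   {
;;     /* result = { a2,b2,a3,b3 | a6,b6,a7,b7 }  (low lane | high lane)  */
;;     return _mm256_unpackhi_ps (a, b);
;;   }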
6287 (define_expand "vec_interleave_highv8sf"
6291 (match_operand:V8SF 1 "register_operand")
6292 (match_operand:V8SF 2 "nonimmediate_operand"))
6293 (parallel [(const_int 0) (const_int 8)
6294 (const_int 1) (const_int 9)
6295 (const_int 4) (const_int 12)
6296 (const_int 5) (const_int 13)])))
6302 (parallel [(const_int 2) (const_int 10)
6303 (const_int 3) (const_int 11)
6304 (const_int 6) (const_int 14)
6305 (const_int 7) (const_int 15)])))
6306 (set (match_operand:V8SF 0 "register_operand")
6311 (parallel [(const_int 4) (const_int 5)
6312 (const_int 6) (const_int 7)
6313 (const_int 12) (const_int 13)
6314 (const_int 14) (const_int 15)])))]
6317 operands[3] = gen_reg_rtx (V8SFmode);
6318 operands[4] = gen_reg_rtx (V8SFmode);
6321 (define_insn "vec_interleave_highv4sf<mask_name>"
6322 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6325 (match_operand:V4SF 1 "register_operand" "0,v")
6326 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
6327 (parallel [(const_int 2) (const_int 6)
6328 (const_int 3) (const_int 7)])))]
6329 "TARGET_SSE && <mask_avx512vl_condition>"
6331 unpckhps\t{%2, %0|%0, %2}
6332 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6333 [(set_attr "isa" "noavx,avx")
6334 (set_attr "type" "sselog")
6335 (set_attr "prefix" "orig,vex")
6336 (set_attr "mode" "V4SF")])
6338 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
6339 [(set (match_operand:V16SF 0 "register_operand" "=v")
6342 (match_operand:V16SF 1 "register_operand" "v")
6343 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6344 (parallel [(const_int 0) (const_int 16)
6345 (const_int 1) (const_int 17)
6346 (const_int 4) (const_int 20)
6347 (const_int 5) (const_int 21)
6348 (const_int 8) (const_int 24)
6349 (const_int 9) (const_int 25)
6350 (const_int 12) (const_int 28)
6351 (const_int 13) (const_int 29)])))]
6353 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6354 [(set_attr "type" "sselog")
6355 (set_attr "prefix" "evex")
6356 (set_attr "mode" "V16SF")])
6358 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
6359 (define_insn "avx_unpcklps256<mask_name>"
6360 [(set (match_operand:V8SF 0 "register_operand" "=v")
6363 (match_operand:V8SF 1 "register_operand" "v")
6364 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6365 (parallel [(const_int 0) (const_int 8)
6366 (const_int 1) (const_int 9)
6367 (const_int 4) (const_int 12)
6368 (const_int 5) (const_int 13)])))]
6369 "TARGET_AVX && <mask_avx512vl_condition>"
6370 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6371 [(set_attr "type" "sselog")
6372 (set_attr "prefix" "vex")
6373 (set_attr "mode" "V8SF")])
6375 (define_insn "unpcklps128_mask"
6376 [(set (match_operand:V4SF 0 "register_operand" "=v")
6380 (match_operand:V4SF 1 "register_operand" "v")
6381 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6382 (parallel [(const_int 0) (const_int 4)
6383 (const_int 1) (const_int 5)]))
6384 (match_operand:V4SF 3 "vector_move_operand" "0C")
6385 (match_operand:QI 4 "register_operand" "Yk")))]
6387 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
6388 [(set_attr "type" "sselog")
6389 (set_attr "prefix" "evex")
6390 (set_attr "mode" "V4SF")])
6392 (define_expand "vec_interleave_lowv8sf"
6396 (match_operand:V8SF 1 "register_operand")
6397 (match_operand:V8SF 2 "nonimmediate_operand"))
6398 (parallel [(const_int 0) (const_int 8)
6399 (const_int 1) (const_int 9)
6400 (const_int 4) (const_int 12)
6401 (const_int 5) (const_int 13)])))
6407 (parallel [(const_int 2) (const_int 10)
6408 (const_int 3) (const_int 11)
6409 (const_int 6) (const_int 14)
6410 (const_int 7) (const_int 15)])))
6411 (set (match_operand:V8SF 0 "register_operand")
6416 (parallel [(const_int 0) (const_int 1)
6417 (const_int 2) (const_int 3)
6418 (const_int 8) (const_int 9)
6419 (const_int 10) (const_int 11)])))]
6422 operands[3] = gen_reg_rtx (V8SFmode);
6423 operands[4] = gen_reg_rtx (V8SFmode);
6426 (define_insn "vec_interleave_lowv4sf"
6427 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6430 (match_operand:V4SF 1 "register_operand" "0,v")
6431 (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
6432 (parallel [(const_int 0) (const_int 4)
6433 (const_int 1) (const_int 5)])))]
6436 unpcklps\t{%2, %0|%0, %2}
6437 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
6438 [(set_attr "isa" "noavx,avx")
6439 (set_attr "type" "sselog")
6440 (set_attr "prefix" "orig,maybe_evex")
6441 (set_attr "mode" "V4SF")])
6443 ;; These are modeled with the same vec_concat as the others so that we
6444 ;; capture users of shufps that can use the new instructions.
6445 (define_insn "avx_movshdup256<mask_name>"
6446 [(set (match_operand:V8SF 0 "register_operand" "=v")
6449 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6451 (parallel [(const_int 1) (const_int 1)
6452 (const_int 3) (const_int 3)
6453 (const_int 5) (const_int 5)
6454 (const_int 7) (const_int 7)])))]
6455 "TARGET_AVX && <mask_avx512vl_condition>"
6456 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6457 [(set_attr "type" "sse")
6458 (set_attr "prefix" "vex")
6459 (set_attr "mode" "V8SF")])
6461 (define_insn "sse3_movshdup<mask_name>"
6462 [(set (match_operand:V4SF 0 "register_operand" "=v")
6465 (match_operand:V4SF 1 "vector_operand" "vBm")
6467 (parallel [(const_int 1)
6471 "TARGET_SSE3 && <mask_avx512vl_condition>"
6472 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6473 [(set_attr "type" "sse")
6474 (set_attr "prefix_rep" "1")
6475 (set_attr "prefix" "maybe_vex")
6476 (set_attr "mode" "V4SF")])
6478 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
6479 [(set (match_operand:V16SF 0 "register_operand" "=v")
6482 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6484 (parallel [(const_int 1) (const_int 1)
6485 (const_int 3) (const_int 3)
6486 (const_int 5) (const_int 5)
6487 (const_int 7) (const_int 7)
6488 (const_int 9) (const_int 9)
6489 (const_int 11) (const_int 11)
6490 (const_int 13) (const_int 13)
6491 (const_int 15) (const_int 15)])))]
6493 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6494 [(set_attr "type" "sse")
6495 (set_attr "prefix" "evex")
6496 (set_attr "mode" "V16SF")])
6498 (define_insn "avx_movsldup256<mask_name>"
6499 [(set (match_operand:V8SF 0 "register_operand" "=v")
6502 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6504 (parallel [(const_int 0) (const_int 0)
6505 (const_int 2) (const_int 2)
6506 (const_int 4) (const_int 4)
6507 (const_int 6) (const_int 6)])))]
6508 "TARGET_AVX && <mask_avx512vl_condition>"
6509 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6510 [(set_attr "type" "sse")
6511 (set_attr "prefix" "vex")
6512 (set_attr "mode" "V8SF")])
6514 (define_insn "sse3_movsldup<mask_name>"
6515 [(set (match_operand:V4SF 0 "register_operand" "=v")
6518 (match_operand:V4SF 1 "vector_operand" "vBm")
6520 (parallel [(const_int 0)
6524 "TARGET_SSE3 && <mask_avx512vl_condition>"
6525 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6526 [(set_attr "type" "sse")
6527 (set_attr "prefix_rep" "1")
6528 (set_attr "prefix" "maybe_vex")
6529 (set_attr "mode" "V4SF")])
6531 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
6532 [(set (match_operand:V16SF 0 "register_operand" "=v")
6535 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6537 (parallel [(const_int 0) (const_int 0)
6538 (const_int 2) (const_int 2)
6539 (const_int 4) (const_int 4)
6540 (const_int 6) (const_int 6)
6541 (const_int 8) (const_int 8)
6542 (const_int 10) (const_int 10)
6543 (const_int 12) (const_int 12)
6544 (const_int 14) (const_int 14)])))]
6546 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6547 [(set_attr "type" "sse")
6548 (set_attr "prefix" "evex")
6549 (set_attr "mode" "V16SF")])
6551 (define_expand "avx_shufps256<mask_expand4_name>"
6552 [(match_operand:V8SF 0 "register_operand")
6553 (match_operand:V8SF 1 "register_operand")
6554 (match_operand:V8SF 2 "nonimmediate_operand")
6555 (match_operand:SI 3 "const_int_operand")]
6558 int mask = INTVAL (operands[3]);
6559 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
6562 GEN_INT ((mask >> 0) & 3),
6563 GEN_INT ((mask >> 2) & 3),
6564 GEN_INT (((mask >> 4) & 3) + 8),
6565 GEN_INT (((mask >> 6) & 3) + 8),
6566 GEN_INT (((mask >> 0) & 3) + 4),
6567 GEN_INT (((mask >> 2) & 3) + 4),
6568 GEN_INT (((mask >> 4) & 3) + 12),
6569 GEN_INT (((mask >> 6) & 3) + 12)
6570 <mask_expand4_args>));
6574 ;; Each 2-bit mask field selects the same position in both 128-bit lanes, i.e. 2 elements.
6575 (define_insn "avx_shufps256_1<mask_name>"
6576 [(set (match_operand:V8SF 0 "register_operand" "=v")
6579 (match_operand:V8SF 1 "register_operand" "v")
6580 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6581 (parallel [(match_operand 3 "const_0_to_3_operand" )
6582 (match_operand 4 "const_0_to_3_operand" )
6583 (match_operand 5 "const_8_to_11_operand" )
6584 (match_operand 6 "const_8_to_11_operand" )
6585 (match_operand 7 "const_4_to_7_operand" )
6586 (match_operand 8 "const_4_to_7_operand" )
6587 (match_operand 9 "const_12_to_15_operand")
6588 (match_operand 10 "const_12_to_15_operand")])))]
6590 && <mask_avx512vl_condition>
6591 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6592 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6593 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6594 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
6597 mask = INTVAL (operands[3]);
6598 mask |= INTVAL (operands[4]) << 2;
6599 mask |= (INTVAL (operands[5]) - 8) << 4;
6600 mask |= (INTVAL (operands[6]) - 8) << 6;
6601 operands[3] = GEN_INT (mask);
6603 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6605 [(set_attr "type" "sseshuf")
6606 (set_attr "length_immediate" "1")
6607 (set_attr "prefix" "<mask_prefix>")
6608 (set_attr "mode" "V8SF")])
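;; A small C sketch of the immediate handling around avx_shufps256: the
;; expander splits the 8-bit shufps immediate into eight element selectors,
;; repeating each 2-bit field for the high 128-bit lane, and the insn above
;; only matches (and re-packs the immediate) when both lanes agree.
;; Illustrative only; the function name is made up:
;;
;;   void split_shufps256_imm (int mask, int sel[8])
;;   {
;;     sel[0] = (mask >> 0) & 3;         /* op1 element, low lane   */
;;     sel[1] = (mask >> 2) & 3;
;;     sel[2] = ((mask >> 4) & 3) + 8;   /* op2 element, low lane   */
;;     sel[3] = ((mask >> 6) & 3) + 8;
;;     sel[4] = sel[0] + 4;              /* same picks, high lane   */
;;     sel[5] = sel[1] + 4;
;;     sel[6] = sel[2] + 4;
;;     sel[7] = sel[3] + 4;
;;   }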
6610 (define_expand "sse_shufps<mask_expand4_name>"
6611 [(match_operand:V4SF 0 "register_operand")
6612 (match_operand:V4SF 1 "register_operand")
6613 (match_operand:V4SF 2 "vector_operand")
6614 (match_operand:SI 3 "const_int_operand")]
6617 int mask = INTVAL (operands[3]);
6618 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
6621 GEN_INT ((mask >> 0) & 3),
6622 GEN_INT ((mask >> 2) & 3),
6623 GEN_INT (((mask >> 4) & 3) + 4),
6624 GEN_INT (((mask >> 6) & 3) + 4)
6625 <mask_expand4_args>));
6629 (define_insn "sse_shufps_v4sf_mask"
6630 [(set (match_operand:V4SF 0 "register_operand" "=v")
6634 (match_operand:V4SF 1 "register_operand" "v")
6635 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6636 (parallel [(match_operand 3 "const_0_to_3_operand")
6637 (match_operand 4 "const_0_to_3_operand")
6638 (match_operand 5 "const_4_to_7_operand")
6639 (match_operand 6 "const_4_to_7_operand")]))
6640 (match_operand:V4SF 7 "vector_move_operand" "0C")
6641 (match_operand:QI 8 "register_operand" "Yk")))]
6645 mask |= INTVAL (operands[3]) << 0;
6646 mask |= INTVAL (operands[4]) << 2;
6647 mask |= (INTVAL (operands[5]) - 4) << 4;
6648 mask |= (INTVAL (operands[6]) - 4) << 6;
6649 operands[3] = GEN_INT (mask);
6651 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
6653 [(set_attr "type" "sseshuf")
6654 (set_attr "length_immediate" "1")
6655 (set_attr "prefix" "evex")
6656 (set_attr "mode" "V4SF")])
6658 (define_insn "sse_shufps_<mode>"
6659 [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
6660 (vec_select:VI4F_128
6661 (vec_concat:<ssedoublevecmode>
6662 (match_operand:VI4F_128 1 "register_operand" "0,v")
6663 (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
6664 (parallel [(match_operand 3 "const_0_to_3_operand")
6665 (match_operand 4 "const_0_to_3_operand")
6666 (match_operand 5 "const_4_to_7_operand")
6667 (match_operand 6 "const_4_to_7_operand")])))]
6671 mask |= INTVAL (operands[3]) << 0;
6672 mask |= INTVAL (operands[4]) << 2;
6673 mask |= (INTVAL (operands[5]) - 4) << 4;
6674 mask |= (INTVAL (operands[6]) - 4) << 6;
6675 operands[3] = GEN_INT (mask);
6677 switch (which_alternative)
6680 return "shufps\t{%3, %2, %0|%0, %2, %3}";
6682 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6687 [(set_attr "isa" "noavx,avx")
6688 (set_attr "type" "sseshuf")
6689 (set_attr "length_immediate" "1")
6690 (set_attr "prefix" "orig,maybe_evex")
6691 (set_attr "mode" "V4SF")])
6693 (define_insn "sse_storehps"
6694 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
6696 (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
6697 (parallel [(const_int 2) (const_int 3)])))]
6698 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6700 %vmovhps\t{%1, %0|%q0, %1}
6701 %vmovhlps\t{%1, %d0|%d0, %1}
6702 %vmovlps\t{%H1, %d0|%d0, %H1}"
6703 [(set_attr "type" "ssemov")
6704 (set_attr "prefix" "maybe_vex")
6705 (set_attr "mode" "V2SF,V4SF,V2SF")])
6707 (define_expand "sse_loadhps_exp"
6708 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6711 (match_operand:V4SF 1 "nonimmediate_operand")
6712 (parallel [(const_int 0) (const_int 1)]))
6713 (match_operand:V2SF 2 "nonimmediate_operand")))]
6716 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6718 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
6720 /* Fix up the destination if needed. */
6721 if (dst != operands[0])
6722 emit_move_insn (operands[0], dst);
6727 (define_insn "sse_loadhps"
6728 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,o")
6731 (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
6732 (parallel [(const_int 0) (const_int 1)]))
6733 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,v,v")))]
6736 movhps\t{%2, %0|%0, %q2}
6737 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6738 movlhps\t{%2, %0|%0, %2}
6739 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6740 %vmovlps\t{%2, %H0|%H0, %2}"
6741 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6742 (set_attr "type" "ssemov")
6743 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6744 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
6746 (define_insn "sse_storelps"
6747 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
6749 (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
6750 (parallel [(const_int 0) (const_int 1)])))]
6751 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6753 %vmovlps\t{%1, %0|%q0, %1}
6754 %vmovaps\t{%1, %0|%0, %1}
6755 %vmovlps\t{%1, %d0|%d0, %q1}"
6756 [(set_attr "type" "ssemov")
6757 (set_attr "prefix" "maybe_vex")
6758 (set_attr "mode" "V2SF,V4SF,V2SF")])
6760 (define_expand "sse_loadlps_exp"
6761 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6763 (match_operand:V2SF 2 "nonimmediate_operand")
6765 (match_operand:V4SF 1 "nonimmediate_operand")
6766 (parallel [(const_int 2) (const_int 3)]))))]
6769 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6771 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
6773 /* Fix up the destination if needed. */
6774 if (dst != operands[0])
6775 emit_move_insn (operands[0], dst);
6780 (define_insn "sse_loadlps"
6781 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,v,x,v,m")
6783 (match_operand:V2SF 2 "nonimmediate_operand" " 0,v,m,m,v")
6785 (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
6786 (parallel [(const_int 2) (const_int 3)]))))]
6789 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
6790 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
6791 movlps\t{%2, %0|%0, %q2}
6792 vmovlps\t{%2, %1, %0|%0, %1, %q2}
6793 %vmovlps\t{%2, %0|%q0, %2}"
6794 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6795 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
6796 (set (attr "length_immediate")
6797 (if_then_else (eq_attr "alternative" "0,1")
6799 (const_string "*")))
6800 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
6801 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6803 (define_insn "sse_movss"
6804 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
6806 (match_operand:V4SF 2 "register_operand" " x,v")
6807 (match_operand:V4SF 1 "register_operand" " 0,v")
6811 movss\t{%2, %0|%0, %2}
6812 vmovss\t{%2, %1, %0|%0, %1, %2}"
6813 [(set_attr "isa" "noavx,avx")
6814 (set_attr "type" "ssemov")
6815 (set_attr "prefix" "orig,maybe_evex")
6816 (set_attr "mode" "SF")])
6818 (define_insn "avx2_vec_dup<mode>"
6819 [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
6820 (vec_duplicate:VF1_128_256
6822 (match_operand:V4SF 1 "register_operand" "v")
6823 (parallel [(const_int 0)]))))]
6825 "vbroadcastss\t{%1, %0|%0, %1}"
6826 [(set_attr "type" "sselog1")
6827 (set_attr "prefix" "maybe_evex")
6828 (set_attr "mode" "<MODE>")])
6830 (define_insn "avx2_vec_dupv8sf_1"
6831 [(set (match_operand:V8SF 0 "register_operand" "=v")
6834 (match_operand:V8SF 1 "register_operand" "v")
6835 (parallel [(const_int 0)]))))]
6837 "vbroadcastss\t{%x1, %0|%0, %x1}"
6838 [(set_attr "type" "sselog1")
6839 (set_attr "prefix" "maybe_evex")
6840 (set_attr "mode" "V8SF")])
6842 (define_insn "avx512f_vec_dup<mode>_1"
6843 [(set (match_operand:VF_512 0 "register_operand" "=v")
6844 (vec_duplicate:VF_512
6845 (vec_select:<ssescalarmode>
6846 (match_operand:VF_512 1 "register_operand" "v")
6847 (parallel [(const_int 0)]))))]
6849 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
6850 [(set_attr "type" "sselog1")
6851 (set_attr "prefix" "evex")
6852 (set_attr "mode" "<MODE>")])
6854 ;; Although insertps can take a register source, we prefer
6855 ;; unpcklps with a register source, since its encoding is shorter.
6856 (define_insn "*vec_concatv2sf_sse4_1"
6857 [(set (match_operand:V2SF 0 "register_operand"
6858 "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
6860 (match_operand:SF 1 "nonimmediate_operand"
6861 " 0, 0,Yv, 0,0, v,m, 0 , m")
6862 (match_operand:SF 2 "vector_move_operand"
6863 " Yr,*x,Yv, m,m, m,C,*ym, C")))]
6864 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6866 unpcklps\t{%2, %0|%0, %2}
6867 unpcklps\t{%2, %0|%0, %2}
6868 vunpcklps\t{%2, %1, %0|%0, %1, %2}
6869 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6870 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6871 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6872 %vmovss\t{%1, %0|%0, %1}
6873 punpckldq\t{%2, %0|%0, %2}
6874 movd\t{%1, %0|%0, %1}"
6876 (cond [(eq_attr "alternative" "0,1,3,4")
6877 (const_string "noavx")
6878 (eq_attr "alternative" "2,5")
6879 (const_string "avx")
6881 (const_string "*")))
6883 (cond [(eq_attr "alternative" "6")
6884 (const_string "ssemov")
6885 (eq_attr "alternative" "7")
6886 (const_string "mmxcvt")
6887 (eq_attr "alternative" "8")
6888 (const_string "mmxmov")
6890 (const_string "sselog")))
6891 (set (attr "prefix_data16")
6892 (if_then_else (eq_attr "alternative" "3,4")
6894 (const_string "*")))
6895 (set (attr "prefix_extra")
6896 (if_then_else (eq_attr "alternative" "3,4,5")
6898 (const_string "*")))
6899 (set (attr "length_immediate")
6900 (if_then_else (eq_attr "alternative" "3,4,5")
6902 (const_string "*")))
6903 (set (attr "prefix")
6904 (cond [(eq_attr "alternative" "2,5")
6905 (const_string "maybe_evex")
6906 (eq_attr "alternative" "6")
6907 (const_string "maybe_vex")
6909 (const_string "orig")))
6910 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
6912 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6913 ;; vector_operand for operand 2 and *not* allowing memory for the SSE
6914 ;; alternatives pretty much forces the MMX alternative to be chosen.
6915 (define_insn "*vec_concatv2sf_sse"
6916 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
6918 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6919 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
6922 unpcklps\t{%2, %0|%0, %2}
6923 movss\t{%1, %0|%0, %1}
6924 punpckldq\t{%2, %0|%0, %2}
6925 movd\t{%1, %0|%0, %1}"
6926 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6927 (set_attr "mode" "V4SF,SF,DI,DI")])
6929 (define_insn "*vec_concatv4sf"
6930 [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v")
6932 (match_operand:V2SF 1 "register_operand" " 0,v,0,v")
6933 (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
6936 movlhps\t{%2, %0|%0, %2}
6937 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6938 movhps\t{%2, %0|%0, %q2}
6939 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
6940 [(set_attr "isa" "noavx,avx,noavx,avx")
6941 (set_attr "type" "ssemov")
6942 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
6943 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
6945 ;; Avoid combining registers from different units in a single alternative,
6946 ;; see comment above inline_secondary_memory_needed function in i386.c
6947 (define_insn "vec_set<mode>_0"
6948 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
6949 "=Yr,*x,v,v,Yi,x,x,v,Yr ,*x ,x ,m ,m ,m")
6951 (vec_duplicate:VI4F_128
6952 (match_operand:<ssescalarmode> 2 "general_operand"
6953 " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
6954 (match_operand:VI4F_128 1 "vector_move_operand"
6955 " C , C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
6959 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
6960 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
6961 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
6962 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
6963 %vmovd\t{%2, %0|%0, %2}
6964 movss\t{%2, %0|%0, %2}
6965 movss\t{%2, %0|%0, %2}
6966 vmovss\t{%2, %1, %0|%0, %1, %2}
6967 pinsrd\t{$0, %2, %0|%0, %2, 0}
6968 pinsrd\t{$0, %2, %0|%0, %2, 0}
6969 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
6974 (cond [(eq_attr "alternative" "0,1,8,9")
6975 (const_string "sse4_noavx")
6976 (eq_attr "alternative" "2,7,10")
6977 (const_string "avx")
6978 (eq_attr "alternative" "3,4")
6979 (const_string "sse2")
6980 (eq_attr "alternative" "5,6")
6981 (const_string "noavx")
6983 (const_string "*")))
6985 (cond [(eq_attr "alternative" "0,1,2,8,9,10")
6986 (const_string "sselog")
6987 (eq_attr "alternative" "12")
6988 (const_string "imov")
6989 (eq_attr "alternative" "13")
6990 (const_string "fmov")
6992 (const_string "ssemov")))
6993 (set (attr "prefix_extra")
6994 (if_then_else (eq_attr "alternative" "8,9,10")
6996 (const_string "*")))
6997 (set (attr "length_immediate")
6998 (if_then_else (eq_attr "alternative" "8,9,10")
7000 (const_string "*")))
7001 (set (attr "prefix")
7002 (cond [(eq_attr "alternative" "0,1,5,6,8,9")
7003 (const_string "orig")
7004 (eq_attr "alternative" "2")
7005 (const_string "maybe_evex")
7006 (eq_attr "alternative" "3,4")
7007 (const_string "maybe_vex")
7008 (eq_attr "alternative" "7,10")
7009 (const_string "vex")
7011 (const_string "*")))
7012 (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")])
7014 ;; A subset is vec_setv4sf.
7015 (define_insn "*vec_setv4sf_sse4_1"
7016 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7019 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
7020 (match_operand:V4SF 1 "register_operand" "0,0,v")
7021 (match_operand:SI 3 "const_int_operand")))]
7023 && ((unsigned) exact_log2 (INTVAL (operands[3]))
7024 < GET_MODE_NUNITS (V4SFmode))"
7026 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
7027 switch (which_alternative)
7031 return "insertps\t{%3, %2, %0|%0, %2, %3}";
7033 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7038 [(set_attr "isa" "noavx,noavx,avx")
7039 (set_attr "type" "sselog")
7040 (set_attr "prefix_data16" "1,1,*")
7041 (set_attr "prefix_extra" "1")
7042 (set_attr "length_immediate" "1")
7043 (set_attr "prefix" "orig,orig,maybe_evex")
7044 (set_attr "mode" "V4SF")])
7046 (define_insn "sse4_1_insertps"
7047 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
7048 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
7049 (match_operand:V4SF 1 "register_operand" "0,0,v")
7050 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
7054 if (MEM_P (operands[2]))
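      /* With a memory source only a single SFmode element is loaded, so
	 fold COUNT_S (bits [7:6] of the immediate) into the address and
	 drop it from the immediate.  */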
7056 unsigned count_s = INTVAL (operands[3]) >> 6;
7058 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
7059 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
7061 switch (which_alternative)
7065 return "insertps\t{%3, %2, %0|%0, %2, %3}";
7067 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7072 [(set_attr "isa" "noavx,noavx,avx")
7073 (set_attr "type" "sselog")
7074 (set_attr "prefix_data16" "1,1,*")
7075 (set_attr "prefix_extra" "1")
7076 (set_attr "length_immediate" "1")
7077 (set_attr "prefix" "orig,orig,maybe_evex")
7078 (set_attr "mode" "V4SF")])
7081 [(set (match_operand:VI4F_128 0 "memory_operand")
7083 (vec_duplicate:VI4F_128
7084 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
7087 "TARGET_SSE && reload_completed"
7088 [(set (match_dup 0) (match_dup 1))]
7089 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
7091 (define_expand "vec_set<mode>"
7092 [(match_operand:V 0 "register_operand")
7093 (match_operand:<ssescalarmode> 1 "register_operand")
7094 (match_operand 2 "const_int_operand")]
7097 ix86_expand_vector_set (false, operands[0], operands[1],
7098 INTVAL (operands[2]));
7102 (define_insn_and_split "*vec_extractv4sf_0"
7103 [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
7105 (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
7106 (parallel [(const_int 0)])))]
7107 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7109 "&& reload_completed"
7110 [(set (match_dup 0) (match_dup 1))]
7111 "operands[1] = gen_lowpart (SFmode, operands[1]);")
7113 (define_insn_and_split "*sse4_1_extractps"
7114 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
7116 (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
7117 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
7120 extractps\t{%2, %1, %0|%0, %1, %2}
7121 extractps\t{%2, %1, %0|%0, %1, %2}
7122 vextractps\t{%2, %1, %0|%0, %1, %2}
7125 "&& reload_completed && SSE_REG_P (operands[0])"
7128 rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
7129 switch (INTVAL (operands[2]))
7133 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
7134 operands[2], operands[2],
7135 GEN_INT (INTVAL (operands[2]) + 4),
7136 GEN_INT (INTVAL (operands[2]) + 4)));
7139 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
7142 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
7147 [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
7148 (set_attr "type" "sselog,sselog,sselog,*,*")
7149 (set_attr "prefix_data16" "1,1,1,*,*")
7150 (set_attr "prefix_extra" "1,1,1,*,*")
7151 (set_attr "length_immediate" "1,1,1,*,*")
7152 (set_attr "prefix" "orig,orig,maybe_evex,*,*")
7153 (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
7155 (define_insn_and_split "*vec_extractv4sf_mem"
7156 [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
7158 (match_operand:V4SF 1 "memory_operand" "o,o,o")
7159 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
7162 "&& reload_completed"
7163 [(set (match_dup 0) (match_dup 1))]
7165 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
7168 (define_mode_attr extract_type
7169 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
7171 (define_mode_attr extract_suf
7172 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
7174 (define_mode_iterator AVX512_VEC
7175 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
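;; The expander below turns the quarter index in operands[2] into explicit
;; element selectors: quarter N covers elements 4*N .. 4*N+3 for the 32x4
;; forms and 2*N .. 2*N+1 for the 64x2 forms.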
7177 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
7178 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
7179 (match_operand:AVX512_VEC 1 "register_operand")
7180 (match_operand:SI 2 "const_0_to_3_operand")
7181 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
7182 (match_operand:QI 4 "register_operand")]
7186 mask = INTVAL (operands[2]);
7187 rtx dest = operands[0];
7189 if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
7190 dest = gen_reg_rtx (<ssequartermode>mode);
7192 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
7193 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
7194 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
7195 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
7198 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
7199 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
7201 if (dest != operands[0])
7202 emit_move_insn (operands[0], dest);
7206 (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
7207 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
7208 (vec_merge:<ssequartermode>
7209 (vec_select:<ssequartermode>
7210 (match_operand:V8FI 1 "register_operand" "v")
7211 (parallel [(match_operand 2 "const_0_to_7_operand")
7212 (match_operand 3 "const_0_to_7_operand")]))
7213 (match_operand:<ssequartermode> 4 "memory_operand" "0")
7214 (match_operand:QI 5 "register_operand" "Yk")))]
7216 && INTVAL (operands[2]) % 2 == 0
7217 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7218 && rtx_equal_p (operands[4], operands[0])"
7220 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
7221 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
7223 [(set_attr "type" "sselog")
7224 (set_attr "prefix_extra" "1")
7225 (set_attr "length_immediate" "1")
7226 (set_attr "memory" "store")
7227 (set_attr "prefix" "evex")
7228 (set_attr "mode" "<sseinsnmode>")])
7230 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
7231 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
7232 (vec_merge:<ssequartermode>
7233 (vec_select:<ssequartermode>
7234 (match_operand:V16FI 1 "register_operand" "v")
7235 (parallel [(match_operand 2 "const_0_to_15_operand")
7236 (match_operand 3 "const_0_to_15_operand")
7237 (match_operand 4 "const_0_to_15_operand")
7238 (match_operand 5 "const_0_to_15_operand")]))
7239 (match_operand:<ssequartermode> 6 "memory_operand" "0")
7240 (match_operand:QI 7 "register_operand" "Yk")))]
7242 && INTVAL (operands[2]) % 4 == 0
7243 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7244 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
7245 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
7246 && rtx_equal_p (operands[6], operands[0])"
7248 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
7249 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
7251 [(set_attr "type" "sselog")
7252 (set_attr "prefix_extra" "1")
7253 (set_attr "length_immediate" "1")
7254 (set_attr "memory" "store")
7255 (set_attr "prefix" "evex")
7256 (set_attr "mode" "<sseinsnmode>")])
7258 (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
7259 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7260 (vec_select:<ssequartermode>
7261 (match_operand:V8FI 1 "register_operand" "v")
7262 (parallel [(match_operand 2 "const_0_to_7_operand")
7263 (match_operand 3 "const_0_to_7_operand")])))]
7265 && INTVAL (operands[2]) % 2 == 0
7266 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
7268 operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
7269 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
7271 [(set_attr "type" "sselog1")
7272 (set_attr "prefix_extra" "1")
7273 (set_attr "length_immediate" "1")
7274 (set_attr "prefix" "evex")
7275 (set_attr "mode" "<sseinsnmode>")])
7277 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
7278 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7279 (vec_select:<ssequartermode>
7280 (match_operand:V16FI 1 "register_operand" "v")
7281 (parallel [(match_operand 2 "const_0_to_15_operand")
7282 (match_operand 3 "const_0_to_15_operand")
7283 (match_operand 4 "const_0_to_15_operand")
7284 (match_operand 5 "const_0_to_15_operand")])))]
7286 && INTVAL (operands[2]) % 4 == 0
7287 && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
7288 && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
7289 && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
7291 operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
7292 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
7294 [(set_attr "type" "sselog1")
7295 (set_attr "prefix_extra" "1")
7296 (set_attr "length_immediate" "1")
7297 (set_attr "prefix" "evex")
7298 (set_attr "mode" "<sseinsnmode>")])
7300 (define_mode_attr extract_type_2
7301 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
7303 (define_mode_attr extract_suf_2
7304 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
7306 (define_mode_iterator AVX512_VEC_2
7307 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
7309 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
7310 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7311 (match_operand:AVX512_VEC_2 1 "register_operand")
7312 (match_operand:SI 2 "const_0_to_1_operand")
7313 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
7314 (match_operand:QI 4 "register_operand")]
7317 rtx (*insn)(rtx, rtx, rtx, rtx);
7318 rtx dest = operands[0];
7320 if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
7321 dest = gen_reg_rtx (<ssehalfvecmode>mode);
7323 switch (INTVAL (operands[2]))
7326 insn = gen_vec_extract_lo_<mode>_mask;
7329 insn = gen_vec_extract_hi_<mode>_mask;
7335 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
7336 if (dest != operands[0])
7337 emit_move_insn (operands[0], dest);
7342 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7343 (vec_select:<ssehalfvecmode>
7344 (match_operand:V8FI 1 "nonimmediate_operand")
7345 (parallel [(const_int 0) (const_int 1)
7346 (const_int 2) (const_int 3)])))]
7347 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7350 || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
7351 [(set (match_dup 0) (match_dup 1))]
7352 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7354 (define_insn "vec_extract_lo_<mode>_maskm"
7355 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7356 (vec_merge:<ssehalfvecmode>
7357 (vec_select:<ssehalfvecmode>
7358 (match_operand:V8FI 1 "register_operand" "v")
7359 (parallel [(const_int 0) (const_int 1)
7360 (const_int 2) (const_int 3)]))
7361 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7362 (match_operand:QI 3 "register_operand" "Yk")))]
7364 && rtx_equal_p (operands[2], operands[0])"
7365 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7366 [(set_attr "type" "sselog1")
7367 (set_attr "prefix_extra" "1")
7368 (set_attr "length_immediate" "1")
7369 (set_attr "prefix" "evex")
7370 (set_attr "mode" "<sseinsnmode>")])
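;; An unmasked extract of the low half is just a narrower view of the source
;; register, so the pattern below only emits an explicit vextract*64x4 when a
;; mask is applied (or, without AVX512VL, when the source register has no
;; directly addressable 256-bit lowpart); otherwise the splitter above turns
;; it into a plain lowpart move after reload.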
7372 (define_insn "vec_extract_lo_<mode><mask_name>"
7373 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
7374 (vec_select:<ssehalfvecmode>
7375 (match_operand:V8FI 1 "<store_mask_predicate>" "v,<store_mask_constraint>")
7376 (parallel [(const_int 0) (const_int 1)
7377 (const_int 2) (const_int 3)])))]
7379 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7381 if (<mask_applied> || (!TARGET_AVX512VL && !MEM_P (operands[1])))
7382 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7386 [(set_attr "type" "sselog1")
7387 (set_attr "prefix_extra" "1")
7388 (set_attr "length_immediate" "1")
7389 (set_attr "prefix" "evex")
7390 (set_attr "mode" "<sseinsnmode>")])
7392 (define_insn "vec_extract_hi_<mode>_maskm"
7393 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7394 (vec_merge:<ssehalfvecmode>
7395 (vec_select:<ssehalfvecmode>
7396 (match_operand:V8FI 1 "register_operand" "v")
7397 (parallel [(const_int 4) (const_int 5)
7398 (const_int 6) (const_int 7)]))
7399 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7400 (match_operand:QI 3 "register_operand" "Yk")))]
7402 && rtx_equal_p (operands[2], operands[0])"
7403 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7404 [(set_attr "type" "sselog")
7405 (set_attr "prefix_extra" "1")
7406 (set_attr "length_immediate" "1")
7407 (set_attr "memory" "store")
7408 (set_attr "prefix" "evex")
7409 (set_attr "mode" "<sseinsnmode>")])
7411 (define_insn "vec_extract_hi_<mode><mask_name>"
7412 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7413 (vec_select:<ssehalfvecmode>
7414 (match_operand:V8FI 1 "register_operand" "v")
7415 (parallel [(const_int 4) (const_int 5)
7416 (const_int 6) (const_int 7)])))]
7418 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
7419 [(set_attr "type" "sselog1")
7420 (set_attr "prefix_extra" "1")
7421 (set_attr "length_immediate" "1")
7422 (set_attr "prefix" "evex")
7423 (set_attr "mode" "<sseinsnmode>")])
7425 (define_insn "vec_extract_hi_<mode>_maskm"
7426 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7427 (vec_merge:<ssehalfvecmode>
7428 (vec_select:<ssehalfvecmode>
7429 (match_operand:V16FI 1 "register_operand" "v")
7430 (parallel [(const_int 8) (const_int 9)
7431 (const_int 10) (const_int 11)
7432 (const_int 12) (const_int 13)
7433 (const_int 14) (const_int 15)]))
7434 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7435 (match_operand:QI 3 "register_operand" "Yk")))]
7437 && rtx_equal_p (operands[2], operands[0])"
7438 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7439 [(set_attr "type" "sselog1")
7440 (set_attr "prefix_extra" "1")
7441 (set_attr "length_immediate" "1")
7442 (set_attr "prefix" "evex")
7443 (set_attr "mode" "<sseinsnmode>")])
7445 (define_insn "vec_extract_hi_<mode><mask_name>"
7446 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
7447 (vec_select:<ssehalfvecmode>
7448 (match_operand:V16FI 1 "register_operand" "v,v")
7449 (parallel [(const_int 8) (const_int 9)
7450 (const_int 10) (const_int 11)
7451 (const_int 12) (const_int 13)
7452 (const_int 14) (const_int 15)])))]
7453 "TARGET_AVX512F && <mask_avx512dq_condition>"
7455 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
7456 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7457 [(set_attr "type" "sselog1")
7458 (set_attr "prefix_extra" "1")
7459 (set_attr "isa" "avx512dq,noavx512dq")
7460 (set_attr "length_immediate" "1")
7461 (set_attr "prefix" "evex")
7462 (set_attr "mode" "<sseinsnmode>")])
7464 (define_expand "avx512vl_vextractf128<mode>"
7465 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7466 (match_operand:VI48F_256 1 "register_operand")
7467 (match_operand:SI 2 "const_0_to_1_operand")
7468 (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
7469 (match_operand:QI 4 "register_operand")]
7470 "TARGET_AVX512DQ && TARGET_AVX512VL"
7472 rtx (*insn)(rtx, rtx, rtx, rtx);
7473 rtx dest = operands[0];
7476 && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
7477 /* For V8S[IF]mode there are maskm insns with =m and 0
7479 ? !rtx_equal_p (dest, operands[3])
7480 /* For V4D[IF]mode, hi insns don't allow memory, and
7481 lo insns have =m and 0C constraints. */
7482 : (operands[2] != const0_rtx
7483 || (!rtx_equal_p (dest, operands[3])
7484 && GET_CODE (operands[3]) != CONST_VECTOR))))
7485 dest = gen_reg_rtx (<ssehalfvecmode>mode);
7486 switch (INTVAL (operands[2]))
7489 insn = gen_vec_extract_lo_<mode>_mask;
7492 insn = gen_vec_extract_hi_<mode>_mask;
7498 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
7499 if (dest != operands[0])
7500 emit_move_insn (operands[0], dest);
7504 (define_expand "avx_vextractf128<mode>"
7505 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7506 (match_operand:V_256 1 "register_operand")
7507 (match_operand:SI 2 "const_0_to_1_operand")]
7510 rtx (*insn)(rtx, rtx);
7512 switch (INTVAL (operands[2]))
7515 insn = gen_vec_extract_lo_<mode>;
7518 insn = gen_vec_extract_hi_<mode>;
7524 emit_insn (insn (operands[0], operands[1]));
7528 (define_insn "vec_extract_lo_<mode><mask_name>"
7529 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
7530 (vec_select:<ssehalfvecmode>
7531 (match_operand:V16FI 1 "<store_mask_predicate>"
7532 "<store_mask_constraint>,v")
7533 (parallel [(const_int 0) (const_int 1)
7534 (const_int 2) (const_int 3)
7535 (const_int 4) (const_int 5)
7536 (const_int 6) (const_int 7)])))]
7538 && <mask_mode512bit_condition>
7539 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7542 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7548 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7549 (vec_select:<ssehalfvecmode>
7550 (match_operand:V16FI 1 "nonimmediate_operand")
7551 (parallel [(const_int 0) (const_int 1)
7552 (const_int 2) (const_int 3)
7553 (const_int 4) (const_int 5)
7554 (const_int 6) (const_int 7)])))]
7555 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7556 && reload_completed"
7557 [(set (match_dup 0) (match_dup 1))]
7558 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7560 (define_insn "vec_extract_lo_<mode><mask_name>"
7561 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
7562 (vec_select:<ssehalfvecmode>
7563 (match_operand:VI8F_256 1 "<store_mask_predicate>"
7564 "<store_mask_constraint>,v")
7565 (parallel [(const_int 0) (const_int 1)])))]
7567 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
7568 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7571 return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
7575 [(set_attr "type" "sselog")
7576 (set_attr "prefix_extra" "1")
7577 (set_attr "length_immediate" "1")
7578 (set_attr "memory" "none,store")
7579 (set_attr "prefix" "evex")
7580 (set_attr "mode" "XI")])
7583 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7584 (vec_select:<ssehalfvecmode>
7585 (match_operand:VI8F_256 1 "nonimmediate_operand")
7586 (parallel [(const_int 0) (const_int 1)])))]
7587 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7588 && reload_completed"
7589 [(set (match_dup 0) (match_dup 1))]
7590 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7592 (define_insn "vec_extract_hi_<mode><mask_name>"
7593 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
7594 (vec_select:<ssehalfvecmode>
7595 (match_operand:VI8F_256 1 "register_operand" "v,v")
7596 (parallel [(const_int 2) (const_int 3)])))]
7597 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7599 if (TARGET_AVX512VL)
7601 if (TARGET_AVX512DQ)
7602 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7604 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
7607 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7609 [(set_attr "type" "sselog")
7610 (set_attr "prefix_extra" "1")
7611 (set_attr "length_immediate" "1")
7612 (set_attr "memory" "none,store")
7613 (set_attr "prefix" "vex")
7614 (set_attr "mode" "<sseinsnmode>")])
7617 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7618 (vec_select:<ssehalfvecmode>
7619 (match_operand:VI4F_256 1 "nonimmediate_operand")
7620 (parallel [(const_int 0) (const_int 1)
7621 (const_int 2) (const_int 3)])))]
7622 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7623 && reload_completed"
7624 [(set (match_dup 0) (match_dup 1))]
7625 "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
7627 (define_insn "vec_extract_lo_<mode><mask_name>"
7628 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
7629 "=<store_mask_constraint>,v")
7630 (vec_select:<ssehalfvecmode>
7631 (match_operand:VI4F_256 1 "<store_mask_predicate>"
7632 "v,<store_mask_constraint>")
7633 (parallel [(const_int 0) (const_int 1)
7634 (const_int 2) (const_int 3)])))]
7636 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
7637 && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
7640 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7644 [(set_attr "type" "sselog1")
7645 (set_attr "prefix_extra" "1")
7646 (set_attr "length_immediate" "1")
7647 (set_attr "prefix" "evex")
7648 (set_attr "mode" "<sseinsnmode>")])
7650 (define_insn "vec_extract_lo_<mode>_maskm"
7651 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7652 (vec_merge:<ssehalfvecmode>
7653 (vec_select:<ssehalfvecmode>
7654 (match_operand:VI4F_256 1 "register_operand" "v")
7655 (parallel [(const_int 0) (const_int 1)
7656 (const_int 2) (const_int 3)]))
7657 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7658 (match_operand:QI 3 "register_operand" "Yk")))]
7659 "TARGET_AVX512VL && TARGET_AVX512F
7660 && rtx_equal_p (operands[2], operands[0])"
7661 "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
7662 [(set_attr "type" "sselog1")
7663 (set_attr "prefix_extra" "1")
7664 (set_attr "length_immediate" "1")
7665 (set_attr "prefix" "evex")
7666 (set_attr "mode" "<sseinsnmode>")])
7668 (define_insn "vec_extract_hi_<mode>_maskm"
7669 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
7670 (vec_merge:<ssehalfvecmode>
7671 (vec_select:<ssehalfvecmode>
7672 (match_operand:VI4F_256 1 "register_operand" "v")
7673 (parallel [(const_int 4) (const_int 5)
7674 (const_int 6) (const_int 7)]))
7675 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
7676 (match_operand:QI 3 "register_operand" "Yk")))]
7677 "TARGET_AVX512F && TARGET_AVX512VL
7678 && rtx_equal_p (operands[2], operands[0])"
7679 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
7680 [(set_attr "type" "sselog1")
7681 (set_attr "length_immediate" "1")
7682 (set_attr "prefix" "evex")
7683 (set_attr "mode" "<sseinsnmode>")])
7685 (define_insn "vec_extract_hi_<mode>_mask"
7686 [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v")
7687 (vec_merge:<ssehalfvecmode>
7688 (vec_select:<ssehalfvecmode>
7689 (match_operand:VI4F_256 1 "register_operand" "v")
7690 (parallel [(const_int 4) (const_int 5)
7691 (const_int 6) (const_int 7)]))
7692 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "0C")
7693 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
7695 "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
7696 [(set_attr "type" "sselog1")
7697 (set_attr "length_immediate" "1")
7698 (set_attr "prefix" "evex")
7699 (set_attr "mode" "<sseinsnmode>")])
7701 (define_insn "vec_extract_hi_<mode>"
7702 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
7703 (vec_select:<ssehalfvecmode>
7704 (match_operand:VI4F_256 1 "register_operand" "x, v")
7705 (parallel [(const_int 4) (const_int 5)
7706 (const_int 6) (const_int 7)])))]
7709 vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
7710 vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7711 [(set_attr "isa" "*, avx512vl")
7712 (set_attr "prefix" "vex, evex")
7713 (set_attr "type" "sselog1")
7714 (set_attr "length_immediate" "1")
7715 (set_attr "mode" "<sseinsnmode>")])
7717 (define_insn_and_split "vec_extract_lo_v32hi"
7718 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7720 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
7721 (parallel [(const_int 0) (const_int 1)
7722 (const_int 2) (const_int 3)
7723 (const_int 4) (const_int 5)
7724 (const_int 6) (const_int 7)
7725 (const_int 8) (const_int 9)
7726 (const_int 10) (const_int 11)
7727 (const_int 12) (const_int 13)
7728 (const_int 14) (const_int 15)])))]
7729 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7731 "&& reload_completed"
7732 [(set (match_dup 0) (match_dup 1))]
7733 "operands[1] = gen_lowpart (V16HImode, operands[1]);")
7735 (define_insn "vec_extract_hi_v32hi"
7736 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7738 (match_operand:V32HI 1 "register_operand" "v,v")
7739 (parallel [(const_int 16) (const_int 17)
7740 (const_int 18) (const_int 19)
7741 (const_int 20) (const_int 21)
7742 (const_int 22) (const_int 23)
7743 (const_int 24) (const_int 25)
7744 (const_int 26) (const_int 27)
7745 (const_int 28) (const_int 29)
7746 (const_int 30) (const_int 31)])))]
7748 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7749 [(set_attr "type" "sselog")
7750 (set_attr "prefix_extra" "1")
7751 (set_attr "length_immediate" "1")
7752 (set_attr "memory" "none,store")
7753 (set_attr "prefix" "evex")
7754 (set_attr "mode" "XI")])
7756 (define_insn_and_split "vec_extract_lo_v16hi"
7757 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
7759 (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
7760 (parallel [(const_int 0) (const_int 1)
7761 (const_int 2) (const_int 3)
7762 (const_int 4) (const_int 5)
7763 (const_int 6) (const_int 7)])))]
7764 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7766 "&& reload_completed"
7767 [(set (match_dup 0) (match_dup 1))]
7768 "operands[1] = gen_lowpart (V8HImode, operands[1]);")
7770 (define_insn "vec_extract_hi_v16hi"
7771 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
7773 (match_operand:V16HI 1 "register_operand" "x,x,v,v,v,v")
7774 (parallel [(const_int 8) (const_int 9)
7775 (const_int 10) (const_int 11)
7776 (const_int 12) (const_int 13)
7777 (const_int 14) (const_int 15)])))]
7780 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7781 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7782 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7783 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7784 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
7785 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
7786 [(set_attr "type" "sselog")
7787 (set_attr "prefix_extra" "1")
7788 (set_attr "length_immediate" "1")
7789 (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
7790 (set_attr "memory" "none,store,none,store,none,store")
7791 (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
7792 (set_attr "mode" "OI")])
7794 (define_insn_and_split "vec_extract_lo_v64qi"
7795 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7797 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
7798 (parallel [(const_int 0) (const_int 1)
7799 (const_int 2) (const_int 3)
7800 (const_int 4) (const_int 5)
7801 (const_int 6) (const_int 7)
7802 (const_int 8) (const_int 9)
7803 (const_int 10) (const_int 11)
7804 (const_int 12) (const_int 13)
7805 (const_int 14) (const_int 15)
7806 (const_int 16) (const_int 17)
7807 (const_int 18) (const_int 19)
7808 (const_int 20) (const_int 21)
7809 (const_int 22) (const_int 23)
7810 (const_int 24) (const_int 25)
7811 (const_int 26) (const_int 27)
7812 (const_int 28) (const_int 29)
7813 (const_int 30) (const_int 31)])))]
7814 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7816 "&& reload_completed"
7817 [(set (match_dup 0) (match_dup 1))]
7818 "operands[1] = gen_lowpart (V32QImode, operands[1]);")
7820 (define_insn "vec_extract_hi_v64qi"
7821 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7823 (match_operand:V64QI 1 "register_operand" "v,v")
7824 (parallel [(const_int 32) (const_int 33)
7825 (const_int 34) (const_int 35)
7826 (const_int 36) (const_int 37)
7827 (const_int 38) (const_int 39)
7828 (const_int 40) (const_int 41)
7829 (const_int 42) (const_int 43)
7830 (const_int 44) (const_int 45)
7831 (const_int 46) (const_int 47)
7832 (const_int 48) (const_int 49)
7833 (const_int 50) (const_int 51)
7834 (const_int 52) (const_int 53)
7835 (const_int 54) (const_int 55)
7836 (const_int 56) (const_int 57)
7837 (const_int 58) (const_int 59)
7838 (const_int 60) (const_int 61)
7839 (const_int 62) (const_int 63)])))]
7841 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7842 [(set_attr "type" "sselog")
7843 (set_attr "prefix_extra" "1")
7844 (set_attr "length_immediate" "1")
7845 (set_attr "memory" "none,store")
7846 (set_attr "prefix" "evex")
7847 (set_attr "mode" "XI")])
7849 (define_insn_and_split "vec_extract_lo_v32qi"
7850 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
7852 (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
7853 (parallel [(const_int 0) (const_int 1)
7854 (const_int 2) (const_int 3)
7855 (const_int 4) (const_int 5)
7856 (const_int 6) (const_int 7)
7857 (const_int 8) (const_int 9)
7858 (const_int 10) (const_int 11)
7859 (const_int 12) (const_int 13)
7860 (const_int 14) (const_int 15)])))]
7861 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7863 "&& reload_completed"
7864 [(set (match_dup 0) (match_dup 1))]
7865 "operands[1] = gen_lowpart (V16QImode, operands[1]);")
7867 (define_insn "vec_extract_hi_v32qi"
7868 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m,v,m,v,m")
7870 (match_operand:V32QI 1 "register_operand" "x,x,v,v,v,v")
7871 (parallel [(const_int 16) (const_int 17)
7872 (const_int 18) (const_int 19)
7873 (const_int 20) (const_int 21)
7874 (const_int 22) (const_int 23)
7875 (const_int 24) (const_int 25)
7876 (const_int 26) (const_int 27)
7877 (const_int 28) (const_int 29)
7878 (const_int 30) (const_int 31)])))]
7881 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7882 vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
7883 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7884 vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
7885 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}
7886 vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
7887 [(set_attr "type" "sselog")
7888 (set_attr "prefix_extra" "1")
7889 (set_attr "length_immediate" "1")
7890 (set_attr "isa" "*,*,avx512dq,avx512dq,avx512f,avx512f")
7891 (set_attr "memory" "none,store,none,store,none,store")
7892 (set_attr "prefix" "vex,vex,evex,evex,evex,evex")
7893 (set_attr "mode" "OI")])
7895 ;; Modes handled by vec_extract patterns.
7896 (define_mode_iterator VEC_EXTRACT_MODE
7897 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
7898 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
7899 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
7900 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
7901 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
7902 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
7903 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
7905 (define_expand "vec_extract<mode><ssescalarmodelower>"
7906 [(match_operand:<ssescalarmode> 0 "register_operand")
7907 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
7908 (match_operand 2 "const_int_operand")]
7911 ix86_expand_vector_extract (false, operands[0], operands[1],
7912 INTVAL (operands[2]));
7916 (define_expand "vec_extract<mode><ssehalfvecmodelower>"
7917 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7918 (match_operand:V_512 1 "register_operand")
7919 (match_operand 2 "const_0_to_1_operand")]
7922 if (INTVAL (operands[2]))
7923 emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
7925 emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
7929 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7931 ;; Parallel double-precision floating point element swizzling
7933 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7935 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
7936 [(set (match_operand:V8DF 0 "register_operand" "=v")
7939 (match_operand:V8DF 1 "register_operand" "v")
7940 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7941 (parallel [(const_int 1) (const_int 9)
7942 (const_int 3) (const_int 11)
7943 (const_int 5) (const_int 13)
7944 (const_int 7) (const_int 15)])))]
7946 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7947 [(set_attr "type" "sselog")
7948 (set_attr "prefix" "evex")
7949 (set_attr "mode" "V8DF")])
7951 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7952 (define_insn "avx_unpckhpd256<mask_name>"
7953 [(set (match_operand:V4DF 0 "register_operand" "=v")
7956 (match_operand:V4DF 1 "register_operand" "v")
7957 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
7958 (parallel [(const_int 1) (const_int 5)
7959 (const_int 3) (const_int 7)])))]
7960 "TARGET_AVX && <mask_avx512vl_condition>"
7961 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7962 [(set_attr "type" "sselog")
7963 (set_attr "prefix" "vex")
7964 (set_attr "mode" "V4DF")])
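;; There is no single cross-lane unpckhpd at 256 bits; the expander below
;; synthesizes vec_interleave_highv4df from an in-lane unpcklpd, an in-lane
;; unpckhpd and a final select that keeps the upper 128-bit half of each
;; intermediate result.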
7966 (define_expand "vec_interleave_highv4df"
7970 (match_operand:V4DF 1 "register_operand")
7971 (match_operand:V4DF 2 "nonimmediate_operand"))
7972 (parallel [(const_int 0) (const_int 4)
7973 (const_int 2) (const_int 6)])))
7979 (parallel [(const_int 1) (const_int 5)
7980 (const_int 3) (const_int 7)])))
7981 (set (match_operand:V4DF 0 "register_operand")
7986 (parallel [(const_int 2) (const_int 3)
7987 (const_int 6) (const_int 7)])))]
7990 operands[3] = gen_reg_rtx (V4DFmode);
7991 operands[4] = gen_reg_rtx (V4DFmode);
7995 (define_insn "avx512vl_unpckhpd128_mask"
7996 [(set (match_operand:V2DF 0 "register_operand" "=v")
8000 (match_operand:V2DF 1 "register_operand" "v")
8001 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8002 (parallel [(const_int 1) (const_int 3)]))
8003 (match_operand:V2DF 3 "vector_move_operand" "0C")
8004 (match_operand:QI 4 "register_operand" "Yk")))]
8006 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
8007 [(set_attr "type" "sselog")
8008 (set_attr "prefix" "evex")
8009 (set_attr "mode" "V2DF")])
8011 (define_expand "vec_interleave_highv2df"
8012 [(set (match_operand:V2DF 0 "register_operand")
8015 (match_operand:V2DF 1 "nonimmediate_operand")
8016 (match_operand:V2DF 2 "nonimmediate_operand"))
8017 (parallel [(const_int 1)
8021 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
8022 operands[2] = force_reg (V2DFmode, operands[2]);
8025 (define_insn "*vec_interleave_highv2df"
8026 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,m")
8029 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
8030 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
8031 (parallel [(const_int 1)
8033 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
8035 unpckhpd\t{%2, %0|%0, %2}
8036 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
8037 %vmovddup\t{%H1, %0|%0, %H1}
8038 movlpd\t{%H1, %0|%0, %H1}
8039 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
8040 %vmovhpd\t{%1, %0|%q0, %1}"
8041 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
8042 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
8043 (set (attr "prefix_data16")
8044 (if_then_else (eq_attr "alternative" "3,5")
8046 (const_string "*")))
8047 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
8048 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
8050 (define_expand "avx512f_movddup512<mask_name>"
8051 [(set (match_operand:V8DF 0 "register_operand")
8054 (match_operand:V8DF 1 "nonimmediate_operand")
8056 (parallel [(const_int 0) (const_int 8)
8057 (const_int 2) (const_int 10)
8058 (const_int 4) (const_int 12)
8059 (const_int 6) (const_int 14)])))]
8062 (define_expand "avx512f_unpcklpd512<mask_name>"
8063 [(set (match_operand:V8DF 0 "register_operand")
8066 (match_operand:V8DF 1 "register_operand")
8067 (match_operand:V8DF 2 "nonimmediate_operand"))
8068 (parallel [(const_int 0) (const_int 8)
8069 (const_int 2) (const_int 10)
8070 (const_int 4) (const_int 12)
8071 (const_int 6) (const_int 14)])))]
8074 (define_insn "*avx512f_unpcklpd512<mask_name>"
8075 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
8078 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
8079 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
8080 (parallel [(const_int 0) (const_int 8)
8081 (const_int 2) (const_int 10)
8082 (const_int 4) (const_int 12)
8083 (const_int 6) (const_int 14)])))]
8086 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
8087 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8088 [(set_attr "type" "sselog")
8089 (set_attr "prefix" "evex")
8090 (set_attr "mode" "V8DF")])
8092 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
8093 (define_expand "avx_movddup256<mask_name>"
8094 [(set (match_operand:V4DF 0 "register_operand")
8097 (match_operand:V4DF 1 "nonimmediate_operand")
8099 (parallel [(const_int 0) (const_int 4)
8100 (const_int 2) (const_int 6)])))]
8101 "TARGET_AVX && <mask_avx512vl_condition>")
8103 (define_expand "avx_unpcklpd256<mask_name>"
8104 [(set (match_operand:V4DF 0 "register_operand")
8107 (match_operand:V4DF 1 "register_operand")
8108 (match_operand:V4DF 2 "nonimmediate_operand"))
8109 (parallel [(const_int 0) (const_int 4)
8110 (const_int 2) (const_int 6)])))]
8111 "TARGET_AVX && <mask_avx512vl_condition>")
8113 (define_insn "*avx_unpcklpd256<mask_name>"
8114 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
8117 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
8118 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
8119 (parallel [(const_int 0) (const_int 4)
8120 (const_int 2) (const_int 6)])))]
8121 "TARGET_AVX && <mask_avx512vl_condition>"
8123 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
8124 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
8125 [(set_attr "type" "sselog")
8126 (set_attr "prefix" "vex")
8127 (set_attr "mode" "V4DF")])
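;; Likewise for the low interleave below: the in-lane unpcklpd/unpckhpd
;; results are combined by keeping the lower 128-bit half of each.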
8129 (define_expand "vec_interleave_lowv4df"
8133 (match_operand:V4DF 1 "register_operand")
8134 (match_operand:V4DF 2 "nonimmediate_operand"))
8135 (parallel [(const_int 0) (const_int 4)
8136 (const_int 2) (const_int 6)])))
8142 (parallel [(const_int 1) (const_int 5)
8143 (const_int 3) (const_int 7)])))
8144 (set (match_operand:V4DF 0 "register_operand")
8149 (parallel [(const_int 0) (const_int 1)
8150 (const_int 4) (const_int 5)])))]
8153 operands[3] = gen_reg_rtx (V4DFmode);
8154 operands[4] = gen_reg_rtx (V4DFmode);
8157 (define_insn "avx512vl_unpcklpd128_mask"
8158 [(set (match_operand:V2DF 0 "register_operand" "=v")
8162 (match_operand:V2DF 1 "register_operand" "v")
8163 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8164 (parallel [(const_int 0) (const_int 2)]))
8165 (match_operand:V2DF 3 "vector_move_operand" "0C")
8166 (match_operand:QI 4 "register_operand" "Yk")))]
8168 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
8169 [(set_attr "type" "sselog")
8170 (set_attr "prefix" "evex")
8171 (set_attr "mode" "V2DF")])
8173 (define_expand "vec_interleave_lowv2df"
8174 [(set (match_operand:V2DF 0 "register_operand")
8177 (match_operand:V2DF 1 "nonimmediate_operand")
8178 (match_operand:V2DF 2 "nonimmediate_operand"))
8179 (parallel [(const_int 0)
8183 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
8184 operands[1] = force_reg (V2DFmode, operands[1]);
8187 (define_insn "*vec_interleave_lowv2df"
8188 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,v,x,v,o")
8191 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
8192 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
8193 (parallel [(const_int 0)
8195 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
8197 unpcklpd\t{%2, %0|%0, %2}
8198 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8199 %vmovddup\t{%1, %0|%0, %q1}
8200 movhpd\t{%2, %0|%0, %q2}
8201 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
8202 %vmovlpd\t{%2, %H0|%H0, %2}"
8203 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
8204 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
8205 (set (attr "prefix_data16")
8206 (if_then_else (eq_attr "alternative" "3,5")
8208 (const_string "*")))
8209 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
8210 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
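;; A movddup whose result is only stored to memory is split after reload
;; into two scalar DFmode stores of the low element.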
8213 [(set (match_operand:V2DF 0 "memory_operand")
8216 (match_operand:V2DF 1 "register_operand")
8218 (parallel [(const_int 0)
8220 "TARGET_SSE3 && reload_completed"
8223 rtx low = gen_lowpart (DFmode, operands[1]);
8225 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
8226 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
8231 [(set (match_operand:V2DF 0 "register_operand")
8234 (match_operand:V2DF 1 "memory_operand")
8236 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
8237 (match_operand:SI 3 "const_int_operand")])))]
8238 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
8239 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
8241 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
8244 (define_insn "avx512f_vmscalef<mode><round_name>"
8245 [(set (match_operand:VF_128 0 "register_operand" "=v")
8248 [(match_operand:VF_128 1 "register_operand" "v")
8249 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
8254 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
8255 [(set_attr "prefix" "evex")
8256 (set_attr "mode" "<ssescalarmode>")])
8258 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
8259 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8261 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
8262 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
8265 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
8266 [(set_attr "prefix" "evex")
8267 (set_attr "mode" "<MODE>")])
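;; The vpternlog immediate is in effect a three-input truth table: result
;; bit j is taken from immediate bit (4*a + 2*b + c), where a, b and c are
;; bit j of operands 1, 2 and 3 respectively.  For example, 0x96 computes a
;; three-way XOR and 0xe8 a majority vote.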
8269 (define_expand "<avx512>_vternlog<mode>_maskz"
8270 [(match_operand:VI48_AVX512VL 0 "register_operand")
8271 (match_operand:VI48_AVX512VL 1 "register_operand")
8272 (match_operand:VI48_AVX512VL 2 "register_operand")
8273 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
8274 (match_operand:SI 4 "const_0_to_255_operand")
8275 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8278 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
8279 operands[0], operands[1], operands[2], operands[3],
8280 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
8284 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
8285 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8286 (unspec:VI48_AVX512VL
8287 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
8288 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
8289 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
8290 (match_operand:SI 4 "const_0_to_255_operand")]
8293 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
8294 [(set_attr "type" "sselog")
8295 (set_attr "prefix" "evex")
8296 (set_attr "mode" "<sseinsnmode>")])
8298 (define_insn "<avx512>_vternlog<mode>_mask"
8299 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8300 (vec_merge:VI48_AVX512VL
8301 (unspec:VI48_AVX512VL
8302 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
8303 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
8304 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
8305 (match_operand:SI 4 "const_0_to_255_operand")]
8308 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8310 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
8311 [(set_attr "type" "sselog")
8312 (set_attr "prefix" "evex")
8313 (set_attr "mode" "<sseinsnmode>")])
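;; vgetexp extracts the exponent of each element as a floating-point value,
;; essentially floor(log2(|x|)); the scalar vgetexpss/sd form computes it
;; for the low element of operand 2 and takes the remaining elements from
;; operand 1.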
8315 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
8316 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8317 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
8320 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
8321 [(set_attr "prefix" "evex")
8322 (set_attr "mode" "<MODE>")])
8324 (define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
8325 [(set (match_operand:VF_128 0 "register_operand" "=v")
8328 [(match_operand:VF_128 1 "register_operand" "v")
8329 (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
8334 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_saeonly_scalar_mask_op3>}";
8335 [(set_attr "prefix" "evex")
8336 (set_attr "mode" "<ssescalarmode>")])
8338 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
8339 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
8340 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
8341 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
8342 (match_operand:SI 3 "const_0_to_255_operand")]
8345 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
8346 [(set_attr "prefix" "evex")
8347 (set_attr "mode" "<sseinsnmode>")])
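;; The 8-bit shufps immediate is applied per 128-bit lane: two-bit field i
;; selects an element of operand 1 (i = 0, 1) or of operand 2 (i = 2, 3)
;; within the lane.  The expander below spells this out as sixteen explicit
;; selectors; e.g. for an immediate of 0x1b (0b00011011) lane 0 becomes
;; elements 3, 2, 17, 16 of the concatenated inputs.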
8349 (define_expand "avx512f_shufps512_mask"
8350 [(match_operand:V16SF 0 "register_operand")
8351 (match_operand:V16SF 1 "register_operand")
8352 (match_operand:V16SF 2 "nonimmediate_operand")
8353 (match_operand:SI 3 "const_0_to_255_operand")
8354 (match_operand:V16SF 4 "register_operand")
8355 (match_operand:HI 5 "register_operand")]
8358 int mask = INTVAL (operands[3]);
8359 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
8360 GEN_INT ((mask >> 0) & 3),
8361 GEN_INT ((mask >> 2) & 3),
8362 GEN_INT (((mask >> 4) & 3) + 16),
8363 GEN_INT (((mask >> 6) & 3) + 16),
8364 GEN_INT (((mask >> 0) & 3) + 4),
8365 GEN_INT (((mask >> 2) & 3) + 4),
8366 GEN_INT (((mask >> 4) & 3) + 20),
8367 GEN_INT (((mask >> 6) & 3) + 20),
8368 GEN_INT (((mask >> 0) & 3) + 8),
8369 GEN_INT (((mask >> 2) & 3) + 8),
8370 GEN_INT (((mask >> 4) & 3) + 24),
8371 GEN_INT (((mask >> 6) & 3) + 24),
8372 GEN_INT (((mask >> 0) & 3) + 12),
8373 GEN_INT (((mask >> 2) & 3) + 12),
8374 GEN_INT (((mask >> 4) & 3) + 28),
8375 GEN_INT (((mask >> 6) & 3) + 28),
8376 operands[4], operands[5]));
8381 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
8382 [(match_operand:VF_AVX512VL 0 "register_operand")
8383 (match_operand:VF_AVX512VL 1 "register_operand")
8384 (match_operand:VF_AVX512VL 2 "register_operand")
8385 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
8386 (match_operand:SI 4 "const_0_to_255_operand")
8387 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8390 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
8391 operands[0], operands[1], operands[2], operands[3],
8392 operands[4], CONST0_RTX (<MODE>mode), operands[5]
8393 <round_saeonly_expand_operand6>));
8397 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
8398 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8400 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
8401 (match_operand:VF_AVX512VL 2 "register_operand" "v")
8402 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
8403 (match_operand:SI 4 "const_0_to_255_operand")]
8406 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
8407 [(set_attr "prefix" "evex")
8408 (set_attr "mode" "<MODE>")])
8410 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
8411 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8412 (vec_merge:VF_AVX512VL
8414 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
8415 (match_operand:VF_AVX512VL 2 "register_operand" "v")
8416 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
8417 (match_operand:SI 4 "const_0_to_255_operand")]
8420 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8422 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
8423 [(set_attr "prefix" "evex")
8424 (set_attr "mode" "<MODE>")])
8426 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
8427 [(match_operand:VF_128 0 "register_operand")
8428 (match_operand:VF_128 1 "register_operand")
8429 (match_operand:VF_128 2 "register_operand")
8430 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
8431 (match_operand:SI 4 "const_0_to_255_operand")
8432 (match_operand:<avx512fmaskmode> 5 "register_operand")]
8435 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
8436 operands[0], operands[1], operands[2], operands[3],
8437 operands[4], CONST0_RTX (<MODE>mode), operands[5]
8438 <round_saeonly_expand_operand6>));
8442 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
8443 [(set (match_operand:VF_128 0 "register_operand" "=v")
8446 [(match_operand:VF_128 1 "register_operand" "0")
8447 (match_operand:VF_128 2 "register_operand" "v")
8448 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8449 (match_operand:SI 4 "const_0_to_255_operand")]
8454 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
8455 [(set_attr "prefix" "evex")
8456 (set_attr "mode" "<ssescalarmode>")])
8458 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
8459 [(set (match_operand:VF_128 0 "register_operand" "=v")
8463 [(match_operand:VF_128 1 "register_operand" "0")
8464 (match_operand:VF_128 2 "register_operand" "v")
8465 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8466 (match_operand:SI 4 "const_0_to_255_operand")]
8471 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
8473 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
8474 [(set_attr "prefix" "evex")
8475 (set_attr "mode" "<ssescalarmode>")])
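;; In the vrndscale immediate, bits [1:0] give the rounding mode, bit 2
;; selects the MXCSR rounding mode instead, bit 3 suppresses precision
;; exceptions, and bits [7:4] give the number of fraction bits to keep
;; (the result is rounded to a multiple of 2^-M).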
8477 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
8478 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
8480 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
8481 (match_operand:SI 2 "const_0_to_255_operand")]
8484 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
8485 [(set_attr "length_immediate" "1")
8486 (set_attr "prefix" "evex")
8487 (set_attr "mode" "<MODE>")])
8489 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
8490 [(set (match_operand:VF_128 0 "register_operand" "=v")
8493 [(match_operand:VF_128 1 "register_operand" "v")
8494 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
8495 (match_operand:SI 3 "const_0_to_255_operand")]
8500 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
8501 [(set_attr "length_immediate" "1")
8502 (set_attr "prefix" "evex")
8503 (set_attr "mode" "<MODE>")])
8505 ;; One bit in mask selects 2 elements.
8506 (define_insn "avx512f_shufps512_1<mask_name>"
8507 [(set (match_operand:V16SF 0 "register_operand" "=v")
8510 (match_operand:V16SF 1 "register_operand" "v")
8511 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
8512 (parallel [(match_operand 3 "const_0_to_3_operand")
8513 (match_operand 4 "const_0_to_3_operand")
8514 (match_operand 5 "const_16_to_19_operand")
8515 (match_operand 6 "const_16_to_19_operand")
8516 (match_operand 7 "const_4_to_7_operand")
8517 (match_operand 8 "const_4_to_7_operand")
8518 (match_operand 9 "const_20_to_23_operand")
8519 (match_operand 10 "const_20_to_23_operand")
8520 (match_operand 11 "const_8_to_11_operand")
8521 (match_operand 12 "const_8_to_11_operand")
8522 (match_operand 13 "const_24_to_27_operand")
8523 (match_operand 14 "const_24_to_27_operand")
8524 (match_operand 15 "const_12_to_15_operand")
8525 (match_operand 16 "const_12_to_15_operand")
8526 (match_operand 17 "const_28_to_31_operand")
8527 (match_operand 18 "const_28_to_31_operand")])))]
8529 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
8530 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
8531 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
8532 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
8533 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
8534 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
8535 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
8536 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
8537 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
8538 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
8539 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
8540 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
8543 mask = INTVAL (operands[3]);
8544 mask |= INTVAL (operands[4]) << 2;
8545 mask |= (INTVAL (operands[5]) - 16) << 4;
8546 mask |= (INTVAL (operands[6]) - 16) << 6;
8547 operands[3] = GEN_INT (mask);
8549 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
8551 [(set_attr "type" "sselog")
8552 (set_attr "length_immediate" "1")
8553 (set_attr "prefix" "evex")
8554 (set_attr "mode" "V16SF")])
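;; For shufpd the immediate carries one selector bit per result element:
;; even elements come from operand 1, odd ones from operand 2, and bit i
;; picks the low (0) or high (1) double of the 128-bit pair supplying
;; element i.  The expander below expands the 8 bits into the explicit
;; selectors expected by avx512f_shufpd512_1.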
8556 (define_expand "avx512f_shufpd512_mask"
8557 [(match_operand:V8DF 0 "register_operand")
8558 (match_operand:V8DF 1 "register_operand")
8559 (match_operand:V8DF 2 "nonimmediate_operand")
8560 (match_operand:SI 3 "const_0_to_255_operand")
8561 (match_operand:V8DF 4 "register_operand")
8562 (match_operand:QI 5 "register_operand")]
8565 int mask = INTVAL (operands[3]);
8566 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
8568 GEN_INT (mask & 2 ? 9 : 8),
8569 GEN_INT (mask & 4 ? 3 : 2),
8570 GEN_INT (mask & 8 ? 11 : 10),
8571 GEN_INT (mask & 16 ? 5 : 4),
8572 GEN_INT (mask & 32 ? 13 : 12),
8573 GEN_INT (mask & 64 ? 7 : 6),
8574 GEN_INT (mask & 128 ? 15 : 14),
8575 operands[4], operands[5]));
8579 (define_insn "avx512f_shufpd512_1<mask_name>"
8580 [(set (match_operand:V8DF 0 "register_operand" "=v")
8583 (match_operand:V8DF 1 "register_operand" "v")
8584 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
8585 (parallel [(match_operand 3 "const_0_to_1_operand")
8586 (match_operand 4 "const_8_to_9_operand")
8587 (match_operand 5 "const_2_to_3_operand")
8588 (match_operand 6 "const_10_to_11_operand")
8589 (match_operand 7 "const_4_to_5_operand")
8590 (match_operand 8 "const_12_to_13_operand")
8591 (match_operand 9 "const_6_to_7_operand")
8592 (match_operand 10 "const_14_to_15_operand")])))]
8596 mask = INTVAL (operands[3]);
8597 mask |= (INTVAL (operands[4]) - 8) << 1;
8598 mask |= (INTVAL (operands[5]) - 2) << 2;
8599 mask |= (INTVAL (operands[6]) - 10) << 3;
8600 mask |= (INTVAL (operands[7]) - 4) << 4;
8601 mask |= (INTVAL (operands[8]) - 12) << 5;
8602 mask |= (INTVAL (operands[9]) - 6) << 6;
8603 mask |= (INTVAL (operands[10]) - 14) << 7;
8604 operands[3] = GEN_INT (mask);
8606 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
8608 [(set_attr "type" "sselog")
8609 (set_attr "length_immediate" "1")
8610 (set_attr "prefix" "evex")
8611 (set_attr "mode" "V8DF")])
8613 (define_expand "avx_shufpd256<mask_expand4_name>"
8614 [(match_operand:V4DF 0 "register_operand")
8615 (match_operand:V4DF 1 "register_operand")
8616 (match_operand:V4DF 2 "nonimmediate_operand")
8617 (match_operand:SI 3 "const_int_operand")]
8620 int mask = INTVAL (operands[3]);
8621 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
8625 GEN_INT (mask & 2 ? 5 : 4),
8626 GEN_INT (mask & 4 ? 3 : 2),
8627 GEN_INT (mask & 8 ? 7 : 6)
8628 <mask_expand4_args>));
8632 (define_insn "avx_shufpd256_1<mask_name>"
8633 [(set (match_operand:V4DF 0 "register_operand" "=v")
8636 (match_operand:V4DF 1 "register_operand" "v")
8637 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8638 (parallel [(match_operand 3 "const_0_to_1_operand")
8639 (match_operand 4 "const_4_to_5_operand")
8640 (match_operand 5 "const_2_to_3_operand")
8641 (match_operand 6 "const_6_to_7_operand")])))]
8642 "TARGET_AVX && <mask_avx512vl_condition>"
8645 mask = INTVAL (operands[3]);
8646 mask |= (INTVAL (operands[4]) - 4) << 1;
8647 mask |= (INTVAL (operands[5]) - 2) << 2;
8648 mask |= (INTVAL (operands[6]) - 6) << 3;
8649 operands[3] = GEN_INT (mask);
8651 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
8653 [(set_attr "type" "sseshuf")
8654 (set_attr "length_immediate" "1")
8655 (set_attr "prefix" "vex")
8656 (set_attr "mode" "V4DF")])
8658 (define_expand "sse2_shufpd<mask_expand4_name>"
8659 [(match_operand:V2DF 0 "register_operand")
8660 (match_operand:V2DF 1 "register_operand")
8661 (match_operand:V2DF 2 "vector_operand")
8662 (match_operand:SI 3 "const_int_operand")]
8665 int mask = INTVAL (operands[3]);
8666 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
8667 operands[2], GEN_INT (mask & 1),
8668 GEN_INT (mask & 2 ? 3 : 2)
8669 <mask_expand4_args>));
8673 (define_insn "sse2_shufpd_v2df_mask"
8674 [(set (match_operand:V2DF 0 "register_operand" "=v")
8678 (match_operand:V2DF 1 "register_operand" "v")
8679 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8680 (parallel [(match_operand 3 "const_0_to_1_operand")
8681 (match_operand 4 "const_2_to_3_operand")]))
8682 (match_operand:V2DF 5 "vector_move_operand" "0C")
8683 (match_operand:QI 6 "register_operand" "Yk")))]
8687 mask = INTVAL (operands[3]);
8688 mask |= (INTVAL (operands[4]) - 2) << 1;
8689 operands[3] = GEN_INT (mask);
8691 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{6%}%N5, %1, %2, %3}";
8693 [(set_attr "type" "sseshuf")
8694 (set_attr "length_immediate" "1")
8695 (set_attr "prefix" "evex")
8696 (set_attr "mode" "V2DF")])
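;; In the masked template above, "%{%6%}" prints the mask register
;; (operand 6) between literal braces, and the 'N' operand modifier appends
;; "{z}" when the merge operand (operand 5) is the all-zeros constant,
;; selecting zeroing- rather than merging-masking.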
8698 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
8699 (define_insn "avx2_interleave_highv4di<mask_name>"
8700 [(set (match_operand:V4DI 0 "register_operand" "=v")
8703 (match_operand:V4DI 1 "register_operand" "v")
8704 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8705 (parallel [(const_int 1)
8709 "TARGET_AVX2 && <mask_avx512vl_condition>"
8710 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8711 [(set_attr "type" "sselog")
8712 (set_attr "prefix" "vex")
8713 (set_attr "mode" "OI")])
8715 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
8716 [(set (match_operand:V8DI 0 "register_operand" "=v")
8719 (match_operand:V8DI 1 "register_operand" "v")
8720 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8721 (parallel [(const_int 1) (const_int 9)
8722 (const_int 3) (const_int 11)
8723 (const_int 5) (const_int 13)
8724 (const_int 7) (const_int 15)])))]
8726 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8727 [(set_attr "type" "sselog")
8728 (set_attr "prefix" "evex")
8729 (set_attr "mode" "XI")])
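;; As in the shuffle patterns, parallel indices of 8 and above select from
;; operand 2, so (1 9 3 11 5 13 7 15) interleaves the high quadword of each
;; 128-bit lane of the two sources -- exactly what vpunpckhqdq does.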
8731 (define_insn "vec_interleave_highv2di<mask_name>"
8732 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8735 (match_operand:V2DI 1 "register_operand" "0,v")
8736 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
8737 (parallel [(const_int 1)
8739 "TARGET_SSE2 && <mask_avx512vl_condition>"
8741 punpckhqdq\t{%2, %0|%0, %2}
8742 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8743 [(set_attr "isa" "noavx,avx")
8744 (set_attr "type" "sselog")
8745 (set_attr "prefix_data16" "1,*")
8746 (set_attr "prefix" "orig,<mask_prefix>")
8747 (set_attr "mode" "TI")])
8749 (define_insn "avx2_interleave_lowv4di<mask_name>"
8750 [(set (match_operand:V4DI 0 "register_operand" "=v")
8753 (match_operand:V4DI 1 "register_operand" "v")
8754 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8755 (parallel [(const_int 0)
8759 "TARGET_AVX2 && <mask_avx512vl_condition>"
8760 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8761 [(set_attr "type" "sselog")
8762 (set_attr "prefix" "vex")
8763 (set_attr "mode" "OI")])
8765 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
8766 [(set (match_operand:V8DI 0 "register_operand" "=v")
8769 (match_operand:V8DI 1 "register_operand" "v")
8770 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8771 (parallel [(const_int 0) (const_int 8)
8772 (const_int 2) (const_int 10)
8773 (const_int 4) (const_int 12)
8774 (const_int 6) (const_int 14)])))]
8776 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8777 [(set_attr "type" "sselog")
8778 (set_attr "prefix" "evex")
8779 (set_attr "mode" "XI")])
8781 (define_insn "vec_interleave_lowv2di<mask_name>"
8782 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8785 (match_operand:V2DI 1 "register_operand" "0,v")
8786 (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
8787 (parallel [(const_int 0)
8789 "TARGET_SSE2 && <mask_avx512vl_condition>"
8791 punpcklqdq\t{%2, %0|%0, %2}
8792 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8793 [(set_attr "isa" "noavx,avx")
8794 (set_attr "type" "sselog")
8795 (set_attr "prefix_data16" "1,*")
8796 (set_attr "prefix" "orig,vex")
8797 (set_attr "mode" "TI")])
8799 (define_insn "sse2_shufpd_<mode>"
8800 [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
8801 (vec_select:VI8F_128
8802 (vec_concat:<ssedoublevecmode>
8803 (match_operand:VI8F_128 1 "register_operand" "0,v")
8804 (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
8805 (parallel [(match_operand 3 "const_0_to_1_operand")
8806 (match_operand 4 "const_2_to_3_operand")])))]
8810 mask = INTVAL (operands[3]);
8811 mask |= (INTVAL (operands[4]) - 2) << 1;
8812 operands[3] = GEN_INT (mask);
8814 switch (which_alternative)
8817 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
8819 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8824 [(set_attr "isa" "noavx,avx")
8825 (set_attr "type" "sseshuf")
8826 (set_attr "length_immediate" "1")
8827 (set_attr "prefix" "orig,maybe_evex")
8828 (set_attr "mode" "V2DF")])
8830 ;; Avoid combining registers from different units in a single alternative,
8831 ;; see comment above inline_secondary_memory_needed function in i386.c
8832 (define_insn "sse2_storehpd"
8833 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,Yv,x,*f,r")
8835 (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
8836 (parallel [(const_int 1)])))]
8837 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8839 %vmovhpd\t{%1, %0|%0, %1}
8841 vunpckhpd\t{%d1, %0|%0, %d1}
8845 [(set_attr "isa" "*,noavx,avx,*,*,*")
8846 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
8847 (set (attr "prefix_data16")
8849 (and (eq_attr "alternative" "0")
8850 (not (match_test "TARGET_AVX")))
8852 (const_string "*")))
8853 (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
8854 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
8857 [(set (match_operand:DF 0 "register_operand")
8859 (match_operand:V2DF 1 "memory_operand")
8860 (parallel [(const_int 1)])))]
8861 "TARGET_SSE2 && reload_completed"
8862 [(set (match_dup 0) (match_dup 1))]
8863 "operands[1] = adjust_address (operands[1], DFmode, 8);")
8865 (define_insn "*vec_extractv2df_1_sse"
8866 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8868 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
8869 (parallel [(const_int 1)])))]
8870 "!TARGET_SSE2 && TARGET_SSE
8871 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8873 movhps\t{%1, %0|%q0, %1}
8874 movhlps\t{%1, %0|%0, %1}
8875 movlps\t{%H1, %0|%0, %H1}"
8876 [(set_attr "type" "ssemov")
8877 (set_attr "mode" "V2SF,V4SF,V2SF")])
8879 ;; Avoid combining registers from different units in a single alternative,
8880 ;; see comment above inline_secondary_memory_needed function in i386.c
8881 (define_insn "sse2_storelpd"
8882 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
8884 (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
8885 (parallel [(const_int 0)])))]
8886 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8888 %vmovlpd\t{%1, %0|%0, %1}
8893 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
8894 (set (attr "prefix_data16")
8895 (if_then_else (eq_attr "alternative" "0")
8897 (const_string "*")))
8898 (set_attr "prefix" "maybe_vex")
8899 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
8902 [(set (match_operand:DF 0 "register_operand")
8904 (match_operand:V2DF 1 "nonimmediate_operand")
8905 (parallel [(const_int 0)])))]
8906 "TARGET_SSE2 && reload_completed"
8907 [(set (match_dup 0) (match_dup 1))]
8908 "operands[1] = gen_lowpart (DFmode, operands[1]);")
8910 (define_insn "*vec_extractv2df_0_sse"
8911 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8913 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
8914 (parallel [(const_int 0)])))]
8915 "!TARGET_SSE2 && TARGET_SSE
8916 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8918 movlps\t{%1, %0|%0, %1}
8919 movaps\t{%1, %0|%0, %1}
8920 movlps\t{%1, %0|%0, %q1}"
8921 [(set_attr "type" "ssemov")
8922 (set_attr "mode" "V2SF,V4SF,V2SF")])
8924 (define_expand "sse2_loadhpd_exp"
8925 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8928 (match_operand:V2DF 1 "nonimmediate_operand")
8929 (parallel [(const_int 0)]))
8930 (match_operand:DF 2 "nonimmediate_operand")))]
8933 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8935 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
8937 /* Fix up the destination if needed. */
8938 if (dst != operands[0])
8939 emit_move_insn (operands[0], dst);
8944 ;; Avoid combining registers from different units in a single alternative,
8945 ;; see comment above inline_secondary_memory_needed function in i386.c
8946 (define_insn "sse2_loadhpd"
8947 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8951 (match_operand:V2DF 1 "nonimmediate_operand"
8953 (parallel [(const_int 0)]))
8954 (match_operand:DF 2 "nonimmediate_operand"
8955 " m,m,x,v,x,*f,r")))]
8956 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8958 movhpd\t{%2, %0|%0, %2}
8959 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8960 unpcklpd\t{%2, %0|%0, %2}
8961 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8965 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
8966 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
8967 (set (attr "prefix_data16")
8968 (if_then_else (eq_attr "alternative" "0")
8970 (const_string "*")))
8971 (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
8972 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
8975 [(set (match_operand:V2DF 0 "memory_operand")
8977 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
8978 (match_operand:DF 1 "register_operand")))]
8979 "TARGET_SSE2 && reload_completed"
8980 [(set (match_dup 0) (match_dup 1))]
8981 "operands[0] = adjust_address (operands[0], DFmode, 8);")
8983 (define_expand "sse2_loadlpd_exp"
8984 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8986 (match_operand:DF 2 "nonimmediate_operand")
8988 (match_operand:V2DF 1 "nonimmediate_operand")
8989 (parallel [(const_int 1)]))))]
8992 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8994 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
8996 /* Fix up the destination if needed. */
8997 if (dst != operands[0])
8998 emit_move_insn (operands[0], dst);
9003 ;; Avoid combining registers from different units in a single alternative,
9004 ;; see comment above inline_secondary_memory_needed function in i386.c
9005 (define_insn "sse2_loadlpd"
9006 [(set (match_operand:V2DF 0 "nonimmediate_operand"
9007 "=v,x,v,x,v,x,x,v,m,m ,m")
9009 (match_operand:DF 2 "nonimmediate_operand"
9010 "vm,m,m,x,v,0,0,v,x,*f,r")
9012 (match_operand:V2DF 1 "vector_move_operand"
9013 " C,0,v,0,v,x,o,o,0,0 ,0")
9014 (parallel [(const_int 1)]))))]
9015 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
9017 %vmovq\t{%2, %0|%0, %2}
9018 movlpd\t{%2, %0|%0, %2}
9019 vmovlpd\t{%2, %1, %0|%0, %1, %2}
9020 movsd\t{%2, %0|%0, %2}
9021 vmovsd\t{%2, %1, %0|%0, %1, %2}
9022 shufpd\t{$2, %1, %0|%0, %1, 2}
9023 movhpd\t{%H1, %0|%0, %H1}
9024 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
9028 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
9030 (cond [(eq_attr "alternative" "5")
9031 (const_string "sselog")
9032 (eq_attr "alternative" "9")
9033 (const_string "fmov")
9034 (eq_attr "alternative" "10")
9035 (const_string "imov")
9037 (const_string "ssemov")))
9038 (set (attr "prefix_data16")
9039 (if_then_else (eq_attr "alternative" "1,6")
9041 (const_string "*")))
9042 (set (attr "length_immediate")
9043 (if_then_else (eq_attr "alternative" "5")
9045 (const_string "*")))
9046 (set (attr "prefix")
9047 (cond [(eq_attr "alternative" "0")
9048 (const_string "maybe_vex")
9049 (eq_attr "alternative" "1,3,5,6")
9050 (const_string "orig")
9051 (eq_attr "alternative" "2,4,7")
9052 (const_string "maybe_evex")
9054 (const_string "*")))
9055 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
9058 [(set (match_operand:V2DF 0 "memory_operand")
9060 (match_operand:DF 1 "register_operand")
9061 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
9062 "TARGET_SSE2 && reload_completed"
9063 [(set (match_dup 0) (match_dup 1))]
9064 "operands[0] = adjust_address (operands[0], DFmode, 0);")
9066 (define_insn "sse2_movsd"
9067 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,v,x,v,m,x,x,v,o")
9069 (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
9070 (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
9074 movsd\t{%2, %0|%0, %2}
9075 vmovsd\t{%2, %1, %0|%0, %1, %2}
9076 movlpd\t{%2, %0|%0, %q2}
9077 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
9078 %vmovlpd\t{%2, %0|%q0, %2}
9079 shufpd\t{$2, %1, %0|%0, %1, 2}
9080 movhps\t{%H1, %0|%0, %H1}
9081 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
9082 %vmovhps\t{%1, %H0|%H0, %1}"
9083 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
9086 (eq_attr "alternative" "5")
9087 (const_string "sselog")
9088 (const_string "ssemov")))
9089 (set (attr "prefix_data16")
9091 (and (eq_attr "alternative" "2,4")
9092 (not (match_test "TARGET_AVX")))
9094 (const_string "*")))
9095 (set (attr "length_immediate")
9096 (if_then_else (eq_attr "alternative" "5")
9098 (const_string "*")))
9099 (set (attr "prefix")
9100 (cond [(eq_attr "alternative" "1,3,7")
9101 (const_string "maybe_evex")
9102 (eq_attr "alternative" "4,8")
9103 (const_string "maybe_vex")
9105 (const_string "orig")))
9106 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
9108 (define_insn "vec_dupv2df<mask_name>"
9109 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
9111 (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
9112 "TARGET_SSE2 && <mask_avx512vl_condition>"
9115 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
9116 vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
9117 [(set_attr "isa" "noavx,sse3,avx512vl")
9118 (set_attr "type" "sselog1")
9119 (set_attr "prefix" "orig,maybe_vex,evex")
9120 (set_attr "mode" "V2DF,DF,DF")])
9122 (define_insn "vec_concatv2df"
9123 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v,x,v,x,x, v,x,x")
9125 (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,xm,0,0")
9126 (match_operand:DF 2 "vector_move_operand" " x,x,v,1,1,m,m, C,x,m")))]
9128 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
9129 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
9131 unpcklpd\t{%2, %0|%0, %2}
9132 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9133 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9134 %vmovddup\t{%1, %0|%0, %1}
9135 vmovddup\t{%1, %0|%0, %1}
9136 movhpd\t{%2, %0|%0, %2}
9137 vmovhpd\t{%2, %1, %0|%0, %1, %2}
9138 %vmovq\t{%1, %0|%0, %1}
9139 movlhps\t{%2, %0|%0, %2}
9140 movhps\t{%2, %0|%0, %2}"
9142 (cond [(eq_attr "alternative" "0,5")
9143 (const_string "sse2_noavx")
9144 (eq_attr "alternative" "1,6")
9145 (const_string "avx")
9146 (eq_attr "alternative" "2,4")
9147 (const_string "avx512vl")
9148 (eq_attr "alternative" "3")
9149 (const_string "sse3")
9150 (eq_attr "alternative" "7")
9151 (const_string "sse2")
9153 (const_string "noavx")))
9156 (eq_attr "alternative" "0,1,2,3,4")
9157 (const_string "sselog")
9158 (const_string "ssemov")))
9159 (set (attr "prefix_data16")
9160 (if_then_else (eq_attr "alternative" "5")
9162 (const_string "*")))
9163 (set (attr "prefix")
9164 (cond [(eq_attr "alternative" "1,6")
9165 (const_string "vex")
9166 (eq_attr "alternative" "2,4")
9167 (const_string "evex")
9168 (eq_attr "alternative" "3,7")
9169 (const_string "maybe_vex")
9171 (const_string "orig")))
9172 (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
9174 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9176 ;; Parallel integer down-conversion operations
9178 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9180 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
9181 (define_mode_attr pmov_src_mode
9182 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
9183 (define_mode_attr pmov_src_lower
9184 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
9185 (define_mode_attr pmov_suff_1
9186 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
9188 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
9189 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
9190 (any_truncate:PMOV_DST_MODE_1
9191 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
9193 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
9194 [(set_attr "type" "ssemov")
9195 (set_attr "memory" "none,store")
9196 (set_attr "prefix" "evex")
9197 (set_attr "mode" "<sseinsnmode>")])
9199 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
9200 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
9201 (vec_merge:PMOV_DST_MODE_1
9202 (any_truncate:PMOV_DST_MODE_1
9203 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
9204 (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
9205 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9207 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9208 [(set_attr "type" "ssemov")
9209 (set_attr "memory" "none,store")
9210 (set_attr "prefix" "evex")
9211 (set_attr "mode" "<sseinsnmode>")])
9213 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
9214 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
9215 (vec_merge:PMOV_DST_MODE_1
9216 (any_truncate:PMOV_DST_MODE_1
9217 (match_operand:<pmov_src_mode> 1 "register_operand"))
9219 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
9222 (define_insn "avx512bw_<code>v32hiv32qi2"
9223 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
9225 (match_operand:V32HI 1 "register_operand" "v,v")))]
9227 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
9228 [(set_attr "type" "ssemov")
9229 (set_attr "memory" "none,store")
9230 (set_attr "prefix" "evex")
9231 (set_attr "mode" "XI")])
9233 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
9234 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
9237 (match_operand:V32HI 1 "register_operand" "v,v"))
9238 (match_operand:V32QI 2 "vector_move_operand" "0C,0")
9239 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
9241 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9242 [(set_attr "type" "ssemov")
9243 (set_attr "memory" "none,store")
9244 (set_attr "prefix" "evex")
9245 (set_attr "mode" "XI")])
9247 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
9248 [(set (match_operand:V32QI 0 "nonimmediate_operand")
9251 (match_operand:V32HI 1 "register_operand"))
9253 (match_operand:SI 2 "register_operand")))]
9256 (define_mode_iterator PMOV_DST_MODE_2
9257 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
9258 (define_mode_attr pmov_suff_2
9259 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
9261 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
9262 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
9263 (any_truncate:PMOV_DST_MODE_2
9264 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
9266 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
9267 [(set_attr "type" "ssemov")
9268 (set_attr "memory" "none,store")
9269 (set_attr "prefix" "evex")
9270 (set_attr "mode" "<sseinsnmode>")])
9272 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
9273 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
9274 (vec_merge:PMOV_DST_MODE_2
9275 (any_truncate:PMOV_DST_MODE_2
9276 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
9277 (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
9278 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
9280 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9281 [(set_attr "type" "ssemov")
9282 (set_attr "memory" "none,store")
9283 (set_attr "prefix" "evex")
9284 (set_attr "mode" "<sseinsnmode>")])
9286 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
9287 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
9288 (vec_merge:PMOV_DST_MODE_2
9289 (any_truncate:PMOV_DST_MODE_2
9290 (match_operand:<ssedoublemode> 1 "register_operand"))
9292 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
9295 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
9296 (define_mode_attr pmov_dst_3
9297 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
9298 (define_mode_attr pmov_dst_zeroed_3
9299 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
9300 (define_mode_attr pmov_suff_3
9301 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
9303 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
9304 [(set (match_operand:V16QI 0 "register_operand" "=v")
9306 (any_truncate:<pmov_dst_3>
9307 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
9308 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
9310 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9311 [(set_attr "type" "ssemov")
9312 (set_attr "prefix" "evex")
9313 (set_attr "mode" "TI")])
9315 (define_insn "*avx512vl_<code>v2div2qi2_store"
9316 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9319 (match_operand:V2DI 1 "register_operand" "v"))
9322 (parallel [(const_int 2) (const_int 3)
9323 (const_int 4) (const_int 5)
9324 (const_int 6) (const_int 7)
9325 (const_int 8) (const_int 9)
9326 (const_int 10) (const_int 11)
9327 (const_int 12) (const_int 13)
9328 (const_int 14) (const_int 15)]))))]
9330 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9331 [(set_attr "type" "ssemov")
9332 (set_attr "memory" "store")
9333 (set_attr "prefix" "evex")
9334 (set_attr "mode" "TI")])
9336 (define_insn "avx512vl_<code>v2div2qi2_mask"
9337 [(set (match_operand:V16QI 0 "register_operand" "=v")
9341 (match_operand:V2DI 1 "register_operand" "v"))
9343 (match_operand:V16QI 2 "vector_move_operand" "0C")
9344 (parallel [(const_int 0) (const_int 1)]))
9345 (match_operand:QI 3 "register_operand" "Yk"))
9346 (const_vector:V14QI [(const_int 0) (const_int 0)
9347 (const_int 0) (const_int 0)
9348 (const_int 0) (const_int 0)
9349 (const_int 0) (const_int 0)
9350 (const_int 0) (const_int 0)
9351 (const_int 0) (const_int 0)
9352 (const_int 0) (const_int 0)])))]
9354 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9355 [(set_attr "type" "ssemov")
9356 (set_attr "prefix" "evex")
9357 (set_attr "mode" "TI")])
9359 (define_insn "*avx512vl_<code>v2div2qi2_mask_1"
9360 [(set (match_operand:V16QI 0 "register_operand" "=v")
9364 (match_operand:V2DI 1 "register_operand" "v"))
9365 (const_vector:V2QI [(const_int 0) (const_int 0)])
9366 (match_operand:QI 2 "register_operand" "Yk"))
9367 (const_vector:V14QI [(const_int 0) (const_int 0)
9368 (const_int 0) (const_int 0)
9369 (const_int 0) (const_int 0)
9370 (const_int 0) (const_int 0)
9371 (const_int 0) (const_int 0)
9372 (const_int 0) (const_int 0)
9373 (const_int 0) (const_int 0)])))]
9375 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9376 [(set_attr "type" "ssemov")
9377 (set_attr "prefix" "evex")
9378 (set_attr "mode" "TI")])
9380 (define_insn "avx512vl_<code>v2div2qi2_mask_store"
9381 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9385 (match_operand:V2DI 1 "register_operand" "v"))
9388 (parallel [(const_int 0) (const_int 1)]))
9389 (match_operand:QI 2 "register_operand" "Yk"))
9392 (parallel [(const_int 2) (const_int 3)
9393 (const_int 4) (const_int 5)
9394 (const_int 6) (const_int 7)
9395 (const_int 8) (const_int 9)
9396 (const_int 10) (const_int 11)
9397 (const_int 12) (const_int 13)
9398 (const_int 14) (const_int 15)]))))]
9400 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%w0%{%2%}, %1}"
9401 [(set_attr "type" "ssemov")
9402 (set_attr "memory" "store")
9403 (set_attr "prefix" "evex")
9404 (set_attr "mode" "TI")])
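;; The masked store forms write only the bytes actually produced by the
;; truncation; the 'w', 'k' and 'q' modifiers used on the Intel-syntax
;; memory operand in these templates print it as a 16-, 32- or 64-bit
;; location, so the assembler sees the width that is really being written.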
9406 (define_insn "*avx512vl_<code><mode>v4qi2_store"
9407 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9410 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9413 (parallel [(const_int 4) (const_int 5)
9414 (const_int 6) (const_int 7)
9415 (const_int 8) (const_int 9)
9416 (const_int 10) (const_int 11)
9417 (const_int 12) (const_int 13)
9418 (const_int 14) (const_int 15)]))))]
9420 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9421 [(set_attr "type" "ssemov")
9422 (set_attr "memory" "store")
9423 (set_attr "prefix" "evex")
9424 (set_attr "mode" "TI")])
9426 (define_insn "avx512vl_<code><mode>v4qi2_mask"
9427 [(set (match_operand:V16QI 0 "register_operand" "=v")
9431 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9433 (match_operand:V16QI 2 "vector_move_operand" "0C")
9434 (parallel [(const_int 0) (const_int 1)
9435 (const_int 2) (const_int 3)]))
9436 (match_operand:QI 3 "register_operand" "Yk"))
9437 (const_vector:V12QI [(const_int 0) (const_int 0)
9438 (const_int 0) (const_int 0)
9439 (const_int 0) (const_int 0)
9440 (const_int 0) (const_int 0)
9441 (const_int 0) (const_int 0)
9442 (const_int 0) (const_int 0)])))]
9444 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9445 [(set_attr "type" "ssemov")
9446 (set_attr "prefix" "evex")
9447 (set_attr "mode" "TI")])
9449 (define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
9450 [(set (match_operand:V16QI 0 "register_operand" "=v")
9454 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9455 (const_vector:V4QI [(const_int 0) (const_int 0)
9456 (const_int 0) (const_int 0)])
9457 (match_operand:QI 2 "register_operand" "Yk"))
9458 (const_vector:V12QI [(const_int 0) (const_int 0)
9459 (const_int 0) (const_int 0)
9460 (const_int 0) (const_int 0)
9461 (const_int 0) (const_int 0)
9462 (const_int 0) (const_int 0)
9463 (const_int 0) (const_int 0)])))]
9465 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9466 [(set_attr "type" "ssemov")
9467 (set_attr "prefix" "evex")
9468 (set_attr "mode" "TI")])
9470 (define_insn "avx512vl_<code><mode>v4qi2_mask_store"
9471 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9475 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9478 (parallel [(const_int 0) (const_int 1)
9479 (const_int 2) (const_int 3)]))
9480 (match_operand:QI 2 "register_operand" "Yk"))
9483 (parallel [(const_int 4) (const_int 5)
9484 (const_int 6) (const_int 7)
9485 (const_int 8) (const_int 9)
9486 (const_int 10) (const_int 11)
9487 (const_int 12) (const_int 13)
9488 (const_int 14) (const_int 15)]))))]
9491 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 8)
9492 return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%k0%{%2%}, %1}";
9493 return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
9495 [(set_attr "type" "ssemov")
9496 (set_attr "memory" "store")
9497 (set_attr "prefix" "evex")
9498 (set_attr "mode" "TI")])
9500 (define_mode_iterator VI2_128_BW_4_256
9501 [(V8HI "TARGET_AVX512BW") V8SI])
9503 (define_insn "*avx512vl_<code><mode>v8qi2_store"
9504 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9507 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9510 (parallel [(const_int 8) (const_int 9)
9511 (const_int 10) (const_int 11)
9512 (const_int 12) (const_int 13)
9513 (const_int 14) (const_int 15)]))))]
9515 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
9516 [(set_attr "type" "ssemov")
9517 (set_attr "memory" "store")
9518 (set_attr "prefix" "evex")
9519 (set_attr "mode" "TI")])
9521 (define_insn "avx512vl_<code><mode>v8qi2_mask"
9522 [(set (match_operand:V16QI 0 "register_operand" "=v")
9526 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9528 (match_operand:V16QI 2 "vector_move_operand" "0C")
9529 (parallel [(const_int 0) (const_int 1)
9530 (const_int 2) (const_int 3)
9531 (const_int 4) (const_int 5)
9532 (const_int 6) (const_int 7)]))
9533 (match_operand:QI 3 "register_operand" "Yk"))
9534 (const_vector:V8QI [(const_int 0) (const_int 0)
9535 (const_int 0) (const_int 0)
9536 (const_int 0) (const_int 0)
9537 (const_int 0) (const_int 0)])))]
9539 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9540 [(set_attr "type" "ssemov")
9541 (set_attr "prefix" "evex")
9542 (set_attr "mode" "TI")])
9544 (define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
9545 [(set (match_operand:V16QI 0 "register_operand" "=v")
9549 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9550 (const_vector:V8QI [(const_int 0) (const_int 0)
9551 (const_int 0) (const_int 0)
9552 (const_int 0) (const_int 0)
9553 (const_int 0) (const_int 0)])
9554 (match_operand:QI 2 "register_operand" "Yk"))
9555 (const_vector:V8QI [(const_int 0) (const_int 0)
9556 (const_int 0) (const_int 0)
9557 (const_int 0) (const_int 0)
9558 (const_int 0) (const_int 0)])))]
9560 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9561 [(set_attr "type" "ssemov")
9562 (set_attr "prefix" "evex")
9563 (set_attr "mode" "TI")])
9565 (define_insn "avx512vl_<code><mode>v8qi2_mask_store"
9566 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9570 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
9573 (parallel [(const_int 0) (const_int 1)
9574 (const_int 2) (const_int 3)
9575 (const_int 4) (const_int 5)
9576 (const_int 6) (const_int 7)]))
9577 (match_operand:QI 2 "register_operand" "Yk"))
9580 (parallel [(const_int 8) (const_int 9)
9581 (const_int 10) (const_int 11)
9582 (const_int 12) (const_int 13)
9583 (const_int 14) (const_int 15)]))))]
9586 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
9587 return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
9588 return "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
9590 [(set_attr "type" "ssemov")
9591 (set_attr "memory" "store")
9592 (set_attr "prefix" "evex")
9593 (set_attr "mode" "TI")])
9595 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
9596 (define_mode_attr pmov_dst_4
9597 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
9598 (define_mode_attr pmov_dst_zeroed_4
9599 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
9600 (define_mode_attr pmov_suff_4
9601 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
9603 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
9604 [(set (match_operand:V8HI 0 "register_operand" "=v")
9606 (any_truncate:<pmov_dst_4>
9607 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
9608 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
9610 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9611 [(set_attr "type" "ssemov")
9612 (set_attr "prefix" "evex")
9613 (set_attr "mode" "TI")])
9615 (define_insn "*avx512vl_<code><mode>v4hi2_store"
9616 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9619 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9622 (parallel [(const_int 4) (const_int 5)
9623 (const_int 6) (const_int 7)]))))]
9625 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
9626 [(set_attr "type" "ssemov")
9627 (set_attr "memory" "store")
9628 (set_attr "prefix" "evex")
9629 (set_attr "mode" "TI")])
9631 (define_insn "avx512vl_<code><mode>v4hi2_mask"
9632 [(set (match_operand:V8HI 0 "register_operand" "=v")
9636 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9638 (match_operand:V8HI 2 "vector_move_operand" "0C")
9639 (parallel [(const_int 0) (const_int 1)
9640 (const_int 2) (const_int 3)]))
9641 (match_operand:QI 3 "register_operand" "Yk"))
9642 (const_vector:V4HI [(const_int 0) (const_int 0)
9643 (const_int 0) (const_int 0)])))]
9645 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9646 [(set_attr "type" "ssemov")
9647 (set_attr "prefix" "evex")
9648 (set_attr "mode" "TI")])
9650 (define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
9651 [(set (match_operand:V8HI 0 "register_operand" "=v")
9655 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9656 (const_vector:V4HI [(const_int 0) (const_int 0)
9657 (const_int 0) (const_int 0)])
9658 (match_operand:QI 2 "register_operand" "Yk"))
9659 (const_vector:V4HI [(const_int 0) (const_int 0)
9660 (const_int 0) (const_int 0)])))]
9662 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9663 [(set_attr "type" "ssemov")
9664 (set_attr "prefix" "evex")
9665 (set_attr "mode" "TI")])
9667 (define_insn "avx512vl_<code><mode>v4hi2_mask_store"
9668 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9672 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
9675 (parallel [(const_int 0) (const_int 1)
9676 (const_int 2) (const_int 3)]))
9677 (match_operand:QI 2 "register_operand" "Yk"))
9680 (parallel [(const_int 4) (const_int 5)
9681 (const_int 6) (const_int 7)]))))]
9684 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
9685 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
9686 return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
9688 [(set_attr "type" "ssemov")
9689 (set_attr "memory" "store")
9690 (set_attr "prefix" "evex")
9691 (set_attr "mode" "TI")])
9693 (define_insn "*avx512vl_<code>v2div2hi2_store"
9694 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9697 (match_operand:V2DI 1 "register_operand" "v"))
9700 (parallel [(const_int 2) (const_int 3)
9701 (const_int 4) (const_int 5)
9702 (const_int 6) (const_int 7)]))))]
9704 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
9705 [(set_attr "type" "ssemov")
9706 (set_attr "memory" "store")
9707 (set_attr "prefix" "evex")
9708 (set_attr "mode" "TI")])
9710 (define_insn "avx512vl_<code>v2div2hi2_mask"
9711 [(set (match_operand:V8HI 0 "register_operand" "=v")
9715 (match_operand:V2DI 1 "register_operand" "v"))
9717 (match_operand:V8HI 2 "vector_move_operand" "0C")
9718 (parallel [(const_int 0) (const_int 1)]))
9719 (match_operand:QI 3 "register_operand" "Yk"))
9720 (const_vector:V6HI [(const_int 0) (const_int 0)
9721 (const_int 0) (const_int 0)
9722 (const_int 0) (const_int 0)])))]
9724 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9725 [(set_attr "type" "ssemov")
9726 (set_attr "prefix" "evex")
9727 (set_attr "mode" "TI")])
9729 (define_insn "*avx512vl_<code>v2div2hi2_mask_1"
9730 [(set (match_operand:V8HI 0 "register_operand" "=v")
9734 (match_operand:V2DI 1 "register_operand" "v"))
9735 (const_vector:V2HI [(const_int 0) (const_int 0)])
9736 (match_operand:QI 2 "register_operand" "Yk"))
9737 (const_vector:V6HI [(const_int 0) (const_int 0)
9738 (const_int 0) (const_int 0)
9739 (const_int 0) (const_int 0)])))]
9741 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9742 [(set_attr "type" "ssemov")
9743 (set_attr "prefix" "evex")
9744 (set_attr "mode" "TI")])
9746 (define_insn "avx512vl_<code>v2div2hi2_mask_store"
9747 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9751 (match_operand:V2DI 1 "register_operand" "v"))
9754 (parallel [(const_int 0) (const_int 1)]))
9755 (match_operand:QI 2 "register_operand" "Yk"))
9758 (parallel [(const_int 2) (const_int 3)
9759 (const_int 4) (const_int 5)
9760 (const_int 6) (const_int 7)]))))]
9762 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
9763 [(set_attr "type" "ssemov")
9764 (set_attr "memory" "store")
9765 (set_attr "prefix" "evex")
9766 (set_attr "mode" "TI")])
9768 (define_insn "*avx512vl_<code>v2div2si2"
9769 [(set (match_operand:V4SI 0 "register_operand" "=v")
9772 (match_operand:V2DI 1 "register_operand" "v"))
9773 (match_operand:V2SI 2 "const0_operand")))]
9775 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9776 [(set_attr "type" "ssemov")
9777 (set_attr "prefix" "evex")
9778 (set_attr "mode" "TI")])
9780 (define_insn "*avx512vl_<code>v2div2si2_store"
9781 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9784 (match_operand:V2DI 1 "register_operand" "v"))
9787 (parallel [(const_int 2) (const_int 3)]))))]
9789 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9790 [(set_attr "type" "ssemov")
9791 (set_attr "memory" "store")
9792 (set_attr "prefix" "evex")
9793 (set_attr "mode" "TI")])
9795 (define_insn "avx512vl_<code>v2div2si2_mask"
9796 [(set (match_operand:V4SI 0 "register_operand" "=v")
9800 (match_operand:V2DI 1 "register_operand" "v"))
9802 (match_operand:V4SI 2 "vector_move_operand" "0C")
9803 (parallel [(const_int 0) (const_int 1)]))
9804 (match_operand:QI 3 "register_operand" "Yk"))
9805 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9807 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9808 [(set_attr "type" "ssemov")
9809 (set_attr "prefix" "evex")
9810 (set_attr "mode" "TI")])
9812 (define_insn "*avx512vl_<code>v2div2si2_mask_1"
9813 [(set (match_operand:V4SI 0 "register_operand" "=v")
9817 (match_operand:V2DI 1 "register_operand" "v"))
9818 (const_vector:V2SI [(const_int 0) (const_int 0)])
9819 (match_operand:QI 2 "register_operand" "Yk"))
9820 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9822 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9823 [(set_attr "type" "ssemov")
9824 (set_attr "prefix" "evex")
9825 (set_attr "mode" "TI")])
9827 (define_insn "avx512vl_<code>v2div2si2_mask_store"
9828 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9832 (match_operand:V2DI 1 "register_operand" "v"))
9835 (parallel [(const_int 0) (const_int 1)]))
9836 (match_operand:QI 2 "register_operand" "Yk"))
9839 (parallel [(const_int 2) (const_int 3)]))))]
9841 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %t1}"
9842 [(set_attr "type" "ssemov")
9843 (set_attr "memory" "store")
9844 (set_attr "prefix" "evex")
9845 (set_attr "mode" "TI")])
9847 (define_insn "*avx512f_<code>v8div16qi2"
9848 [(set (match_operand:V16QI 0 "register_operand" "=v")
9851 (match_operand:V8DI 1 "register_operand" "v"))
9852 (const_vector:V8QI [(const_int 0) (const_int 0)
9853 (const_int 0) (const_int 0)
9854 (const_int 0) (const_int 0)
9855 (const_int 0) (const_int 0)])))]
9857 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9858 [(set_attr "type" "ssemov")
9859 (set_attr "prefix" "evex")
9860 (set_attr "mode" "TI")])
9862 (define_insn "*avx512f_<code>v8div16qi2_store"
9863 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9866 (match_operand:V8DI 1 "register_operand" "v"))
9869 (parallel [(const_int 8) (const_int 9)
9870 (const_int 10) (const_int 11)
9871 (const_int 12) (const_int 13)
9872 (const_int 14) (const_int 15)]))))]
9874 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9875 [(set_attr "type" "ssemov")
9876 (set_attr "memory" "store")
9877 (set_attr "prefix" "evex")
9878 (set_attr "mode" "TI")])
9880 (define_insn "avx512f_<code>v8div16qi2_mask"
9881 [(set (match_operand:V16QI 0 "register_operand" "=v")
9885 (match_operand:V8DI 1 "register_operand" "v"))
9887 (match_operand:V16QI 2 "vector_move_operand" "0C")
9888 (parallel [(const_int 0) (const_int 1)
9889 (const_int 2) (const_int 3)
9890 (const_int 4) (const_int 5)
9891 (const_int 6) (const_int 7)]))
9892 (match_operand:QI 3 "register_operand" "Yk"))
9893 (const_vector:V8QI [(const_int 0) (const_int 0)
9894 (const_int 0) (const_int 0)
9895 (const_int 0) (const_int 0)
9896 (const_int 0) (const_int 0)])))]
9898 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9899 [(set_attr "type" "ssemov")
9900 (set_attr "prefix" "evex")
9901 (set_attr "mode" "TI")])
9903 (define_insn "*avx512f_<code>v8div16qi2_mask_1"
9904 [(set (match_operand:V16QI 0 "register_operand" "=v")
9908 (match_operand:V8DI 1 "register_operand" "v"))
9909 (const_vector:V8QI [(const_int 0) (const_int 0)
9910 (const_int 0) (const_int 0)
9911 (const_int 0) (const_int 0)
9912 (const_int 0) (const_int 0)])
9913 (match_operand:QI 2 "register_operand" "Yk"))
9914 (const_vector:V8QI [(const_int 0) (const_int 0)
9915 (const_int 0) (const_int 0)
9916 (const_int 0) (const_int 0)
9917 (const_int 0) (const_int 0)])))]
9919 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
9920 [(set_attr "type" "ssemov")
9921 (set_attr "prefix" "evex")
9922 (set_attr "mode" "TI")])
9924 (define_insn "avx512f_<code>v8div16qi2_mask_store"
9925 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9929 (match_operand:V8DI 1 "register_operand" "v"))
9932 (parallel [(const_int 0) (const_int 1)
9933 (const_int 2) (const_int 3)
9934 (const_int 4) (const_int 5)
9935 (const_int 6) (const_int 7)]))
9936 (match_operand:QI 2 "register_operand" "Yk"))
9939 (parallel [(const_int 8) (const_int 9)
9940 (const_int 10) (const_int 11)
9941 (const_int 12) (const_int 13)
9942 (const_int 14) (const_int 15)]))))]
9944 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
9945 [(set_attr "type" "ssemov")
9946 (set_attr "memory" "store")
9947 (set_attr "prefix" "evex")
9948 (set_attr "mode" "TI")])
9950 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9952 ;; Parallel integral arithmetic
9954 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9956 (define_expand "neg<mode>2"
9957 [(set (match_operand:VI_AVX2 0 "register_operand")
9960 (match_operand:VI_AVX2 1 "vector_operand")))]
9962 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
9964 (define_expand "<plusminus_insn><mode>3"
9965 [(set (match_operand:VI_AVX2 0 "register_operand")
9967 (match_operand:VI_AVX2 1 "vector_operand")
9968 (match_operand:VI_AVX2 2 "vector_operand")))]
9970 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9972 (define_expand "<plusminus_insn><mode>3_mask"
9973 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
9974 (vec_merge:VI48_AVX512VL
9975 (plusminus:VI48_AVX512VL
9976 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
9977 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
9978 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
9979 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9981 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9983 (define_expand "<plusminus_insn><mode>3_mask"
9984 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
9985 (vec_merge:VI12_AVX512VL
9986 (plusminus:VI12_AVX512VL
9987 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
9988 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
9989 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
9990 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9992 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9994 (define_insn "*<plusminus_insn><mode>3"
9995 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
9997 (match_operand:VI_AVX2 1 "vector_operand" "<comm>0,v")
9998 (match_operand:VI_AVX2 2 "vector_operand" "xBm,vm")))]
10000 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10002 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10003 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10004 [(set_attr "isa" "noavx,avx")
10005 (set_attr "type" "sseiadd")
10006 (set_attr "prefix_data16" "1,*")
10007 (set_attr "prefix" "<mask_prefix3>")
10008 (set_attr "mode" "<sseinsnmode>")])
10010 (define_insn "*<plusminus_insn><mode>3_mask"
10011 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10012 (vec_merge:VI48_AVX512VL
10013 (plusminus:VI48_AVX512VL
10014 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10015 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
10016 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
10017 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10019 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10020 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10021 [(set_attr "type" "sseiadd")
10022 (set_attr "prefix" "evex")
10023 (set_attr "mode" "<sseinsnmode>")])
10025 (define_insn "*<plusminus_insn><mode>3_mask"
10026 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10027 (vec_merge:VI12_AVX512VL
10028 (plusminus:VI12_AVX512VL
10029 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
10030 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
10031 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
10032 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10033 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10034 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
10035 [(set_attr "type" "sseiadd")
10036 (set_attr "prefix" "evex")
10037 (set_attr "mode" "<sseinsnmode>")])
10039 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10040 [(set (match_operand:VI12_AVX2 0 "register_operand")
10041 (sat_plusminus:VI12_AVX2
10042 (match_operand:VI12_AVX2 1 "vector_operand")
10043 (match_operand:VI12_AVX2 2 "vector_operand")))]
10044 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10045 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10047 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
10048 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
10049 (sat_plusminus:VI12_AVX2
10050 (match_operand:VI12_AVX2 1 "vector_operand" "<comm>0,v")
10051 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))]
10052 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
10053 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10055 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
10056 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10057 [(set_attr "isa" "noavx,avx")
10058 (set_attr "type" "sseiadd")
10059 (set_attr "prefix_data16" "1,*")
10060 (set_attr "prefix" "orig,maybe_evex")
10061 (set_attr "mode" "TI")])
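;; Saturating add and subtract (padds[bw], paddus[bw], psubs[bw], psubus[bw])
;; exist only for byte and word elements, hence the VI12_AVX2 iterator in
;; the two patterns above.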
10063 (define_expand "mul<mode>3<mask_name>"
10064 [(set (match_operand:VI1_AVX512 0 "register_operand")
10065 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
10066 (match_operand:VI1_AVX512 2 "register_operand")))]
10067 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10069 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
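;; There is no byte-element multiply instruction, so V*QImode multiplication
;; is synthesized from the word-sized multiply patterns by
;; ix86_expand_vecop_qihi.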
10073 (define_expand "mul<mode>3<mask_name>"
10074 [(set (match_operand:VI2_AVX2 0 "register_operand")
10075 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
10076 (match_operand:VI2_AVX2 2 "vector_operand")))]
10077 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10078 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
10080 (define_insn "*mul<mode>3<mask_name>"
10081 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
10082 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
10083 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
10085 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
10086 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10088 pmullw\t{%2, %0|%0, %2}
10089 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10090 [(set_attr "isa" "noavx,avx")
10091 (set_attr "type" "sseimul")
10092 (set_attr "prefix_data16" "1,*")
10093 (set_attr "prefix" "orig,vex")
10094 (set_attr "mode" "<sseinsnmode>")])
10096 (define_expand "<s>mul<mode>3_highpart<mask_name>"
10097 [(set (match_operand:VI2_AVX2 0 "register_operand")
10099 (lshiftrt:<ssedoublemode>
10100 (mult:<ssedoublemode>
10101 (any_extend:<ssedoublemode>
10102 (match_operand:VI2_AVX2 1 "vector_operand"))
10103 (any_extend:<ssedoublemode>
10104 (match_operand:VI2_AVX2 2 "vector_operand")))
10107 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10108 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
10110 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
10111 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
10113 (lshiftrt:<ssedoublemode>
10114 (mult:<ssedoublemode>
10115 (any_extend:<ssedoublemode>
10116 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
10117 (any_extend:<ssedoublemode>
10118 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
10121 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
10122 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10124 pmulh<u>w\t{%2, %0|%0, %2}
10125 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10126 [(set_attr "isa" "noavx,avx")
10127 (set_attr "type" "sseimul")
10128 (set_attr "prefix_data16" "1,*")
10129 (set_attr "prefix" "orig,vex")
10130 (set_attr "mode" "<sseinsnmode>")])
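;; pmulh<u>w returns bits 16-31 of the 32-bit product of each pair of 16-bit
;; elements; the RTL above models this as a sign- or zero-extending multiply
;; followed by a logical right shift and truncation back to the element width.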
10132 (define_expand "vec_widen_umult_even_v16si<mask_name>"
10133 [(set (match_operand:V8DI 0 "register_operand")
10137 (match_operand:V16SI 1 "nonimmediate_operand")
10138 (parallel [(const_int 0) (const_int 2)
10139 (const_int 4) (const_int 6)
10140 (const_int 8) (const_int 10)
10141 (const_int 12) (const_int 14)])))
10144 (match_operand:V16SI 2 "nonimmediate_operand")
10145 (parallel [(const_int 0) (const_int 2)
10146 (const_int 4) (const_int 6)
10147 (const_int 8) (const_int 10)
10148 (const_int 12) (const_int 14)])))))]
10150 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
10152 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
10153 [(set (match_operand:V8DI 0 "register_operand" "=v")
10157 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
10158 (parallel [(const_int 0) (const_int 2)
10159 (const_int 4) (const_int 6)
10160 (const_int 8) (const_int 10)
10161 (const_int 12) (const_int 14)])))
10164 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
10165 (parallel [(const_int 0) (const_int 2)
10166 (const_int 4) (const_int 6)
10167 (const_int 8) (const_int 10)
10168 (const_int 12) (const_int 14)])))))]
10169 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
10170 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10171 [(set_attr "isa" "avx512f")
10172 (set_attr "type" "sseimul")
10173 (set_attr "prefix_extra" "1")
10174 (set_attr "prefix" "evex")
10175 (set_attr "mode" "XI")])
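;; vpmuludq multiplies only the even-numbered 32-bit elements of its inputs,
;; producing full 64-bit products; the odd elements are ignored, which is why
;; both sources appear as vec_selects of the even indices.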
10177 (define_expand "vec_widen_umult_even_v8si<mask_name>"
10178 [(set (match_operand:V4DI 0 "register_operand")
10182 (match_operand:V8SI 1 "nonimmediate_operand")
10183 (parallel [(const_int 0) (const_int 2)
10184 (const_int 4) (const_int 6)])))
10187 (match_operand:V8SI 2 "nonimmediate_operand")
10188 (parallel [(const_int 0) (const_int 2)
10189 (const_int 4) (const_int 6)])))))]
10190 "TARGET_AVX2 && <mask_avx512vl_condition>"
10191 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
10193 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
10194 [(set (match_operand:V4DI 0 "register_operand" "=v")
10198 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
10199 (parallel [(const_int 0) (const_int 2)
10200 (const_int 4) (const_int 6)])))
10203 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
10204 (parallel [(const_int 0) (const_int 2)
10205 (const_int 4) (const_int 6)])))))]
10206 "TARGET_AVX2 && <mask_avx512vl_condition>
10207 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
10208 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10209 [(set_attr "type" "sseimul")
10210 (set_attr "prefix" "maybe_evex")
10211 (set_attr "mode" "OI")])
10213 (define_expand "vec_widen_umult_even_v4si<mask_name>"
10214 [(set (match_operand:V2DI 0 "register_operand")
10218 (match_operand:V4SI 1 "vector_operand")
10219 (parallel [(const_int 0) (const_int 2)])))
10222 (match_operand:V4SI 2 "vector_operand")
10223 (parallel [(const_int 0) (const_int 2)])))))]
10224 "TARGET_SSE2 && <mask_avx512vl_condition>"
10225 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
10227 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
10228 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10232 (match_operand:V4SI 1 "vector_operand" "%0,v")
10233 (parallel [(const_int 0) (const_int 2)])))
10236 (match_operand:V4SI 2 "vector_operand" "xBm,vm")
10237 (parallel [(const_int 0) (const_int 2)])))))]
10238 "TARGET_SSE2 && <mask_avx512vl_condition>
10239 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
10241 pmuludq\t{%2, %0|%0, %2}
10242 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10243 [(set_attr "isa" "noavx,avx")
10244 (set_attr "type" "sseimul")
10245 (set_attr "prefix_data16" "1,*")
10246 (set_attr "prefix" "orig,maybe_evex")
10247 (set_attr "mode" "TI")])
10249 (define_expand "vec_widen_smult_even_v16si<mask_name>"
10250 [(set (match_operand:V8DI 0 "register_operand")
10254 (match_operand:V16SI 1 "nonimmediate_operand")
10255 (parallel [(const_int 0) (const_int 2)
10256 (const_int 4) (const_int 6)
10257 (const_int 8) (const_int 10)
10258 (const_int 12) (const_int 14)])))
10261 (match_operand:V16SI 2 "nonimmediate_operand")
10262 (parallel [(const_int 0) (const_int 2)
10263 (const_int 4) (const_int 6)
10264 (const_int 8) (const_int 10)
10265 (const_int 12) (const_int 14)])))))]
10267 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
10269 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
10270 [(set (match_operand:V8DI 0 "register_operand" "=v")
10274 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
10275 (parallel [(const_int 0) (const_int 2)
10276 (const_int 4) (const_int 6)
10277 (const_int 8) (const_int 10)
10278 (const_int 12) (const_int 14)])))
10281 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
10282 (parallel [(const_int 0) (const_int 2)
10283 (const_int 4) (const_int 6)
10284 (const_int 8) (const_int 10)
10285 (const_int 12) (const_int 14)])))))]
10286 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
10287 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10288 [(set_attr "isa" "avx512f")
10289 (set_attr "type" "sseimul")
10290 (set_attr "prefix_extra" "1")
10291 (set_attr "prefix" "evex")
10292 (set_attr "mode" "XI")])
10294 (define_expand "vec_widen_smult_even_v8si<mask_name>"
10295 [(set (match_operand:V4DI 0 "register_operand")
10299 (match_operand:V8SI 1 "nonimmediate_operand")
10300 (parallel [(const_int 0) (const_int 2)
10301 (const_int 4) (const_int 6)])))
10304 (match_operand:V8SI 2 "nonimmediate_operand")
10305 (parallel [(const_int 0) (const_int 2)
10306 (const_int 4) (const_int 6)])))))]
10307 "TARGET_AVX2 && <mask_avx512vl_condition>"
10308 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
10310 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
10311 [(set (match_operand:V4DI 0 "register_operand" "=v")
10315 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
10316 (parallel [(const_int 0) (const_int 2)
10317 (const_int 4) (const_int 6)])))
10320 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
10321 (parallel [(const_int 0) (const_int 2)
10322 (const_int 4) (const_int 6)])))))]
10324 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
10325 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10326 [(set_attr "type" "sseimul")
10327 (set_attr "prefix_extra" "1")
10328 (set_attr "prefix" "vex")
10329 (set_attr "mode" "OI")])
10331 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
10332 [(set (match_operand:V2DI 0 "register_operand")
10336 (match_operand:V4SI 1 "vector_operand")
10337 (parallel [(const_int 0) (const_int 2)])))
10340 (match_operand:V4SI 2 "vector_operand")
10341 (parallel [(const_int 0) (const_int 2)])))))]
10342 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
10343 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
10345 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
10346 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
10350 (match_operand:V4SI 1 "vector_operand" "%0,0,v")
10351 (parallel [(const_int 0) (const_int 2)])))
10354 (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
10355 (parallel [(const_int 0) (const_int 2)])))))]
10356 "TARGET_SSE4_1 && <mask_avx512vl_condition>
10357 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
10359 pmuldq\t{%2, %0|%0, %2}
10360 pmuldq\t{%2, %0|%0, %2}
10361 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10362 [(set_attr "isa" "noavx,noavx,avx")
10363 (set_attr "type" "sseimul")
10364 (set_attr "prefix_data16" "1,1,*")
10365 (set_attr "prefix_extra" "1")
10366 (set_attr "prefix" "orig,orig,vex")
10367 (set_attr "mode" "TI")])
10369 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
10370 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
10371 (unspec:<sseunpackmode>
10372 [(match_operand:VI2_AVX2 1 "register_operand" "v")
10373 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
10374 UNSPEC_PMADDWD512))]
10375 "TARGET_AVX512BW && <mask_mode512bit_condition>"
10376 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
10377 [(set_attr "type" "sseiadd")
10378 (set_attr "prefix" "evex")
10379 (set_attr "mode" "XI")])
10381 (define_expand "avx2_pmaddwd"
10382 [(set (match_operand:V8SI 0 "register_operand")
10387 (match_operand:V16HI 1 "nonimmediate_operand")
10388 (parallel [(const_int 0) (const_int 2)
10389 (const_int 4) (const_int 6)
10390 (const_int 8) (const_int 10)
10391 (const_int 12) (const_int 14)])))
10394 (match_operand:V16HI 2 "nonimmediate_operand")
10395 (parallel [(const_int 0) (const_int 2)
10396 (const_int 4) (const_int 6)
10397 (const_int 8) (const_int 10)
10398 (const_int 12) (const_int 14)]))))
10401 (vec_select:V8HI (match_dup 1)
10402 (parallel [(const_int 1) (const_int 3)
10403 (const_int 5) (const_int 7)
10404 (const_int 9) (const_int 11)
10405 (const_int 13) (const_int 15)])))
10407 (vec_select:V8HI (match_dup 2)
10408 (parallel [(const_int 1) (const_int 3)
10409 (const_int 5) (const_int 7)
10410 (const_int 9) (const_int 11)
10411 (const_int 13) (const_int 15)]))))))]
10413 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
10415 (define_insn "*avx2_pmaddwd"
10416 [(set (match_operand:V8SI 0 "register_operand" "=x,v")
10421 (match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
10422 (parallel [(const_int 0) (const_int 2)
10423 (const_int 4) (const_int 6)
10424 (const_int 8) (const_int 10)
10425 (const_int 12) (const_int 14)])))
10428 (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
10429 (parallel [(const_int 0) (const_int 2)
10430 (const_int 4) (const_int 6)
10431 (const_int 8) (const_int 10)
10432 (const_int 12) (const_int 14)]))))
10435 (vec_select:V8HI (match_dup 1)
10436 (parallel [(const_int 1) (const_int 3)
10437 (const_int 5) (const_int 7)
10438 (const_int 9) (const_int 11)
10439 (const_int 13) (const_int 15)])))
10441 (vec_select:V8HI (match_dup 2)
10442 (parallel [(const_int 1) (const_int 3)
10443 (const_int 5) (const_int 7)
10444 (const_int 9) (const_int 11)
10445 (const_int 13) (const_int 15)]))))))]
10446 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
10447 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
10448 [(set_attr "type" "sseiadd")
10449 (set_attr "isa" "*,avx512bw")
10450 (set_attr "prefix" "vex,evex")
10451 (set_attr "mode" "OI")])
10453 (define_expand "sse2_pmaddwd"
10454 [(set (match_operand:V4SI 0 "register_operand")
10459 (match_operand:V8HI 1 "vector_operand")
10460 (parallel [(const_int 0) (const_int 2)
10461 (const_int 4) (const_int 6)])))
10464 (match_operand:V8HI 2 "vector_operand")
10465 (parallel [(const_int 0) (const_int 2)
10466 (const_int 4) (const_int 6)]))))
10469 (vec_select:V4HI (match_dup 1)
10470 (parallel [(const_int 1) (const_int 3)
10471 (const_int 5) (const_int 7)])))
10473 (vec_select:V4HI (match_dup 2)
10474 (parallel [(const_int 1) (const_int 3)
10475 (const_int 5) (const_int 7)]))))))]
10477 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
10479 (define_insn "*sse2_pmaddwd"
10480 [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
10485 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
10486 (parallel [(const_int 0) (const_int 2)
10487 (const_int 4) (const_int 6)])))
10490 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
10491 (parallel [(const_int 0) (const_int 2)
10492 (const_int 4) (const_int 6)]))))
10495 (vec_select:V4HI (match_dup 1)
10496 (parallel [(const_int 1) (const_int 3)
10497 (const_int 5) (const_int 7)])))
10499 (vec_select:V4HI (match_dup 2)
10500 (parallel [(const_int 1) (const_int 3)
10501 (const_int 5) (const_int 7)]))))))]
10502 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
10504 pmaddwd\t{%2, %0|%0, %2}
10505 vpmaddwd\t{%2, %1, %0|%0, %1, %2}
10506 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
10507 [(set_attr "isa" "noavx,avx,avx512bw")
10508 (set_attr "type" "sseiadd")
10509 (set_attr "atom_unit" "simul")
10510 (set_attr "prefix_data16" "1,*,*")
10511 (set_attr "prefix" "orig,vex,evex")
10512 (set_attr "mode" "TI")])
10514 (define_insn "avx512dq_mul<mode>3<mask_name>"
10515 [(set (match_operand:VI8 0 "register_operand" "=v")
10517 (match_operand:VI8 1 "register_operand" "v")
10518 (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
10519 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
10520 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10521 [(set_attr "type" "sseimul")
10522 (set_attr "prefix" "evex")
10523 (set_attr "mode" "<sseinsnmode>")])
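;; 32-bit element multiply.  SSE4.1 (and AVX2/AVX-512F for the wider
;; modes) provides this directly as pmulld/vpmulld; plain SSE2 does not,
;; so the expander below falls back to ix86_expand_sse2_mulv4si3, which
;; synthesizes the low products from pmuludq partial multiplies.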
10525 (define_expand "mul<mode>3<mask_name>"
10526 [(set (match_operand:VI4_AVX512F 0 "register_operand")
10528 (match_operand:VI4_AVX512F 1 "general_vector_operand")
10529 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
10530 "TARGET_SSE2 && <mask_mode512bit_condition>"
10534 if (!vector_operand (operands[1], <MODE>mode))
10535 operands[1] = force_reg (<MODE>mode, operands[1]);
10536 if (!vector_operand (operands[2], <MODE>mode))
10537 operands[2] = force_reg (<MODE>mode, operands[2]);
10538 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
10542 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
10547 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
10548 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
10550 (match_operand:VI4_AVX512F 1 "vector_operand" "%0,0,v")
10551 (match_operand:VI4_AVX512F 2 "vector_operand" "YrBm,*xBm,vm")))]
10552 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
10554 pmulld\t{%2, %0|%0, %2}
10555 pmulld\t{%2, %0|%0, %2}
10556 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10557 [(set_attr "isa" "noavx,noavx,avx")
10558 (set_attr "type" "sseimul")
10559 (set_attr "prefix_extra" "1")
10560 (set_attr "prefix" "<mask_prefix4>")
10561 (set_attr "btver2_decode" "vector,vector,vector")
10562 (set_attr "mode" "<sseinsnmode>")])
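;; 64-bit element multiply.  Only AVX-512DQ has a native form (vpmullq,
;; above); otherwise the expander below builds the product from
;; 32x32->64 partial products in ix86_expand_sse2_mulvxdi3.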
10564 (define_expand "mul<mode>3"
10565 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
10566 (mult:VI8_AVX2_AVX512F
10567 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
10568 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
10571 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
10575 (define_expand "vec_widen_<s>mult_hi_<mode>"
10576 [(match_operand:<sseunpackmode> 0 "register_operand")
10577 (any_extend:<sseunpackmode>
10578 (match_operand:VI124_AVX2 1 "register_operand"))
10579 (match_operand:VI124_AVX2 2 "register_operand")]
10582 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
10587 (define_expand "vec_widen_<s>mult_lo_<mode>"
10588 [(match_operand:<sseunpackmode> 0 "register_operand")
10589 (any_extend:<sseunpackmode>
10590 (match_operand:VI124_AVX2 1 "register_operand"))
10591 (match_operand:VI124_AVX2 2 "register_operand")]
10594 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
10599 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
10600 ;; named patterns, but signed V4SI needs special help for plain SSE2.
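;; (Plain SSE2 only has pmuludq, the unsigned even multiply; the signed
;; form has to be synthesized from it by ix86_expand_mul_widen_evenodd
;; until SSE4.1 supplies pmuldq.)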
10601 (define_expand "vec_widen_smult_even_v4si"
10602 [(match_operand:V2DI 0 "register_operand")
10603 (match_operand:V4SI 1 "vector_operand")
10604 (match_operand:V4SI 2 "vector_operand")]
10607 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
10612 (define_expand "vec_widen_<s>mult_odd_<mode>"
10613 [(match_operand:<sseunpackmode> 0 "register_operand")
10614 (any_extend:<sseunpackmode>
10615 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
10616 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
10619 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
10624 (define_mode_attr SDOT_PMADD_SUF
10625 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
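;; sdot_prod<mode> is a widening dot product: pmaddwd already returns the
;; pairwise 16x16->32 products summed per pair, so the result only needs
;; to be added to the accumulator in operand 3.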
10627 (define_expand "sdot_prod<mode>"
10628 [(match_operand:<sseunpackmode> 0 "register_operand")
10629 (match_operand:VI2_AVX2 1 "register_operand")
10630 (match_operand:VI2_AVX2 2 "register_operand")
10631 (match_operand:<sseunpackmode> 3 "register_operand")]
10634 rtx t = gen_reg_rtx (<sseunpackmode>mode);
10635 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
10636 emit_insn (gen_rtx_SET (operands[0],
10637 gen_rtx_PLUS (<sseunpackmode>mode,
10642 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
10643 ;; back together when madd is available.
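;; With XOP this is done directly: vpmacsdqh and vpmacsdql multiply the
;; odd and even signed dword pairs to quadwords and accumulate both
;; partial results onto operand 3.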
10644 (define_expand "sdot_prodv4si"
10645 [(match_operand:V2DI 0 "register_operand")
10646 (match_operand:V4SI 1 "register_operand")
10647 (match_operand:V4SI 2 "register_operand")
10648 (match_operand:V2DI 3 "register_operand")]
10651 rtx t = gen_reg_rtx (V2DImode);
10652 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
10653 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
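;; psadbw computes two 64-bit sums of absolute byte differences, one per
;; 64-bit half of the vector.  The V2DI result is reinterpreted as V4SI
;; (the upper dword of each sum is always zero) and added to the V4SI
;; accumulator; usadv32qi below does the same per 128-bit lane with
;; vpsadbw.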
10657 (define_expand "usadv16qi"
10658 [(match_operand:V4SI 0 "register_operand")
10659 (match_operand:V16QI 1 "register_operand")
10660 (match_operand:V16QI 2 "vector_operand")
10661 (match_operand:V4SI 3 "vector_operand")]
10664 rtx t1 = gen_reg_rtx (V2DImode);
10665 rtx t2 = gen_reg_rtx (V4SImode);
10666 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
10667 convert_move (t2, t1, 0);
10668 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
10672 (define_expand "usadv32qi"
10673 [(match_operand:V8SI 0 "register_operand")
10674 (match_operand:V32QI 1 "register_operand")
10675 (match_operand:V32QI 2 "nonimmediate_operand")
10676 (match_operand:V8SI 3 "nonimmediate_operand")]
10679 rtx t1 = gen_reg_rtx (V4DImode);
10680 rtx t2 = gen_reg_rtx (V8SImode);
10681 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
10682 convert_move (t2, t1, 0);
10683 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
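;; Vector shifts by a scalar count.  The count is a DImode operand that
;; may be an immediate or the low quadword of an SSE register.  An
;; arithmetic right shift of 64-bit elements (vpsraq) exists only in
;; EVEX form, which is why the AVX-512 patterns are kept separate from
;; the SSE2/AVX ones.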
10687 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
10688 [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
10689 (ashiftrt:VI248_AVX512BW_1
10690 (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
10691 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10693 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10694 [(set_attr "type" "sseishft")
10695 (set (attr "length_immediate")
10696 (if_then_else (match_operand 2 "const_int_operand")
10698 (const_string "0")))
10699 (set_attr "mode" "<sseinsnmode>")])
10701 (define_insn "ashr<mode>3"
10702 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
10703 (ashiftrt:VI24_AVX2
10704 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
10705 (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
10708 psra<ssemodesuffix>\t{%2, %0|%0, %2}
10709 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10710 [(set_attr "isa" "noavx,avx")
10711 (set_attr "type" "sseishft")
10712 (set (attr "length_immediate")
10713 (if_then_else (match_operand 2 "const_int_operand")
10715 (const_string "0")))
10716 (set_attr "prefix_data16" "1,*")
10717 (set_attr "prefix" "orig,vex")
10718 (set_attr "mode" "<sseinsnmode>")])
10720 (define_insn "ashr<mode>3<mask_name>"
10721 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
10722 (ashiftrt:VI248_AVX512BW_AVX512VL
10723 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
10724 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
10726 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10727 [(set_attr "type" "sseishft")
10728 (set (attr "length_immediate")
10729 (if_then_else (match_operand 2 "const_int_operand")
10731 (const_string "0")))
10732 (set_attr "mode" "<sseinsnmode>")])
10734 (define_insn "<shift_insn><mode>3<mask_name>"
10735 [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v")
10736 (any_lshift:VI2_AVX2_AVX512BW
10737 (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v")
10738 (match_operand:DI 2 "nonmemory_operand" "xN,vN")))]
10739 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10741 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
10742 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10743 [(set_attr "isa" "noavx,avx")
10744 (set_attr "type" "sseishft")
10745 (set (attr "length_immediate")
10746 (if_then_else (match_operand 2 "const_int_operand")
10748 (const_string "0")))
10749 (set_attr "prefix_data16" "1,*")
10750 (set_attr "prefix" "orig,vex")
10751 (set_attr "mode" "<sseinsnmode>")])
10753 (define_insn "<shift_insn><mode>3<mask_name>"
10754 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,x,v")
10755 (any_lshift:VI48_AVX2
10756 (match_operand:VI48_AVX2 1 "register_operand" "0,x,v")
10757 (match_operand:DI 2 "nonmemory_operand" "xN,xN,vN")))]
10758 "TARGET_SSE2 && <mask_mode512bit_condition>"
10760 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
10761 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
10762 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10763 [(set_attr "isa" "noavx,avx,avx512bw")
10764 (set_attr "type" "sseishft")
10765 (set (attr "length_immediate")
10766 (if_then_else (match_operand 2 "const_int_operand")
10768 (const_string "0")))
10769 (set_attr "prefix_data16" "1,*,*")
10770 (set_attr "prefix" "orig,vex,evex")
10771 (set_attr "mode" "<sseinsnmode>")])
10773 (define_insn "<shift_insn><mode>3<mask_name>"
10774 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
10775 (any_lshift:VI48_512
10776 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
10777 (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
10778 "TARGET_AVX512F && <mask_mode512bit_condition>"
10779 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10780 [(set_attr "isa" "avx512f")
10781 (set_attr "type" "sseishft")
10782 (set (attr "length_immediate")
10783 (if_then_else (match_operand 2 "const_int_operand")
10785 (const_string "0")))
10786 (set_attr "prefix" "evex")
10787 (set_attr "mode" "<sseinsnmode>")])
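;; Whole-vector shifts.  vec_shl_<mode> and vec_shr_<mode> view the
;; operand as V1TI and use pslldq/psrldq, which shift the full 128-bit
;; value by bytes; the bit count (a multiple of 8, enforced by
;; const_0_to_255_mul_8_operand) is converted to a byte count in the
;; insn output code.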
10790 (define_expand "vec_shl_<mode>"
10791 [(set (match_dup 3)
10793 (match_operand:VI_128 1 "register_operand")
10794 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10795 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10798 operands[1] = gen_lowpart (V1TImode, operands[1]);
10799 operands[3] = gen_reg_rtx (V1TImode);
10800 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
10803 (define_insn "<sse2_avx2>_ashl<mode>3"
10804 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10806 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10807 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10810 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10812 switch (which_alternative)
10815 return "pslldq\t{%2, %0|%0, %2}";
10817 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
10819 gcc_unreachable ();
10822 [(set_attr "isa" "noavx,avx")
10823 (set_attr "type" "sseishft")
10824 (set_attr "length_immediate" "1")
10825 (set_attr "prefix_data16" "1,*")
10826 (set_attr "prefix" "orig,vex")
10827 (set_attr "mode" "<sseinsnmode>")])
10829 (define_expand "vec_shr_<mode>"
10830 [(set (match_dup 3)
10832 (match_operand:VI_128 1 "register_operand")
10833 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10834 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10837 operands[1] = gen_lowpart (V1TImode, operands[1]);
10838 operands[3] = gen_reg_rtx (V1TImode);
10839 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
10842 (define_insn "<sse2_avx2>_lshr<mode>3"
10843 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10844 (lshiftrt:VIMAX_AVX2
10845 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10846 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10849 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10851 switch (which_alternative)
10854 return "psrldq\t{%2, %0|%0, %2}";
10856 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10858 gcc_unreachable ();
10861 [(set_attr "isa" "noavx,avx")
10862 (set_attr "type" "sseishft")
10863 (set_attr "length_immediate" "1")
10864 (set_attr "atom_unit" "sishuf")
10865 (set_attr "prefix_data16" "1,*")
10866 (set_attr "prefix" "orig,vex")
10867 (set_attr "mode" "<sseinsnmode>")])
10869 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
10870 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10871 (any_rotate:VI48_AVX512VL
10872 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
10873 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10875 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10876 [(set_attr "prefix" "evex")
10877 (set_attr "mode" "<sseinsnmode>")])
10879 (define_insn "<avx512>_<rotate><mode><mask_name>"
10880 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10881 (any_rotate:VI48_AVX512VL
10882 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
10883 (match_operand:SI 2 "const_0_to_255_operand")))]
10885 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10886 [(set_attr "prefix" "evex")
10887 (set_attr "mode" "<sseinsnmode>")])
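;; Integer min/max.  SSE2 only provides pminub/pmaxub and pminsw/pmaxsw;
;; SSE4.1 fills in the remaining 8/16/32-bit signed and unsigned forms,
;; and AVX-512F/VL add the 64-bit ones.  Combinations with no instruction
;; are expanded as a compare-and-blend through ix86_expand_int_vcond.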
10889 (define_expand "<code><mode>3"
10890 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
10891 (maxmin:VI124_256_AVX512F_AVX512BW
10892 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
10893 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
10895 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10897 (define_insn "*avx2_<code><mode>3"
10898 [(set (match_operand:VI124_256 0 "register_operand" "=v")
10900 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
10901 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
10902 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10903 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10904 [(set_attr "type" "sseiadd")
10905 (set_attr "prefix_extra" "1")
10906 (set_attr "prefix" "vex")
10907 (set_attr "mode" "OI")])
10909 (define_expand "<code><mode>3_mask"
10910 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10911 (vec_merge:VI48_AVX512VL
10912 (maxmin:VI48_AVX512VL
10913 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10914 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10915 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10916 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10918 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10920 (define_insn "*avx512f_<code><mode>3<mask_name>"
10921 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10922 (maxmin:VI48_AVX512VL
10923 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
10924 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10925 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10926 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10927 [(set_attr "type" "sseiadd")
10928 (set_attr "prefix_extra" "1")
10929 (set_attr "prefix" "maybe_evex")
10930 (set_attr "mode" "<sseinsnmode>")])
10932 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10933 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10934 (maxmin:VI12_AVX512VL
10935 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
10936 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
10938 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10939 [(set_attr "type" "sseiadd")
10940 (set_attr "prefix" "evex")
10941 (set_attr "mode" "<sseinsnmode>")])
10943 (define_expand "<code><mode>3"
10944 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
10945 (maxmin:VI8_AVX2_AVX512F
10946 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
10947 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
10951 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
10952 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10955 enum rtx_code code;
10960 xops[0] = operands[0];
10962 if (<CODE> == SMAX || <CODE> == UMAX)
10964 xops[1] = operands[1];
10965 xops[2] = operands[2];
10969 xops[1] = operands[2];
10970 xops[2] = operands[1];
10973 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
10975 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
10976 xops[4] = operands[1];
10977 xops[5] = operands[2];
10979 ok = ix86_expand_int_vcond (xops);
10985 (define_expand "<code><mode>3"
10986 [(set (match_operand:VI124_128 0 "register_operand")
10988 (match_operand:VI124_128 1 "vector_operand")
10989 (match_operand:VI124_128 2 "vector_operand")))]
10992 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
10993 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10999 xops[0] = operands[0];
11000 operands[1] = force_reg (<MODE>mode, operands[1]);
11001 operands[2] = force_reg (<MODE>mode, operands[2]);
11003 if (<CODE> == SMAX)
11005 xops[1] = operands[1];
11006 xops[2] = operands[2];
11010 xops[1] = operands[2];
11011 xops[2] = operands[1];
11014 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
11015 xops[4] = operands[1];
11016 xops[5] = operands[2];
11018 ok = ix86_expand_int_vcond (xops);
11024 (define_insn "*sse4_1_<code><mode>3<mask_name>"
11025 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
11027 (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
11028 (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
11030 && <mask_mode512bit_condition>
11031 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11033 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11034 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11035 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11036 [(set_attr "isa" "noavx,noavx,avx")
11037 (set_attr "type" "sseiadd")
11038 (set_attr "prefix_extra" "1,1,*")
11039 (set_attr "prefix" "orig,orig,vex")
11040 (set_attr "mode" "TI")])
11042 (define_insn "*<code>v8hi3"
11043 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
11045 (match_operand:V8HI 1 "vector_operand" "%0,x,v")
11046 (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
11047 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
11049 p<maxmin_int>w\t{%2, %0|%0, %2}
11050 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
11051 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
11052 [(set_attr "isa" "noavx,avx,avx512bw")
11053 (set_attr "type" "sseiadd")
11054 (set_attr "prefix_data16" "1,*,*")
11055 (set_attr "prefix_extra" "*,1,1")
11056 (set_attr "prefix" "orig,vex,evex")
11057 (set_attr "mode" "TI")])
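;; Unsigned 128-bit min/max.  Besides the compare-and-blend fallback,
;; V8HImode UMAX without SSE4.1 is synthesized as us_sub (a, b) + b: the
;; saturating subtract clamps at zero, so the sum is max (a, b).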
11059 (define_expand "<code><mode>3"
11060 [(set (match_operand:VI124_128 0 "register_operand")
11062 (match_operand:VI124_128 1 "vector_operand")
11063 (match_operand:VI124_128 2 "vector_operand")))]
11066 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
11067 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
11068 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
11070 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
11071 operands[1] = force_reg (<MODE>mode, operands[1]);
11072 if (rtx_equal_p (op3, op2))
11073 op3 = gen_reg_rtx (V8HImode);
11074 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
11075 emit_insn (gen_addv8hi3 (op0, op3, op2));
11083 operands[1] = force_reg (<MODE>mode, operands[1]);
11084 operands[2] = force_reg (<MODE>mode, operands[2]);
11086 xops[0] = operands[0];
11088 if (<CODE> == UMAX)
11090 xops[1] = operands[1];
11091 xops[2] = operands[2];
11095 xops[1] = operands[2];
11096 xops[2] = operands[1];
11099 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
11100 xops[4] = operands[1];
11101 xops[5] = operands[2];
11103 ok = ix86_expand_int_vcond (xops);
11109 (define_insn "*sse4_1_<code><mode>3<mask_name>"
11110 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
11112 (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
11113 (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
11115 && <mask_mode512bit_condition>
11116 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11118 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11119 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
11120 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11121 [(set_attr "isa" "noavx,noavx,avx")
11122 (set_attr "type" "sseiadd")
11123 (set_attr "prefix_extra" "1,1,*")
11124 (set_attr "prefix" "orig,orig,vex")
11125 (set_attr "mode" "TI")])
11127 (define_insn "*<code>v16qi3"
11128 [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
11130 (match_operand:V16QI 1 "vector_operand" "%0,x,v")
11131 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
11132 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
11134 p<maxmin_int>b\t{%2, %0|%0, %2}
11135 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
11136 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
11137 [(set_attr "isa" "noavx,avx,avx512bw")
11138 (set_attr "type" "sseiadd")
11139 (set_attr "prefix_data16" "1,*,*")
11140 (set_attr "prefix_extra" "*,1,1")
11141 (set_attr "prefix" "orig,vex,evex")
11142 (set_attr "mode" "TI")])
11144 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11146 ;; Parallel integral comparisons
11148 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
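;; Legacy SSE/AVX comparisons produce an all-ones/all-zeros mask per
;; element in a vector register, while the AVX-512 variants (the
;; UNSPEC_MASKED_EQ/UNSPEC_MASKED_GT patterns) write one bit per element
;; into a mask register (constraint "Yk").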
11150 (define_expand "avx2_eq<mode>3"
11151 [(set (match_operand:VI_256 0 "register_operand")
11153 (match_operand:VI_256 1 "nonimmediate_operand")
11154 (match_operand:VI_256 2 "nonimmediate_operand")))]
11156 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11158 (define_insn "*avx2_eq<mode>3"
11159 [(set (match_operand:VI_256 0 "register_operand" "=x")
11161 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
11162 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
11163 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
11164 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11165 [(set_attr "type" "ssecmp")
11166 (set_attr "prefix_extra" "1")
11167 (set_attr "prefix" "vex")
11168 (set_attr "mode" "OI")])
11170 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
11171 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
11172 (unspec:<avx512fmaskmode>
11173 [(match_operand:VI12_AVX512VL 1 "register_operand")
11174 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
11175 UNSPEC_MASKED_EQ))]
11177 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11179 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
11180 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
11181 (unspec:<avx512fmaskmode>
11182 [(match_operand:VI48_AVX512VL 1 "register_operand")
11183 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
11184 UNSPEC_MASKED_EQ))]
11186 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11188 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
11189 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11190 (unspec:<avx512fmaskmode>
11191 [(match_operand:VI12_AVX512VL 1 "register_operand" "%v")
11192 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11193 UNSPEC_MASKED_EQ))]
11194 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
11195 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11196 [(set_attr "type" "ssecmp")
11197 (set_attr "prefix_extra" "1")
11198 (set_attr "prefix" "evex")
11199 (set_attr "mode" "<sseinsnmode>")])
11201 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
11202 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11203 (unspec:<avx512fmaskmode>
11204 [(match_operand:VI48_AVX512VL 1 "register_operand" "%v")
11205 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11206 UNSPEC_MASKED_EQ))]
11207 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
11208 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11209 [(set_attr "type" "ssecmp")
11210 (set_attr "prefix_extra" "1")
11211 (set_attr "prefix" "evex")
11212 (set_attr "mode" "<sseinsnmode>")])
11214 (define_insn "*sse4_1_eqv2di3"
11215 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
11217 (match_operand:V2DI 1 "vector_operand" "%0,0,x")
11218 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
11219 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
11221 pcmpeqq\t{%2, %0|%0, %2}
11222 pcmpeqq\t{%2, %0|%0, %2}
11223 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
11224 [(set_attr "isa" "noavx,noavx,avx")
11225 (set_attr "type" "ssecmp")
11226 (set_attr "prefix_extra" "1")
11227 (set_attr "prefix" "orig,orig,vex")
11228 (set_attr "mode" "TI")])
11230 (define_insn "*sse2_eq<mode>3"
11231 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
11233 (match_operand:VI124_128 1 "vector_operand" "%0,x")
11234 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
11235 "TARGET_SSE2 && !TARGET_XOP
11236 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
11238 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
11239 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11240 [(set_attr "isa" "noavx,avx")
11241 (set_attr "type" "ssecmp")
11242 (set_attr "prefix_data16" "1,*")
11243 (set_attr "prefix" "orig,vex")
11244 (set_attr "mode" "TI")])
11246 (define_expand "sse2_eq<mode>3"
11247 [(set (match_operand:VI124_128 0 "register_operand")
11249 (match_operand:VI124_128 1 "vector_operand")
11250 (match_operand:VI124_128 2 "vector_operand")))]
11251 "TARGET_SSE2 && !TARGET_XOP "
11252 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
11254 (define_expand "sse4_1_eqv2di3"
11255 [(set (match_operand:V2DI 0 "register_operand")
11257 (match_operand:V2DI 1 "vector_operand")
11258 (match_operand:V2DI 2 "vector_operand")))]
11260 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
11262 (define_insn "sse4_2_gtv2di3"
11263 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
11265 (match_operand:V2DI 1 "register_operand" "0,0,x")
11266 (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
11269 pcmpgtq\t{%2, %0|%0, %2}
11270 pcmpgtq\t{%2, %0|%0, %2}
11271 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
11272 [(set_attr "isa" "noavx,noavx,avx")
11273 (set_attr "type" "ssecmp")
11274 (set_attr "prefix_extra" "1")
11275 (set_attr "prefix" "orig,orig,vex")
11276 (set_attr "mode" "TI")])
11278 (define_insn "avx2_gt<mode>3"
11279 [(set (match_operand:VI_256 0 "register_operand" "=x")
11281 (match_operand:VI_256 1 "register_operand" "x")
11282 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
11284 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11285 [(set_attr "type" "ssecmp")
11286 (set_attr "prefix_extra" "1")
11287 (set_attr "prefix" "vex")
11288 (set_attr "mode" "OI")])
11290 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
11291 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11292 (unspec:<avx512fmaskmode>
11293 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11294 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
11296 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11297 [(set_attr "type" "ssecmp")
11298 (set_attr "prefix_extra" "1")
11299 (set_attr "prefix" "evex")
11300 (set_attr "mode" "<sseinsnmode>")])
11302 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
11303 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11304 (unspec:<avx512fmaskmode>
11305 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11306 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
11308 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11309 [(set_attr "type" "ssecmp")
11310 (set_attr "prefix_extra" "1")
11311 (set_attr "prefix" "evex")
11312 (set_attr "mode" "<sseinsnmode>")])
11314 (define_insn "sse2_gt<mode>3"
11315 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
11317 (match_operand:VI124_128 1 "register_operand" "0,x")
11318 (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
11319 "TARGET_SSE2 && !TARGET_XOP"
11321 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
11322 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11323 [(set_attr "isa" "noavx,avx")
11324 (set_attr "type" "ssecmp")
11325 (set_attr "prefix_data16" "1,*")
11326 (set_attr "prefix" "orig,vex")
11327 (set_attr "mode" "TI")])
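;; The vcond/vcondu/vcondeq expanders below implement a vector select
;; keyed on an integer comparison.  The NUNITS check only admits data
;; and comparison modes with the same element count; all variants are
;; lowered through ix86_expand_int_vcond.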
11329 (define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
11330 [(set (match_operand:V_512 0 "register_operand")
11331 (if_then_else:V_512
11332 (match_operator 3 ""
11333 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
11334 (match_operand:VI_AVX512BW 5 "general_operand")])
11335 (match_operand:V_512 1)
11336 (match_operand:V_512 2)))]
11338 && (GET_MODE_NUNITS (<V_512:MODE>mode)
11339 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
11341 bool ok = ix86_expand_int_vcond (operands);
11346 (define_expand "vcond<V_256:mode><VI_256:mode>"
11347 [(set (match_operand:V_256 0 "register_operand")
11348 (if_then_else:V_256
11349 (match_operator 3 ""
11350 [(match_operand:VI_256 4 "nonimmediate_operand")
11351 (match_operand:VI_256 5 "general_operand")])
11352 (match_operand:V_256 1)
11353 (match_operand:V_256 2)))]
11355 && (GET_MODE_NUNITS (<V_256:MODE>mode)
11356 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
11358 bool ok = ix86_expand_int_vcond (operands);
11363 (define_expand "vcond<V_128:mode><VI124_128:mode>"
11364 [(set (match_operand:V_128 0 "register_operand")
11365 (if_then_else:V_128
11366 (match_operator 3 ""
11367 [(match_operand:VI124_128 4 "vector_operand")
11368 (match_operand:VI124_128 5 "general_operand")])
11369 (match_operand:V_128 1)
11370 (match_operand:V_128 2)))]
11372 && (GET_MODE_NUNITS (<V_128:MODE>mode)
11373 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
11375 bool ok = ix86_expand_int_vcond (operands);
11380 (define_expand "vcond<VI8F_128:mode>v2di"
11381 [(set (match_operand:VI8F_128 0 "register_operand")
11382 (if_then_else:VI8F_128
11383 (match_operator 3 ""
11384 [(match_operand:V2DI 4 "vector_operand")
11385 (match_operand:V2DI 5 "general_operand")])
11386 (match_operand:VI8F_128 1)
11387 (match_operand:VI8F_128 2)))]
11390 bool ok = ix86_expand_int_vcond (operands);
11395 (define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
11396 [(set (match_operand:V_512 0 "register_operand")
11397 (if_then_else:V_512
11398 (match_operator 3 ""
11399 [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
11400 (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
11401 (match_operand:V_512 1 "general_operand")
11402 (match_operand:V_512 2 "general_operand")))]
11404 && (GET_MODE_NUNITS (<V_512:MODE>mode)
11405 == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
11407 bool ok = ix86_expand_int_vcond (operands);
11412 (define_expand "vcondu<V_256:mode><VI_256:mode>"
11413 [(set (match_operand:V_256 0 "register_operand")
11414 (if_then_else:V_256
11415 (match_operator 3 ""
11416 [(match_operand:VI_256 4 "nonimmediate_operand")
11417 (match_operand:VI_256 5 "nonimmediate_operand")])
11418 (match_operand:V_256 1 "general_operand")
11419 (match_operand:V_256 2 "general_operand")))]
11421 && (GET_MODE_NUNITS (<V_256:MODE>mode)
11422 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
11424 bool ok = ix86_expand_int_vcond (operands);
11429 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
11430 [(set (match_operand:V_128 0 "register_operand")
11431 (if_then_else:V_128
11432 (match_operator 3 ""
11433 [(match_operand:VI124_128 4 "vector_operand")
11434 (match_operand:VI124_128 5 "vector_operand")])
11435 (match_operand:V_128 1 "general_operand")
11436 (match_operand:V_128 2 "general_operand")))]
11438 && (GET_MODE_NUNITS (<V_128:MODE>mode)
11439 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
11441 bool ok = ix86_expand_int_vcond (operands);
11446 (define_expand "vcondu<VI8F_128:mode>v2di"
11447 [(set (match_operand:VI8F_128 0 "register_operand")
11448 (if_then_else:VI8F_128
11449 (match_operator 3 ""
11450 [(match_operand:V2DI 4 "vector_operand")
11451 (match_operand:V2DI 5 "vector_operand")])
11452 (match_operand:VI8F_128 1 "general_operand")
11453 (match_operand:VI8F_128 2 "general_operand")))]
11456 bool ok = ix86_expand_int_vcond (operands);
11461 (define_expand "vcondeq<VI8F_128:mode>v2di"
11462 [(set (match_operand:VI8F_128 0 "register_operand")
11463 (if_then_else:VI8F_128
11464 (match_operator 3 ""
11465 [(match_operand:V2DI 4 "vector_operand")
11466 (match_operand:V2DI 5 "general_operand")])
11467 (match_operand:VI8F_128 1)
11468 (match_operand:VI8F_128 2)))]
11471 bool ok = ix86_expand_int_vcond (operands);
11476 (define_mode_iterator VEC_PERM_AVX2
11477 [V16QI V8HI V4SI V2DI V4SF V2DF
11478 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
11479 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
11480 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
11481 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
11482 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
11483 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
11485 (define_expand "vec_perm<mode>"
11486 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
11487 (match_operand:VEC_PERM_AVX2 1 "register_operand")
11488 (match_operand:VEC_PERM_AVX2 2 "register_operand")
11489 (match_operand:<sseintvecmode> 3 "register_operand")]
11490 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
11492 ix86_expand_vec_perm (operands);
11496 (define_mode_iterator VEC_PERM_CONST
11497 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
11498 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
11499 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
11500 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
11501 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
11502 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
11503 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
11504 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
11505 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
11507 (define_expand "vec_perm_const<mode>"
11508 [(match_operand:VEC_PERM_CONST 0 "register_operand")
11509 (match_operand:VEC_PERM_CONST 1 "register_operand")
11510 (match_operand:VEC_PERM_CONST 2 "register_operand")
11511 (match_operand:<sseintvecmode> 3)]
11514 if (ix86_expand_vec_perm_const (operands))
11520 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11522 ;; Parallel bitwise logical operations
11524 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
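;; There is no vector NOT instruction, so one_cmpl<mode>2 is expanded as
;; an XOR with an all-ones constant vector.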
11526 (define_expand "one_cmpl<mode>2"
11527 [(set (match_operand:VI 0 "register_operand")
11528 (xor:VI (match_operand:VI 1 "vector_operand")
11532 int i, n = GET_MODE_NUNITS (<MODE>mode);
11533 rtvec v = rtvec_alloc (n);
11535 for (i = 0; i < n; ++i)
11536 RTVEC_ELT (v, i) = constm1_rtx;
11538 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
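;; andnot computes (~op1 & op2) and maps to pandn/vpandn, or to
;; andnps/andnpd when a float-domain encoding is preferable; the insn
;; output code below picks the mnemonic and element-size suffix from the
;; mode selected by the "mode" attribute.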
11541 (define_expand "<sse2_avx2>_andnot<mode>3"
11542 [(set (match_operand:VI_AVX2 0 "register_operand")
11544 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
11545 (match_operand:VI_AVX2 2 "vector_operand")))]
11548 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
11549 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11550 (vec_merge:VI48_AVX512VL
11553 (match_operand:VI48_AVX512VL 1 "register_operand"))
11554 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11555 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
11556 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11559 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
11560 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
11561 (vec_merge:VI12_AVX512VL
11564 (match_operand:VI12_AVX512VL 1 "register_operand"))
11565 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
11566 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
11567 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11570 (define_insn "*andnot<mode>3"
11571 [(set (match_operand:VI 0 "register_operand" "=x,v")
11573 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
11574 (match_operand:VI 2 "vector_operand" "xBm,vm")))]
11577 static char buf[64];
11580 const char *ssesuffix;
11582 switch (get_attr_mode (insn))
11585 gcc_assert (TARGET_AVX512F);
11588 gcc_assert (TARGET_AVX2);
11591 gcc_assert (TARGET_SSE2);
11593 switch (<MODE>mode)
11597 /* There is no vpandnb or vpandnw instruction, nor vpandn for
11598 512-bit vectors. Use vpandnq instead. */
11603 ssesuffix = "<ssemodesuffix>";
11609 ssesuffix = TARGET_AVX512VL ? "<ssemodesuffix>" : "";
11612 ssesuffix = TARGET_AVX512VL ? "q" : "";
11617 gcc_assert (TARGET_AVX512F);
11620 gcc_assert (TARGET_AVX);
11623 gcc_assert (TARGET_SSE);
11629 gcc_unreachable ();
11632 switch (which_alternative)
11635 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11638 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
11641 gcc_unreachable ();
11644 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11647 [(set_attr "isa" "noavx,avx")
11648 (set_attr "type" "sselog")
11649 (set (attr "prefix_data16")
11651 (and (eq_attr "alternative" "0")
11652 (eq_attr "mode" "TI"))
11654 (const_string "*")))
11655 (set_attr "prefix" "orig,vex")
11657 (cond [(and (match_test "<MODE_SIZE> == 16")
11658 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11659 (const_string "<ssePSmode>")
11660 (match_test "TARGET_AVX2")
11661 (const_string "<sseinsnmode>")
11662 (match_test "TARGET_AVX")
11664 (match_test "<MODE_SIZE> > 16")
11665 (const_string "V8SF")
11666 (const_string "<sseinsnmode>"))
11667 (ior (not (match_test "TARGET_SSE2"))
11668 (match_test "optimize_function_for_size_p (cfun)"))
11669 (const_string "V4SF")
11671 (const_string "<sseinsnmode>")))])
11673 (define_insn "*andnot<mode>3_mask"
11674 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11675 (vec_merge:VI48_AVX512VL
11678 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
11679 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
11680 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
11681 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11683 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
11684 [(set_attr "type" "sselog")
11685 (set_attr "prefix" "evex")
11686 (set_attr "mode" "<sseinsnmode>")])
11688 (define_expand "<code><mode>3"
11689 [(set (match_operand:VI 0 "register_operand")
11691 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
11692 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
11695 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
11699 (define_insn "<mask_codefor><code><mode>3<mask_name>"
11700 [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,v")
11701 (any_logic:VI48_AVX_AVX512F
11702 (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,v")
11703 (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,vm")))]
11704 "TARGET_SSE && <mask_mode512bit_condition>
11705 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11707 static char buf[64];
11710 const char *ssesuffix;
11712 switch (get_attr_mode (insn))
11715 gcc_assert (TARGET_AVX512F);
11718 gcc_assert (TARGET_AVX2);
11721 gcc_assert (TARGET_SSE2);
11723 switch (<MODE>mode)
11727 ssesuffix = "<ssemodesuffix>";
11733 ssesuffix = TARGET_AVX512VL ? "<ssemodesuffix>" : "";
11736 gcc_unreachable ();
11741 gcc_assert (TARGET_AVX);
11744 gcc_assert (TARGET_SSE);
11750 gcc_unreachable ();
11753 switch (which_alternative)
11756 if (<mask_applied>)
11757 ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
11759 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11762 ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
11765 gcc_unreachable ();
11768 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11771 [(set_attr "isa" "noavx,avx")
11772 (set_attr "type" "sselog")
11773 (set (attr "prefix_data16")
11775 (and (eq_attr "alternative" "0")
11776 (eq_attr "mode" "TI"))
11778 (const_string "*")))
11779 (set_attr "prefix" "<mask_prefix3>")
11781 (cond [(and (match_test "<MODE_SIZE> == 16")
11782 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11783 (const_string "<ssePSmode>")
11784 (match_test "TARGET_AVX2")
11785 (const_string "<sseinsnmode>")
11786 (match_test "TARGET_AVX")
11788 (match_test "<MODE_SIZE> > 16")
11789 (const_string "V8SF")
11790 (const_string "<sseinsnmode>"))
11791 (ior (not (match_test "TARGET_SSE2"))
11792 (match_test "optimize_function_for_size_p (cfun)"))
11793 (const_string "V4SF")
11795 (const_string "<sseinsnmode>")))])
11797 (define_insn "*<code><mode>3"
11798 [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,v")
11799 (any_logic:VI12_AVX_AVX512F
11800 (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,v")
11801 (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,vm")))]
11802 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11804 static char buf[64];
11807 const char *ssesuffix;
11809 switch (get_attr_mode (insn))
11812 gcc_assert (TARGET_AVX512F);
11815 gcc_assert (TARGET_AVX2);
11818 gcc_assert (TARGET_SSE2);
11820 switch (<MODE>mode)
11830 ssesuffix = TARGET_AVX512VL ? "q" : "";
11833 gcc_unreachable ();
11838 gcc_assert (TARGET_AVX);
11841 gcc_assert (TARGET_SSE);
11847 gcc_unreachable ();
11850 switch (which_alternative)
11853 ops = "%s%s\t{%%2, %%0|%%0, %%2}";
11856 ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
11859 gcc_unreachable ();
11862 snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
11865 [(set_attr "isa" "noavx,avx")
11866 (set_attr "type" "sselog")
11867 (set (attr "prefix_data16")
11869 (and (eq_attr "alternative" "0")
11870 (eq_attr "mode" "TI"))
11872 (const_string "*")))
11873 (set_attr "prefix" "<mask_prefix3>")
11875 (cond [(and (match_test "<MODE_SIZE> == 16")
11876 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11877 (const_string "<ssePSmode>")
11878 (match_test "TARGET_AVX2")
11879 (const_string "<sseinsnmode>")
11880 (match_test "TARGET_AVX")
11882 (match_test "<MODE_SIZE> > 16")
11883 (const_string "V8SF")
11884 (const_string "<sseinsnmode>"))
11885 (ior (not (match_test "TARGET_SSE2"))
11886 (match_test "optimize_function_for_size_p (cfun)"))
11887 (const_string "V4SF")
11889 (const_string "<sseinsnmode>")))])
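;; vptestm sets a mask bit where (op1 & op2) is non-zero in the
;; corresponding element; vptestnm sets it where the AND is zero.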
11891 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11892 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11893 (unspec:<avx512fmaskmode>
11894 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11895 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11898 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11899 [(set_attr "prefix" "evex")
11900 (set_attr "mode" "<sseinsnmode>")])
11902 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11903 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11904 (unspec:<avx512fmaskmode>
11905 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11906 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11909 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11910 [(set_attr "prefix" "evex")
11911 (set_attr "mode" "<sseinsnmode>")])
11913 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11914 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11915 (unspec:<avx512fmaskmode>
11916 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11917 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11920 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11921 [(set_attr "prefix" "evex")
11922 (set_attr "mode" "<sseinsnmode>")])
11924 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11925 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11926 (unspec:<avx512fmaskmode>
11927 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11928 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11931 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11932 [(set_attr "prefix" "evex")
11933 (set_attr "mode" "<sseinsnmode>")])
11935 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11937 ;; Parallel integral element swizzling
11939 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
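;; vec_pack_trunc_<mode> performs modular truncation: both wide inputs
;; are viewed in the narrow packed mode and the even-indexed (low-half)
;; elements are extracted by ix86_expand_vec_extract_even_odd.  The
;; packss/packus patterns that follow are the signed- and
;; unsigned-saturating packs.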
11941 (define_expand "vec_pack_trunc_<mode>"
11942 [(match_operand:<ssepackmode> 0 "register_operand")
11943 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
11944 (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
11947 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
11948 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
11949 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
11953 (define_expand "vec_pack_trunc_qi"
11954 [(set (match_operand:HI 0 ("register_operand"))
11955 (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 ("register_operand")))
11957 (zero_extend:HI (match_operand:QI 1 ("register_operand")))))]
11960 (define_expand "vec_pack_trunc_<mode>"
11961 [(set (match_operand:<DOUBLEMASKMODE> 0 ("register_operand"))
11962 (ior:<DOUBLEMASKMODE> (ashift:<DOUBLEMASKMODE> (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 2 ("register_operand")))
11964 (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 1 ("register_operand")))))]
11967 operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
11970 (define_insn "<sse2_avx2>_packsswb<mask_name>"
11971 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
11972 (vec_concat:VI1_AVX512
11973 (ss_truncate:<ssehalfvecmode>
11974 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
11975 (ss_truncate:<ssehalfvecmode>
11976 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
11977 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11979 packsswb\t{%2, %0|%0, %2}
11980 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
11981 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11982 [(set_attr "isa" "noavx,avx,avx512bw")
11983 (set_attr "type" "sselog")
11984 (set_attr "prefix_data16" "1,*,*")
11985 (set_attr "prefix" "orig,<mask_prefix>,evex")
11986 (set_attr "mode" "<sseinsnmode>")])
11988 (define_insn "<sse2_avx2>_packssdw<mask_name>"
11989 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
11990 (vec_concat:VI2_AVX2
11991 (ss_truncate:<ssehalfvecmode>
11992 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
11993 (ss_truncate:<ssehalfvecmode>
11994 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
11995 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11997 packssdw\t{%2, %0|%0, %2}
11998 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
11999 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12000 [(set_attr "isa" "noavx,avx,avx512bw")
12001 (set_attr "type" "sselog")
12002 (set_attr "prefix_data16" "1,*,*")
12003 (set_attr "prefix" "orig,<mask_prefix>,evex")
12004 (set_attr "mode" "<sseinsnmode>")])
12006 (define_insn "<sse2_avx2>_packuswb<mask_name>"
12007 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
12008 (vec_concat:VI1_AVX512
12009 (us_truncate:<ssehalfvecmode>
12010 (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
12011 (us_truncate:<ssehalfvecmode>
12012 (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
12013 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
12015 packuswb\t{%2, %0|%0, %2}
12016 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
12017 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12018 [(set_attr "isa" "noavx,avx,avx512bw")
12019 (set_attr "type" "sselog")
12020 (set_attr "prefix_data16" "1,*,*")
12021 (set_attr "prefix" "orig,<mask_prefix>,evex")
12022 (set_attr "mode" "<sseinsnmode>")])
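;; The punpckh/punpckl patterns interleave elements from the high or low
;; halves of the two inputs.  The 256- and 512-bit forms operate within
;; each 128-bit lane, which is why the selection indices below are
;; grouped per lane rather than drawn from the top half of the whole
;; vector.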
12024 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
12025 [(set (match_operand:V64QI 0 "register_operand" "=v")
12028 (match_operand:V64QI 1 "register_operand" "v")
12029 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
12030 (parallel [(const_int 8) (const_int 72)
12031 (const_int 9) (const_int 73)
12032 (const_int 10) (const_int 74)
12033 (const_int 11) (const_int 75)
12034 (const_int 12) (const_int 76)
12035 (const_int 13) (const_int 77)
12036 (const_int 14) (const_int 78)
12037 (const_int 15) (const_int 79)
12038 (const_int 24) (const_int 88)
12039 (const_int 25) (const_int 89)
12040 (const_int 26) (const_int 90)
12041 (const_int 27) (const_int 91)
12042 (const_int 28) (const_int 92)
12043 (const_int 29) (const_int 93)
12044 (const_int 30) (const_int 94)
12045 (const_int 31) (const_int 95)
12046 (const_int 40) (const_int 104)
12047 (const_int 41) (const_int 105)
12048 (const_int 42) (const_int 106)
12049 (const_int 43) (const_int 107)
12050 (const_int 44) (const_int 108)
12051 (const_int 45) (const_int 109)
12052 (const_int 46) (const_int 110)
12053 (const_int 47) (const_int 111)
12054 (const_int 56) (const_int 120)
12055 (const_int 57) (const_int 121)
12056 (const_int 58) (const_int 122)
12057 (const_int 59) (const_int 123)
12058 (const_int 60) (const_int 124)
12059 (const_int 61) (const_int 125)
12060 (const_int 62) (const_int 126)
12061 (const_int 63) (const_int 127)])))]
12063 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12064 [(set_attr "type" "sselog")
12065 (set_attr "prefix" "evex")
12066 (set_attr "mode" "XI")])
12068 (define_insn "avx2_interleave_highv32qi<mask_name>"
12069 [(set (match_operand:V32QI 0 "register_operand" "=v")
12072 (match_operand:V32QI 1 "register_operand" "v")
12073 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
12074 (parallel [(const_int 8) (const_int 40)
12075 (const_int 9) (const_int 41)
12076 (const_int 10) (const_int 42)
12077 (const_int 11) (const_int 43)
12078 (const_int 12) (const_int 44)
12079 (const_int 13) (const_int 45)
12080 (const_int 14) (const_int 46)
12081 (const_int 15) (const_int 47)
12082 (const_int 24) (const_int 56)
12083 (const_int 25) (const_int 57)
12084 (const_int 26) (const_int 58)
12085 (const_int 27) (const_int 59)
12086 (const_int 28) (const_int 60)
12087 (const_int 29) (const_int 61)
12088 (const_int 30) (const_int 62)
12089 (const_int 31) (const_int 63)])))]
12090 "TARGET_AVX2 && <mask_avx512vl_condition>"
12091 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12092 [(set_attr "type" "sselog")
12093 (set_attr "prefix" "<mask_prefix>")
12094 (set_attr "mode" "OI")])
12096 (define_insn "vec_interleave_highv16qi<mask_name>"
12097 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
12100 (match_operand:V16QI 1 "register_operand" "0,v")
12101 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
12102 (parallel [(const_int 8) (const_int 24)
12103 (const_int 9) (const_int 25)
12104 (const_int 10) (const_int 26)
12105 (const_int 11) (const_int 27)
12106 (const_int 12) (const_int 28)
12107 (const_int 13) (const_int 29)
12108 (const_int 14) (const_int 30)
12109 (const_int 15) (const_int 31)])))]
12110 "TARGET_SSE2 && <mask_avx512vl_condition>"
12112 punpckhbw\t{%2, %0|%0, %2}
12113 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12114 [(set_attr "isa" "noavx,avx")
12115 (set_attr "type" "sselog")
12116 (set_attr "prefix_data16" "1,*")
12117 (set_attr "prefix" "orig,<mask_prefix>")
12118 (set_attr "mode" "TI")])
12120 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
12121 [(set (match_operand:V64QI 0 "register_operand" "=v")
12124 (match_operand:V64QI 1 "register_operand" "v")
12125 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
12126 (parallel [(const_int 0) (const_int 64)
12127 (const_int 1) (const_int 65)
12128 (const_int 2) (const_int 66)
12129 (const_int 3) (const_int 67)
12130 (const_int 4) (const_int 68)
12131 (const_int 5) (const_int 69)
12132 (const_int 6) (const_int 70)
12133 (const_int 7) (const_int 71)
12134 (const_int 16) (const_int 80)
12135 (const_int 17) (const_int 81)
12136 (const_int 18) (const_int 82)
12137 (const_int 19) (const_int 83)
12138 (const_int 20) (const_int 84)
12139 (const_int 21) (const_int 85)
12140 (const_int 22) (const_int 86)
12141 (const_int 23) (const_int 87)
12142 (const_int 32) (const_int 96)
12143 (const_int 33) (const_int 97)
12144 (const_int 34) (const_int 98)
12145 (const_int 35) (const_int 99)
12146 (const_int 36) (const_int 100)
12147 (const_int 37) (const_int 101)
12148 (const_int 38) (const_int 102)
12149 (const_int 39) (const_int 103)
12150 (const_int 48) (const_int 112)
12151 (const_int 49) (const_int 113)
12152 (const_int 50) (const_int 114)
12153 (const_int 51) (const_int 115)
12154 (const_int 52) (const_int 116)
12155 (const_int 53) (const_int 117)
12156 (const_int 54) (const_int 118)
12157 (const_int 55) (const_int 119)])))]
12159 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12160 [(set_attr "type" "sselog")
12161 (set_attr "prefix" "evex")
12162 (set_attr "mode" "XI")])
12164 (define_insn "avx2_interleave_lowv32qi<mask_name>"
12165 [(set (match_operand:V32QI 0 "register_operand" "=v")
12168 (match_operand:V32QI 1 "register_operand" "v")
12169 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
12170 (parallel [(const_int 0) (const_int 32)
12171 (const_int 1) (const_int 33)
12172 (const_int 2) (const_int 34)
12173 (const_int 3) (const_int 35)
12174 (const_int 4) (const_int 36)
12175 (const_int 5) (const_int 37)
12176 (const_int 6) (const_int 38)
12177 (const_int 7) (const_int 39)
12178 (const_int 16) (const_int 48)
12179 (const_int 17) (const_int 49)
12180 (const_int 18) (const_int 50)
12181 (const_int 19) (const_int 51)
12182 (const_int 20) (const_int 52)
12183 (const_int 21) (const_int 53)
12184 (const_int 22) (const_int 54)
12185 (const_int 23) (const_int 55)])))]
12186 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12187 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12188 [(set_attr "type" "sselog")
12189 (set_attr "prefix" "maybe_vex")
12190 (set_attr "mode" "OI")])
12192 (define_insn "vec_interleave_lowv16qi<mask_name>"
12193 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
12196 (match_operand:V16QI 1 "register_operand" "0,v")
12197 (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
12198 (parallel [(const_int 0) (const_int 16)
12199 (const_int 1) (const_int 17)
12200 (const_int 2) (const_int 18)
12201 (const_int 3) (const_int 19)
12202 (const_int 4) (const_int 20)
12203 (const_int 5) (const_int 21)
12204 (const_int 6) (const_int 22)
12205 (const_int 7) (const_int 23)])))]
12206 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12207 "@
12208 punpcklbw\t{%2, %0|%0, %2}
12209 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12210 [(set_attr "isa" "noavx,avx")
12211 (set_attr "type" "sselog")
12212 (set_attr "prefix_data16" "1,*")
12213 (set_attr "prefix" "orig,vex")
12214 (set_attr "mode" "TI")])
12216 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
12217 [(set (match_operand:V32HI 0 "register_operand" "=v")
12220 (match_operand:V32HI 1 "register_operand" "v")
12221 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
12222 (parallel [(const_int 4) (const_int 36)
12223 (const_int 5) (const_int 37)
12224 (const_int 6) (const_int 38)
12225 (const_int 7) (const_int 39)
12226 (const_int 12) (const_int 44)
12227 (const_int 13) (const_int 45)
12228 (const_int 14) (const_int 46)
12229 (const_int 15) (const_int 47)
12230 (const_int 20) (const_int 52)
12231 (const_int 21) (const_int 53)
12232 (const_int 22) (const_int 54)
12233 (const_int 23) (const_int 55)
12234 (const_int 28) (const_int 60)
12235 (const_int 29) (const_int 61)
12236 (const_int 30) (const_int 62)
12237 (const_int 31) (const_int 63)])))]
12238 "TARGET_AVX512BW"
12239 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12240 [(set_attr "type" "sselog")
12241 (set_attr "prefix" "evex")
12242 (set_attr "mode" "XI")])
12244 (define_insn "avx2_interleave_highv16hi<mask_name>"
12245 [(set (match_operand:V16HI 0 "register_operand" "=v")
12248 (match_operand:V16HI 1 "register_operand" "v")
12249 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
12250 (parallel [(const_int 4) (const_int 20)
12251 (const_int 5) (const_int 21)
12252 (const_int 6) (const_int 22)
12253 (const_int 7) (const_int 23)
12254 (const_int 12) (const_int 28)
12255 (const_int 13) (const_int 29)
12256 (const_int 14) (const_int 30)
12257 (const_int 15) (const_int 31)])))]
12258 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12259 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12260 [(set_attr "type" "sselog")
12261 (set_attr "prefix" "maybe_evex")
12262 (set_attr "mode" "OI")])
12264 (define_insn "vec_interleave_highv8hi<mask_name>"
12265 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
12268 (match_operand:V8HI 1 "register_operand" "0,v")
12269 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
12270 (parallel [(const_int 4) (const_int 12)
12271 (const_int 5) (const_int 13)
12272 (const_int 6) (const_int 14)
12273 (const_int 7) (const_int 15)])))]
12274 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12275 "@
12276 punpckhwd\t{%2, %0|%0, %2}
12277 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12278 [(set_attr "isa" "noavx,avx")
12279 (set_attr "type" "sselog")
12280 (set_attr "prefix_data16" "1,*")
12281 (set_attr "prefix" "orig,maybe_vex")
12282 (set_attr "mode" "TI")])
12284 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
12285 [(set (match_operand:V32HI 0 "register_operand" "=v")
12288 (match_operand:V32HI 1 "register_operand" "v")
12289 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
12290 (parallel [(const_int 0) (const_int 32)
12291 (const_int 1) (const_int 33)
12292 (const_int 2) (const_int 34)
12293 (const_int 3) (const_int 35)
12294 (const_int 8) (const_int 40)
12295 (const_int 9) (const_int 41)
12296 (const_int 10) (const_int 42)
12297 (const_int 11) (const_int 43)
12298 (const_int 16) (const_int 48)
12299 (const_int 17) (const_int 49)
12300 (const_int 18) (const_int 50)
12301 (const_int 19) (const_int 51)
12302 (const_int 24) (const_int 56)
12303 (const_int 25) (const_int 57)
12304 (const_int 26) (const_int 58)
12305 (const_int 27) (const_int 59)])))]
12306 "TARGET_AVX512BW"
12307 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12308 [(set_attr "type" "sselog")
12309 (set_attr "prefix" "evex")
12310 (set_attr "mode" "XI")])
12312 (define_insn "avx2_interleave_lowv16hi<mask_name>"
12313 [(set (match_operand:V16HI 0 "register_operand" "=v")
12316 (match_operand:V16HI 1 "register_operand" "v")
12317 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
12318 (parallel [(const_int 0) (const_int 16)
12319 (const_int 1) (const_int 17)
12320 (const_int 2) (const_int 18)
12321 (const_int 3) (const_int 19)
12322 (const_int 8) (const_int 24)
12323 (const_int 9) (const_int 25)
12324 (const_int 10) (const_int 26)
12325 (const_int 11) (const_int 27)])))]
12326 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12327 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12328 [(set_attr "type" "sselog")
12329 (set_attr "prefix" "maybe_evex")
12330 (set_attr "mode" "OI")])
12332 (define_insn "vec_interleave_lowv8hi<mask_name>"
12333 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
12336 (match_operand:V8HI 1 "register_operand" "0,v")
12337 (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
12338 (parallel [(const_int 0) (const_int 8)
12339 (const_int 1) (const_int 9)
12340 (const_int 2) (const_int 10)
12341 (const_int 3) (const_int 11)])))]
12342 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
12343 "@
12344 punpcklwd\t{%2, %0|%0, %2}
12345 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12346 [(set_attr "isa" "noavx,avx")
12347 (set_attr "type" "sselog")
12348 (set_attr "prefix_data16" "1,*")
12349 (set_attr "prefix" "orig,maybe_evex")
12350 (set_attr "mode" "TI")])
12352 (define_insn "avx2_interleave_highv8si<mask_name>"
12353 [(set (match_operand:V8SI 0 "register_operand" "=v")
12356 (match_operand:V8SI 1 "register_operand" "v")
12357 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
12358 (parallel [(const_int 2) (const_int 10)
12359 (const_int 3) (const_int 11)
12360 (const_int 6) (const_int 14)
12361 (const_int 7) (const_int 15)])))]
12362 "TARGET_AVX2 && <mask_avx512vl_condition>"
12363 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12364 [(set_attr "type" "sselog")
12365 (set_attr "prefix" "maybe_evex")
12366 (set_attr "mode" "OI")])
12368 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
12369 [(set (match_operand:V16SI 0 "register_operand" "=v")
12372 (match_operand:V16SI 1 "register_operand" "v")
12373 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
12374 (parallel [(const_int 2) (const_int 18)
12375 (const_int 3) (const_int 19)
12376 (const_int 6) (const_int 22)
12377 (const_int 7) (const_int 23)
12378 (const_int 10) (const_int 26)
12379 (const_int 11) (const_int 27)
12380 (const_int 14) (const_int 30)
12381 (const_int 15) (const_int 31)])))]
12382 "TARGET_AVX512F"
12383 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12384 [(set_attr "type" "sselog")
12385 (set_attr "prefix" "evex")
12386 (set_attr "mode" "XI")])
12389 (define_insn "vec_interleave_highv4si<mask_name>"
12390 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
12393 (match_operand:V4SI 1 "register_operand" "0,v")
12394 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
12395 (parallel [(const_int 2) (const_int 6)
12396 (const_int 3) (const_int 7)])))]
12397 "TARGET_SSE2 && <mask_avx512vl_condition>"
12398 "@
12399 punpckhdq\t{%2, %0|%0, %2}
12400 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12401 [(set_attr "isa" "noavx,avx")
12402 (set_attr "type" "sselog")
12403 (set_attr "prefix_data16" "1,*")
12404 (set_attr "prefix" "orig,maybe_vex")
12405 (set_attr "mode" "TI")])
12407 (define_insn "avx2_interleave_lowv8si<mask_name>"
12408 [(set (match_operand:V8SI 0 "register_operand" "=v")
12411 (match_operand:V8SI 1 "register_operand" "v")
12412 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
12413 (parallel [(const_int 0) (const_int 8)
12414 (const_int 1) (const_int 9)
12415 (const_int 4) (const_int 12)
12416 (const_int 5) (const_int 13)])))]
12417 "TARGET_AVX2 && <mask_avx512vl_condition>"
12418 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12419 [(set_attr "type" "sselog")
12420 (set_attr "prefix" "maybe_evex")
12421 (set_attr "mode" "OI")])
12423 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
12424 [(set (match_operand:V16SI 0 "register_operand" "=v")
12427 (match_operand:V16SI 1 "register_operand" "v")
12428 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
12429 (parallel [(const_int 0) (const_int 16)
12430 (const_int 1) (const_int 17)
12431 (const_int 4) (const_int 20)
12432 (const_int 5) (const_int 21)
12433 (const_int 8) (const_int 24)
12434 (const_int 9) (const_int 25)
12435 (const_int 12) (const_int 28)
12436 (const_int 13) (const_int 29)])))]
12437 "TARGET_AVX512F"
12438 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12439 [(set_attr "type" "sselog")
12440 (set_attr "prefix" "evex")
12441 (set_attr "mode" "XI")])
12443 (define_insn "vec_interleave_lowv4si<mask_name>"
12444 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
12447 (match_operand:V4SI 1 "register_operand" "0,v")
12448 (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
12449 (parallel [(const_int 0) (const_int 4)
12450 (const_int 1) (const_int 5)])))]
12451 "TARGET_SSE2 && <mask_avx512vl_condition>"
12452 "@
12453 punpckldq\t{%2, %0|%0, %2}
12454 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12455 [(set_attr "isa" "noavx,avx")
12456 (set_attr "type" "sselog")
12457 (set_attr "prefix_data16" "1,*")
12458 (set_attr "prefix" "orig,vex")
12459 (set_attr "mode" "TI")])
12461 (define_expand "vec_interleave_high<mode>"
12462 [(match_operand:VI_256 0 "register_operand")
12463 (match_operand:VI_256 1 "register_operand")
12464 (match_operand:VI_256 2 "nonimmediate_operand")]
12467 rtx t1 = gen_reg_rtx (<MODE>mode);
12468 rtx t2 = gen_reg_rtx (<MODE>mode);
12469 rtx t3 = gen_reg_rtx (V4DImode);
12470 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
12471 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
12472 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
12473 gen_lowpart (V4DImode, t2),
12474 GEN_INT (1 + (3 << 4))));
12475 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
12479 (define_expand "vec_interleave_low<mode>"
12480 [(match_operand:VI_256 0 "register_operand")
12481 (match_operand:VI_256 1 "register_operand")
12482 (match_operand:VI_256 2 "nonimmediate_operand")]
12485 rtx t1 = gen_reg_rtx (<MODE>mode);
12486 rtx t2 = gen_reg_rtx (<MODE>mode);
12487 rtx t3 = gen_reg_rtx (V4DImode);
12488 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
12489 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
12490 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
12491 gen_lowpart (V4DImode, t2),
12492 GEN_INT (0 + (2 << 4))));
12493 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
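;; Illustrative note (added comment; the concrete lane values are an
;; example, not from the original source): for V8SI inputs A = [a0..a7]
;; and B = [b0..b7], the two in-lane AVX2 unpacks above produce
;;   t1 = [a0 b0 a1 b1 | a4 b4 a5 b5]   (avx2_interleave_low)
;;   t2 = [a2 b2 a3 b3 | a6 b6 a7 b7]   (avx2_interleave_high)
;; and the vperm2i128 immediates 0x20 (= 0 + (2 << 4)) and 0x31
;; (= 1 + (3 << 4)) then combine (t1.lo, t2.lo) and (t1.hi, t2.hi),
;; giving the full-width low and high interleaves respectively.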
12497 ;; Modes handled by pinsr patterns.
12498 (define_mode_iterator PINSR_MODE
12499 [(V16QI "TARGET_SSE4_1") V8HI
12500 (V4SI "TARGET_SSE4_1")
12501 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
12503 (define_mode_attr sse2p4_1
12504 [(V16QI "sse4_1") (V8HI "sse2")
12505 (V4SI "sse4_1") (V2DI "sse4_1")])
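;; Added explanatory comment: <sse2p4_1> maps V8HI to "sse2" because
;; pinsrw already exists in SSE2, while the byte, dword and qword
;; insertions (pinsrb/pinsrd/pinsrq) only appear with SSE4.1, which is
;; why PINSR_MODE above gates those modes on TARGET_SSE4_1.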
12507 (define_mode_attr pinsr_evex_isa
12508 [(V16QI "avx512bw") (V8HI "avx512bw")
12509 (V4SI "avx512dq") (V2DI "avx512dq")])
12511 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
12512 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
12513 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
12514 (vec_merge:PINSR_MODE
12515 (vec_duplicate:PINSR_MODE
12516 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
12517 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
12518 (match_operand:SI 3 "const_int_operand")))]
12519 "TARGET_SSE2
12520 && ((unsigned) exact_log2 (INTVAL (operands[3]))
12521 < GET_MODE_NUNITS (<MODE>mode))"
12523 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
12525 switch (which_alternative)
12528 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
12529 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
12532 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
12535 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
12536 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
12540 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
12542 gcc_unreachable ();
12545 [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
12546 (set_attr "type" "sselog")
12547 (set (attr "prefix_rex")
12549 (and (not (match_test "TARGET_AVX"))
12550 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
12552 (const_string "*")))
12553 (set (attr "prefix_data16")
12555 (and (not (match_test "TARGET_AVX"))
12556 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12558 (const_string "*")))
12559 (set (attr "prefix_extra")
12561 (and (not (match_test "TARGET_AVX"))
12562 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12564 (const_string "1")))
12565 (set_attr "length_immediate" "1")
12566 (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
12567 (set_attr "mode" "TI")])
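;; Illustrative note (added comment): operand 3 of the pinsr pattern above
;; is the vec_merge mask, a single set bit selecting the element being
;; replaced; the insn condition checks it is an in-range power of two and
;; the output code converts it with exact_log2.  E.g. replacing element 2
;; of a V8HI arrives as operands[3] == (1 << 2) == 4 and is emitted as
;; pinsrw with immediate 2.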
12569 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
12570 [(match_operand:AVX512_VEC 0 "register_operand")
12571 (match_operand:AVX512_VEC 1 "register_operand")
12572 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
12573 (match_operand:SI 3 "const_0_to_3_operand")
12574 (match_operand:AVX512_VEC 4 "register_operand")
12575 (match_operand:<avx512fmaskmode> 5 "register_operand")]
12579 mask = INTVAL (operands[3]);
12580 selector = GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ?
12581 0xFFFF ^ (0xF000 >> mask * 4)
12582 : 0xFF ^ (0xC0 >> mask * 2);
12583 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
12584 (operands[0], operands[1], operands[2], GEN_INT (selector),
12585 operands[4], operands[5]));
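;; Worked example (added comment, illustrative values): for the 32-bit
;; element modes (unit size 4) the selector is 16 bits wide, so inserting
;; the quarter-width operand at position 3 gives
;;   0xFFFF ^ (0xF000 >> 12) = 0xFFF0,
;; which the *_1 insn below maps back to the immediate 3.  For the 64-bit
;; element modes the selector is 8 bits wide, e.g. position 1 gives
;;   0xFF ^ (0xC0 >> 2) = 0xCF.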
12589 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
12590 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
12591 (vec_merge:AVX512_VEC
12592 (match_operand:AVX512_VEC 1 "register_operand" "v")
12593 (vec_duplicate:AVX512_VEC
12594 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
12595 (match_operand:SI 3 "const_int_operand" "n")))]
12599 int selector = INTVAL (operands[3]);
12601 if (selector == 0xFFF || selector == 0x3F)
12603 else if ( selector == 0xF0FF || selector == 0xCF)
12605 else if ( selector == 0xFF0F || selector == 0xF3)
12607 else if ( selector == 0xFFF0 || selector == 0xFC)
12610 gcc_unreachable ();
12612 operands[3] = GEN_INT (mask);
12614 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
12616 [(set_attr "type" "sselog")
12617 (set_attr "length_immediate" "1")
12618 (set_attr "prefix" "evex")
12619 (set_attr "mode" "<sseinsnmode>")])
12621 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
12622 [(match_operand:AVX512_VEC_2 0 "register_operand")
12623 (match_operand:AVX512_VEC_2 1 "register_operand")
12624 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
12625 (match_operand:SI 3 "const_0_to_1_operand")
12626 (match_operand:AVX512_VEC_2 4 "register_operand")
12627 (match_operand:<avx512fmaskmode> 5 "register_operand")]
12630 int mask = INTVAL (operands[3]);
12632 emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
12633 operands[2], operands[4],
12636 emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
12637 operands[2], operands[4],
12642 (define_insn "vec_set_lo_<mode><mask_name>"
12643 [(set (match_operand:V16FI 0 "register_operand" "=v")
12645 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
12646 (vec_select:<ssehalfvecmode>
12647 (match_operand:V16FI 1 "register_operand" "v")
12648 (parallel [(const_int 8) (const_int 9)
12649 (const_int 10) (const_int 11)
12650 (const_int 12) (const_int 13)
12651 (const_int 14) (const_int 15)]))))]
12652 "TARGET_AVX512DQ"
12653 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
12654 [(set_attr "type" "sselog")
12655 (set_attr "length_immediate" "1")
12656 (set_attr "prefix" "evex")
12657 (set_attr "mode" "<sseinsnmode>")])
12659 (define_insn "vec_set_hi_<mode><mask_name>"
12660 [(set (match_operand:V16FI 0 "register_operand" "=v")
12662 (vec_select:<ssehalfvecmode>
12663 (match_operand:V16FI 1 "register_operand" "v")
12664 (parallel [(const_int 0) (const_int 1)
12665 (const_int 2) (const_int 3)
12666 (const_int 4) (const_int 5)
12667 (const_int 6) (const_int 7)]))
12668 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
12669 "TARGET_AVX512DQ"
12670 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
12671 [(set_attr "type" "sselog")
12672 (set_attr "length_immediate" "1")
12673 (set_attr "prefix" "evex")
12674 (set_attr "mode" "<sseinsnmode>")])
12676 (define_insn "vec_set_lo_<mode><mask_name>"
12677 [(set (match_operand:V8FI 0 "register_operand" "=v")
12679 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
12680 (vec_select:<ssehalfvecmode>
12681 (match_operand:V8FI 1 "register_operand" "v")
12682 (parallel [(const_int 4) (const_int 5)
12683 (const_int 6) (const_int 7)]))))]
12684 "TARGET_AVX512F"
12685 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
12686 [(set_attr "type" "sselog")
12687 (set_attr "length_immediate" "1")
12688 (set_attr "prefix" "evex")
12689 (set_attr "mode" "XI")])
12691 (define_insn "vec_set_hi_<mode><mask_name>"
12692 [(set (match_operand:V8FI 0 "register_operand" "=v")
12694 (vec_select:<ssehalfvecmode>
12695 (match_operand:V8FI 1 "register_operand" "v")
12696 (parallel [(const_int 0) (const_int 1)
12697 (const_int 2) (const_int 3)]))
12698 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
12699 "TARGET_AVX512F"
12700 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
12701 [(set_attr "type" "sselog")
12702 (set_attr "length_immediate" "1")
12703 (set_attr "prefix" "evex")
12704 (set_attr "mode" "XI")])
12706 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
12707 [(match_operand:VI8F_256 0 "register_operand")
12708 (match_operand:VI8F_256 1 "register_operand")
12709 (match_operand:VI8F_256 2 "nonimmediate_operand")
12710 (match_operand:SI 3 "const_0_to_3_operand")
12711 (match_operand:VI8F_256 4 "register_operand")
12712 (match_operand:QI 5 "register_operand")]
12715 int mask = INTVAL (operands[3]);
12716 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
12717 (operands[0], operands[1], operands[2],
12718 GEN_INT (((mask >> 0) & 1) * 2 + 0),
12719 GEN_INT (((mask >> 0) & 1) * 2 + 1),
12720 GEN_INT (((mask >> 1) & 1) * 2 + 4),
12721 GEN_INT (((mask >> 1) & 1) * 2 + 5),
12722 operands[4], operands[5]));
12726 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
12727 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
12728 (vec_select:VI8F_256
12729 (vec_concat:<ssedoublemode>
12730 (match_operand:VI8F_256 1 "register_operand" "v")
12731 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
12732 (parallel [(match_operand 3 "const_0_to_3_operand")
12733 (match_operand 4 "const_0_to_3_operand")
12734 (match_operand 5 "const_4_to_7_operand")
12735 (match_operand 6 "const_4_to_7_operand")])))]
12737 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12738 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
12741 mask = INTVAL (operands[3]) / 2;
12742 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
12743 operands[3] = GEN_INT (mask);
12744 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
12746 [(set_attr "type" "sselog")
12747 (set_attr "length_immediate" "1")
12748 (set_attr "prefix" "evex")
12749 (set_attr "mode" "XI")])
12751 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
12752 [(match_operand:V8FI 0 "register_operand")
12753 (match_operand:V8FI 1 "register_operand")
12754 (match_operand:V8FI 2 "nonimmediate_operand")
12755 (match_operand:SI 3 "const_0_to_255_operand")
12756 (match_operand:V8FI 4 "register_operand")
12757 (match_operand:QI 5 "register_operand")]
12760 int mask = INTVAL (operands[3]);
12761 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
12762 (operands[0], operands[1], operands[2],
12763 GEN_INT (((mask >> 0) & 3) * 2),
12764 GEN_INT (((mask >> 0) & 3) * 2 + 1),
12765 GEN_INT (((mask >> 2) & 3) * 2),
12766 GEN_INT (((mask >> 2) & 3) * 2 + 1),
12767 GEN_INT (((mask >> 4) & 3) * 2 + 8),
12768 GEN_INT (((mask >> 4) & 3) * 2 + 9),
12769 GEN_INT (((mask >> 6) & 3) * 2 + 8),
12770 GEN_INT (((mask >> 6) & 3) * 2 + 9),
12771 operands[4], operands[5]));
12775 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
12776 [(set (match_operand:V8FI 0 "register_operand" "=v")
12778 (vec_concat:<ssedoublemode>
12779 (match_operand:V8FI 1 "register_operand" "v")
12780 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
12781 (parallel [(match_operand 3 "const_0_to_7_operand")
12782 (match_operand 4 "const_0_to_7_operand")
12783 (match_operand 5 "const_0_to_7_operand")
12784 (match_operand 6 "const_0_to_7_operand")
12785 (match_operand 7 "const_8_to_15_operand")
12786 (match_operand 8 "const_8_to_15_operand")
12787 (match_operand 9 "const_8_to_15_operand")
12788 (match_operand 10 "const_8_to_15_operand")])))]
12789 "TARGET_AVX512F
12790 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12791 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
12792 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12793 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
12796 mask = INTVAL (operands[3]) / 2;
12797 mask |= INTVAL (operands[5]) / 2 << 2;
12798 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
12799 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
12800 operands[3] = GEN_INT (mask);
12802 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
12804 [(set_attr "type" "sselog")
12805 (set_attr "length_immediate" "1")
12806 (set_attr "prefix" "evex")
12807 (set_attr "mode" "<sseinsnmode>")])
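;; Worked example (added comment, values are illustrative): an original
;; immediate of 0x1B (0b00011011) makes the expander above request the
;; element indices 6,7,4,5,10,11,8,9 from the 16-element concatenation of
;; the two inputs; the insn condition verifies each pair is consecutive
;; and the output code re-encodes the pairs into the same 0x1B immediate
;; for vshuf<shuffletype>64x2.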
12809 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
12810 [(match_operand:VI4F_256 0 "register_operand")
12811 (match_operand:VI4F_256 1 "register_operand")
12812 (match_operand:VI4F_256 2 "nonimmediate_operand")
12813 (match_operand:SI 3 "const_0_to_3_operand")
12814 (match_operand:VI4F_256 4 "register_operand")
12815 (match_operand:QI 5 "register_operand")]
12818 int mask = INTVAL (operands[3]);
12819 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
12820 (operands[0], operands[1], operands[2],
12821 GEN_INT (((mask >> 0) & 1) * 4 + 0),
12822 GEN_INT (((mask >> 0) & 1) * 4 + 1),
12823 GEN_INT (((mask >> 0) & 1) * 4 + 2),
12824 GEN_INT (((mask >> 0) & 1) * 4 + 3),
12825 GEN_INT (((mask >> 1) & 1) * 4 + 8),
12826 GEN_INT (((mask >> 1) & 1) * 4 + 9),
12827 GEN_INT (((mask >> 1) & 1) * 4 + 10),
12828 GEN_INT (((mask >> 1) & 1) * 4 + 11),
12829 operands[4], operands[5]));
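;; Added comment (illustrative): in this 256-bit variant only two
;; immediate bits matter; bit 0 selects which 128-bit half of operand 1
;; supplies elements 0-3 (indices 0-3 or 4-7) and bit 1 selects the half
;; of operand 2 (indices 8-11 or 12-15 of the concatenation).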
12833 (define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
12834 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
12835 (vec_select:VI4F_256
12836 (vec_concat:<ssedoublemode>
12837 (match_operand:VI4F_256 1 "register_operand" "v")
12838 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
12839 (parallel [(match_operand 3 "const_0_to_7_operand")
12840 (match_operand 4 "const_0_to_7_operand")
12841 (match_operand 5 "const_0_to_7_operand")
12842 (match_operand 6 "const_0_to_7_operand")
12843 (match_operand 7 "const_8_to_15_operand")
12844 (match_operand 8 "const_8_to_15_operand")
12845 (match_operand 9 "const_8_to_15_operand")
12846 (match_operand 10 "const_8_to_15_operand")])))]
12847 "TARGET_AVX512VL
12848 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12849 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12850 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12851 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12852 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12853 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
12856 mask = INTVAL (operands[3]) / 4;
12857 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
12858 operands[3] = GEN_INT (mask);
12860 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
12862 [(set_attr "type" "sselog")
12863 (set_attr "length_immediate" "1")
12864 (set_attr "prefix" "evex")
12865 (set_attr "mode" "<sseinsnmode>")])
12867 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
12868 [(match_operand:V16FI 0 "register_operand")
12869 (match_operand:V16FI 1 "register_operand")
12870 (match_operand:V16FI 2 "nonimmediate_operand")
12871 (match_operand:SI 3 "const_0_to_255_operand")
12872 (match_operand:V16FI 4 "register_operand")
12873 (match_operand:HI 5 "register_operand")]
12876 int mask = INTVAL (operands[3]);
12877 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
12878 (operands[0], operands[1], operands[2],
12879 GEN_INT (((mask >> 0) & 3) * 4),
12880 GEN_INT (((mask >> 0) & 3) * 4 + 1),
12881 GEN_INT (((mask >> 0) & 3) * 4 + 2),
12882 GEN_INT (((mask >> 0) & 3) * 4 + 3),
12883 GEN_INT (((mask >> 2) & 3) * 4),
12884 GEN_INT (((mask >> 2) & 3) * 4 + 1),
12885 GEN_INT (((mask >> 2) & 3) * 4 + 2),
12886 GEN_INT (((mask >> 2) & 3) * 4 + 3),
12887 GEN_INT (((mask >> 4) & 3) * 4 + 16),
12888 GEN_INT (((mask >> 4) & 3) * 4 + 17),
12889 GEN_INT (((mask >> 4) & 3) * 4 + 18),
12890 GEN_INT (((mask >> 4) & 3) * 4 + 19),
12891 GEN_INT (((mask >> 6) & 3) * 4 + 16),
12892 GEN_INT (((mask >> 6) & 3) * 4 + 17),
12893 GEN_INT (((mask >> 6) & 3) * 4 + 18),
12894 GEN_INT (((mask >> 6) & 3) * 4 + 19),
12895 operands[4], operands[5]));
12899 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
12900 [(set (match_operand:V16FI 0 "register_operand" "=v")
12902 (vec_concat:<ssedoublemode>
12903 (match_operand:V16FI 1 "register_operand" "v")
12904 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
12905 (parallel [(match_operand 3 "const_0_to_15_operand")
12906 (match_operand 4 "const_0_to_15_operand")
12907 (match_operand 5 "const_0_to_15_operand")
12908 (match_operand 6 "const_0_to_15_operand")
12909 (match_operand 7 "const_0_to_15_operand")
12910 (match_operand 8 "const_0_to_15_operand")
12911 (match_operand 9 "const_0_to_15_operand")
12912 (match_operand 10 "const_0_to_15_operand")
12913 (match_operand 11 "const_16_to_31_operand")
12914 (match_operand 12 "const_16_to_31_operand")
12915 (match_operand 13 "const_16_to_31_operand")
12916 (match_operand 14 "const_16_to_31_operand")
12917 (match_operand 15 "const_16_to_31_operand")
12918 (match_operand 16 "const_16_to_31_operand")
12919 (match_operand 17 "const_16_to_31_operand")
12920 (match_operand 18 "const_16_to_31_operand")])))]
12921 "TARGET_AVX512F
12922 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12923 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12924 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12925 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12926 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12927 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
12928 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
12929 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
12930 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
12931 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
12932 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
12933 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
12936 mask = INTVAL (operands[3]) / 4;
12937 mask |= INTVAL (operands[7]) / 4 << 2;
12938 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
12939 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
12940 operands[3] = GEN_INT (mask);
12942 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
12944 [(set_attr "type" "sselog")
12945 (set_attr "length_immediate" "1")
12946 (set_attr "prefix" "evex")
12947 (set_attr "mode" "<sseinsnmode>")])
12949 (define_expand "avx512f_pshufdv3_mask"
12950 [(match_operand:V16SI 0 "register_operand")
12951 (match_operand:V16SI 1 "nonimmediate_operand")
12952 (match_operand:SI 2 "const_0_to_255_operand")
12953 (match_operand:V16SI 3 "register_operand")
12954 (match_operand:HI 4 "register_operand")]
12957 int mask = INTVAL (operands[2]);
12958 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
12959 GEN_INT ((mask >> 0) & 3),
12960 GEN_INT ((mask >> 2) & 3),
12961 GEN_INT ((mask >> 4) & 3),
12962 GEN_INT ((mask >> 6) & 3),
12963 GEN_INT (((mask >> 0) & 3) + 4),
12964 GEN_INT (((mask >> 2) & 3) + 4),
12965 GEN_INT (((mask >> 4) & 3) + 4),
12966 GEN_INT (((mask >> 6) & 3) + 4),
12967 GEN_INT (((mask >> 0) & 3) + 8),
12968 GEN_INT (((mask >> 2) & 3) + 8),
12969 GEN_INT (((mask >> 4) & 3) + 8),
12970 GEN_INT (((mask >> 6) & 3) + 8),
12971 GEN_INT (((mask >> 0) & 3) + 12),
12972 GEN_INT (((mask >> 2) & 3) + 12),
12973 GEN_INT (((mask >> 4) & 3) + 12),
12974 GEN_INT (((mask >> 6) & 3) + 12),
12975 operands[3], operands[4]));
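;; Worked example (added comment, illustrative): a pshufd immediate of
;; 0x1B (0b00011011) selects elements 3,2,1,0 within each 128-bit lane, so
;; the expander above passes the index sets {3,2,1,0}, {7,6,5,4},
;; {11,10,9,8} and {15,14,13,12}, and the *_1 insn below folds them back
;; into the single immediate 0x1B.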
12979 (define_insn "avx512f_pshufd_1<mask_name>"
12980 [(set (match_operand:V16SI 0 "register_operand" "=v")
12982 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
12983 (parallel [(match_operand 2 "const_0_to_3_operand")
12984 (match_operand 3 "const_0_to_3_operand")
12985 (match_operand 4 "const_0_to_3_operand")
12986 (match_operand 5 "const_0_to_3_operand")
12987 (match_operand 6 "const_4_to_7_operand")
12988 (match_operand 7 "const_4_to_7_operand")
12989 (match_operand 8 "const_4_to_7_operand")
12990 (match_operand 9 "const_4_to_7_operand")
12991 (match_operand 10 "const_8_to_11_operand")
12992 (match_operand 11 "const_8_to_11_operand")
12993 (match_operand 12 "const_8_to_11_operand")
12994 (match_operand 13 "const_8_to_11_operand")
12995 (match_operand 14 "const_12_to_15_operand")
12996 (match_operand 15 "const_12_to_15_operand")
12997 (match_operand 16 "const_12_to_15_operand")
12998 (match_operand 17 "const_12_to_15_operand")])))]
12999 "TARGET_AVX512F
13000 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
13001 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
13002 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
13003 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
13004 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
13005 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
13006 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
13007 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
13008 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
13009 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
13010 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
13011 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
13014 mask |= INTVAL (operands[2]) << 0;
13015 mask |= INTVAL (operands[3]) << 2;
13016 mask |= INTVAL (operands[4]) << 4;
13017 mask |= INTVAL (operands[5]) << 6;
13018 operands[2] = GEN_INT (mask);
13020 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
13022 [(set_attr "type" "sselog1")
13023 (set_attr "prefix" "evex")
13024 (set_attr "length_immediate" "1")
13025 (set_attr "mode" "XI")])
13027 (define_expand "avx512vl_pshufdv3_mask"
13028 [(match_operand:V8SI 0 "register_operand")
13029 (match_operand:V8SI 1 "nonimmediate_operand")
13030 (match_operand:SI 2 "const_0_to_255_operand")
13031 (match_operand:V8SI 3 "register_operand")
13032 (match_operand:QI 4 "register_operand")]
13035 int mask = INTVAL (operands[2]);
13036 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
13037 GEN_INT ((mask >> 0) & 3),
13038 GEN_INT ((mask >> 2) & 3),
13039 GEN_INT ((mask >> 4) & 3),
13040 GEN_INT ((mask >> 6) & 3),
13041 GEN_INT (((mask >> 0) & 3) + 4),
13042 GEN_INT (((mask >> 2) & 3) + 4),
13043 GEN_INT (((mask >> 4) & 3) + 4),
13044 GEN_INT (((mask >> 6) & 3) + 4),
13045 operands[3], operands[4]));
13049 (define_expand "avx2_pshufdv3"
13050 [(match_operand:V8SI 0 "register_operand")
13051 (match_operand:V8SI 1 "nonimmediate_operand")
13052 (match_operand:SI 2 "const_0_to_255_operand")]
13055 int mask = INTVAL (operands[2]);
13056 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
13057 GEN_INT ((mask >> 0) & 3),
13058 GEN_INT ((mask >> 2) & 3),
13059 GEN_INT ((mask >> 4) & 3),
13060 GEN_INT ((mask >> 6) & 3),
13061 GEN_INT (((mask >> 0) & 3) + 4),
13062 GEN_INT (((mask >> 2) & 3) + 4),
13063 GEN_INT (((mask >> 4) & 3) + 4),
13064 GEN_INT (((mask >> 6) & 3) + 4)));
13068 (define_insn "avx2_pshufd_1<mask_name>"
13069 [(set (match_operand:V8SI 0 "register_operand" "=v")
13071 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
13072 (parallel [(match_operand 2 "const_0_to_3_operand")
13073 (match_operand 3 "const_0_to_3_operand")
13074 (match_operand 4 "const_0_to_3_operand")
13075 (match_operand 5 "const_0_to_3_operand")
13076 (match_operand 6 "const_4_to_7_operand")
13077 (match_operand 7 "const_4_to_7_operand")
13078 (match_operand 8 "const_4_to_7_operand")
13079 (match_operand 9 "const_4_to_7_operand")])))]
13080 "TARGET_AVX2
13081 && <mask_avx512vl_condition>
13082 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
13083 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
13084 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
13085 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
13088 mask |= INTVAL (operands[2]) << 0;
13089 mask |= INTVAL (operands[3]) << 2;
13090 mask |= INTVAL (operands[4]) << 4;
13091 mask |= INTVAL (operands[5]) << 6;
13092 operands[2] = GEN_INT (mask);
13094 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13096 [(set_attr "type" "sselog1")
13097 (set_attr "prefix" "maybe_evex")
13098 (set_attr "length_immediate" "1")
13099 (set_attr "mode" "OI")])
13101 (define_expand "avx512vl_pshufd_mask"
13102 [(match_operand:V4SI 0 "register_operand")
13103 (match_operand:V4SI 1 "nonimmediate_operand")
13104 (match_operand:SI 2 "const_0_to_255_operand")
13105 (match_operand:V4SI 3 "register_operand")
13106 (match_operand:QI 4 "register_operand")]
13109 int mask = INTVAL (operands[2]);
13110 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
13111 GEN_INT ((mask >> 0) & 3),
13112 GEN_INT ((mask >> 2) & 3),
13113 GEN_INT ((mask >> 4) & 3),
13114 GEN_INT ((mask >> 6) & 3),
13115 operands[3], operands[4]));
13119 (define_expand "sse2_pshufd"
13120 [(match_operand:V4SI 0 "register_operand")
13121 (match_operand:V4SI 1 "vector_operand")
13122 (match_operand:SI 2 "const_int_operand")]
13125 int mask = INTVAL (operands[2]);
13126 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
13127 GEN_INT ((mask >> 0) & 3),
13128 GEN_INT ((mask >> 2) & 3),
13129 GEN_INT ((mask >> 4) & 3),
13130 GEN_INT ((mask >> 6) & 3)));
13134 (define_insn "sse2_pshufd_1<mask_name>"
13135 [(set (match_operand:V4SI 0 "register_operand" "=v")
13137 (match_operand:V4SI 1 "vector_operand" "vBm")
13138 (parallel [(match_operand 2 "const_0_to_3_operand")
13139 (match_operand 3 "const_0_to_3_operand")
13140 (match_operand 4 "const_0_to_3_operand")
13141 (match_operand 5 "const_0_to_3_operand")])))]
13142 "TARGET_SSE2 && <mask_avx512vl_condition>"
13145 mask |= INTVAL (operands[2]) << 0;
13146 mask |= INTVAL (operands[3]) << 2;
13147 mask |= INTVAL (operands[4]) << 4;
13148 mask |= INTVAL (operands[5]) << 6;
13149 operands[2] = GEN_INT (mask);
13151 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13153 [(set_attr "type" "sselog1")
13154 (set_attr "prefix_data16" "1")
13155 (set_attr "prefix" "<mask_prefix2>")
13156 (set_attr "length_immediate" "1")
13157 (set_attr "mode" "TI")])
13159 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
13160 [(set (match_operand:V32HI 0 "register_operand" "=v")
13162 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
13163 (match_operand:SI 2 "const_0_to_255_operand" "n")]
13166 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13167 [(set_attr "type" "sselog")
13168 (set_attr "prefix" "evex")
13169 (set_attr "mode" "XI")])
13171 (define_expand "avx512vl_pshuflwv3_mask"
13172 [(match_operand:V16HI 0 "register_operand")
13173 (match_operand:V16HI 1 "nonimmediate_operand")
13174 (match_operand:SI 2 "const_0_to_255_operand")
13175 (match_operand:V16HI 3 "register_operand")
13176 (match_operand:HI 4 "register_operand")]
13177 "TARGET_AVX512VL && TARGET_AVX512BW"
13179 int mask = INTVAL (operands[2]);
13180 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
13181 GEN_INT ((mask >> 0) & 3),
13182 GEN_INT ((mask >> 2) & 3),
13183 GEN_INT ((mask >> 4) & 3),
13184 GEN_INT ((mask >> 6) & 3),
13185 GEN_INT (((mask >> 0) & 3) + 8),
13186 GEN_INT (((mask >> 2) & 3) + 8),
13187 GEN_INT (((mask >> 4) & 3) + 8),
13188 GEN_INT (((mask >> 6) & 3) + 8),
13189 operands[3], operands[4]));
13193 (define_expand "avx2_pshuflwv3"
13194 [(match_operand:V16HI 0 "register_operand")
13195 (match_operand:V16HI 1 "nonimmediate_operand")
13196 (match_operand:SI 2 "const_0_to_255_operand")]
13199 int mask = INTVAL (operands[2]);
13200 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
13201 GEN_INT ((mask >> 0) & 3),
13202 GEN_INT ((mask >> 2) & 3),
13203 GEN_INT ((mask >> 4) & 3),
13204 GEN_INT ((mask >> 6) & 3),
13205 GEN_INT (((mask >> 0) & 3) + 8),
13206 GEN_INT (((mask >> 2) & 3) + 8),
13207 GEN_INT (((mask >> 4) & 3) + 8),
13208 GEN_INT (((mask >> 6) & 3) + 8)));
13212 (define_insn "avx2_pshuflw_1<mask_name>"
13213 [(set (match_operand:V16HI 0 "register_operand" "=v")
13215 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
13216 (parallel [(match_operand 2 "const_0_to_3_operand")
13217 (match_operand 3 "const_0_to_3_operand")
13218 (match_operand 4 "const_0_to_3_operand")
13219 (match_operand 5 "const_0_to_3_operand")
13224 (match_operand 6 "const_8_to_11_operand")
13225 (match_operand 7 "const_8_to_11_operand")
13226 (match_operand 8 "const_8_to_11_operand")
13227 (match_operand 9 "const_8_to_11_operand")
13231 (const_int 15)])))]
13232 "TARGET_AVX2
13233 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
13234 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
13235 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
13236 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
13237 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
13240 mask |= INTVAL (operands[2]) << 0;
13241 mask |= INTVAL (operands[3]) << 2;
13242 mask |= INTVAL (operands[4]) << 4;
13243 mask |= INTVAL (operands[5]) << 6;
13244 operands[2] = GEN_INT (mask);
13246 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13248 [(set_attr "type" "sselog")
13249 (set_attr "prefix" "maybe_evex")
13250 (set_attr "length_immediate" "1")
13251 (set_attr "mode" "OI")])
13253 (define_expand "avx512vl_pshuflw_mask"
13254 [(match_operand:V8HI 0 "register_operand")
13255 (match_operand:V8HI 1 "nonimmediate_operand")
13256 (match_operand:SI 2 "const_0_to_255_operand")
13257 (match_operand:V8HI 3 "register_operand")
13258 (match_operand:QI 4 "register_operand")]
13259 "TARGET_AVX512VL && TARGET_AVX512BW"
13261 int mask = INTVAL (operands[2]);
13262 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
13263 GEN_INT ((mask >> 0) & 3),
13264 GEN_INT ((mask >> 2) & 3),
13265 GEN_INT ((mask >> 4) & 3),
13266 GEN_INT ((mask >> 6) & 3),
13267 operands[3], operands[4]));
13271 (define_expand "sse2_pshuflw"
13272 [(match_operand:V8HI 0 "register_operand")
13273 (match_operand:V8HI 1 "vector_operand")
13274 (match_operand:SI 2 "const_int_operand")]
13277 int mask = INTVAL (operands[2]);
13278 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
13279 GEN_INT ((mask >> 0) & 3),
13280 GEN_INT ((mask >> 2) & 3),
13281 GEN_INT ((mask >> 4) & 3),
13282 GEN_INT ((mask >> 6) & 3)));
13286 (define_insn "sse2_pshuflw_1<mask_name>"
13287 [(set (match_operand:V8HI 0 "register_operand" "=v")
13289 (match_operand:V8HI 1 "vector_operand" "vBm")
13290 (parallel [(match_operand 2 "const_0_to_3_operand")
13291 (match_operand 3 "const_0_to_3_operand")
13292 (match_operand 4 "const_0_to_3_operand")
13293 (match_operand 5 "const_0_to_3_operand")
13298 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
13301 mask |= INTVAL (operands[2]) << 0;
13302 mask |= INTVAL (operands[3]) << 2;
13303 mask |= INTVAL (operands[4]) << 4;
13304 mask |= INTVAL (operands[5]) << 6;
13305 operands[2] = GEN_INT (mask);
13307 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13309 [(set_attr "type" "sselog")
13310 (set_attr "prefix_data16" "0")
13311 (set_attr "prefix_rep" "1")
13312 (set_attr "prefix" "maybe_vex")
13313 (set_attr "length_immediate" "1")
13314 (set_attr "mode" "TI")])
13316 (define_expand "avx2_pshufhwv3"
13317 [(match_operand:V16HI 0 "register_operand")
13318 (match_operand:V16HI 1 "nonimmediate_operand")
13319 (match_operand:SI 2 "const_0_to_255_operand")]
13322 int mask = INTVAL (operands[2]);
13323 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
13324 GEN_INT (((mask >> 0) & 3) + 4),
13325 GEN_INT (((mask >> 2) & 3) + 4),
13326 GEN_INT (((mask >> 4) & 3) + 4),
13327 GEN_INT (((mask >> 6) & 3) + 4),
13328 GEN_INT (((mask >> 0) & 3) + 12),
13329 GEN_INT (((mask >> 2) & 3) + 12),
13330 GEN_INT (((mask >> 4) & 3) + 12),
13331 GEN_INT (((mask >> 6) & 3) + 12)));
13335 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
13336 [(set (match_operand:V32HI 0 "register_operand" "=v")
13338 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
13339 (match_operand:SI 2 "const_0_to_255_operand" "n")]
13342 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13343 [(set_attr "type" "sselog")
13344 (set_attr "prefix" "evex")
13345 (set_attr "mode" "XI")])
13347 (define_expand "avx512vl_pshufhwv3_mask"
13348 [(match_operand:V16HI 0 "register_operand")
13349 (match_operand:V16HI 1 "nonimmediate_operand")
13350 (match_operand:SI 2 "const_0_to_255_operand")
13351 (match_operand:V16HI 3 "register_operand")
13352 (match_operand:HI 4 "register_operand")]
13353 "TARGET_AVX512VL && TARGET_AVX512BW"
13355 int mask = INTVAL (operands[2]);
13356 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
13357 GEN_INT (((mask >> 0) & 3) + 4),
13358 GEN_INT (((mask >> 2) & 3) + 4),
13359 GEN_INT (((mask >> 4) & 3) + 4),
13360 GEN_INT (((mask >> 6) & 3) + 4),
13361 GEN_INT (((mask >> 0) & 3) + 12),
13362 GEN_INT (((mask >> 2) & 3) + 12),
13363 GEN_INT (((mask >> 4) & 3) + 12),
13364 GEN_INT (((mask >> 6) & 3) + 12),
13365 operands[3], operands[4]));
13369 (define_insn "avx2_pshufhw_1<mask_name>"
13370 [(set (match_operand:V16HI 0 "register_operand" "=v")
13372 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
13373 (parallel [(const_int 0)
13377 (match_operand 2 "const_4_to_7_operand")
13378 (match_operand 3 "const_4_to_7_operand")
13379 (match_operand 4 "const_4_to_7_operand")
13380 (match_operand 5 "const_4_to_7_operand")
13385 (match_operand 6 "const_12_to_15_operand")
13386 (match_operand 7 "const_12_to_15_operand")
13387 (match_operand 8 "const_12_to_15_operand")
13388 (match_operand 9 "const_12_to_15_operand")])))]
13389 "TARGET_AVX2
13390 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
13391 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
13392 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
13393 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
13394 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
13397 mask |= (INTVAL (operands[2]) - 4) << 0;
13398 mask |= (INTVAL (operands[3]) - 4) << 2;
13399 mask |= (INTVAL (operands[4]) - 4) << 4;
13400 mask |= (INTVAL (operands[5]) - 4) << 6;
13401 operands[2] = GEN_INT (mask);
13403 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
13405 [(set_attr "type" "sselog")
13406 (set_attr "prefix" "maybe_evex")
13407 (set_attr "length_immediate" "1")
13408 (set_attr "mode" "OI")])
13410 (define_expand "avx512vl_pshufhw_mask"
13411 [(match_operand:V8HI 0 "register_operand")
13412 (match_operand:V8HI 1 "nonimmediate_operand")
13413 (match_operand:SI 2 "const_0_to_255_operand")
13414 (match_operand:V8HI 3 "register_operand")
13415 (match_operand:QI 4 "register_operand")]
13416 "TARGET_AVX512VL && TARGET_AVX512BW"
13418 int mask = INTVAL (operands[2]);
13419 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
13420 GEN_INT (((mask >> 0) & 3) + 4),
13421 GEN_INT (((mask >> 2) & 3) + 4),
13422 GEN_INT (((mask >> 4) & 3) + 4),
13423 GEN_INT (((mask >> 6) & 3) + 4),
13424 operands[3], operands[4]));
13428 (define_expand "sse2_pshufhw"
13429 [(match_operand:V8HI 0 "register_operand")
13430 (match_operand:V8HI 1 "vector_operand")
13431 (match_operand:SI 2 "const_int_operand")]
13434 int mask = INTVAL (operands[2]);
13435 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
13436 GEN_INT (((mask >> 0) & 3) + 4),
13437 GEN_INT (((mask >> 2) & 3) + 4),
13438 GEN_INT (((mask >> 4) & 3) + 4),
13439 GEN_INT (((mask >> 6) & 3) + 4)));
13443 (define_insn "sse2_pshufhw_1<mask_name>"
13444 [(set (match_operand:V8HI 0 "register_operand" "=v")
13446 (match_operand:V8HI 1 "vector_operand" "vBm")
13447 (parallel [(const_int 0)
13451 (match_operand 2 "const_4_to_7_operand")
13452 (match_operand 3 "const_4_to_7_operand")
13453 (match_operand 4 "const_4_to_7_operand")
13454 (match_operand 5 "const_4_to_7_operand")])))]
13455 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
13458 mask |= (INTVAL (operands[2]) - 4) << 0;
13459 mask |= (INTVAL (operands[3]) - 4) << 2;
13460 mask |= (INTVAL (operands[4]) - 4) << 4;
13461 mask |= (INTVAL (operands[5]) - 4) << 6;
13462 operands[2] = GEN_INT (mask);
13464 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
13466 [(set_attr "type" "sselog")
13467 (set_attr "prefix_rep" "1")
13468 (set_attr "prefix_data16" "0")
13469 (set_attr "prefix" "maybe_vex")
13470 (set_attr "length_immediate" "1")
13471 (set_attr "mode" "TI")])
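;; Added comment (illustrative): the pshuflw patterns shuffle only words
;; 0-3 of each lane and pass the upper words through unchanged, while the
;; pshufhw patterns do the reverse; hence the pshufhw expanders bias each
;; selector by +4 (and +12 for the upper lane) and the insns subtract 4
;; again when reconstructing the immediate.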
13473 (define_expand "sse2_loadd"
13474 [(set (match_operand:V4SI 0 "register_operand")
13476 (vec_duplicate:V4SI
13477 (match_operand:SI 1 "nonimmediate_operand"))
13481 "operands[2] = CONST0_RTX (V4SImode);")
13483 (define_insn "sse2_loadld"
13484 [(set (match_operand:V4SI 0 "register_operand" "=v,Yi,x,x,v")
13486 (vec_duplicate:V4SI
13487 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
13488 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,v")
13492 %vmovd\t{%2, %0|%0, %2}
13493 %vmovd\t{%2, %0|%0, %2}
13494 movss\t{%2, %0|%0, %2}
13495 movss\t{%2, %0|%0, %2}
13496 vmovss\t{%2, %1, %0|%0, %1, %2}"
13497 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
13498 (set_attr "type" "ssemov")
13499 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
13500 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
13502 ;; QI and HI modes handled by pextr patterns.
13503 (define_mode_iterator PEXTR_MODE12
13504 [(V16QI "TARGET_SSE4_1") V8HI])
13506 (define_insn "*vec_extract<mode>"
13507 [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
13508 (vec_select:<ssescalarmode>
13509 (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
13511 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
13512 "TARGET_SSE2"
13513 "@
13514 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13515 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
13516 vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13517 vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13518 [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
13519 (set_attr "type" "sselog1")
13520 (set_attr "prefix_data16" "1")
13521 (set (attr "prefix_extra")
13523 (and (eq_attr "alternative" "0,2")
13524 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13526 (const_string "1")))
13527 (set_attr "length_immediate" "1")
13528 (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
13529 (set_attr "mode" "TI")])
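;; Added comment (illustrative): alternatives 0 and 2 above extract into a
;; general register, printed with %k0 as its 32-bit name since pextrb and
;; pextrw produce a zero-extended 32-bit result, while alternatives 1 and
;; 3 store the element directly to memory.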
13531 (define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
13532 [(set (match_operand:SWI48 0 "register_operand" "=r,r")
13534 (vec_select:<PEXTR_MODE12:ssescalarmode>
13535 (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
13537 [(match_operand:SI 2
13538 "const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
13539 "TARGET_SSE2"
13540 "@
13541 %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
13542 vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
13543 [(set_attr "isa" "*,avx512bw")
13544 (set_attr "type" "sselog1")
13545 (set_attr "prefix_data16" "1")
13546 (set (attr "prefix_extra")
13548 (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
13550 (const_string "1")))
13551 (set_attr "length_immediate" "1")
13552 (set_attr "prefix" "maybe_vex")
13553 (set_attr "mode" "TI")])
13555 (define_insn "*vec_extract<mode>_mem"
13556 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
13557 (vec_select:<ssescalarmode>
13558 (match_operand:VI12_128 1 "memory_operand" "o")
13560 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
13564 (define_insn "*vec_extract<ssevecmodelower>_0"
13565 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,v ,m")
13567 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,vm,v")
13568 (parallel [(const_int 0)])))]
13569 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13572 (define_insn "*vec_extractv2di_0_sse"
13573 [(set (match_operand:DI 0 "nonimmediate_operand" "=v,m")
13575 (match_operand:V2DI 1 "nonimmediate_operand" "vm,v")
13576 (parallel [(const_int 0)])))]
13577 "TARGET_SSE && !TARGET_64BIT
13578 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13582 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
13584 (match_operand:<ssevecmode> 1 "register_operand")
13585 (parallel [(const_int 0)])))]
13586 "TARGET_SSE && reload_completed"
13587 [(set (match_dup 0) (match_dup 1))]
13588 "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
13590 (define_insn "*vec_extractv4si_0_zext_sse4"
13591 [(set (match_operand:DI 0 "register_operand" "=r,x,v")
13594 (match_operand:V4SI 1 "register_operand" "Yj,x,v")
13595 (parallel [(const_int 0)]))))]
13598 [(set_attr "isa" "x64,*,avx512f")])
13600 (define_insn "*vec_extractv4si_0_zext"
13601 [(set (match_operand:DI 0 "register_operand" "=r")
13604 (match_operand:V4SI 1 "register_operand" "x")
13605 (parallel [(const_int 0)]))))]
13606 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
13610 [(set (match_operand:DI 0 "register_operand")
13613 (match_operand:V4SI 1 "register_operand")
13614 (parallel [(const_int 0)]))))]
13615 "TARGET_SSE2 && reload_completed"
13616 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
13617 "operands[1] = gen_lowpart (SImode, operands[1]);")
13619 (define_insn "*vec_extractv4si"
13620 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
13622 (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
13623 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
13626 switch (which_alternative)
13630 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
13634 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
13635 return "psrldq\t{%2, %0|%0, %2}";
13639 operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
13640 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
13643 gcc_unreachable ();
13646 [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
13647 (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
13648 (set (attr "prefix_extra")
13649 (if_then_else (eq_attr "alternative" "0,1")
13651 (const_string "*")))
13652 (set_attr "length_immediate" "1")
13653 (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
13654 (set_attr "mode" "TI")])
13656 (define_insn "*vec_extractv4si_zext"
13657 [(set (match_operand:DI 0 "register_operand" "=r,r")
13660 (match_operand:V4SI 1 "register_operand" "x,v")
13661 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13662 "TARGET_64BIT && TARGET_SSE4_1"
13663 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
13664 [(set_attr "isa" "*,avx512dq")
13665 (set_attr "type" "sselog1")
13666 (set_attr "prefix_extra" "1")
13667 (set_attr "length_immediate" "1")
13668 (set_attr "prefix" "maybe_vex")
13669 (set_attr "mode" "TI")])
13671 (define_insn "*vec_extractv4si_mem"
13672 [(set (match_operand:SI 0 "register_operand" "=x,r")
13674 (match_operand:V4SI 1 "memory_operand" "o,o")
13675 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
13679 (define_insn_and_split "*vec_extractv4si_zext_mem"
13680 [(set (match_operand:DI 0 "register_operand" "=x,r")
13683 (match_operand:V4SI 1 "memory_operand" "o,o")
13684 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
13685 "TARGET_64BIT && TARGET_SSE"
13687 "&& reload_completed"
13688 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
13690 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
13693 (define_insn "*vec_extractv2di_1"
13694 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm,m,x,x,Yv,x,v,r")
13696 (match_operand:V2DI 1 "nonimmediate_operand" "x ,v ,v,0,x, v,x,o,o")
13697 (parallel [(const_int 1)])))]
13698 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
13699 "@
13700 %vpextrq\t{$1, %1, %0|%0, %1, 1}
13701 vpextrq\t{$1, %1, %0|%0, %1, 1}
13702 %vmovhps\t{%1, %0|%0, %1}
13703 psrldq\t{$8, %0|%0, 8}
13704 vpsrldq\t{$8, %1, %0|%0, %1, 8}
13705 vpsrldq\t{$8, %1, %0|%0, %1, 8}
13706 movhlps\t{%1, %0|%0, %1}
13710 (cond [(eq_attr "alternative" "0")
13711 (const_string "x64_sse4")
13712 (eq_attr "alternative" "1")
13713 (const_string "x64_avx512dq")
13714 (eq_attr "alternative" "3")
13715 (const_string "sse2_noavx")
13716 (eq_attr "alternative" "4")
13717 (const_string "avx")
13718 (eq_attr "alternative" "5")
13719 (const_string "avx512bw")
13720 (eq_attr "alternative" "6")
13721 (const_string "noavx")
13722 (eq_attr "alternative" "8")
13723 (const_string "x64")
13725 (const_string "*")))
13727 (cond [(eq_attr "alternative" "2,6,7")
13728 (const_string "ssemov")
13729 (eq_attr "alternative" "3,4,5")
13730 (const_string "sseishft1")
13731 (eq_attr "alternative" "8")
13732 (const_string "imov")
13734 (const_string "sselog1")))
13735 (set (attr "length_immediate")
13736 (if_then_else (eq_attr "alternative" "0,1,3,4,5")
13738 (const_string "*")))
13739 (set (attr "prefix_rex")
13740 (if_then_else (eq_attr "alternative" "0,1")
13742 (const_string "*")))
13743 (set (attr "prefix_extra")
13744 (if_then_else (eq_attr "alternative" "0,1")
13746 (const_string "*")))
13747 (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
13748 (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
13751 [(set (match_operand:<ssescalarmode> 0 "register_operand")
13752 (vec_select:<ssescalarmode>
13753 (match_operand:VI_128 1 "memory_operand")
13755 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
13756 "TARGET_SSE && reload_completed"
13757 [(set (match_dup 0) (match_dup 1))]
13759 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
13761 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
13764 (define_insn "*vec_extractv2ti"
13765 [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
13767 (match_operand:V2TI 1 "register_operand" "x,v")
13769 [(match_operand:SI 2 "const_0_to_1_operand")])))]
13770 "TARGET_AVX"
13771 "@
13772 vextract%~128\t{%2, %1, %0|%0, %1, %2}
13773 vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
13774 [(set_attr "type" "sselog")
13775 (set_attr "prefix_extra" "1")
13776 (set_attr "length_immediate" "1")
13777 (set_attr "prefix" "vex,evex")
13778 (set_attr "mode" "OI")])
13780 (define_insn "*vec_extractv4ti"
13781 [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
13783 (match_operand:V4TI 1 "register_operand" "v")
13785 [(match_operand:SI 2 "const_0_to_3_operand")])))]
13787 "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
13788 [(set_attr "type" "sselog")
13789 (set_attr "prefix_extra" "1")
13790 (set_attr "length_immediate" "1")
13791 (set_attr "prefix" "evex")
13792 (set_attr "mode" "XI")])
13794 (define_mode_iterator VEXTRACTI128_MODE
13795 [(V4TI "TARGET_AVX512F") V2TI])
13798 [(set (match_operand:TI 0 "nonimmediate_operand")
13800 (match_operand:VEXTRACTI128_MODE 1 "register_operand")
13801 (parallel [(const_int 0)])))]
13803 && reload_completed
13804 && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
13805 [(set (match_dup 0) (match_dup 1))]
13806 "operands[1] = gen_lowpart (TImode, operands[1]);")
13808 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
13809 ;; vector modes into vec_extract*.
13811 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
13812 (subreg:SWI48x (match_operand 1 "register_operand") 0))]
13813 "can_create_pseudo_p ()
13814 && REG_P (operands[1])
13815 && VECTOR_MODE_P (GET_MODE (operands[1]))
13816 && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
13817 || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
13818 || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
13819 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
13820 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
13821 (parallel [(const_int 0)])))]
13825 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
13828 if (<MODE>mode == SImode)
13830 tmp = gen_reg_rtx (V8SImode);
13831 emit_insn (gen_vec_extract_lo_v16si (tmp,
13832 gen_lowpart (V16SImode,
13837 tmp = gen_reg_rtx (V4DImode);
13838 emit_insn (gen_vec_extract_lo_v8di (tmp,
13839 gen_lowpart (V8DImode,
13845 tmp = gen_reg_rtx (<ssevecmode>mode);
13846 if (<MODE>mode == SImode)
13847 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
13850 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
13855 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
13860 (define_insn "*vec_concatv2si_sse4_1"
13861 [(set (match_operand:V2SI 0 "register_operand"
13862 "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
13864 (match_operand:SI 1 "nonimmediate_operand"
13865 " 0, 0, x,Yv, 0, 0,Yv,rm, 0,rm")
13866 (match_operand:SI 2 "vector_move_operand"
13867 " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
13868 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13870 pinsrd\t{$1, %2, %0|%0, %2, 1}
13871 pinsrd\t{$1, %2, %0|%0, %2, 1}
13872 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
13873 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
13874 punpckldq\t{%2, %0|%0, %2}
13875 punpckldq\t{%2, %0|%0, %2}
13876 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
13877 %vmovd\t{%1, %0|%0, %1}
13878 punpckldq\t{%2, %0|%0, %2}
13879 movd\t{%1, %0|%0, %1}"
13880 [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
13882 (cond [(eq_attr "alternative" "7")
13883 (const_string "ssemov")
13884 (eq_attr "alternative" "8")
13885 (const_string "mmxcvt")
13886 (eq_attr "alternative" "9")
13887 (const_string "mmxmov")
13889 (const_string "sselog")))
13890 (set (attr "prefix_extra")
13891 (if_then_else (eq_attr "alternative" "0,1,2,3")
13893 (const_string "*")))
13894 (set (attr "length_immediate")
13895 (if_then_else (eq_attr "alternative" "0,1,2,3")
13897 (const_string "*")))
13898 (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
13899 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
13901 ;; ??? In theory we can match memory for the MMX alternative, but allowing
13902 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
13903 ;; alternatives pretty much forces the MMX alternative to be chosen.
13904 (define_insn "*vec_concatv2si"
13905 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
13907 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
13908 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
13909 "TARGET_SSE && !TARGET_SSE4_1"
13911 punpckldq\t{%2, %0|%0, %2}
13912 movd\t{%1, %0|%0, %1}
13913 movd\t{%1, %0|%0, %1}
13914 unpcklps\t{%2, %0|%0, %2}
13915 movss\t{%1, %0|%0, %1}
13916 punpckldq\t{%2, %0|%0, %2}
13917 movd\t{%1, %0|%0, %1}"
13918 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
13919 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
13920 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
13922 (define_insn "*vec_concatv4si"
13923 [(set (match_operand:V4SI 0 "register_operand" "=x,v,x,x,v")
13925 (match_operand:V2SI 1 "register_operand" " 0,v,0,0,v")
13926 (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
13929 punpcklqdq\t{%2, %0|%0, %2}
13930 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
13931 movlhps\t{%2, %0|%0, %2}
13932 movhps\t{%2, %0|%0, %q2}
13933 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
13934 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
13935 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
13936 (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
13937 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
13939 ;; movd instead of movq is required to handle broken assemblers.
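;; Illustrative note (an assumption about the assembler issue, not stated in
;; this file): some assemblers reject the inter-unit spelling
;; "movq %rax, %xmm0" while accepting the equivalent "movd %rax, %xmm0",
;; which is why the alternative below keys the output template on
;; HAVE_AS_IX86_INTERUNIT_MOVQ.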
13940 (define_insn "vec_concatv2di"
13941 [(set (match_operand:V2DI 0 "register_operand"
13942 "=Yr,*x,x ,v ,Yi,v ,x ,x,v ,x,x,v")
13944 (match_operand:DI 1 "nonimmediate_operand"
13945 " 0, 0,x ,Yv,r ,vm,?!*Yn,0,Yv,0,0,v")
13946 (match_operand:DI 2 "vector_move_operand"
13947 "*rm,rm,rm,rm,C ,C ,C ,x,Yv,x,m,m")))]
13950 pinsrq\t{$1, %2, %0|%0, %2, 1}
13951 pinsrq\t{$1, %2, %0|%0, %2, 1}
13952 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
13953 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
13954 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
13955 %vmovq\t{%1, %0|%0, %1}
13956 movq2dq\t{%1, %0|%0, %1}
13957 punpcklqdq\t{%2, %0|%0, %2}
13958 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
13959 movlhps\t{%2, %0|%0, %2}
13960 movhps\t{%2, %0|%0, %2}
13961 vmovhps\t{%2, %1, %0|%0, %1, %2}"
13963 (cond [(eq_attr "alternative" "0,1")
13964 (const_string "x64_sse4_noavx")
13965 (eq_attr "alternative" "2")
13966 (const_string "x64_avx")
13967 (eq_attr "alternative" "3")
13968 (const_string "x64_avx512dq")
13969 (eq_attr "alternative" "4")
13970 (const_string "x64")
13971 (eq_attr "alternative" "5,6")
13972 (const_string "sse2")
13973 (eq_attr "alternative" "7")
13974 (const_string "sse2_noavx")
13975 (eq_attr "alternative" "8,11")
13976 (const_string "avx")
13978 (const_string "noavx")))
13981 (eq_attr "alternative" "0,1,2,3,7,8")
13982 (const_string "sselog")
13983 (const_string "ssemov")))
13984 (set (attr "prefix_rex")
13985 (if_then_else (eq_attr "alternative" "0,1,2,3,4")
13987 (const_string "*")))
13988 (set (attr "prefix_extra")
13989 (if_then_else (eq_attr "alternative" "0,1,2,3")
13991 (const_string "*")))
13992 (set (attr "length_immediate")
13993 (if_then_else (eq_attr "alternative" "0,1,2,3")
13995 (const_string "*")))
13996 (set (attr "prefix")
13997 (cond [(eq_attr "alternative" "2")
13998 (const_string "vex")
13999 (eq_attr "alternative" "3")
14000 (const_string "evex")
14001 (eq_attr "alternative" "4,5")
14002 (const_string "maybe_vex")
14003 (eq_attr "alternative" "8,11")
14004 (const_string "maybe_evex")
14006 (const_string "orig")))
14007 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
14009 (define_expand "vec_unpacks_lo_<mode>"
14010 [(match_operand:<sseunpackmode> 0 "register_operand")
14011 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14013 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
14015 (define_expand "vec_unpacks_hi_<mode>"
14016 [(match_operand:<sseunpackmode> 0 "register_operand")
14017 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14019 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
14021 (define_expand "vec_unpacku_lo_<mode>"
14022 [(match_operand:<sseunpackmode> 0 "register_operand")
14023 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14025 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
14027 (define_expand "vec_unpacks_lo_hi"
14028 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
14029 (match_operand:HI 1 "register_operand"))]
14032 (define_expand "vec_unpacks_lo_si"
14033 [(set (match_operand:HI 0 "register_operand")
14034 (subreg:HI (match_operand:SI 1 "register_operand") 0))]
14037 (define_expand "vec_unpacks_lo_di"
14038 [(set (match_operand:SI 0 "register_operand")
14039 (subreg:SI (match_operand:DI 1 "register_operand") 0))]
14042 (define_expand "vec_unpacku_hi_<mode>"
14043 [(match_operand:<sseunpackmode> 0 "register_operand")
14044 (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
14046 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
14048 (define_expand "vec_unpacks_hi_hi"
14050 [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
14051 (lshiftrt:HI (match_operand:HI 1 "register_operand")
14053 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
14056 (define_expand "vec_unpacks_hi_<mode>"
14058 [(set (subreg:SWI48x
14059 (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
14060 (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
14062 (unspec [(const_int 0)] UNSPEC_MASKOP)])]
14064 "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
14066 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14070 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14072 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
14073 [(set (match_operand:VI12_AVX2 0 "register_operand")
14074 (truncate:VI12_AVX2
14075 (lshiftrt:<ssedoublemode>
14076 (plus:<ssedoublemode>
14077 (plus:<ssedoublemode>
14078 (zero_extend:<ssedoublemode>
14079 (match_operand:VI12_AVX2 1 "vector_operand"))
14080 (zero_extend:<ssedoublemode>
14081 (match_operand:VI12_AVX2 2 "vector_operand")))
14082 (match_dup <mask_expand_op3>))
14084 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14087 if (<mask_applied>)
14089 operands[3] = CONST1_RTX(<MODE>mode);
14090 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
14092 if (<mask_applied>)
14094 operands[5] = operands[3];
14099 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
14100 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
14101 (truncate:VI12_AVX2
14102 (lshiftrt:<ssedoublemode>
14103 (plus:<ssedoublemode>
14104 (plus:<ssedoublemode>
14105 (zero_extend:<ssedoublemode>
14106 (match_operand:VI12_AVX2 1 "vector_operand" "%0,v"))
14107 (zero_extend:<ssedoublemode>
14108 (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))
14109 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
14111 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
14112 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
14114 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
14115 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14116 [(set_attr "isa" "noavx,avx")
14117 (set_attr "type" "sseiadd")
14118 (set_attr "prefix_data16" "1,*")
14119 (set_attr "prefix" "orig,<mask_prefix>")
14120 (set_attr "mode" "<sseinsnmode>")])
14122 ;; The correct representation for this is absolutely enormous, and
14123 ;; surely not generally useful.
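;; As a rough sketch (a paraphrase of the instruction's behaviour, not a
;; formula taken from this file), psadbw computes for each 64-bit lane
;;   dst.qword[i] = sum over j = 0..7 of abs (op1.ubyte[8*i+j] - op2.ubyte[8*i+j])
;; with the 16-bit sum zero-extended into the lane, which is why an unspec
;; is used instead of spelling the whole tree out in RTL.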
14124 (define_insn "<sse2_avx2>_psadbw"
14125 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
14126 (unspec:VI8_AVX2_AVX512BW
14127 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
14128 (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
14132 psadbw\t{%2, %0|%0, %2}
14133 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
14134 [(set_attr "isa" "noavx,avx")
14135 (set_attr "type" "sseiadd")
14136 (set_attr "atom_unit" "simul")
14137 (set_attr "prefix_data16" "1,*")
14138 (set_attr "prefix" "orig,maybe_evex")
14139 (set_attr "mode" "<sseinsnmode>")])
14141 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
14142 [(set (match_operand:SI 0 "register_operand" "=r")
14144 [(match_operand:VF_128_256 1 "register_operand" "x")]
14147 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
14148 [(set_attr "type" "ssemov")
14149 (set_attr "prefix" "maybe_vex")
14150 (set_attr "mode" "<MODE>")])
14152 (define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
14153 [(set (match_operand:DI 0 "register_operand" "=r")
14156 [(match_operand:VF_128_256 1 "register_operand" "x")]
14158 "TARGET_64BIT && TARGET_SSE"
14159 "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
14160 [(set_attr "type" "ssemov")
14161 (set_attr "prefix" "maybe_vex")
14162 (set_attr "mode" "<MODE>")])
14164 (define_insn "<sse2_avx2>_pmovmskb"
14165 [(set (match_operand:SI 0 "register_operand" "=r")
14167 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
14170 "%vpmovmskb\t{%1, %0|%0, %1}"
14171 [(set_attr "type" "ssemov")
14172 (set (attr "prefix_data16")
14174 (match_test "TARGET_AVX")
14176 (const_string "1")))
14177 (set_attr "prefix" "maybe_vex")
14178 (set_attr "mode" "SI")])
14180 (define_insn "*<sse2_avx2>_pmovmskb_zext"
14181 [(set (match_operand:DI 0 "register_operand" "=r")
14184 [(match_operand:VI1_AVX2 1 "register_operand" "x")]
14186 "TARGET_64BIT && TARGET_SSE2"
14187 "%vpmovmskb\t{%1, %k0|%k0, %1}"
14188 [(set_attr "type" "ssemov")
14189 (set (attr "prefix_data16")
14191 (match_test "TARGET_AVX")
14193 (const_string "1")))
14194 (set_attr "prefix" "maybe_vex")
14195 (set_attr "mode" "SI")])
14197 (define_expand "sse2_maskmovdqu"
14198 [(set (match_operand:V16QI 0 "memory_operand")
14199 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
14200 (match_operand:V16QI 2 "register_operand")
14205 (define_insn "*sse2_maskmovdqu"
14206 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
14207 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
14208 (match_operand:V16QI 2 "register_operand" "x")
14209 (mem:V16QI (match_dup 0))]
14213 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
14214 that requires %v to be at the beginning of the opcode name. */
14215 if (Pmode != word_mode)
14216 fputs ("\taddr32", asm_out_file);
14217 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
14219 [(set_attr "type" "ssemov")
14220 (set_attr "prefix_data16" "1")
14221 (set (attr "length_address")
14222 (symbol_ref ("Pmode != word_mode")))
14223 ;; The implicit %rdi operand confuses default length_vex computation.
14224 (set (attr "length_vex")
14225 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
14226 (set_attr "prefix" "maybe_vex")
14227 (set_attr "znver1_decode" "vector")
14228 (set_attr "mode" "TI")])
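;; Illustrative usage sketch (assumes the standard emmintrin.h intrinsic that
;; maps to this pattern; the function below is hypothetical): each byte of the
;; data operand is stored to the implicit %rdi/%edi destination only if the
;; corresponding mask byte has its most significant bit set.
;;
;;   #include <emmintrin.h>
;;
;;   /* Store the bytes of DATA selected by the high bit of each MASK byte.
;;      The destination pointer is implicitly passed in %rdi/%edi.  */
;;   void masked_store (__m128i data, __m128i mask, char *dst)
;;   {
;;     _mm_maskmoveu_si128 (data, mask, dst);
;;   }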
14230 (define_insn "sse_ldmxcsr"
14231 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
14235 [(set_attr "type" "sse")
14236 (set_attr "atom_sse_attr" "mxcsr")
14237 (set_attr "prefix" "maybe_vex")
14238 (set_attr "memory" "load")])
14240 (define_insn "sse_stmxcsr"
14241 [(set (match_operand:SI 0 "memory_operand" "=m")
14242 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
14245 [(set_attr "type" "sse")
14246 (set_attr "atom_sse_attr" "mxcsr")
14247 (set_attr "prefix" "maybe_vex")
14248 (set_attr "memory" "store")])
14250 (define_insn "sse2_clflush"
14251 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
14255 [(set_attr "type" "sse")
14256 (set_attr "atom_sse_attr" "fence")
14257 (set_attr "memory" "unknown")])
14259 ;; As per the AMD and Intel ISA manuals, the first operand is the extensions
14260 ;; value and it goes to %ecx.  The second operand is the hints value and it
14261 ;; goes to %eax.
14262 (define_insn "sse3_mwait"
14263 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
14264 (match_operand:SI 1 "register_operand" "a")]
14267 ;; The 64-bit form is "mwait %rax,%rcx", but only the lower 32 bits are used.
14268 ;; Since 32-bit register operands are implicitly zero-extended to 64 bits,
14269 ;; we only need to set up the 32-bit registers.
14271 [(set_attr "length" "3")])
14273 (define_insn "sse3_monitor_<mode>"
14274 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
14275 (match_operand:SI 1 "register_operand" "c")
14276 (match_operand:SI 2 "register_operand" "d")]
14279 ;; The 64-bit form is "monitor %rax,%rcx,%rdx", but only the lower 32 bits
14280 ;; of RCX and RDX are used.  Since 32-bit register operands are implicitly
14281 ;; zero-extended to 64 bits, we only need to set up the 32-bit registers.
14283 [(set (attr "length")
14284 (symbol_ref ("(Pmode != word_mode) + 3")))])
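;; Illustrative usage sketch (assumes the pmmintrin.h intrinsics that map to
;; the monitor/mwait patterns above; the function below is hypothetical):
;;
;;   #include <pmmintrin.h>
;;
;;   /* Arm the monitor on ADDR (address in %rax/%eax, extensions in %ecx,
;;      hints in %edx), then wait (extensions in %ecx, hints in %eax).  */
;;   void wait_on_line (void const *addr)
;;   {
;;     _mm_monitor (addr, 0, 0);
;;     _mm_mwait (0, 0);
;;   }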
14286 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14288 ;; SSSE3 instructions
14290 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14292 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
14294 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
14295 [(set (match_operand:V16HI 0 "register_operand" "=x")
14300 (ssse3_plusminus:HI
14302 (match_operand:V16HI 1 "register_operand" "x")
14303 (parallel [(const_int 0)]))
14304 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14305 (ssse3_plusminus:HI
14306 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14307 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14309 (ssse3_plusminus:HI
14310 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
14311 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
14312 (ssse3_plusminus:HI
14313 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
14314 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
14317 (ssse3_plusminus:HI
14318 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
14319 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
14320 (ssse3_plusminus:HI
14321 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
14322 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
14324 (ssse3_plusminus:HI
14325 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
14326 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
14327 (ssse3_plusminus:HI
14328 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
14329 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
14333 (ssse3_plusminus:HI
14335 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
14336 (parallel [(const_int 0)]))
14337 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14338 (ssse3_plusminus:HI
14339 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14340 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
14342 (ssse3_plusminus:HI
14343 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
14344 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
14345 (ssse3_plusminus:HI
14346 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
14347 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
14350 (ssse3_plusminus:HI
14351 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
14352 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
14353 (ssse3_plusminus:HI
14354 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
14355 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
14357 (ssse3_plusminus:HI
14358 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
14359 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
14360 (ssse3_plusminus:HI
14361 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
14362 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
14364 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
14365 [(set_attr "type" "sseiadd")
14366 (set_attr "prefix_extra" "1")
14367 (set_attr "prefix" "vex")
14368 (set_attr "mode" "OI")])
14370 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
14371 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
14375 (ssse3_plusminus:HI
14377 (match_operand:V8HI 1 "register_operand" "0,x")
14378 (parallel [(const_int 0)]))
14379 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14380 (ssse3_plusminus:HI
14381 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14382 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14384 (ssse3_plusminus:HI
14385 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
14386 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
14387 (ssse3_plusminus:HI
14388 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
14389 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
14392 (ssse3_plusminus:HI
14394 (match_operand:V8HI 2 "vector_operand" "xBm,xm")
14395 (parallel [(const_int 0)]))
14396 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14397 (ssse3_plusminus:HI
14398 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14399 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
14401 (ssse3_plusminus:HI
14402 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
14403 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
14404 (ssse3_plusminus:HI
14405 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
14406 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
14409 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
14410 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
14411 [(set_attr "isa" "noavx,avx")
14412 (set_attr "type" "sseiadd")
14413 (set_attr "atom_unit" "complex")
14414 (set_attr "prefix_data16" "1,*")
14415 (set_attr "prefix_extra" "1")
14416 (set_attr "prefix" "orig,vex")
14417 (set_attr "mode" "TI")])
14419 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
14420 [(set (match_operand:V4HI 0 "register_operand" "=y")
14423 (ssse3_plusminus:HI
14425 (match_operand:V4HI 1 "register_operand" "0")
14426 (parallel [(const_int 0)]))
14427 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
14428 (ssse3_plusminus:HI
14429 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
14430 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
14432 (ssse3_plusminus:HI
14434 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
14435 (parallel [(const_int 0)]))
14436 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
14437 (ssse3_plusminus:HI
14438 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
14439 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
14441 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
14442 [(set_attr "type" "sseiadd")
14443 (set_attr "atom_unit" "complex")
14444 (set_attr "prefix_extra" "1")
14445 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14446 (set_attr "mode" "DI")])
14448 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
14449 [(set (match_operand:V8SI 0 "register_operand" "=x")
14455 (match_operand:V8SI 1 "register_operand" "x")
14456 (parallel [(const_int 0)]))
14457 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14459 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
14460 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
14463 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
14464 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
14466 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
14467 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
14472 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
14473 (parallel [(const_int 0)]))
14474 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
14476 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
14477 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
14480 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
14481 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
14483 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
14484 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
14486 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
14487 [(set_attr "type" "sseiadd")
14488 (set_attr "prefix_extra" "1")
14489 (set_attr "prefix" "vex")
14490 (set_attr "mode" "OI")])
14492 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
14493 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
14498 (match_operand:V4SI 1 "register_operand" "0,x")
14499 (parallel [(const_int 0)]))
14500 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14502 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
14503 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
14507 (match_operand:V4SI 2 "vector_operand" "xBm,xm")
14508 (parallel [(const_int 0)]))
14509 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
14511 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
14512 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
14515 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
14516 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
14517 [(set_attr "isa" "noavx,avx")
14518 (set_attr "type" "sseiadd")
14519 (set_attr "atom_unit" "complex")
14520 (set_attr "prefix_data16" "1,*")
14521 (set_attr "prefix_extra" "1")
14522 (set_attr "prefix" "orig,vex")
14523 (set_attr "mode" "TI")])
14525 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
14526 [(set (match_operand:V2SI 0 "register_operand" "=y")
14530 (match_operand:V2SI 1 "register_operand" "0")
14531 (parallel [(const_int 0)]))
14532 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
14535 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
14536 (parallel [(const_int 0)]))
14537 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
14539 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
14540 [(set_attr "type" "sseiadd")
14541 (set_attr "atom_unit" "complex")
14542 (set_attr "prefix_extra" "1")
14543 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14544 (set_attr "mode" "DI")])
14546 (define_insn "avx2_pmaddubsw256"
14547 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
14552 (match_operand:V32QI 1 "register_operand" "x,v")
14553 (parallel [(const_int 0) (const_int 2)
14554 (const_int 4) (const_int 6)
14555 (const_int 8) (const_int 10)
14556 (const_int 12) (const_int 14)
14557 (const_int 16) (const_int 18)
14558 (const_int 20) (const_int 22)
14559 (const_int 24) (const_int 26)
14560 (const_int 28) (const_int 30)])))
14563 (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
14564 (parallel [(const_int 0) (const_int 2)
14565 (const_int 4) (const_int 6)
14566 (const_int 8) (const_int 10)
14567 (const_int 12) (const_int 14)
14568 (const_int 16) (const_int 18)
14569 (const_int 20) (const_int 22)
14570 (const_int 24) (const_int 26)
14571 (const_int 28) (const_int 30)]))))
14574 (vec_select:V16QI (match_dup 1)
14575 (parallel [(const_int 1) (const_int 3)
14576 (const_int 5) (const_int 7)
14577 (const_int 9) (const_int 11)
14578 (const_int 13) (const_int 15)
14579 (const_int 17) (const_int 19)
14580 (const_int 21) (const_int 23)
14581 (const_int 25) (const_int 27)
14582 (const_int 29) (const_int 31)])))
14584 (vec_select:V16QI (match_dup 2)
14585 (parallel [(const_int 1) (const_int 3)
14586 (const_int 5) (const_int 7)
14587 (const_int 9) (const_int 11)
14588 (const_int 13) (const_int 15)
14589 (const_int 17) (const_int 19)
14590 (const_int 21) (const_int 23)
14591 (const_int 25) (const_int 27)
14592 (const_int 29) (const_int 31)]))))))]
14594 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
14595 [(set_attr "isa" "*,avx512bw")
14596 (set_attr "type" "sseiadd")
14597 (set_attr "prefix_extra" "1")
14598 (set_attr "prefix" "vex,evex")
14599 (set_attr "mode" "OI")])
14601 ;; The correct representation for this is absolutely enormous, and
14602 ;; surely not generally useful.
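;; As a rough sketch (a paraphrase of the instruction, not a formula taken
;; from this file), each result word is
;;   dst.word[i] = ss_sat16 (op1.ubyte[2*i]   * op2.sbyte[2*i]
;;                           + op1.ubyte[2*i+1] * op2.sbyte[2*i+1])
;; i.e. an unsigned-by-signed multiply of adjacent byte pairs followed by a
;; signed saturating add, hence the unspec rather than the full RTL tree.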
14603 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
14604 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
14605 (unspec:VI2_AVX512VL
14606 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
14607 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
14608 UNSPEC_PMADDUBSW512))]
14610 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
14611 [(set_attr "type" "sseiadd")
14612 (set_attr "prefix" "evex")
14613 (set_attr "mode" "XI")])
14615 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
14616 [(set (match_operand:V32HI 0 "register_operand" "=v")
14623 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
14625 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
14627 (const_vector:V32HI [(const_int 1) (const_int 1)
14628 (const_int 1) (const_int 1)
14629 (const_int 1) (const_int 1)
14630 (const_int 1) (const_int 1)
14631 (const_int 1) (const_int 1)
14632 (const_int 1) (const_int 1)
14633 (const_int 1) (const_int 1)
14634 (const_int 1) (const_int 1)
14635 (const_int 1) (const_int 1)
14636 (const_int 1) (const_int 1)
14637 (const_int 1) (const_int 1)
14638 (const_int 1) (const_int 1)
14639 (const_int 1) (const_int 1)
14640 (const_int 1) (const_int 1)
14641 (const_int 1) (const_int 1)
14642 (const_int 1) (const_int 1)]))
14645 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14646 [(set_attr "type" "sseimul")
14647 (set_attr "prefix" "evex")
14648 (set_attr "mode" "XI")])
14650 (define_insn "ssse3_pmaddubsw128"
14651 [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
14656 (match_operand:V16QI 1 "register_operand" "0,x,v")
14657 (parallel [(const_int 0) (const_int 2)
14658 (const_int 4) (const_int 6)
14659 (const_int 8) (const_int 10)
14660 (const_int 12) (const_int 14)])))
14663 (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
14664 (parallel [(const_int 0) (const_int 2)
14665 (const_int 4) (const_int 6)
14666 (const_int 8) (const_int 10)
14667 (const_int 12) (const_int 14)]))))
14670 (vec_select:V8QI (match_dup 1)
14671 (parallel [(const_int 1) (const_int 3)
14672 (const_int 5) (const_int 7)
14673 (const_int 9) (const_int 11)
14674 (const_int 13) (const_int 15)])))
14676 (vec_select:V8QI (match_dup 2)
14677 (parallel [(const_int 1) (const_int 3)
14678 (const_int 5) (const_int 7)
14679 (const_int 9) (const_int 11)
14680 (const_int 13) (const_int 15)]))))))]
14683 pmaddubsw\t{%2, %0|%0, %2}
14684 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
14685 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
14686 [(set_attr "isa" "noavx,avx,avx512bw")
14687 (set_attr "type" "sseiadd")
14688 (set_attr "atom_unit" "simul")
14689 (set_attr "prefix_data16" "1,*,*")
14690 (set_attr "prefix_extra" "1")
14691 (set_attr "prefix" "orig,vex,evex")
14692 (set_attr "mode" "TI")])
14694 (define_insn "ssse3_pmaddubsw"
14695 [(set (match_operand:V4HI 0 "register_operand" "=y")
14700 (match_operand:V8QI 1 "register_operand" "0")
14701 (parallel [(const_int 0) (const_int 2)
14702 (const_int 4) (const_int 6)])))
14705 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
14706 (parallel [(const_int 0) (const_int 2)
14707 (const_int 4) (const_int 6)]))))
14710 (vec_select:V4QI (match_dup 1)
14711 (parallel [(const_int 1) (const_int 3)
14712 (const_int 5) (const_int 7)])))
14714 (vec_select:V4QI (match_dup 2)
14715 (parallel [(const_int 1) (const_int 3)
14716 (const_int 5) (const_int 7)]))))))]
14718 "pmaddubsw\t{%2, %0|%0, %2}"
14719 [(set_attr "type" "sseiadd")
14720 (set_attr "atom_unit" "simul")
14721 (set_attr "prefix_extra" "1")
14722 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14723 (set_attr "mode" "DI")])
14725 (define_mode_iterator PMULHRSW
14726 [V4HI V8HI (V16HI "TARGET_AVX2")])
14728 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
14729 [(set (match_operand:PMULHRSW 0 "register_operand")
14730 (vec_merge:PMULHRSW
14732 (lshiftrt:<ssedoublemode>
14733 (plus:<ssedoublemode>
14734 (lshiftrt:<ssedoublemode>
14735 (mult:<ssedoublemode>
14736 (sign_extend:<ssedoublemode>
14737 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
14738 (sign_extend:<ssedoublemode>
14739 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
14743 (match_operand:PMULHRSW 3 "register_operand")
14744 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
14745 "TARGET_AVX512BW && TARGET_AVX512VL"
14747 operands[5] = CONST1_RTX(<MODE>mode);
14748 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
14751 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
14752 [(set (match_operand:PMULHRSW 0 "register_operand")
14754 (lshiftrt:<ssedoublemode>
14755 (plus:<ssedoublemode>
14756 (lshiftrt:<ssedoublemode>
14757 (mult:<ssedoublemode>
14758 (sign_extend:<ssedoublemode>
14759 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
14760 (sign_extend:<ssedoublemode>
14761 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
14767 operands[3] = CONST1_RTX(<MODE>mode);
14768 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
14771 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
14772 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
14774 (lshiftrt:<ssedoublemode>
14775 (plus:<ssedoublemode>
14776 (lshiftrt:<ssedoublemode>
14777 (mult:<ssedoublemode>
14778 (sign_extend:<ssedoublemode>
14779 (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
14780 (sign_extend:<ssedoublemode>
14781 (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
14783 (match_operand:VI2_AVX2 3 "const1_operand"))
14785 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
14786 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
14788 pmulhrsw\t{%2, %0|%0, %2}
14789 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
14790 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
14791 [(set_attr "isa" "noavx,avx,avx512bw")
14792 (set_attr "type" "sseimul")
14793 (set_attr "prefix_data16" "1,*,*")
14794 (set_attr "prefix_extra" "1")
14795 (set_attr "prefix" "orig,maybe_evex,evex")
14796 (set_attr "mode" "<sseinsnmode>")])
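;; In scalar terms the pmulhrsw patterns above compute, per 16-bit element,
;; roughly (a paraphrase of the RTL, not a comment from the original)
;;   dst.word[i] = (((op1.word[i] * op2.word[i]) >> 14) + 1) >> 1
;; i.e. the rounded high half of the signed 16x16 product.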
14798 (define_insn "*ssse3_pmulhrswv4hi3"
14799 [(set (match_operand:V4HI 0 "register_operand" "=y")
14806 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
14808 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
14810 (match_operand:V4HI 3 "const1_operand"))
14812 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
14813 "pmulhrsw\t{%2, %0|%0, %2}"
14814 [(set_attr "type" "sseimul")
14815 (set_attr "prefix_extra" "1")
14816 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14817 (set_attr "mode" "DI")])
14819 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
14820 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
14822 [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
14823 (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
14825 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14827 pshufb\t{%2, %0|%0, %2}
14828 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
14829 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14830 [(set_attr "isa" "noavx,avx,avx512bw")
14831 (set_attr "type" "sselog1")
14832 (set_attr "prefix_data16" "1,*,*")
14833 (set_attr "prefix_extra" "1")
14834 (set_attr "prefix" "orig,maybe_evex,evex")
14835 (set_attr "btver2_decode" "vector")
14836 (set_attr "mode" "<sseinsnmode>")])
14838 (define_insn "ssse3_pshufbv8qi3"
14839 [(set (match_operand:V8QI 0 "register_operand" "=y")
14840 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
14841 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
14844 "pshufb\t{%2, %0|%0, %2}";
14845 [(set_attr "type" "sselog1")
14846 (set_attr "prefix_extra" "1")
14847 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14848 (set_attr "mode" "DI")])
14850 (define_insn "<ssse3_avx2>_psign<mode>3"
14851 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
14853 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
14854 (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
14858 psign<ssemodesuffix>\t{%2, %0|%0, %2}
14859 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14860 [(set_attr "isa" "noavx,avx")
14861 (set_attr "type" "sselog1")
14862 (set_attr "prefix_data16" "1,*")
14863 (set_attr "prefix_extra" "1")
14864 (set_attr "prefix" "orig,vex")
14865 (set_attr "mode" "<sseinsnmode>")])
14867 (define_insn "ssse3_psign<mode>3"
14868 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
14870 [(match_operand:MMXMODEI 1 "register_operand" "0")
14871 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
14874 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
14875 [(set_attr "type" "sselog1")
14876 (set_attr "prefix_extra" "1")
14877 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14878 (set_attr "mode" "DI")])
14880 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
14881 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
14882 (vec_merge:VI1_AVX512
14884 [(match_operand:VI1_AVX512 1 "register_operand" "v")
14885 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
14886 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
14888 (match_operand:VI1_AVX512 4 "vector_move_operand" "0C")
14889 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
14890 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
14892 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14893 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
14895 [(set_attr "type" "sseishft")
14896 (set_attr "atom_unit" "sishuf")
14897 (set_attr "prefix_extra" "1")
14898 (set_attr "length_immediate" "1")
14899 (set_attr "prefix" "evex")
14900 (set_attr "mode" "<sseinsnmode>")])
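;; Note (inferred from const_0_to_255_mul_8_operand and the GEN_INT
;; adjustments in the palignr patterns here): operand 3 expresses the shift
;; in bits and is always a multiple of 8, while the instruction takes a byte
;; count, hence INTVAL (operands[3]) / 8 before the template is printed.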
14902 (define_insn "<ssse3_avx2>_palignr<mode>"
14903 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
14904 (unspec:SSESCALARMODE
14905 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
14906 (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
14907 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
14911 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14913 switch (which_alternative)
14916 return "palignr\t{%3, %2, %0|%0, %2, %3}";
14919 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14921 gcc_unreachable ();
14924 [(set_attr "isa" "noavx,avx,avx512bw")
14925 (set_attr "type" "sseishft")
14926 (set_attr "atom_unit" "sishuf")
14927 (set_attr "prefix_data16" "1,*,*")
14928 (set_attr "prefix_extra" "1")
14929 (set_attr "length_immediate" "1")
14930 (set_attr "prefix" "orig,vex,evex")
14931 (set_attr "mode" "<sseinsnmode>")])
14933 (define_insn "ssse3_palignrdi"
14934 [(set (match_operand:DI 0 "register_operand" "=y")
14935 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
14936 (match_operand:DI 2 "nonimmediate_operand" "ym")
14937 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
14941 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
14942 return "palignr\t{%3, %2, %0|%0, %2, %3}";
14944 [(set_attr "type" "sseishft")
14945 (set_attr "atom_unit" "sishuf")
14946 (set_attr "prefix_extra" "1")
14947 (set_attr "length_immediate" "1")
14948 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
14949 (set_attr "mode" "DI")])
14951 ;; Mode iterator to handle the singularity caused by the absence of V2DI
14952 ;; and V4DI modes for the abs instruction on pre-AVX-512 targets.
14953 (define_mode_iterator VI1248_AVX512VL_AVX512BW
14954 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
14955 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
14956 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
14957 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
14959 (define_insn "*abs<mode>2"
14960 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
14961 (abs:VI1248_AVX512VL_AVX512BW
14962 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
14964 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
14965 [(set_attr "type" "sselog1")
14966 (set_attr "prefix_data16" "1")
14967 (set_attr "prefix_extra" "1")
14968 (set_attr "prefix" "maybe_vex")
14969 (set_attr "mode" "<sseinsnmode>")])
14971 (define_insn "abs<mode>2_mask"
14972 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
14973 (vec_merge:VI48_AVX512VL
14975 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
14976 (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C")
14977 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
14979 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14980 [(set_attr "type" "sselog1")
14981 (set_attr "prefix" "evex")
14982 (set_attr "mode" "<sseinsnmode>")])
14984 (define_insn "abs<mode>2_mask"
14985 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
14986 (vec_merge:VI12_AVX512VL
14988 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
14989 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C")
14990 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
14992 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
14993 [(set_attr "type" "sselog1")
14994 (set_attr "prefix" "evex")
14995 (set_attr "mode" "<sseinsnmode>")])
14997 (define_expand "abs<mode>2"
14998 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
14999 (abs:VI1248_AVX512VL_AVX512BW
15000 (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand")))]
15005 ix86_expand_sse2_abs (operands[0], operands[1]);
15010 (define_insn "abs<mode>2"
15011 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
15013 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
15015 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
15016 [(set_attr "type" "sselog1")
15017 (set_attr "prefix_rep" "0")
15018 (set_attr "prefix_extra" "1")
15019 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
15020 (set_attr "mode" "DI")])
15022 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15024 ;; AMD SSE4A instructions
15026 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15028 (define_insn "sse4a_movnt<mode>"
15029 [(set (match_operand:MODEF 0 "memory_operand" "=m")
15031 [(match_operand:MODEF 1 "register_operand" "x")]
15034 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
15035 [(set_attr "type" "ssemov")
15036 (set_attr "mode" "<MODE>")])
15038 (define_insn "sse4a_vmmovnt<mode>"
15039 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
15040 (unspec:<ssescalarmode>
15041 [(vec_select:<ssescalarmode>
15042 (match_operand:VF_128 1 "register_operand" "x")
15043 (parallel [(const_int 0)]))]
15046 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
15047 [(set_attr "type" "ssemov")
15048 (set_attr "mode" "<ssescalarmode>")])
15050 (define_insn "sse4a_extrqi"
15051 [(set (match_operand:V2DI 0 "register_operand" "=x")
15052 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15053 (match_operand 2 "const_0_to_255_operand")
15054 (match_operand 3 "const_0_to_255_operand")]
15057 "extrq\t{%3, %2, %0|%0, %2, %3}"
15058 [(set_attr "type" "sse")
15059 (set_attr "prefix_data16" "1")
15060 (set_attr "length_immediate" "2")
15061 (set_attr "mode" "TI")])
15063 (define_insn "sse4a_extrq"
15064 [(set (match_operand:V2DI 0 "register_operand" "=x")
15065 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15066 (match_operand:V16QI 2 "register_operand" "x")]
15069 "extrq\t{%2, %0|%0, %2}"
15070 [(set_attr "type" "sse")
15071 (set_attr "prefix_data16" "1")
15072 (set_attr "mode" "TI")])
15074 (define_insn "sse4a_insertqi"
15075 [(set (match_operand:V2DI 0 "register_operand" "=x")
15076 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15077 (match_operand:V2DI 2 "register_operand" "x")
15078 (match_operand 3 "const_0_to_255_operand")
15079 (match_operand 4 "const_0_to_255_operand")]
15082 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
15083 [(set_attr "type" "sseins")
15084 (set_attr "prefix_data16" "0")
15085 (set_attr "prefix_rep" "1")
15086 (set_attr "length_immediate" "2")
15087 (set_attr "mode" "TI")])
15089 (define_insn "sse4a_insertq"
15090 [(set (match_operand:V2DI 0 "register_operand" "=x")
15091 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
15092 (match_operand:V2DI 2 "register_operand" "x")]
15095 "insertq\t{%2, %0|%0, %2}"
15096 [(set_attr "type" "sseins")
15097 (set_attr "prefix_data16" "0")
15098 (set_attr "prefix_rep" "1")
15099 (set_attr "mode" "TI")])
15101 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15103 ;; Intel SSE4.1 instructions
15105 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15107 ;; Mapping of immediate bits for blend instructions
15108 (define_mode_attr blendbits
15109 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
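;; Reading of the vec_merge encoding used below (inferred from the patterns,
;; not a comment in the original): bit i of the immediate picks element i
;; from operand 2 when set and from operand 1 when clear, so V2DF needs 2
;; mask bits (max 3), V4SF/V4DF need 4 (max 15) and V8SF all 8 (max 255).
;; E.g. for V4SF an immediate of 5 (0b0101) takes elements 0 and 2 from
;; operand 2 and elements 1 and 3 from operand 1.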
15111 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
15112 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15113 (vec_merge:VF_128_256
15114 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15115 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
15116 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
15119 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15120 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15121 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15122 [(set_attr "isa" "noavx,noavx,avx")
15123 (set_attr "type" "ssemov")
15124 (set_attr "length_immediate" "1")
15125 (set_attr "prefix_data16" "1,1,*")
15126 (set_attr "prefix_extra" "1")
15127 (set_attr "prefix" "orig,orig,vex")
15128 (set_attr "mode" "<MODE>")])
15130 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
15131 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15133 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
15134 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15135 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
15139 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15140 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15141 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15142 [(set_attr "isa" "noavx,noavx,avx")
15143 (set_attr "type" "ssemov")
15144 (set_attr "length_immediate" "1")
15145 (set_attr "prefix_data16" "1,1,*")
15146 (set_attr "prefix_extra" "1")
15147 (set_attr "prefix" "orig,orig,vex")
15148 (set_attr "btver2_decode" "vector,vector,vector")
15149 (set_attr "mode" "<MODE>")])
15151 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
15152 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15154 [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
15155 (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
15156 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
15160 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15161 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
15162 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15163 [(set_attr "isa" "noavx,noavx,avx")
15164 (set_attr "type" "ssemul")
15165 (set_attr "length_immediate" "1")
15166 (set_attr "prefix_data16" "1,1,*")
15167 (set_attr "prefix_extra" "1")
15168 (set_attr "prefix" "orig,orig,vex")
15169 (set_attr "btver2_decode" "vector,vector,vector")
15170 (set_attr "znver1_decode" "vector,vector,vector")
15171 (set_attr "mode" "<MODE>")])
15173 ;; Mode attribute used by `vmovntdqa' pattern
15174 (define_mode_attr vi8_sse4_1_avx2_avx512
15175 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
15177 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
15178 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
15179 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
15182 "%vmovntdqa\t{%1, %0|%0, %1}"
15183 [(set_attr "isa" "noavx,noavx,avx")
15184 (set_attr "type" "ssemov")
15185 (set_attr "prefix_extra" "1,1,*")
15186 (set_attr "prefix" "orig,orig,maybe_evex")
15187 (set_attr "mode" "<sseinsnmode>")])
15189 (define_insn "<sse4_1_avx2>_mpsadbw"
15190 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
15192 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
15193 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
15194 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
15198 mpsadbw\t{%3, %2, %0|%0, %2, %3}
15199 mpsadbw\t{%3, %2, %0|%0, %2, %3}
15200 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15201 [(set_attr "isa" "noavx,noavx,avx")
15202 (set_attr "type" "sselog1")
15203 (set_attr "length_immediate" "1")
15204 (set_attr "prefix_extra" "1")
15205 (set_attr "prefix" "orig,orig,vex")
15206 (set_attr "btver2_decode" "vector,vector,vector")
15207 (set_attr "znver1_decode" "vector,vector,vector")
15208 (set_attr "mode" "<sseinsnmode>")])
15210 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
15211 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
15212 (vec_concat:VI2_AVX2
15213 (us_truncate:<ssehalfvecmode>
15214 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
15215 (us_truncate:<ssehalfvecmode>
15216 (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
15217 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
15219 packusdw\t{%2, %0|%0, %2}
15220 packusdw\t{%2, %0|%0, %2}
15221 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
15222 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15223 [(set_attr "isa" "noavx,noavx,avx,avx512bw")
15224 (set_attr "type" "sselog")
15225 (set_attr "prefix_extra" "1")
15226 (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
15227 (set_attr "mode" "<sseinsnmode>")])
15229 (define_insn "<sse4_1_avx2>_pblendvb"
15230 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
15232 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
15233 (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
15234 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
15238 pblendvb\t{%3, %2, %0|%0, %2, %3}
15239 pblendvb\t{%3, %2, %0|%0, %2, %3}
15240 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15241 [(set_attr "isa" "noavx,noavx,avx")
15242 (set_attr "type" "ssemov")
15243 (set_attr "prefix_extra" "1")
15244 (set_attr "length_immediate" "*,*,1")
15245 (set_attr "prefix" "orig,orig,vex")
15246 (set_attr "btver2_decode" "vector,vector,vector")
15247 (set_attr "mode" "<sseinsnmode>")])
15249 (define_insn "sse4_1_pblendw"
15250 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
15252 (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
15253 (match_operand:V8HI 1 "register_operand" "0,0,x")
15254 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
15257 pblendw\t{%3, %2, %0|%0, %2, %3}
15258 pblendw\t{%3, %2, %0|%0, %2, %3}
15259 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15260 [(set_attr "isa" "noavx,noavx,avx")
15261 (set_attr "type" "ssemov")
15262 (set_attr "prefix_extra" "1")
15263 (set_attr "length_immediate" "1")
15264 (set_attr "prefix" "orig,orig,vex")
15265 (set_attr "mode" "TI")])
15267 ;; The builtin uses an 8-bit immediate. Expand that.
15268 (define_expand "avx2_pblendw"
15269 [(set (match_operand:V16HI 0 "register_operand")
15271 (match_operand:V16HI 2 "nonimmediate_operand")
15272 (match_operand:V16HI 1 "register_operand")
15273 (match_operand:SI 3 "const_0_to_255_operand")))]
15276 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
15277 operands[3] = GEN_INT (val << 8 | val);
15280 (define_insn "*avx2_pblendw"
15281 [(set (match_operand:V16HI 0 "register_operand" "=x")
15283 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
15284 (match_operand:V16HI 1 "register_operand" "x")
15285 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
15288 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
15289 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
15291 [(set_attr "type" "ssemov")
15292 (set_attr "prefix_extra" "1")
15293 (set_attr "length_immediate" "1")
15294 (set_attr "prefix" "vex")
15295 (set_attr "mode" "OI")])
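;; Worked example for the avx2_pblendw expansion above (illustrative): a
;; builtin immediate of 0x0f becomes 0x0f0f, a 16-bit vec_merge mask that
;; selects words 0-3 of each 128-bit lane from operand 2; the *avx2_pblendw
;; insn then masks it back to 8 bits when printing, since vpblendw applies
;; the same 8-bit immediate to both lanes.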
15297 (define_insn "avx2_pblendd<mode>"
15298 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
15299 (vec_merge:VI4_AVX2
15300 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
15301 (match_operand:VI4_AVX2 1 "register_operand" "x")
15302 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
15304 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15305 [(set_attr "type" "ssemov")
15306 (set_attr "prefix_extra" "1")
15307 (set_attr "length_immediate" "1")
15308 (set_attr "prefix" "vex")
15309 (set_attr "mode" "<sseinsnmode>")])
15311 (define_insn "sse4_1_phminposuw"
15312 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
15313 (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
15314 UNSPEC_PHMINPOSUW))]
15316 "%vphminposuw\t{%1, %0|%0, %1}"
15317 [(set_attr "isa" "noavx,noavx,avx")
15318 (set_attr "type" "sselog1")
15319 (set_attr "prefix_extra" "1")
15320 (set_attr "prefix" "orig,orig,vex")
15321 (set_attr "mode" "TI")])
15323 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
15324 [(set (match_operand:V16HI 0 "register_operand" "=v")
15326 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
15327 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15328 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15329 [(set_attr "type" "ssemov")
15330 (set_attr "prefix_extra" "1")
15331 (set_attr "prefix" "maybe_evex")
15332 (set_attr "mode" "OI")])
15334 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
15335 [(set (match_operand:V32HI 0 "register_operand" "=v")
15337 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
15339 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15340 [(set_attr "type" "ssemov")
15341 (set_attr "prefix_extra" "1")
15342 (set_attr "prefix" "evex")
15343 (set_attr "mode" "XI")])
15345 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
15346 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
15349 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15350 (parallel [(const_int 0) (const_int 1)
15351 (const_int 2) (const_int 3)
15352 (const_int 4) (const_int 5)
15353 (const_int 6) (const_int 7)]))))]
15354 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
15355 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15356 [(set_attr "isa" "noavx,noavx,avx")
15357 (set_attr "type" "ssemov")
15358 (set_attr "prefix_extra" "1")
15359 (set_attr "prefix" "orig,orig,maybe_evex")
15360 (set_attr "mode" "TI")])
15362 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
15363 [(set (match_operand:V16SI 0 "register_operand" "=v")
15365 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
15367 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15368 [(set_attr "type" "ssemov")
15369 (set_attr "prefix" "evex")
15370 (set_attr "mode" "XI")])
15372 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
15373 [(set (match_operand:V8SI 0 "register_operand" "=v")
15376 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15377 (parallel [(const_int 0) (const_int 1)
15378 (const_int 2) (const_int 3)
15379 (const_int 4) (const_int 5)
15380 (const_int 6) (const_int 7)]))))]
15381 "TARGET_AVX2 && <mask_avx512vl_condition>"
15382 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15383 [(set_attr "type" "ssemov")
15384 (set_attr "prefix_extra" "1")
15385 (set_attr "prefix" "maybe_evex")
15386 (set_attr "mode" "OI")])
15388 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
15389 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
15392 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15393 (parallel [(const_int 0) (const_int 1)
15394 (const_int 2) (const_int 3)]))))]
15395 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15396 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15397 [(set_attr "isa" "noavx,noavx,avx")
15398 (set_attr "type" "ssemov")
15399 (set_attr "prefix_extra" "1")
15400 (set_attr "prefix" "orig,orig,maybe_evex")
15401 (set_attr "mode" "TI")])
15403 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
15404 [(set (match_operand:V16SI 0 "register_operand" "=v")
15406 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
15408 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15409 [(set_attr "type" "ssemov")
15410 (set_attr "prefix" "evex")
15411 (set_attr "mode" "XI")])
15413 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
15414 [(set (match_operand:V8SI 0 "register_operand" "=v")
15416 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
15417 "TARGET_AVX2 && <mask_avx512vl_condition>"
15418 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15419 [(set_attr "type" "ssemov")
15420 (set_attr "prefix_extra" "1")
15421 (set_attr "prefix" "maybe_evex")
15422 (set_attr "mode" "OI")])
15424 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
15425 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
15428 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15429 (parallel [(const_int 0) (const_int 1)
15430 (const_int 2) (const_int 3)]))))]
15431 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15432 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15433 [(set_attr "isa" "noavx,noavx,avx")
15434 (set_attr "type" "ssemov")
15435 (set_attr "prefix_extra" "1")
15436 (set_attr "prefix" "orig,orig,maybe_evex")
15437 (set_attr "mode" "TI")])
15439 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
15440 [(set (match_operand:V8DI 0 "register_operand" "=v")
15443 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15444 (parallel [(const_int 0) (const_int 1)
15445 (const_int 2) (const_int 3)
15446 (const_int 4) (const_int 5)
15447 (const_int 6) (const_int 7)]))))]
15449 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15450 [(set_attr "type" "ssemov")
15451 (set_attr "prefix" "evex")
15452 (set_attr "mode" "XI")])
15454 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
15455 [(set (match_operand:V4DI 0 "register_operand" "=v")
15458 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
15459 (parallel [(const_int 0) (const_int 1)
15460 (const_int 2) (const_int 3)]))))]
15461 "TARGET_AVX2 && <mask_avx512vl_condition>"
15462 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15463 [(set_attr "type" "ssemov")
15464 (set_attr "prefix_extra" "1")
15465 (set_attr "prefix" "maybe_evex")
15466 (set_attr "mode" "OI")])
15468 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
15469 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15472 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15473 (parallel [(const_int 0) (const_int 1)]))))]
15474 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15475 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
15476 [(set_attr "isa" "noavx,noavx,avx")
15477 (set_attr "type" "ssemov")
15478 (set_attr "prefix_extra" "1")
15479 (set_attr "prefix" "orig,orig,maybe_evex")
15480 (set_attr "mode" "TI")])
15482 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
15483 [(set (match_operand:V8DI 0 "register_operand" "=v")
15485 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
15487 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15488 [(set_attr "type" "ssemov")
15489 (set_attr "prefix" "evex")
15490 (set_attr "mode" "XI")])
15492 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
15493 [(set (match_operand:V4DI 0 "register_operand" "=v")
15496 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
15497 (parallel [(const_int 0) (const_int 1)
15498 (const_int 2) (const_int 3)]))))]
15499 "TARGET_AVX2 && <mask_avx512vl_condition>"
15500 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15501 [(set_attr "type" "ssemov")
15502 (set_attr "prefix_extra" "1")
15503 (set_attr "prefix" "maybe_evex")
15504 (set_attr "mode" "OI")])
15506 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
15507 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15510 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15511 (parallel [(const_int 0) (const_int 1)]))))]
15512 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15513 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
15514 [(set_attr "isa" "noavx,noavx,avx")
15515 (set_attr "type" "ssemov")
15516 (set_attr "prefix_extra" "1")
15517 (set_attr "prefix" "orig,orig,maybe_evex")
15518 (set_attr "mode" "TI")])
15520 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
15521 [(set (match_operand:V8DI 0 "register_operand" "=v")
15523 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
15525 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15526 [(set_attr "type" "ssemov")
15527 (set_attr "prefix" "evex")
15528 (set_attr "mode" "XI")])
15530 (define_insn "avx2_<code>v4siv4di2<mask_name>"
15531 [(set (match_operand:V4DI 0 "register_operand" "=v")
15533 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
15534 "TARGET_AVX2 && <mask_avx512vl_condition>"
15535 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15536 [(set_attr "type" "ssemov")
15537 (set_attr "prefix" "maybe_evex")
15538 (set_attr "prefix_extra" "1")
15539 (set_attr "mode" "OI")])
15541 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
15542 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
15545 (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*xm,vm")
15546 (parallel [(const_int 0) (const_int 1)]))))]
15547 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
15548 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
15549 [(set_attr "isa" "noavx,noavx,avx")
15550 (set_attr "type" "ssemov")
15551 (set_attr "prefix_extra" "1")
15552 (set_attr "prefix" "orig,orig,maybe_evex")
15553 (set_attr "mode" "TI")])
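;; For illustration: these pmovsx/pmovzx patterns are what the SSE4.1/AVX2
;; sign- and zero-extension intrinsics normally expand to, e.g. in C
;;   __m128i d = _mm_cvtepi16_epi32 (w);     /* pmovsxwd: 4 x i16 -> 4 x i32 */
;;   __m256i q = _mm256_cvtepu32_epi64 (d);  /* vpmovzxdq: 4 x u32 -> 4 x i64 */
;; where <code>/<extsuffix> select the signed (sx) or unsigned (zx) form.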
15555 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
15556 ;; setting FLAGS_REG.  But they are not really compare instructions.
15557 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
15558 [(set (reg:CC FLAGS_REG)
15559 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
15560 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
15563 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
15564 [(set_attr "type" "ssecomi")
15565 (set_attr "prefix_extra" "1")
15566 (set_attr "prefix" "vex")
15567 (set_attr "mode" "<MODE>")])
15569 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
15570 ;; But it is not really a compare instruction.
15571 (define_insn "<sse4_1>_ptest<mode>"
15572 [(set (reg:CC FLAGS_REG)
15573 (unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
15574 (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
15577 "%vptest\t{%1, %0|%0, %1}"
15578 [(set_attr "isa" "noavx,noavx,avx")
15579 (set_attr "type" "ssecomi")
15580 (set_attr "prefix_extra" "1")
15581 (set_attr "prefix" "orig,orig,vex")
15582 (set (attr "btver2_decode")
15584 (match_test "<sseinsnmode>mode==OImode")
15585 (const_string "vector")
15586 (const_string "*")))
15587 (set_attr "mode" "<sseinsnmode>")])
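;; For illustration: the flag results of ptest are normally consumed through
;; the SSE4.1 test intrinsics, e.g. in C
;;   int is_zero   = _mm_testz_si128 (a, b);   /* ZF: (a & b) == 0   */
;;   int is_subset = _mm_testc_si128 (a, b);   /* CF: (~a & b) == 0  */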
15589 (define_insn "ptesttf2"
15590 [(set (reg:CC FLAGS_REG)
15591 (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
15592 (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
15595 "%vptest\t{%1, %0|%0, %1}"
15596 [(set_attr "isa" "noavx,noavx,avx")
15597 (set_attr "type" "ssecomi")
15598 (set_attr "prefix_extra" "1")
15599 (set_attr "prefix" "orig,orig,vex")
15600 (set_attr "mode" "TI")])
15602 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
15603 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
15605 [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
15606 (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
15609 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15610 [(set_attr "isa" "noavx,noavx,avx")
15611 (set_attr "type" "ssecvt")
15612 (set_attr "prefix_data16" "1,1,*")
15613 (set_attr "prefix_extra" "1")
15614 (set_attr "length_immediate" "1")
15615 (set_attr "prefix" "orig,orig,vex")
15616 (set_attr "mode" "<MODE>")])
15618 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
15619 [(match_operand:<sseintvecmode> 0 "register_operand")
15620 (match_operand:VF1_128_256 1 "vector_operand")
15621 (match_operand:SI 2 "const_0_to_15_operand")]
15624 rtx tmp = gen_reg_rtx (<MODE>mode);
15627 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
15630 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
15634 (define_expand "avx512f_round<castmode>512"
15635 [(match_operand:VF_512 0 "register_operand")
15636 (match_operand:VF_512 1 "nonimmediate_operand")
15637 (match_operand:SI 2 "const_0_to_15_operand")]
15640 emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
15644 (define_expand "avx512f_roundps512_sfix"
15645 [(match_operand:V16SI 0 "register_operand")
15646 (match_operand:V16SF 1 "nonimmediate_operand")
15647 (match_operand:SI 2 "const_0_to_15_operand")]
15650 rtx tmp = gen_reg_rtx (V16SFmode);
15651 emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
15652 emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
15656 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
15657 [(match_operand:<ssepackfltmode> 0 "register_operand")
15658 (match_operand:VF2 1 "vector_operand")
15659 (match_operand:VF2 2 "vector_operand")
15660 (match_operand:SI 3 "const_0_to_15_operand")]
15665 if (<MODE>mode == V2DFmode
15666 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
15668 rtx tmp2 = gen_reg_rtx (V4DFmode);
15670 tmp0 = gen_reg_rtx (V4DFmode);
15671 tmp1 = force_reg (V2DFmode, operands[1]);
15673 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
15674 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
15675 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
15679 tmp0 = gen_reg_rtx (<MODE>mode);
15680 tmp1 = gen_reg_rtx (<MODE>mode);
15683 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
15686 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
15689 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
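;; Note on the expander above: when AVX is usable and 256-bit vectors are not
;; being avoided, the two V2DF inputs are concatenated into one V4DF so that a
;; single 256-bit round plus truncating conversion produces the packed V4SI
;; result; otherwise each half is rounded separately and then packed.
;; Roughly, in C intrinsics:
;;   __m256d t = _mm256_insertf128_pd (_mm256_castpd128_pd256 (a), b, 1);
;;   __m128i r = _mm256_cvttpd_epi32 (_mm256_round_pd (t, rounding));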
15694 (define_insn "sse4_1_round<ssescalarmodesuffix>"
15695 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
15698 [(match_operand:VF_128 2 "register_operand" "Yr,*x,x,v")
15699 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
15701 (match_operand:VF_128 1 "register_operand" "0,0,x,v")
15705 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
15706 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
15707 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
15708 vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15709 [(set_attr "isa" "noavx,noavx,avx,avx512f")
15710 (set_attr "type" "ssecvt")
15711 (set_attr "length_immediate" "1")
15712 (set_attr "prefix_data16" "1,1,*,*")
15713 (set_attr "prefix_extra" "1")
15714 (set_attr "prefix" "orig,orig,vex,evex")
15715 (set_attr "mode" "<MODE>")])
15717 (define_expand "round<mode>2"
15718 [(set (match_dup 3)
15720 (match_operand:VF 1 "register_operand")
15722 (set (match_operand:VF 0 "register_operand")
15724 [(match_dup 3) (match_dup 4)]
15726 "TARGET_SSE4_1 && !flag_trapping_math"
15728 machine_mode scalar_mode;
15729 const struct real_format *fmt;
15730 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
15731 rtx half, vec_half;
15733 scalar_mode = GET_MODE_INNER (<MODE>mode);
15735   /* Load nextafter (0.5, 0.0), i.e. the largest value strictly below 0.5.  */
15736 fmt = REAL_MODE_FORMAT (scalar_mode);
15737 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
15738 real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
15739 half = const_double_from_real_value (pred_half, scalar_mode);
15741 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
15742 vec_half = force_reg (<MODE>mode, vec_half);
15744 operands[2] = gen_reg_rtx (<MODE>mode);
15745 emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]));
15747 operands[3] = gen_reg_rtx (<MODE>mode);
15748 operands[4] = GEN_INT (ROUND_TRUNC);
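;; A worked example of why the addend is nextafter (0.5, 0.0) rather than 0.5
;; itself (assuming IEEE binary32, p = 24): for x = 0.5 - 2^-25, which must
;; round to 0,
;;   x + 0.5             = 1 - 2^-25, which rounds up to 1.0 -> trunc gives 1
;;   x + (0.5 - 2^-25)   = 1 - 2^-24, exactly representable  -> trunc gives 0
;; so the slightly smaller constant keeps these near-halfway cases correct.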
15751 (define_expand "round<mode>2_sfix"
15752 [(match_operand:<sseintvecmode> 0 "register_operand")
15753 (match_operand:VF1 1 "register_operand")]
15754 "TARGET_SSE4_1 && !flag_trapping_math"
15756 rtx tmp = gen_reg_rtx (<MODE>mode);
15758 emit_insn (gen_round<mode>2 (tmp, operands[1]));
15761 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
15765 (define_expand "round<mode>2_vec_pack_sfix"
15766 [(match_operand:<ssepackfltmode> 0 "register_operand")
15767 (match_operand:VF2 1 "register_operand")
15768 (match_operand:VF2 2 "register_operand")]
15769 "TARGET_SSE4_1 && !flag_trapping_math"
15773 if (<MODE>mode == V2DFmode
15774 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
15776 rtx tmp2 = gen_reg_rtx (V4DFmode);
15778 tmp0 = gen_reg_rtx (V4DFmode);
15779 tmp1 = force_reg (V2DFmode, operands[1]);
15781 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
15782 emit_insn (gen_roundv4df2 (tmp2, tmp0));
15783 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
15787 tmp0 = gen_reg_rtx (<MODE>mode);
15788 tmp1 = gen_reg_rtx (<MODE>mode);
15790 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
15791 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
15794 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
15799 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15801 ;; Intel SSE4.2 string/text processing instructions
15803 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15805 (define_insn_and_split "sse4_2_pcmpestr"
15806 [(set (match_operand:SI 0 "register_operand" "=c,c")
15808 [(match_operand:V16QI 2 "register_operand" "x,x")
15809 (match_operand:SI 3 "register_operand" "a,a")
15810 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
15811 (match_operand:SI 5 "register_operand" "d,d")
15812 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
15814 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
15822 (set (reg:CC FLAGS_REG)
15831 && can_create_pseudo_p ()"
15836 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15837 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15838 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15841 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
15842 operands[3], operands[4],
15843 operands[5], operands[6]));
15845 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
15846 operands[3], operands[4],
15847 operands[5], operands[6]));
15848 if (flags && !(ecx || xmm0))
15849 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
15850 operands[2], operands[3],
15851 operands[4], operands[5],
15853 if (!(flags || ecx || xmm0))
15854 emit_note (NOTE_INSN_DELETED);
15858 [(set_attr "type" "sselog")
15859 (set_attr "prefix_data16" "1")
15860 (set_attr "prefix_extra" "1")
15861 (set_attr "length_immediate" "1")
15862 (set_attr "memory" "none,load")
15863 (set_attr "mode" "TI")])
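;; The combined pcmpestr pattern above lets the RTL optimizers see all three
;; results (ECX, XMM0 and the flags); after reload the splitter inspects the
;; REG_UNUSED notes and keeps only the forms whose results are live.  E.g.
;;   int i = _mm_cmpestri (a, la, b, lb, _SIDD_CMP_EQUAL_EACH);
;; needs only the index, so a single pcmpestri survives the split.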
15865 (define_insn "sse4_2_pcmpestri"
15866 [(set (match_operand:SI 0 "register_operand" "=c,c")
15868 [(match_operand:V16QI 1 "register_operand" "x,x")
15869 (match_operand:SI 2 "register_operand" "a,a")
15870 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15871 (match_operand:SI 4 "register_operand" "d,d")
15872 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
15874 (set (reg:CC FLAGS_REG)
15883 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
15884 [(set_attr "type" "sselog")
15885 (set_attr "prefix_data16" "1")
15886 (set_attr "prefix_extra" "1")
15887 (set_attr "prefix" "maybe_vex")
15888 (set_attr "length_immediate" "1")
15889 (set_attr "btver2_decode" "vector")
15890 (set_attr "memory" "none,load")
15891 (set_attr "mode" "TI")])
15893 (define_insn "sse4_2_pcmpestrm"
15894 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
15896 [(match_operand:V16QI 1 "register_operand" "x,x")
15897 (match_operand:SI 2 "register_operand" "a,a")
15898 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15899 (match_operand:SI 4 "register_operand" "d,d")
15900 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
15902 (set (reg:CC FLAGS_REG)
15911 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
15912 [(set_attr "type" "sselog")
15913 (set_attr "prefix_data16" "1")
15914 (set_attr "prefix_extra" "1")
15915 (set_attr "length_immediate" "1")
15916 (set_attr "prefix" "maybe_vex")
15917 (set_attr "btver2_decode" "vector")
15918 (set_attr "memory" "none,load")
15919 (set_attr "mode" "TI")])
15921 (define_insn "sse4_2_pcmpestr_cconly"
15922 [(set (reg:CC FLAGS_REG)
15924 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15925 (match_operand:SI 3 "register_operand" "a,a,a,a")
15926 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
15927 (match_operand:SI 5 "register_operand" "d,d,d,d")
15928 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
15930 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15931 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
15934 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15935 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15936 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
15937 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
15938 [(set_attr "type" "sselog")
15939 (set_attr "prefix_data16" "1")
15940 (set_attr "prefix_extra" "1")
15941 (set_attr "length_immediate" "1")
15942 (set_attr "memory" "none,load,none,load")
15943 (set_attr "btver2_decode" "vector,vector,vector,vector")
15944 (set_attr "prefix" "maybe_vex")
15945 (set_attr "mode" "TI")])
15947 (define_insn_and_split "sse4_2_pcmpistr"
15948 [(set (match_operand:SI 0 "register_operand" "=c,c")
15950 [(match_operand:V16QI 2 "register_operand" "x,x")
15951 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15952 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
15954 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
15960 (set (reg:CC FLAGS_REG)
15967 && can_create_pseudo_p ()"
15972 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15973 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15974 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15977 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
15978 operands[3], operands[4]));
15980 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
15981 operands[3], operands[4]));
15982 if (flags && !(ecx || xmm0))
15983 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
15984 operands[2], operands[3],
15986 if (!(flags || ecx || xmm0))
15987 emit_note (NOTE_INSN_DELETED);
15991 [(set_attr "type" "sselog")
15992 (set_attr "prefix_data16" "1")
15993 (set_attr "prefix_extra" "1")
15994 (set_attr "length_immediate" "1")
15995 (set_attr "memory" "none,load")
15996 (set_attr "mode" "TI")])
15998 (define_insn "sse4_2_pcmpistri"
15999 [(set (match_operand:SI 0 "register_operand" "=c,c")
16001 [(match_operand:V16QI 1 "register_operand" "x,x")
16002 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
16003 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
16005 (set (reg:CC FLAGS_REG)
16012 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
16013 [(set_attr "type" "sselog")
16014 (set_attr "prefix_data16" "1")
16015 (set_attr "prefix_extra" "1")
16016 (set_attr "length_immediate" "1")
16017 (set_attr "prefix" "maybe_vex")
16018 (set_attr "memory" "none,load")
16019 (set_attr "btver2_decode" "vector")
16020 (set_attr "mode" "TI")])
16022 (define_insn "sse4_2_pcmpistrm"
16023 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
16025 [(match_operand:V16QI 1 "register_operand" "x,x")
16026 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
16027 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
16029 (set (reg:CC FLAGS_REG)
16036 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
16037 [(set_attr "type" "sselog")
16038 (set_attr "prefix_data16" "1")
16039 (set_attr "prefix_extra" "1")
16040 (set_attr "length_immediate" "1")
16041 (set_attr "prefix" "maybe_vex")
16042 (set_attr "memory" "none,load")
16043 (set_attr "btver2_decode" "vector")
16044 (set_attr "mode" "TI")])
16046 (define_insn "sse4_2_pcmpistr_cconly"
16047 [(set (reg:CC FLAGS_REG)
16049 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
16050 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
16051 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
16053 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
16054 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
16057 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
16058 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
16059 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
16060 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
16061 [(set_attr "type" "sselog")
16062 (set_attr "prefix_data16" "1")
16063 (set_attr "prefix_extra" "1")
16064 (set_attr "length_immediate" "1")
16065 (set_attr "memory" "none,load,none,load")
16066 (set_attr "prefix" "maybe_vex")
16067 (set_attr "btver2_decode" "vector,vector,vector,vector")
16068 (set_attr "mode" "TI")])
16070 ;; Packed float variants
16071 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
16072 [(V8DI "V8SF") (V16SI "V16SF")])
16074 (define_expand "avx512pf_gatherpf<mode>sf"
16076 [(match_operand:<avx512fmaskmode> 0 "register_operand")
16077 (mem:<GATHER_SCATTER_SF_MEM_MODE>
16079 [(match_operand 2 "vsib_address_operand")
16080 (match_operand:VI48_512 1 "register_operand")
16081 (match_operand:SI 3 "const1248_operand")]))
16082 (match_operand:SI 4 "const_2_to_3_operand")]
16083 UNSPEC_GATHER_PREFETCH)]
16087 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16088 operands[3]), UNSPEC_VSIBADDR);
16091 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
16093 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16094 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
16096 [(match_operand:P 2 "vsib_address_operand" "Tv")
16097 (match_operand:VI48_512 1 "register_operand" "v")
16098 (match_operand:SI 3 "const1248_operand" "n")]
16100 (match_operand:SI 4 "const_2_to_3_operand" "n")]
16101 UNSPEC_GATHER_PREFETCH)]
16104 switch (INTVAL (operands[4]))
16107 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
16109 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
16111 gcc_unreachable ();
16114 [(set_attr "type" "sse")
16115 (set_attr "prefix" "evex")
16116 (set_attr "mode" "XI")])
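;; Operand 4 is the locality hint, using the same encoding as GCC's
;; _MM_HINT_T0 (3) and _MM_HINT_T1 (2); it selects between the vgatherpf0*ps
;; and vgatherpf1*ps forms.  These patterns back intrinsics such as
;;   _mm512_prefetch_i32gather_ps (index, base, scale, _MM_HINT_T0);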
16118 ;; Packed double variants
16119 (define_expand "avx512pf_gatherpf<mode>df"
16121 [(match_operand:<avx512fmaskmode> 0 "register_operand")
16124 [(match_operand 2 "vsib_address_operand")
16125 (match_operand:VI4_256_8_512 1 "register_operand")
16126 (match_operand:SI 3 "const1248_operand")]))
16127 (match_operand:SI 4 "const_2_to_3_operand")]
16128 UNSPEC_GATHER_PREFETCH)]
16132 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16133 operands[3]), UNSPEC_VSIBADDR);
16136 (define_insn "*avx512pf_gatherpf<mode>df_mask"
16138 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16139 (match_operator:V8DF 5 "vsib_mem_operator"
16141 [(match_operand:P 2 "vsib_address_operand" "Tv")
16142 (match_operand:VI4_256_8_512 1 "register_operand" "v")
16143 (match_operand:SI 3 "const1248_operand" "n")]
16145 (match_operand:SI 4 "const_2_to_3_operand" "n")]
16146 UNSPEC_GATHER_PREFETCH)]
16149 switch (INTVAL (operands[4]))
16152 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
16154 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
16156 gcc_unreachable ();
16159 [(set_attr "type" "sse")
16160 (set_attr "prefix" "evex")
16161 (set_attr "mode" "XI")])
16163 ;; Packed float variants
16164 (define_expand "avx512pf_scatterpf<mode>sf"
16166 [(match_operand:<avx512fmaskmode> 0 "register_operand")
16167 (mem:<GATHER_SCATTER_SF_MEM_MODE>
16169 [(match_operand 2 "vsib_address_operand")
16170 (match_operand:VI48_512 1 "register_operand")
16171 (match_operand:SI 3 "const1248_operand")]))
16172 (match_operand:SI 4 "const2367_operand")]
16173 UNSPEC_SCATTER_PREFETCH)]
16177 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16178 operands[3]), UNSPEC_VSIBADDR);
16181 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
16183 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16184 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
16186 [(match_operand:P 2 "vsib_address_operand" "Tv")
16187 (match_operand:VI48_512 1 "register_operand" "v")
16188 (match_operand:SI 3 "const1248_operand" "n")]
16190 (match_operand:SI 4 "const2367_operand" "n")]
16191 UNSPEC_SCATTER_PREFETCH)]
16194 switch (INTVAL (operands[4]))
16198 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
16201 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
16203 gcc_unreachable ();
16206 [(set_attr "type" "sse")
16207 (set_attr "prefix" "evex")
16208 (set_attr "mode" "XI")])
16210 ;; Packed double variants
16211 (define_expand "avx512pf_scatterpf<mode>df"
16213 [(match_operand:<avx512fmaskmode> 0 "register_operand")
16216 [(match_operand 2 "vsib_address_operand")
16217 (match_operand:VI4_256_8_512 1 "register_operand")
16218 (match_operand:SI 3 "const1248_operand")]))
16219 (match_operand:SI 4 "const2367_operand")]
16220 UNSPEC_SCATTER_PREFETCH)]
16224 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
16225 operands[3]), UNSPEC_VSIBADDR);
16228 (define_insn "*avx512pf_scatterpf<mode>df_mask"
16230 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
16231 (match_operator:V8DF 5 "vsib_mem_operator"
16233 [(match_operand:P 2 "vsib_address_operand" "Tv")
16234 (match_operand:VI4_256_8_512 1 "register_operand" "v")
16235 (match_operand:SI 3 "const1248_operand" "n")]
16237 (match_operand:SI 4 "const2367_operand" "n")]
16238 UNSPEC_SCATTER_PREFETCH)]
16241 switch (INTVAL (operands[4]))
16245 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
16248 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
16250 gcc_unreachable ();
16253 [(set_attr "type" "sse")
16254 (set_attr "prefix" "evex")
16255 (set_attr "mode" "XI")])
16257 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
16258 [(set (match_operand:VF_512 0 "register_operand" "=v")
16260 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16263 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
16264 [(set_attr "prefix" "evex")
16265 (set_attr "type" "sse")
16266 (set_attr "mode" "<MODE>")])
16268 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
16269 [(set (match_operand:VF_512 0 "register_operand" "=v")
16271 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16274 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
16275 [(set_attr "prefix" "evex")
16276 (set_attr "type" "sse")
16277 (set_attr "mode" "<MODE>")])
16279 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
16280 [(set (match_operand:VF_128 0 "register_operand" "=v")
16283 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16285 (match_operand:VF_128 2 "register_operand" "v")
16288 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
16289 [(set_attr "length_immediate" "1")
16290 (set_attr "prefix" "evex")
16291 (set_attr "type" "sse")
16292 (set_attr "mode" "<MODE>")])
16294 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
16295 [(set (match_operand:VF_512 0 "register_operand" "=v")
16297 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16300 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
16301 [(set_attr "prefix" "evex")
16302 (set_attr "type" "sse")
16303 (set_attr "mode" "<MODE>")])
16305 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
16306 [(set (match_operand:VF_128 0 "register_operand" "=v")
16309 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
16311 (match_operand:VF_128 2 "register_operand" "v")
16314 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
16315 [(set_attr "length_immediate" "1")
16316 (set_attr "type" "sse")
16317 (set_attr "prefix" "evex")
16318 (set_attr "mode" "<MODE>")])
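;; Unlike the legacy ~12-bit rcpps/rsqrtps estimates, the AVX-512ER
;; vexp2*/vrcp28*/vrsqrt28* approximations are documented with a relative
;; error of at most 2^-28, precise enough that the usual Newton-Raphson
;; refinement step can often be shortened or dropped.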
16320 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16322 ;; XOP instructions
16324 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16326 (define_code_iterator xop_plus [plus ss_plus])
16328 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
16329 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
16331 ;; XOP parallel integer multiply/add instructions.
16333 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
16334 [(set (match_operand:VI24_128 0 "register_operand" "=x")
16337 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
16338 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
16339 (match_operand:VI24_128 3 "register_operand" "x")))]
16341 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16342 [(set_attr "type" "ssemuladd")
16343 (set_attr "mode" "TI")])
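;; Per the RTL above these compute d[i] = a[i] * b[i] + c[i] element-wise;
;; the plus form (vpmacsww/vpmacsdd) wraps on overflow of the addition, while
;; the ss_plus form (vpmacssww/vpmacssdd) uses a signed saturating addition.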
16345 (define_insn "xop_p<macs>dql"
16346 [(set (match_operand:V2DI 0 "register_operand" "=x")
16351 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
16352 (parallel [(const_int 0) (const_int 2)])))
16355 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
16356 (parallel [(const_int 0) (const_int 2)]))))
16357 (match_operand:V2DI 3 "register_operand" "x")))]
16359 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16360 [(set_attr "type" "ssemuladd")
16361 (set_attr "mode" "TI")])
16363 (define_insn "xop_p<macs>dqh"
16364 [(set (match_operand:V2DI 0 "register_operand" "=x")
16369 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
16370 (parallel [(const_int 1) (const_int 3)])))
16373 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
16374 (parallel [(const_int 1) (const_int 3)]))))
16375 (match_operand:V2DI 3 "register_operand" "x")))]
16377 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16378 [(set_attr "type" "ssemuladd")
16379 (set_attr "mode" "TI")])
16381 ;; XOP parallel integer multiply/add instructions for the intrinsics.
16382 (define_insn "xop_p<macs>wd"
16383 [(set (match_operand:V4SI 0 "register_operand" "=x")
16388 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
16389 (parallel [(const_int 1) (const_int 3)
16390 (const_int 5) (const_int 7)])))
16393 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
16394 (parallel [(const_int 1) (const_int 3)
16395 (const_int 5) (const_int 7)]))))
16396 (match_operand:V4SI 3 "register_operand" "x")))]
16398 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16399 [(set_attr "type" "ssemuladd")
16400 (set_attr "mode" "TI")])
16402 (define_insn "xop_p<madcs>wd"
16403 [(set (match_operand:V4SI 0 "register_operand" "=x")
16409 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
16410 (parallel [(const_int 0) (const_int 2)
16411 (const_int 4) (const_int 6)])))
16414 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
16415 (parallel [(const_int 0) (const_int 2)
16416 (const_int 4) (const_int 6)]))))
16421 (parallel [(const_int 1) (const_int 3)
16422 (const_int 5) (const_int 7)])))
16426 (parallel [(const_int 1) (const_int 3)
16427 (const_int 5) (const_int 7)])))))
16428 (match_operand:V4SI 3 "register_operand" "x")))]
16430 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16431 [(set_attr "type" "ssemuladd")
16432 (set_attr "mode" "TI")])
16434 ;; XOP parallel XMM conditional moves
16435 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
16436 [(set (match_operand:V 0 "register_operand" "=x,x")
16438 (match_operand:V 3 "nonimmediate_operand" "x,m")
16439 (match_operand:V 1 "register_operand" "x,x")
16440 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
16442 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16443 [(set_attr "type" "sse4arg")])
16445 ;; XOP horizontal add/subtract instructions
16446 (define_insn "xop_phadd<u>bw"
16447 [(set (match_operand:V8HI 0 "register_operand" "=x")
16451 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16452 (parallel [(const_int 0) (const_int 2)
16453 (const_int 4) (const_int 6)
16454 (const_int 8) (const_int 10)
16455 (const_int 12) (const_int 14)])))
16459 (parallel [(const_int 1) (const_int 3)
16460 (const_int 5) (const_int 7)
16461 (const_int 9) (const_int 11)
16462 (const_int 13) (const_int 15)])))))]
16464 "vphadd<u>bw\t{%1, %0|%0, %1}"
16465 [(set_attr "type" "sseiadd1")])
16467 (define_insn "xop_phadd<u>bd"
16468 [(set (match_operand:V4SI 0 "register_operand" "=x")
16473 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16474 (parallel [(const_int 0) (const_int 4)
16475 (const_int 8) (const_int 12)])))
16479 (parallel [(const_int 1) (const_int 5)
16480 (const_int 9) (const_int 13)]))))
16485 (parallel [(const_int 2) (const_int 6)
16486 (const_int 10) (const_int 14)])))
16490 (parallel [(const_int 3) (const_int 7)
16491 (const_int 11) (const_int 15)]))))))]
16493 "vphadd<u>bd\t{%1, %0|%0, %1}"
16494 [(set_attr "type" "sseiadd1")])
16496 (define_insn "xop_phadd<u>bq"
16497 [(set (match_operand:V2DI 0 "register_operand" "=x")
16503 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16504 (parallel [(const_int 0) (const_int 8)])))
16508 (parallel [(const_int 1) (const_int 9)]))))
16513 (parallel [(const_int 2) (const_int 10)])))
16517 (parallel [(const_int 3) (const_int 11)])))))
16523 (parallel [(const_int 4) (const_int 12)])))
16527 (parallel [(const_int 5) (const_int 13)]))))
16532 (parallel [(const_int 6) (const_int 14)])))
16536 (parallel [(const_int 7) (const_int 15)])))))))]
16538 "vphadd<u>bq\t{%1, %0|%0, %1}"
16539 [(set_attr "type" "sseiadd1")])
16541 (define_insn "xop_phadd<u>wd"
16542 [(set (match_operand:V4SI 0 "register_operand" "=x")
16546 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16547 (parallel [(const_int 0) (const_int 2)
16548 (const_int 4) (const_int 6)])))
16552 (parallel [(const_int 1) (const_int 3)
16553 (const_int 5) (const_int 7)])))))]
16555 "vphadd<u>wd\t{%1, %0|%0, %1}"
16556 [(set_attr "type" "sseiadd1")])
16558 (define_insn "xop_phadd<u>wq"
16559 [(set (match_operand:V2DI 0 "register_operand" "=x")
16564 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16565 (parallel [(const_int 0) (const_int 4)])))
16569 (parallel [(const_int 1) (const_int 5)]))))
16574 (parallel [(const_int 2) (const_int 6)])))
16578 (parallel [(const_int 3) (const_int 7)]))))))]
16580 "vphadd<u>wq\t{%1, %0|%0, %1}"
16581 [(set_attr "type" "sseiadd1")])
16583 (define_insn "xop_phadd<u>dq"
16584 [(set (match_operand:V2DI 0 "register_operand" "=x")
16588 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
16589 (parallel [(const_int 0) (const_int 2)])))
16593 (parallel [(const_int 1) (const_int 3)])))))]
16595 "vphadd<u>dq\t{%1, %0|%0, %1}"
16596 [(set_attr "type" "sseiadd1")])
16598 (define_insn "xop_phsubbw"
16599 [(set (match_operand:V8HI 0 "register_operand" "=x")
16603 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
16604 (parallel [(const_int 0) (const_int 2)
16605 (const_int 4) (const_int 6)
16606 (const_int 8) (const_int 10)
16607 (const_int 12) (const_int 14)])))
16611 (parallel [(const_int 1) (const_int 3)
16612 (const_int 5) (const_int 7)
16613 (const_int 9) (const_int 11)
16614 (const_int 13) (const_int 15)])))))]
16616 "vphsubbw\t{%1, %0|%0, %1}"
16617 [(set_attr "type" "sseiadd1")])
16619 (define_insn "xop_phsubwd"
16620 [(set (match_operand:V4SI 0 "register_operand" "=x")
16624 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
16625 (parallel [(const_int 0) (const_int 2)
16626 (const_int 4) (const_int 6)])))
16630 (parallel [(const_int 1) (const_int 3)
16631 (const_int 5) (const_int 7)])))))]
16633 "vphsubwd\t{%1, %0|%0, %1}"
16634 [(set_attr "type" "sseiadd1")])
16636 (define_insn "xop_phsubdq"
16637 [(set (match_operand:V2DI 0 "register_operand" "=x")
16641 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
16642 (parallel [(const_int 0) (const_int 2)])))
16646 (parallel [(const_int 1) (const_int 3)])))))]
16648 "vphsubdq\t{%1, %0|%0, %1}"
16649 [(set_attr "type" "sseiadd1")])
16651 ;; XOP permute instructions
16652 (define_insn "xop_pperm"
16653 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
16655 [(match_operand:V16QI 1 "register_operand" "x,x")
16656 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
16657 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
16658 UNSPEC_XOP_PERMUTE))]
16659 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16660 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16661 [(set_attr "type" "sse4arg")
16662 (set_attr "mode" "TI")])
16664 ;; XOP pack instructions that combine two vectors into a smaller vector
16665 (define_insn "xop_pperm_pack_v2di_v4si"
16666 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
16669 (match_operand:V2DI 1 "register_operand" "x,x"))
16671 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
16672 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16673 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16674 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16675 [(set_attr "type" "sse4arg")
16676 (set_attr "mode" "TI")])
16678 (define_insn "xop_pperm_pack_v4si_v8hi"
16679 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
16682 (match_operand:V4SI 1 "register_operand" "x,x"))
16684 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
16685 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16686 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16687 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16688 [(set_attr "type" "sse4arg")
16689 (set_attr "mode" "TI")])
16691 (define_insn "xop_pperm_pack_v8hi_v16qi"
16692 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
16695 (match_operand:V8HI 1 "register_operand" "x,x"))
16697 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
16698 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
16699 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
16700 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16701 [(set_attr "type" "sse4arg")
16702 (set_attr "mode" "TI")])
16704 ;; XOP packed rotate instructions
16705 (define_expand "rotl<mode>3"
16706 [(set (match_operand:VI_128 0 "register_operand")
16708 (match_operand:VI_128 1 "nonimmediate_operand")
16709 (match_operand:SI 2 "general_operand")))]
16712   /* If we were given a scalar, convert it to a parallel.  */
16713 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
16715 rtvec vs = rtvec_alloc (<ssescalarnum>);
16716 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
16717 rtx reg = gen_reg_rtx (<MODE>mode);
16718 rtx op2 = operands[2];
16721 if (GET_MODE (op2) != <ssescalarmode>mode)
16723 op2 = gen_reg_rtx (<ssescalarmode>mode);
16724 convert_move (op2, operands[2], false);
16727 for (i = 0; i < <ssescalarnum>; i++)
16728 RTVEC_ELT (vs, i) = op2;
16730 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
16731 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
16736 (define_expand "rotr<mode>3"
16737 [(set (match_operand:VI_128 0 "register_operand")
16739 (match_operand:VI_128 1 "nonimmediate_operand")
16740 (match_operand:SI 2 "general_operand")))]
16743   /* If we were given a scalar, convert it to a parallel.  */
16744 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
16746 rtvec vs = rtvec_alloc (<ssescalarnum>);
16747 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
16748 rtx neg = gen_reg_rtx (<MODE>mode);
16749 rtx reg = gen_reg_rtx (<MODE>mode);
16750 rtx op2 = operands[2];
16753 if (GET_MODE (op2) != <ssescalarmode>mode)
16755 op2 = gen_reg_rtx (<ssescalarmode>mode);
16756 convert_move (op2, operands[2], false);
16759 for (i = 0; i < <ssescalarnum>; i++)
16760 RTVEC_ELT (vs, i) = op2;
16762 emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
16763 emit_insn (gen_neg<mode>2 (neg, reg));
16764 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
16769 (define_insn "xop_rotl<mode>3"
16770 [(set (match_operand:VI_128 0 "register_operand" "=x")
16772 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
16773 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
16775 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16776 [(set_attr "type" "sseishft")
16777 (set_attr "length_immediate" "1")
16778 (set_attr "mode" "TI")])
16780 (define_insn "xop_rotr<mode>3"
16781 [(set (match_operand:VI_128 0 "register_operand" "=x")
16783 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
16784 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
16788 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
16789 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
16791 [(set_attr "type" "sseishft")
16792 (set_attr "length_immediate" "1")
16793 (set_attr "mode" "TI")])
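;; There is no separate right-rotate instruction: the expanders above rely on
;; the identity rotr (x, n) == rotl (x, bits - n) for constant counts (see the
;; immediate adjustment in xop_rotr<mode>3) and on rotating left by the
;; negated count for variable ones, so every rotate ends up as a vprot*.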
16795 (define_expand "vrotr<mode>3"
16796 [(match_operand:VI_128 0 "register_operand")
16797 (match_operand:VI_128 1 "register_operand")
16798 (match_operand:VI_128 2 "register_operand")]
16801 rtx reg = gen_reg_rtx (<MODE>mode);
16802 emit_insn (gen_neg<mode>2 (reg, operands[2]));
16803 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
16807 (define_expand "vrotl<mode>3"
16808 [(match_operand:VI_128 0 "register_operand")
16809 (match_operand:VI_128 1 "register_operand")
16810 (match_operand:VI_128 2 "register_operand")]
16813 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
16817 (define_insn "xop_vrotl<mode>3"
16818 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16819 (if_then_else:VI_128
16821 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16824 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16828 (neg:VI_128 (match_dup 2)))))]
16829 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16830 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16831 [(set_attr "type" "sseishft")
16832 (set_attr "prefix_data16" "0")
16833 (set_attr "prefix_extra" "2")
16834 (set_attr "mode" "TI")])
16836 ;; XOP packed shift instructions.
16837 (define_expand "vlshr<mode>3"
16838 [(set (match_operand:VI12_128 0 "register_operand")
16840 (match_operand:VI12_128 1 "register_operand")
16841 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16844 rtx neg = gen_reg_rtx (<MODE>mode);
16845 emit_insn (gen_neg<mode>2 (neg, operands[2]));
16846 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
16850 (define_expand "vlshr<mode>3"
16851 [(set (match_operand:VI48_128 0 "register_operand")
16853 (match_operand:VI48_128 1 "register_operand")
16854 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16855 "TARGET_AVX2 || TARGET_XOP"
16859 rtx neg = gen_reg_rtx (<MODE>mode);
16860 emit_insn (gen_neg<mode>2 (neg, operands[2]));
16861 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
16866 (define_expand "vlshr<mode>3"
16867 [(set (match_operand:VI48_512 0 "register_operand")
16869 (match_operand:VI48_512 1 "register_operand")
16870 (match_operand:VI48_512 2 "nonimmediate_operand")))]
16873 (define_expand "vlshr<mode>3"
16874 [(set (match_operand:VI48_256 0 "register_operand")
16876 (match_operand:VI48_256 1 "register_operand")
16877 (match_operand:VI48_256 2 "nonimmediate_operand")))]
16880 (define_expand "vashrv8hi3<mask_name>"
16881 [(set (match_operand:V8HI 0 "register_operand")
16883 (match_operand:V8HI 1 "register_operand")
16884 (match_operand:V8HI 2 "nonimmediate_operand")))]
16885 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
16889 rtx neg = gen_reg_rtx (V8HImode);
16890 emit_insn (gen_negv8hi2 (neg, operands[2]));
16891 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
16896 (define_expand "vashrv16qi3"
16897 [(set (match_operand:V16QI 0 "register_operand")
16899 (match_operand:V16QI 1 "register_operand")
16900 (match_operand:V16QI 2 "nonimmediate_operand")))]
16903 rtx neg = gen_reg_rtx (V16QImode);
16904 emit_insn (gen_negv16qi2 (neg, operands[2]));
16905 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
16909 (define_expand "vashrv2di3<mask_name>"
16910 [(set (match_operand:V2DI 0 "register_operand")
16912 (match_operand:V2DI 1 "register_operand")
16913 (match_operand:V2DI 2 "nonimmediate_operand")))]
16914 "TARGET_XOP || TARGET_AVX512VL"
16918 rtx neg = gen_reg_rtx (V2DImode);
16919 emit_insn (gen_negv2di2 (neg, operands[2]));
16920 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
16925 (define_expand "vashrv4si3"
16926 [(set (match_operand:V4SI 0 "register_operand")
16927 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
16928 (match_operand:V4SI 2 "nonimmediate_operand")))]
16929 "TARGET_AVX2 || TARGET_XOP"
16933 rtx neg = gen_reg_rtx (V4SImode);
16934 emit_insn (gen_negv4si2 (neg, operands[2]));
16935 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
16940 (define_expand "vashrv16si3"
16941 [(set (match_operand:V16SI 0 "register_operand")
16942 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
16943 (match_operand:V16SI 2 "nonimmediate_operand")))]
16946 (define_expand "vashrv8si3"
16947 [(set (match_operand:V8SI 0 "register_operand")
16948 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
16949 (match_operand:V8SI 2 "nonimmediate_operand")))]
16952 (define_expand "vashl<mode>3"
16953 [(set (match_operand:VI12_128 0 "register_operand")
16955 (match_operand:VI12_128 1 "register_operand")
16956 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16959 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
16963 (define_expand "vashl<mode>3"
16964 [(set (match_operand:VI48_128 0 "register_operand")
16966 (match_operand:VI48_128 1 "register_operand")
16967 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16968 "TARGET_AVX2 || TARGET_XOP"
16972 operands[2] = force_reg (<MODE>mode, operands[2]);
16973 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
16978 (define_expand "vashl<mode>3"
16979 [(set (match_operand:VI48_512 0 "register_operand")
16981 (match_operand:VI48_512 1 "register_operand")
16982 (match_operand:VI48_512 2 "nonimmediate_operand")))]
16985 (define_expand "vashl<mode>3"
16986 [(set (match_operand:VI48_256 0 "register_operand")
16988 (match_operand:VI48_256 1 "register_operand")
16989 (match_operand:VI48_256 2 "nonimmediate_operand")))]
16992 (define_insn "xop_sha<mode>3"
16993 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16994 (if_then_else:VI_128
16996 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16999 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
17003 (neg:VI_128 (match_dup 2)))))]
17004 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
17005 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17006 [(set_attr "type" "sseishft")
17007 (set_attr "prefix_data16" "0")
17008 (set_attr "prefix_extra" "2")
17009 (set_attr "mode" "TI")])
17011 (define_insn "xop_shl<mode>3"
17012 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
17013 (if_then_else:VI_128
17015 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
17018 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
17022 (neg:VI_128 (match_dup 2)))))]
17023 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
17024 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17025 [(set_attr "type" "sseishft")
17026 (set_attr "prefix_data16" "0")
17027 (set_attr "prefix_extra" "2")
17028 (set_attr "mode" "TI")])
17030 (define_expand "<shift_insn><mode>3"
17031 [(set (match_operand:VI1_AVX512 0 "register_operand")
17032 (any_shift:VI1_AVX512
17033 (match_operand:VI1_AVX512 1 "register_operand")
17034 (match_operand:SI 2 "nonmemory_operand")))]
17037 if (TARGET_XOP && <MODE>mode == V16QImode)
17039 bool negate = false;
17040 rtx (*gen) (rtx, rtx, rtx);
17044 if (<CODE> != ASHIFT)
17046 if (CONST_INT_P (operands[2]))
17047 operands[2] = GEN_INT (-INTVAL (operands[2]));
17051 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
17052 for (i = 0; i < 16; i++)
17053 XVECEXP (par, 0, i) = operands[2];
17055 tmp = gen_reg_rtx (V16QImode);
17056 emit_insn (gen_vec_initv16qiqi (tmp, par));
17059 emit_insn (gen_negv16qi2 (tmp, tmp));
17061 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
17062 emit_insn (gen (operands[0], operands[1], tmp));
17065 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
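;; Byte shifts have no direct SSE/AVX encoding.  With XOP the per-element
;; variable vpshlb/vpshab forms are used instead, negating the count for
;; right shifts (a negative count shifts right); without XOP,
;; ix86_expand_vecop_qihi synthesizes the V16QI shift from HImode operations.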
17069 (define_expand "ashrv2di3"
17070 [(set (match_operand:V2DI 0 "register_operand")
17072 (match_operand:V2DI 1 "register_operand")
17073 (match_operand:DI 2 "nonmemory_operand")))]
17074 "TARGET_XOP || TARGET_AVX512VL"
17076 if (!TARGET_AVX512VL)
17078 rtx reg = gen_reg_rtx (V2DImode);
17080 bool negate = false;
17083 if (CONST_INT_P (operands[2]))
17084 operands[2] = GEN_INT (-INTVAL (operands[2]));
17088 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
17089 for (i = 0; i < 2; i++)
17090 XVECEXP (par, 0, i) = operands[2];
17092 emit_insn (gen_vec_initv2didi (reg, par));
17095 emit_insn (gen_negv2di2 (reg, reg));
17097 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
17102 ;; XOP FRCZ support
17103 (define_insn "xop_frcz<mode>2"
17104 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
17106 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
17109 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
17110 [(set_attr "type" "ssecvt1")
17111 (set_attr "mode" "<MODE>")])
17113 (define_expand "xop_vmfrcz<mode>2"
17114 [(set (match_operand:VF_128 0 "register_operand")
17117 [(match_operand:VF_128 1 "nonimmediate_operand")]
17122 "operands[2] = CONST0_RTX (<MODE>mode);")
17124 (define_insn "*xop_vmfrcz<mode>2"
17125 [(set (match_operand:VF_128 0 "register_operand" "=x")
17128 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
17130 (match_operand:VF_128 2 "const0_operand")
17133 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
17134 [(set_attr "type" "ssecvt1")
17135 (set_attr "mode" "<MODE>")])
17137 (define_insn "xop_maskcmp<mode>3"
17138 [(set (match_operand:VI_128 0 "register_operand" "=x")
17139 (match_operator:VI_128 1 "ix86_comparison_int_operator"
17140 [(match_operand:VI_128 2 "register_operand" "x")
17141 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
17143 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17144 [(set_attr "type" "sse4arg")
17145 (set_attr "prefix_data16" "0")
17146 (set_attr "prefix_rep" "0")
17147 (set_attr "prefix_extra" "2")
17148 (set_attr "length_immediate" "1")
17149 (set_attr "mode" "TI")])
17151 (define_insn "xop_maskcmp_uns<mode>3"
17152 [(set (match_operand:VI_128 0 "register_operand" "=x")
17153 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
17154 [(match_operand:VI_128 2 "register_operand" "x")
17155 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
17157 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17158 [(set_attr "type" "ssecmp")
17159 (set_attr "prefix_data16" "0")
17160 (set_attr "prefix_rep" "0")
17161 (set_attr "prefix_extra" "2")
17162 (set_attr "length_immediate" "1")
17163 (set_attr "mode" "TI")])
17165 ;; Version of pcom*u* used by the intrinsics.  It keeps pcomequ* and
17166 ;; pcomneu* from being converted to their signed counterparts in case
17167 ;; somebody needs the exact instruction generated for the intrinsic.
17168 (define_insn "xop_maskcmp_uns2<mode>3"
17169 [(set (match_operand:VI_128 0 "register_operand" "=x")
17171 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
17172 [(match_operand:VI_128 2 "register_operand" "x")
17173 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
17174 UNSPEC_XOP_UNSIGNED_CMP))]
17176 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
17177 [(set_attr "type" "ssecmp")
17178 (set_attr "prefix_data16" "0")
17179 (set_attr "prefix_extra" "2")
17180 (set_attr "length_immediate" "1")
17181 (set_attr "mode" "TI")])
17183 ;; Pcomtrue and pcomfalse support.  These instructions are essentially
17184 ;; useless, but are included here for completeness.
17185 (define_insn "xop_pcom_tf<mode>3"
17186 [(set (match_operand:VI_128 0 "register_operand" "=x")
17188 [(match_operand:VI_128 1 "register_operand" "x")
17189 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
17190 (match_operand:SI 3 "const_int_operand" "n")]
17191 UNSPEC_XOP_TRUEFALSE))]
17194 return ((INTVAL (operands[3]) != 0)
17195 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17196 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
17198 [(set_attr "type" "ssecmp")
17199 (set_attr "prefix_data16" "0")
17200 (set_attr "prefix_extra" "2")
17201 (set_attr "length_immediate" "1")
17202 (set_attr "mode" "TI")])
17204 (define_insn "xop_vpermil2<mode>3"
17205 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
17207 [(match_operand:VF_128_256 1 "register_operand" "x,x")
17208 (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
17209 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
17210 (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
17213 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
17214 [(set_attr "type" "sse4arg")
17215 (set_attr "length_immediate" "1")
17216 (set_attr "mode" "<MODE>")])
17218 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17220 (define_insn "aesenc"
17221 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17222 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17223 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17227 aesenc\t{%2, %0|%0, %2}
17228 vaesenc\t{%2, %1, %0|%0, %1, %2}"
17229 [(set_attr "isa" "noavx,avx")
17230 (set_attr "type" "sselog1")
17231 (set_attr "prefix_extra" "1")
17232 (set_attr "prefix" "orig,vex")
17233 (set_attr "btver2_decode" "double,double")
17234 (set_attr "mode" "TI")])
17236 (define_insn "aesenclast"
17237 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17238 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17239 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17240 UNSPEC_AESENCLAST))]
17243 aesenclast\t{%2, %0|%0, %2}
17244 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
17245 [(set_attr "isa" "noavx,avx")
17246 (set_attr "type" "sselog1")
17247 (set_attr "prefix_extra" "1")
17248 (set_attr "prefix" "orig,vex")
17249 (set_attr "btver2_decode" "double,double")
17250 (set_attr "mode" "TI")])
17252 (define_insn "aesdec"
17253 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17254 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17255 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17259 aesdec\t{%2, %0|%0, %2}
17260 vaesdec\t{%2, %1, %0|%0, %1, %2}"
17261 [(set_attr "isa" "noavx,avx")
17262 (set_attr "type" "sselog1")
17263 (set_attr "prefix_extra" "1")
17264 (set_attr "prefix" "orig,vex")
17265 (set_attr "btver2_decode" "double,double")
17266 (set_attr "mode" "TI")])
17268 (define_insn "aesdeclast"
17269 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17270 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17271 (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
17272 UNSPEC_AESDECLAST))]
17275 aesdeclast\t{%2, %0|%0, %2}
17276 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
17277 [(set_attr "isa" "noavx,avx")
17278 (set_attr "type" "sselog1")
17279 (set_attr "prefix_extra" "1")
17280 (set_attr "prefix" "orig,vex")
17281 (set_attr "btver2_decode" "double,double")
17282 (set_attr "mode" "TI")])
17284 (define_insn "aesimc"
17285 [(set (match_operand:V2DI 0 "register_operand" "=x")
17286 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
17289 "%vaesimc\t{%1, %0|%0, %1}"
17290 [(set_attr "type" "sselog1")
17291 (set_attr "prefix_extra" "1")
17292 (set_attr "prefix" "maybe_vex")
17293 (set_attr "mode" "TI")])
17295 (define_insn "aeskeygenassist"
17296 [(set (match_operand:V2DI 0 "register_operand" "=x")
17297 (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
17298 (match_operand:SI 2 "const_0_to_255_operand" "n")]
17299 UNSPEC_AESKEYGENASSIST))]
17301 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
17302 [(set_attr "type" "sselog1")
17303 (set_attr "prefix_extra" "1")
17304 (set_attr "length_immediate" "1")
17305 (set_attr "prefix" "maybe_vex")
17306 (set_attr "mode" "TI")])
17308 (define_insn "pclmulqdq"
17309 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
17310 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
17311 (match_operand:V2DI 2 "vector_operand" "xBm,xm")
17312 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
17316 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
17317 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17318 [(set_attr "isa" "noavx,avx")
17319 (set_attr "type" "sselog1")
17320 (set_attr "prefix_extra" "1")
17321 (set_attr "length_immediate" "1")
17322 (set_attr "prefix" "orig,vex")
17323 (set_attr "mode" "TI")])
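;; pclmulqdq is a carry-less (GF(2)[x]) multiply of one 64-bit half of each
;; source; the immediate picks the halves (bit 0 for the first source, bit 4
;; for the second).  Typical C usage:
;;   __m128i lo = _mm_clmulepi64_si128 (a, b, 0x00);  /* low  x low  */
;;   __m128i hi = _mm_clmulepi64_si128 (a, b, 0x11);  /* high x high */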
17325 (define_expand "avx_vzeroall"
17326 [(match_par_dup 0 [(const_int 0)])]
17329 int nregs = TARGET_64BIT ? 16 : 8;
17332 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
17334 XVECEXP (operands[0], 0, 0)
17335 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
17338 for (regno = 0; regno < nregs; regno++)
17339 XVECEXP (operands[0], 0, regno + 1)
17340 = gen_rtx_SET (gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
17341 CONST0_RTX (V8SImode));
17344 (define_insn "*avx_vzeroall"
17345 [(match_parallel 0 "vzeroall_operation"
17346 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
17349 [(set_attr "type" "sse")
17350 (set_attr "modrm" "0")
17351 (set_attr "memory" "none")
17352 (set_attr "prefix" "vex")
17353 (set_attr "btver2_decode" "vector")
17354 (set_attr "mode" "OI")])
17356 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
17357 ;; if the upper 128bits are unused.
17358 (define_insn "avx_vzeroupper"
17359 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
17362 [(set_attr "type" "sse")
17363 (set_attr "modrm" "0")
17364 (set_attr "memory" "none")
17365 (set_attr "prefix" "vex")
17366 (set_attr "btver2_decode" "vector")
17367 (set_attr "mode" "OI")])
17369 (define_mode_attr pbroadcast_evex_isa
17370 [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
17371 (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
17372 (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
17373 (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
17375 (define_insn "avx2_pbroadcast<mode>"
17376 [(set (match_operand:VI 0 "register_operand" "=x,v")
17378 (vec_select:<ssescalarmode>
17379 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
17380 (parallel [(const_int 0)]))))]
17382 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
17383 [(set_attr "isa" "*,<pbroadcast_evex_isa>")
17384 (set_attr "type" "ssemov")
17385 (set_attr "prefix_extra" "1")
17386 (set_attr "prefix" "vex,evex")
17387 (set_attr "mode" "<sseinsnmode>")])
17389 (define_insn "avx2_pbroadcast<mode>_1"
17390 [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
17391 (vec_duplicate:VI_256
17392 (vec_select:<ssescalarmode>
17393 (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
17394 (parallel [(const_int 0)]))))]
17397 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
17398 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
17399 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
17400 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
17401 [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
17402 (set_attr "type" "ssemov")
17403 (set_attr "prefix_extra" "1")
17404 (set_attr "prefix" "vex")
17405 (set_attr "mode" "<sseinsnmode>")])
17407 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
17408 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
17409 (unspec:VI48F_256_512
17410 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
17411 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17413 "TARGET_AVX2 && <mask_mode512bit_condition>"
17414 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17415 [(set_attr "type" "sselog")
17416 (set_attr "prefix" "<mask_prefix2>")
17417 (set_attr "mode" "<sseinsnmode>")])
17419 (define_insn "<avx512>_permvar<mode><mask_name>"
17420 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17421 (unspec:VI1_AVX512VL
17422 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
17423 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17425 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
17426 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17427 [(set_attr "type" "sselog")
17428 (set_attr "prefix" "<mask_prefix2>")
17429 (set_attr "mode" "<sseinsnmode>")])
17431 (define_insn "<avx512>_permvar<mode><mask_name>"
17432 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17433 (unspec:VI2_AVX512VL
17434 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
17435 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
17437 "TARGET_AVX512BW && <mask_mode512bit_condition>"
17438 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
17439 [(set_attr "type" "sselog")
17440 (set_attr "prefix" "<mask_prefix2>")
17441 (set_attr "mode" "<sseinsnmode>")])
17443 (define_expand "avx2_perm<mode>"
17444 [(match_operand:VI8F_256 0 "register_operand")
17445 (match_operand:VI8F_256 1 "nonimmediate_operand")
17446 (match_operand:SI 2 "const_0_to_255_operand")]
17449 int mask = INTVAL (operands[2]);
17450 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
17451 GEN_INT ((mask >> 0) & 3),
17452 GEN_INT ((mask >> 2) & 3),
17453 GEN_INT ((mask >> 4) & 3),
17454 GEN_INT ((mask >> 6) & 3)));
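;; Illustrative sketch, not part of GCC: how the expander above decodes the
;; 8-bit immediate into four 2-bit element selectors, one per destination
;; element of the 256-bit permutation.
;;
;;   #include <stdio.h>
;;
;;   int
;;   main (void)
;;   {
;;     unsigned mask = 0x1b;   /* 0b00011011 */
;;     for (int i = 0; i < 4; i++)
;;       printf ("dest[%d] = src[%u]\n", i, (mask >> (2 * i)) & 3);
;;     /* Prints selectors 3, 2, 1, 0: imm8 0x1b reverses the four elements. */
;;     return 0;
;;   }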
17458 (define_expand "avx512vl_perm<mode>_mask"
17459 [(match_operand:VI8F_256 0 "register_operand")
17460 (match_operand:VI8F_256 1 "nonimmediate_operand")
17461 (match_operand:SI 2 "const_0_to_255_operand")
17462 (match_operand:VI8F_256 3 "vector_move_operand")
17463 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17466 int mask = INTVAL (operands[2]);
17467 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
17468 GEN_INT ((mask >> 0) & 3),
17469 GEN_INT ((mask >> 2) & 3),
17470 GEN_INT ((mask >> 4) & 3),
17471 GEN_INT ((mask >> 6) & 3),
17472 operands[3], operands[4]));
17476 (define_insn "avx2_perm<mode>_1<mask_name>"
17477 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17478 (vec_select:VI8F_256
17479 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
17480 (parallel [(match_operand 2 "const_0_to_3_operand")
17481 (match_operand 3 "const_0_to_3_operand")
17482 (match_operand 4 "const_0_to_3_operand")
17483 (match_operand 5 "const_0_to_3_operand")])))]
17484 "TARGET_AVX2 && <mask_mode512bit_condition>"
17487 mask |= INTVAL (operands[2]) << 0;
17488 mask |= INTVAL (operands[3]) << 2;
17489 mask |= INTVAL (operands[4]) << 4;
17490 mask |= INTVAL (operands[5]) << 6;
17491 operands[2] = GEN_INT (mask);
17492 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
17494 [(set_attr "type" "sselog")
17495 (set_attr "prefix" "<mask_prefix2>")
17496 (set_attr "mode" "<sseinsnmode>")])
17498 (define_expand "avx512f_perm<mode>"
17499 [(match_operand:V8FI 0 "register_operand")
17500 (match_operand:V8FI 1 "nonimmediate_operand")
17501 (match_operand:SI 2 "const_0_to_255_operand")]
17504 int mask = INTVAL (operands[2]);
17505 emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
17506 GEN_INT ((mask >> 0) & 3),
17507 GEN_INT ((mask >> 2) & 3),
17508 GEN_INT ((mask >> 4) & 3),
17509 GEN_INT ((mask >> 6) & 3),
17510 GEN_INT (((mask >> 0) & 3) + 4),
17511 GEN_INT (((mask >> 2) & 3) + 4),
17512 GEN_INT (((mask >> 4) & 3) + 4),
17513 GEN_INT (((mask >> 6) & 3) + 4)));
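;; Worked example (illustrative): for imm8 0x1b the expander above emits the
;; selector parallel {3, 2, 1, 0, 7, 6, 5, 4}; the low four indices come from
;; the 2-bit fields directly and the high four are the same fields plus 4, so
;; the same 4-element permutation is applied to each 256-bit half of the
;; 512-bit operand.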
17517 (define_expand "avx512f_perm<mode>_mask"
17518 [(match_operand:V8FI 0 "register_operand")
17519 (match_operand:V8FI 1 "nonimmediate_operand")
17520 (match_operand:SI 2 "const_0_to_255_operand")
17521 (match_operand:V8FI 3 "vector_move_operand")
17522 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17525 int mask = INTVAL (operands[2]);
17526 emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
17527 GEN_INT ((mask >> 0) & 3),
17528 GEN_INT ((mask >> 2) & 3),
17529 GEN_INT ((mask >> 4) & 3),
17530 GEN_INT ((mask >> 6) & 3),
17531 GEN_INT (((mask >> 0) & 3) + 4),
17532 GEN_INT (((mask >> 2) & 3) + 4),
17533 GEN_INT (((mask >> 4) & 3) + 4),
17534 GEN_INT (((mask >> 6) & 3) + 4),
17535 operands[3], operands[4]));
17539 (define_insn "avx512f_perm<mode>_1<mask_name>"
17540 [(set (match_operand:V8FI 0 "register_operand" "=v")
17542 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
17543 (parallel [(match_operand 2 "const_0_to_3_operand")
17544 (match_operand 3 "const_0_to_3_operand")
17545 (match_operand 4 "const_0_to_3_operand")
17546 (match_operand 5 "const_0_to_3_operand")
17547 (match_operand 6 "const_4_to_7_operand")
17548 (match_operand 7 "const_4_to_7_operand")
17549 (match_operand 8 "const_4_to_7_operand")
17550 (match_operand 9 "const_4_to_7_operand")])))]
17551 "TARGET_AVX512F && <mask_mode512bit_condition>
17552 && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
17553 && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
17554 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
17555 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
17558 mask |= INTVAL (operands[2]) << 0;
17559 mask |= INTVAL (operands[3]) << 2;
17560 mask |= INTVAL (operands[4]) << 4;
17561 mask |= INTVAL (operands[5]) << 6;
17562 operands[2] = GEN_INT (mask);
17563 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
17565 [(set_attr "type" "sselog")
17566 (set_attr "prefix" "<mask_prefix2>")
17567 (set_attr "mode" "<sseinsnmode>")])
17569 (define_insn "avx2_permv2ti"
17570 [(set (match_operand:V4DI 0 "register_operand" "=x")
17572 [(match_operand:V4DI 1 "register_operand" "x")
17573 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
17574 (match_operand:SI 3 "const_0_to_255_operand" "n")]
17577 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17578 [(set_attr "type" "sselog")
17579 (set_attr "prefix" "vex")
17580 (set_attr "mode" "OI")])
17582 (define_insn "avx2_vec_dupv4df"
17583 [(set (match_operand:V4DF 0 "register_operand" "=v")
17584 (vec_duplicate:V4DF
17586 (match_operand:V2DF 1 "register_operand" "v")
17587 (parallel [(const_int 0)]))))]
17589 "vbroadcastsd\t{%1, %0|%0, %1}"
17590 [(set_attr "type" "sselog1")
17591 (set_attr "prefix" "maybe_evex")
17592 (set_attr "mode" "V4DF")])
17594 (define_insn "<avx512>_vec_dup<mode>_1"
17595 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
17596 (vec_duplicate:VI_AVX512BW
17597 (vec_select:<ssescalarmode>
17598 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
17599 (parallel [(const_int 0)]))))]
17602 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
17603 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
17604 [(set_attr "type" "ssemov")
17605 (set_attr "prefix" "evex")
17606 (set_attr "mode" "<sseinsnmode>")])
17608 (define_insn "<avx512>_vec_dup<mode><mask_name>"
17609 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
17610 (vec_duplicate:V48_AVX512VL
17611 (vec_select:<ssescalarmode>
17612 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17613 (parallel [(const_int 0)]))))]
17616 /* There is no DF broadcast (in AVX-512*) to a 128-bit register.
17617 Mimic it with the integer variant. */
17618 if (<MODE>mode == V2DFmode)
17619 return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
17621 if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
17622 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}";
17624 return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
17626 [(set_attr "type" "ssemov")
17627 (set_attr "prefix" "evex")
17628 (set_attr "mode" "<sseinsnmode>")])
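;; Illustrative sketch, not part of GCC: the vpbroadcastq stand-in used above
;; is safe because duplicating the 64-bit bit pattern of a double gives the
;; same bytes as duplicating the double itself.
;;
;;   #include <stdio.h>
;;   #include <string.h>
;;   #include <stdint.h>
;;
;;   int
;;   main (void)
;;   {
;;     double d = 2.5;
;;     uint64_t bits;
;;     memcpy (&bits, &d, sizeof bits);
;;
;;     double as_double[2] = { d, d };        /* what a DF broadcast would give */
;;     uint64_t as_int[2] = { bits, bits };   /* what vpbroadcastq produces */
;;
;;     printf ("%d\n", memcmp (as_double, as_int, sizeof as_double) == 0);
;;     return 0;   /* prints 1: identical bit patterns */
;;   }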
17630 (define_insn "<avx512>_vec_dup<mode><mask_name>"
17631 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
17632 (vec_duplicate:VI12_AVX512VL
17633 (vec_select:<ssescalarmode>
17634 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17635 (parallel [(const_int 0)]))))]
17637 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17638 [(set_attr "type" "ssemov")
17639 (set_attr "prefix" "evex")
17640 (set_attr "mode" "<sseinsnmode>")])
17642 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
17643 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
17644 (vec_duplicate:V16FI
17645 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
17648 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
17649 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17650 [(set_attr "type" "ssemov")
17651 (set_attr "prefix" "evex")
17652 (set_attr "mode" "<sseinsnmode>")])
17654 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
17655 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
17656 (vec_duplicate:V8FI
17657 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
17660 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
17661 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17662 [(set_attr "type" "ssemov")
17663 (set_attr "prefix" "evex")
17664 (set_attr "mode" "<sseinsnmode>")])
17666 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
17667 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
17668 (vec_duplicate:VI12_AVX512VL
17669 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
17672 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
17673 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
17674 [(set_attr "type" "ssemov")
17675 (set_attr "prefix" "evex")
17676 (set_attr "mode" "<sseinsnmode>")])
17678 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
17679 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
17680 (vec_duplicate:V48_AVX512VL
17681 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
17683 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17684 [(set_attr "type" "ssemov")
17685 (set_attr "prefix" "evex")
17686 (set_attr "mode" "<sseinsnmode>")
17687 (set (attr "enabled")
17688 (if_then_else (eq_attr "alternative" "1")
17689 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
17690 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
17693 (define_insn "vec_dupv4sf"
17694 [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
17695 (vec_duplicate:V4SF
17696 (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
17699 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
17700 vbroadcastss\t{%1, %0|%0, %1}
17701 shufps\t{$0, %0, %0|%0, %0, 0}"
17702 [(set_attr "isa" "avx,avx,noavx")
17703 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
17704 (set_attr "length_immediate" "1,0,1")
17705 (set_attr "prefix_extra" "0,1,*")
17706 (set_attr "prefix" "maybe_evex,maybe_evex,orig")
17707 (set_attr "mode" "V4SF")])
17709 (define_insn "*vec_dupv4si"
17710 [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
17711 (vec_duplicate:V4SI
17712 (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
17715 %vpshufd\t{$0, %1, %0|%0, %1, 0}
17716 vbroadcastss\t{%1, %0|%0, %1}
17717 shufps\t{$0, %0, %0|%0, %0, 0}"
17718 [(set_attr "isa" "sse2,avx,noavx")
17719 (set_attr "type" "sselog1,ssemov,sselog1")
17720 (set_attr "length_immediate" "1,0,1")
17721 (set_attr "prefix_extra" "0,1,*")
17722 (set_attr "prefix" "maybe_vex,maybe_evex,orig")
17723 (set_attr "mode" "TI,V4SF,V4SF")])
17725 (define_insn "*vec_dupv2di"
17726 [(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
17727 (vec_duplicate:V2DI
17728 (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,m,0")))]
17732 vpunpcklqdq\t{%d1, %0|%0, %d1}
17733 %vmovddup\t{%1, %0|%0, %1}
17735 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
17736 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
17737 (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
17738 (set_attr "mode" "TI,TI,DF,V4SF")])
17740 (define_insn "avx2_vbroadcasti128_<mode>"
17741 [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
17743 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
17747 vbroadcasti128\t{%1, %0|%0, %1}
17748 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
17749 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
17750 [(set_attr "isa" "*,avx512dq,avx512vl")
17751 (set_attr "type" "ssemov")
17752 (set_attr "prefix_extra" "1")
17753 (set_attr "prefix" "vex,evex,evex")
17754 (set_attr "mode" "OI")])
17756 ;; Modes handled by AVX vec_dup patterns.
17757 (define_mode_iterator AVX_VEC_DUP_MODE
17758 [V8SI V8SF V4DI V4DF])
17759 ;; Modes handled by AVX2 vec_dup patterns.
17760 (define_mode_iterator AVX2_VEC_DUP_MODE
17761 [V32QI V16QI V16HI V8HI V8SI V4SI])
17763 (define_insn "*vec_dup<mode>"
17764 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi")
17765 (vec_duplicate:AVX2_VEC_DUP_MODE
17766 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
17769 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
17770 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
17772 [(set_attr "isa" "*,*,noavx512vl")
17773 (set_attr "type" "ssemov")
17774 (set_attr "prefix_extra" "1")
17775 (set_attr "prefix" "maybe_evex")
17776 (set_attr "mode" "<sseinsnmode>")])
17778 (define_insn "vec_dup<mode>"
17779 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
17780 (vec_duplicate:AVX_VEC_DUP_MODE
17781 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
17784 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
17785 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
17786 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
17787 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
17789 [(set_attr "type" "ssemov")
17790 (set_attr "prefix_extra" "1")
17791 (set_attr "prefix" "maybe_evex")
17792 (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
17793 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
17796 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
17797 (vec_duplicate:AVX2_VEC_DUP_MODE
17798 (match_operand:<ssescalarmode> 1 "register_operand")))]
17800 /* Disable this splitter if the avx512vl_vec_dup_gprv*[qhs]i insn is
17801 available, because then we can broadcast from GPRs directly.
17802 For the V*[QH]I modes that requires both -mavx512vl and -mavx512bw,
17803 for the V*SI modes just -mavx512vl.  A sketch of the two-step fallback used here follows the splitters below. */
17804 && !(TARGET_AVX512VL
17805 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
17806 && reload_completed && GENERAL_REG_P (operands[1])"
17809 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
17810 CONST0_RTX (V4SImode),
17811 gen_lowpart (SImode, operands[1])));
17812 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
17813 gen_lowpart (<ssexmmmode>mode,
17819 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
17820 (vec_duplicate:AVX_VEC_DUP_MODE
17821 (match_operand:<ssescalarmode> 1 "register_operand")))]
17822 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
17823 [(set (match_dup 2)
17824 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
17826 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
17827 "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
17829 (define_insn "avx_vbroadcastf128_<mode>"
17830 [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
17832 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
17836 vbroadcast<i128>\t{%1, %0|%0, %1}
17837 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
17838 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
17839 vbroadcast<i128vldq>\t{%1, %0|%0, %1}
17840 vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
17841 vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
17842 vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
17843 [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
17844 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
17845 (set_attr "prefix_extra" "1")
17846 (set_attr "length_immediate" "0,1,1,0,1,0,1")
17847 (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
17848 (set_attr "mode" "<sseinsnmode>")])
17850 ;; For broadcast[i|f]32x2.  Note there is no V4SF variant, only V4SI.
17851 (define_mode_iterator VI4F_BRCST32x2
17852 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
17853 V16SF (V8SF "TARGET_AVX512VL")])
17855 (define_mode_attr 64x2mode
17856 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
17858 (define_mode_attr 32x2mode
17859 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
17860 (V8SF "V2SF") (V4SI "V2SI")])
17862 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
17863 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
17864 (vec_duplicate:VI4F_BRCST32x2
17865 (vec_select:<32x2mode>
17866 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
17867 (parallel [(const_int 0) (const_int 1)]))))]
17869 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
17870 [(set_attr "type" "ssemov")
17871 (set_attr "prefix_extra" "1")
17872 (set_attr "prefix" "evex")
17873 (set_attr "mode" "<sseinsnmode>")])
17875 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
17876 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
17877 (vec_duplicate:VI4F_256
17878 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
17881 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
17882 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17883 [(set_attr "type" "ssemov")
17884 (set_attr "prefix_extra" "1")
17885 (set_attr "prefix" "evex")
17886 (set_attr "mode" "<sseinsnmode>")])
17888 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
17889 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
17890 (vec_duplicate:V16FI
17891 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
17894 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
17895 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17896 [(set_attr "type" "ssemov")
17897 (set_attr "prefix_extra" "1")
17898 (set_attr "prefix" "evex")
17899 (set_attr "mode" "<sseinsnmode>")])
17901 ;; For broadcast[i|f]64x2
17902 (define_mode_iterator VI8F_BRCST64x2
17903 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
17905 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
17906 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
17907 (vec_duplicate:VI8F_BRCST64x2
17908 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
17911 vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
17912 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17913 [(set_attr "type" "ssemov")
17914 (set_attr "prefix_extra" "1")
17915 (set_attr "prefix" "evex")
17916 (set_attr "mode" "<sseinsnmode>")])
17918 (define_insn "avx512cd_maskb_vec_dup<mode>"
17919 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
17920 (vec_duplicate:VI8_AVX512VL
17922 (match_operand:QI 1 "register_operand" "Yk"))))]
17924 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
17925 [(set_attr "type" "mskmov")
17926 (set_attr "prefix" "evex")
17927 (set_attr "mode" "XI")])
17929 (define_insn "avx512cd_maskw_vec_dup<mode>"
17930 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
17931 (vec_duplicate:VI4_AVX512VL
17933 (match_operand:HI 1 "register_operand" "Yk"))))]
17935 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
17936 [(set_attr "type" "mskmov")
17937 (set_attr "prefix" "evex")
17938 (set_attr "mode" "XI")])
17940 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
17941 ;; If it so happens that the input is in memory, use vbroadcast.
17942 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
17943 (define_insn "*avx_vperm_broadcast_v4sf"
17944 [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
17946 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
17947 (match_parallel 2 "avx_vbroadcast_operand"
17948 [(match_operand 3 "const_int_operand" "C,n,n")])))]
17951 int elt = INTVAL (operands[3]);
17952 switch (which_alternative)
17956 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
17957 return "vbroadcastss\t{%1, %0|%0, %k1}";
17959 operands[2] = GEN_INT (elt * 0x55);
17960 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
17962 gcc_unreachable ();
17965 [(set_attr "type" "ssemov,ssemov,sselog1")
17966 (set_attr "prefix_extra" "1")
17967 (set_attr "length_immediate" "0,0,1")
17968 (set_attr "prefix" "maybe_evex")
17969 (set_attr "mode" "SF,SF,V4SF")])
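;; Worked example (illustrative): the register alternative above multiplies
;; the broadcast element index by 0x55 because 0x55 = 0b01010101 replicates a
;; 2-bit field into all four positions of the vpermilps immediate.  For
;; elt = 2 this gives 2 * 0x55 = 0xaa = 0b10101010, i.e. every destination
;; element selects source element 2.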
17971 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
17972 [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
17974 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
17975 (match_parallel 2 "avx_vbroadcast_operand"
17976 [(match_operand 3 "const_int_operand" "C,n,n")])))]
17979 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
17980 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
17982 rtx op0 = operands[0], op1 = operands[1];
17983 int elt = INTVAL (operands[3]);
17989 if (TARGET_AVX2 && elt == 0)
17991 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
17996 /* Shuffle the element we care about into all elements of the 128-bit lane.
17997 The other lane gets shuffled too, but we don't care. */
17998 if (<MODE>mode == V4DFmode)
17999 mask = (elt & 1 ? 15 : 0);
18001 mask = (elt & 3) * 0x55;
18002 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
18004 /* Shuffle the lane we care about into both lanes of the dest. */
18005 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
18006 if (EXT_REX_SSE_REG_P (op0))
18008 /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS or VSHUFF32X4 here.  */
18010 gcc_assert (<MODE>mode == V8SFmode);
18011 if ((mask & 1) == 0)
18012 emit_insn (gen_avx2_vec_dupv8sf (op0,
18013 gen_lowpart (V4SFmode, op0)));
18015 emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
18016 GEN_INT (4), GEN_INT (5),
18017 GEN_INT (6), GEN_INT (7),
18018 GEN_INT (12), GEN_INT (13),
18019 GEN_INT (14), GEN_INT (15)));
18023 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
18027 operands[1] = adjust_address (op1, <ssescalarmode>mode,
18028 elt * GET_MODE_SIZE (<ssescalarmode>mode));
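;; Worked example (illustrative) of the mask arithmetic above for V8SF with
;; elt = 5: the in-lane mask is (5 & 3) * 0x55 = 0x55, so vpermilps copies
;; element 1 of each 128-bit lane into all elements of that lane, and the
;; lane mask is (5 / 4) * 0x11 = 0x11, so vperm2f128 then copies the high
;; lane into both lanes of the destination.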
18031 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
18032 [(set (match_operand:VF2 0 "register_operand")
18034 (match_operand:VF2 1 "nonimmediate_operand")
18035 (match_operand:SI 2 "const_0_to_255_operand")))]
18036 "TARGET_AVX && <mask_mode512bit_condition>"
18038 int mask = INTVAL (operands[2]);
18039 rtx perm[<ssescalarnum>];
18042 for (i = 0; i < <ssescalarnum>; i = i + 2)
18044 perm[i] = GEN_INT (((mask >> i) & 1) + i);
18045 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
18049 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
18052 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
18053 [(set (match_operand:VF1 0 "register_operand")
18055 (match_operand:VF1 1 "nonimmediate_operand")
18056 (match_operand:SI 2 "const_0_to_255_operand")))]
18057 "TARGET_AVX && <mask_mode512bit_condition>"
18059 int mask = INTVAL (operands[2]);
18060 rtx perm[<ssescalarnum>];
18063 for (i = 0; i < <ssescalarnum>; i = i + 4)
18065 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
18066 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
18067 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
18068 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
18072 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
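;; Worked example (illustrative): for a V8SF operand and imm8 0x1b the loop
;; above builds the selector parallel {3, 2, 1, 0, 7, 6, 5, 4}; the same four
;; 2-bit fields are reused with an offset of 4 for the second group, matching
;; vpermilps, which applies the immediate within each 128-bit lane.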
18075 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
18076 [(set (match_operand:VF 0 "register_operand" "=v")
18078 (match_operand:VF 1 "nonimmediate_operand" "vm")
18079 (match_parallel 2 ""
18080 [(match_operand 3 "const_int_operand")])))]
18081 "TARGET_AVX && <mask_mode512bit_condition>
18082 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
18084 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
18085 operands[2] = GEN_INT (mask);
18086 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
18088 [(set_attr "type" "sselog")
18089 (set_attr "prefix_extra" "1")
18090 (set_attr "length_immediate" "1")
18091 (set_attr "prefix" "<mask_prefix>")
18092 (set_attr "mode" "<sseinsnmode>")])
18094 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
18095 [(set (match_operand:VF 0 "register_operand" "=v")
18097 [(match_operand:VF 1 "register_operand" "v")
18098 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
18100 "TARGET_AVX && <mask_mode512bit_condition>"
18101 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18102 [(set_attr "type" "sselog")
18103 (set_attr "prefix_extra" "1")
18104 (set_attr "btver2_decode" "vector")
18105 (set_attr "prefix" "<mask_prefix>")
18106 (set_attr "mode" "<sseinsnmode>")])
18108 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
18109 [(match_operand:VI48F 0 "register_operand")
18110 (match_operand:VI48F 1 "register_operand")
18111 (match_operand:<sseintvecmode> 2 "register_operand")
18112 (match_operand:VI48F 3 "nonimmediate_operand")
18113 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18116 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
18117 operands[0], operands[1], operands[2], operands[3],
18118 CONST0_RTX (<MODE>mode), operands[4]));
18122 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
18123 [(match_operand:VI1_AVX512VL 0 "register_operand")
18124 (match_operand:VI1_AVX512VL 1 "register_operand")
18125 (match_operand:<sseintvecmode> 2 "register_operand")
18126 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
18127 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18128 "TARGET_AVX512VBMI"
18130 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
18131 operands[0], operands[1], operands[2], operands[3],
18132 CONST0_RTX (<MODE>mode), operands[4]));
18136 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
18137 [(match_operand:VI2_AVX512VL 0 "register_operand")
18138 (match_operand:VI2_AVX512VL 1 "register_operand")
18139 (match_operand:<sseintvecmode> 2 "register_operand")
18140 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand")
18141 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18144 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
18145 operands[0], operands[1], operands[2], operands[3],
18146 CONST0_RTX (<MODE>mode), operands[4]));
18150 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
18151 [(set (match_operand:VI48F 0 "register_operand" "=v")
18153 [(match_operand:VI48F 1 "register_operand" "v")
18154 (match_operand:<sseintvecmode> 2 "register_operand" "0")
18155 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
18158 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
18159 [(set_attr "type" "sselog")
18160 (set_attr "prefix" "evex")
18161 (set_attr "mode" "<sseinsnmode>")])
18163 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
18164 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
18165 (unspec:VI1_AVX512VL
18166 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
18167 (match_operand:<sseintvecmode> 2 "register_operand" "0")
18168 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
18170 "TARGET_AVX512VBMI"
18171 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
18172 [(set_attr "type" "sselog")
18173 (set_attr "prefix" "evex")
18174 (set_attr "mode" "<sseinsnmode>")])
18176 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
18177 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18178 (unspec:VI2_AVX512VL
18179 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
18180 (match_operand:<sseintvecmode> 2 "register_operand" "0")
18181 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
18184 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
18185 [(set_attr "type" "sselog")
18186 (set_attr "prefix" "evex")
18187 (set_attr "mode" "<sseinsnmode>")])
18189 (define_insn "<avx512>_vpermi2var<mode>3_mask"
18190 [(set (match_operand:VI48F 0 "register_operand" "=v")
18193 [(match_operand:VI48F 1 "register_operand" "v")
18194 (match_operand:<sseintvecmode> 2 "register_operand" "0")
18195 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
18196 UNSPEC_VPERMI2_MASK)
18198 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18200 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18201 [(set_attr "type" "sselog")
18202 (set_attr "prefix" "evex")
18203 (set_attr "mode" "<sseinsnmode>")])
18205 (define_insn "<avx512>_vpermi2var<mode>3_mask"
18206 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
18207 (vec_merge:VI1_AVX512VL
18208 (unspec:VI1_AVX512VL
18209 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
18210 (match_operand:<sseintvecmode> 2 "register_operand" "0")
18211 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
18212 UNSPEC_VPERMI2_MASK)
18214 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18215 "TARGET_AVX512VBMI"
18216 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18217 [(set_attr "type" "sselog")
18218 (set_attr "prefix" "evex")
18219 (set_attr "mode" "<sseinsnmode>")])
18221 (define_insn "<avx512>_vpermi2var<mode>3_mask"
18222 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18223 (vec_merge:VI2_AVX512VL
18224 (unspec:VI2_AVX512VL
18225 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
18226 (match_operand:<sseintvecmode> 2 "register_operand" "0")
18227 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
18228 UNSPEC_VPERMI2_MASK)
18230 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18232 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18233 [(set_attr "type" "sselog")
18234 (set_attr "prefix" "evex")
18235 (set_attr "mode" "<sseinsnmode>")])
18237 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
18238 [(match_operand:VI48F 0 "register_operand")
18239 (match_operand:<sseintvecmode> 1 "register_operand")
18240 (match_operand:VI48F 2 "register_operand")
18241 (match_operand:VI48F 3 "nonimmediate_operand")
18242 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18245 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
18246 operands[0], operands[1], operands[2], operands[3],
18247 CONST0_RTX (<MODE>mode), operands[4]));
18251 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
18252 [(match_operand:VI1_AVX512VL 0 "register_operand")
18253 (match_operand:<sseintvecmode> 1 "register_operand")
18254 (match_operand:VI1_AVX512VL 2 "register_operand")
18255 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
18256 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18257 "TARGET_AVX512VBMI"
18259 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
18260 operands[0], operands[1], operands[2], operands[3],
18261 CONST0_RTX (<MODE>mode), operands[4]));
18265 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
18266 [(match_operand:VI2_AVX512VL 0 "register_operand")
18267 (match_operand:<sseintvecmode> 1 "register_operand")
18268 (match_operand:VI2_AVX512VL 2 "register_operand")
18269 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand")
18270 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18273 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
18274 operands[0], operands[1], operands[2], operands[3],
18275 CONST0_RTX (<MODE>mode), operands[4]));
18279 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
18280 [(set (match_operand:VI48F 0 "register_operand" "=v")
18282 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
18283 (match_operand:VI48F 2 "register_operand" "0")
18284 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
18287 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
18288 [(set_attr "type" "sselog")
18289 (set_attr "prefix" "evex")
18290 (set_attr "mode" "<sseinsnmode>")])
18292 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
18293 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
18294 (unspec:VI1_AVX512VL
18295 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
18296 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
18297 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
18299 "TARGET_AVX512VBMI"
18300 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
18301 [(set_attr "type" "sselog")
18302 (set_attr "prefix" "evex")
18303 (set_attr "mode" "<sseinsnmode>")])
18305 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
18306 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18307 (unspec:VI2_AVX512VL
18308 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
18309 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
18310 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
18313 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
18314 [(set_attr "type" "sselog")
18315 (set_attr "prefix" "evex")
18316 (set_attr "mode" "<sseinsnmode>")])
18318 (define_insn "<avx512>_vpermt2var<mode>3_mask"
18319 [(set (match_operand:VI48F 0 "register_operand" "=v")
18322 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
18323 (match_operand:VI48F 2 "register_operand" "0")
18324 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
18327 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18329 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18330 [(set_attr "type" "sselog")
18331 (set_attr "prefix" "evex")
18332 (set_attr "mode" "<sseinsnmode>")])
18334 (define_insn "<avx512>_vpermt2var<mode>3_mask"
18335 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
18336 (vec_merge:VI1_AVX512VL
18337 (unspec:VI1_AVX512VL
18338 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
18339 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
18340 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
18343 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18344 "TARGET_AVX512VBMI"
18345 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18346 [(set_attr "type" "sselog")
18347 (set_attr "prefix" "evex")
18348 (set_attr "mode" "<sseinsnmode>")])
18350 (define_insn "<avx512>_vpermt2var<mode>3_mask"
18351 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18352 (vec_merge:VI2_AVX512VL
18353 (unspec:VI2_AVX512VL
18354 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
18355 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
18356 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
18359 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18361 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
18362 [(set_attr "type" "sselog")
18363 (set_attr "prefix" "evex")
18364 (set_attr "mode" "<sseinsnmode>")])
18366 (define_expand "avx_vperm2f128<mode>3"
18367 [(set (match_operand:AVX256MODE2P 0 "register_operand")
18368 (unspec:AVX256MODE2P
18369 [(match_operand:AVX256MODE2P 1 "register_operand")
18370 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
18371 (match_operand:SI 3 "const_0_to_255_operand")]
18372 UNSPEC_VPERMIL2F128))]
18375 int mask = INTVAL (operands[3]);
18376 if ((mask & 0x88) == 0)
18378 rtx perm[<ssescalarnum>], t1, t2;
18379 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
18381 base = (mask & 3) * nelt2;
18382 for (i = 0; i < nelt2; ++i)
18383 perm[i] = GEN_INT (base + i);
18385 base = ((mask >> 4) & 3) * nelt2;
18386 for (i = 0; i < nelt2; ++i)
18387 perm[i + nelt2] = GEN_INT (base + i);
18389 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
18390 operands[1], operands[2]);
18391 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
18392 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
18393 t2 = gen_rtx_SET (operands[0], t2);
18399 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
18400 ;; means that in order to represent this properly in rtl we'd have to
18401 ;; nest *another* vec_concat with a zero operand and do the select from
18402 ;; a 4x wide vector. That doesn't seem very nice.
18403 (define_insn "*avx_vperm2f128<mode>_full"
18404 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
18405 (unspec:AVX256MODE2P
18406 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
18407 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
18408 (match_operand:SI 3 "const_0_to_255_operand" "n")]
18409 UNSPEC_VPERMIL2F128))]
18411 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18412 [(set_attr "type" "sselog")
18413 (set_attr "prefix_extra" "1")
18414 (set_attr "length_immediate" "1")
18415 (set_attr "prefix" "vex")
18416 (set_attr "mode" "<sseinsnmode>")])
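;; Illustrative sketch, not part of GCC: one of the lane-zeroing encodings
;; described in the note above, which stays in the unspec form.  imm8 0x81
;; selects the high 128-bit lane of the first source (field 0b01) for the low
;; half of the result and zeroes the high half (bit 7 set);
;; high_lane_then_zero is a hypothetical helper.
;;
;;   #include <immintrin.h>
;;
;;   __m256
;;   high_lane_then_zero (__m256 a, __m256 b)
;;   {
;;     return _mm256_permute2f128_ps (a, b, 0x81);
;;   }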
18418 (define_insn "*avx_vperm2f128<mode>_nozero"
18419 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
18420 (vec_select:AVX256MODE2P
18421 (vec_concat:<ssedoublevecmode>
18422 (match_operand:AVX256MODE2P 1 "register_operand" "x")
18423 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
18424 (match_parallel 3 ""
18425 [(match_operand 4 "const_int_operand")])))]
18427 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
18429 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
18431 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
18433 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
18434 operands[3] = GEN_INT (mask);
18435 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
18437 [(set_attr "type" "sselog")
18438 (set_attr "prefix_extra" "1")
18439 (set_attr "length_immediate" "1")
18440 (set_attr "prefix" "vex")
18441 (set_attr "mode" "<sseinsnmode>")])
18443 (define_insn "*ssse3_palignr<mode>_perm"
18444 [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
18446 (match_operand:V_128 1 "register_operand" "0,x,v")
18447 (match_parallel 2 "palignr_operand"
18448 [(match_operand 3 "const_int_operand" "n,n,n")])))]
18451 operands[2] = (GEN_INT (INTVAL (operands[3])
18452 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
18454 switch (which_alternative)
18457 return "palignr\t{%2, %1, %0|%0, %1, %2}";
18460 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
18462 gcc_unreachable ();
18465 [(set_attr "isa" "noavx,avx,avx512bw")
18466 (set_attr "type" "sseishft")
18467 (set_attr "atom_unit" "sishuf")
18468 (set_attr "prefix_data16" "1,*,*")
18469 (set_attr "prefix_extra" "1")
18470 (set_attr "length_immediate" "1")
18471 (set_attr "prefix" "orig,vex,evex")])
18473 (define_expand "avx512vl_vinsert<mode>"
18474 [(match_operand:VI48F_256 0 "register_operand")
18475 (match_operand:VI48F_256 1 "register_operand")
18476 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
18477 (match_operand:SI 3 "const_0_to_1_operand")
18478 (match_operand:VI48F_256 4 "register_operand")
18479 (match_operand:<avx512fmaskmode> 5 "register_operand")]
18482 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18484 switch (INTVAL (operands[3]))
18487 insn = gen_vec_set_lo_<mode>_mask;
18490 insn = gen_vec_set_hi_<mode>_mask;
18493 gcc_unreachable ();
18496 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
18501 (define_expand "avx_vinsertf128<mode>"
18502 [(match_operand:V_256 0 "register_operand")
18503 (match_operand:V_256 1 "register_operand")
18504 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
18505 (match_operand:SI 3 "const_0_to_1_operand")]
18508 rtx (*insn)(rtx, rtx, rtx);
18510 switch (INTVAL (operands[3]))
18513 insn = gen_vec_set_lo_<mode>;
18516 insn = gen_vec_set_hi_<mode>;
18519 gcc_unreachable ();
18522 emit_insn (insn (operands[0], operands[1], operands[2]));
18526 (define_insn "vec_set_lo_<mode><mask_name>"
18527 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
18528 (vec_concat:VI8F_256
18529 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
18530 (vec_select:<ssehalfvecmode>
18531 (match_operand:VI8F_256 1 "register_operand" "v")
18532 (parallel [(const_int 2) (const_int 3)]))))]
18533 "TARGET_AVX && <mask_avx512dq_condition>"
18535 if (TARGET_AVX512DQ)
18536 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18537 else if (TARGET_AVX512VL)
18538 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18540 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
18542 [(set_attr "type" "sselog")
18543 (set_attr "prefix_extra" "1")
18544 (set_attr "length_immediate" "1")
18545 (set_attr "prefix" "vex")
18546 (set_attr "mode" "<sseinsnmode>")])
18548 (define_insn "vec_set_hi_<mode><mask_name>"
18549 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
18550 (vec_concat:VI8F_256
18551 (vec_select:<ssehalfvecmode>
18552 (match_operand:VI8F_256 1 "register_operand" "v")
18553 (parallel [(const_int 0) (const_int 1)]))
18554 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
18555 "TARGET_AVX && <mask_avx512dq_condition>"
18557 if (TARGET_AVX512DQ)
18558 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18559 else if (TARGET_AVX512VL)
18560 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18562 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
18564 [(set_attr "type" "sselog")
18565 (set_attr "prefix_extra" "1")
18566 (set_attr "length_immediate" "1")
18567 (set_attr "prefix" "vex")
18568 (set_attr "mode" "<sseinsnmode>")])
18570 (define_insn "vec_set_lo_<mode><mask_name>"
18571 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
18572 (vec_concat:VI4F_256
18573 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
18574 (vec_select:<ssehalfvecmode>
18575 (match_operand:VI4F_256 1 "register_operand" "v")
18576 (parallel [(const_int 4) (const_int 5)
18577 (const_int 6) (const_int 7)]))))]
18580 if (TARGET_AVX512VL)
18581 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
18583 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
18585 [(set_attr "type" "sselog")
18586 (set_attr "prefix_extra" "1")
18587 (set_attr "length_immediate" "1")
18588 (set_attr "prefix" "vex")
18589 (set_attr "mode" "<sseinsnmode>")])
18591 (define_insn "vec_set_hi_<mode><mask_name>"
18592 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
18593 (vec_concat:VI4F_256
18594 (vec_select:<ssehalfvecmode>
18595 (match_operand:VI4F_256 1 "register_operand" "v")
18596 (parallel [(const_int 0) (const_int 1)
18597 (const_int 2) (const_int 3)]))
18598 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
18601 if (TARGET_AVX512VL)
18602 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
18604 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
18606 [(set_attr "type" "sselog")
18607 (set_attr "prefix_extra" "1")
18608 (set_attr "length_immediate" "1")
18609 (set_attr "prefix" "vex")
18610 (set_attr "mode" "<sseinsnmode>")])
18612 (define_insn "vec_set_lo_v16hi"
18613 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
18615 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
18617 (match_operand:V16HI 1 "register_operand" "x,v")
18618 (parallel [(const_int 8) (const_int 9)
18619 (const_int 10) (const_int 11)
18620 (const_int 12) (const_int 13)
18621 (const_int 14) (const_int 15)]))))]
18624 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
18625 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
18626 [(set_attr "type" "sselog")
18627 (set_attr "prefix_extra" "1")
18628 (set_attr "length_immediate" "1")
18629 (set_attr "prefix" "vex,evex")
18630 (set_attr "mode" "OI")])
18632 (define_insn "vec_set_hi_v16hi"
18633 [(set (match_operand:V16HI 0 "register_operand" "=x,v")
18636 (match_operand:V16HI 1 "register_operand" "x,v")
18637 (parallel [(const_int 0) (const_int 1)
18638 (const_int 2) (const_int 3)
18639 (const_int 4) (const_int 5)
18640 (const_int 6) (const_int 7)]))
18641 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
18644 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
18645 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
18646 [(set_attr "type" "sselog")
18647 (set_attr "prefix_extra" "1")
18648 (set_attr "length_immediate" "1")
18649 (set_attr "prefix" "vex,evex")
18650 (set_attr "mode" "OI")])
18652 (define_insn "vec_set_lo_v32qi"
18653 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
18655 (match_operand:V16QI 2 "nonimmediate_operand" "xm,v")
18657 (match_operand:V32QI 1 "register_operand" "x,v")
18658 (parallel [(const_int 16) (const_int 17)
18659 (const_int 18) (const_int 19)
18660 (const_int 20) (const_int 21)
18661 (const_int 22) (const_int 23)
18662 (const_int 24) (const_int 25)
18663 (const_int 26) (const_int 27)
18664 (const_int 28) (const_int 29)
18665 (const_int 30) (const_int 31)]))))]
18668 vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
18669 vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
18670 [(set_attr "type" "sselog")
18671 (set_attr "prefix_extra" "1")
18672 (set_attr "length_immediate" "1")
18673 (set_attr "prefix" "vex,evex")
18674 (set_attr "mode" "OI")])
18676 (define_insn "vec_set_hi_v32qi"
18677 [(set (match_operand:V32QI 0 "register_operand" "=x,v")
18680 (match_operand:V32QI 1 "register_operand" "x,v")
18681 (parallel [(const_int 0) (const_int 1)
18682 (const_int 2) (const_int 3)
18683 (const_int 4) (const_int 5)
18684 (const_int 6) (const_int 7)
18685 (const_int 8) (const_int 9)
18686 (const_int 10) (const_int 11)
18687 (const_int 12) (const_int 13)
18688 (const_int 14) (const_int 15)]))
18689 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
18692 vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
18693 vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
18694 [(set_attr "type" "sselog")
18695 (set_attr "prefix_extra" "1")
18696 (set_attr "length_immediate" "1")
18697 (set_attr "prefix" "vex,evex")
18698 (set_attr "mode" "OI")])
18700 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
18701 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
18703 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
18704 (match_operand:V48_AVX2 1 "memory_operand" "m")]
18707 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
18708 [(set_attr "type" "sselog1")
18709 (set_attr "prefix_extra" "1")
18710 (set_attr "prefix" "vex")
18711 (set_attr "btver2_decode" "vector")
18712 (set_attr "mode" "<sseinsnmode>")])
18714 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
18715 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
18717 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
18718 (match_operand:V48_AVX2 2 "register_operand" "x")
18722 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
18723 [(set_attr "type" "sselog1")
18724 (set_attr "prefix_extra" "1")
18725 (set_attr "prefix" "vex")
18726 (set_attr "btver2_decode" "vector")
18727 (set_attr "mode" "<sseinsnmode>")])
18729 (define_expand "maskload<mode><sseintvecmodelower>"
18730 [(set (match_operand:V48_AVX2 0 "register_operand")
18732 [(match_operand:<sseintvecmode> 2 "register_operand")
18733 (match_operand:V48_AVX2 1 "memory_operand")]
18737 (define_expand "maskload<mode><avx512fmaskmodelower>"
18738 [(set (match_operand:V48_AVX512VL 0 "register_operand")
18739 (vec_merge:V48_AVX512VL
18740 (match_operand:V48_AVX512VL 1 "memory_operand")
18742 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18745 (define_expand "maskload<mode><avx512fmaskmodelower>"
18746 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
18747 (vec_merge:VI12_AVX512VL
18748 (match_operand:VI12_AVX512VL 1 "memory_operand")
18750 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18753 (define_expand "maskstore<mode><sseintvecmodelower>"
18754 [(set (match_operand:V48_AVX2 0 "memory_operand")
18756 [(match_operand:<sseintvecmode> 2 "register_operand")
18757 (match_operand:V48_AVX2 1 "register_operand")
18762 (define_expand "maskstore<mode><avx512fmaskmodelower>"
18763 [(set (match_operand:V48_AVX512VL 0 "memory_operand")
18764 (vec_merge:V48_AVX512VL
18765 (match_operand:V48_AVX512VL 1 "register_operand")
18767 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18770 (define_expand "maskstore<mode><avx512fmaskmodelower>"
18771 [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
18772 (vec_merge:VI12_AVX512VL
18773 (match_operand:VI12_AVX512VL 1 "register_operand")
18775 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
18778 (define_expand "cbranch<mode>4"
18779 [(set (reg:CC FLAGS_REG)
18780 (compare:CC (match_operand:VI48_AVX 1 "register_operand")
18781 (match_operand:VI48_AVX 2 "nonimmediate_operand")))
18782 (set (pc) (if_then_else
18783 (match_operator 0 "bt_comparison_operator"
18784 [(reg:CC FLAGS_REG) (const_int 0)])
18785 (label_ref (match_operand 3))
18789 ix86_expand_branch (GET_CODE (operands[0]),
18790 operands[1], operands[2], operands[3]);
18795 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
18796 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
18797 (unspec:AVX256MODE2P
18798 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
18800 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
18802 "&& reload_completed"
18803 [(set (match_dup 0) (match_dup 1))]
18805 if (REG_P (operands[0]))
18806 operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
18808 operands[1] = lowpart_subreg (<MODE>mode, operands[1],
18809 <ssehalfvecmode>mode);
18812 ;; Modes handled by vec_init expanders.
18813 (define_mode_iterator VEC_INIT_MODE
18814 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
18815 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
18816 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
18817 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
18818 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
18819 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
18820 (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
18822 ;; Likewise, but for initialization from half-sized vectors.
18823 ;; Thus, these are all VEC_INIT_MODE modes except the two-element V2?? ones.
18824 (define_mode_iterator VEC_INIT_HALF_MODE
18825 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
18826 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
18827 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
18828 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
18829 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
18830 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
18831 (V4TI "TARGET_AVX512F")])
18833 (define_expand "vec_init<mode><ssescalarmodelower>"
18834 [(match_operand:VEC_INIT_MODE 0 "register_operand")
18838 ix86_expand_vector_init (false, operands[0], operands[1]);
18842 (define_expand "vec_init<mode><ssehalfvecmodelower>"
18843 [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
18847 ix86_expand_vector_init (false, operands[0], operands[1]);
18851 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
18852 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
18853 (ashiftrt:VI48_AVX512F_AVX512VL
18854 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
18855 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
18856 "TARGET_AVX2 && <mask_mode512bit_condition>"
18857 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18858 [(set_attr "type" "sseishft")
18859 (set_attr "prefix" "maybe_evex")
18860 (set_attr "mode" "<sseinsnmode>")])
18862 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
18863 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18864 (ashiftrt:VI2_AVX512VL
18865 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
18866 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
18868 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18869 [(set_attr "type" "sseishft")
18870 (set_attr "prefix" "maybe_evex")
18871 (set_attr "mode" "<sseinsnmode>")])
18873 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
18874 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
18875 (any_lshift:VI48_AVX512F
18876 (match_operand:VI48_AVX512F 1 "register_operand" "v")
18877 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
18878 "TARGET_AVX2 && <mask_mode512bit_condition>"
18879 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18880 [(set_attr "type" "sseishft")
18881 (set_attr "prefix" "maybe_evex")
18882 (set_attr "mode" "<sseinsnmode>")])
18884 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
18885 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18886 (any_lshift:VI2_AVX512VL
18887 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
18888 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
18890 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18891 [(set_attr "type" "sseishft")
18892 (set_attr "prefix" "maybe_evex")
18893 (set_attr "mode" "<sseinsnmode>")])
18895 (define_insn "avx_vec_concat<mode>"
18896 [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
18897 (vec_concat:V_256_512
18898 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,v,x,v")
18899 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,vm,C,C")))]
18902 switch (which_alternative)
18905 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18907 if (<MODE_SIZE> == 64)
18909 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
18910 return "vinsert<shuffletype>32x8\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18912 return "vinsert<shuffletype>64x4\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18916 if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18917 return "vinsert<shuffletype>64x2\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18919 return "vinsert<shuffletype>32x4\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
18923 switch (get_attr_mode (insn))
18926 return "vmovaps\t{%1, %t0|%t0, %1}";
18928 return "vmovapd\t{%1, %t0|%t0, %1}";
18930 return "vmovaps\t{%1, %x0|%x0, %1}";
18932 return "vmovapd\t{%1, %x0|%x0, %1}";
18934 if (which_alternative == 2)
18935 return "vmovdqa\t{%1, %t0|%t0, %1}";
18936 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18937 return "vmovdqa64\t{%1, %t0|%t0, %1}";
18939 return "vmovdqa32\t{%1, %t0|%t0, %1}";
18941 if (which_alternative == 2)
18942 return "vmovdqa\t{%1, %x0|%x0, %1}";
18943 else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
18944 return "vmovdqa64\t{%1, %x0|%x0, %1}";
18946 return "vmovdqa32\t{%1, %x0|%x0, %1}";
18948 gcc_unreachable ();
18951 gcc_unreachable ();
18954 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
18955 (set_attr "prefix_extra" "1,1,*,*")
18956 (set_attr "length_immediate" "1,1,*,*")
18957 (set_attr "prefix" "maybe_evex")
18958 (set_attr "mode" "<sseinsnmode>")])
18960 (define_insn "vcvtph2ps<mask_name>"
18961 [(set (match_operand:V4SF 0 "register_operand" "=v")
18963 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
18965 (parallel [(const_int 0) (const_int 1)
18966 (const_int 2) (const_int 3)])))]
18967 "TARGET_F16C || TARGET_AVX512VL"
18968 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18969 [(set_attr "type" "ssecvt")
18970 (set_attr "prefix" "maybe_evex")
18971 (set_attr "mode" "V4SF")])
18973 (define_insn "*vcvtph2ps_load<mask_name>"
18974 [(set (match_operand:V4SF 0 "register_operand" "=v")
18975 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
18976 UNSPEC_VCVTPH2PS))]
18977 "TARGET_F16C || TARGET_AVX512VL"
18978 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18979 [(set_attr "type" "ssecvt")
18980 (set_attr "prefix" "vex")
18981 (set_attr "mode" "V8SF")])
18983 (define_insn "vcvtph2ps256<mask_name>"
18984 [(set (match_operand:V8SF 0 "register_operand" "=v")
18985 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
18986 UNSPEC_VCVTPH2PS))]
18987 "TARGET_F16C || TARGET_AVX512VL"
18988 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18989 [(set_attr "type" "ssecvt")
18990 (set_attr "prefix" "vex")
18991 (set_attr "btver2_decode" "double")
18992 (set_attr "mode" "V8SF")])
18994 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
18995 [(set (match_operand:V16SF 0 "register_operand" "=v")
18997 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18998 UNSPEC_VCVTPH2PS))]
19000 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
19001 [(set_attr "type" "ssecvt")
19002 (set_attr "prefix" "evex")
19003 (set_attr "mode" "V16SF")])
(define_expand "vcvtps2ph_mask"
  [(set (match_operand:V8HI 0 "register_operand")
	(vec_merge:V8HI
	  (vec_concat:V8HI
	    (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
			  (match_operand:SI 2 "const_0_to_255_operand")]
			 UNSPEC_VCVTPS2PH)
	    (match_dup 5))
	  (match_operand:V8HI 3 "vector_move_operand")
	  (match_operand:QI 4 "register_operand")))]
  "TARGET_AVX512VL"
  "operands[5] = CONST0_RTX (V4HImode);")

(define_expand "vcvtps2ph"
  [(set (match_operand:V8HI 0 "register_operand")
	(vec_concat:V8HI
	  (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
			(match_operand:SI 2 "const_0_to_255_operand")]
		       UNSPEC_VCVTPS2PH)
	  (match_dup 3)))]
  "TARGET_F16C"
  "operands[3] = CONST0_RTX (V4HImode);")

(define_insn "*vcvtps2ph<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
	(vec_concat:V8HI
	  (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
			(match_operand:SI 2 "const_0_to_255_operand" "N")]
		       UNSPEC_VCVTPS2PH)
	  (match_operand:V4HI 3 "const0_operand")))]
  "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
  "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "V4SF")])

(define_insn "*vcvtps2ph_store<mask_name>"
  [(set (match_operand:V4HI 0 "memory_operand" "=m")
	(unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
		     UNSPEC_VCVTPS2PH))]
  "TARGET_F16C || TARGET_AVX512VL"
  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "V4SF")])

(define_insn "vcvtps2ph256<mask_name>"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm")
	(unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
		     UNSPEC_VCVTPS2PH))]
  "TARGET_F16C || TARGET_AVX512VL"
  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "V8SF")])

(define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
	(unspec:V16HI
	  [(match_operand:V16SF 1 "register_operand" "v")
	   (match_operand:SI 2 "const_0_to_255_operand" "N")]
	  UNSPEC_VCVTPS2PH))]
  "TARGET_AVX512F"
  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])

;; For gather* insn patterns
(define_mode_iterator VEC_GATHER_MODE
  [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])

(define_mode_attr VEC_GATHER_IDXSI
  [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
   (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
   (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
   (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])

(define_mode_attr VEC_GATHER_IDXDI
  [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
   (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
   (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
   (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])

(define_mode_attr VEC_GATHER_SRCDI
  [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
   (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
   (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
   (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])

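;; AVX2 gathers merge the loaded elements into operand 1 under control of
;; the vector mask in operand 4; operand 6 is a VSIB address unspec that
;; the expander builds from the base, index and scale operands.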
(define_expand "avx2_gathersi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
		   (unspec:VEC_GATHER_MODE
		     [(match_operand:VEC_GATHER_MODE 1 "register_operand")
		      (mem:<ssescalarmode>
			(match_par_dup 6
			  [(match_operand 2 "vsib_address_operand")
			   (match_operand:<VEC_GATHER_IDXSI>
			      3 "register_operand")
			   (match_operand:SI 5 "const1248_operand ")]))
		      (mem:BLK (scratch))
		      (match_operand:VEC_GATHER_MODE 4 "register_operand")]
		     UNSPEC_GATHER))
	      (clobber (match_scratch:VEC_GATHER_MODE 7))])]
  "TARGET_AVX2"
{
  operands[6]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
					operands[5]), UNSPEC_VSIBADDR);
})

(define_insn "*avx2_gathersi<mode>"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
	(unspec:VEC_GATHER_MODE
	  [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
	   (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 3 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
		 (match_operand:SI 6 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])
	   (mem:BLK (scratch))
	   (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
	  UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "*avx2_gathersi<mode>_2"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
	(unspec:VEC_GATHER_MODE
	  [(pc)
	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 2 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
		 (match_operand:SI 5 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])
	   (mem:BLK (scratch))
	   (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
	  UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

(define_expand "avx2_gatherdi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
		   (unspec:VEC_GATHER_MODE
		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
		      (mem:<ssescalarmode>
			(match_par_dup 6
			  [(match_operand 2 "vsib_address_operand")
			   (match_operand:<VEC_GATHER_IDXDI>
			      3 "register_operand")
			   (match_operand:SI 5 "const1248_operand ")]))
		      (mem:BLK (scratch))
		      (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
		     UNSPEC_GATHER))
	      (clobber (match_scratch:VEC_GATHER_MODE 7))])]
  "TARGET_AVX2"
{
  operands[6]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
					operands[5]), UNSPEC_VSIBADDR);
})

(define_insn "*avx2_gatherdi<mode>"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
	(unspec:VEC_GATHER_MODE
	  [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
	   (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 3 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
		 (match_operand:SI 6 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])
	   (mem:BLK (scratch))
	   (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
	  UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "*avx2_gatherdi<mode>_2"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
	(unspec:VEC_GATHER_MODE
	  [(pc)
	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 2 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
		 (match_operand:SI 5 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])
	   (mem:BLK (scratch))
	   (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
	  UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
{
  if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "*avx2_gatherdi<mode>_3"
  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
	(vec_select:<VEC_GATHER_SRCDI>
	  (unspec:VI4F_256
	    [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
	     (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
	       [(unspec:P
		  [(match_operand:P 3 "vsib_address_operand" "Tv")
		   (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
		   (match_operand:SI 6 "const1248_operand" "n")]
		  UNSPEC_VSIBADDR)])
	     (mem:BLK (scratch))
	     (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
	    UNSPEC_GATHER)
	  (parallel [(const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)])))
   (clobber (match_scratch:VI4F_256 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "*avx2_gatherdi<mode>_4"
  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
	(vec_select:<VEC_GATHER_SRCDI>
	  (unspec:VI4F_256
	    [(pc)
	     (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
	       [(unspec:P
		  [(match_operand:P 2 "vsib_address_operand" "Tv")
		   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
		   (match_operand:SI 5 "const1248_operand" "n")]
		  UNSPEC_VSIBADDR)])
	     (mem:BLK (scratch))
	     (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
	    UNSPEC_GATHER)
	  (parallel [(const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)])))
   (clobber (match_scratch:VI4F_256 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])

;; Memory operand override for -masm=intel of the v*gatherq* patterns.
(define_mode_attr gatherq_mode
  [(V4SI "q") (V2DI "x") (V4SF "q") (V2DF "x")
   (V8SI "x") (V4DI "t") (V8SF "x") (V4DF "t")
   (V16SI "t") (V8DI "g") (V16SF "t") (V8DF "g")])

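;; AVX-512 gathers use a mask register (operand 4 of the expander) rather
;; than a vector mask; the instruction updates and finally zeroes that
;; mask, so the patterns clobber a mask scratch.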
(define_expand "<avx512>_gathersi<mode>"
  [(parallel [(set (match_operand:VI48F 0 "register_operand")
		   (unspec:VI48F
		     [(match_operand:VI48F 1 "register_operand")
		      (match_operand:<avx512fmaskmode> 4 "register_operand")
		      (mem:<ssescalarmode>
			(match_par_dup 6
			  [(match_operand 2 "vsib_address_operand")
			   (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
			   (match_operand:SI 5 "const1248_operand")]))]
		     UNSPEC_GATHER))
	      (clobber (match_scratch:<avx512fmaskmode> 7))])]
  "TARGET_AVX512F"
{
  operands[6]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
					operands[5]), UNSPEC_VSIBADDR);
})

(define_insn "*avx512f_gathersi<mode>"
  [(set (match_operand:VI48F 0 "register_operand" "=&v")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "register_operand" "0")
	   (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 4 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
		 (match_operand:SI 5 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])]
	  UNSPEC_GATHER))
   (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
  "TARGET_AVX512F"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %<xtg_mode>6}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "*avx512f_gathersi<mode>_2"
  [(set (match_operand:VI48F 0 "register_operand" "=&v")
	(unspec:VI48F
	  [(pc)
	   (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
	   (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 3 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
		 (match_operand:SI 4 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])]
	  UNSPEC_GATHER))
   (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
  "TARGET_AVX512F"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<xtg_mode>5}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_expand "<avx512>_gatherdi<mode>"
  [(parallel [(set (match_operand:VI48F 0 "register_operand")
		   (unspec:VI48F
		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
		      (match_operand:QI 4 "register_operand")
		      (mem:<ssescalarmode>
			(match_par_dup 6
			  [(match_operand 2 "vsib_address_operand")
			   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
			   (match_operand:SI 5 "const1248_operand")]))]
		     UNSPEC_GATHER))
	      (clobber (match_scratch:QI 7))])]
  "TARGET_AVX512F"
{
  operands[6]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
					operands[5]), UNSPEC_VSIBADDR);
})

(define_insn "*avx512f_gatherdi<mode>"
  [(set (match_operand:VI48F 0 "register_operand" "=&v")
	(unspec:VI48F
	  [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
	   (match_operand:QI 7 "register_operand" "2")
	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 4 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
		 (match_operand:SI 5 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])]
	  UNSPEC_GATHER))
   (clobber (match_scratch:QI 2 "=&Yk"))]
  "TARGET_AVX512F"
{
  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %<gatherq_mode>6}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "*avx512f_gatherdi<mode>_2"
  [(set (match_operand:VI48F 0 "register_operand" "=&v")
	(unspec:VI48F
	  [(pc)
	   (match_operand:QI 6 "register_operand" "1")
	   (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 3 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
		 (match_operand:SI 4 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])]
	  UNSPEC_GATHER))
   (clobber (match_scratch:QI 1 "=&Yk"))]
  "TARGET_AVX512F"
{
  if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
    {
      if (<MODE_SIZE> != 64)
	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %<gatherq_mode>5}";
      else
	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %t5}";
    }
  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<gatherq_mode>5}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

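;; AVX-512 scatter patterns.  The destination is a VSIB-addressed memory
;; store controlled by a mask register, which is clobbered just as in the
;; gather patterns above.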
(define_expand "<avx512>_scattersi<mode>"
  [(parallel [(set (mem:VI48F
		     (match_par_dup 5
		       [(match_operand 0 "vsib_address_operand")
			(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
			(match_operand:SI 4 "const1248_operand")]))
		   (unspec:VI48F
		     [(match_operand:<avx512fmaskmode> 1 "register_operand")
		      (match_operand:VI48F 3 "register_operand")]
		     UNSPEC_SCATTER))
	      (clobber (match_scratch:<avx512fmaskmode> 6))])]
  "TARGET_AVX512F"
{
  operands[5]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
					operands[4]), UNSPEC_VSIBADDR);
})

(define_insn "*avx512f_scattersi<mode>"
  [(set (match_operator:VI48F 5 "vsib_mem_operator"
	  [(unspec:P
	     [(match_operand:P 0 "vsib_address_operand" "Tv")
	      (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
	      (match_operand:SI 4 "const1248_operand" "n")]
	     UNSPEC_VSIBADDR)])
	(unspec:VI48F
	  [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
	   (match_operand:VI48F 3 "register_operand" "v")]
	  UNSPEC_SCATTER))
   (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
  "TARGET_AVX512F"
  "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_expand "<avx512>_scatterdi<mode>"
  [(parallel [(set (mem:VI48F
		     (match_par_dup 5
		       [(match_operand 0 "vsib_address_operand")
			(match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
			(match_operand:SI 4 "const1248_operand")]))
		   (unspec:VI48F
		     [(match_operand:QI 1 "register_operand")
		      (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
		     UNSPEC_SCATTER))
	      (clobber (match_scratch:QI 6))])]
  "TARGET_AVX512F"
{
  operands[5]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
					operands[4]), UNSPEC_VSIBADDR);
})

(define_insn "*avx512f_scatterdi<mode>"
  [(set (match_operator:VI48F 5 "vsib_mem_operator"
	  [(unspec:P
	     [(match_operand:P 0 "vsib_address_operand" "Tv")
	      (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
	      (match_operand:SI 4 "const1248_operand" "n")]
	     UNSPEC_VSIBADDR)])
	(unspec:VI48F
	  [(match_operand:QI 6 "register_operand" "1")
	   (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
	  UNSPEC_SCATTER))
   (clobber (match_scratch:QI 1 "=&Yk"))]
  "TARGET_AVX512F"
{
  if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 8)
    return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}";
  return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%t5%{%1%}, %3}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

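;; Masked compress/expand.  vpcompress* packs the elements selected by the
;; mask towards the low end of the destination (or stores them to
;; consecutive memory); vpexpand* does the inverse, placing consecutive
;; source elements into the destination positions selected by the mask.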
(define_insn "<avx512>_compress<mode>_mask"
  [(set (match_operand:VI48F 0 "register_operand" "=v")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "register_operand" "v")
	   (match_operand:VI48F 2 "vector_move_operand" "0C")
	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
	  UNSPEC_COMPRESS))]
  "TARGET_AVX512F"
  "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "<avx512>_compressstore<mode>_mask"
  [(set (match_operand:VI48F 0 "memory_operand" "=m")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "register_operand" "x")
	   (match_dup 0)
	   (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
	  UNSPEC_COMPRESS_STORE))]
  "TARGET_AVX512F"
  "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])

(define_expand "<avx512>_expand<mode>_maskz"
  [(set (match_operand:VI48F 0 "register_operand")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "nonimmediate_operand")
	   (match_operand:VI48F 2 "vector_move_operand")
	   (match_operand:<avx512fmaskmode> 3 "register_operand")]
	  UNSPEC_EXPAND))]
  "TARGET_AVX512F"
  "operands[2] = CONST0_RTX (<MODE>mode);")

(define_insn "<avx512>_expand<mode>_mask"
  [(set (match_operand:VI48F 0 "register_operand" "=v,v")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
	   (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
	  UNSPEC_EXPAND))]
  "TARGET_AVX512F"
  "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "<sseinsnmode>")])

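;; AVX512DQ range instructions: the immediate in operand 3 selects which
;; of min/max (or min/max of absolute values) is computed and how the
;; sign of the result is chosen.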
(define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(unspec:VF_AVX512VL
	  [(match_operand:VF_AVX512VL 1 "register_operand" "v")
	   (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
	   (match_operand:SI 3 "const_0_to_15_operand")]
	  UNSPEC_RANGE))]
  "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
  "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

(define_insn "avx512dq_ranges<mode><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 1 "register_operand" "v")
	     (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
	     (match_operand:SI 3 "const_0_to_15_operand")]
	    UNSPEC_RANGE)
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512DQ"
  "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

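;; AVX512DQ vfpclass tests each element against the classes selected by
;; the immediate and yields a mask register result.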
(define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
	(unspec:<avx512fmaskmode>
	  [(match_operand:VF_AVX512VL 1 "register_operand" "v")
	   (match_operand:QI 2 "const_0_to_255_operand" "n")]
	  UNSPEC_FPCLASS))]
  "TARGET_AVX512DQ"
   "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
  [(set_attr "type" "sse")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

(define_insn "avx512dq_vmfpclass<mode>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
	(and:<avx512fmaskmode>
	  (unspec:<avx512fmaskmode>
	    [(match_operand:VF_128 1 "register_operand" "v")
	     (match_operand:QI 2 "const_0_to_255_operand" "n")]
	    UNSPEC_FPCLASS)
	  (const_int 1)))]
  "TARGET_AVX512DQ"
   "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
  [(set_attr "type" "sse")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

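;; vgetmant extracts the normalized mantissa of each element; the
;; immediate selects the normalization interval and the sign control.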
(define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(unspec:VF_AVX512VL
	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
	   (match_operand:SI 2 "const_0_to_15_operand")]
	  UNSPEC_GETMANT))]
  "TARGET_AVX512F"
  "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

(define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 1 "register_operand" "v")
	     (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
	     (match_operand:SI 3 "const_0_to_15_operand")]
	    UNSPEC_GETMANT)
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2<round_saeonly_scalar_mask_op4>, %3}";
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])

;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
(define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
	(unspec:VI2_AVX512VL
	  [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
	   (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
	   (match_operand:SI 3 "const_0_to_255_operand")]
	  UNSPEC_DBPSADBW))]
  "TARGET_AVX512BW"
  "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "isa" "avx")
   (set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "clz<mode>2<mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
	(clz:VI48_AVX512VL
	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512CD"
  "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "<mask_codefor>conflict<mode><mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
	(unspec:VI48_AVX512VL
	  [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
	  UNSPEC_CONFLICT))]
  "TARGET_AVX512CD"
  "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

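;; SHA-1 and SHA-256 message-scheduling and round instructions (TARGET_SHA).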
(define_insn "sha1msg1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "vector_operand" "xBm")]
	  UNSPEC_SHA1MSG1))]
  "TARGET_SHA"
  "sha1msg1\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])

(define_insn "sha1msg2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "vector_operand" "xBm")]
	  UNSPEC_SHA1MSG2))]
  "TARGET_SHA"
  "sha1msg2\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])

(define_insn "sha1nexte"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "vector_operand" "xBm")]
	  UNSPEC_SHA1NEXTE))]
  "TARGET_SHA"
  "sha1nexte\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])

(define_insn "sha1rnds4"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "vector_operand" "xBm")
	   (match_operand:SI 3 "const_0_to_3_operand" "n")]
	  UNSPEC_SHA1RNDS4))]
  "TARGET_SHA"
  "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])

(define_insn "sha256msg1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "vector_operand" "xBm")]
	  UNSPEC_SHA256MSG1))]
  "TARGET_SHA"
  "sha256msg1\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])

(define_insn "sha256msg2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "vector_operand" "xBm")]
	  UNSPEC_SHA256MSG2))]
  "TARGET_SHA"
  "sha256msg2\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])

(define_insn "sha256rnds2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "vector_operand" "xBm")
	   (match_operand:V4SI 3 "register_operand" "Yz")]
	  UNSPEC_SHA256RNDS2))]
  "TARGET_SHA"
  "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])

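;; No-op casts between a 512-bit vector and its low 128-bit or 256-bit
;; part, kept as an unspec until after reload and then split into a plain
;; move of the relevant lowpart.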
(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
  [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
	(unspec:AVX512MODE2P
	  [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
	  UNSPEC_CAST))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  if (REG_P (operands[0]))
    operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
  else
    operands[1] = lowpart_subreg (<MODE>mode, operands[1],
				  <ssequartermode>mode);
})

(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
  [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
	(unspec:AVX512MODE2P
	  [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
	  UNSPEC_CAST))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  if (REG_P (operands[0]))
    operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
  else
    operands[1] = lowpart_subreg (<MODE>mode, operands[1],
				  <ssehalfvecmode>mode);
})

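;; AVX512IFMA: 52-bit integer multiply-accumulate.  vpmadd52luq/vpmadd52huq
;; add the low/high 52 bits of the 52x52-bit product of operands 2 and 3
;; to the 64-bit accumulator in operand 1.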
(define_int_iterator VPMADD52
  [UNSPEC_VPMADD52LUQ
   UNSPEC_VPMADD52HUQ])

(define_int_attr vpmadd52type
  [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])

(define_expand "vpamdd52huq<mode>_maskz"
  [(match_operand:VI8_AVX512VL 0 "register_operand")
   (match_operand:VI8_AVX512VL 1 "register_operand")
   (match_operand:VI8_AVX512VL 2 "register_operand")
   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512IFMA"
{
  emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
    operands[0], operands[1], operands[2], operands[3],
    CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})

(define_expand "vpamdd52luq<mode>_maskz"
  [(match_operand:VI8_AVX512VL 0 "register_operand")
   (match_operand:VI8_AVX512VL 1 "register_operand")
   (match_operand:VI8_AVX512VL 2 "register_operand")
   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512IFMA"
{
  emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
    operands[0], operands[1], operands[2], operands[3],
    CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})

(define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
	(unspec:VI8_AVX512VL
	  [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
	   (match_operand:VI8_AVX512VL 2 "register_operand" "v")
	   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
	  VPMADD52))]
  "TARGET_AVX512IFMA"
  "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "vpamdd52<vpmadd52type><mode>_mask"
  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI8_AVX512VL
	  (unspec:VI8_AVX512VL
	    [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI8_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
	    VPMADD52)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512IFMA"
  "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

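;; AVX512VBMI vpmultishiftqb: each destination byte is an unaligned 8-bit
;; field taken from the corresponding 64-bit lane of operand 2, at the bit
;; offset given by the matching byte of operand 1.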
(define_insn "vpmultishiftqb<mode><mask_name>"
  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL
	  [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMULTISHIFT))]
  "TARGET_AVX512VBMI"
  "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

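;; AVX5124FMAPS/AVX5124VNNIW operate on groups of four consecutive 512-bit
;; registers.  The IMOD4 modes describe such a quadruple; moves in these
;; modes are split into four 512-bit moves after reload.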
(define_mode_iterator IMOD4
  [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])

(define_mode_attr imod4_narrow
  [(V64SF "V16SF") (V64SI "V16SI")])

(define_expand "mov<mode>"
  [(set (match_operand:IMOD4 0 "nonimmediate_operand")
	(match_operand:IMOD4 1 "vector_move_operand"))]
  "TARGET_AVX512F"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})

(define_insn_and_split "*mov<mode>_internal"
  [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
	(match_operand:IMOD4 1 "vector_move_operand"  " C,vm,v"))]
  "TARGET_AVX512F
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0, op1;
  int i;

  for (i = 0; i < 4; i++)
    {
      op0 = simplify_subreg
	     (<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
      op1 = simplify_subreg
	     (<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
      emit_move_insn (op0, op1);
    }
  DONE;
})

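;; The 4FMAPS/4VNNIW instructions take a quadruple-register source (the
;; V64SF/V64SI operand) plus a 128-bit memory operand and accumulate into
;; the destination; the masked forms merge or zero under a mask register.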
(define_insn "avx5124fmaddps_4fmaddps"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
	(unspec:V16SF
	  [(match_operand:V16SF 1 "register_operand" "0")
	   (match_operand:V64SF 2 "register_operand" "Yh")
	   (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("V16SF"))])

(define_insn "avx5124fmaddps_4fmaddps_mask"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
	(vec_merge:V16SF
	  (unspec:V16SF
	    [(match_operand:V64SF 1 "register_operand" "Yh")
	     (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
	  (match_operand:V16SF 3 "register_operand" "0")
	  (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddps\t{%2, %g1, %0%{%4%}|%{%4%}%0, %g1, %2}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("V16SF"))])

(define_insn "avx5124fmaddps_4fmaddps_maskz"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
	(vec_merge:V16SF
	  (unspec:V16SF
	    [(match_operand:V16SF 1 "register_operand" "0")
	     (match_operand:V64SF 2 "register_operand" "Yh")
	     (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
	  (match_operand:V16SF 4 "const0_operand" "C")
	  (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %g2, %3}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("V16SF"))])

(define_insn "avx5124fmaddps_4fmaddss"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(unspec:V4SF
	  [(match_operand:V4SF 1 "register_operand" "0")
	   (match_operand:V64SF 2 "register_operand" "Yh")
	   (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("SF"))])

(define_insn "avx5124fmaddps_4fmaddss_mask"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_merge:V4SF
	  (unspec:V4SF
	    [(match_operand:V64SF 1 "register_operand" "Yh")
	     (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
	  (match_operand:V4SF 3 "register_operand" "0")
	  (match_operand:QI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddss\t{%2, %x1, %0%{%4%}|%{%4%}%0, %x1, %2}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("SF"))])

(define_insn "avx5124fmaddps_4fmaddss_maskz"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_merge:V4SF
	  (unspec:V4SF
	    [(match_operand:V4SF 1 "register_operand" "0")
	     (match_operand:V64SF 2 "register_operand" "Yh")
	     (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
	  (match_operand:V4SF 4 "const0_operand" "C")
	  (match_operand:QI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %x2, %3}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("SF"))])

(define_insn "avx5124fmaddps_4fnmaddps"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
	(unspec:V16SF
	  [(match_operand:V16SF 1 "register_operand" "0")
	   (match_operand:V64SF 2 "register_operand" "Yh")
	   (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("V16SF"))])

(define_insn "avx5124fmaddps_4fnmaddps_mask"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
	(vec_merge:V16SF
	  (unspec:V16SF
	    [(match_operand:V64SF 1 "register_operand" "Yh")
	     (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
	  (match_operand:V16SF 3 "register_operand" "0")
	  (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddps\t{%2, %g1, %0%{%4%}|%{%4%}%0, %g1, %2}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("V16SF"))])

(define_insn "avx5124fmaddps_4fnmaddps_maskz"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
	(vec_merge:V16SF
	  (unspec:V16SF
	    [(match_operand:V16SF 1 "register_operand" "0")
	     (match_operand:V64SF 2 "register_operand" "Yh")
	     (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
	  (match_operand:V16SF 4 "const0_operand" "C")
	  (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %g2, %3}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("V16SF"))])

(define_insn "avx5124fmaddps_4fnmaddss"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(unspec:V4SF
	  [(match_operand:V4SF 1 "register_operand" "0")
	   (match_operand:V64SF 2 "register_operand" "Yh")
	   (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("SF"))])

(define_insn "avx5124fmaddps_4fnmaddss_mask"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_merge:V4SF
	  (unspec:V4SF
	    [(match_operand:V64SF 1 "register_operand" "Yh")
	     (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
	  (match_operand:V4SF 3 "register_operand" "0")
	  (match_operand:QI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddss\t{%2, %x1, %0%{%4%}|%{%4%}%0, %x1, %2}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("SF"))])

(define_insn "avx5124fmaddps_4fnmaddss_maskz"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_merge:V4SF
	  (unspec:V4SF
	    [(match_operand:V4SF 1 "register_operand" "0")
	     (match_operand:V64SF 2 "register_operand" "Yh")
	     (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
	  (match_operand:V4SF 4 "const0_operand" "C")
	  (match_operand:QI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124FMAPS"
  "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %x2, %3}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("SF"))])

(define_insn "avx5124vnniw_vp4dpwssd"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(unspec:V16SI
	  [(match_operand:V16SI 1 "register_operand" "0")
	   (match_operand:V64SI 2 "register_operand" "Yh")
	   (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("TI"))])

(define_insn "avx5124vnniw_vp4dpwssd_mask"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(vec_merge:V16SI
	  (unspec:V16SI
	    [(match_operand:V64SI 1 "register_operand" "Yh")
	     (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
	  (match_operand:V16SI 3 "register_operand" "0")
	  (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssd\t{%2, %g1, %0%{%4%}|%{%4%}%0, %g1, %2}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("TI"))])

(define_insn "avx5124vnniw_vp4dpwssd_maskz"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(vec_merge:V16SI
	  (unspec:V16SI
	    [(match_operand:V16SI 1 "register_operand" "0")
	     (match_operand:V64SI 2 "register_operand" "Yh")
	     (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
	  (match_operand:V16SI 4 "const0_operand" "C")
	  (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %g2, %3}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("TI"))])

(define_insn "avx5124vnniw_vp4dpwssds"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(unspec:V16SI
	  [(match_operand:V16SI 1 "register_operand" "0")
	   (match_operand:V64SI 2 "register_operand" "Yh")
	   (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("TI"))])

(define_insn "avx5124vnniw_vp4dpwssds_mask"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(vec_merge:V16SI
	  (unspec:V16SI
	    [(match_operand:V64SI 1 "register_operand" "Yh")
	     (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
	  (match_operand:V16SI 3 "register_operand" "0")
	  (match_operand:HI 4 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssds\t{%2, %g1, %0%{%4%}|%{%4%}%0, %g1, %2}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("TI"))])

(define_insn "avx5124vnniw_vp4dpwssds_maskz"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(vec_merge:V16SI
	  (unspec:V16SI
	    [(match_operand:V16SI 1 "register_operand" "0")
	     (match_operand:V64SI 2 "register_operand" "Yh")
	     (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
	  (match_operand:V16SI 4 "const0_operand" "C")
	  (match_operand:HI 5 "register_operand" "Yk")))]
  "TARGET_AVX5124VNNIW"
  "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%{%5%}%{z%}%0, %g2, %3}"
   [(set_attr ("type") ("ssemuladd"))
    (set_attr ("prefix") ("evex"))
    (set_attr ("mode") ("TI"))])

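;; AVX512VPOPCNTDQ: per-element population count.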
(define_insn "vpopcount<mode><mask_name>"
  [(set (match_operand:VI48_512 0 "register_operand" "=v")
	(popcount:VI48_512
	  (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512VPOPCNTDQ"
  "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")

;; Save multiple registers out-of-line.
(define_insn "save_multiple<mode>"
  [(match_parallel 0 "save_multiple"
    [(use (match_operand:P 1 "symbol_operand"))])]
  "TARGET_SSE && TARGET_64BIT"
  "call\t%P1")

;; Restore multiple registers out-of-line.
(define_insn "restore_multiple<mode>"
  [(match_parallel 0 "restore_multiple"
    [(use (match_operand:P 1 "symbol_operand"))])]
  "TARGET_SSE && TARGET_64BIT"
  "call\t%P1")

;; Restore multiple registers out-of-line and return.
(define_insn "restore_multiple_and_return<mode>"
  [(match_parallel 0 "restore_multiple"
    [(return)
     (use (match_operand:P 1 "symbol_operand"))
     (set (reg:DI SP_REG) (reg:DI R10_REG))
    ])]
  "TARGET_SSE && TARGET_64BIT"
  "jmp\t%P1")

;; Restore multiple registers out-of-line when hard frame pointer is used,
;; performing the leave operation prior to returning (from the function).
(define_insn "restore_multiple_leave_return<mode>"
  [(match_parallel 0 "restore_multiple"
    [(return)
     (use (match_operand:P 1 "symbol_operand"))
     (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
     (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
     (clobber (mem:BLK (scratch)))
    ])]
  "TARGET_SSE && TARGET_64BIT"
  "jmp\t%P1")