1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2015 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
87 ;; For AVX512F support
91 UNSPEC_UNSIGNED_FIX_NOTRUNC
106 UNSPEC_COMPRESS_STORE
111 ;; For the embedded rounding feature
112 UNSPEC_EMBEDDED_ROUNDING
114 ;; For AVX512PF support
115 UNSPEC_GATHER_PREFETCH
116 UNSPEC_SCATTER_PREFETCH
118 ;; For AVX512ER support
132 ;; For AVX512BW support
140 ;; For AVX512DQ support
145 ;; For AVX512IFMA support
149 ;; For AVX512VBMI support
153 (define_c_enum "unspecv" [
163 ;; All vector modes including V?TImode, used in move patterns.
164 (define_mode_iterator VMOVE
165 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
166 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
167 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
168 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
169 (V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI
170 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
171 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
173 ;; All AVX-512{F,VL} vector modes. Assumes TARGET_AVX512F as the baseline.
174 (define_mode_iterator V48_AVX512VL
175 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
176 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
177 V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
178 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
180 ;; 1,2 byte AVX-512{BW,VL} vector modes. Assumes TARGET_AVX512BW as the baseline.
181 (define_mode_iterator VI12_AVX512VL
182 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
183 V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
185 (define_mode_iterator VI1_AVX512VL ;; QI-element vectors: V64QI always, V16QI/V32QI only with TARGET_AVX512VL
186 [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
189 (define_mode_iterator V
190 [(V32QI "TARGET_AVX") V16QI
191 (V16HI "TARGET_AVX") V8HI
192 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
193 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
194 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
195 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
197 ;; All 128bit vector modes
198 (define_mode_iterator V_128
199 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
201 ;; All 256bit vector modes
202 (define_mode_iterator V_256
203 [V32QI V16HI V8SI V4DI V8SF V4DF])
205 ;; All 512bit vector modes
206 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
208 ;; All 256bit and 512bit vector modes
209 (define_mode_iterator V_256_512
210 [V32QI V16HI V8SI V4DI V8SF V4DF
211 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
212 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
214 ;; All vector float modes
215 (define_mode_iterator VF
216 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
217 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
219 ;; 128- and 256-bit float vector modes
220 (define_mode_iterator VF_128_256
221 [(V8SF "TARGET_AVX") V4SF
222 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
224 ;; All SFmode vector float modes
225 (define_mode_iterator VF1
226 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
228 ;; 128- and 256-bit SF vector modes
229 (define_mode_iterator VF1_128_256
230 [(V8SF "TARGET_AVX") V4SF])
232 (define_mode_iterator VF1_128_256VL
233 [V8SF (V4SF "TARGET_AVX512VL")])
235 ;; All DFmode vector float modes
236 (define_mode_iterator VF2
237 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
239 ;; 128- and 256-bit DF vector modes
240 (define_mode_iterator VF2_128_256
241 [(V4DF "TARGET_AVX") V2DF])
243 (define_mode_iterator VF2_512_256
244 [(V8DF "TARGET_AVX512F") V4DF])
246 (define_mode_iterator VF2_512_256VL
247 [V8DF (V4DF "TARGET_AVX512VL")])
249 ;; All 128bit vector float modes
250 (define_mode_iterator VF_128
251 [V4SF (V2DF "TARGET_SSE2")])
253 ;; All 256bit vector float modes
254 (define_mode_iterator VF_256
257 ;; All 512bit vector float modes
258 (define_mode_iterator VF_512
261 (define_mode_iterator VI48_AVX512VL ;; SI- and DI-element integer vectors: 512-bit forms always, 128/256-bit forms only with TARGET_AVX512VL
262 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
263 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
265 (define_mode_iterator VF_AVX512VL
266 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
267 V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
269 (define_mode_iterator VF2_AVX512VL
270 [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
272 (define_mode_iterator VF1_AVX512VL
273 [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
275 ;; All vector integer modes
276 (define_mode_iterator VI
277 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
278 (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
279 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
280 (V8SI "TARGET_AVX") V4SI
281 (V4DI "TARGET_AVX") V2DI])
283 (define_mode_iterator VI_AVX2
284 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
285 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
286 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
287 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
289 ;; All QImode vector integer modes
290 (define_mode_iterator VI1
291 [(V32QI "TARGET_AVX") V16QI])
293 (define_mode_iterator VI_ULOADSTORE_BW_AVX512VL
295 V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")])
297 (define_mode_iterator VI_ULOADSTORE_F_AVX512VL
298 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
299 V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
301 ;; All DImode vector integer modes
302 (define_mode_iterator VI8
303 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
305 (define_mode_iterator VI8_AVX512VL
306 [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
308 (define_mode_iterator VI8_256_512
309 [V8DI (V4DI "TARGET_AVX512VL")])
311 (define_mode_iterator VI1_AVX2
312 [(V32QI "TARGET_AVX2") V16QI])
314 (define_mode_iterator VI1_AVX512
315 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
317 (define_mode_iterator VI2_AVX2
318 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
320 (define_mode_iterator VI2_AVX512F
321 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
323 (define_mode_iterator VI4_AVX
324 [(V8SI "TARGET_AVX") V4SI])
326 (define_mode_iterator VI4_AVX2
327 [(V8SI "TARGET_AVX2") V4SI])
329 (define_mode_iterator VI4_AVX512F
330 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
332 (define_mode_iterator VI4_AVX512VL
333 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
335 (define_mode_iterator VI48_AVX512F_AVX512VL
336 [V4SI V8SI (V16SI "TARGET_AVX512F")
337 (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
339 (define_mode_iterator VI2_AVX512VL
340 [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
342 (define_mode_iterator VI8_AVX2_AVX512BW
343 [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
345 (define_mode_iterator VI8_AVX2
346 [(V4DI "TARGET_AVX2") V2DI])
348 (define_mode_iterator VI8_AVX2_AVX512F
349 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
351 (define_mode_iterator VI4_128_8_256
355 (define_mode_iterator V8FI
359 (define_mode_iterator V16FI
362 ;; ??? We should probably use TImode instead.
363 (define_mode_iterator VIMAX_AVX2
364 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
366 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
367 (define_mode_iterator SSESCALARMODE
368 [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
370 (define_mode_iterator VI12_AVX2
371 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
372 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
374 (define_mode_iterator VI24_AVX2
375 [(V16HI "TARGET_AVX2") V8HI
376 (V8SI "TARGET_AVX2") V4SI])
378 (define_mode_iterator VI124_AVX512F
379 [(V32QI "TARGET_AVX2") V16QI
380 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
381 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
383 (define_mode_iterator VI124_AVX2
384 [(V32QI "TARGET_AVX2") V16QI
385 (V16HI "TARGET_AVX2") V8HI
386 (V8SI "TARGET_AVX2") V4SI])
388 (define_mode_iterator VI2_AVX2_AVX512BW
389 [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
391 (define_mode_iterator VI48_AVX2
392 [(V8SI "TARGET_AVX2") V4SI
393 (V4DI "TARGET_AVX2") V2DI])
395 (define_mode_iterator VI248_AVX2_8_AVX512F
396 [(V16HI "TARGET_AVX2") V8HI
397 (V8SI "TARGET_AVX2") V4SI
398 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
400 (define_mode_iterator VI248_AVX512BW_AVX512VL
401 [(V32HI "TARGET_AVX512BW")
402 (V4DI "TARGET_AVX512VL") V16SI V8DI])
404 ;; Assumes TARGET_AVX512VL as the baseline.
405 (define_mode_iterator VI24_AVX512BW_1
406 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
409 (define_mode_iterator VI48_AVX512F
410 [(V16SI "TARGET_AVX512F") V8SI V4SI
411 (V8DI "TARGET_AVX512F") V4DI V2DI])
413 (define_mode_iterator V48_AVX2
416 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
417 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
419 (define_mode_attr avx512 ;; ISA-name prefix used in "<avx512>_..." pattern names: avx512bw for 512-bit QI/HI vectors, avx512f for the other 512-bit modes, avx512vl for every 128/256-bit mode
420 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
421 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
422 (V4SI "avx512vl") (V8SI "avx512vl") (V16SI "avx512f")
423 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
424 (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
425 (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
427 (define_mode_attr sse2_avx_avx512f
428 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
429 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
430 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
431 (V2DI "avx512vl") (V4DI "avx512vl") (V8DI "avx512f")
432 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
433 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
435 (define_mode_attr sse2_avx2
436 [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
437 (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
438 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
439 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
440 (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
442 (define_mode_attr ssse3_avx2
443 [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
444 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
445 (V4SI "ssse3") (V8SI "avx2")
446 (V2DI "ssse3") (V4DI "avx2")
447 (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
449 (define_mode_attr sse4_1_avx2
450 [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
451 (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
452 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
453 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
455 (define_mode_attr avx_avx2
456 [(V4SF "avx") (V2DF "avx")
457 (V8SF "avx") (V4DF "avx")
458 (V4SI "avx2") (V2DI "avx2")
459 (V8SI "avx2") (V4DI "avx2")])
461 (define_mode_attr vec_avx2
462 [(V16QI "vec") (V32QI "avx2")
463 (V8HI "vec") (V16HI "avx2")
464 (V4SI "vec") (V8SI "avx2")
465 (V2DI "vec") (V4DI "avx2")])
467 (define_mode_attr avx2_avx512
468 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
469 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
470 (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
471 (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
472 (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
474 (define_mode_attr shuffletype ;; "f" for float-element modes, "i" for integer-element modes; spliced into vextract<shuffletype>NNxM / vbroadcast<shuffletype>NNxM mnemonics
475 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
476 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
477 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
478 (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i") ;; V16HI is an integer mode like its neighbours: must be "i" to form a valid mnemonic
479 (V64QI "i") (V1TI "i") (V2TI "i")])
481 (define_mode_attr ssequartermode ;; 512-bit mode -> 128-bit mode with the same element type (a quarter of the vector)
482 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
484 (define_mode_attr ssedoublemodelower
485 [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
486 (V8HI "v8si") (V16HI "v16si") (V32HI "v32si")
487 (V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
489 (define_mode_attr ssedoublemode
490 [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
491 (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF")
492 (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI")
493 (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")])
495 (define_mode_attr ssebytemode ;; DI-element vector mode -> QI-element vector mode of the same total size
496 [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
498 ;; All 128bit vector integer modes
499 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
501 ;; All 256bit vector integer modes
502 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
504 ;; All 512bit vector integer modes
505 (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
507 ;; Various 128bit vector integer mode combinations
508 (define_mode_iterator VI12_128 [V16QI V8HI])
509 (define_mode_iterator VI14_128 [V16QI V4SI])
510 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
511 (define_mode_iterator VI24_128 [V8HI V4SI])
512 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
513 (define_mode_iterator VI48_128 [V4SI V2DI])
515 ;; Various 256bit and 512bit vector integer mode combinations
516 (define_mode_iterator VI124_256 [V32QI V16HI V8SI])
517 (define_mode_iterator VI124_256_AVX512F_AVX512BW
519 (V64QI "TARGET_AVX512BW")
520 (V32HI "TARGET_AVX512BW")
521 (V16SI "TARGET_AVX512F")])
522 (define_mode_iterator VI48_256 [V8SI V4DI])
523 (define_mode_iterator VI48_512 [V16SI V8DI])
524 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
525 (define_mode_iterator VI_AVX512BW
526 [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
528 ;; Int-float size matches
529 (define_mode_iterator VI4F_128 [V4SI V4SF])
530 (define_mode_iterator VI8F_128 [V2DI V2DF])
531 (define_mode_iterator VI4F_256 [V8SI V8SF])
532 (define_mode_iterator VI8F_256 [V4DI V4DF])
533 (define_mode_iterator VI8F_256_512
534 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
535 (define_mode_iterator VI48F_256_512
537 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
538 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
539 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
540 (define_mode_iterator VF48_I1248
541 [V16SI V16SF V8DI V8DF V32HI V64QI])
542 (define_mode_iterator VI48F
543 [V16SI V16SF V8DI V8DF
544 (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
545 (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
546 (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
547 (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
548 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
550 ;; Mapping from float mode to required SSE level
551 (define_mode_attr sse
552 [(SF "sse") (DF "sse2")
553 (V4SF "sse") (V2DF "sse2")
554 (V16SF "avx512f") (V8SF "avx")
555 (V8DF "avx512f") (V4DF "avx")])
557 (define_mode_attr sse2
558 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
559 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
561 (define_mode_attr sse3
562 [(V16QI "sse3") (V32QI "avx")])
564 (define_mode_attr sse4_1
565 [(V4SF "sse4_1") (V2DF "sse4_1")
566 (V8SF "avx") (V4DF "avx")
569 (define_mode_attr avxsizesuffix
570 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
571 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
572 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
573 (V16SF "512") (V8DF "512")
574 (V8SF "256") (V4DF "256")
575 (V4SF "") (V2DF "")])
577 ;; SSE instruction mode
578 (define_mode_attr sseinsnmode
579 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
580 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
581 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
582 (V16SF "V16SF") (V8DF "V8DF")
583 (V8SF "V8SF") (V4DF "V4DF")
584 (V4SF "V4SF") (V2DF "V2DF")
587 ;; Mapping of vector modes to corresponding mask size
588 (define_mode_attr avx512fmaskmode
589 [(V64QI "DI") (V32QI "SI") (V16QI "HI")
590 (V32HI "SI") (V16HI "HI") (V8HI "QI") (V4HI "QI")
591 (V16SI "HI") (V8SI "QI") (V4SI "QI")
592 (V8DI "QI") (V4DI "QI") (V2DI "QI")
593 (V16SF "HI") (V8SF "QI") (V4SF "QI")
594 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
596 ;; Mapping of vector float modes to an integer mode of the same size
597 (define_mode_attr sseintvecmode
598 [(V16SF "V16SI") (V8DF "V8DI")
599 (V8SF "V8SI") (V4DF "V4DI")
600 (V4SF "V4SI") (V2DF "V2DI")
601 (V16SI "V16SI") (V8DI "V8DI")
602 (V8SI "V8SI") (V4DI "V4DI")
603 (V4SI "V4SI") (V2DI "V2DI")
604 (V16HI "V16HI") (V8HI "V8HI")
605 (V32HI "V32HI") (V64QI "V64QI")
606 (V32QI "V32QI") (V16QI "V16QI")])
608 (define_mode_attr sseintvecmode2
609 [(V8DF "XI") (V4DF "OI") (V2DF "TI")
610 (V8SF "OI") (V4SF "TI")])
612 (define_mode_attr sseintvecmodelower
613 [(V16SF "v16si") (V8DF "v8di")
614 (V8SF "v8si") (V4DF "v4di")
615 (V4SF "v4si") (V2DF "v2di")
616 (V8SI "v8si") (V4DI "v4di")
617 (V4SI "v4si") (V2DI "v2di")
618 (V16HI "v16hi") (V8HI "v8hi")
619 (V32QI "v32qi") (V16QI "v16qi")])
621 ;; Mapping of vector modes to a vector mode of double size
622 (define_mode_attr ssedoublevecmode
623 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
624 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
625 (V8SF "V16SF") (V4DF "V8DF")
626 (V4SF "V8SF") (V2DF "V4DF")])
628 ;; Mapping of vector modes to a vector mode of half size
629 (define_mode_attr ssehalfvecmode
630 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
631 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
632 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
633 (V16SF "V8SF") (V8DF "V4DF")
634 (V8SF "V4SF") (V4DF "V2DF")
637 ;; Mapping of vector modes to packed single mode of the same size
638 (define_mode_attr ssePSmode
639 [(V16SI "V16SF") (V8DF "V16SF")
640 (V16SF "V16SF") (V8DI "V16SF")
641 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
642 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
643 (V8SI "V8SF") (V4SI "V4SF")
644 (V4DI "V8SF") (V2DI "V4SF")
645 (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
646 (V8SF "V8SF") (V4SF "V4SF")
647 (V4DF "V8SF") (V2DF "V4SF")])
649 (define_mode_attr ssePSmode2 ;; DI-element vector mode -> SF vector mode with the same number of elements
650 [(V8DI "V8SF") (V4DI "V4SF")])
652 ;; Mapping of vector modes back to the scalar modes
653 (define_mode_attr ssescalarmode
654 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
655 (V32HI "HI") (V16HI "HI") (V8HI "HI")
656 (V16SI "SI") (V8SI "SI") (V4SI "SI")
657 (V8DI "DI") (V4DI "DI") (V2DI "DI")
658 (V16SF "SF") (V8SF "SF") (V4SF "SF")
659 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
661 ;; Mapping of vector modes to the 128bit modes
662 (define_mode_attr ssexmmmode
663 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
664 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
665 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
666 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
667 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
668 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
670 ;; Pointer size override for scalar modes (Intel asm dialect)
671 (define_mode_attr iptr
672 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
673 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
674 (V8SF "k") (V4DF "q")
675 (V4SF "k") (V2DF "q")
678 ;; Number of scalar elements in each vector type
679 (define_mode_attr ssescalarnum
680 [(V64QI "64") (V16SI "16") (V8DI "8")
681 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
682 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
683 (V16SF "16") (V8DF "8")
684 (V8SF "8") (V4DF "4")
685 (V4SF "4") (V2DF "2")])
687 ;; Mask of scalar elements in each vector type
688 (define_mode_attr ssescalarnummask
689 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
690 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
691 (V8SF "7") (V4DF "3")
692 (V4SF "3") (V2DF "1")])
694 (define_mode_attr ssescalarsize ;; Element width in bits, as text; appended to mnemonics such as vmovdqu<ssescalarsize>
695 [(V8DI "64") (V4DI "64") (V2DI "64")
696 (V64QI "8") (V32QI "8") (V16QI "8")
697 (V32HI "16") (V16HI "16") (V8HI "16")
698 (V16SI "32") (V8SI "32") (V4SI "32")
699 (V16SF "32") (V8DF "64")]) ;; the only float modes listed are the two 512-bit ones
701 ;; SSE prefix for integer vector modes
702 (define_mode_attr sseintprefix
703 [(V2DI "p") (V2DF "")
708 (V16SI "p") (V16SF "")
709 (V16QI "p") (V8HI "p")
710 (V32QI "p") (V16HI "p")
711 (V64QI "p") (V32HI "p")])
713 ;; SSE scalar suffix for vector modes
714 (define_mode_attr ssescalarmodesuffix
716 (V8SF "ss") (V4DF "sd")
717 (V4SF "ss") (V2DF "sd")
718 (V8SI "ss") (V4DI "sd")
721 ;; Pack/unpack vector modes
722 (define_mode_attr sseunpackmode
723 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
724 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
725 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
727 (define_mode_attr ssepackmode
728 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
729 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
730 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
732 ;; Mapping of the max integer size for xop rotate immediate constraint
733 (define_mode_attr sserotatemax
734 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
736 ;; Mapping of mode to cast intrinsic name
737 (define_mode_attr castmode
738 [(V8SI "si") (V8SF "ps") (V4DF "pd")
739 (V16SI "si") (V16SF "ps") (V8DF "pd")])
741 ;; Instruction suffix for sign and zero extensions.
742 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
744 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
745 ;; i64x4 or f64x4 for 512bit modes.
746 (define_mode_attr i128
747 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
748 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
749 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
752 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
753 (define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
755 ;; Mapping for dbpsadbw modes
756 (define_mode_attr dbpsadbwmode
757 [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
759 ;; Mapping suffixes for broadcast
760 (define_mode_attr bcstscalarsuff
761 [(V64QI "b") (V32QI "b") (V16QI "b")
762 (V32HI "w") (V16HI "w") (V8HI "w")
763 (V16SI "d") (V8SI "d") (V4SI "d")
764 (V8DI "q") (V4DI "q") (V2DI "q")
765 (V16SF "ss") (V8SF "ss") (V4SF "ss")
766 (V8DF "sd") (V4DF "sd") (V2DF "sd")])
768 ;; Tie mode of assembler operand to mode iterator
769 (define_mode_attr concat_tg_mode
770 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
771 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
774 ;; Include define_subst patterns for instructions with mask
777 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
779 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
783 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
785 ;; All of these patterns are enabled for SSE1 as well as SSE2.
786 ;; This is essential for maintaining stable calling conventions.
788 (define_expand "mov<mode>"
789 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
790 (match_operand:VMOVE 1 "nonimmediate_operand"))]
793 ix86_expand_vector_move (<MODE>mode, operands);
797 (define_insn "*mov<mode>_internal"
798 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
799 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
801 && (register_operand (operands[0], <MODE>mode)
802 || register_operand (operands[1], <MODE>mode))"
804 int mode = get_attr_mode (insn);
805 switch (which_alternative)
808 return standard_sse_constant_opcode (insn, operands[1]);
811 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
812 in avx512f, so we need to use workarounds, to access sse registers
813 16-31, which are evex-only. In avx512vl we don't need workarounds. */
814 if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
815 && ((REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
816 || (REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
818 if (memory_operand (operands[0], <MODE>mode))
820 if (<MODE_SIZE> == 32)
821 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
822 else if (<MODE_SIZE> == 16)
823 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
827 else if (memory_operand (operands[1], <MODE>mode))
829 if (<MODE_SIZE> == 32)
830 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
831 else if (<MODE_SIZE> == 16)
832 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
837 /* Reg -> reg move is always aligned. Just use wider move. */
842 return "vmovaps\t{%g1, %g0|%g0, %g1}";
845 return "vmovapd\t{%g1, %g0|%g0, %g1}";
848 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
859 && (misaligned_operand (operands[0], <MODE>mode)
860 || misaligned_operand (operands[1], <MODE>mode)))
861 return "vmovups\t{%1, %0|%0, %1}";
863 return "%vmovaps\t{%1, %0|%0, %1}";
869 && (misaligned_operand (operands[0], <MODE>mode)
870 || misaligned_operand (operands[1], <MODE>mode)))
871 return "vmovupd\t{%1, %0|%0, %1}";
873 return "%vmovapd\t{%1, %0|%0, %1}";
878 && (misaligned_operand (operands[0], <MODE>mode)
879 || misaligned_operand (operands[1], <MODE>mode)))
880 return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}"
881 : "vmovdqu\t{%1, %0|%0, %1}";
883 return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
884 : "%vmovdqa\t{%1, %0|%0, %1}";
886 if (misaligned_operand (operands[0], <MODE>mode)
887 || misaligned_operand (operands[1], <MODE>mode))
888 return "vmovdqu64\t{%1, %0|%0, %1}";
890 return "vmovdqa64\t{%1, %0|%0, %1}";
899 [(set_attr "type" "sselog1,ssemov,ssemov")
900 (set_attr "prefix" "maybe_vex")
902 (cond [(and (match_test "<MODE_SIZE> == 16")
903 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
904 (and (eq_attr "alternative" "2")
905 (match_test "TARGET_SSE_TYPELESS_STORES"))))
906 (const_string "<ssePSmode>")
907 (match_test "TARGET_AVX")
908 (const_string "<sseinsnmode>")
909 (ior (not (match_test "TARGET_SSE2"))
910 (match_test "optimize_function_for_size_p (cfun)"))
911 (const_string "V4SF")
912 (and (eq_attr "alternative" "0")
913 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
916 (const_string "<sseinsnmode>")))])
918 (define_insn "<avx512>_load<mode>_mask"
919 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
920 (vec_merge:V48_AVX512VL
921 (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
922 (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
923 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
926 static char buf [64];
929 const char *sse_suffix;
931 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
934 sse_suffix = "<ssemodesuffix>";
939 sse_suffix = "<ssescalarsize>";
942 if (misaligned_operand (operands[1], <MODE>mode))
947 snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%3%%}%%N2|%%0%%{%%3%%}%%N2, %%1}",
948 insn_op, align, sse_suffix);
951 [(set_attr "type" "ssemov")
952 (set_attr "prefix" "evex")
953 (set_attr "memory" "none,load")
954 (set_attr "mode" "<sseinsnmode>")])
956 (define_insn "<avx512>_load<mode>_mask"
957 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
958 (vec_merge:VI12_AVX512VL
959 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
960 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
961 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
963 "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
964 [(set_attr "type" "ssemov")
965 (set_attr "prefix" "evex")
966 (set_attr "memory" "none,load")
967 (set_attr "mode" "<sseinsnmode>")])
969 (define_insn "<avx512>_blendm<mode>"
970 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
971 (vec_merge:V48_AVX512VL
972 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
973 (match_operand:V48_AVX512VL 1 "register_operand" "v")
974 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
976 "vblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
977 [(set_attr "type" "ssemov")
978 (set_attr "prefix" "evex")
979 (set_attr "mode" "<sseinsnmode>")])
981 (define_insn "<avx512>_blendm<mode>"
982 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
983 (vec_merge:VI12_AVX512VL
984 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
985 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
986 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
988 "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
989 [(set_attr "type" "ssemov")
990 (set_attr "prefix" "evex")
991 (set_attr "mode" "<sseinsnmode>")])
993 (define_insn "<avx512>_store<mode>_mask" ;; Masked store of a 4/8-byte-element vector register (operand 1) to memory (operand 0) under mask register operand 2
994 [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
995 (vec_merge:V48_AVX512VL
996 (match_operand:V48_AVX512VL 1 "register_operand" "v")
998 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1001 static char buf [64];
1003 const char *insn_op;
1004 const char *sse_suffix;
1006 if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1009 sse_suffix = "<ssemodesuffix>";
1014 sse_suffix = "<ssescalarsize>";
1017 if (misaligned_operand (operands[0], <MODE>mode)) /* Test the memory destination (operand 0); operand 1 is a register, so testing it says nothing about the store's alignment.  */
1022 snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%2%%}|%%0%%{%%2%%}, %%1}",
1023 insn_op, align, sse_suffix);
1026 [(set_attr "type" "ssemov")
1027 (set_attr "prefix" "evex")
1028 (set_attr "memory" "store")
1029 (set_attr "mode" "<sseinsnmode>")])
1031 (define_insn "<avx512>_store<mode>_mask"
1032 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1033 (vec_merge:VI12_AVX512VL
1034 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1036 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1038 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1039 [(set_attr "type" "ssemov")
1040 (set_attr "prefix" "evex")
1041 (set_attr "memory" "store")
1042 (set_attr "mode" "<sseinsnmode>")])
1044 (define_insn "sse2_movq128"
1045 [(set (match_operand:V2DI 0 "register_operand" "=x")
1048 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
1049 (parallel [(const_int 0)]))
1052 "%vmovq\t{%1, %0|%0, %q1}"
1053 [(set_attr "type" "ssemov")
1054 (set_attr "prefix" "maybe_vex")
1055 (set_attr "mode" "TI")])
1057 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1058 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1059 ;; from memory, we'd prefer to load the memory directly into the %xmm
1060 ;; register. To facilitate this happy circumstance, this pattern won't
1061 ;; split until after register allocation. If the 64-bit value didn't
1062 ;; come from memory, this is the best we can do. This is much better
1063 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
1066 (define_insn_and_split "movdi_to_sse"
1068 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
1069 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
1070 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
1071 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1073 "&& reload_completed"
1076 if (register_operand (operands[1], DImode))
1078 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1079 Assemble the 64-bit DImode value in an xmm register. */
1080 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1081 gen_rtx_SUBREG (SImode, operands[1], 0)));
1082 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1083 gen_rtx_SUBREG (SImode, operands[1], 4)));
1084 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1087 else if (memory_operand (operands[1], DImode))
1089 rtx tmp = gen_reg_rtx (V2DImode);
1090 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
1091 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
1098 [(set (match_operand:V4SF 0 "register_operand")
1099 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1100 "TARGET_SSE && reload_completed"
1103 (vec_duplicate:V4SF (match_dup 1))
1107 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
1108 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF counterpart of the zero-extended scalar load rewrite: after reload
;; the set becomes a vec_concat of the DFmode subreg of operand 1 (offset 0)
;; with a DFmode zero (operand 2).
1112 [(set (match_operand:V2DF 0 "register_operand")
1113 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1114 "TARGET_SSE2 && reload_completed"
1115 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1117 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
1118 operands[2] = CONST0_RTX (DFmode);
;; Misaligned vector move for every VMOVE mode; both operands may be
;; registers or memory.  All work is delegated to
;; ix86_expand_vector_move_misalign.
1121 (define_expand "movmisalign<mode>"
1122 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1123 (match_operand:VMOVE 1 "nonimmediate_operand"))]
1126 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for unaligned loads of the VF float vector modes, optionally
;; masked through the <mask_name> substitution.  On the path guarded by
;; misaligned_operand below, a plain SET is emitted instead of the unspec
;; form; its source may be wrapped in a VEC_MERGE with operands
;; 2 * <mask_applied> and 3 * <mask_applied>.
1130 (define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1131 [(set (match_operand:VF 0 "register_operand")
1132 (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
1134 "TARGET_SSE && <mask_mode512bit_condition>"
1136 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1137 just fine if misaligned_operand is true, and without the UNSPEC it can
1138 be combined with arithmetic instructions. If misaligned_operand is
1139 false, still emit UNSPEC_LOADU insn to honor user's request for
1142 && misaligned_operand (operands[1], <MODE>mode))
1144 rtx src = operands[1];
1146 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1147 operands[2 * <mask_applied>],
1148 operands[3 * <mask_applied>]);
1149 emit_insn (gen_rtx_SET (operands[0], src));
;; Insn for the unaligned VF load.  The output code switches on the insn's
;; "mode" attribute and prints either %vmovups or %vmovu<ssemodesuffix>.
;; The mode attribute is <ssePSmode> for 16-byte modes when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, <MODE> under TARGET_AVX, V4SF when
;; optimizing the function for size, and <MODE> otherwise.
1154 (define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
1155 [(set (match_operand:VF 0 "register_operand" "=v")
1157 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
1159 "TARGET_SSE && <mask_mode512bit_condition>"
1161 switch (get_attr_mode (insn))
1166 return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1168 return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1171 [(set_attr "type" "ssemov")
1172 (set_attr "movu" "1")
1173 (set_attr "ssememalign" "8")
1174 (set_attr "prefix" "maybe_vex")
1176 (cond [(and (match_test "<MODE_SIZE> == 16")
1177 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1178 (const_string "<ssePSmode>")
1179 (match_test "TARGET_AVX")
1180 (const_string "<MODE>")
1181 (match_test "optimize_function_for_size_p (cfun)")
1182 (const_string "V4SF")
1184 (const_string "<MODE>")))])
;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
;; Matches a DF load concatenated with zero, followed by a vec_concat that
;; keeps element 0 of the register and takes the second element from memory.
;; The pair is accepted only when ix86_operands_ok_for_move_multiple
;; approves the operands; the replacement is a single V2DF UNSPEC_LOADU from
;; operand 1's address re-typed as V2DFmode.
1188 [(set (match_operand:V2DF 0 "register_operand")
1189 (vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1190 (match_operand:DF 4 "const0_operand")))
1191 (set (match_operand:V2DF 2 "register_operand")
1192 (vec_concat:V2DF (vec_select:DF (match_dup 2)
1193 (parallel [(const_int 0)]))
1194 (match_operand:DF 3 "memory_operand")))]
1195 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1196 && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1198 (unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))]
1199 "operands[4] = adjust_address (operands[1], V2DFmode, 0);")
;; Unaligned store of a VF register to memory.  The output code switches on
;; the "mode" attribute and prints %vmovups or %vmovu<ssemodesuffix>.  For
;; 16-byte modes the attribute is <ssePSmode> when either
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or TARGET_SSE_TYPELESS_STORES holds;
;; otherwise <MODE> under AVX, V4SF when optimizing for size, else <MODE>.
1201 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
1202 [(set (match_operand:VF 0 "memory_operand" "=m")
1204 [(match_operand:VF 1 "register_operand" "v")]
1208 switch (get_attr_mode (insn))
1213 return "%vmovups\t{%1, %0|%0, %1}";
1215 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
1218 [(set_attr "type" "ssemov")
1219 (set_attr "movu" "1")
1220 (set_attr "ssememalign" "8")
1221 (set_attr "prefix" "maybe_vex")
1223 (cond [(and (match_test "<MODE_SIZE> == 16")
1224 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1225 (match_test "TARGET_SSE_TYPELESS_STORES")))
1226 (const_string "<ssePSmode>")
1227 (match_test "TARGET_AVX")
1228 (const_string "<MODE>")
1229 (match_test "optimize_function_for_size_p (cfun)")
1230 (const_string "V4SF")
1232 (const_string "<MODE>")))])
;; Masked unaligned store for the VF_AVX512VL modes, expressed as a
;; vec_merge into the memory destination.  Operand 2 is the mask register
;; ("Yk") and is printed as the %{%2%} write-mask on the destination.
1234 (define_insn "<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask"
1235 [(set (match_operand:VF_AVX512VL 0 "memory_operand" "=m")
1236 (vec_merge:VF_AVX512VL
1238 [(match_operand:VF_AVX512VL 1 "register_operand" "v")]
1241 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1244 switch (get_attr_mode (insn))
1249 return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1251 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1254 [(set_attr "type" "ssemov")
1255 (set_attr "movu" "1")
1256 (set_attr "memory" "store")
1257 (set_attr "prefix" "evex")
1258 (set_attr "mode" "<sseinsnmode>")])
;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
;; Matches a store of element 0 followed by a store of element 1, each
;; selected from a V2DF register, when ix86_operands_ok_for_move_multiple
;; approves the operands.  The replacement is one V2DF UNSPEC_STOREU of
;; operand 1 to operand 0's address re-typed as V2DFmode.
1262 [(set (match_operand:DF 0 "memory_operand")
1263 (vec_select:DF (match_operand:V2DF 1 "register_operand")
1264 (parallel [(const_int 0)])))
1265 (set (match_operand:DF 2 "memory_operand")
1266 (vec_select:DF (match_operand:V2DF 3 "register_operand")
1267 (parallel [(const_int 1)])))]
1268 "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1269 && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1271 (unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))]
1272 "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
;; Expander for unaligned loads of the VI1 (byte-element) vector modes,
;; optionally masked.  On the misaligned_operand path a plain SET is
;; emitted, with the source optionally wrapped in a VEC_MERGE using
;; operands 2 * <mask_applied> and 3 * <mask_applied>.
1274 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1275 just fine if misaligned_operand is true, and without the UNSPEC it can
1276 be combined with arithmetic instructions. If misaligned_operand is
1277 false, still emit UNSPEC_LOADU insn to honor user's request for
1279 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1280 [(set (match_operand:VI1 0 "register_operand")
1282 [(match_operand:VI1 1 "nonimmediate_operand")]
1284 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1287 && misaligned_operand (operands[1], <MODE>mode))
1289 rtx src = operands[1];
1291 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1292 operands[2 * <mask_applied>],
1293 operands[3 * <mask_applied>]);
1294 emit_insn (gen_rtx_SET (operands[0], src));
;; Same expander for the VI_ULOADSTORE_BW_AVX512VL modes.  When operand 1
;; satisfies misaligned_operand, a plain SET (optionally VEC_MERGE-wrapped
;; for the masked variant) is emitted in place of the unspec.
1299 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1300 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand")
1301 (unspec:VI_ULOADSTORE_BW_AVX512VL
1302 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand")]
1306 if (misaligned_operand (operands[1], <MODE>mode))
1308 rtx src = operands[1];
1310 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1311 operands[2 * <mask_applied>],
1312 operands[3 * <mask_applied>]);
1313 emit_insn (gen_rtx_SET (operands[0], src));
;; Same expander for the VI_ULOADSTORE_F_AVX512VL modes.  When operand 1
;; satisfies misaligned_operand, a plain SET (optionally VEC_MERGE-wrapped
;; for the masked variant) is emitted in place of the unspec.
1318 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1319 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand")
1320 (unspec:VI_ULOADSTORE_F_AVX512VL
1321 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand")]
1325 if (misaligned_operand (operands[1], <MODE>mode))
1327 rtx src = operands[1];
1329 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1330 operands[2 * <mask_applied>],
1331 operands[3 * <mask_applied>]);
1332 emit_insn (gen_rtx_SET (operands[0], src));
;; Insn for the unaligned VI1 load.  The output code switches on the "mode"
;; attribute: one path prints %vmovups; the other prints %vmovdqu unless
;; both TARGET_AVX512VL and TARGET_AVX512BW are enabled, in which case
;; vmovdqu<ssescalarsize> with the optional mask operand is used.
;; prefix_data16 is selected by a TARGET_AVX test.
1337 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1338 [(set (match_operand:VI1 0 "register_operand" "=v")
1340 [(match_operand:VI1 1 "nonimmediate_operand" "vm")]
1342 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
1344 switch (get_attr_mode (insn))
1348 return "%vmovups\t{%1, %0|%0, %1}";
1350 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1351 return "%vmovdqu\t{%1, %0|%0, %1}";
1353 return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1356 [(set_attr "type" "ssemov")
1357 (set_attr "movu" "1")
1358 (set_attr "ssememalign" "8")
1359 (set (attr "prefix_data16")
1361 (match_test "TARGET_AVX")
1363 (const_string "1")))
1364 (set_attr "prefix" "maybe_vex")
1366 (cond [(and (match_test "<MODE_SIZE> == 16")
1367 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1368 (const_string "<ssePSmode>")
1369 (match_test "TARGET_AVX")
1370 (const_string "<sseinsnmode>")
1371 (match_test "optimize_function_for_size_p (cfun)")
1372 (const_string "V4SF")
1374 (const_string "<sseinsnmode>")))])
;; Unaligned load insn for the VI_ULOADSTORE_BW_AVX512VL modes; always
;; printed as vmovdqu<ssescalarsize>, with the optional mask operand.
1376 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1377 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand" "=v")
1378 (unspec:VI_ULOADSTORE_BW_AVX512VL
1379 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand" "vm")]
1382 "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1383 [(set_attr "type" "ssemov")
1384 (set_attr "movu" "1")
1385 (set_attr "ssememalign" "8")
1386 (set_attr "prefix" "maybe_evex")])
;; Unaligned load insn for the VI_ULOADSTORE_F_AVX512VL modes; always
;; printed as vmovdqu<ssescalarsize>, with the optional mask operand.
1388 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1389 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand" "=v")
1390 (unspec:VI_ULOADSTORE_F_AVX512VL
1391 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand" "vm")]
1394 "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1395 [(set_attr "type" "ssemov")
1396 (set_attr "movu" "1")
1397 (set_attr "ssememalign" "8")
1398 (set_attr "prefix" "maybe_evex")])
;; Unaligned store of a VI1 register to memory.  The output code switches on
;; the "mode" attribute: one path prints %vmovups; the other prints %vmovdqu
;; unless both TARGET_AVX512VL and TARGET_AVX512BW are enabled, in which
;; case vmovdqu<ssescalarsize> is used.  For 16-byte modes the mode
;; attribute is <ssePSmode> when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or
;; TARGET_SSE_TYPELESS_STORES holds.
1400 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1401 [(set (match_operand:VI1 0 "memory_operand" "=m")
1403 [(match_operand:VI1 1 "register_operand" "v")]
1407 switch (get_attr_mode (insn))
1412 return "%vmovups\t{%1, %0|%0, %1}";
1418 if (!(TARGET_AVX512VL && TARGET_AVX512BW))
1419 return "%vmovdqu\t{%1, %0|%0, %1}";
1421 return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
1425 [(set_attr "type" "ssemov")
1426 (set_attr "movu" "1")
1427 (set_attr "ssememalign" "8")
1428 (set (attr "prefix_data16")
1430 (match_test "TARGET_AVX")
1432 (const_string "1")))
1433 (set_attr "prefix" "maybe_vex")
1435 (cond [(and (match_test "<MODE_SIZE> == 16")
1436 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1437 (match_test "TARGET_SSE_TYPELESS_STORES")))
1438 (const_string "<ssePSmode>")
1439 (match_test "TARGET_AVX")
1440 (const_string "<sseinsnmode>")
1441 (match_test "optimize_function_for_size_p (cfun)")
1442 (const_string "V4SF")
1444 (const_string "<sseinsnmode>")))])
;; Unaligned store insn for the VI_ULOADSTORE_BW_AVX512VL modes; always
;; printed as vmovdqu<ssescalarsize>.
1446 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1447 [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "memory_operand" "=m")
1448 (unspec:VI_ULOADSTORE_BW_AVX512VL
1449 [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "register_operand" "v")]
1452 "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1453 [(set_attr "type" "ssemov")
1454 (set_attr "movu" "1")
1455 (set_attr "ssememalign" "8")
1456 (set_attr "prefix" "maybe_evex")])
;; Unaligned store insn for the VI_ULOADSTORE_F_AVX512VL modes; always
;; printed as vmovdqu<ssescalarsize>.  The prefix attribute is "maybe_evex",
;; in line with the other VI_ULOADSTORE_*_AVX512VL load/store patterns that
;; emit this same mnemonic.
1458 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1459 [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "memory_operand" "=m")
1460 (unspec:VI_ULOADSTORE_F_AVX512VL
1461 [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "register_operand" "v")]
1464 "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
1465 [(set_attr "type" "ssemov")
1466 (set_attr "movu" "1")
1467 (set_attr "ssememalign" "8")
1468 (set_attr "prefix" "maybe_evex")])
;; Masked unaligned integer store for the VI48_AVX512VL modes.  Operand 2 is
;; the mask register ("Yk"), printed as the %{%2%} write-mask on the memory
;; destination of vmovdqu<ssescalarsize>.
1470 (define_insn "<avx512>_storedqu<mode>_mask"
1471 [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m")
1472 (vec_merge:VI48_AVX512VL
1473 (unspec:VI48_AVX512VL
1474 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
1477 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1479 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1480 [(set_attr "type" "ssemov")
1481 (set_attr "movu" "1")
1482 (set_attr "memory" "store")
1483 (set_attr "prefix" "evex")
1484 (set_attr "mode" "<sseinsnmode>")])
;; Masked unaligned integer store for the VI12_AVX512VL modes.  Operand 2 is
;; the mask register ("Yk"), printed as the %{%2%} write-mask on the memory
;; destination of vmovdqu<ssescalarsize>.
1486 (define_insn "<avx512>_storedqu<mode>_mask"
1487 [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1488 (vec_merge:VI12_AVX512VL
1489 (unspec:VI12_AVX512VL
1490 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
1493 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1495 "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1496 [(set_attr "type" "ssemov")
1497 (set_attr "movu" "1")
1498 (set_attr "memory" "store")
1499 (set_attr "prefix" "evex")
1500 (set_attr "mode" "<sseinsnmode>")])
;; lddqu: load a VI1 vector from memory (operand 1 must be a memory operand)
;; into an "x" register.  prefix_data16 and prefix_rep are each selected by
;; a TARGET_AVX test.
1502 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1503 [(set (match_operand:VI1 0 "register_operand" "=x")
1504 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1507 "%vlddqu\t{%1, %0|%0, %1}"
1508 [(set_attr "type" "ssemov")
1509 (set_attr "movu" "1")
1510 (set_attr "ssememalign" "8")
1511 (set (attr "prefix_data16")
1513 (match_test "TARGET_AVX")
1515 (const_string "0")))
1516 (set (attr "prefix_rep")
1518 (match_test "TARGET_AVX")
1520 (const_string "1")))
1521 (set_attr "prefix" "maybe_vex")
1522 (set_attr "mode" "<sseinsnmode>")])
;; movnti: store a general-purpose register (SWI48 modes) to memory.
1524 (define_insn "sse2_movnti<mode>"
1525 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1526 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1529 "movnti\t{%1, %0|%0, %1}"
1530 [(set_attr "type" "ssemov")
1531 (set_attr "prefix_data16" "0")
1532 (set_attr "mode" "<MODE>")])
;; movnt<ssemodesuffix>: store a VF vector register to memory; printed with
;; the VEX form when applicable (%v prefix).
1534 (define_insn "<sse>_movnt<mode>"
1535 [(set (match_operand:VF 0 "memory_operand" "=m")
1537 [(match_operand:VF 1 "register_operand" "v")]
1540 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1541 [(set_attr "type" "ssemov")
1542 (set_attr "prefix" "maybe_vex")
1543 (set_attr "mode" "<MODE>")])
;; movntdq: store a VI8 (64-bit element) integer vector register to memory.
;; prefix_data16 is selected by a TARGET_AVX test.
1545 (define_insn "<sse2>_movnt<mode>"
1546 [(set (match_operand:VI8 0 "memory_operand" "=m")
1547 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1550 "%vmovntdq\t{%1, %0|%0, %1}"
1551 [(set_attr "type" "ssecvt")
1552 (set (attr "prefix_data16")
1554 (match_test "TARGET_AVX")
1556 (const_string "1")))
1557 (set_attr "prefix" "maybe_vex")
1558 (set_attr "mode" "<sseinsnmode>")])
1560 ; Expand patterns for non-temporal stores. At the moment, only those
1561 ; that directly map to insns are defined; it would be possible to
1562 ; define patterns for other modes that would expand to several insns.
;; Modes handled by storent patterns.  Each entry is gated on the target
;; flag shown next to it; V4SF carries no extra condition.
1565 (define_mode_iterator STORENT_MODE
1566 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1567 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1568 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1569 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1570 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Named expander "storent<mode>" for every STORENT_MODE mode: a register
;; source wrapped in an unspec and stored to a memory destination.
1572 (define_expand "storent<mode>"
1573 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1574 (unspec:STORENT_MODE
1575 [(match_operand:STORENT_MODE 1 "register_operand")]
1579 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1581 ;; Parallel floating point arithmetic
1583 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unary VF operations covered by <code>; the expansion is
;; performed entirely by ix86_expand_fp_absneg_operator.
1585 (define_expand "<code><mode>2"
1586 [(set (match_operand:VF 0 "register_operand")
1588 (match_operand:VF 1 "register_operand")))]
1590 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Vector abs/neg placeholder that is split after reload into a single
;; bitwise operation: XOR when operand 3 is NEG, AND otherwise, applied to
;; operand 1 and the extra operand 2 named in the `use'.  If operand 1 is in
;; memory the two inputs are swapped so that the memory operand comes second.
1592 (define_insn_and_split "*absneg<mode>2"
1593 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1594 (match_operator:VF 3 "absneg_operator"
1595 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
1596 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
1599 "&& reload_completed"
1602 enum rtx_code absneg_op;
1608 if (MEM_P (operands[1]))
1609 op1 = operands[2], op2 = operands[1];
1611 op1 = operands[1], op2 = operands[2];
1616 if (rtx_equal_p (operands[0], operands[1]))
1622 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1623 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1624 t = gen_rtx_SET (operands[0], t);
1628 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Expander for VF add/subtract, with optional masking and embedded
;; rounding substitutions.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy.
1630 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1631 [(set (match_operand:VF 0 "register_operand")
1633 (match_operand:VF 1 "<round_nimm_predicate>")
1634 (match_operand:VF 2 "<round_nimm_predicate>")))]
1635 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1636 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; VF add/subtract insn.  Alternative 0 is the two-operand form with
;; operand 1 tied to the destination; alternative 1 is the three-operand
;; "v"-prefixed form carrying the optional mask and rounding operands.
1638 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1639 [(set (match_operand:VF 0 "register_operand" "=x,v")
1641 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1642 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1643 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1645 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1646 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1647 [(set_attr "isa" "noavx,avx")
1648 (set_attr "type" "sseadd")
1649 (set_attr "prefix" "<mask_prefix3>")
1650 (set_attr "mode" "<MODE>")])
;; Add/subtract insn on VF_128 operands printed with the scalar mnemonic
;; suffix (<ssescalarmodesuffix>); the insn's mode attribute is the scalar
;; mode.  Alternative 0 ties operand 1 to the destination.
1652 (define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
1653 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1656 (match_operand:VF_128 1 "register_operand" "0,v")
1657 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1662 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1663 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1664 [(set_attr "isa" "noavx,avx")
1665 (set_attr "type" "sseadd")
1666 (set_attr "prefix" "<round_prefix>")
1667 (set_attr "mode" "<ssescalarmode>")])
;; Expander for VF multiply, with optional masking and embedded rounding
;; substitutions.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy.
1669 (define_expand "mul<mode>3<mask_name><round_name>"
1670 [(set (match_operand:VF 0 "register_operand")
1672 (match_operand:VF 1 "<round_nimm_predicate>")
1673 (match_operand:VF 2 "<round_nimm_predicate>")))]
1674 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1675 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; VF multiply insn.  Operand 1 is marked commutative ("%").  Alternative 0
;; is the two-operand form tied to the destination; alternative 1 is the
;; three-operand vmul form carrying the optional mask and rounding operands.
1677 (define_insn "*mul<mode>3<mask_name><round_name>"
1678 [(set (match_operand:VF 0 "register_operand" "=x,v")
1680 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1681 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1682 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1684 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1685 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1686 [(set_attr "isa" "noavx,avx")
1687 (set_attr "type" "ssemul")
1688 (set_attr "prefix" "<mask_prefix3>")
1689 (set_attr "btver2_decode" "direct,double")
1690 (set_attr "mode" "<MODE>")])
;; Multiply/divide insn on VF_128 operands printed with the scalar mnemonic
;; suffix; the "type" attribute is derived from the mnemonic
;; (sse<multdiv_mnemonic>) and the mode attribute is the scalar mode.
1692 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
1693 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1696 (match_operand:VF_128 1 "register_operand" "0,v")
1697 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1702 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1703 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1704 [(set_attr "isa" "noavx,avx")
1705 (set_attr "type" "sse<multdiv_mnemonic>")
1706 (set_attr "prefix" "<round_prefix>")
1707 (set_attr "btver2_decode" "direct,double")
1708 (set_attr "mode" "<ssescalarmode>")])
;; Division expander for the VF2 modes: only fixes up the operands with
;; ix86_fixup_binary_operands_no_copy before the div pattern is emitted.
1710 (define_expand "div<mode>3"
1711 [(set (match_operand:VF2 0 "register_operand")
1712 (div:VF2 (match_operand:VF2 1 "register_operand")
1713 (match_operand:VF2 2 "nonimmediate_operand")))]
1715 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Division expander for the VF1 modes.  After the operand fixup, a guarded
;; path hands the division to ix86_emit_swdivsf; the visible parts of that
;; guard require TARGET_RECIP_VEC_DIV, not optimizing for size,
;; finite-math-only, no trapping math and unsafe math optimizations.
1717 (define_expand "div<mode>3"
1718 [(set (match_operand:VF1 0 "register_operand")
1719 (div:VF1 (match_operand:VF1 1 "register_operand")
1720 (match_operand:VF1 2 "nonimmediate_operand")))]
1723 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1726 && TARGET_RECIP_VEC_DIV
1727 && !optimize_insn_for_size_p ()
1728 && flag_finite_math_only && !flag_trapping_math
1729 && flag_unsafe_math_optimizations)
1731 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; VF divide insn.  Operand 1 must be a register (division is not
;; commutative); alternative 0 ties it to the destination, alternative 1 is
;; the three-operand vdiv form with optional mask and rounding operands.
1736 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1737 [(set (match_operand:VF 0 "register_operand" "=x,v")
1739 (match_operand:VF 1 "register_operand" "0,v")
1740 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1741 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1743 div<ssemodesuffix>\t{%2, %0|%0, %2}
1744 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1745 [(set_attr "isa" "noavx,avx")
1746 (set_attr "type" "ssediv")
1747 (set_attr "prefix" "<mask_prefix3>")
1748 (set_attr "mode" "<MODE>")])
;; rcpps on the VF1_128_256 modes, modelled as UNSPEC_RCP of operand 1.
1750 (define_insn "<sse>_rcp<mode>2"
1751 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1753 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1755 "%vrcpps\t{%1, %0|%0, %1}"
1756 [(set_attr "type" "sse")
1757 (set_attr "atom_sse_attr" "rcp")
1758 (set_attr "btver2_sse_attr" "rcp")
1759 (set_attr "prefix" "maybe_vex")
1760 (set_attr "mode" "<MODE>")])
;; rcpss / vrcpss on V4SF operands (insn mode SF).  Operand 2 is a register
;; that is tied to the destination in the non-AVX alternative and printed as
;; the extra source of the three-operand AVX form.
1762 (define_insn "sse_vmrcpv4sf2"
1763 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1765 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1767 (match_operand:V4SF 2 "register_operand" "0,x")
1771 rcpss\t{%1, %0|%0, %k1}
1772 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1773 [(set_attr "isa" "noavx,avx")
1774 (set_attr "type" "sse")
1775 (set_attr "ssememalign" "32")
1776 (set_attr "atom_sse_attr" "rcp")
1777 (set_attr "btver2_sse_attr" "rcp")
1778 (set_attr "prefix" "orig,vex")
1779 (set_attr "mode" "SF")])
;; vrcp14<ssemodesuffix> on the VF_AVX512VL modes, with optional masking;
;; always EVEX-encoded per the prefix attribute.
1781 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1782 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1784 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1787 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1788 [(set_attr "type" "sse")
1789 (set_attr "prefix" "evex")
1790 (set_attr "mode" "<MODE>")])
;; vrcp14 with the scalar mnemonic suffix on VF_128 operands.  Operand 1 is
;; the value operated on; operand 2 is a second register source printed
;; between it and the destination.
1792 (define_insn "srcp14<mode>"
1793 [(set (match_operand:VF_128 0 "register_operand" "=v")
1796 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1798 (match_operand:VF_128 2 "register_operand" "v")
1801 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1802 [(set_attr "type" "sse")
1803 (set_attr "prefix" "evex")
1804 (set_attr "mode" "<MODE>")])
;; Square-root expander for the VF2 modes; no preparation code is visible,
;; so the pattern is emitted as written.
1806 (define_expand "sqrt<mode>2"
1807 [(set (match_operand:VF2 0 "register_operand")
1808 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
;; Square-root expander for the VF1 modes.  A guarded path hands the
;; operation to ix86_emit_swsqrtsf (last argument false); the visible parts
;; of that guard require TARGET_RECIP_VEC_SQRT, not optimizing for size,
;; finite-math-only, no trapping math and unsafe math optimizations.
1811 (define_expand "sqrt<mode>2"
1812 [(set (match_operand:VF1 0 "register_operand")
1813 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1817 && TARGET_RECIP_VEC_SQRT
1818 && !optimize_insn_for_size_p ()
1819 && flag_finite_math_only && !flag_trapping_math
1820 && flag_unsafe_math_optimizations)
1822 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; VF square-root insn (sqrt<ssemodesuffix>, VEX form when applicable), with
;; optional mask and rounding operands from the substitutions.
1827 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1828 [(set (match_operand:VF 0 "register_operand" "=v")
1829 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))]
1830 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1831 "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1832 [(set_attr "type" "sse")
1833 (set_attr "atom_sse_attr" "sqrt")
1834 (set_attr "btver2_sse_attr" "sqrt")
1835 (set_attr "prefix" "maybe_vex")
1836 (set_attr "mode" "<MODE>")])
;; Square-root insn on VF_128 operands printed with the scalar mnemonic
;; suffix.  Operand 1 is the value operated on; operand 2 is a register tied
;; to the destination in the non-AVX alternative and printed as the extra
;; source in the AVX form.
1838 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1839 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1842 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
1843 (match_operand:VF_128 2 "register_operand" "0,v")
1847 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1848 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1849 [(set_attr "isa" "noavx,avx")
1850 (set_attr "type" "sse")
1851 (set_attr "atom_sse_attr" "sqrt")
1852 (set_attr "prefix" "<round_prefix>")
1853 (set_attr "btver2_sse_attr" "sqrt")
1854 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal square-root expander for the VF1_128_256 modes; expansion is
;; done by ix86_emit_swsqrtsf with its last argument set to true.
1856 (define_expand "rsqrt<mode>2"
1857 [(set (match_operand:VF1_128_256 0 "register_operand")
1859 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1862 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; rsqrtps on the VF1_128_256 modes, modelled as UNSPEC_RSQRT of operand 1.
1866 (define_insn "<sse>_rsqrt<mode>2"
1867 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1869 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1871 "%vrsqrtps\t{%1, %0|%0, %1}"
1872 [(set_attr "type" "sse")
1873 (set_attr "prefix" "maybe_vex")
1874 (set_attr "mode" "<MODE>")])
;; vrsqrt14<ssemodesuffix> on the VF_AVX512VL modes, with optional masking;
;; always EVEX-encoded per the prefix attribute.
1876 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1877 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1879 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1882 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1883 [(set_attr "type" "sse")
1884 (set_attr "prefix" "evex")
1885 (set_attr "mode" "<MODE>")])
;; vrsqrt14 with the scalar mnemonic suffix on VF_128 operands.  Operand 1
;; is the value operated on; operand 2 is a second register source printed
;; between it and the destination.
1887 (define_insn "rsqrt14<mode>"
1888 [(set (match_operand:VF_128 0 "register_operand" "=v")
1891 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1893 (match_operand:VF_128 2 "register_operand" "v")
1896 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1897 [(set_attr "type" "sse")
1898 (set_attr "prefix" "evex")
1899 (set_attr "mode" "<MODE>")])
;; rsqrtss / vrsqrtss on V4SF operands (insn mode SF).  Operand 2 is a
;; register tied to the destination in the non-AVX alternative and printed
;; as the extra source of the three-operand AVX form.
1901 (define_insn "sse_vmrsqrtv4sf2"
1902 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1904 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1906 (match_operand:V4SF 2 "register_operand" "0,x")
1910 rsqrtss\t{%1, %0|%0, %k1}
1911 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1912 [(set_attr "isa" "noavx,avx")
1913 (set_attr "type" "sse")
1914 (set_attr "ssememalign" "32")
1915 (set_attr "prefix" "orig,vex")
1916 (set_attr "mode" "SF")])
1918 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1919 ;; isn't really correct, as those rtl operators aren't defined when
1920 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for the VF min/max operations.  Unless flag_finite_math_only is
;; set, operand 1 is forced into a register before the usual binary-operand
;; fixup, so the non-finite insn below (which requires a register operand 1)
;; can match.
1922 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1923 [(set (match_operand:VF 0 "register_operand")
1925 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1926 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1927 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1929 if (!flag_finite_math_only)
1930 operands[1] = force_reg (<MODE>mode, operands[1]);
1931 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; VF min/max insn used only under flag_finite_math_only.  Operand 1 is
;; marked commutative ("%") and may satisfy the non-immediate predicate.
1934 (define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
1935 [(set (match_operand:VF 0 "register_operand" "=x,v")
1937 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1938 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1939 "TARGET_SSE && flag_finite_math_only
1940 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1941 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1943 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1944 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1945 [(set_attr "isa" "noavx,avx")
1946 (set_attr "type" "sseadd")
1947 (set_attr "btver2_sse_attr" "maxmin")
1948 (set_attr "prefix" "<mask_prefix3>")
1949 (set_attr "mode" "<MODE>")])
;; VF min/max insn used when flag_finite_math_only is not set.  Operand 1
;; must be a register and carries no commutative marker, so operand order is
;; preserved.
1951 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1952 [(set (match_operand:VF 0 "register_operand" "=x,v")
1954 (match_operand:VF 1 "register_operand" "0,v")
1955 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1956 "TARGET_SSE && !flag_finite_math_only
1957 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1959 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1960 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1961 [(set_attr "isa" "noavx,avx")
1962 (set_attr "type" "sseadd")
1963 (set_attr "btver2_sse_attr" "maxmin")
1964 (set_attr "prefix" "<mask_prefix3>")
1965 (set_attr "mode" "<MODE>")])
;; Min/max insn on VF_128 operands printed with the scalar mnemonic suffix;
;; the insn's mode attribute is the scalar mode.  Alternative 0 ties
;; operand 1 to the destination.
1967 (define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
1968 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1971 (match_operand:VF_128 1 "register_operand" "0,v")
1972 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
1977 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1978 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
1979 [(set_attr "isa" "noavx,avx")
1980 (set_attr "type" "sse")
1981 (set_attr "btver2_sse_attr" "maxmin")
1982 (set_attr "prefix" "<round_saeonly_prefix>")
1983 (set_attr "mode" "<ssescalarmode>")])
1985 ;; These versions of the min/max patterns implement exactly the operations
1986 ;; min = (op1 < op2 ? op1 : op2)
1987 ;; max = (!(op1 < op2) ? op1 : op2)
1988 ;; Their operands are not commutative, and thus they may be used in the
1989 ;; presence of -0.0 and NaN.
;; Order-preserving VF minimum: operand 1 is a register (tied to the
;; destination in the non-AVX alternative) and operand 2 may be memory.
1991 (define_insn "*ieee_smin<mode>3"
1992 [(set (match_operand:VF 0 "register_operand" "=v,v")
1994 [(match_operand:VF 1 "register_operand" "0,v")
1995 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1999 min<ssemodesuffix>\t{%2, %0|%0, %2}
2000 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2001 [(set_attr "isa" "noavx,avx")
2002 (set_attr "type" "sseadd")
2003 (set_attr "prefix" "orig,vex")
2004 (set_attr "mode" "<MODE>")])
;; Order-preserving VF maximum: operand 1 is a register (tied to the
;; destination in the non-AVX alternative) and operand 2 may be memory.
2006 (define_insn "*ieee_smax<mode>3"
2007 [(set (match_operand:VF 0 "register_operand" "=v,v")
2009 [(match_operand:VF 1 "register_operand" "0,v")
2010 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
2014 max<ssemodesuffix>\t{%2, %0|%0, %2}
2015 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2016 [(set_attr "isa" "noavx,avx")
2017 (set_attr "type" "sseadd")
2018 (set_attr "prefix" "orig,vex")
2019 (set_attr "mode" "<MODE>")])
;; vaddsubpd on V4DF: the RTL combines an operation on operands 1 and 2
;; with (minus op1 op2) on the same operands.
2021 (define_insn "avx_addsubv4df3"
2022 [(set (match_operand:V4DF 0 "register_operand" "=x")
2025 (match_operand:V4DF 1 "register_operand" "x")
2026 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2027 (minus:V4DF (match_dup 1) (match_dup 2))
2030 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2031 [(set_attr "type" "sseadd")
2032 (set_attr "prefix" "vex")
2033 (set_attr "mode" "V4DF")])
;; addsubpd / vaddsubpd on V2DF: the RTL combines an operation on operands 1
;; and 2 with (minus op1 op2).  Alternative 0 ties operand 1 to the
;; destination; alternative 1 is the three-operand AVX form.
2035 (define_insn "sse3_addsubv2df3"
2036 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2039 (match_operand:V2DF 1 "register_operand" "0,x")
2040 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
2041 (minus:V2DF (match_dup 1) (match_dup 2))
2045 addsubpd\t{%2, %0|%0, %2}
2046 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2047 [(set_attr "isa" "noavx,avx")
2048 (set_attr "type" "sseadd")
2049 (set_attr "atom_unit" "complex")
2050 (set_attr "prefix" "orig,vex")
2051 (set_attr "mode" "V2DF")])
;; vaddsubps on V8SF: the RTL combines an operation on operands 1 and 2
;; with (minus op1 op2) on the same operands.
2053 (define_insn "avx_addsubv8sf3"
2054 [(set (match_operand:V8SF 0 "register_operand" "=x")
2057 (match_operand:V8SF 1 "register_operand" "x")
2058 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2059 (minus:V8SF (match_dup 1) (match_dup 2))
2062 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2063 [(set_attr "type" "sseadd")
2064 (set_attr "prefix" "vex")
2065 (set_attr "mode" "V8SF")])
;; addsubps / vaddsubps on V4SF: the RTL combines an operation on operands 1
;; and 2 with (minus op1 op2).  prefix_rep is set to 1 only for the non-AVX
;; alternative.
2067 (define_insn "sse3_addsubv4sf3"
2068 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2071 (match_operand:V4SF 1 "register_operand" "0,x")
2072 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2073 (minus:V4SF (match_dup 1) (match_dup 2))
2077 addsubps\t{%2, %0|%0, %2}
2078 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2079 [(set_attr "isa" "noavx,avx")
2080 (set_attr "type" "sseadd")
2081 (set_attr "prefix" "orig,vex")
2082 (set_attr "prefix_rep" "1,*")
2083 (set_attr "mode" "V4SF")])
;; vhaddpd / vhsubpd on V4DF.  The RTL pairs adjacent elements: elements 0
;; and 1 of operand 1, elements 0 and 1 of operand 2, then elements 2 and 3
;; of operand 1 and elements 2 and 3 of operand 2.
2085 (define_insn "avx_h<plusminus_insn>v4df3"
2086 [(set (match_operand:V4DF 0 "register_operand" "=x")
2091 (match_operand:V4DF 1 "register_operand" "x")
2092 (parallel [(const_int 0)]))
2093 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2096 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
2097 (parallel [(const_int 0)]))
2098 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2101 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2102 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2104 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2105 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2107 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2108 [(set_attr "type" "sseadd")
2109 (set_attr "prefix" "vex")
2110 (set_attr "mode" "V4DF")])
;; Named expander for the V2DF horizontal add: pairs element 0 with
;; element 1 of operand 1, and element 0 with element 1 of operand 2.
2112 (define_expand "sse3_haddv2df3"
2113 [(set (match_operand:V2DF 0 "register_operand")
2117 (match_operand:V2DF 1 "register_operand")
2118 (parallel [(const_int 0)]))
2119 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2122 (match_operand:V2DF 2 "nonimmediate_operand")
2123 (parallel [(const_int 0)]))
2124 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; haddpd / vhaddpd matcher.  The element selectors are operands 3..6, each
;; constrained to 0 or 1; the condition requires the two selectors applied
;; to the same source to differ (3 != 4 and 5 != 6), so either ordering of
;; the pair is accepted.
2127 (define_insn "*sse3_haddv2df3"
2128 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2132 (match_operand:V2DF 1 "register_operand" "0,x")
2133 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2136 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2139 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2140 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2143 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2145 && INTVAL (operands[3]) != INTVAL (operands[4])
2146 && INTVAL (operands[5]) != INTVAL (operands[6])"
2148 haddpd\t{%2, %0|%0, %2}
2149 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2150 [(set_attr "isa" "noavx,avx")
2151 (set_attr "type" "sseadd")
2152 (set_attr "prefix" "orig,vex")
2153 (set_attr "mode" "V2DF")])
;; V2DF horizontal subtract.  Lane order is fixed here (element 0, then
;; element 1) for both operand 1 and operand 2, unlike the hadd matcher
;; above which accepts either order.
;; Emits hsubpd (noavx, destination tied to operand 1) or vhsubpd (avx).
2155 (define_insn "sse3_hsubv2df3"
2156 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2160 (match_operand:V2DF 1 "register_operand" "0,x")
2161 (parallel [(const_int 0)]))
2162 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2165 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2166 (parallel [(const_int 0)]))
2167 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2170 hsubpd\t{%2, %0|%0, %2}
2171 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2172 [(set_attr "isa" "noavx,avx")
2173 (set_attr "type" "sseadd")
2174 (set_attr "prefix" "orig,vex")
2175 (set_attr "mode" "V2DF")])
;; Scalar (DF) result computed from the two lanes of V2DF operand 1.
;; The lane indices are operands 2 and 3 and must differ.  The same
;; register is used for both instruction sources: haddpd %0, %0 in the
;; legacy form (operand 1 tied to operand 0) and vhaddpd %1, %1, %0 in
;; the AVX form.
2177 (define_insn "*sse3_haddv2df3_low"
2178 [(set (match_operand:DF 0 "register_operand" "=x,x")
2181 (match_operand:V2DF 1 "register_operand" "0,x")
2182 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2185 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2187 && INTVAL (operands[2]) != INTVAL (operands[3])"
2189 haddpd\t{%0, %0|%0, %0}
2190 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2191 [(set_attr "isa" "noavx,avx")
2192 (set_attr "type" "sseadd1")
2193 (set_attr "prefix" "orig,vex")
2194 (set_attr "mode" "V2DF")])
;; Scalar (DF) result computed from lanes 0 and 1 of V2DF operand 1,
;; with a fixed lane order.  The same register is used for both
;; instruction sources: hsubpd %0, %0 (legacy, operand 1 tied to
;; operand 0) or vhsubpd %1, %1, %0 (AVX).
2196 (define_insn "*sse3_hsubv2df3_low"
2197 [(set (match_operand:DF 0 "register_operand" "=x,x")
2200 (match_operand:V2DF 1 "register_operand" "0,x")
2201 (parallel [(const_int 0)]))
2204 (parallel [(const_int 1)]))))]
2207 hsubpd\t{%0, %0|%0, %0}
2208 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2209 [(set_attr "isa" "noavx,avx")
2210 (set_attr "type" "sseadd1")
2211 (set_attr "prefix" "orig,vex")
2212 (set_attr "mode" "V2DF")])
;; 256-bit horizontal operation on V8SF, emitted as
;; vh<plusminus_mnemonic>ps.  The vec_selects are ordered as: pairs
;; (0,1),(2,3) of operand 1, the same pairs of operand 2, then pairs
;; (4,5),(6,7) of operand 1 followed by the same pairs of operand 2.
;; Single alternative, VEX-encoded; operand 2 may be memory.
2214 (define_insn "avx_h<plusminus_insn>v8sf3"
2215 [(set (match_operand:V8SF 0 "register_operand" "=x")
2221 (match_operand:V8SF 1 "register_operand" "x")
2222 (parallel [(const_int 0)]))
2223 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2225 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2226 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2230 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2231 (parallel [(const_int 0)]))
2232 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2234 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2235 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2239 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2240 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2242 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2243 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2246 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2247 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2249 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2250 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2252 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2253 [(set_attr "type" "sseadd")
2254 (set_attr "prefix" "vex")
2255 (set_attr "mode" "V8SF")])
;; 128-bit horizontal operation on V4SF.  The vec_selects pair elements
;; (0,1) and (2,3) of operand 1, then the same pairs of operand 2.
;; Alternative 0 (noavx) emits h<plusminus_mnemonic>ps with the
;; destination tied to operand 1; alternative 1 (avx) emits the
;; three-operand vh<plusminus_mnemonic>ps.
2257 (define_insn "sse3_h<plusminus_insn>v4sf3"
2258 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2263 (match_operand:V4SF 1 "register_operand" "0,x")
2264 (parallel [(const_int 0)]))
2265 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2267 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2268 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2272 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
2273 (parallel [(const_int 0)]))
2274 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2276 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2277 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2280 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2281 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2282 [(set_attr "isa" "noavx,avx")
2283 (set_attr "type" "sseadd")
2284 (set_attr "atom_unit" "complex")
2285 (set_attr "prefix" "orig,vex")
2286 (set_attr "prefix_rep" "1,*")
2287 (set_attr "mode" "V4SF")])
;; Sum-reduction expander for V8DF.  It delegates to ix86_expand_reduc,
;; passing gen_addv8df3 as the combining operation, with operand 0 as
;; the destination and operand 1 as the input vector.
2289 (define_expand "reduc_splus_v8df"
2290 [(match_operand:V8DF 0 "register_operand")
2291 (match_operand:V8DF 1 "register_operand")]
2294 ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
;; Sum-reduction expander for V4DF, done in three steps:
;;   1. horizontal-add operand 1 with itself into tmp;
;;   2. vperm2f128 of tmp with itself, immediate 1, into tmp2
;;      (presumably swaps the 128-bit halves -- confirm against the
;;      vperm2f128 immediate encoding);
;;   3. add tmp and tmp2 into operand 0.
2298 (define_expand "reduc_splus_v4df"
2299 [(match_operand:V4DF 0 "register_operand")
2300 (match_operand:V4DF 1 "register_operand")]
2303 rtx tmp = gen_reg_rtx (V4DFmode);
2304 rtx tmp2 = gen_reg_rtx (V4DFmode);
2305 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2306 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2307 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum-reduction expander for V2DF: a single horizontal add of
;; operand 1 with itself, written directly to operand 0.
2311 (define_expand "reduc_splus_v2df"
2312 [(match_operand:V2DF 0 "register_operand")
2313 (match_operand:V2DF 1 "register_operand")]
2316 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum-reduction expander for V16SF.  It delegates to ix86_expand_reduc
;; with gen_addv16sf3 as the combining operation.
2320 (define_expand "reduc_splus_v16sf"
2321 [(match_operand:V16SF 0 "register_operand")
2322 (match_operand:V16SF 1 "register_operand")]
2325 ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
;; Sum-reduction expander for V8SF, done in four steps:
;;   1. horizontal-add operand 1 with itself into tmp;
;;   2. horizontal-add tmp with itself into tmp2;
;;   3. vperm2f128 of tmp2 with itself, immediate 1, back into tmp
;;      (presumably swaps the 128-bit halves -- confirm);
;;   4. add tmp and tmp2 into operand 0.
;; Note that tmp is reused as the destination of step 3.
2329 (define_expand "reduc_splus_v8sf"
2330 [(match_operand:V8SF 0 "register_operand")
2331 (match_operand:V8SF 1 "register_operand")]
2334 rtx tmp = gen_reg_rtx (V8SFmode);
2335 rtx tmp2 = gen_reg_rtx (V8SFmode);
2336 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2337 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2338 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2339 emit_insn (gen_addv4sf3 == 0 ? 0 : gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum-reduction expander for V4SF.  Two strategies appear in the body:
;;   - two chained horizontal adds (operand 1 with itself into tmp,
;;     then tmp with itself into operand 0);
;;   - the generic ix86_expand_reduc helper with gen_addv4sf3.
;; NOTE(review): the test that selects between them is presumably an
;; SSE3 availability check, since the first path uses sse3_haddv4sf3 --
;; confirm.
2343 (define_expand "reduc_splus_v4sf"
2344 [(match_operand:V4SF 0 "register_operand")
2345 (match_operand:V4SF 1 "register_operand")]
2350 rtx tmp = gen_reg_rtx (V4SFmode);
2351 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2352 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
2355 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
2359 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each mode is enabled by its own target test: the 256-bit integer
;; modes need AVX2, V8SF/V4DF need AVX, V4SF needs SSE, V64QI/V32HI need
;; AVX512BW, and the remaining 512-bit modes need AVX512F.
2360 (define_mode_iterator REDUC_SMINMAX_MODE
2361 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2362 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2363 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2364 (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
2365 (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2366 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2367 (V8DF "TARGET_AVX512F")])
;; Reduction expander over the smaxmin codes for every mode in
;; REDUC_SMINMAX_MODE.  It delegates to ix86_expand_reduc, passing the
;; element-wise gen_<code><mode>3 generator as the combining operation.
2369 (define_expand "reduc_<code>_<mode>"
2370 [(smaxmin:REDUC_SMINMAX_MODE
2371 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
2372 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2375 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander over the umaxmin codes for the VI_AVX512BW modes.
;; It delegates to ix86_expand_reduc with gen_<code><mode>3.
2379 (define_expand "reduc_<code>_<mode>"
2380 [(umaxmin:VI_AVX512BW
2381 (match_operand:VI_AVX512BW 0 "register_operand")
2382 (match_operand:VI_AVX512BW 1 "register_operand"))]
2385 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander for the VI_256 integer modes.  It delegates to
;; ix86_expand_reduc with gen_<code><mode>3.
;; NOTE(review): presumably iterates over the unsigned max/min codes
;; like the VI_AVX512BW expander above -- confirm.
2389 (define_expand "reduc_<code>_<mode>"
2391 (match_operand:VI_256 0 "register_operand")
2392 (match_operand:VI_256 1 "register_operand"))]
2395 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-minimum reduction expander for V8HI.  It delegates to
;; ix86_expand_reduc with gen_uminv8hi3 as the combining operation.
2399 (define_expand "reduc_umin_v8hi"
2401 (match_operand:V8HI 0 "register_operand")
2402 (match_operand:V8HI 1 "register_operand"))]
2405 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
;; Packed vreduce<ssemodesuffix> for the VF_AVX512VL modes.  Operand 1
;; is the source (register or memory) and operand 2 is an immediate in
;; the range 0..255.  Masking, when applied through <mask_name>, is
;; appended to the destination via <mask_operand3>.  EVEX-encoded.
2409 (define_insn "<mask_codefor>reducep<mode><mask_name>"
2410 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2412 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2413 (match_operand:SI 2 "const_0_to_255_operand")]
2416 "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2417 [(set_attr "type" "sse")
2418 (set_attr "prefix" "evex")
2419 (set_attr "mode" "<MODE>")])
;; Scalar vreduce<ssescalarmodesuffix> for the VF_128 modes.  Operand 1
;; is a register, operand 2 is a register or memory, and operand 3 is an
;; immediate in the range 0..255.  EVEX-encoded.
2421 (define_insn "reduces<mode>"
2422 [(set (match_operand:VF_128 0 "register_operand" "=v")
2425 [(match_operand:VF_128 1 "register_operand" "v")
2426 (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2427 (match_operand:SI 3 "const_0_to_255_operand")]
2432 "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2433 [(set_attr "type" "sse")
2434 (set_attr "prefix" "evex")
2435 (set_attr "mode" "<MODE>")])
2437 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2439 ;; Parallel floating point comparisons
2441 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX packed floating-point compare for the VF_128_256 modes, with the
;; comparison predicate given as an immediate (operand 3, range 0..31).
;; The result is written to a vector register.  Emits the four-operand
;; vcmp<ssemodesuffix>; operand 2 may be memory.
2443 (define_insn "avx_cmp<mode>3"
2444 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2446 [(match_operand:VF_128_256 1 "register_operand" "x")
2447 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2448 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2451 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2452 [(set_attr "type" "ssecmp")
2453 (set_attr "length_immediate" "1")
2454 (set_attr "prefix" "vex")
2455 (set_attr "mode" "<MODE>")])
;; AVX scalar floating-point compare for the VF_128 modes, with the
;; predicate as an immediate (operand 3, range 0..31).  Emits
;; vcmp<ssescalarmodesuffix>.  In Intel syntax operand 2 is printed with
;; the <iptr> modifier.  The "mode" attribute is the scalar mode.
2457 (define_insn "avx_vmcmp<mode>3"
2458 [(set (match_operand:VF_128 0 "register_operand" "=x")
2461 [(match_operand:VF_128 1 "register_operand" "x")
2462 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2463 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2468 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2469 [(set_attr "type" "ssecmp")
2470 (set_attr "length_immediate" "1")
2471 (set_attr "prefix" "vex")
2472 (set_attr "mode" "<ssescalarmode>")])
;; Packed compare expressed as an RTL comparison operator (operand 3,
;; accepted by sse_comparison_operator).  This variant matches only when
;; the operator's RTX class is RTX_COMM_COMPARE, and marks operand 1 as
;; commutative with operand 2 ("%" constraint modifier).
;; The mnemonic is cmp%D3 / vcmp%D3, where %D3 prints the condition.
2474 (define_insn "*<sse>_maskcmp<mode>3_comm"
2475 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2476 (match_operator:VF_128_256 3 "sse_comparison_operator"
2477 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2478 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2480 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2482 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2483 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2484 [(set_attr "isa" "noavx,avx")
2485 (set_attr "type" "ssecmp")
2486 (set_attr "length_immediate" "1")
2487 (set_attr "prefix" "orig,vex")
2488 (set_attr "mode" "<MODE>")])
;; Packed compare expressed as an RTL comparison operator (operand 3,
;; accepted by sse_comparison_operator), without the commutative marker
;; on operand 1.  Alternative 0 (noavx) ties the destination to
;; operand 1; alternative 1 (avx) uses the three-operand form.
2490 (define_insn "<sse>_maskcmp<mode>3"
2491 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2492 (match_operator:VF_128_256 3 "sse_comparison_operator"
2493 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2494 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2497 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2498 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2499 [(set_attr "isa" "noavx,avx")
2500 (set_attr "type" "ssecmp")
2501 (set_attr "length_immediate" "1")
2502 (set_attr "prefix" "orig,vex")
2503 (set_attr "mode" "<MODE>")])
;; Scalar compare for the VF_128 modes, expressed as an RTL comparison
;; operator (operand 3).  Emits cmp%D3 / vcmp%D3 with the scalar mode
;; suffix; in Intel syntax operand 2 is printed with <iptr>.
;; length_immediate is "1" for the legacy alternative and "*" for AVX.
2505 (define_insn "<sse>_vmmaskcmp<mode>3"
2506 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2508 (match_operator:VF_128 3 "sse_comparison_operator"
2509 [(match_operand:VF_128 1 "register_operand" "0,x")
2510 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2515 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2516 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2517 [(set_attr "isa" "noavx,avx")
2518 (set_attr "type" "ssecmp")
2519 (set_attr "length_immediate" "1,*")
2520 (set_attr "prefix" "orig,vex")
2521 (set_attr "mode" "<ssescalarmode>")])
;; Per-mode predicate for the comparison immediate used by the AVX-512
;; compare patterns: floating-point vector modes accept 0..31
;; (const_0_to_31_operand) and integer vector modes accept 0..7
;; (const_0_to_7_operand).
2523 (define_mode_attr cmp_imm_predicate
2524 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2525 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")
2526 (V8SF "const_0_to_31_operand") (V4DF "const_0_to_31_operand")
2527 (V8SI "const_0_to_7_operand") (V4DI "const_0_to_7_operand")
2528 (V4SF "const_0_to_31_operand") (V2DF "const_0_to_31_operand")
2529 (V4SI "const_0_to_7_operand") (V2DI "const_0_to_7_operand")
2530 (V32HI "const_0_to_7_operand") (V64QI "const_0_to_7_operand")
2531 (V16HI "const_0_to_7_operand") (V32QI "const_0_to_7_operand")
2532 (V8HI "const_0_to_7_operand") (V16QI "const_0_to_7_operand")])
;; AVX-512 compare into a mask register (constraint Yk) for the
;; V48_AVX512VL modes.  Operand 3 is the comparison immediate, validated
;; by the per-mode <cmp_imm_predicate>.  Requires TARGET_AVX512F plus the
;; <round_saeonly_mode512bit_condition>.  The mnemonic is
;; v<sseintprefix>cmp<ssemodesuffix>; the optional mask-merge operand
;; and SAE modifier are spliced in through the subst attributes.
2534 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2535 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2536 (unspec:<avx512fmaskmode>
2537 [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2538 (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2539 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2541 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2542 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2543 [(set_attr "type" "ssecmp")
2544 (set_attr "length_immediate" "1")
2545 (set_attr "prefix" "evex")
2546 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 integer compare into a mask register for the VI12_AVX512VL
;; modes.  Operand 3 is the comparison immediate, validated by
;; <cmp_imm_predicate>.  Emits vpcmp<ssemodesuffix>, with the optional
;; mask-merge operand appended to the destination.
2548 (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2549 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2550 (unspec:<avx512fmaskmode>
2551 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2552 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2553 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2556 "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2557 [(set_attr "type" "ssecmp")
2558 (set_attr "length_immediate" "1")
2559 (set_attr "prefix" "evex")
2560 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 unsigned integer compare (UNSPEC_UNSIGNED_PCMP) into a mask
;; register for the VI12_AVX512VL modes.  Operand 3 is an immediate in
;; the range 0..7.  Emits vpcmpu<ssemodesuffix>.
2562 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2563 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2564 (unspec:<avx512fmaskmode>
2565 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2566 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2567 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2568 UNSPEC_UNSIGNED_PCMP))]
2570 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2571 [(set_attr "type" "ssecmp")
2572 (set_attr "length_immediate" "1")
2573 (set_attr "prefix" "evex")
2574 (set_attr "mode" "<sseinsnmode>")])
;; Same unsigned compare pattern as the VI12_AVX512VL one above, but
;; instantiated over the VI48_AVX512VL modes.  The pattern name is
;; identical; the two differ only in the mode iterator (and presumably
;; in the insn condition -- confirm).
2576 (define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2577 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2578 (unspec:<avx512fmaskmode>
2579 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2580 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2581 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2582 UNSPEC_UNSIGNED_PCMP))]
2584 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2585 [(set_attr "type" "ssecmp")
2586 (set_attr "length_immediate" "1")
2587 (set_attr "prefix" "evex")
2588 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 scalar floating-point compare into a mask register for the
;; VF_128 modes.  The compare unspec is wrapped in an (and ...) in the
;; mask mode.  Operand 3 is the predicate immediate (0..31); operand 2's
;; predicate and constraint come from the round_saeonly subst.
;; Emits vcmp<ssescalarmodesuffix>.
2590 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2591 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2592 (and:<avx512fmaskmode>
2593 (unspec:<avx512fmaskmode>
2594 [(match_operand:VF_128 1 "register_operand" "v")
2595 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2596 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2600 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2601 [(set_attr "type" "ssecmp")
2602 (set_attr "length_immediate" "1")
2603 (set_attr "prefix" "evex")
2604 (set_attr "mode" "<ssescalarmode>")])
;; Masked form of the AVX-512 scalar compare.  The compare result is
;; combined through (and ...) with an expression involving operand 4, a
;; mask register (constraint Yk).  In the output template operand 4 is
;; printed in braces after the destination (%0%{%4%}), and the SAE
;; modifier uses <round_saeonly_op5> because of the extra mask operand.
2606 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2607 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2608 (and:<avx512fmaskmode>
2609 (unspec:<avx512fmaskmode>
2610 [(match_operand:VF_128 1 "register_operand" "v")
2611 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2612 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2614 (and:<avx512fmaskmode>
2615 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2618 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2619 [(set_attr "type" "ssecmp")
2620 (set_attr "length_immediate" "1")
2621 (set_attr "prefix" "evex")
2622 (set_attr "mode" "<ssescalarmode>")])
;; Packed floating-point compare for the VF modes, expressed as an RTL
;; comparison operator (operand 3, accepted by sse_comparison_operator),
;; with the result in a mask register (constraint Yk).
;; Emits vcmp%D3<ssemodesuffix>; EVEX-encoded.
2624 (define_insn "avx512f_maskcmp<mode>3"
2625 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2626 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2627 [(match_operand:VF 1 "register_operand" "v")
2628 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2630 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2631 [(set_attr "type" "ssecmp")
2632 (set_attr "length_immediate" "1")
2633 (set_attr "prefix" "evex")
2634 (set_attr "mode" "<sseinsnmode>")])
;; Scalar ordered compare that sets the flags register in CCFP mode.
;; Both sources are element 0 of vector-mode operands 0 and 1.
;; Enabled when SSE_FLOAT_MODE_P holds for the scalar mode.  Emits
;; %vcomi<ssemodesuffix>, where the %v prefix prints the VEX form when
;; applicable ("maybe_vex").  prefix_data16 depends on whether the insn
;; mode is DF.
2636 (define_insn "<sse>_comi<round_saeonly_name>"
2637 [(set (reg:CCFP FLAGS_REG)
2640 (match_operand:<ssevecmode> 0 "register_operand" "v")
2641 (parallel [(const_int 0)]))
2643 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2644 (parallel [(const_int 0)]))))]
2645 "SSE_FLOAT_MODE_P (<MODE>mode)"
2646 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2647 [(set_attr "type" "ssecomi")
2648 (set_attr "prefix" "maybe_vex")
2649 (set_attr "prefix_rep" "0")
2650 (set (attr "prefix_data16")
2651 (if_then_else (eq_attr "mode" "DF")
2653 (const_string "0")))
2654 (set_attr "mode" "<MODE>")])
;; Scalar unordered compare that sets the flags register in CCFPU mode.
;; The structure mirrors the comi pattern above: both sources are
;; element 0 of vector-mode operands 0 and 1.
;; Emits %vucomi<ssemodesuffix>.
2656 (define_insn "<sse>_ucomi<round_saeonly_name>"
2657 [(set (reg:CCFPU FLAGS_REG)
2660 (match_operand:<ssevecmode> 0 "register_operand" "v")
2661 (parallel [(const_int 0)]))
2663 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2664 (parallel [(const_int 0)]))))]
2665 "SSE_FLOAT_MODE_P (<MODE>mode)"
2666 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2667 [(set_attr "type" "ssecomi")
2668 (set_attr "prefix" "maybe_vex")
2669 (set_attr "prefix_rep" "0")
2670 (set (attr "prefix_data16")
2671 (if_then_else (eq_attr "mode" "DF")
2673 (const_string "0")))
2674 (set_attr "mode" "<MODE>")])
;; Vector conditional select for 512-bit modes.  The data operands
;; (0, 1, 2) are in a V_512 mode; the comparison (operator 3 on operands
;; 4 and 5) is in a VF_512 mode.  The condition requires both modes to
;; have the same number of elements.  Expansion is delegated to
;; ix86_expand_fp_vcond, whose boolean result is captured in `ok'.
2676 (define_expand "vcond<V_512:mode><VF_512:mode>"
2677 [(set (match_operand:V_512 0 "register_operand")
2679 (match_operator 3 ""
2680 [(match_operand:VF_512 4 "nonimmediate_operand")
2681 (match_operand:VF_512 5 "nonimmediate_operand")])
2682 (match_operand:V_512 1 "general_operand")
2683 (match_operand:V_512 2 "general_operand")))]
2685 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2686 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2688 bool ok = ix86_expand_fp_vcond (operands);
;; Vector conditional select for 256-bit modes.  The data operands
;; (0, 1, 2) are in a V_256 mode; the comparison (operator 3 on operands
;; 4 and 5) is in a VF_256 mode.  The condition requires both modes to
;; have the same number of elements.  Expansion is delegated to
;; ix86_expand_fp_vcond.
2693 (define_expand "vcond<V_256:mode><VF_256:mode>"
2694 [(set (match_operand:V_256 0 "register_operand")
2696 (match_operator 3 ""
2697 [(match_operand:VF_256 4 "nonimmediate_operand")
2698 (match_operand:VF_256 5 "nonimmediate_operand")])
2699 (match_operand:V_256 1 "general_operand")
2700 (match_operand:V_256 2 "general_operand")))]
2702 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2703 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2705 bool ok = ix86_expand_fp_vcond (operands);
;; Vector conditional select for 128-bit modes.  The data operands
;; (0, 1, 2) are in a V_128 mode; the comparison (operator 3 on operands
;; 4 and 5) is in a VF_128 mode.  The condition requires both modes to
;; have the same number of elements.  Expansion is delegated to
;; ix86_expand_fp_vcond.
2710 (define_expand "vcond<V_128:mode><VF_128:mode>"
2711 [(set (match_operand:V_128 0 "register_operand")
2713 (match_operator 3 ""
2714 [(match_operand:VF_128 4 "nonimmediate_operand")
2715 (match_operand:VF_128 5 "nonimmediate_operand")])
2716 (match_operand:V_128 1 "general_operand")
2717 (match_operand:V_128 2 "general_operand")))]
2719 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2720 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2722 bool ok = ix86_expand_fp_vcond (operands);
2727 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2729 ;; Parallel floating point logical operations
2731 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating-point and-not for the 128/256-bit VF modes, optionally
;; masked.  The assembler template is built at output time into a static
;; buffer:
;;   - the suffix defaults to <ssemodesuffix> and may be overridden
;;     depending on the insn "mode" attribute;
;;   - alternative 0 uses the two-operand andn form, alternative 1 the
;;     three-operand vandn form with the optional mask operand;
;;   - when masking is applied and AVX512DQ is unavailable, the integer
;;     vpandn{d,q} is used instead, with the suffix chosen by element
;;     size (DFmode -> "q", otherwise "d").
;; The "mode" attribute prefers <ssePSmode> for 16-byte vectors on
;; targets where packed-single is optimal, and V4SF when optimizing for
;; size without AVX.
2733 (define_insn "<sse>_andnot<mode>3<mask_name>"
2734 [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
2737 (match_operand:VF_128_256 1 "register_operand" "0,v"))
2738 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
2739 "TARGET_SSE && <mask_avx512vl_condition>"
2741 static char buf[128];
2745 switch (get_attr_mode (insn))
2752 suffix = "<ssemodesuffix>";
2755 switch (which_alternative)
2758 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2761 ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2767 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
2768 if (<mask_applied> && !TARGET_AVX512DQ)
2770 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2771 ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2774 snprintf (buf, sizeof (buf), ops, suffix);
2777 [(set_attr "isa" "noavx,avx")
2778 (set_attr "type" "sselog")
2779 (set_attr "prefix" "orig,maybe_evex")
2781 (cond [(and (match_test "<MODE_SIZE> == 16")
2782 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2783 (const_string "<ssePSmode>")
2784 (match_test "TARGET_AVX")
2785 (const_string "<MODE>")
2786 (match_test "optimize_function_for_size_p (cfun)")
2787 (const_string "V4SF")
2789 (const_string "<MODE>")))])
;; Floating-point and-not for the 512-bit VF modes, optionally masked.
;; The template is formatted into a static buffer as "v%sandn%s...",
;; taking a piece between "v" and "andn" plus a suffix.  The suffix
;; defaults to <ssemodesuffix>; without AVX512DQ it becomes "q" or "d"
;; by element size, per the in-body comment about using vpandn[qd].
;; NOTE(review): the first %s is presumably "p" in the non-DQ case and
;; empty otherwise -- confirm against the snprintf arguments.
2792 (define_insn "<sse>_andnot<mode>3<mask_name>"
2793 [(set (match_operand:VF_512 0 "register_operand" "=v")
2796 (match_operand:VF_512 1 "register_operand" "v"))
2797 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2800 static char buf[128];
2804 suffix = "<ssemodesuffix>";
2807 /* There is no vandnp[sd] in avx512f. Use vpandn[qd]. */
2808 if (!TARGET_AVX512DQ)
2810 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2814 snprintf (buf, sizeof (buf),
2815 "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
2819 [(set_attr "type" "sselog")
2820 (set_attr "prefix" "evex")
2821 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the any_logic operations on the 128/256-bit VF modes,
;; optionally masked.  Both inputs may be memory at expand time; the
;; preparation statement calls ix86_fixup_binary_operands_no_copy to
;; adjust the operands before the insn is generated.
2823 (define_expand "<code><mode>3<mask_name>"
2824 [(set (match_operand:VF_128_256 0 "register_operand")
2825 (any_logic:VF_128_256
2826 (match_operand:VF_128_256 1 "nonimmediate_operand")
2827 (match_operand:VF_128_256 2 "nonimmediate_operand")))]
2828 "TARGET_SSE && <mask_avx512vl_condition>"
2829 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Expander for the logical operations on the 512-bit VF modes,
;; optionally masked.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy to adjust the operands.
2831 (define_expand "<code><mode>3<mask_name>"
2832 [(set (match_operand:VF_512 0 "register_operand")
2834 (match_operand:VF_512 1 "nonimmediate_operand")
2835 (match_operand:VF_512 2 "nonimmediate_operand")))]
2837 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matcher for the any_logic operations on the 128/256-bit VF modes,
;; optionally masked.  Operand 1 is commutative with operand 2 ("%"),
;; and ix86_binary_operator_ok must accept the operand combination.
;; The template is built at output time into a static buffer:
;;   - the suffix defaults to <ssemodesuffix> and may be overridden
;;     depending on the insn "mode" attribute;
;;   - alternative 0 uses the two-operand <logic> form, alternative 1
;;     the three-operand v<logic> form with the optional mask operand;
;;   - when masking is applied and AVX512DQ is unavailable, the integer
;;     vp<logic>{d,q} is used instead.
2839 (define_insn "*<code><mode>3<mask_name>"
2840 [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
2841 (any_logic:VF_128_256
2842 (match_operand:VF_128_256 1 "nonimmediate_operand" "%0,v")
2843 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
2844 "TARGET_SSE && <mask_avx512vl_condition>
2845 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2847 static char buf[128];
2851 switch (get_attr_mode (insn))
2858 suffix = "<ssemodesuffix>";
2861 switch (which_alternative)
2864 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2867 ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2873 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
2874 if (<mask_applied> && !TARGET_AVX512DQ)
2876 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2877 ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2880 snprintf (buf, sizeof (buf), ops, suffix);
2883 [(set_attr "isa" "noavx,avx")
2884 (set_attr "type" "sselog")
2885 (set_attr "prefix" "orig,maybe_evex")
2887 (cond [(and (match_test "<MODE_SIZE> == 16")
2888 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2889 (const_string "<ssePSmode>")
2890 (match_test "TARGET_AVX")
2891 (const_string "<MODE>")
2892 (match_test "optimize_function_for_size_p (cfun)")
2893 (const_string "V4SF")
2895 (const_string "<MODE>")))])
;; Matcher for the logical operations on the 512-bit VF modes,
;; optionally masked.  Requires TARGET_AVX512F and an operand
;; combination accepted by ix86_binary_operator_ok.  The template is
;; formatted into a static buffer as "v%s<logic>%s...".  The suffix
;; defaults to <ssemodesuffix>; when the mode is 64 bytes wide or
;; masking is applied, and AVX512DQ is unavailable, the suffix becomes
;; "q" or "d" by element size (integer vp<logic>[dq] form).
;; NOTE(review): the first %s is presumably "p" in that fallback case
;; and empty otherwise -- confirm against the snprintf arguments.
2897 (define_insn "*<code><mode>3<mask_name>"
2898 [(set (match_operand:VF_512 0 "register_operand" "=v")
2900 (match_operand:VF_512 1 "nonimmediate_operand" "%v")
2901 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2902 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2904 static char buf[128];
2908 suffix = "<ssemodesuffix>";
2911 /* There is no v<logic>p[sd] in avx512f. Use vp<logic>[dq]. */
2912 if ((<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ)
2914 suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2918 snprintf (buf, sizeof (buf),
2919 "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
2923 [(set_attr "type" "sselog")
2924 (set_attr "prefix" "evex")
2925 (set_attr "mode" "<sseinsnmode>")])
;; Vector copysign for the VF modes, built from three logical steps:
;;   - (and (not mask) operand 1)
;;   - (and mask operand 2)
;;   - operand 0 = (ior tmp4 tmp5)
;; Here mask (operand 3) comes from ix86_build_signbit_mask, and
;; operands 4 and 5 are fresh pseudo registers presumably holding the
;; two intermediate results -- confirm which temporary receives which.
2927 (define_expand "copysign<mode>3"
2930 (not:VF (match_dup 3))
2931 (match_operand:VF 1 "nonimmediate_operand")))
2933 (and:VF (match_dup 3)
2934 (match_operand:VF 2 "nonimmediate_operand")))
2935 (set (match_operand:VF 0 "register_operand")
2936 (ior:VF (match_dup 4) (match_dup 5)))]
2939 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2941 operands[4] = gen_reg_rtx (<MODE>mode);
2942 operands[5] = gen_reg_rtx (<MODE>mode);
2945 ;; Also define scalar versions. These are used for abs, neg, and
2946 ;; conditional move. Using subregs into vector modes causes register
2947 ;; allocation lossage. These patterns do not allow memory operands
2948 ;; because the native instructions read the full 128 bits.
;; Scalar (MODEF) and-not, register operands only.  The template is
;; built at output time into a static buffer: the suffix is "ps" when
;; the insn "mode" attribute is V4SF, otherwise <ssevecmodesuffix>.
;; Alternative 0 emits the two-operand andn form and alternative 1 the
;; three-operand vandn form.
2950 (define_insn "*andnot<mode>3"
2951 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2954 (match_operand:MODEF 1 "register_operand" "0,x"))
2955 (match_operand:MODEF 2 "register_operand" "x,x")))]
2956 "SSE_FLOAT_MODE_P (<MODE>mode)"
2958 static char buf[32];
2961 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2963 switch (which_alternative)
2966 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2969 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2975 snprintf (buf, sizeof (buf), ops, suffix);
2978 [(set_attr "isa" "noavx,avx")
2979 (set_attr "type" "sselog")
2980 (set_attr "prefix" "orig,vex")
2982 (cond [(and (match_test "<MODE_SIZE> == 16")
2983 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2984 (const_string "V4SF")
2985 (match_test "TARGET_AVX")
2986 (const_string "<ssevecmode>")
2987 (match_test "optimize_function_for_size_p (cfun)")
2988 (const_string "V4SF")
2990 (const_string "<ssevecmode>")))])
;; TFmode and-not: (not operand 1) combined with operand 2, which may be
;; memory.  The mnemonic is chosen at output time: "andnps" when the
;; insn "mode" attribute is V4SF, otherwise the integer "pandn".
;; Alternative 1 prepends "v" for the three-operand AVX form.
;; The "mode" attribute selects V4SF when packed-single is optimal, or
;; when SSE2 is unavailable or the function is optimized for size;
;; TI is the fallback.
2992 (define_insn "*andnottf3"
2993 [(set (match_operand:TF 0 "register_operand" "=x,x")
2995 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
2996 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2999 static char buf[32];
3002 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
3004 switch (which_alternative)
3007 ops = "%s\t{%%2, %%0|%%0, %%2}";
3010 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3016 snprintf (buf, sizeof (buf), ops, tmp);
3019 [(set_attr "isa" "noavx,avx")
3020 (set_attr "type" "sselog")
3021 (set (attr "prefix_data16")
3023 (and (eq_attr "alternative" "0")
3024 (eq_attr "mode" "TI"))
3026 (const_string "*")))
3027 (set_attr "prefix" "orig,vex")
3029 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3030 (const_string "V4SF")
3031 (match_test "TARGET_AVX")
3033 (ior (not (match_test "TARGET_SSE2"))
3034 (match_test "optimize_function_for_size_p (cfun)"))
3035 (const_string "V4SF")
3037 (const_string "TI")))])
;; Scalar (MODEF) logical operation, register operands only; operand 1
;; is commutative with operand 2 ("%").  The template is built at output
;; time into a static buffer: the suffix is "ps" when the insn "mode"
;; attribute is V4SF, otherwise <ssevecmodesuffix>.  Alternative 0 emits
;; the two-operand <logic> form and alternative 1 the three-operand
;; v<logic> form.
3039 (define_insn "*<code><mode>3"
3040 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
3042 (match_operand:MODEF 1 "register_operand" "%0,x")
3043 (match_operand:MODEF 2 "register_operand" "x,x")))]
3044 "SSE_FLOAT_MODE_P (<MODE>mode)"
3046 static char buf[32];
3049 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3051 switch (which_alternative)
3054 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3057 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3063 snprintf (buf, sizeof (buf), ops, suffix);
3066 [(set_attr "isa" "noavx,avx")
3067 (set_attr "type" "sselog")
3068 (set_attr "prefix" "orig,vex")
3070 (cond [(and (match_test "<MODE_SIZE> == 16")
3071 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3072 (const_string "V4SF")
3073 (match_test "TARGET_AVX")
3074 (const_string "<ssevecmode>")
3075 (match_test "optimize_function_for_size_p (cfun)")
3076 (const_string "V4SF")
3078 (const_string "<ssevecmode>")))])
;; Expander for TFmode logical operations.  Both inputs may be memory at
;; expand time; ix86_fixup_binary_operands_no_copy adjusts the operands
;; before the insn is generated.
3080 (define_expand "<code>tf3"
3081 [(set (match_operand:TF 0 "register_operand")
3083 (match_operand:TF 1 "nonimmediate_operand")
3084 (match_operand:TF 2 "nonimmediate_operand")))]
3086 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; Matcher for TFmode logical operations; operand 1 is commutative with
;; operand 2 ("%"), and ix86_binary_operator_ok must accept the operand
;; combination.  The mnemonic is chosen at output time: "<logic>ps" when
;; the insn "mode" attribute is V4SF, otherwise the integer "p<logic>".
;; Alternative 1 prepends "v" for the three-operand AVX form.
3088 (define_insn "*<code>tf3"
3089 [(set (match_operand:TF 0 "register_operand" "=x,x")
3091 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
3092 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
3094 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3096 static char buf[32];
3099 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
3101 switch (which_alternative)
3104 ops = "%s\t{%%2, %%0|%%0, %%2}";
3107 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3113 snprintf (buf, sizeof (buf), ops, tmp);
3116 [(set_attr "isa" "noavx,avx")
3117 (set_attr "type" "sselog")
3118 (set (attr "prefix_data16")
3120 (and (eq_attr "alternative" "0")
3121 (eq_attr "mode" "TI"))
3123 (const_string "*")))
3124 (set_attr "prefix" "orig,vex")
3126 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3127 (const_string "V4SF")
3128 (match_test "TARGET_AVX")
3130 (ior (not (match_test "TARGET_SSE2"))
3131 (match_test "optimize_function_for_size_p (cfun)"))
3132 (const_string "V4SF")
3134 (const_string "TI")))])
3136 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3138 ;; FMA floating point multiply/accumulate instructions. These include
3139 ;; scalar versions of the instructions as well as vector versions.
3141 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3143 ;; The standard names for scalar FMA are only available with SSE math enabled.
3144 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
3145 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3146 ;; and TARGET_FMA4 are both false.
3147 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3148 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
3149 ;; GAS to allow proper prefix selection. However, for the moment all hardware
3150 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-name FMA expanders.  The scalar SF/DF entries
;; additionally require TARGET_SSE_MATH.  The 128/256-bit vector modes
;; accept FMA, FMA4 or AVX512VL; the 512-bit modes require AVX512F.
3151 (define_mode_iterator FMAMODEM
3152 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3153 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3154 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3155 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3156 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3157 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3158 (V16SF "TARGET_AVX512F")
3159 (V8DF "TARGET_AVX512F")])
;; Standard-name fused multiply-add expander over FMAMODEM.  None of the
;; three inputs is negated; each may be a register or memory.
3161 (define_expand "fma<mode>4"
3162 [(set (match_operand:FMAMODEM 0 "register_operand")
3164 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3165 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3166 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-name fused multiply-subtract expander over FMAMODEM.
;; Operand 3 (the addend) is wrapped in neg.
3168 (define_expand "fms<mode>4"
3169 [(set (match_operand:FMAMODEM 0 "register_operand")
3171 (match_operand:FMAMODEM 1 "nonimmediate_operand")
3172 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3173 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
;; Standard-name negated fused multiply-add expander over FMAMODEM.
;; Operand 1 (a multiplicand) is wrapped in neg; the addend is not.
3175 (define_expand "fnma<mode>4"
3176 [(set (match_operand:FMAMODEM 0 "register_operand")
3178 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3179 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3180 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-name negated fused multiply-subtract expander over FMAMODEM.
;; Both operand 1 (a multiplicand) and operand 3 (the addend) are
;; wrapped in neg.
3182 (define_expand "fnms<mode>4"
3183 [(set (match_operand:FMAMODEM 0 "register_operand")
3185 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3186 (match_operand:FMAMODEM 2 "nonimmediate_operand")
3187 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3189 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; This iterator has the same mode list as FMAMODEM, but the scalar
;; SF/DF entries carry no TARGET_SSE_MATH requirement.
3190 (define_mode_iterator FMAMODE_AVX512
3191 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3192 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3193 (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3194 (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3195 (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3196 (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3197 (V16SF "TARGET_AVX512F")
3198 (V8DF "TARGET_AVX512F")])
;; Unconditional mode list used by the "*fma_*_<mode>" FMA3/FMA4 insns:
;; scalar SF/DF plus the 128- and 256-bit float vectors (no 512-bit modes).
3200 (define_mode_iterator FMAMODE
3201 [SF DF V4SF V2DF V8SF V4DF])
;; Expander "fma4i_fmadd_<mode>" over FMAMODE_AVX512 (the iterator whose
;; scalar entries are not gated on SSE math): register destination and three
;; nonimmediate inputs.
3203 (define_expand "fma4i_fmadd_<mode>"
3204 [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3206 (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3207 (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3208 (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
;; "_maskz" fmadd expander over VF_AVX512VL.  Operands 1-3 are the inputs and
;; operand 4 is the mask register.  The body forwards to
;; gen_fma_fmadd_<mode>_maskz_1, inserting CONST0_RTX (<MODE>mode) ahead of
;; the mask operand.
3210 (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3211 [(match_operand:VF_AVX512VL 0 "register_operand")
3212 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3213 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3214 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3215 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3216 "TARGET_AVX512F && <round_mode512bit_condition>"
3218 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3219 operands[0], operands[1], operands[2], operands[3],
3220 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; FMA3/FMA4 fmadd over FMAMODE.  Five alternatives: 0-2 emit the
;; three-operand vfmadd132/213/231 forms (isa "fma"), 3-4 emit the
;; four-operand vfmadd form (isa "fma4").  In alternatives 0-2 either
;; operand 1 or operand 3 is tied to the destination via constraint "0".
3224 (define_insn "*fma_fmadd_<mode>"
3225 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3227 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3228 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3229 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3230 "TARGET_FMA || TARGET_FMA4"
3232 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3233 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3234 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3235 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3236 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3237 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3238 (set_attr "type" "ssemuladd")
3239 (set_attr "mode" "<MODE>")])
3241 ;; Suppose AVX-512F as baseline
;; Scalar SF/DF and 512-bit vectors are unconditional; the 128/256-bit
;; vector modes additionally require TARGET_AVX512VL.
3242 (define_mode_iterator VF_SF_AVX512VL
3243 [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3244 DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; AVX-512 fmadd over VF_SF_AVX512VL.  The name, condition and templates
;; carry the sd-mask and embedded-rounding subst attributes.  Three
;; alternatives emit vfmadd132/213/231; operand 1 or operand 3 is tied to
;; the destination.
3246 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3247 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3249 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3250 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3251 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3252 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3254 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3255 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3256 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3257 [(set_attr "type" "ssemuladd")
3258 (set_attr "mode" "<MODE>")])
;; "_mask" fmadd: the result is vec_merge'd under mask operand 4 (constraint
;; "Yk").  Operand 1 is tied to the destination in both alternatives, which
;; emit vfmadd132 or vfmadd213 with the {%4} mask annotation on %0.
3260 (define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3261 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3262 (vec_merge:VF_AVX512VL
3264 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3265 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3266 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3268 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3269 "TARGET_AVX512F && <round_mode512bit_condition>"
3271 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3272 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3273 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3274 (set_attr "type" "ssemuladd")
3275 (set_attr "mode" "<MODE>")])
;; "_mask3" fmadd: the result is vec_merge'd under mask operand 4 (constraint
;; "Yk"); operand 3 is tied to the destination and the vfmadd231 form is
;; emitted.  The destination and operand 1 use the "v" register constraint,
;; the same one used by the fmsub/fnmadd/fnmsub/fmaddsub/fmsubadd "_mask3"
;; patterns in this section.
3277 (define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3278 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3279 (vec_merge:VF_AVX512VL
3281 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3282 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3283 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3285 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3287 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3288 [(set_attr "isa" "fma_avx512f")
3289 (set_attr "type" "ssemuladd")
3290 (set_attr "mode" "<MODE>")])
;; FMA3/FMA4 fmsub over FMAMODE.  Five alternatives: 0-2 emit the
;; three-operand vfmsub132/213/231 forms (isa "fma"), 3-4 emit the
;; four-operand vfmsub form (isa "fma4").
3292 (define_insn "*fma_fmsub_<mode>"
3293 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3295 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3296 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3298 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3299 "TARGET_FMA || TARGET_FMA4"
3301 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3302 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3303 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3304 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3305 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3306 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3307 (set_attr "type" "ssemuladd")
3308 (set_attr "mode" "<MODE>")])
;; AVX-512 fmsub over VF_SF_AVX512VL, with sd-mask and embedded-rounding
;; subst attributes in the name, condition and templates.  Three
;; alternatives emit vfmsub132/213/231.
3310 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3311 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3313 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3314 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3316 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3317 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3319 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3320 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3321 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3322 [(set_attr "type" "ssemuladd")
3323 (set_attr "mode" "<MODE>")])
;; "_mask" fmsub: the result is vec_merge'd under mask operand 4 ("Yk").
;; Operand 1 is tied to the destination; the two alternatives emit
;; vfmsub132 or vfmsub213 with the {%4} mask annotation on %0.
3325 (define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3326 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3327 (vec_merge:VF_AVX512VL
3329 (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3330 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3332 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3334 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3337 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3338 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3339 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3340 (set_attr "type" "ssemuladd")
3341 (set_attr "mode" "<MODE>")])
;; "_mask3" fmsub: the result is vec_merge'd under mask operand 4 ("Yk");
;; operand 3 is tied to the destination and the vfmsub231 form is emitted.
3343 (define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
3344 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3345 (vec_merge:VF_AVX512VL
3347 (match_operand:VF_AVX512VL 1 "register_operand" "v")
3348 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3350 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3352 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3353 "TARGET_AVX512F && <round_mode512bit_condition>"
3354 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3355 [(set_attr "isa" "fma_avx512f")
3356 (set_attr "type" "ssemuladd")
3357 (set_attr "mode" "<MODE>")])
;; FMA3/FMA4 fnmadd over FMAMODE.  Five alternatives: 0-2 emit the
;; three-operand vfnmadd132/213/231 forms (isa "fma"), 3-4 emit the
;; four-operand vfnmadd form (isa "fma4").
3359 (define_insn "*fma_fnmadd_<mode>"
3360 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3363 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3364 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3365 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3366 "TARGET_FMA || TARGET_FMA4"
3368 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3369 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3370 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3371 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3372 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3373 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3374 (set_attr "type" "ssemuladd")
3375 (set_attr "mode" "<MODE>")])
;; AVX-512 fnmadd over VF_SF_AVX512VL, with sd-mask and embedded-rounding
;; subst attributes in the name, condition and templates.  Three
;; alternatives emit vfnmadd132/213/231.
3377 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3378 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3381 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3382 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3383 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3384 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3386 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3387 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3388 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3389 [(set_attr "type" "ssemuladd")
3390 (set_attr "mode" "<MODE>")])
;; "_mask" fnmadd: the result is vec_merge'd under mask operand 4 ("Yk").
;; Operand 1 is tied to the destination; the two alternatives emit
;; vfnmadd132 or vfnmadd213 with the {%4} mask annotation on %0.
3392 (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
3393 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3394 (vec_merge:VF_AVX512VL
3397 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3398 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3399 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3401 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3402 "TARGET_AVX512F && <round_mode512bit_condition>"
3404 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3405 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3406 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3407 (set_attr "type" "ssemuladd")
3408 (set_attr "mode" "<MODE>")])
;; "_mask3" fnmadd: the result is vec_merge'd under mask operand 4 ("Yk");
;; operand 3 is tied to the destination and the vfnmadd231 form is emitted.
3410 (define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
3411 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3412 (vec_merge:VF_AVX512VL
3415 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3416 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3417 (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3419 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3420 "TARGET_AVX512F && <round_mode512bit_condition>"
3421 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3422 [(set_attr "isa" "fma_avx512f")
3423 (set_attr "type" "ssemuladd")
3424 (set_attr "mode" "<MODE>")])
;; FMA3/FMA4 fnmsub over FMAMODE.  Five alternatives: 0-2 emit the
;; three-operand vfnmsub132/213/231 forms (isa "fma"), 3-4 emit the
;; four-operand vfnmsub form (isa "fma4").
;; This pattern has no mask/rounding subst suffix in its name and is gated on
;; FMA/FMA4 only, so its templates are written without the
;; <round_sd_mask_op4>/<sd_mask_op4> placeholders, exactly like the sibling
;; *fma_fmadd/*fma_fmsub/*fma_fnmadd patterns.
3426 (define_insn "*fma_fnmsub_<mode>"
3427 [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3430 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3431 (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3433 (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3434 "TARGET_FMA || TARGET_FMA4"
3436 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3437 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3438 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3439 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3440 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3441 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3442 (set_attr "type" "ssemuladd")
3443 (set_attr "mode" "<MODE>")])
;; AVX-512 fnmsub over VF_SF_AVX512VL, with sd-mask and embedded-rounding
;; subst attributes in the name, condition and templates.  Three
;; alternatives emit vfnmsub132/213/231.
3445 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3446 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3449 (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3450 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3452 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3453 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3455 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3456 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3457 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3458 [(set_attr "type" "ssemuladd")
3459 (set_attr "mode" "<MODE>")])
;; "_mask" fnmsub: the result is vec_merge'd under mask operand 4 ("Yk").
;; Operand 1 is tied to the destination; the two alternatives emit
;; vfnmsub132 or vfnmsub213 with the {%4} mask annotation on %0.
3461 (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
3462 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3463 (vec_merge:VF_AVX512VL
3466 (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3467 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3469 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3471 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3472 "TARGET_AVX512F && <round_mode512bit_condition>"
3474 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3475 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3476 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3477 (set_attr "type" "ssemuladd")
3478 (set_attr "mode" "<MODE>")])
;; "_mask3" fnmsub: the result is vec_merge'd under mask operand 4 ("Yk");
;; operand 3 is tied to the destination and the vfnmsub231 form is emitted.
3480 (define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
3481 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3482 (vec_merge:VF_AVX512VL
3485 (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3486 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3488 (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3490 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3492 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3493 [(set_attr "isa" "fma_avx512f")
3494 (set_attr "type" "ssemuladd")
3495 (set_attr "mode" "<MODE>")])
3497 ;; FMA parallel floating point multiply addsub and subadd operations.
3499 ;; It would be possible to represent these without the UNSPEC as
3500 ;;
3501 ;; (vec_merge
3502 ;; (fma op1 op2 op3)
3503 ;; (fma op1 op2 (neg op3))
3504 ;; (merge-const))
3505 ;;
3506 ;; But this doesn't seem useful in practice.
;; Expander "fmaddsub_<mode>" over VF: register destination and three
;; nonimmediate inputs passed as a bracketed operand vector.  Enabled when
;; FMA, FMA4 or AVX512F is available.
3508 (define_expand "fmaddsub_<mode>"
3509 [(set (match_operand:VF 0 "register_operand")
3511 [(match_operand:VF 1 "nonimmediate_operand")
3512 (match_operand:VF 2 "nonimmediate_operand")
3513 (match_operand:VF 3 "nonimmediate_operand")]
3515 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; "_maskz" fmaddsub expander over VF_AVX512VL.  Operands 1-3 are the inputs
;; and operand 4 is the mask register.  The body forwards to
;; gen_fma_fmaddsub_<mode>_maskz_1, inserting CONST0_RTX (<MODE>mode) ahead
;; of the mask operand.
3517 (define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
3518 [(match_operand:VF_AVX512VL 0 "register_operand")
3519 (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3520 (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3521 (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3522 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3525 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3526 operands[0], operands[1], operands[2], operands[3],
3527 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; FMA3/FMA4 fmaddsub over VF_128_256.  Five alternatives: 0-2 emit the
;; three-operand vfmaddsub132/213/231 forms (isa "fma"), 3-4 emit the
;; four-operand vfmaddsub form (isa "fma4").
3531 (define_insn "*fma_fmaddsub_<mode>"
3532 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3534 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3535 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3536 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
3538 "TARGET_FMA || TARGET_FMA4"
3540 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3541 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3542 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3543 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3544 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3545 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3546 (set_attr "type" "ssemuladd")
3547 (set_attr "mode" "<MODE>")])
;; AVX-512 fmaddsub expressed as an unspec over VF_SF_AVX512VL, with sd-mask
;; and embedded-rounding subst attributes in the name, condition and
;; templates.  Three alternatives emit vfmaddsub132/213/231.
3549 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3550 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3551 (unspec:VF_SF_AVX512VL
3552 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3553 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3554 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
3556 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3558 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3559 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3560 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3561 [(set_attr "type" "ssemuladd")
3562 (set_attr "mode" "<MODE>")])
;; "_mask" fmaddsub: the result is vec_merge'd under mask operand 4 ("Yk").
;; Operand 1 is tied to the destination; the two alternatives emit
;; vfmaddsub132 or vfmaddsub213 with the {%4} mask annotation on %0.
3564 (define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
3565 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3566 (vec_merge:VF_AVX512VL
3568 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3569 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3570 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
3573 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3576 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3577 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3578 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3579 (set_attr "type" "ssemuladd")
3580 (set_attr "mode" "<MODE>")])
;; "_mask3" fmaddsub: the result is vec_merge'd under mask operand 4 ("Yk");
;; operand 3 is tied to the destination and the vfmaddsub231 form is emitted.
3582 (define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
3583 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3584 (vec_merge:VF_AVX512VL
3586 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3587 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3588 (match_operand:VF_AVX512VL 3 "register_operand" "0")]
3591 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3593 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3594 [(set_attr "isa" "fma_avx512f")
3595 (set_attr "type" "ssemuladd")
3596 (set_attr "mode" "<MODE>")])
;; FMA3/FMA4 fmsubadd over VF_128_256.  Five alternatives: 0-2 emit the
;; three-operand vfmsubadd132/213/231 forms (isa "fma"), 3-4 emit the
;; four-operand vfmsubadd form (isa "fma4").
3598 (define_insn "*fma_fmsubadd_<mode>"
3599 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3601 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3602 (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3604 (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
3606 "TARGET_FMA || TARGET_FMA4"
3608 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3609 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3610 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3611 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3612 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3613 [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3614 (set_attr "type" "ssemuladd")
3615 (set_attr "mode" "<MODE>")])
;; AVX-512 fmsubadd expressed as an unspec over VF_SF_AVX512VL, with sd-mask
;; and embedded-rounding subst attributes in the name, condition and
;; templates.  Three alternatives emit vfmsubadd132/213/231.
3617 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3618 [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3619 (unspec:VF_SF_AVX512VL
3620 [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3621 (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3623 (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
3625 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3627 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3628 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3629 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3630 [(set_attr "type" "ssemuladd")
3631 (set_attr "mode" "<MODE>")])
;; "_mask" fmsubadd: the result is vec_merge'd under mask operand 4 ("Yk").
;; Operand 1 is tied to the destination; the two alternatives emit
;; vfmsubadd132 or vfmsubadd213 with the {%4} mask annotation on %0.
3633 (define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
3634 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3635 (vec_merge:VF_AVX512VL
3637 [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3638 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3640 (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
3643 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3646 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3647 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3648 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3649 (set_attr "type" "ssemuladd")
3650 (set_attr "mode" "<MODE>")])
;; "_mask3" fmsubadd: the result is vec_merge'd under mask operand 4 ("Yk");
;; operand 3 is tied to the destination and the vfmsubadd231 form is emitted.
3652 (define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
3653 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3654 (vec_merge:VF_AVX512VL
3656 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3657 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3659 (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
3662 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3664 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3665 [(set_attr "isa" "fma_avx512f")
3666 (set_attr "type" "ssemuladd")
3667 (set_attr "mode" "<MODE>")])
3669 ;; FMA3 floating point scalar intrinsics. These merge result with
3670 ;; high-order elements from the destination register.
;; Expander for the scalar fmadd intrinsic over VF_128; the three inputs use
;; the rounding-aware predicate <round_nimm_predicate>.
;; NOTE(review): the remainder of this expander (merge operand, condition) is
;; not present in this listing -- consult the full file before editing.
3672 (define_expand "fmai_vmfmadd_<mode><round_name>"
3673 [(set (match_operand:VF_128 0 "register_operand")
3676 (match_operand:VF_128 1 "<round_nimm_predicate>")
3677 (match_operand:VF_128 2 "<round_nimm_predicate>")
3678 (match_operand:VF_128 3 "<round_nimm_predicate>"))
;; Scalar FMA3 fmadd insn over VF_128.  Operand 1 is tied to the destination;
;; the two alternatives emit vfmadd132 or vfmadd213 with the scalar suffix.
;; NOTE(review): the name has no <round_name> suffix although the body uses
;; <round_nimm_predicate>/<round_constraint>/<round_op4>, unlike the
;; *fmai_fnmadd/*fmai_fnmsub patterns -- confirm whether this is intended.
3683 (define_insn "*fmai_fmadd_<mode>"
3684 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3687 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3688 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
3689 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
3692 "TARGET_FMA || TARGET_AVX512F"
3694 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3695 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3696 [(set_attr "type" "ssemuladd")
3697 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 fmsub insn over VF_128.  Operand 1 is tied to the destination;
;; the two alternatives emit vfmsub132 or vfmsub213 with the scalar suffix.
;; NOTE(review): the name has no <round_name> suffix although the body uses
;; <round_nimm_predicate>/<round_constraint>/<round_op4>, unlike the
;; *fmai_fnmadd/*fmai_fnmsub patterns -- confirm whether this is intended.
3699 (define_insn "*fmai_fmsub_<mode>"
3700 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3703 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3704 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
3706 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3709 "TARGET_FMA || TARGET_AVX512F"
3711 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3712 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3713 [(set_attr "type" "ssemuladd")
3714 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 fnmadd insn over VF_128.  Operand 2 is listed first in the RTL
;; and operand 1 (tied to the destination) second; the two alternatives emit
;; vfnmadd132 or vfnmadd213 with the scalar suffix.
3716 (define_insn "*fmai_fnmadd_<mode><round_name>"
3717 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3721 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
3722 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3723 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3726 "TARGET_FMA || TARGET_AVX512F"
3728 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3729 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3730 [(set_attr "type" "ssemuladd")
3731 (set_attr "mode" "<MODE>")])
;; Scalar FMA3 fnmsub insn over VF_128.  Operand 2 is listed first in the RTL
;; and operand 1 (tied to the destination) second; the two alternatives emit
;; vfnmsub132 or vfnmsub213 with the scalar suffix.
3733 (define_insn "*fmai_fnmsub_<mode><round_name>"
3734 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3738 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
3739 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3741 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3744 "TARGET_FMA || TARGET_AVX512F"
3746 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3747 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3748 [(set_attr "type" "ssemuladd")
3749 (set_attr "mode" "<MODE>")])
3751 ;; FMA4 floating point scalar intrinsics. These write the
3752 ;; entire destination register, with the high-order elements zeroed.
;; Expander for the FMA4 scalar fmadd intrinsic over VF_128.  The preparation
;; statement sets operands[4] to the all-zero constant of the vector mode.
3754 (define_expand "fma4i_vmfmadd_<mode>"
3755 [(set (match_operand:VF_128 0 "register_operand")
3758 (match_operand:VF_128 1 "nonimmediate_operand")
3759 (match_operand:VF_128 2 "nonimmediate_operand")
3760 (match_operand:VF_128 3 "nonimmediate_operand"))
3764 "operands[4] = CONST0_RTX (<MODE>mode);")
;; FMA4 scalar fmadd insn over VF_128: four-operand vfmadd with the scalar
;; suffix.  Operand 4 must satisfy const0_operand.  The two alternatives
;; differ in whether operand 2 or operand 3 may be in memory.
3766 (define_insn "*fma4i_vmfmadd_<mode>"
3767 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3770 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3771 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3772 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3773 (match_operand:VF_128 4 "const0_operand")
3776 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3777 [(set_attr "type" "ssemuladd")
3778 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fmsub insn over VF_128: four-operand vfmsub with the scalar
;; suffix.  Operand 4 must satisfy const0_operand.  The two alternatives
;; differ in whether operand 2 or operand 3 may be in memory.
3780 (define_insn "*fma4i_vmfmsub_<mode>"
3781 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3784 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3785 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3787 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3788 (match_operand:VF_128 4 "const0_operand")
3791 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3792 [(set_attr "type" "ssemuladd")
3793 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fnmadd insn over VF_128: four-operand vfnmadd with the scalar
;; suffix.  Operand 4 must satisfy const0_operand.  The two alternatives
;; differ in whether operand 2 or operand 3 may be in memory.
3795 (define_insn "*fma4i_vmfnmadd_<mode>"
3796 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3800 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3801 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3802 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3803 (match_operand:VF_128 4 "const0_operand")
3806 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3807 [(set_attr "type" "ssemuladd")
3808 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fnmsub insn over VF_128: four-operand vfnmsub with the scalar
;; suffix.  Operand 4 must satisfy const0_operand.  The two alternatives
;; differ in whether operand 2 or operand 3 may be in memory.
3810 (define_insn "*fma4i_vmfnmsub_<mode>"
3811 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3815 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3816 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3818 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3819 (match_operand:VF_128 4 "const0_operand")
3822 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3823 [(set_attr "type" "ssemuladd")
3824 (set_attr "mode" "<MODE>")])
3826 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3828 ;; Parallel single-precision floating point conversion operations
3830 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts V2SI operand 2 (constraint "ym") to V2SF with (float).
;; V4SF operand 1 is tied to the destination register.
3832 (define_insn "sse_cvtpi2ps"
3833 [(set (match_operand:V4SF 0 "register_operand" "=x")
3836 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3837 (match_operand:V4SF 1 "register_operand" "0")
3840 "cvtpi2ps\t{%2, %0|%0, %2}"
3841 [(set_attr "type" "ssecvt")
3842 (set_attr "mode" "V4SF")])
;; cvtps2pi: an unspec converts V4SF operand 1 to V4SI, and elements 0 and 1
;; are selected into the V2SI destination (constraint "y", unit "mmx").
3844 (define_insn "sse_cvtps2pi"
3845 [(set (match_operand:V2SI 0 "register_operand" "=y")
3847 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3849 (parallel [(const_int 0) (const_int 1)])))]
3851 "cvtps2pi\t{%1, %0|%0, %q1}"
3852 [(set_attr "type" "ssecvt")
3853 (set_attr "unit" "mmx")
3854 (set_attr "mode" "DI")])
;; cvttps2pi: (fix:V4SI ...) of V4SF operand 1, with elements 0 and 1
;; selected into the V2SI destination (constraint "y", unit "mmx").
;; prefix_rep is explicitly set to "0".
3856 (define_insn "sse_cvttps2pi"
3857 [(set (match_operand:V2SI 0 "register_operand" "=y")
3859 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3860 (parallel [(const_int 0) (const_int 1)])))]
3862 "cvttps2pi\t{%1, %0|%0, %q1}"
3863 [(set_attr "type" "ssecvt")
3864 (set_attr "unit" "mmx")
3865 (set_attr "prefix_rep" "0")
3866 (set_attr "mode" "SF")])
;; cvtsi2ss: converts SI operand 2 to SF with (float).  Alternatives 0-1 are
;; the non-AVX two-operand forms (register / memory source, operand 1 tied
;; to the destination); alternative 2 is the three-operand AVX form, which
;; can carry the embedded-rounding operand.
3868 (define_insn "sse_cvtsi2ss<round_name>"
3869 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3872 (float:SF (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3873 (match_operand:V4SF 1 "register_operand" "0,0,v")
3877 cvtsi2ss\t{%2, %0|%0, %2}
3878 cvtsi2ss\t{%2, %0|%0, %2}
3879 vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3880 [(set_attr "isa" "noavx,noavx,avx")
3881 (set_attr "type" "sseicvt")
3882 (set_attr "athlon_decode" "vector,double,*")
3883 (set_attr "amdfam10_decode" "vector,double,*")
3884 (set_attr "bdver1_decode" "double,direct,*")
3885 (set_attr "btver2_decode" "double,double,double")
3886 (set_attr "prefix" "orig,orig,maybe_evex")
3887 (set_attr "mode" "SF")])
;; cvtsi2ssq: 64-bit variant of the pattern above; converts DI operand 2 to
;; SF and requires TARGET_64BIT.  Alternatives 0-1 are the non-AVX forms
;; (prefix_rex "1"); alternative 2 is the AVX form (length_vex "4").
3889 (define_insn "sse_cvtsi2ssq<round_name>"
3890 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3893 (float:SF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3894 (match_operand:V4SF 1 "register_operand" "0,0,v")
3896 "TARGET_SSE && TARGET_64BIT"
3898 cvtsi2ssq\t{%2, %0|%0, %2}
3899 cvtsi2ssq\t{%2, %0|%0, %2}
3900 vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3901 [(set_attr "isa" "noavx,noavx,avx")
3902 (set_attr "type" "sseicvt")
3903 (set_attr "athlon_decode" "vector,double,*")
3904 (set_attr "amdfam10_decode" "vector,double,*")
3905 (set_attr "bdver1_decode" "double,direct,*")
3906 (set_attr "btver2_decode" "double,double,double")
3907 (set_attr "length_vex" "*,*,4")
3908 (set_attr "prefix_rex" "1,1,*")
3909 (set_attr "prefix" "orig,orig,maybe_evex")
3910 (set_attr "mode" "SF")])
;; cvtss2si on a vector source: UNSPEC_FIX_NOTRUNC applied to element 0 of
;; V4SF operand 1, producing an SI register result.  The template uses the
;; "%v" prefix and can carry the embedded-rounding operand.
3912 (define_insn "sse_cvtss2si<round_name>"
3913 [(set (match_operand:SI 0 "register_operand" "=r,r")
3916 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3917 (parallel [(const_int 0)]))]
3918 UNSPEC_FIX_NOTRUNC))]
3920 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3921 [(set_attr "type" "sseicvt")
3922 (set_attr "athlon_decode" "double,vector")
3923 (set_attr "bdver1_decode" "double,double")
3924 (set_attr "prefix_rep" "1")
3925 (set_attr "prefix" "maybe_vex")
3926 (set_attr "mode" "SI")])
;; cvtss2si on a scalar source: UNSPEC_FIX_NOTRUNC applied directly to SF
;; operand 1 (register or memory), producing an SI register result.
3928 (define_insn "sse_cvtss2si_2"
3929 [(set (match_operand:SI 0 "register_operand" "=r,r")
3930 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3931 UNSPEC_FIX_NOTRUNC))]
3933 "%vcvtss2si\t{%1, %0|%0, %k1}"
3934 [(set_attr "type" "sseicvt")
3935 (set_attr "athlon_decode" "double,vector")
3936 (set_attr "amdfam10_decode" "double,double")
3937 (set_attr "bdver1_decode" "double,double")
3938 (set_attr "prefix_rep" "1")
3939 (set_attr "prefix" "maybe_vex")
3940 (set_attr "mode" "SI")])
;; 64-bit cvtss2si on a vector source: UNSPEC_FIX_NOTRUNC applied to
;; element 0 of V4SF operand 1, producing a DI register result.  Requires
;; TARGET_64BIT; the template adds the "{q}" suffix.
3942 (define_insn "sse_cvtss2siq<round_name>"
3943 [(set (match_operand:DI 0 "register_operand" "=r,r")
3946 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3947 (parallel [(const_int 0)]))]
3948 UNSPEC_FIX_NOTRUNC))]
3949 "TARGET_SSE && TARGET_64BIT"
3950 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3951 [(set_attr "type" "sseicvt")
3952 (set_attr "athlon_decode" "double,vector")
3953 (set_attr "bdver1_decode" "double,double")
3954 (set_attr "prefix_rep" "1")
3955 (set_attr "prefix" "maybe_vex")
3956 (set_attr "mode" "DI")])
;; 64-bit cvtss2si on a scalar source: UNSPEC_FIX_NOTRUNC applied directly
;; to SF operand 1 (register or memory), producing a DI register result.
;; Requires TARGET_64BIT.
3958 (define_insn "sse_cvtss2siq_2"
3959 [(set (match_operand:DI 0 "register_operand" "=r,r")
3960 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3961 UNSPEC_FIX_NOTRUNC))]
3962 "TARGET_SSE && TARGET_64BIT"
3963 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3964 [(set_attr "type" "sseicvt")
3965 (set_attr "athlon_decode" "double,vector")
3966 (set_attr "amdfam10_decode" "double,double")
3967 (set_attr "bdver1_decode" "double,double")
3968 (set_attr "prefix_rep" "1")
3969 (set_attr "prefix" "maybe_vex")
3970 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 of V4SF operand 1 to an SI register result.
;; Uses the "round_saeonly" subst attributes for the predicate, constraint
;; and template operand.
3972 (define_insn "sse_cvttss2si<round_saeonly_name>"
3973 [(set (match_operand:SI 0 "register_operand" "=r,r")
3976 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3977 (parallel [(const_int 0)]))))]
3979 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3980 [(set_attr "type" "sseicvt")
3981 (set_attr "athlon_decode" "double,vector")
3982 (set_attr "amdfam10_decode" "double,double")
3983 (set_attr "bdver1_decode" "double,double")
3984 (set_attr "prefix_rep" "1")
3985 (set_attr "prefix" "maybe_vex")
3986 (set_attr "mode" "SI")])
;; cvttss2si{q}: DImode result in a general register from V4SF operand 1
;; with a (parallel [(const_int 0)]) element selector.  Enabled only for
;; TARGET_SSE && TARGET_64BIT.
;; The second alternative uses <round_saeonly_constraint2>, the same
;; attribute used by sse_cvttss2si<round_saeonly_name> and the cvttsd2si
;; patterns, so that the two alternatives stay distinct and line up with
;; the two-valued per-alternative decode attributes below.
3988 (define_insn "sse_cvttss2siq<round_saeonly_name>"
3989 [(set (match_operand:DI 0 "register_operand" "=r,r")
3992 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3993 (parallel [(const_int 0)]))))]
3994 "TARGET_SSE && TARGET_64BIT"
3995 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3996 [(set_attr "type" "sseicvt")
3997 (set_attr "athlon_decode" "double,vector")
3998 (set_attr "amdfam10_decode" "double,double")
3999 (set_attr "bdver1_decode" "double,double")
4000 (set_attr "prefix_rep" "1")
4001 (set_attr "prefix" "maybe_vex")
4002 (set_attr "mode" "DI")])
;; vcvtusi2<ssescalarmodesuffix> with a 32-bit source: the unsigned_float
;; of SImode operand 2 is vec_duplicate'd to VF_128 and combined with
;; VF_128 register operand 1 (the combining operator line is not visible
;; in this excerpt -- presumably a vec_merge; confirm).  Three-operand,
;; EVEX-only template.
4004 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
4005 [(set (match_operand:VF_128 0 "register_operand" "=v")
4007 (vec_duplicate:VF_128
4008 (unsigned_float:<ssescalarmode>
4009 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4010 (match_operand:VF_128 1 "register_operand" "v")
4012 "TARGET_AVX512F && <round_modev4sf_condition>"
4013 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
4014 [(set_attr "type" "sseicvt")
4015 (set_attr "prefix" "evex")
4016 (set_attr "mode" "<ssescalarmode>")])
;; vcvtusi2<ssescalarmodesuffix> with a 64-bit source: same shape as the
;; 32-bit pattern but operand 2 is DImode, and the insn is enabled only
;; for TARGET_AVX512F && TARGET_64BIT.
4018 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
4019 [(set (match_operand:VF_128 0 "register_operand" "=v")
4021 (vec_duplicate:VF_128
4022 (unsigned_float:<ssescalarmode>
4023 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4024 (match_operand:VF_128 1 "register_operand" "v")
4026 "TARGET_AVX512F && TARGET_64BIT"
4027 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
4028 [(set_attr "type" "sseicvt")
4029 (set_attr "prefix" "evex")
4030 (set_attr "mode" "<ssescalarmode>")])
;; cvtdq2ps: VF1 destination from an <sseintvecmode> source operand.
;; Enabled for TARGET_SSE2 subject to the mask and rounding 512-bit
;; conditions; the template carries the optional mask operand and
;; rounding text.
4032 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
4033 [(set (match_operand:VF1 0 "register_operand" "=v")
4035 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))]
4036 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
4037 "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4038 [(set_attr "type" "ssecvt")
4039 (set_attr "prefix" "maybe_vex")
4040 (set_attr "mode" "<sseinsnmode>")])
;; vcvtudq2ps: unsigned_float from an <sseintvecmode> operand into a
;; VF1_AVX512VL register.  EVEX-only.
;; NOTE(review): operand 1 uses the fixed "nonimmediate_operand" predicate
;; whereas the signed float<sseintvecmodelower><mode>2 pattern uses
;; <round_nimm_predicate>; the constraint is <round_constraint> in both.
;; Confirm the predicate difference is intended.
4042 (define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4043 [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4044 (unsigned_float:VF1_AVX512VL
4045 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4047 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4048 [(set_attr "type" "ssecvt")
4049 (set_attr "prefix" "evex")
4050 (set_attr "mode" "<MODE>")])
;; Expander for unsigned integer vector -> VF1 float vector conversion.
;; Available for TARGET_SSE2 when the mode is V4SFmode or TARGET_AVX2 is
;; set.  The body dispatches between the ufloat insns
;; (gen_ufloatv16siv16sf2 for V16SFmode; gen_ufloatv4siv4sf2 /
;; gen_ufloatv8siv8sf2 under TARGET_AVX512VL) and the helper
;; ix86_expand_vector_convert_uns_vsivsf.  The else/brace lines joining
;; those branches are not visible in this excerpt.
4052 (define_expand "floatuns<sseintvecmodelower><mode>2"
4053 [(match_operand:VF1 0 "register_operand")
4054 (match_operand:<sseintvecmode> 1 "register_operand")]
4055 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
4057 if (<MODE>mode == V16SFmode)
4058 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
4060 if (TARGET_AVX512VL)
4062 if (<MODE>mode == V4SFmode)
4063 emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4065 emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4068 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4074 ;; For the <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> and
;; <avx512>_ufix_notrunc<sf2simodelower><mode> insn pattern names: maps
;; each SI vector mode to the lowercase name of the SF vector mode with
;; the same element count.
4075 (define_mode_attr sf2simodelower
4076 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
;; cvtps2dq: VI4_AVX destination from an <ssePSmode> register-or-memory
;; operand under UNSPEC_FIX_NOTRUNC.  Enabled for TARGET_SSE2 subject to
;; <mask_mode512bit_condition>.  The prefix_data16 attribute is computed
;; from a TARGET_AVX test (only part of that expression is visible here).
4078 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
4079 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4081 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
4082 UNSPEC_FIX_NOTRUNC))]
4083 "TARGET_SSE2 && <mask_mode512bit_condition>"
4084 "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4085 [(set_attr "type" "ssecvt")
4086 (set (attr "prefix_data16")
4088 (match_test "TARGET_AVX")
4090 (const_string "1")))
4091 (set_attr "prefix" "maybe_vex")
4092 (set_attr "mode" "<sseinsnmode>")])
;; 512-bit vcvtps2dq: V16SI destination from V16SF operand 1 under
;; UNSPEC_FIX_NOTRUNC, with optional mask operand and rounding text in
;; the template.  EVEX-only, insn mode XI.
4094 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
4095 [(set (match_operand:V16SI 0 "register_operand" "=v")
4097 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
4098 UNSPEC_FIX_NOTRUNC))]
4100 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4101 [(set_attr "type" "ssecvt")
4102 (set_attr "prefix" "evex")
4103 (set_attr "mode" "XI")])
;; vcvtps2udq: VI4_AVX512VL destination from an <ssePSmode> operand under
;; UNSPEC_UNSIGNED_FIX_NOTRUNC.  EVEX-only; the template carries the
;; optional mask operand and rounding text.
4105 (define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4106 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4107 (unspec:VI4_AVX512VL
4108 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
4109 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4111 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4112 [(set_attr "type" "ssecvt")
4113 (set_attr "prefix" "evex")
4114 (set_attr "mode" "<sseinsnmode>")])
;; vcvtps2qq for the 256/512-bit DI vector modes (VI8_256_512): source is
;; an <ssePSmode2> operand under UNSPEC_FIX_NOTRUNC.  Enabled for
;; TARGET_AVX512DQ subject to <round_mode512bit_condition>.
4116 (define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4117 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4118 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4119 UNSPEC_FIX_NOTRUNC))]
4120 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4121 "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4122 [(set_attr "type" "ssecvt")
4123 (set_attr "prefix" "evex")
4124 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit vcvtps2qq: V2DI destination from elements 0 and 1 of V4SF
;; operand 1 (selector (parallel [(const_int 0) (const_int 1)])), under
;; UNSPEC_FIX_NOTRUNC.  Requires TARGET_AVX512DQ && TARGET_AVX512VL.
4126 (define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4127 [(set (match_operand:V2DI 0 "register_operand" "=v")
4130 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4131 (parallel [(const_int 0) (const_int 1)]))]
4132 UNSPEC_FIX_NOTRUNC))]
4133 "TARGET_AVX512DQ && TARGET_AVX512VL"
4134 "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4135 [(set_attr "type" "ssecvt")
4136 (set_attr "prefix" "evex")
4137 (set_attr "mode" "TI")])
;; vcvtps2uqq for the 256/512-bit DI vector modes (VI8_256_512): source
;; is an <ssePSmode2> operand under UNSPEC_UNSIGNED_FIX_NOTRUNC.  Enabled
;; for TARGET_AVX512DQ subject to <round_mode512bit_condition>.
4139 (define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
4140 [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4141 (unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4142 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4143 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4144 "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4145 [(set_attr "type" "ssecvt")
4146 (set_attr "prefix" "evex")
4147 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit vcvtps2uqq: V2DI destination from elements 0 and 1 of V4SF
;; operand 1, under UNSPEC_UNSIGNED_FIX_NOTRUNC.  Requires
;; TARGET_AVX512DQ && TARGET_AVX512VL.
4149 (define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
4150 [(set (match_operand:V2DI 0 "register_operand" "=v")
4153 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4154 (parallel [(const_int 0) (const_int 1)]))]
4155 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4156 "TARGET_AVX512DQ && TARGET_AVX512VL"
4157 "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4158 [(set_attr "type" "ssecvt")
4159 (set_attr "prefix" "evex")
4160 (set_attr "mode" "TI")])
;; 512-bit vcvttps2<fixsuffix>dq: V16SI destination from V16SF operand 1.
;; <fixsuffix> appears in both the pattern name and the mnemonic, so one
;; definition yields each variant of the conversion.  EVEX-only, insn
;; mode XI.
4162 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
4163 [(set (match_operand:V16SI 0 "register_operand" "=v")
4165 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4167 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4168 [(set_attr "type" "ssecvt")
4169 (set_attr "prefix" "evex")
4170 (set_attr "mode" "XI")])
;; 256-bit vcvttps2dq: (fix:V8SI ...) of a V8SF register-or-memory
;; operand.  Enabled for TARGET_AVX subject to <mask_avx512vl_condition>;
;; the prefix attribute is taken from <mask_prefix>.
4172 (define_insn "fix_truncv8sfv8si2<mask_name>"
4173 [(set (match_operand:V8SI 0 "register_operand" "=v")
4174 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
4175 "TARGET_AVX && <mask_avx512vl_condition>"
4176 "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4177 [(set_attr "type" "ssecvt")
4178 (set_attr "prefix" "<mask_prefix>")
4179 (set_attr "mode" "OI")])
;; 128-bit cvttps2dq: (fix:V4SI ...) of a V4SF register-or-memory operand.
;; Enabled for TARGET_SSE2 subject to <mask_avx512vl_condition>.
;; prefix_rep and prefix_data16 are each computed from a TARGET_AVX test.
;; NOTE(review): prefix_data16 is set twice -- once through the
;; (set (attr "prefix_data16") ...) expression and again through the
;; plain (set_attr "prefix_data16" "0") that follows it.  One of the two
;; looks redundant; confirm which value is intended to win before
;; removing either.
4181 (define_insn "fix_truncv4sfv4si2<mask_name>"
4182 [(set (match_operand:V4SI 0 "register_operand" "=v")
4183 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "vm")))]
4184 "TARGET_SSE2 && <mask_avx512vl_condition>"
4185 "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4186 [(set_attr "type" "ssecvt")
4187 (set (attr "prefix_rep")
4189 (match_test "TARGET_AVX")
4191 (const_string "1")))
4192 (set (attr "prefix_data16")
4194 (match_test "TARGET_AVX")
4196 (const_string "0")))
4197 (set_attr "prefix_data16" "0")
4198 (set_attr "prefix" "<mask_prefix2>")
4199 (set_attr "mode" "TI")])
;; Expander for VF1 float vector -> unsigned integer vector truncation.
;; For V16SFmode it emits gen_ufix_truncv16sfv16si2.  In the other visible
;; path, ix86_expand_adjust_ufix_to_sfix_si produces an adjusted input in
;; tmp[0] and fills tmp[2]; the signed fix_trunc insn converts tmp[0]
;; into a fresh register tmp[1]; the result is tmp[1] XOR tmp[2].
;; The expander's condition and the lines joining the two paths are not
;; visible in this excerpt.
4201 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
4202 [(match_operand:<sseintvecmode> 0 "register_operand")
4203 (match_operand:VF1 1 "register_operand")]
4206 if (<MODE>mode == V16SFmode)
4207 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
4212 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4213 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
4214 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
4215 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
4220 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4222 ;; Parallel double-precision floating point conversion operations
4224 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: (float:V2DF ...) of a V2SI operand taken from a "y" register
;; (alternative 0, unit "mmx") or from memory (alternative 1).
4226 (define_insn "sse2_cvtpi2pd"
4227 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
4228 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
4230 "cvtpi2pd\t{%1, %0|%0, %1}"
4231 [(set_attr "type" "ssecvt")
4232 (set_attr "unit" "mmx,*")
4233 (set_attr "prefix_data16" "1,*")
4234 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2SI result in a "y" register from a V2DF register-or-memory
;; operand, expressed as UNSPEC_FIX_NOTRUNC.  Unit "mmx".
4236 (define_insn "sse2_cvtpd2pi"
4237 [(set (match_operand:V2SI 0 "register_operand" "=y")
4238 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4239 UNSPEC_FIX_NOTRUNC))]
4241 "cvtpd2pi\t{%1, %0|%0, %1}"
4242 [(set_attr "type" "ssecvt")
4243 (set_attr "unit" "mmx")
4244 (set_attr "bdver1_decode" "double")
4245 (set_attr "btver2_decode" "direct")
4246 (set_attr "prefix_data16" "1")
4247 (set_attr "mode" "DI")])
;; cvttpd2pi: (fix:V2SI ...) of a V2DF register-or-memory operand, result
;; in a "y" register.  Unit "mmx".
4249 (define_insn "sse2_cvttpd2pi"
4250 [(set (match_operand:V2SI 0 "register_operand" "=y")
4251 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
4253 "cvttpd2pi\t{%1, %0|%0, %1}"
4254 [(set_attr "type" "ssecvt")
4255 (set_attr "unit" "mmx")
4256 (set_attr "bdver1_decode" "double")
4257 (set_attr "prefix_data16" "1")
4258 (set_attr "mode" "TI")])
;; cvtsi2sd: (float:DF ...) of SImode operand 2 combined with V2DF
;; register operand 1.  Three alternatives, per the "isa" attribute:
;;   0, 1 (noavx) - two-operand cvtsi2sd, operand 1 tied to the
;;                  destination ("0"); source in a GPR or in memory.
;;   2    (avx)   - three-operand vcvtsi2sd with a separate operand 1.
4260 (define_insn "sse2_cvtsi2sd"
4261 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4264 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
4265 (match_operand:V2DF 1 "register_operand" "0,0,v")
4269 cvtsi2sd\t{%2, %0|%0, %2}
4270 cvtsi2sd\t{%2, %0|%0, %2}
4271 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
4272 [(set_attr "isa" "noavx,noavx,avx")
4273 (set_attr "type" "sseicvt")
4274 (set_attr "athlon_decode" "double,direct,*")
4275 (set_attr "amdfam10_decode" "vector,double,*")
4276 (set_attr "bdver1_decode" "double,direct,*")
4277 (set_attr "btver2_decode" "double,double,double")
4278 (set_attr "prefix" "orig,orig,maybe_evex")
4279 (set_attr "mode" "DF")])
;; cvtsi2sdq: (float:DF ...) of DImode operand 2 combined with V2DF
;; register operand 1.  Enabled only for TARGET_SSE2 && TARGET_64BIT.
;; Alternatives 0 and 1 (noavx) are the two-operand forms with operand 1
;; tied to the destination; alternative 2 (avx) is the three-operand
;; vcvtsi2sdq, the only form whose template carries <round_op3>.
4281 (define_insn "sse2_cvtsi2sdq<round_name>"
4282 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4285 (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
4286 (match_operand:V2DF 1 "register_operand" "0,0,v")
4288 "TARGET_SSE2 && TARGET_64BIT"
4290 cvtsi2sdq\t{%2, %0|%0, %2}
4291 cvtsi2sdq\t{%2, %0|%0, %2}
4292 vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
4293 [(set_attr "isa" "noavx,noavx,avx")
4294 (set_attr "type" "sseicvt")
4295 (set_attr "athlon_decode" "double,direct,*")
4296 (set_attr "amdfam10_decode" "vector,double,*")
4297 (set_attr "bdver1_decode" "double,direct,*")
4298 (set_attr "length_vex" "*,*,4")
4299 (set_attr "prefix_rex" "1,1,*")
4300 (set_attr "prefix" "orig,orig,maybe_evex")
4301 (set_attr "mode" "DF")])
;; vcvtss2usi: SImode result in a general register from V4SF operand 1
;; with a (parallel [(const_int 0)]) element selector, under
;; UNSPEC_UNSIGNED_FIX_NOTRUNC.  EVEX-only.
4303 (define_insn "avx512f_vcvtss2usi<round_name>"
4304 [(set (match_operand:SI 0 "register_operand" "=r")
4307 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4308 (parallel [(const_int 0)]))]
4309 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4311 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4312 [(set_attr "type" "sseicvt")
4313 (set_attr "prefix" "evex")
4314 (set_attr "mode" "SI")])
;; vcvtss2usi with a DImode result: same shape as the SImode pattern but
;; enabled only for TARGET_AVX512F && TARGET_64BIT.  The template uses
;; the same mnemonic text with no size suffix.
4316 (define_insn "avx512f_vcvtss2usiq<round_name>"
4317 [(set (match_operand:DI 0 "register_operand" "=r")
4320 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
4321 (parallel [(const_int 0)]))]
4322 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4323 "TARGET_AVX512F && TARGET_64BIT"
4324 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4325 [(set_attr "type" "sseicvt")
4326 (set_attr "prefix" "evex")
4327 (set_attr "mode" "DI")])
;; vcvttss2usi: SImode result in a general register from V4SF operand 1
;; with a (parallel [(const_int 0)]) element selector.  EVEX-only; uses
;; the <round_saeonly_*> substitution attributes.
4329 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
4330 [(set (match_operand:SI 0 "register_operand" "=r")
4333 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4334 (parallel [(const_int 0)]))))]
4336 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4337 [(set_attr "type" "sseicvt")
4338 (set_attr "prefix" "evex")
4339 (set_attr "mode" "SI")])
;; vcvttss2usi with a DImode result; enabled only for
;; TARGET_AVX512F && TARGET_64BIT.  Otherwise the same shape as the
;; SImode pattern.
4341 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
4342 [(set (match_operand:DI 0 "register_operand" "=r")
4345 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4346 (parallel [(const_int 0)]))))]
4347 "TARGET_AVX512F && TARGET_64BIT"
4348 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4349 [(set_attr "type" "sseicvt")
4350 (set_attr "prefix" "evex")
4351 (set_attr "mode" "DI")])
;; vcvtsd2usi: SImode result in a general register from V2DF operand 1
;; with a (parallel [(const_int 0)]) element selector, under
;; UNSPEC_UNSIGNED_FIX_NOTRUNC.  EVEX-only.
4353 (define_insn "avx512f_vcvtsd2usi<round_name>"
4354 [(set (match_operand:SI 0 "register_operand" "=r")
4357 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4358 (parallel [(const_int 0)]))]
4359 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4361 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4362 [(set_attr "type" "sseicvt")
4363 (set_attr "prefix" "evex")
4364 (set_attr "mode" "SI")])
;; vcvtsd2usi with a DImode result; enabled only for
;; TARGET_AVX512F && TARGET_64BIT.  Otherwise the same shape as the
;; SImode pattern.
4366 (define_insn "avx512f_vcvtsd2usiq<round_name>"
4367 [(set (match_operand:DI 0 "register_operand" "=r")
4370 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
4371 (parallel [(const_int 0)]))]
4372 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4373 "TARGET_AVX512F && TARGET_64BIT"
4374 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
4375 [(set_attr "type" "sseicvt")
4376 (set_attr "prefix" "evex")
4377 (set_attr "mode" "DI")])
;; vcvttsd2usi: SImode result in a general register from V2DF operand 1
;; with a (parallel [(const_int 0)]) element selector.  EVEX-only; uses
;; the <round_saeonly_*> substitution attributes.
4379 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
4380 [(set (match_operand:SI 0 "register_operand" "=r")
4383 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4384 (parallel [(const_int 0)]))))]
4386 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4387 [(set_attr "type" "sseicvt")
4388 (set_attr "prefix" "evex")
4389 (set_attr "mode" "SI")])
;; vcvttsd2usi with a DImode result; enabled only for
;; TARGET_AVX512F && TARGET_64BIT.  Otherwise the same shape as the
;; SImode pattern.
4391 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
4392 [(set (match_operand:DI 0 "register_operand" "=r")
4395 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
4396 (parallel [(const_int 0)]))))]
4397 "TARGET_AVX512F && TARGET_64BIT"
4398 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
4399 [(set_attr "type" "sseicvt")
4400 (set_attr "prefix" "evex")
4401 (set_attr "mode" "DI")])
;; cvtsd2si: SImode result in a general register from V2DF operand 1 with
;; a (parallel [(const_int 0)]) element selector, under
;; UNSPEC_FIX_NOTRUNC.  Two alternatives: "v" register, or whatever
;; <round_constraint2> expands to (defined outside this excerpt).
4403 (define_insn "sse2_cvtsd2si<round_name>"
4404 [(set (match_operand:SI 0 "register_operand" "=r,r")
4407 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4408 (parallel [(const_int 0)]))]
4409 UNSPEC_FIX_NOTRUNC))]
4411 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4412 [(set_attr "type" "sseicvt")
4413 (set_attr "athlon_decode" "double,vector")
4414 (set_attr "bdver1_decode" "double,double")
4415 (set_attr "btver2_decode" "double,double")
4416 (set_attr "prefix_rep" "1")
4417 (set_attr "prefix" "maybe_vex")
4418 (set_attr "mode" "SI")])
;; cvtsd2si on a bare DFmode operand ("v" register or "m" memory) under
;; UNSPEC_FIX_NOTRUNC; SImode result in a general register.
4420 (define_insn "sse2_cvtsd2si_2"
4421 [(set (match_operand:SI 0 "register_operand" "=r,r")
4422 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4423 UNSPEC_FIX_NOTRUNC))]
4425 "%vcvtsd2si\t{%1, %0|%0, %q1}"
4426 [(set_attr "type" "sseicvt")
4427 (set_attr "athlon_decode" "double,vector")
4428 (set_attr "amdfam10_decode" "double,double")
4429 (set_attr "bdver1_decode" "double,double")
4430 (set_attr "prefix_rep" "1")
4431 (set_attr "prefix" "maybe_vex")
4432 (set_attr "mode" "SI")])
;; cvtsd2si{q}: DImode result from V2DF operand 1 with a
;; (parallel [(const_int 0)]) element selector, under UNSPEC_FIX_NOTRUNC.
;; Enabled only for TARGET_SSE2 && TARGET_64BIT.
4434 (define_insn "sse2_cvtsd2siq<round_name>"
4435 [(set (match_operand:DI 0 "register_operand" "=r,r")
4438 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
4439 (parallel [(const_int 0)]))]
4440 UNSPEC_FIX_NOTRUNC))]
4441 "TARGET_SSE2 && TARGET_64BIT"
4442 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
4443 [(set_attr "type" "sseicvt")
4444 (set_attr "athlon_decode" "double,vector")
4445 (set_attr "bdver1_decode" "double,double")
4446 (set_attr "prefix_rep" "1")
4447 (set_attr "prefix" "maybe_vex")
4448 (set_attr "mode" "DI")])
;; cvtsd2si{q} on a bare DFmode operand ("v" register or "m" memory)
;; under UNSPEC_FIX_NOTRUNC; DImode result.  Enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
4450 (define_insn "sse2_cvtsd2siq_2"
4451 [(set (match_operand:DI 0 "register_operand" "=r,r")
4452 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
4453 UNSPEC_FIX_NOTRUNC))]
4454 "TARGET_SSE2 && TARGET_64BIT"
4455 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
4456 [(set_attr "type" "sseicvt")
4457 (set_attr "athlon_decode" "double,vector")
4458 (set_attr "amdfam10_decode" "double,double")
4459 (set_attr "bdver1_decode" "double,double")
4460 (set_attr "prefix_rep" "1")
4461 (set_attr "prefix" "maybe_vex")
4462 (set_attr "mode" "DI")])
;; cvttsd2si: SImode result in a general register from V2DF operand 1
;; with a (parallel [(const_int 0)]) element selector.  Uses the
;; <round_saeonly_*> substitution attributes for predicate, second
;; constraint alternative and template text.
4464 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
4465 [(set (match_operand:SI 0 "register_operand" "=r,r")
4468 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4469 (parallel [(const_int 0)]))))]
4471 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4472 [(set_attr "type" "sseicvt")
4473 (set_attr "athlon_decode" "double,vector")
4474 (set_attr "amdfam10_decode" "double,double")
4475 (set_attr "bdver1_decode" "double,double")
4476 (set_attr "btver2_decode" "double,double")
4477 (set_attr "prefix_rep" "1")
4478 (set_attr "prefix" "maybe_vex")
4479 (set_attr "mode" "SI")])
;; cvttsd2si{q}: DImode result from V2DF operand 1 with a
;; (parallel [(const_int 0)]) element selector.  Enabled only for
;; TARGET_SSE2 && TARGET_64BIT.
4481 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
4482 [(set (match_operand:DI 0 "register_operand" "=r,r")
4485 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
4486 (parallel [(const_int 0)]))))]
4487 "TARGET_SSE2 && TARGET_64BIT"
4488 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
4489 [(set_attr "type" "sseicvt")
4490 (set_attr "athlon_decode" "double,vector")
4491 (set_attr "amdfam10_decode" "double,double")
4492 (set_attr "bdver1_decode" "double,double")
4493 (set_attr "prefix_rep" "1")
4494 (set_attr "prefix" "maybe_vex")
4495 (set_attr "mode" "DI")])
4497 ;; For the float<si2dfmodelower><mode>2 and ufloat<si2dfmodelower><mode>2
;; insn patterns: maps a DF vector mode to the SI vector mode with the
;; same element count (used as the source operand mode).
4498 (define_mode_attr si2dfmode
4499 [(V8DF "V8SI") (V4DF "V4SI")])
;; Lowercase spelling of the si2dfmode mapping, used when building insn
;; pattern names.
4500 (define_mode_attr si2dfmodelower
4501 [(V8DF "v8si") (V4DF "v4si")])
;; vcvtdq2pd for the 512/256-bit DF vector modes (VF2_512_256):
;; (float ...) of an <si2dfmode> register-or-memory operand.  Enabled for
;; TARGET_AVX subject to <mask_mode512bit_condition>.
4503 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4504 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4505 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4506 "TARGET_AVX && <mask_mode512bit_condition>"
4507 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4508 [(set_attr "type" "ssecvt")
4509 (set_attr "prefix" "maybe_vex")
4510 (set_attr "mode" "<MODE>")])
;; vcvt<floatsuffix>qq2pd: (any_float ...) of an <sseintvecmode> operand
;; into a VF2_AVX512VL register.  EVEX-only; the template carries the
;; optional mask operand and the rounding text <round_mask_op2>.
;; Operand 1 uses <round_constraint> (not a fixed "vm") like every other
;; <round_name> pattern in this file, so that the constraint follows the
;; rounding substitution instead of always admitting a memory operand
;; next to the rounding text.
4512 (define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
4513 [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
4514 (any_float:VF2_AVX512VL
4515 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4517 "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4518 [(set_attr "type" "ssecvt")
4519 (set_attr "prefix" "evex")
4520 (set_attr "mode" "<MODE>")])
4522 ;; For <floatsuffix>float<sselongvecmodelower><mode>2 insn patterns
;; Mnemonic suffix appended to vcvt<floatsuffix>qq2ps: empty for V8SF,
;; "{y}" for V4SF.
4523 (define_mode_attr qq2pssuff
4524 [(V8SF "") (V4SF "{y}")])
;; Maps an SF vector mode to the DI vector mode with the same element
;; count (V8SF -> V8DI, V4SF -> V4DI).
4526 (define_mode_attr sselongvecmode
4527 [(V8SF "V8DI") (V4SF "V4DI")])
;; Lowercase spelling of the sselongvecmode mapping, used when building
;; insn pattern names.
4529 (define_mode_attr sselongvecmodelower
4530 [(V8SF "v8di") (V4SF "v4di")])
;; Maps float vector modes to an integer insn mode name (XI/OI/TI).  In
;; this excerpt it supplies the "mode" attribute of the
;; <fixsuffix>fix_trunc<mode><sselongvecmodelower>2 pattern.
4532 (define_mode_attr sseintvecmode3
4533 [(V8SF "XI") (V4SF "OI")
4534 (V8DF "OI") (V4DF "TI")])
;; vcvt<floatsuffix>qq2ps<qq2pssuff>: (any_float ...) of a
;; <sselongvecmode> (DI vector) operand into a VF1_128_256VL register.
;; Enabled for TARGET_AVX512DQ subject to <round_modev8sf_condition>.
4536 (define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
4537 [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
4538 (any_float:VF1_128_256VL
4539 (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4540 "TARGET_AVX512DQ && <round_modev8sf_condition>"
4541 "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4542 [(set_attr "type" "ssecvt")
4543 (set_attr "prefix" "evex")
4544 (set_attr "mode" "<MODE>")])
;; 128-bit vcvt<floatsuffix>qq2ps{x}: the (any_float:V2SF ...) of a V2DI
;; operand is paired with a V2SF zero const_vector to form the V4SF
;; destination (the operator joining the two halves is not visible in
;; this excerpt).  Requires TARGET_AVX512DQ && TARGET_AVX512VL.
4546 (define_insn "*<floatsuffix>floatv2div2sf2"
4547 [(set (match_operand:V4SF 0 "register_operand" "=v")
4549 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4550 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4551 "TARGET_AVX512DQ && TARGET_AVX512VL"
4552 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
4553 [(set_attr "type" "ssecvt")
4554 (set_attr "prefix" "evex")
4555 (set_attr "mode" "V4SF")])
;; Masked form of the 128-bit vcvt<floatsuffix>qq2ps{x}.  Operand 2
;; ("0C": tied to the destination, or a constant) supplies elements 0-1
;; as the pass-through value, operand 3 is the QImode mask in a "Yk"
;; register, and the upper V2SF half is a zero const_vector.  The template
;; prints the mask as %{%3%} followed by %N2.  Requires
;; TARGET_AVX512DQ && TARGET_AVX512VL.
4557 (define_insn "<floatsuffix>floatv2div2sf2_mask"
4558 [(set (match_operand:V4SF 0 "register_operand" "=v")
4561 (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
4563 (match_operand:V4SF 2 "vector_move_operand" "0C")
4564 (parallel [(const_int 0) (const_int 1)]))
4565 (match_operand:QI 3 "register_operand" "Yk"))
4566 (const_vector:V2SF [(const_int 0) (const_int 0)])))]
4567 "TARGET_AVX512DQ && TARGET_AVX512VL"
4568 "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
4569 [(set_attr "type" "ssecvt")
4570 (set_attr "prefix" "evex")
4571 (set_attr "mode" "V4SF")])
;; vcvtudq2pd for VF2_512_256VL: (unsigned_float ...) of an <si2dfmode>
;; register-or-memory operand.  EVEX-only.
4573 (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
4574 [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
4575 (unsigned_float:VF2_512_256VL
4576 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4578 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4579 [(set_attr "type" "ssecvt")
4580 (set_attr "prefix" "evex")
4581 (set_attr "mode" "<MODE>")])
;; 128-bit vcvtudq2pd: (unsigned_float:V2DF ...) of elements 0 and 1 of
;; V4SI operand 1.  EVEX-only.
4583 (define_insn "ufloatv2siv2df2<mask_name>"
4584 [(set (match_operand:V2DF 0 "register_operand" "=v")
4585 (unsigned_float:V2DF
4587 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4588 (parallel [(const_int 0) (const_int 1)]))))]
4590 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4591 [(set_attr "type" "ssecvt")
4592 (set_attr "prefix" "evex")
4593 (set_attr "mode" "V2DF")])
;; vcvtdq2pd producing V8DF from elements 0-7 of a V16SI operand (the
;; selector lists const_int 0 through 7).  The template prints operand 1
;; with the %t modifier.  EVEX-only.
4595 (define_insn "avx512f_cvtdq2pd512_2"
4596 [(set (match_operand:V8DF 0 "register_operand" "=v")
4599 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
4600 (parallel [(const_int 0) (const_int 1)
4601 (const_int 2) (const_int 3)
4602 (const_int 4) (const_int 5)
4603 (const_int 6) (const_int 7)]))))]
4605 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
4606 [(set_attr "type" "ssecvt")
4607 (set_attr "prefix" "evex")
4608 (set_attr "mode" "V8DF")])
;; vcvtdq2pd producing V4DF from elements 0-3 of a V8SI operand.  The
;; template prints operand 1 with the %x modifier.
4610 (define_insn "avx_cvtdq2pd256_2"
4611 [(set (match_operand:V4DF 0 "register_operand" "=v")
4614 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
4615 (parallel [(const_int 0) (const_int 1)
4616 (const_int 2) (const_int 3)]))))]
4618 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
4619 [(set_attr "type" "ssecvt")
4620 (set_attr "prefix" "maybe_evex")
4621 (set_attr "mode" "V4DF")])
;; cvtdq2pd producing V2DF from elements 0 and 1 of a V4SI operand.
;; Enabled for TARGET_SSE2 subject to <mask_avx512vl_condition>; the
;; ssememalign attribute is set to 64.
4623 (define_insn "sse2_cvtdq2pd<mask_name>"
4624 [(set (match_operand:V2DF 0 "register_operand" "=v")
4627 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
4628 (parallel [(const_int 0) (const_int 1)]))))]
4629 "TARGET_SSE2 && <mask_avx512vl_condition>"
4630 "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
4631 [(set_attr "type" "ssecvt")
4632 (set_attr "prefix" "maybe_vex")
4633 (set_attr "ssememalign" "64")
4634 (set_attr "mode" "V2DF")])
;; 512-bit vcvtpd2dq: V8SI destination from V8DF operand 1 under
;; UNSPEC_FIX_NOTRUNC, with optional mask operand and rounding text.
;; EVEX-only, insn mode OI.
4636 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
4637 [(set (match_operand:V8SI 0 "register_operand" "=v")
4639 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4640 UNSPEC_FIX_NOTRUNC))]
4642 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4643 [(set_attr "type" "ssecvt")
4644 (set_attr "prefix" "evex")
4645 (set_attr "mode" "OI")])
;; 256-bit vcvtpd2dq{y}: V4SI destination from a V4DF register-or-memory
;; operand under UNSPEC_FIX_NOTRUNC.  Enabled for TARGET_AVX subject to
;; <mask_avx512vl_condition>.
4647 (define_insn "avx_cvtpd2dq256<mask_name>"
4648 [(set (match_operand:V4SI 0 "register_operand" "=v")
4649 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
4650 UNSPEC_FIX_NOTRUNC))]
4651 "TARGET_AVX && <mask_avx512vl_condition>"
4652 "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4653 [(set_attr "type" "ssecvt")
4654 (set_attr "prefix" "<mask_prefix>")
4655 (set_attr "mode" "OI")])
;; Expander with a V8SI destination built around the V4SI unspec
;; conversion of V4DF operand 1.  The preparation statement sets
;; operands[2] to the V4SImode zero constant (CONST0_RTX); how operand 2
;; enters the pattern is not visible in this excerpt.
4657 (define_expand "avx_cvtpd2dq256_2"
4658 [(set (match_operand:V8SI 0 "register_operand")
4660 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
4664 "operands[2] = CONST0_RTX (V4SImode);")
;; Matching insn for the V8SI-destination form: the V4SI unspec
;; conversion of V4DF operand 1 together with a V4SI const0_operand
;; (operand 2).  Emits vcvtpd2dq{y} and prints the destination with the
;; %x modifier.  VEX-only.
4666 (define_insn "*avx_cvtpd2dq256_2"
4667 [(set (match_operand:V8SI 0 "register_operand" "=x")
4669 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
4671 (match_operand:V4SI 2 "const0_operand")))]
4673 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
4674 [(set_attr "type" "ssecvt")
4675 (set_attr "prefix" "vex")
4676 (set_attr "btver2_decode" "vector")
4677 (set_attr "mode" "OI")])
;; 128-bit cvtpd2dq: the V2SI unspec conversion of a V2DF operand paired
;; with a V2SI zero const_vector to form the V4SI destination.  Enabled
;; for TARGET_SSE2 subject to <mask_avx512vl_condition>.  The output code
;; returns either the vcvtpd2dq{x} template (with mask operand) or the
;; plain cvtpd2dq template; the test that chooses between them is not
;; visible in this excerpt.
4679 (define_insn "sse2_cvtpd2dq<mask_name>"
4680 [(set (match_operand:V4SI 0 "register_operand" "=v")
4682 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
4684 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4685 "TARGET_SSE2 && <mask_avx512vl_condition>"
4688 return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4690 return "cvtpd2dq\t{%1, %0|%0, %1}";
4692 [(set_attr "type" "ssecvt")
4693 (set_attr "prefix_rep" "1")
4694 (set_attr "prefix_data16" "0")
4695 (set_attr "prefix" "maybe_vex")
4696 (set_attr "mode" "TI")
4697 (set_attr "amdfam10_decode" "double")
4698 (set_attr "athlon_decode" "vector")
4699 (set_attr "bdver1_decode" "double")])
4701 ;; For ufix_notrunc* insn patterns
;; Mnemonic suffix appended to vcvtpd2udq: empty for V8DF, "{y}" for
;; V4DF.
4702 (define_mode_attr pd2udqsuff
4703 [(V8DF "") (V4DF "{y}")])
;; vcvtpd2udq<pd2udqsuff>: <si2dfmode> (SI vector) destination from a
;; VF2_512_256VL operand under UNSPEC_UNSIGNED_FIX_NOTRUNC.  EVEX-only;
;; the template carries the optional mask operand and rounding text.
4705 (define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
4706 [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
4708 [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
4709 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4711 "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4712 [(set_attr "type" "ssecvt")
4713 (set_attr "prefix" "evex")
4714 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit vcvtpd2udq{x}: the UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of a
;; V2DF operand paired with a V2SI zero const_vector to form the V4SI
;; destination.  EVEX-only.
4716 (define_insn "ufix_notruncv2dfv2si2<mask_name>"
4717 [(set (match_operand:V4SI 0 "register_operand" "=v")
4720 [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
4721 UNSPEC_UNSIGNED_FIX_NOTRUNC)
4722 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4724 "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4725 [(set_attr "type" "ssecvt")
4726 (set_attr "prefix" "evex")
4727 (set_attr "mode" "TI")])
;; 512-bit vcvttpd2<fixsuffix>dq: V8SI destination from V8DF operand 1.
;; <fixsuffix> appears in both the pattern name and the mnemonic.
;; EVEX-only, insn mode OI.
4729 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
4730 [(set (match_operand:V8SI 0 "register_operand" "=v")
4732 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4734 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4735 [(set_attr "type" "ssecvt")
4736 (set_attr "prefix" "evex")
4737 (set_attr "mode" "OI")])
;; 128-bit vcvttpd2udq{x}: (unsigned_fix:V2SI ...) of a V2DF operand
;; paired with a V2SI zero const_vector to form the V4SI destination.
;; EVEX-only.
4739 (define_insn "ufix_truncv2dfv2si2<mask_name>"
4740 [(set (match_operand:V4SI 0 "register_operand" "=v")
4742 (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4743 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4745 "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4746 [(set_attr "type" "ssecvt")
4747 (set_attr "prefix" "evex")
4748 (set_attr "mode" "TI")])
;; 256-bit vcvttpd2dq{y}: (fix:V4SI ...) of a V4DF register-or-memory
;; operand.
;; NOTE(review): the condition is written out as
;; TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F) and the prefix is
;; "maybe_evex", whereas fix_truncv8sfv8si2<mask_name> and
;; avx_cvtpd2dq256<mask_name> use TARGET_AVX && <mask_avx512vl_condition>
;; with <mask_prefix>.  Confirm the masked variant is gated as intended.
4750 (define_insn "fix_truncv4dfv4si2<mask_name>"
4751 [(set (match_operand:V4SI 0 "register_operand" "=v")
4752 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4753 "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
4754 "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4755 [(set_attr "type" "ssecvt")
4756 (set_attr "prefix" "maybe_evex")
4757 (set_attr "mode" "OI")])
;; 256-bit vcvttpd2udq{y}: (unsigned_fix:V4SI ...) of a V4DF
;; register-or-memory operand.  Requires
;; TARGET_AVX512VL && TARGET_AVX512F.
4759 (define_insn "ufix_truncv4dfv4si2<mask_name>"
4760 [(set (match_operand:V4SI 0 "register_operand" "=v")
4761 (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4762 "TARGET_AVX512VL && TARGET_AVX512F"
4763 "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4764 [(set_attr "type" "ssecvt")
4765 (set_attr "prefix" "maybe_evex")
4766 (set_attr "mode" "OI")])
;; vcvttpd2<fixsuffix>qq: (any_fix ...) of a VF2_AVX512VL operand into
;; the same-shaped <sseintvecmode> integer vector.  Enabled for
;; TARGET_AVX512DQ subject to <round_saeonly_mode512bit_condition>.
4768 (define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
4769 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4770 (any_fix:<sseintvecmode>
4771 (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4772 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
4773 "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4774 [(set_attr "type" "ssecvt")
4775 (set_attr "prefix" "evex")
4776 (set_attr "mode" "<sseintvecmode2>")])
;; vcvtpd2qq: <sseintvecmode> destination from a VF2_AVX512VL operand
;; under UNSPEC_FIX_NOTRUNC.  Enabled for TARGET_AVX512DQ subject to
;; <round_mode512bit_condition>.
4778 (define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4779 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4780 (unspec:<sseintvecmode>
4781 [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
4782 UNSPEC_FIX_NOTRUNC))]
4783 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4784 "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4785 [(set_attr "type" "ssecvt")
4786 (set_attr "prefix" "evex")
4787 (set_attr "mode" "<sseintvecmode2>")])
;; vcvtpd2uqq: <sseintvecmode> destination from a VF2_AVX512VL operand
;; under UNSPEC_UNSIGNED_FIX_NOTRUNC.  Enabled for TARGET_AVX512DQ
;; subject to <round_mode512bit_condition>.
4789 (define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
4790 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4791 (unspec:<sseintvecmode>
4792 [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
4793 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4794 "TARGET_AVX512DQ && <round_mode512bit_condition>"
4795 "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4796 [(set_attr "type" "ssecvt")
4797 (set_attr "prefix" "evex")
4798 (set_attr "mode" "<sseintvecmode2>")])
;; vcvttps2<fixsuffix>qq: (any_fix ...) of a VF1_128_256VL operand into
;; the <sselongvecmode> DI vector with the same element count.  Enabled
;; for TARGET_AVX512DQ subject to <round_saeonly_modev8sf_condition>;
;; insn mode comes from <sseintvecmode3>.
4800 (define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
4801 [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
4802 (any_fix:<sselongvecmode>
4803 (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4804 "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
4805 "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4806 [(set_attr "type" "ssecvt")
4807 (set_attr "prefix" "evex")
4808 (set_attr "mode" "<sseintvecmode3>")])
;; 128-bit vcvttps2<fixsuffix>qq: V2DI destination from elements 0 and 1
;; of V4SF operand 1.  Requires TARGET_AVX512DQ && TARGET_AVX512VL.
4810 (define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
4811 [(set (match_operand:V2DI 0 "register_operand" "=v")
4814 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4815 (parallel [(const_int 0) (const_int 1)]))))]
4816 "TARGET_AVX512DQ && TARGET_AVX512VL"
4817 "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4818 [(set_attr "type" "ssecvt")
4819 (set_attr "prefix" "evex")
4820 (set_attr "mode" "TI")])
;; vcvttps2udq: (unsigned_fix ...) of a VF1_128_256VL register-or-memory
;; operand into the same-shaped <sseintvecmode> integer vector.
;; EVEX-only.
4822 (define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
4823 [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
4824 (unsigned_fix:<sseintvecmode>
4825 (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
4827 "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4828 [(set_attr "type" "ssecvt")
4829 (set_attr "prefix" "evex")
4830 (set_attr "mode" "<sseintvecmode2>")])
;; Expander with a V8SI destination built around fix:V4SI of a V4DF operand.
;; The preparation statement sets operands[2] to the V4SI zero vector.
4832 (define_expand "avx_cvttpd2dq256_2"
4833 [(set (match_operand:V8SI 0 "register_operand")
4835 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
4838 "operands[2] = CONST0_RTX (V4SImode);")
;; fix:V2SI of a V2DF operand paired with an explicit zero V2SI const_vector,
;; producing a V4SI destination.  Two output templates are returned:
;; vcvttpd2dq{x} (which carries the mask operand) and legacy cvttpd2dq.
4840 (define_insn "sse2_cvttpd2dq<mask_name>"
4841 [(set (match_operand:V4SI 0 "register_operand" "=v")
4843 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4844 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
4845 "TARGET_SSE2 && <mask_avx512vl_condition>"
4848 return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
4850 return "cvttpd2dq\t{%1, %0|%0, %1}";
4852 [(set_attr "type" "ssecvt")
4853 (set_attr "amdfam10_decode" "double")
4854 (set_attr "athlon_decode" "vector")
4855 (set_attr "bdver1_decode" "double")
4856 (set_attr "prefix" "maybe_vex")
4857 (set_attr "mode" "TI")])
;; float_truncate:V2SF of V2DF operand 2, combined with V4SF operand 1.
;; Alternatives 0 and 1 are the legacy cvtsd2ss forms (isa "noavx", register
;; and memory source, operand 1 tied to the destination); alternative 2 is
;; the three-operand vcvtsd2ss (isa "avx") which accepts <round_op3>.
4859 (define_insn "sse2_cvtsd2ss<round_name>"
4860 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4863 (float_truncate:V2SF
4864 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
4865 (match_operand:V4SF 1 "register_operand" "0,0,v")
4869 cvtsd2ss\t{%2, %0|%0, %2}
4870 cvtsd2ss\t{%2, %0|%0, %q2}
4871 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
4872 [(set_attr "isa" "noavx,noavx,avx")
4873 (set_attr "type" "ssecvt")
4874 (set_attr "athlon_decode" "vector,double,*")
4875 (set_attr "amdfam10_decode" "vector,double,*")
4876 (set_attr "bdver1_decode" "direct,direct,*")
4877 (set_attr "btver2_decode" "double,double,double")
4878 (set_attr "prefix" "orig,orig,<round_prefix>")
4879 (set_attr "mode" "SF")])
;; Single-to-double counterpart of the cvtsd2ss pattern: elements 0 and 1 are
;; selected from V4SF operand 2 and combined with V2DF operand 1.
;; Alternatives 0 and 1 are the legacy cvtss2sd forms (isa "noavx");
;; alternative 2 is vcvtss2sd (isa "avx") which accepts <round_saeonly_op3>.
4881 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
4882 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4886 (match_operand:V4SF 2 "<round_saeonly_nimm_predicate>" "x,m,<round_saeonly_constraint>")
4887 (parallel [(const_int 0) (const_int 1)])))
4888 (match_operand:V2DF 1 "register_operand" "0,0,v")
4892 cvtss2sd\t{%2, %0|%0, %2}
4893 cvtss2sd\t{%2, %0|%0, %k2}
4894 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
4895 [(set_attr "isa" "noavx,noavx,avx")
4896 (set_attr "type" "ssecvt")
4897 (set_attr "amdfam10_decode" "vector,double,*")
4898 (set_attr "athlon_decode" "direct,direct,*")
4899 (set_attr "bdver1_decode" "direct,direct,*")
4900 (set_attr "btver2_decode" "double,double,double")
4901 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
4902 (set_attr "mode" "DF")])
;; float_truncate from V8DF to V8SF, emitted as vcvtpd2ps with the optional
;; mask operand and <round_mask_op2> rounding operand.
4904 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
4905 [(set (match_operand:V8SF 0 "register_operand" "=v")
4906 (float_truncate:V8SF
4907 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
4909 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4910 [(set_attr "type" "ssecvt")
4911 (set_attr "prefix" "evex")
4912 (set_attr "mode" "V8SF")])
;; float_truncate from V4DF to V4SF, emitted as vcvtpd2ps{y}.
;; Requires TARGET_AVX together with <mask_avx512vl_condition>.
4914 (define_insn "avx_cvtpd2ps256<mask_name>"
4915 [(set (match_operand:V4SF 0 "register_operand" "=v")
4916 (float_truncate:V4SF
4917 (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
4918 "TARGET_AVX && <mask_avx512vl_condition>"
4919 "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4920 [(set_attr "type" "ssecvt")
4921 (set_attr "prefix" "maybe_evex")
4922 (set_attr "btver2_decode" "vector")
4923 (set_attr "mode" "V4SF")])
;; Expander for float_truncate:V2SF of a V2DF operand into a V4SF
;; destination.  operands[2] is initialised to the V2SF zero vector.
4925 (define_expand "sse2_cvtpd2ps"
4926 [(set (match_operand:V4SF 0 "register_operand")
4928 (float_truncate:V2SF
4929 (match_operand:V2DF 1 "nonimmediate_operand"))
4932 "operands[2] = CONST0_RTX (V2SFmode);")
;; Masked expander for the V2DF -> V2SF float_truncate.  Operand 2 is a V4SF
;; register and operand 3 a QI register; operands[4] is initialised to the
;; V2SF zero vector.
4934 (define_expand "sse2_cvtpd2ps_mask"
4935 [(set (match_operand:V4SF 0 "register_operand")
4938 (float_truncate:V2SF
4939 (match_operand:V2DF 1 "nonimmediate_operand"))
4941 (match_operand:V4SF 2 "register_operand")
4942 (match_operand:QI 3 "register_operand")))]
4944 "operands[4] = CONST0_RTX (V2SFmode);")
;; float_truncate:V2SF of a V2DF operand where operand 2 must be a V2SF zero
;; constant (const0_operand).  Returns either vcvtpd2ps{x}, which carries
;; <mask_operand3>, or the legacy cvtpd2ps template.
4946 (define_insn "*sse2_cvtpd2ps<mask_name>"
4947 [(set (match_operand:V4SF 0 "register_operand" "=v")
4949 (float_truncate:V2SF
4950 (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
4951 (match_operand:V2SF 2 "const0_operand")))]
4952 "TARGET_SSE2 && <mask_avx512vl_condition>"
4955 return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
4957 return "cvtpd2ps\t{%1, %0|%0, %1}";
4959 [(set_attr "type" "ssecvt")
4960 (set_attr "amdfam10_decode" "double")
4961 (set_attr "athlon_decode" "vector")
4962 (set_attr "bdver1_decode" "double")
4963 (set_attr "prefix_data16" "1")
4964 (set_attr "prefix" "maybe_vex")
4965 (set_attr "mode" "V4SF")])
4967 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Maps a double-precision vector mode to the single-precision vector mode
;; with the same element count (V8DF -> V8SF, V4DF -> V4SF).
4968 (define_mode_attr sf2dfmode
4969 [(V8DF "V8SF") (V4DF "V4SF")])
;; float_extend from <sf2dfmode> to a VF2_512_256 mode, emitted as vcvtps2pd.
;; Requires TARGET_AVX plus the 512-bit mask and SAE-only conditions.
4971 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
4972 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4973 (float_extend:VF2_512_256
4974 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4975 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
4976 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4977 [(set_attr "type" "ssecvt")
4978 (set_attr "prefix" "maybe_vex")
4979 (set_attr "mode" "<MODE>")])
;; vcvtps2pd on elements 0-3 selected from a V8SF operand, giving V4DF.
;; The source operand is printed with the %x modifier.
4981 (define_insn "*avx_cvtps2pd256_2"
4982 [(set (match_operand:V4DF 0 "register_operand" "=x")
4985 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4986 (parallel [(const_int 0) (const_int 1)
4987 (const_int 2) (const_int 3)]))))]
4989 "vcvtps2pd\t{%x1, %0|%0, %x1}"
4990 [(set_attr "type" "ssecvt")
4991 (set_attr "prefix" "vex")
4992 (set_attr "mode" "V4DF")])
;; vcvtps2pd on elements 0-7 selected from a V16SF operand, giving V8DF.
;; The source operand is printed with the %t modifier.
4994 (define_insn "vec_unpacks_lo_v16sf"
4995 [(set (match_operand:V8DF 0 "register_operand" "=v")
4998 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
4999 (parallel [(const_int 0) (const_int 1)
5000 (const_int 2) (const_int 3)
5001 (const_int 4) (const_int 5)
5002 (const_int 6) (const_int 7)]))))]
5004 "vcvtps2pd\t{%t1, %0|%0, %t1}"
5005 [(set_attr "type" "ssecvt")
5006 (set_attr "prefix" "evex")
5007 (set_attr "mode" "V8DF")])
;; UNSPEC_CVTINT2MASK for the VI12_AVX512VL modes: converts a vector register
;; into an <avx512fmaskmode> result (constraint Yk) via vpmov<ssemodesuffix>2m.
5009 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5010 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5011 (unspec:<avx512fmaskmode>
5012 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
5013 UNSPEC_CVTINT2MASK))]
5015 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5016 [(set_attr "prefix" "evex")
5017 (set_attr "mode" "<sseinsnmode>")])
;; UNSPEC_CVTINT2MASK for the VI48_AVX512VL modes: converts a vector register
;; into an <avx512fmaskmode> result (constraint Yk) via vpmov<ssemodesuffix>2m.
5019 (define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
5020 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
5021 (unspec:<avx512fmaskmode>
5022 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
5023 UNSPEC_CVTINT2MASK))]
5025 "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
5026 [(set_attr "prefix" "evex")
5027 (set_attr "mode" "<sseinsnmode>")])
;; Mask-to-vector expander for the VI12_AVX512VL modes: a vec_merge controlled
;; by mask operand 1.  The preparation code sets operands[2] to the all-ones
;; vector (CONSTM1_RTX) and operands[3] to the zero vector (CONST0_RTX).
5029 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5030 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
5031 (vec_merge:VI12_AVX512VL
5034 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5037 operands[2] = CONSTM1_RTX (<MODE>mode);
5038 operands[3] = CONST0_RTX (<MODE>mode);
;; Matches a vec_merge of an all-ones constant (operand 2) and a zero constant
;; (operand 3) under mask register operand 1, for the VI12_AVX512VL modes.
;; Emitted as vpmovm2<ssemodesuffix>.
5041 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5042 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
5043 (vec_merge:VI12_AVX512VL
5044 (match_operand:VI12_AVX512VL 2 "constm1_operand")
5045 (match_operand:VI12_AVX512VL 3 "const0_operand")
5046 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5048 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5049 [(set_attr "prefix" "evex")
5050 (set_attr "mode" "<sseinsnmode>")])
;; Mask-to-vector expander for the VI48_AVX512VL modes: a vec_merge controlled
;; by mask operand 1.  The preparation code sets operands[2] to the all-ones
;; vector (CONSTM1_RTX) and operands[3] to the zero vector (CONST0_RTX).
5052 (define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
5053 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
5054 (vec_merge:VI48_AVX512VL
5057 (match_operand:<avx512fmaskmode> 1 "register_operand")))]
5060 operands[2] = CONSTM1_RTX (<MODE>mode);
5061 operands[3] = CONST0_RTX (<MODE>mode);
;; Matches a vec_merge of an all-ones constant (operand 2) and a zero constant
;; (operand 3) under mask register operand 1, for the VI48_AVX512VL modes.
;; Emitted as vpmovm2<ssemodesuffix>.
5064 (define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
5065 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
5066 (vec_merge:VI48_AVX512VL
5067 (match_operand:VI48_AVX512VL 2 "constm1_operand")
5068 (match_operand:VI48_AVX512VL 3 "const0_operand")
5069 (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
5071 "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
5072 [(set_attr "prefix" "evex")
5073 (set_attr "mode" "<sseinsnmode>")])
;; %vcvtps2pd on elements 0 and 1 selected from a V4SF operand, giving V2DF.
;; Requires TARGET_SSE2 together with <mask_avx512vl_condition>.
5075 (define_insn "sse2_cvtps2pd<mask_name>"
5076 [(set (match_operand:V2DF 0 "register_operand" "=v")
5079 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5080 (parallel [(const_int 0) (const_int 1)]))))]
5081 "TARGET_SSE2 && <mask_avx512vl_condition>"
5082 "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5083 [(set_attr "type" "ssecvt")
5084 (set_attr "amdfam10_decode" "direct")
5085 (set_attr "athlon_decode" "double")
5086 (set_attr "bdver1_decode" "double")
5087 (set_attr "prefix_data16" "0")
5088 (set_attr "prefix" "maybe_vex")
5089 (set_attr "mode" "V2DF")])
;; Two-step expander with a V2DF destination.  The first selection uses
;; indices 6,7,2,3 and involves V4SF operand 1; the second sets operand 0
;; from elements 0 and 1.  operands[2] is a fresh V4SF pseudo.
5091 (define_expand "vec_unpacks_hi_v4sf"
5096 (match_operand:V4SF 1 "nonimmediate_operand"))
5097 (parallel [(const_int 6) (const_int 7)
5098 (const_int 2) (const_int 3)])))
5099 (set (match_operand:V2DF 0 "register_operand")
5103 (parallel [(const_int 0) (const_int 1)]))))]
5105 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander with a V4DF destination: elements 4-7 are selected from
;; V8SF register operand 1 first.  operands[2] is a fresh V4SF pseudo.
5107 (define_expand "vec_unpacks_hi_v8sf"
5110 (match_operand:V8SF 1 "register_operand")
5111 (parallel [(const_int 4) (const_int 5)
5112 (const_int 6) (const_int 7)])))
5113 (set (match_operand:V4DF 0 "register_operand")
5117 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander with a V8DF destination: elements 8-15 are selected from
;; V16SF register operand 1 first.  operands[2] is a fresh V8SF pseudo.
5119 (define_expand "vec_unpacks_hi_v16sf"
5122 (match_operand:V16SF 1 "register_operand")
5123 (parallel [(const_int 8) (const_int 9)
5124 (const_int 10) (const_int 11)
5125 (const_int 12) (const_int 13)
5126 (const_int 14) (const_int 15)])))
5127 (set (match_operand:V8DF 0 "register_operand")
5131 "operands[2] = gen_reg_rtx (V8SFmode);")
;; Expander setting a V2DF destination from elements 0 and 1 selected out of
;; V4SF operand 1.
5133 (define_expand "vec_unpacks_lo_v4sf"
5134 [(set (match_operand:V2DF 0 "register_operand")
5137 (match_operand:V4SF 1 "nonimmediate_operand")
5138 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander setting a V4DF destination from elements 0-3 selected out of
;; V8SF operand 1.
5141 (define_expand "vec_unpacks_lo_v8sf"
5142 [(set (match_operand:V4DF 0 "register_operand")
5145 (match_operand:V8SF 1 "nonimmediate_operand")
5146 (parallel [(const_int 0) (const_int 1)
5147 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the floating-point vector mode with half as
;; many elements, each twice as wide (e.g. V8HI -> V4SF, V4SI -> V2DF).
;; Used as the result mode of the vec_unpack{s,u}_float_* expanders.
5150 (define_mode_attr sseunpackfltmode
5151 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
5152 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; For VI2_AVX512F sources: unpack via gen_vec_unpacks_hi_<mode> into a
;; temporary of <sseunpackmode>, then set operand 0 to the FLOAT of that
;; temporary in <sseunpackfltmode>.
5154 (define_expand "vec_unpacks_float_hi_<mode>"
5155 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5156 (match_operand:VI2_AVX512F 1 "register_operand")]
5159 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5161 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
5162 emit_insn (gen_rtx_SET (operands[0],
5163 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; For VI2_AVX512F sources: unpack via gen_vec_unpacks_lo_<mode> into a
;; temporary of <sseunpackmode>, then set operand 0 to the FLOAT of that
;; temporary in <sseunpackfltmode>.
5167 (define_expand "vec_unpacks_float_lo_<mode>"
5168 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5169 (match_operand:VI2_AVX512F 1 "register_operand")]
5172 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5174 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
5175 emit_insn (gen_rtx_SET (operands[0],
5176 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; For VI2_AVX512F sources: unpack via gen_vec_unpacku_hi_<mode> into a
;; temporary of <sseunpackmode>, then set operand 0 to the FLOAT of that
;; temporary in <sseunpackfltmode>.
5180 (define_expand "vec_unpacku_float_hi_<mode>"
5181 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5182 (match_operand:VI2_AVX512F 1 "register_operand")]
5185 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5187 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
5188 emit_insn (gen_rtx_SET (operands[0],
5189 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; For VI2_AVX512F sources: unpack via gen_vec_unpacku_lo_<mode> into a
;; temporary of <sseunpackmode>, then set operand 0 to the FLOAT of that
;; temporary in <sseunpackfltmode>.
5193 (define_expand "vec_unpacku_float_lo_<mode>"
5194 [(match_operand:<sseunpackfltmode> 0 "register_operand")
5195 (match_operand:VI2_AVX512F 1 "register_operand")]
5198 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
5200 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
5201 emit_insn (gen_rtx_SET (operands[0],
5202 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Two-step expander with a V2DF destination: elements 2,3,2,3 are selected
;; from V4SI operand 1 first, and operand 0 is then set using elements 0 and
;; 1.  operands[2] is a fresh V4SI pseudo.
5206 (define_expand "vec_unpacks_float_hi_v4si"
5209 (match_operand:V4SI 1 "nonimmediate_operand")
5210 (parallel [(const_int 2) (const_int 3)
5211 (const_int 2) (const_int 3)])))
5212 (set (match_operand:V2DF 0 "register_operand")
5216 (parallel [(const_int 0) (const_int 1)]))))]
5218 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander setting a V2DF destination from elements 0 and 1 selected out of
;; V4SI operand 1.
5220 (define_expand "vec_unpacks_float_lo_v4si"
5221 [(set (match_operand:V2DF 0 "register_operand")
5224 (match_operand:V4SI 1 "nonimmediate_operand")
5225 (parallel [(const_int 0) (const_int 1)]))))]
;; Two-step expander with a V4DF destination: elements 4-7 are selected from
;; V8SI operand 1 first.  operands[2] is a fresh V4SI pseudo.
5228 (define_expand "vec_unpacks_float_hi_v8si"
5231 (match_operand:V8SI 1 "nonimmediate_operand")
5232 (parallel [(const_int 4) (const_int 5)
5233 (const_int 6) (const_int 7)])))
5234 (set (match_operand:V4DF 0 "register_operand")
5238 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander setting a V4DF destination from elements 0-3 selected out of
;; V8SI operand 1.
5240 (define_expand "vec_unpacks_float_lo_v8si"
5241 [(set (match_operand:V4DF 0 "register_operand")
5244 (match_operand:V8SI 1 "nonimmediate_operand")
5245 (parallel [(const_int 0) (const_int 1)
5246 (const_int 2) (const_int 3)]))))]
;; Two-step expander with a V8DF destination: elements 8-15 are selected from
;; V16SI operand 1 first.  operands[2] is a fresh V8SI pseudo.
5249 (define_expand "vec_unpacks_float_hi_v16si"
5252 (match_operand:V16SI 1 "nonimmediate_operand")
5253 (parallel [(const_int 8) (const_int 9)
5254 (const_int 10) (const_int 11)
5255 (const_int 12) (const_int 13)
5256 (const_int 14) (const_int 15)])))
5257 (set (match_operand:V8DF 0 "register_operand")
5261 "operands[2] = gen_reg_rtx (V8SImode);")
;; Expander setting a V8DF destination from elements 0-7 selected out of
;; V16SI operand 1.
5263 (define_expand "vec_unpacks_float_lo_v16si"
5264 [(set (match_operand:V8DF 0 "register_operand")
5267 (match_operand:V16SI 1 "nonimmediate_operand")
5268 (parallel [(const_int 0) (const_int 1)
5269 (const_int 2) (const_int 3)
5270 (const_int 4) (const_int 5)
5271 (const_int 6) (const_int 7)]))))]
;; Unsigned high-half V4SI -> V2DF expander.  After selecting elements
;; 2,3,2,3 of operand 1 and converting elements 0 and 1, the sequence compares
;; operand 6 for less-than against the zero vector (operand 3), ANDs operand 7
;; with the vector of 2^32 (operand 4), and adds operand 8 to operand 6.
;; Presumably operands 7 and 8 hold the compare and AND results, so lanes that
;; converted as negative get 2^32 added back -- confirm against the full file.
;; 2^32 is built with real_ldexp (&TWO32r, &dconst1, 32); operand 5 is a V4SI
;; pseudo and operands 6-8 are V2DF pseudos.
5274 (define_expand "vec_unpacku_float_hi_v4si"
5277 (match_operand:V4SI 1 "nonimmediate_operand")
5278 (parallel [(const_int 2) (const_int 3)
5279 (const_int 2) (const_int 3)])))
5284 (parallel [(const_int 0) (const_int 1)]))))
5286 (lt:V2DF (match_dup 6) (match_dup 3)))
5288 (and:V2DF (match_dup 7) (match_dup 4)))
5289 (set (match_operand:V2DF 0 "register_operand")
5290 (plus:V2DF (match_dup 6) (match_dup 8)))]
5293 REAL_VALUE_TYPE TWO32r;
5297 real_ldexp (&TWO32r, &dconst1, 32);
5298 x = const_double_from_real_value (TWO32r, DFmode);
5300 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5301 operands[4] = force_reg (V2DFmode,
5302 ix86_build_const_vector (V2DFmode, 1, x));
5304 operands[5] = gen_reg_rtx (V4SImode);
5306 for (i = 6; i < 9; i++)
5307 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned low-half V4SI -> V2DF expander.  After converting elements 0 and
;; 1 of operand 1, the sequence compares operand 5 for less-than against the
;; zero vector (operand 3), ANDs operand 6 with the vector of 2^32
;; (operand 4), and adds operand 7 to operand 5.  Presumably operands 6 and 7
;; hold the compare and AND results -- confirm against the full file.
;; 2^32 is built with real_ldexp (&TWO32r, &dconst1, 32); operands 5-7 are
;; V2DF pseudos.
5310 (define_expand "vec_unpacku_float_lo_v4si"
5314 (match_operand:V4SI 1 "nonimmediate_operand")
5315 (parallel [(const_int 0) (const_int 1)]))))
5317 (lt:V2DF (match_dup 5) (match_dup 3)))
5319 (and:V2DF (match_dup 6) (match_dup 4)))
5320 (set (match_operand:V2DF 0 "register_operand")
5321 (plus:V2DF (match_dup 5) (match_dup 7)))]
5324 REAL_VALUE_TYPE TWO32r;
5328 real_ldexp (&TWO32r, &dconst1, 32);
5329 x = const_double_from_real_value (TWO32r, DFmode);
5331 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
5332 operands[4] = force_reg (V2DFmode,
5333 ix86_build_const_vector (V2DFmode, 1, x));
5335 for (i = 5; i < 8; i++)
5336 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned high-half V8SI -> V4DF expander, written as C code.
;; Steps: extract the high V4SI half of operand 1 (tmp[5]), convert it with
;; floatv4siv4df2 (tmp[2]), compute tmp[3] = (tmp[2] < 0), AND that with the
;; vector of 2^32 (tmp[4]), and store tmp[2] + tmp[4] in operand 0.
;; tmp[0] is the V4DF zero vector and tmp[1] the V4DF vector of 2^32.
5339 (define_expand "vec_unpacku_float_hi_v8si"
5340 [(match_operand:V4DF 0 "register_operand")
5341 (match_operand:V8SI 1 "register_operand")]
5344 REAL_VALUE_TYPE TWO32r;
5348 real_ldexp (&TWO32r, &dconst1, 32);
5349 x = const_double_from_real_value (TWO32r, DFmode);
5351 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5352 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5353 tmp[5] = gen_reg_rtx (V4SImode);
5355 for (i = 2; i < 5; i++)
5356 tmp[i] = gen_reg_rtx (V4DFmode);
5357 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
5358 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
5359 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5360 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5361 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned high-half V16SI -> V8DF expander, written as C code.
;; Steps: extract the high V8SI half of operand 1 (tmp[3]), convert it with
;; floatv8siv8df2 (tmp[2]), set QImode mask k = (tmp[2] < 0), then use
;; addv8df3_mask with tmp[2], the vector of 2^32 (tmp[1]) and mask k, and
;; finally move tmp[2] to operand 0.
5365 (define_expand "vec_unpacku_float_hi_v16si"
5366 [(match_operand:V8DF 0 "register_operand")
5367 (match_operand:V16SI 1 "register_operand")]
5370 REAL_VALUE_TYPE TWO32r;
5373 real_ldexp (&TWO32r, &dconst1, 32);
5374 x = const_double_from_real_value (TWO32r, DFmode);
5376 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5377 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5378 tmp[2] = gen_reg_rtx (V8DFmode);
5379 tmp[3] = gen_reg_rtx (V8SImode);
5380 k = gen_reg_rtx (QImode);
5382 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
5383 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
5384 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5385 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5386 emit_move_insn (operands[0], tmp[2]);
;; Unsigned low-half V8SI -> V4DF expander, written as C code.
;; Steps: convert operand 1 with avx_cvtdq2pd256_2 (tmp[2]), compute
;; tmp[3] = (tmp[2] < 0), AND that with the vector of 2^32 (tmp[4]), and
;; store tmp[2] + tmp[4] in operand 0.
5390 (define_expand "vec_unpacku_float_lo_v8si"
5391 [(match_operand:V4DF 0 "register_operand")
5392 (match_operand:V8SI 1 "nonimmediate_operand")]
5395 REAL_VALUE_TYPE TWO32r;
5399 real_ldexp (&TWO32r, &dconst1, 32);
5400 x = const_double_from_real_value (TWO32r, DFmode);
5402 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
5403 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
5405 for (i = 2; i < 5; i++)
5406 tmp[i] = gen_reg_rtx (V4DFmode);
5407 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
5408 emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
5409 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
5410 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned low-half V16SI -> V8DF expander, written as C code.
;; Steps: convert operand 1 with avx512f_cvtdq2pd512_2 (tmp[2]), set QImode
;; mask k = (tmp[2] < 0), then use addv8df3_mask with tmp[2], the vector of
;; 2^32 (tmp[1]) and mask k, and finally move tmp[2] to operand 0.
5414 (define_expand "vec_unpacku_float_lo_v16si"
5415 [(match_operand:V8DF 0 "register_operand")
5416 (match_operand:V16SI 1 "nonimmediate_operand")]
5419 REAL_VALUE_TYPE TWO32r;
5422 real_ldexp (&TWO32r, &dconst1, 32);
5423 x = const_double_from_real_value (TWO32r, DFmode);
5425 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
5426 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
5427 tmp[2] = gen_reg_rtx (V8DFmode);
5428 k = gen_reg_rtx (QImode);
5430 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
5431 emit_insn (gen_rtx_SET (k, gen_rtx_LT (QImode, tmp[2], tmp[0])));
5432 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
5433 emit_move_insn (operands[0], tmp[2]);
;; Pack two VF2_512_256 operands into one <ssePSmode> result: each input is
;; float_truncated to <sf2dfmode> and the destination is a vec_concat.
;; operands[3] and operands[4] are fresh <sf2dfmode> pseudos.
5437 (define_expand "vec_pack_trunc_<mode>"
5439 (float_truncate:<sf2dfmode>
5440 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
5442 (float_truncate:<sf2dfmode>
5443 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
5444 (set (match_operand:<ssePSmode> 0 "register_operand")
5445 (vec_concat:<ssePSmode>
5450 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
5451 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; Pack two V2DF operands into one V4SF result.
;; When TARGET_AVX is set, 128-bit AVX is not preferred and the insn is
;; optimised for speed, the inputs are concatenated into a V4DF and converted
;; with a single avx_cvtpd2ps256.  The other sequence converts each input
;; with sse2_cvtpd2ps and joins the two results with sse_movlhps.
5454 (define_expand "vec_pack_trunc_v2df"
5455 [(match_operand:V4SF 0 "register_operand")
5456 (match_operand:V2DF 1 "nonimmediate_operand")
5457 (match_operand:V2DF 2 "nonimmediate_operand")]
5462 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5464 tmp0 = gen_reg_rtx (V4DFmode);
5465 tmp1 = force_reg (V2DFmode, operands[1]);
5467 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5468 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
5472 tmp0 = gen_reg_rtx (V4SFmode);
5473 tmp1 = gen_reg_rtx (V4SFmode);
5475 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
5476 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
5477 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Pack two V8DF operands into one V16SI result: each input is converted
;; with fix_truncv8dfv8si2 into a V8SI pseudo and the two halves are joined
;; with avx_vec_concatv16si.
5482 (define_expand "vec_pack_sfix_trunc_v8df"
5483 [(match_operand:V16SI 0 "register_operand")
5484 (match_operand:V8DF 1 "nonimmediate_operand")
5485 (match_operand:V8DF 2 "nonimmediate_operand")]
5490 r1 = gen_reg_rtx (V8SImode);
5491 r2 = gen_reg_rtx (V8SImode);
5493 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
5494 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
5495 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; Pack two V4DF operands into one V8SI result: each input is converted
;; with fix_truncv4dfv4si2 into a V4SI pseudo and the two halves are joined
;; with avx_vec_concatv8si.
5499 (define_expand "vec_pack_sfix_trunc_v4df"
5500 [(match_operand:V8SI 0 "register_operand")
5501 (match_operand:V4DF 1 "nonimmediate_operand")
5502 (match_operand:V4DF 2 "nonimmediate_operand")]
5507 r1 = gen_reg_rtx (V4SImode);
5508 r2 = gen_reg_rtx (V4SImode);
5510 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
5511 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
5512 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Pack two V2DF operands into one V4SI result using truncating conversion.
;; When TARGET_AVX is set, 128-bit AVX is not preferred and the insn is
;; optimised for speed, the inputs are concatenated into a V4DF and converted
;; with fix_truncv4dfv4si2.  The other sequence converts each input with
;; sse2_cvttpd2dq and merges the low V2DI halves of both results with
;; vec_interleave_lowv2di before moving the V4SI view into operand 0.
5516 (define_expand "vec_pack_sfix_trunc_v2df"
5517 [(match_operand:V4SI 0 "register_operand")
5518 (match_operand:V2DF 1 "nonimmediate_operand")
5519 (match_operand:V2DF 2 "nonimmediate_operand")]
5522 rtx tmp0, tmp1, tmp2;
5524 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5526 tmp0 = gen_reg_rtx (V4DFmode);
5527 tmp1 = force_reg (V2DFmode, operands[1]);
5529 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5530 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
5534 tmp0 = gen_reg_rtx (V4SImode);
5535 tmp1 = gen_reg_rtx (V4SImode);
5536 tmp2 = gen_reg_rtx (V2DImode);
5538 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
5539 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
5540 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5541 gen_lowpart (V2DImode, tmp0),
5542 gen_lowpart (V2DImode, tmp1)));
5543 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; Maps a double-precision vector mode to the SImode vector mode with twice
;; as many elements (V8DF -> V16SI, V4DF -> V8SI, V2DF -> V4SI).
5548 (define_mode_attr ssepackfltmode
5549 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; Pack two VF2 operands into one <ssepackfltmode> result using unsigned
;; truncating conversion.
;; V8DF case: each input is converted with ufix_truncv8dfv8si2 and the two
;; V8SI halves are joined with avx_vec_concatv16si.
;; Other modes: each input is passed through
;; ix86_expand_adjust_ufix_to_sfix_si, which also yields a per-input
;; adjustment vector (tmp[2], tmp[3]); the adjusted inputs are packed with
;; vec_pack_sfix_trunc_<mode>, the adjustment vectors are combined by
;; ix86_expand_vec_extract_even_odd, and the two results are XORed into
;; operand 0.  The extraction goes through V8SFmode views when the result is
;; not V4SI and TARGET_AVX2 is unavailable.
5551 (define_expand "vec_pack_ufix_trunc_<mode>"
5552 [(match_operand:<ssepackfltmode> 0 "register_operand")
5553 (match_operand:VF2 1 "register_operand")
5554 (match_operand:VF2 2 "register_operand")]
5557 if (<MODE>mode == V8DFmode)
5561 r1 = gen_reg_rtx (V8SImode);
5562 r2 = gen_reg_rtx (V8SImode);
5564 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
5565 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
5566 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
5571 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
5572 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
5573 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
5574 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
5575 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
5577 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
5578 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
5582 tmp[5] = gen_reg_rtx (V8SFmode);
5583 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
5584 gen_lowpart (V8SFmode, tmp[3]), 0);
5585 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
5587 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
5588 operands[0], 0, OPTAB_DIRECT);
5589 if (tmp[6] != operands[0])
5590 emit_move_insn (operands[0], tmp[6]);
;; Pack two V4DF operands into one V8SI result: each input is converted
;; with avx_cvtpd2dq256 into a V4SI pseudo and the two halves are joined
;; with avx_vec_concatv8si.
5596 (define_expand "vec_pack_sfix_v4df"
5597 [(match_operand:V8SI 0 "register_operand")
5598 (match_operand:V4DF 1 "nonimmediate_operand")
5599 (match_operand:V4DF 2 "nonimmediate_operand")]
5604 r1 = gen_reg_rtx (V4SImode);
5605 r2 = gen_reg_rtx (V4SImode);
5607 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
5608 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
5609 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Pack two V2DF operands into one V4SI result using the cvtpd2dq family.
;; When TARGET_AVX is set, 128-bit AVX is not preferred and the insn is
;; optimised for speed, the inputs are concatenated into a V4DF and converted
;; with avx_cvtpd2dq256.  The other sequence converts each input with
;; sse2_cvtpd2dq and merges the low V2DI halves of both results with
;; vec_interleave_lowv2di before moving the V4SI view into operand 0.
5613 (define_expand "vec_pack_sfix_v2df"
5614 [(match_operand:V4SI 0 "register_operand")
5615 (match_operand:V2DF 1 "nonimmediate_operand")
5616 (match_operand:V2DF 2 "nonimmediate_operand")]
5619 rtx tmp0, tmp1, tmp2;
5621 if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
5623 tmp0 = gen_reg_rtx (V4DFmode);
5624 tmp1 = force_reg (V2DFmode, operands[1]);
5626 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
5627 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
5631 tmp0 = gen_reg_rtx (V4SImode);
5632 tmp1 = gen_reg_rtx (V4SImode);
5633 tmp2 = gen_reg_rtx (V2DImode);
5635 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
5636 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
5637 emit_insn (gen_vec_interleave_lowv2di (tmp2,
5638 gen_lowpart (V2DImode, tmp0),
5639 gen_lowpart (V2DImode, tmp1)));
5640 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
5645 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5647 ;; Parallel single-precision floating point element swizzling
5649 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper for sse_movhlps that accepts nonimmediate operands.
;; ix86_fixup_binary_operands supplies the destination actually used for the
;; insn; if it differs from operand 0 the result is copied back afterwards.
5651 (define_expand "sse_movhlps_exp"
5652 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5655 (match_operand:V4SF 1 "nonimmediate_operand")
5656 (match_operand:V4SF 2 "nonimmediate_operand"))
5657 (parallel [(const_int 6)
5663 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5665 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
5667 /* Fix up the destination if needed. */
5668 if (dst != operands[0])
5669 emit_move_insn (operands[0], dst);
;; V4SF select pattern with five alternatives.  The insn condition rejects
;; the case where operands 1 and 2 are both memory.
;;   alt 0/1: register source  -> movhlps / vmovhlps
;;   alt 2/3: offsettable memory source -> movlps / vmovlps on %H2
;;   alt 4:   memory destination -> %vmovhps storing from operand 2
5674 (define_insn "sse_movhlps"
5675 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5678 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5679 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
5680 (parallel [(const_int 6)
5684 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
5686 movhlps\t{%2, %0|%0, %2}
5687 vmovhlps\t{%2, %1, %0|%0, %1, %2}
5688 movlps\t{%H2, %0|%0, %H2}
5689 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
5690 %vmovhps\t{%2, %0|%q0, %2}"
5691 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5692 (set_attr "type" "ssemov")
5693 (set_attr "ssememalign" "64")
5694 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5695 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper for sse_movlhps that accepts nonimmediate operands.
;; ix86_fixup_binary_operands supplies the destination actually used for the
;; insn; if it differs from operand 0 the result is copied back afterwards.
5697 (define_expand "sse_movlhps_exp"
5698 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5701 (match_operand:V4SF 1 "nonimmediate_operand")
5702 (match_operand:V4SF 2 "nonimmediate_operand"))
5703 (parallel [(const_int 0)
5709 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5711 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
5713 /* Fix up the destination if needed. */
5714 if (dst != operands[0])
5715 emit_move_insn (operands[0], dst);
;; V4SF select pattern with five alternatives, valid when
;; ix86_binary_operator_ok accepts the operands.
;;   alt 0/1: register source -> movlhps / vmovlhps
;;   alt 2/3: memory source   -> movhps / vmovhps
;;   alt 4:   offsettable memory destination -> %vmovlps storing to %H0
5720 (define_insn "sse_movlhps"
5721 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5724 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5725 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
5726 (parallel [(const_int 0)
5730 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
5732 movlhps\t{%2, %0|%0, %2}
5733 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5734 movhps\t{%2, %0|%0, %q2}
5735 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5736 %vmovlps\t{%2, %H0|%H0, %2}"
5737 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5738 (set_attr "type" "ssemov")
5739 (set_attr "ssememalign" "64")
5740 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5741 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; vunpckhps on V16SF: interleaves elements 2,3 of every group of four from
;; operand 1 with the matching elements of operand 2 (indices 16 and above
;; in the selection list), as spelled out by the parallel below.
5743 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
5744 [(set (match_operand:V16SF 0 "register_operand" "=v")
5747 (match_operand:V16SF 1 "register_operand" "v")
5748 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5749 (parallel [(const_int 2) (const_int 18)
5750 (const_int 3) (const_int 19)
5751 (const_int 6) (const_int 22)
5752 (const_int 7) (const_int 23)
5753 (const_int 10) (const_int 26)
5754 (const_int 11) (const_int 27)
5755 (const_int 14) (const_int 30)
5756 (const_int 15) (const_int 31)])))]
5758 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5759 [(set_attr "type" "sselog")
5760 (set_attr "prefix" "evex")
5761 (set_attr "mode" "V16SF")])
5763 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpckhps on V8SF: interleaves elements 2,3 and 6,7 of operand 1 with the
;; matching elements of operand 2 (indices 10,11 and 14,15).
;; Requires TARGET_AVX together with <mask_avx512vl_condition>.
5764 (define_insn "avx_unpckhps256<mask_name>"
5765 [(set (match_operand:V8SF 0 "register_operand" "=v")
5768 (match_operand:V8SF 1 "register_operand" "v")
5769 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5770 (parallel [(const_int 2) (const_int 10)
5771 (const_int 3) (const_int 11)
5772 (const_int 6) (const_int 14)
5773 (const_int 7) (const_int 15)])))]
5774 "TARGET_AVX && <mask_avx512vl_condition>"
5775 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5776 [(set_attr "type" "sselog")
5777 (set_attr "prefix" "vex")
5778 (set_attr "mode" "V8SF")])
;; Three-step expander for a V8SF high interleave.  Two selections use the
;; unpcklps-style (0,8,1,9,4,12,5,13) and unpckhps-style
;; (2,10,3,11,6,14,7,15) index lists on operands 1 and 2; the final set of
;; operand 0 selects indices 4-7 and 12-15.  operands[3] and operands[4] are
;; fresh V8SF pseudos, presumably holding the two intermediate results.
5780 (define_expand "vec_interleave_highv8sf"
5784 (match_operand:V8SF 1 "register_operand" "x")
5785 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5786 (parallel [(const_int 0) (const_int 8)
5787 (const_int 1) (const_int 9)
5788 (const_int 4) (const_int 12)
5789 (const_int 5) (const_int 13)])))
5795 (parallel [(const_int 2) (const_int 10)
5796 (const_int 3) (const_int 11)
5797 (const_int 6) (const_int 14)
5798 (const_int 7) (const_int 15)])))
5799 (set (match_operand:V8SF 0 "register_operand")
5804 (parallel [(const_int 4) (const_int 5)
5805 (const_int 6) (const_int 7)
5806 (const_int 12) (const_int 13)
5807 (const_int 14) (const_int 15)])))]
5810 operands[3] = gen_reg_rtx (V8SFmode);
5811 operands[4] = gen_reg_rtx (V8SFmode);
;; V4SF high interleave (selection 2,6,3,7).  Alternative 0 is legacy
;; unpckhps with operand 1 tied to the destination (isa "noavx");
;; alternative 1 is three-operand vunpckhps with <mask_operand3> (isa "avx").
5814 (define_insn "vec_interleave_highv4sf<mask_name>"
5815 [(set (match_operand:V4SF 0 "register_operand" "=x,v")
5818 (match_operand:V4SF 1 "register_operand" "0,v")
5819 (match_operand:V4SF 2 "nonimmediate_operand" "xm,vm"))
5820 (parallel [(const_int 2) (const_int 6)
5821 (const_int 3) (const_int 7)])))]
5822 "TARGET_SSE && <mask_avx512vl_condition>"
5824 unpckhps\t{%2, %0|%0, %2}
5825 vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5826 [(set_attr "isa" "noavx,avx")
5827 (set_attr "type" "sselog")
5828 (set_attr "prefix" "orig,vex")
5829 (set_attr "mode" "V4SF")])
;; vunpcklps on V16SF: interleaves elements 0,1 of every group of four from
;; operand 1 with the matching elements of operand 2 (indices 16 and above
;; in the selection list), as spelled out by the parallel below.
5831 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
5832 [(set (match_operand:V16SF 0 "register_operand" "=v")
5835 (match_operand:V16SF 1 "register_operand" "v")
5836 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5837 (parallel [(const_int 0) (const_int 16)
5838 (const_int 1) (const_int 17)
5839 (const_int 4) (const_int 20)
5840 (const_int 5) (const_int 21)
5841 (const_int 8) (const_int 24)
5842 (const_int 9) (const_int 25)
5843 (const_int 12) (const_int 28)
5844 (const_int 13) (const_int 29)])))]
5846 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5847 [(set_attr "type" "sselog")
5848 (set_attr "prefix" "evex")
5849 (set_attr "mode" "V16SF")])
5851 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on V8SF: interleaves elements 0,1 and 4,5 of operand 1 with the
;; matching elements of operand 2 (indices 8,9 and 12,13).
;; Requires TARGET_AVX together with <mask_avx512vl_condition>.
5852 (define_insn "avx_unpcklps256<mask_name>"
5853 [(set (match_operand:V8SF 0 "register_operand" "=v")
5856 (match_operand:V8SF 1 "register_operand" "v")
5857 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
5858 (parallel [(const_int 0) (const_int 8)
5859 (const_int 1) (const_int 9)
5860 (const_int 4) (const_int 12)
5861 (const_int 5) (const_int 13)])))]
5862 "TARGET_AVX && <mask_avx512vl_condition>"
5863 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5864 [(set_attr "type" "sselog")
5865 (set_attr "prefix" "vex")
5866 (set_attr "mode" "V8SF")])
;; Explicitly masked V4SF vunpcklps (selection 0,4,1,5).  Operand 3 is either
;; tied to the destination or a constant (constraint "0C") and operand 4 is
;; the QI mask register; both are printed via %{%4%}%N3.
5868 (define_insn "unpcklps128_mask"
5869 [(set (match_operand:V4SF 0 "register_operand" "=v")
5873 (match_operand:V4SF 1 "register_operand" "v")
5874 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
5875 (parallel [(const_int 0) (const_int 4)
5876 (const_int 1) (const_int 5)]))
5877 (match_operand:V4SF 3 "vector_move_operand" "0C")
5878 (match_operand:QI 4 "register_operand" "Yk")))]
5880 "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
5881 [(set_attr "type" "sselog")
5882 (set_attr "prefix" "evex")
5883 (set_attr "mode" "V4SF")])
;; Three-step expander for a V8SF low interleave.  Two selections use the
;; unpcklps-style (0,8,1,9,4,12,5,13) and unpckhps-style
;; (2,10,3,11,6,14,7,15) index lists on operands 1 and 2; the final set of
;; operand 0 selects indices 0-3 and 8-11.  operands[3] and operands[4] are
;; fresh V8SF pseudos, presumably holding the two intermediate results.
5885 (define_expand "vec_interleave_lowv8sf"
5889 (match_operand:V8SF 1 "register_operand" "x")
5890 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5891 (parallel [(const_int 0) (const_int 8)
5892 (const_int 1) (const_int 9)
5893 (const_int 4) (const_int 12)
5894 (const_int 5) (const_int 13)])))
5900 (parallel [(const_int 2) (const_int 10)
5901 (const_int 3) (const_int 11)
5902 (const_int 6) (const_int 14)
5903 (const_int 7) (const_int 15)])))
5904 (set (match_operand:V8SF 0 "register_operand")
5909 (parallel [(const_int 0) (const_int 1)
5910 (const_int 2) (const_int 3)
5911 (const_int 8) (const_int 9)
5912 (const_int 10) (const_int 11)])))]
5915 operands[3] = gen_reg_rtx (V8SFmode);
5916 operands[4] = gen_reg_rtx (V8SFmode);
;; V4SF low interleave (selection 0,4,1,5).  Alternative 0 is legacy unpcklps
;; with operand 1 tied to the destination (isa "noavx"); alternative 1 is
;; three-operand vunpcklps (isa "avx").
5919 (define_insn "vec_interleave_lowv4sf"
5920 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5923 (match_operand:V4SF 1 "register_operand" "0,x")
5924 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
5925 (parallel [(const_int 0) (const_int 4)
5926 (const_int 1) (const_int 5)])))]
5929 unpcklps\t{%2, %0|%0, %2}
5930 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5931 [(set_attr "isa" "noavx,avx")
5932 (set_attr "type" "sselog")
5933 (set_attr "prefix" "orig,vex")
5934 (set_attr "mode" "V4SF")])
5936 ;; These are modeled with the same vec_concat as the others so that we
5937 ;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: the selection list (1 1 3 3 5 5 7 7) duplicates
;; each odd-indexed element.  The condition is TARGET_AVX together with
;; <mask_avx512vl_condition>; the source may be a register or memory.
5938 (define_insn "avx_movshdup256<mask_name>"
5939 [(set (match_operand:V8SF 0 "register_operand" "=v")
5942 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5944 (parallel [(const_int 1) (const_int 1)
5945 (const_int 3) (const_int 3)
5946 (const_int 5) (const_int 5)
5947 (const_int 7) (const_int 7)])))]
5948 "TARGET_AVX && <mask_avx512vl_condition>"
5949 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5950 [(set_attr "type" "sse")
5951 (set_attr "prefix" "vex")
5952 (set_attr "mode" "V8SF")])
;; 128-bit movshdup.  The condition is TARGET_SSE3 together with
;; <mask_avx512vl_condition>; the template uses the %v prefix and the
;; "prefix" attribute is maybe_vex, so one pattern serves both encodings.
5954 (define_insn "sse3_movshdup<mask_name>"
5955 [(set (match_operand:V4SF 0 "register_operand" "=v")
5958 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
5960 (parallel [(const_int 1)
5964 "TARGET_SSE3 && <mask_avx512vl_condition>"
5965 "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5966 [(set_attr "type" "sse")
5967 (set_attr "prefix_rep" "1")
5968 (set_attr "prefix" "maybe_vex")
5969 (set_attr "mode" "V4SF")])
;; 512-bit vmovshdup: the selection list (1 1 3 3 ... 15 15) duplicates
;; each odd-indexed element of the V16SF source.  EVEX-encoded.
5971 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
5972 [(set (match_operand:V16SF 0 "register_operand" "=v")
5975 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5977 (parallel [(const_int 1) (const_int 1)
5978 (const_int 3) (const_int 3)
5979 (const_int 5) (const_int 5)
5980 (const_int 7) (const_int 7)
5981 (const_int 9) (const_int 9)
5982 (const_int 11) (const_int 11)
5983 (const_int 13) (const_int 13)
5984 (const_int 15) (const_int 15)])))]
5986 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5987 [(set_attr "type" "sse")
5988 (set_attr "prefix" "evex")
5989 (set_attr "mode" "V16SF")])
;; 256-bit vmovsldup: the selection list (0 0 2 2 4 4 6 6) duplicates
;; each even-indexed element.  The condition is TARGET_AVX together with
;; <mask_avx512vl_condition>.
5991 (define_insn "avx_movsldup256<mask_name>"
5992 [(set (match_operand:V8SF 0 "register_operand" "=v")
5995 (match_operand:V8SF 1 "nonimmediate_operand" "vm")
5997 (parallel [(const_int 0) (const_int 0)
5998 (const_int 2) (const_int 2)
5999 (const_int 4) (const_int 4)
6000 (const_int 6) (const_int 6)])))]
6001 "TARGET_AVX && <mask_avx512vl_condition>"
6002 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6003 [(set_attr "type" "sse")
6004 (set_attr "prefix" "vex")
6005 (set_attr "mode" "V8SF")])
;; 128-bit movsldup.  The condition is TARGET_SSE3 together with
;; <mask_avx512vl_condition>; the template uses the %v prefix and the
;; "prefix" attribute is maybe_vex.
6007 (define_insn "sse3_movsldup<mask_name>"
6008 [(set (match_operand:V4SF 0 "register_operand" "=v")
6011 (match_operand:V4SF 1 "nonimmediate_operand" "vm")
6013 (parallel [(const_int 0)
6017 "TARGET_SSE3 && <mask_avx512vl_condition>"
6018 "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6019 [(set_attr "type" "sse")
6020 (set_attr "prefix_rep" "1")
6021 (set_attr "prefix" "maybe_vex")
6022 (set_attr "mode" "V4SF")])
;; 512-bit vmovsldup: the selection list (0 0 2 2 ... 14 14) duplicates
;; each even-indexed element of the V16SF source.  EVEX-encoded.
6024 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
6025 [(set (match_operand:V16SF 0 "register_operand" "=v")
6028 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6030 (parallel [(const_int 0) (const_int 0)
6031 (const_int 2) (const_int 2)
6032 (const_int 4) (const_int 4)
6033 (const_int 6) (const_int 6)
6034 (const_int 8) (const_int 8)
6035 (const_int 10) (const_int 10)
6036 (const_int 12) (const_int 12)
6037 (const_int 14) (const_int 14)])))]
6039 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6040 [(set_attr "type" "sse")
6041 (set_attr "prefix" "evex")
6042 (set_attr "mode" "V16SF")])
;; Expander that decodes the shufps immediate (operand 3) into eight
;; explicit element selectors for avx_shufps256_1.  Each 2-bit field of
;; the immediate is used twice: fields 0 and 1 give indices 0..3 and,
;; with +4, 4..7; fields 2 and 3 give indices 8..11 (+8) and 12..15 (+12).
6044 (define_expand "avx_shufps256<mask_expand4_name>"
6045 [(match_operand:V8SF 0 "register_operand")
6046 (match_operand:V8SF 1 "register_operand")
6047 (match_operand:V8SF 2 "nonimmediate_operand")
6048 (match_operand:SI 3 "const_int_operand")]
6051 int mask = INTVAL (operands[3]);
6052 emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
6055 GEN_INT ((mask >> 0) & 3),
6056 GEN_INT ((mask >> 2) & 3),
6057 GEN_INT (((mask >> 4) & 3) + 8),
6058 GEN_INT (((mask >> 6) & 3) + 8),
6059 GEN_INT (((mask >> 0) & 3) + 4),
6060 GEN_INT (((mask >> 2) & 3) + 4),
6061 GEN_INT (((mask >> 4) & 3) + 12),
6062 GEN_INT (((mask >> 6) & 3) + 12)
6063 <mask_expand4_args>));
6067 ;; One bit in mask selects 2 elements.
;; The insn condition requires selectors 7..10 to equal selectors 3..6
;; plus 4, i.e. both halves use the same pattern.  The output code
;; rebuilds the 8-bit immediate from operands 3..6 (subtracting the 8
;; bias from operands 5 and 6) and stores it back into operands[3].
6068 (define_insn "avx_shufps256_1<mask_name>"
6069 [(set (match_operand:V8SF 0 "register_operand" "=v")
6072 (match_operand:V8SF 1 "register_operand" "v")
6073 (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
6074 (parallel [(match_operand 3 "const_0_to_3_operand" )
6075 (match_operand 4 "const_0_to_3_operand" )
6076 (match_operand 5 "const_8_to_11_operand" )
6077 (match_operand 6 "const_8_to_11_operand" )
6078 (match_operand 7 "const_4_to_7_operand" )
6079 (match_operand 8 "const_4_to_7_operand" )
6080 (match_operand 9 "const_12_to_15_operand")
6081 (match_operand 10 "const_12_to_15_operand")])))]
6083 && <mask_avx512vl_condition>
6084 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6085 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6086 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6087 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
6090 mask = INTVAL (operands[3]);
6091 mask |= INTVAL (operands[4]) << 2;
6092 mask |= (INTVAL (operands[5]) - 8) << 4;
6093 mask |= (INTVAL (operands[6]) - 8) << 6;
6094 operands[3] = GEN_INT (mask);
6096 return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
6098 [(set_attr "type" "sseshuf")
6099 (set_attr "length_immediate" "1")
6100 (set_attr "prefix" "<mask_prefix>")
6101 (set_attr "mode" "V8SF")])
;; Expander that decodes the shufps immediate (operand 3) into four
;; element selectors for sse_shufps_v4sf: fields 0 and 1 give indices
;; 0..3, fields 2 and 3 are biased by +4 to give indices 4..7.
6103 (define_expand "sse_shufps<mask_expand4_name>"
6104 [(match_operand:V4SF 0 "register_operand")
6105 (match_operand:V4SF 1 "register_operand")
6106 (match_operand:V4SF 2 "nonimmediate_operand")
6107 (match_operand:SI 3 "const_int_operand")]
6110 int mask = INTVAL (operands[3]);
6111 emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
6114 GEN_INT ((mask >> 0) & 3),
6115 GEN_INT ((mask >> 2) & 3),
6116 GEN_INT (((mask >> 4) & 3) + 4),
6117 GEN_INT (((mask >> 6) & 3) + 4)
6118 <mask_expand4_args>));
;; Masked V4SF vshufps.  Operands 3..6 are the four element selectors;
;; the output code packs them back into the 8-bit immediate (removing
;; the +4 bias from operands 5 and 6) and overwrites operands[3] with it.
;; Operand 7 is the merge source and operand 8 the QImode mask register.
6122 (define_insn "sse_shufps_v4sf_mask"
6123 [(set (match_operand:V4SF 0 "register_operand" "=v")
6127 (match_operand:V4SF 1 "register_operand" "v")
6128 (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
6129 (parallel [(match_operand 3 "const_0_to_3_operand")
6130 (match_operand 4 "const_0_to_3_operand")
6131 (match_operand 5 "const_4_to_7_operand")
6132 (match_operand 6 "const_4_to_7_operand")]))
6133 (match_operand:V4SF 7 "vector_move_operand" "0C")
6134 (match_operand:QI 8 "register_operand" "Yk")))]
6138 mask |= INTVAL (operands[3]) << 0;
6139 mask |= INTVAL (operands[4]) << 2;
6140 mask |= (INTVAL (operands[5]) - 4) << 4;
6141 mask |= (INTVAL (operands[6]) - 4) << 6;
6142 operands[3] = GEN_INT (mask);
6144 return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
6146 [(set_attr "type" "sseshuf")
6147 (set_attr "length_immediate" "1")
6148 (set_attr "prefix" "evex")
6149 (set_attr "mode" "V4SF")])
;; Unmasked shufps over the VI4F_128 modes.  Two selectors (operands 3
;; and 4, range 0..3) index operand 1 and two (operands 5 and 6, range
;; 4..7) index operand 2 within the vec_concat.  The output code packs
;; the selectors into the immediate, stores it in operands[3], and
;; returns the shufps or vshufps template depending on the alternative.
6151 (define_insn "sse_shufps_<mode>"
6152 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
6153 (vec_select:VI4F_128
6154 (vec_concat:<ssedoublevecmode>
6155 (match_operand:VI4F_128 1 "register_operand" "0,x")
6156 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
6157 (parallel [(match_operand 3 "const_0_to_3_operand")
6158 (match_operand 4 "const_0_to_3_operand")
6159 (match_operand 5 "const_4_to_7_operand")
6160 (match_operand 6 "const_4_to_7_operand")])))]
6164 mask |= INTVAL (operands[3]) << 0;
6165 mask |= INTVAL (operands[4]) << 2;
6166 mask |= (INTVAL (operands[5]) - 4) << 4;
6167 mask |= (INTVAL (operands[6]) - 4) << 6;
6168 operands[3] = GEN_INT (mask);
6170 switch (which_alternative)
6173 return "shufps\t{%3, %2, %0|%0, %2, %3}";
6175 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6180 [(set_attr "isa" "noavx,avx")
6181 (set_attr "type" "sseshuf")
6182 (set_attr "length_immediate" "1")
6183 (set_attr "prefix" "orig,vex")
6184 (set_attr "mode" "V4SF")])
;; Extract elements 2 and 3 of V4SF operand 1 into V2SF operand 0.
;; Alternatives: register to memory (movhps), register to register
;; (movhlps), and offsettable memory to register (movlps using %H1).
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the
;; memory operand; confirm against the i386 operand print codes.
6186 (define_insn "sse_storehps"
6187 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
6189 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
6190 (parallel [(const_int 2) (const_int 3)])))]
6193 %vmovhps\t{%1, %0|%q0, %1}
6194 %vmovhlps\t{%1, %d0|%d0, %1}
6195 %vmovlps\t{%H1, %d0|%d0, %H1}"
6196 [(set_attr "type" "ssemov")
6197 (set_attr "ssememalign" "64")
6198 (set_attr "prefix" "maybe_vex")
6199 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapping sse_loadhps.  It lets ix86_fixup_binary_operands
;; choose the destination, emits sse_loadhps into that destination, and
;; copies the result to operands[0] when the two differ.
6201 (define_expand "sse_loadhps_exp"
6202 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6205 (match_operand:V4SF 1 "nonimmediate_operand")
6206 (parallel [(const_int 0) (const_int 1)]))
6207 (match_operand:V2SF 2 "nonimmediate_operand")))]
6210 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6212 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
6214 /* Fix up the destination if needed. */
6215 if (dst != operands[0])
6216 emit_move_insn (operands[0], dst);
;; Combine elements 0 and 1 of V4SF operand 1 with V2SF operand 2.
;; Alternatives 0/1 take operand 2 from memory (movhps / vmovhps),
;; alternatives 2/3 from a register (movlhps / vmovlhps), and
;; alternative 4 has an offsettable memory destination and stores
;; operand 2 through %H0 with movlps.
6221 (define_insn "sse_loadhps"
6222 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
6225 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
6226 (parallel [(const_int 0) (const_int 1)]))
6227 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
6230 movhps\t{%2, %0|%0, %q2}
6231 vmovhps\t{%2, %1, %0|%0, %1, %q2}
6232 movlhps\t{%2, %0|%0, %2}
6233 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6234 %vmovlps\t{%2, %H0|%H0, %2}"
6235 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6236 (set_attr "type" "ssemov")
6237 (set_attr "ssememalign" "64")
6238 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6239 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extract elements 0 and 1 of V4SF operand 1 into V2SF operand 0.
;; Alternatives: register to memory (movlps), register to register
;; (movaps of the whole register), and memory to register (movlps).
6241 (define_insn "sse_storelps"
6242 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
6244 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
6245 (parallel [(const_int 0) (const_int 1)])))]
6248 %vmovlps\t{%1, %0|%q0, %1}
6249 %vmovaps\t{%1, %0|%0, %1}
6250 %vmovlps\t{%1, %d0|%d0, %q1}"
6251 [(set_attr "type" "ssemov")
6252 (set_attr "prefix" "maybe_vex")
6253 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapping sse_loadlps.  It lets ix86_fixup_binary_operands
;; choose the destination, emits sse_loadlps into that destination, and
;; copies the result to operands[0] when the two differ.
6255 (define_expand "sse_loadlps_exp"
6256 [(set (match_operand:V4SF 0 "nonimmediate_operand")
6258 (match_operand:V2SF 2 "nonimmediate_operand")
6260 (match_operand:V4SF 1 "nonimmediate_operand")
6261 (parallel [(const_int 2) (const_int 3)]))))]
6264 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
6266 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
6268 /* Fix up the destination if needed. */
6269 if (dst != operands[0])
6270 emit_move_insn (operands[0], dst);
;; Combine V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;; Alternatives 0/1 have operand 2 in a register and use shufps/vshufps
;; with immediate 0xe4; alternatives 2/3 load operand 2 from memory with
;; movlps/vmovlps; alternative 4 stores operand 2 into a memory
;; destination with movlps.
6275 (define_insn "sse_loadlps"
6276 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
6278 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
6280 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
6281 (parallel [(const_int 2) (const_int 3)]))))]
6284 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
6285 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
6286 movlps\t{%2, %0|%0, %q2}
6287 vmovlps\t{%2, %1, %0|%0, %1, %q2}
6288 %vmovlps\t{%2, %0|%q0, %2}"
6289 [(set_attr "isa" "noavx,avx,noavx,avx,*")
6290 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
6291 (set_attr "ssememalign" "64")
6292 (set_attr "length_immediate" "1,1,*,*,*")
6293 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
6294 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-only movss combining V4SF operands 2 and 1.  Alternative 0
;; (noavx) ties operand 1 to the destination; alternative 1 (avx) uses
;; the three-operand vmovss form.
6296 (define_insn "sse_movss"
6297 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
6299 (match_operand:V4SF 2 "register_operand" " x,x")
6300 (match_operand:V4SF 1 "register_operand" " 0,x")
6304 movss\t{%2, %0|%0, %2}
6305 vmovss\t{%2, %1, %0|%0, %1, %2}"
6306 [(set_attr "isa" "noavx,avx")
6307 (set_attr "type" "ssemov")
6308 (set_attr "prefix" "orig,vex")
6309 (set_attr "mode" "SF")])
;; Broadcast element 0 of V4SF register operand 1 to every element of
;; a VF1_128_256 destination using vbroadcastss.
6311 (define_insn "avx2_vec_dup<mode>"
6312 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
6313 (vec_duplicate:VF1_128_256
6315 (match_operand:V4SF 1 "register_operand" "x")
6316 (parallel [(const_int 0)]))))]
6318 "vbroadcastss\t{%1, %0|%0, %1}"
6319 [(set_attr "type" "sselog1")
6320 (set_attr "prefix" "vex")
6321 (set_attr "mode" "<MODE>")])
;; Broadcast element 0 of V8SF register operand 1 into a V8SF
;; destination.  The source is printed with the %x modifier.
6323 (define_insn "avx2_vec_dupv8sf_1"
6324 [(set (match_operand:V8SF 0 "register_operand" "=x")
6327 (match_operand:V8SF 1 "register_operand" "x")
6328 (parallel [(const_int 0)]))))]
6330 "vbroadcastss\t{%x1, %0|%0, %x1}"
6331 [(set_attr "type" "sselog1")
6332 (set_attr "prefix" "vex")
6333 (set_attr "mode" "V8SF")])
;; Broadcast element 0 of a VF_512 register to every element of a
;; VF_512 destination; the mnemonic suffix comes from <bcstscalarsuff>
;; and the source is printed with the %x modifier.
6335 (define_insn "avx512f_vec_dup<mode>_1"
6336 [(set (match_operand:VF_512 0 "register_operand" "=v")
6337 (vec_duplicate:VF_512
6338 (vec_select:<ssescalarmode>
6339 (match_operand:VF_512 1 "register_operand" "v")
6340 (parallel [(const_int 0)]))))]
6342 "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
6343 [(set_attr "type" "sselog1")
6344 (set_attr "prefix" "evex")
6345 (set_attr "mode" "<MODE>")])
6347 ;; Although insertps takes register source, we prefer
6348 ;; unpcklps with register source since it is shorter.
;; Build a V2SF from SF operands 1 and 2 (nine alternatives).  The
;; condition rejects the case where both inputs are in memory.
;; Alternatives 0-2 use (v)unpcklps with a register operand 2, 3-5 use
;; (v)insertps $0x10 with a memory operand 2, 6 loads operand 1 with
;; movss when operand 2 is the constant "C", and 7-8 are MMX forms
;; (punpckldq / movd).
6349 (define_insn "*vec_concatv2sf_sse4_1"
6350 [(set (match_operand:V2SF 0 "register_operand"
6351 "=Yr,*x,x,Yr,*x,x,x,*y ,*y")
6353 (match_operand:SF 1 "nonimmediate_operand"
6354 " 0, 0,x, 0,0, x,m, 0 , m")
6355 (match_operand:SF 2 "vector_move_operand"
6356 " Yr,*x,x, m,m, m,C,*ym, C")))]
6357 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
6359 unpcklps\t{%2, %0|%0, %2}
6360 unpcklps\t{%2, %0|%0, %2}
6361 vunpcklps\t{%2, %1, %0|%0, %1, %2}
6362 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6363 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
6364 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
6365 %vmovss\t{%1, %0|%0, %1}
6366 punpckldq\t{%2, %0|%0, %2}
6367 movd\t{%1, %0|%0, %1}"
6368 [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
6369 (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
6370 (set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*")
6371 (set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*")
6372 (set_attr "length_immediate" "*,*,*,1,1,1,*,*,*")
6373 (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
6374 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
6376 ;; ??? In theory we can match memory for the MMX alternative, but allowing
6377 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
6378 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from SF operands 1 and 2 with four alternatives: two SSE
;; forms (unpcklps, or movss when operand 2 is the constant "C") and two
;; MMX forms (punpckldq, movd).
6379 (define_insn "*vec_concatv2sf_sse"
6380 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
6382 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
6383 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
6386 unpcklps\t{%2, %0|%0, %2}
6387 movss\t{%1, %0|%0, %1}
6388 punpckldq\t{%2, %0|%0, %2}
6389 movd\t{%1, %0|%0, %1}"
6390 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
6391 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Build a V4SF from two V2SF halves.  A register operand 2 uses
;; movlhps / vmovlhps; a memory operand 2 uses movhps / vmovhps.  The
;; noavx alternatives tie operand 1 to the destination.
6393 (define_insn "*vec_concatv4sf"
6394 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
6396 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
6397 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
6400 movlhps\t{%2, %0|%0, %2}
6401 vmovlhps\t{%2, %1, %0|%0, %1, %2}
6402 movhps\t{%2, %0|%0, %q2}
6403 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
6404 [(set_attr "isa" "noavx,avx,noavx,avx")
6405 (set_attr "type" "ssemov")
6406 (set_attr "prefix" "orig,vex,orig,vex")
6407 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Expander for the V_128 modes that delegates to
;; ix86_expand_vector_init, passing false as the first argument,
;; operands[0] as the target and operands[1] as the initializer.
6409 (define_expand "vec_init<mode>"
6410 [(match_operand:V_128 0 "register_operand")
6414 ix86_expand_vector_init (false, operands[0], operands[1]);
6418 ;; Avoid combining registers from different units in a single alternative,
6419 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Thirteen-alternative pattern that combines a duplicated scalar
;; (operand 2) with vector operand 1 for the VI4F_128 modes.  Operand 1
;; is either the constant "C" (alternatives 0-4), tied to the
;; destination, or a separate register.  The last three alternatives
;; have a memory destination with the scalar in an SSE, integer or x87
;; register ("type" is ssemov by default, imov for 11, fmov for 12).
6420 (define_insn "vec_set<mode>_0"
6421 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
6422 "=Yr,*v,v,Yi,x,x,v,Yr ,*x ,x ,m ,m ,m")
6424 (vec_duplicate:VI4F_128
6425 (match_operand:<ssescalarmode> 2 "general_operand"
6426 " Yr,*v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
6427 (match_operand:VI4F_128 1 "vector_move_operand"
6428 " C , C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
6432 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6433 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
6434 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
6435 %vmovd\t{%2, %0|%0, %2}
6436 movss\t{%2, %0|%0, %2}
6437 movss\t{%2, %0|%0, %2}
6438 vmovss\t{%2, %1, %0|%0, %1, %2}
6439 pinsrd\t{$0, %2, %0|%0, %2, 0}
6440 pinsrd\t{$0, %2, %0|%0, %2, 0}
6441 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
6445 [(set_attr "isa" "sse4,sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,sse4_noavx,avx,*,*,*")
6447 (cond [(eq_attr "alternative" "0,1,7,8,9")
6448 (const_string "sselog")
6449 (eq_attr "alternative" "11")
6450 (const_string "imov")
6451 (eq_attr "alternative" "12")
6452 (const_string "fmov")
6454 (const_string "ssemov")))
6455 (set_attr "prefix_extra" "*,*,*,*,*,*,*,1,1,1,*,*,*")
6456 (set_attr "length_immediate" "*,*,*,*,*,*,*,1,1,1,*,*,*")
6457 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex,*,*,*")
6458 (set_attr "mode" "SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")])
6460 ;; A subset is vec_setv4sf.
;; Insert SF operand 2 into V4SF operand 1 at the position given by the
;; one-hot constant operand 3.  The condition requires exact_log2 of
;; operand 3 to be a valid V4SF element index; the output code converts
;; it to the insertps immediate by shifting that index left by 4.
6461 (define_insn "*vec_setv4sf_sse4_1"
6462 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
6465 (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm"))
6466 (match_operand:V4SF 1 "register_operand" "0,0,x")
6467 (match_operand:SI 3 "const_int_operand")))]
6469 && ((unsigned) exact_log2 (INTVAL (operands[3]))
6470 < GET_MODE_NUNITS (V4SFmode))"
6472 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
6473 switch (which_alternative)
6477 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6479 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6484 [(set_attr "isa" "noavx,noavx,avx")
6485 (set_attr "type" "sselog")
6486 (set_attr "prefix_data16" "1,1,*")
6487 (set_attr "prefix_extra" "1")
6488 (set_attr "length_immediate" "1")
6489 (set_attr "prefix" "orig,orig,vex")
6490 (set_attr "mode" "V4SF")])
;; insertps expressed as an unspec with an 8-bit immediate (operand 3).
;; When operand 2 is in memory, the output code takes bits 7:6 of the
;; immediate as an element index, folds it into the address as a byte
;; offset of index * 4 (SFmode access), and clears those bits.
6492 (define_insn "sse4_1_insertps"
6493 [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
6494 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm")
6495 (match_operand:V4SF 1 "register_operand" "0,0,x")
6496 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
6500 if (MEM_P (operands[2]))
6502 unsigned count_s = INTVAL (operands[3]) >> 6;
6504 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
6505 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
6507 switch (which_alternative)
6511 return "insertps\t{%3, %2, %0|%0, %2, %3}";
6513 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6518 [(set_attr "isa" "noavx,noavx,avx")
6519 (set_attr "type" "sselog")
6520 (set_attr "prefix_data16" "1,1,*")
6521 (set_attr "prefix_extra" "1")
6522 (set_attr "length_immediate" "1")
6523 (set_attr "prefix" "orig,orig,vex")
6524 (set_attr "mode" "V4SF")])
;; Post-reload rewrite (the condition requires reload_completed): a
;; pattern whose destination is a VI4F_128 memory operand and whose
;; source duplicates non-memory scalar operand 1 becomes a plain move of
;; operand 1 to the same address, re-typed to <ssescalarmode> at
;; offset 0.
6527 [(set (match_operand:VI4F_128 0 "memory_operand")
6529 (vec_duplicate:VI4F_128
6530 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
6533 "TARGET_SSE && reload_completed"
6534 [(set (match_dup 0) (match_dup 1))]
6535 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
;; Expander for all V modes that delegates to ix86_expand_vector_set,
;; passing false, the vector (operand 0), the scalar (operand 1) and the
;; constant element index taken from operand 2.
6537 (define_expand "vec_set<mode>"
6538 [(match_operand:V 0 "register_operand")
6539 (match_operand:<ssescalarmode> 1 "register_operand")
6540 (match_operand 2 "const_int_operand")]
6543 ix86_expand_vector_set (false, operands[0], operands[1],
6544 INTVAL (operands[2]));
;; Extract element 0 of a V4SF.  After reload this splits into a plain
;; SFmode move: a register source is re-expressed as the same hard
;; register in SFmode, otherwise the memory source is re-typed to SFmode
;; at offset 0.
6548 (define_insn_and_split "*vec_extractv4sf_0"
6549 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
6551 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
6552 (parallel [(const_int 0)])))]
6553 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6555 "&& reload_completed"
6556 [(set (match_dup 0) (match_dup 1))]
6558 if (REG_P (operands[1]))
6559 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
6561 operands[1] = adjust_address (operands[1], SFmode, 0);
;; Extract element operand 2 (0..3) of a V4SF register.  Alternatives 0
;; and 1 (integer register or memory destination) emit extractps.  When
;; the destination is an SSE register the insn is split after reload:
;; the visible split code emits either a shufps that replicates the
;; selected element or vec_interleave_highv4sf of the source with itself.
6564 (define_insn_and_split "*sse4_1_extractps"
6565 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x")
6567 (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x")
6568 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))]
6571 %vextractps\t{%2, %1, %0|%0, %1, %2}
6572 %vextractps\t{%2, %1, %0|%0, %1, %2}
6575 "&& reload_completed && SSE_REG_P (operands[0])"
6578 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
6579 switch (INTVAL (operands[2]))
6583 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
6584 operands[2], operands[2],
6585 GEN_INT (INTVAL (operands[2]) + 4),
6586 GEN_INT (INTVAL (operands[2]) + 4)));
6589 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
6592 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
6597 [(set_attr "isa" "*,*,noavx,avx")
6598 (set_attr "type" "sselog,sselog,*,*")
6599 (set_attr "prefix_data16" "1,1,*,*")
6600 (set_attr "prefix_extra" "1,1,*,*")
6601 (set_attr "length_immediate" "1,1,*,*")
6602 (set_attr "prefix" "maybe_vex,maybe_vex,*,*")
6603 (set_attr "mode" "V4SF,V4SF,*,*")])
;; Extract element operand 2 (0..3) from a V4SF in offsettable memory.
;; After reload this becomes a scalar SFmode load from the original
;; address plus index * 4 bytes.
6605 (define_insn_and_split "*vec_extractv4sf_mem"
6606 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
6608 (match_operand:V4SF 1 "memory_operand" "o,o,o")
6609 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
6612 "&& reload_completed"
6613 [(set (match_dup 0) (match_dup 1))]
6615 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; Name prefix for the quarter-width extract expander: the 32-bit
;; element modes map to "avx512f", the 64-bit element modes to "avx512dq".
6618 (define_mode_attr extract_type
6619 [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
;; Mnemonic suffix for the quarter-width extract: 32x4 or 64x2.
6621 (define_mode_attr extract_suf
6622 [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
;; 512-bit modes for the quarter-width extract; the 64-bit element
;; modes are enabled only under TARGET_AVX512DQ.
6624 (define_mode_iterator AVX512_VEC
6625 [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
;; Masked quarter-width extract expander.  Operand 2 (0..3) selects the
;; quarter.  For V16SI/V16SF it emits the 32x4 insn with element indices
;; mask*4 .. mask*4+3; otherwise it emits the 64x2 insn with indices
;; mask*2 and mask*2+1.  Operand 3 is the merge source and operand 4 the
;; QImode mask.
;; NOTE(review): when operand 0 is memory and operand 3 is a
;; CONST_VECTOR, operands[0] is replaced by force_reg of that memory; no
;; store back to the original memory location is visible here - confirm.
6627 (define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
6628 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
6629 (match_operand:AVX512_VEC 1 "register_operand")
6630 (match_operand:SI 2 "const_0_to_3_operand")
6631 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
6632 (match_operand:QI 4 "register_operand")]
6636 mask = INTVAL (operands[2]);
6638 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6639 operands[0] = force_reg (<ssequartermode>mode, operands[0]);
6641 if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
6642 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
6643 operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
6644 GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
6647 emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (operands[0],
6648 operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
;; Masked 64x2 extract that stores straight to memory: the merge source
;; (operand 4) must be the same memory as the destination, which the
;; condition checks with rtx_equal_p.  The condition also requires the
;; two selectors to be an aligned consecutive pair; the output code
;; turns the first selector into the instruction immediate with >> 1.
;; Operand 5 is used as a write mask ({%5} in the template), so it uses
;; the "Yk" constraint like the sibling *_maskm patterns rather than
;; plain "k", which would also allow k0.
6653 (define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
6654 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6655 (vec_merge:<ssequartermode>
6656 (vec_select:<ssequartermode>
6657 (match_operand:V8FI 1 "register_operand" "v")
6658 (parallel [(match_operand 2 "const_0_to_7_operand")
6659 (match_operand 3 "const_0_to_7_operand")]))
6660 (match_operand:<ssequartermode> 4 "memory_operand" "0")
6661 (match_operand:QI 5 "register_operand" "Yk")))]
6663 && (INTVAL (operands[2]) % 2 == 0)
6664 && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
6665 && rtx_equal_p (operands[4], operands[0])"
6667 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6668 return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
6670 [(set_attr "type" "sselog")
6671 (set_attr "prefix_extra" "1")
6672 (set_attr "length_immediate" "1")
6673 (set_attr "memory" "store")
6674 (set_attr "prefix" "evex")
6675 (set_attr "mode" "<sseinsnmode>")])
;; Masked 32x4 extract that stores straight to memory: the merge source
;; (operand 6) must be the same memory as the destination
;; (rtx_equal_p).  The condition requires operand 2 to be a multiple of
;; 4 and operands 2..5 to be consecutive; the output code turns operand
;; 2 into the instruction immediate with >> 2.
6677 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
6678 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
6679 (vec_merge:<ssequartermode>
6680 (vec_select:<ssequartermode>
6681 (match_operand:V16FI 1 "register_operand" "v")
6682 (parallel [(match_operand 2 "const_0_to_15_operand")
6683 (match_operand 3 "const_0_to_15_operand")
6684 (match_operand 4 "const_0_to_15_operand")
6685 (match_operand 5 "const_0_to_15_operand")]))
6686 (match_operand:<ssequartermode> 6 "memory_operand" "0")
6687 (match_operand:QI 7 "register_operand" "Yk")))]
6689 && ((INTVAL (operands[2]) % 4 == 0)
6690 && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6691 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6692 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))
6693 && rtx_equal_p (operands[6], operands[0])"
6695 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6696 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
6698 [(set_attr "type" "sselog")
6699 (set_attr "prefix_extra" "1")
6700 (set_attr "length_immediate" "1")
6701 (set_attr "memory" "store")
6702 (set_attr "prefix" "evex")
6703 (set_attr "mode" "<sseinsnmode>")])
;; 64x2 extract of two consecutive elements of a V8FI register.  The
;; output code derives the instruction immediate from the first selector
;; with >> 1, so the condition must also require that selector to be
;; even: without the check an unaligned pair such as (1 2) would match
;; and be emitted as immediate 0, which extracts elements (0 1).  This
;; mirrors the alignment check in the *_maskm variant.
6705 (define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
6706 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6707 (vec_select:<ssequartermode>
6708 (match_operand:V8FI 1 "register_operand" "v")
6709 (parallel [(match_operand 2 "const_0_to_7_operand")
6710 (match_operand 3 "const_0_to_7_operand")])))]
6711 "TARGET_AVX512DQ && (INTVAL (operands[2]) % 2 == 0) && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)"
6713 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
6714 return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
6716 [(set_attr "type" "sselog1")
6717 (set_attr "prefix_extra" "1")
6718 (set_attr "length_immediate" "1")
6719 (set_attr "prefix" "evex")
6720 (set_attr "mode" "<sseinsnmode>")])
;; 32x4 extract of four consecutive elements of a V16FI register.  The
;; output code derives the instruction immediate from the first selector
;; with >> 2, so the condition must also require that selector to be a
;; multiple of 4: without the check an unaligned run such as (1 2 3 4)
;; would match and be emitted as immediate 0, which extracts (0 1 2 3).
;; This mirrors the alignment check in the *_maskm variant.
6722 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
6723 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6724 (vec_select:<ssequartermode>
6725 (match_operand:V16FI 1 "register_operand" "v")
6726 (parallel [(match_operand 2 "const_0_to_15_operand")
6727 (match_operand 3 "const_0_to_15_operand")
6728 (match_operand 4 "const_0_to_15_operand")
6729 (match_operand 5 "const_0_to_15_operand")])))]
6731 && ((INTVAL (operands[2]) % 4 == 0) && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
6732 && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
6733 && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))"
6735 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
6736 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
6738 [(set_attr "type" "sselog1")
6739 (set_attr "prefix_extra" "1")
6740 (set_attr "length_immediate" "1")
6741 (set_attr "prefix" "evex")
6742 (set_attr "mode" "<sseinsnmode>")])
;; Name prefix for the half-width extract expander: the 32-bit element
;; modes map to "avx512dq", the 64-bit element modes to "avx512f".
6744 (define_mode_attr extract_type_2
6745 [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
;; Mnemonic suffix for the half-width extract: 32x8 or 64x4.
6747 (define_mode_attr extract_suf_2
6748 [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
;; 512-bit modes for the half-width extract; the 32-bit element modes
;; are enabled only under TARGET_AVX512DQ.
6750 (define_mode_iterator AVX512_VEC_2
6751 [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
;; Masked half-width extract expander.  Operand 2 (0 or 1) selects
;; between the vec_extract_lo_<mode>_mask and vec_extract_hi_<mode>_mask
;; generators; operand 3 is the merge source and operand 4 the QImode
;; mask.  Operand 0 is declared in <ssehalfvecmode>, so the force_reg
;; below must use that mode as well (it previously named
;; <ssequartermode>, a mode mismatch with the operand it copies); the
;; sibling avx512vl_vextractf128<mode> expander uses the half mode.
;; NOTE(review): force_reg replaces a memory destination with a
;; register; no store back to the original memory location is visible
;; here - confirm.
6753 (define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
6754 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6755 (match_operand:AVX512_VEC_2 1 "register_operand")
6756 (match_operand:SI 2 "const_0_to_1_operand")
6757 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
6758 (match_operand:QI 4 "register_operand")]
6761 rtx (*insn)(rtx, rtx, rtx, rtx);
6763 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6764 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6766 switch (INTVAL (operands[2]))
6769 insn = gen_vec_extract_lo_<mode>_mask;
6772 insn = gen_vec_extract_hi_<mode>_mask;
6778 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
;; Post-reload rewrite (the condition requires reload_completed and
;; rejects memory-to-memory): selecting elements 0..3 of a V8FI value is
;; turned into a plain move of operand 1 re-expressed in
;; <ssehalfvecmode> (via gen_rtx_REG on the same register number or
;; gen_lowpart).
6783 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6784 (vec_select:<ssehalfvecmode>
6785 (match_operand:V8FI 1 "nonimmediate_operand")
6786 (parallel [(const_int 0) (const_int 1)
6787 (const_int 2) (const_int 3)])))]
6788 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6789 && reload_completed"
6792 rtx op1 = operands[1];
6794 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6796 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6797 emit_move_insn (operands[0], op1);
;; Masked store of elements 0..3 of a V8FI register to memory using
;; vextract<shuffletype>64x4 with immediate 0.  The merge source
;; (operand 2) must be the same memory as the destination (rtx_equal_p).
;; NOTE(review): unlike vec_extract_hi_<mode>_maskm, no "memory" "store"
;; attribute is set here - confirm whether that is intentional.
6801 (define_insn "vec_extract_lo_<mode>_maskm"
6802 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6803 (vec_merge:<ssehalfvecmode>
6804 (vec_select:<ssehalfvecmode>
6805 (match_operand:V8FI 1 "register_operand" "v")
6806 (parallel [(const_int 0) (const_int 1)
6807 (const_int 2) (const_int 3)]))
6808 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6809 (match_operand:QI 3 "register_operand" "Yk")))]
6811 && rtx_equal_p (operands[2], operands[0])"
6812 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
6813 [(set_attr "type" "sselog1")
6814 (set_attr "prefix_extra" "1")
6815 (set_attr "length_immediate" "1")
6816 (set_attr "prefix" "evex")
6817 (set_attr "mode" "<sseinsnmode>")])
;; Select elements 0..3 of a V8FI operand.  The source may be a register
;; or memory, but the condition rejects memory-to-memory.  The visible
;; output branch emits vextract<shuffletype>64x4 with immediate 0.
6819 (define_insn "vec_extract_lo_<mode><mask_name>"
6820 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
6821 (vec_select:<ssehalfvecmode>
6822 (match_operand:V8FI 1 "nonimmediate_operand" "v,m")
6823 (parallel [(const_int 0) (const_int 1)
6824 (const_int 2) (const_int 3)])))]
6825 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6828 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6832 [(set_attr "type" "sselog1")
6833 (set_attr "prefix_extra" "1")
6834 (set_attr "length_immediate" "1")
6835 (set_attr "prefix" "evex")
6836 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of elements 4..7 of a V8FI register to memory using
;; vextract<shuffletype>64x4 with immediate 1.  The merge source
;; (operand 2) must be the same memory as the destination (rtx_equal_p).
6838 (define_insn "vec_extract_hi_<mode>_maskm"
6839 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6840 (vec_merge:<ssehalfvecmode>
6841 (vec_select:<ssehalfvecmode>
6842 (match_operand:V8FI 1 "register_operand" "v")
6843 (parallel [(const_int 4) (const_int 5)
6844 (const_int 6) (const_int 7)]))
6845 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6846 (match_operand:QI 3 "register_operand" "Yk")))]
6848 && rtx_equal_p (operands[2], operands[0])"
6849 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6850 [(set_attr "type" "sselog")
6851 (set_attr "prefix_extra" "1")
6852 (set_attr "length_immediate" "1")
6853 (set_attr "memory" "store")
6854 (set_attr "prefix" "evex")
6855 (set_attr "mode" "<sseinsnmode>")])
;; Select elements 4..7 of a V8FI register using
;; vextract<shuffletype>64x4 with immediate 1.
6857 (define_insn "vec_extract_hi_<mode><mask_name>"
6858 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6859 (vec_select:<ssehalfvecmode>
6860 (match_operand:V8FI 1 "register_operand" "v")
6861 (parallel [(const_int 4) (const_int 5)
6862 (const_int 6) (const_int 7)])))]
6864 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
6865 [(set_attr "type" "sselog1")
6866 (set_attr "prefix_extra" "1")
6867 (set_attr "length_immediate" "1")
6868 (set_attr "prefix" "evex")
6869 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of elements 8..15 of a V16FI register to memory using
;; vextract<shuffletype>32x8 with immediate 1.  The merge source
;; (operand 2) must be the same memory as the destination (rtx_equal_p).
;; Operand 3 is used as a write mask ({%3} in the template), so it uses
;; the "Yk" constraint like the V8FI *_maskm patterns rather than plain
;; "k", which would also allow k0.
6871 (define_insn "vec_extract_hi_<mode>_maskm"
6872 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6873 (vec_merge:<ssehalfvecmode>
6874 (vec_select:<ssehalfvecmode>
6875 (match_operand:V16FI 1 "register_operand" "v")
6876 (parallel [(const_int 8) (const_int 9)
6877 (const_int 10) (const_int 11)
6878 (const_int 12) (const_int 13)
6879 (const_int 14) (const_int 15)]))
6880 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6881 (match_operand:QI 3 "register_operand" "Yk")))]
6883 && rtx_equal_p (operands[2], operands[0])"
6884 "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6885 [(set_attr "type" "sselog1")
6886 (set_attr "prefix_extra" "1")
6887 (set_attr "length_immediate" "1")
6888 (set_attr "prefix" "evex")
6889 (set_attr "mode" "<sseinsnmode>")])
;; Select elements 8..15 of a V16FI register.  Alternative 0 (isa
;; avx512dq) emits vextract<shuffletype>32x8 and carries the optional
;; mask operand; alternative 1 (isa noavx512dq) emits vextracti64x4 and
;; has no mask operand in its template.
6891 (define_insn "vec_extract_hi_<mode><mask_name>"
6892 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
6893 (vec_select:<ssehalfvecmode>
6894 (match_operand:V16FI 1 "register_operand" "v,v")
6895 (parallel [(const_int 8) (const_int 9)
6896 (const_int 10) (const_int 11)
6897 (const_int 12) (const_int 13)
6898 (const_int 14) (const_int 15)])))]
6899 "TARGET_AVX512F && <mask_avx512dq_condition>"
6901 vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
6902 vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6903 [(set_attr "type" "sselog1")
6904 (set_attr "prefix_extra" "1")
6905 (set_attr "isa" "avx512dq,noavx512dq")
6906 (set_attr "length_immediate" "1")
6907 (set_attr "prefix" "evex")
6908 (set_attr "mode" "<sseinsnmode>")])
6910 (define_expand "avx512vl_vextractf128<mode>"
6911 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6912 (match_operand:VI48F_256 1 "register_operand")
6913 (match_operand:SI 2 "const_0_to_1_operand")
6914 (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
6915 (match_operand:QI 4 "register_operand")]
6916 "TARGET_AVX512DQ && TARGET_AVX512VL"
6918 rtx (*insn)(rtx, rtx, rtx, rtx);
6920 if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
6921 operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
6923 switch (INTVAL (operands[2]))
6926 insn = gen_vec_extract_lo_<mode>_mask;
6929 insn = gen_vec_extract_hi_<mode>_mask;
6935 emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
6939 (define_expand "avx_vextractf128<mode>"
6940 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6941 (match_operand:V_256 1 "register_operand")
6942 (match_operand:SI 2 "const_0_to_1_operand")]
6945 rtx (*insn)(rtx, rtx);
6947 switch (INTVAL (operands[2]))
6950 insn = gen_vec_extract_lo_<mode>;
6953 insn = gen_vec_extract_hi_<mode>;
6959 emit_insn (insn (operands[0], operands[1]));
6963 (define_insn "vec_extract_lo_<mode><mask_name>"
6964 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
6965 (vec_select:<ssehalfvecmode>
6966 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
6967 (parallel [(const_int 0) (const_int 1)
6968 (const_int 2) (const_int 3)
6969 (const_int 4) (const_int 5)
6970 (const_int 6) (const_int 7)])))]
6972 && <mask_mode512bit_condition>
6973 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6976 return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6982 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6983 (vec_select:<ssehalfvecmode>
6984 (match_operand:V16FI 1 "nonimmediate_operand")
6985 (parallel [(const_int 0) (const_int 1)
6986 (const_int 2) (const_int 3)
6987 (const_int 4) (const_int 5)
6988 (const_int 6) (const_int 7)])))]
6989 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
6990 && reload_completed"
6993 rtx op1 = operands[1];
6995 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6997 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6998 emit_move_insn (operands[0], op1);
7002 (define_insn "vec_extract_lo_<mode><mask_name>"
7003 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
7004 (vec_select:<ssehalfvecmode>
7005 (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v")
7006 (parallel [(const_int 0) (const_int 1)])))]
7008 && <mask_avx512vl_condition> && <mask_avx512dq_condition>
7009 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7012 return "vextract<shuffletype>64x2\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}";
7016 [(set_attr "type" "sselog")
7017 (set_attr "prefix_extra" "1")
7018 (set_attr "length_immediate" "1")
7019 (set_attr "memory" "none,store")
7020 (set_attr "prefix" "evex")
7021 (set_attr "mode" "XI")])
7024 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7025 (vec_select:<ssehalfvecmode>
7026 (match_operand:VI8F_256 1 "nonimmediate_operand")
7027 (parallel [(const_int 0) (const_int 1)])))]
7028 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
7029 && reload_completed"
7032 rtx op1 = operands[1];
7034 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
7036 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
7037 emit_move_insn (operands[0], op1);
7041 (define_insn "vec_extract_hi_<mode><mask_name>"
7042 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
7043 (vec_select:<ssehalfvecmode>
7044 (match_operand:VI8F_256 1 "register_operand" "v,v")
7045 (parallel [(const_int 2) (const_int 3)])))]
7046 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7048 if (TARGET_AVX512VL)
7050 if (TARGET_AVX512DQ)
7051 return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7053 return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
7056 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7058 [(set_attr "type" "sselog")
7059 (set_attr "prefix_extra" "1")
7060 (set_attr "length_immediate" "1")
7061 (set_attr "memory" "none,store")
7062 (set_attr "prefix" "vex")
7063 (set_attr "mode" "<sseinsnmode>")])
7066 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
7067 (vec_select:<ssehalfvecmode>
7068 (match_operand:VI4F_256 1 "nonimmediate_operand")
7069 (parallel [(const_int 0) (const_int 1)
7070 (const_int 2) (const_int 3)])))]
7071 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) && reload_completed"
7074 rtx op1 = operands[1];
7076 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
7078 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
7079 emit_move_insn (operands[0], op1);
7084 (define_insn "vec_extract_lo_<mode><mask_name>"
7085 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7086 (vec_select:<ssehalfvecmode>
7087 (match_operand:VI4F_256 1 "register_operand" "v")
7088 (parallel [(const_int 0) (const_int 1)
7089 (const_int 2) (const_int 3)])))]
7090 "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
7093 return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
7097 [(set_attr "type" "sselog1")
7098 (set_attr "prefix_extra" "1")
7099 (set_attr "length_immediate" "1")
7100 (set_attr "prefix" "evex")
7101 (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the low 128-bit half (elements 0..3) of a 256-bit
;; VI4F_256 register into memory.
;;   operand 0: destination memory, in <ssehalfvecmode>.
;;   operand 1: 256-bit source register.
;;   operand 2: merge source; tied to operand 0 (constraint "0" plus the
;;              rtx_equal_p test in the condition), so elements whose
;;              mask bit is clear keep the value already in memory.
;;   operand 3: QImode writemask.  It is printed as the {%k} predicate
;;              via %{%3%}, so it uses "Yk" (k1-k7) rather than "k":
;;              k0 cannot be encoded as a predicate.
(define_insn "vec_extract_lo_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "v")
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)]))
          (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
          (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512VL && TARGET_AVX512F
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
;; Masked store of the high 128-bit half (elements 4..7) of a 256-bit
;; VI4F_256 register into memory.
;;   operand 0: destination memory, in <ssehalfvecmode>.
;;   operand 1: 256-bit source register.
;;   operand 2: merge source; tied to operand 0 (constraint "0" plus the
;;              rtx_equal_p test in the condition), so elements whose
;;              mask bit is clear keep the value already in memory.
;;   operand 3: writemask.  It is a mask register, so its mode is QI
;;              (matching vec_extract_lo_<mode>_maskm), not the vector
;;              mode <ssehalfvecmode>.  It is printed as the {%k}
;;              predicate via %{%3%}, so it uses "Yk" (k1-k7) rather
;;              than "k": k0 cannot be encoded as a predicate.
(define_insn "vec_extract_hi_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
        (vec_merge:<ssehalfvecmode>
          (vec_select:<ssehalfvecmode>
            (match_operand:VI4F_256 1 "register_operand" "v")
            (parallel [(const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))
          (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
          (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512F && TARGET_AVX512VL
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7139 (define_insn "vec_extract_hi_<mode><mask_name>"
7140 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
7141 (vec_select:<ssehalfvecmode>
7142 (match_operand:VI4F_256 1 "register_operand" "v")
7143 (parallel [(const_int 4) (const_int 5)
7144 (const_int 6) (const_int 7)])))]
7145 "TARGET_AVX && <mask_avx512vl_condition>"
7147 if (TARGET_AVX512VL)
7148 return "vextract<shuffletype>32x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
7150 return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
7152 [(set_attr "type" "sselog1")
7153 (set_attr "prefix_extra" "1")
7154 (set_attr "length_immediate" "1")
7155 (set (attr "prefix")
7157 (match_test "TARGET_AVX512VL")
7158 (const_string "evex")
7159 (const_string "vex")))
7160 (set_attr "mode" "<sseinsnmode>")])
7162 (define_insn_and_split "vec_extract_lo_v32hi"
7163 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7165 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
7166 (parallel [(const_int 0) (const_int 1)
7167 (const_int 2) (const_int 3)
7168 (const_int 4) (const_int 5)
7169 (const_int 6) (const_int 7)
7170 (const_int 8) (const_int 9)
7171 (const_int 10) (const_int 11)
7172 (const_int 12) (const_int 13)
7173 (const_int 14) (const_int 15)])))]
7174 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7176 "&& reload_completed"
7177 [(set (match_dup 0) (match_dup 1))]
7179 if (REG_P (operands[1]))
7180 operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1]));
7182 operands[1] = adjust_address (operands[1], V16HImode, 0);
7185 (define_insn "vec_extract_hi_v32hi"
7186 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
7188 (match_operand:V32HI 1 "register_operand" "v,v")
7189 (parallel [(const_int 16) (const_int 17)
7190 (const_int 18) (const_int 19)
7191 (const_int 20) (const_int 21)
7192 (const_int 22) (const_int 23)
7193 (const_int 24) (const_int 25)
7194 (const_int 26) (const_int 27)
7195 (const_int 28) (const_int 29)
7196 (const_int 30) (const_int 31)])))]
7198 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7199 [(set_attr "type" "sselog")
7200 (set_attr "prefix_extra" "1")
7201 (set_attr "length_immediate" "1")
7202 (set_attr "memory" "none,store")
7203 (set_attr "prefix" "evex")
7204 (set_attr "mode" "XI")])
7206 (define_insn_and_split "vec_extract_lo_v16hi"
7207 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
7209 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
7210 (parallel [(const_int 0) (const_int 1)
7211 (const_int 2) (const_int 3)
7212 (const_int 4) (const_int 5)
7213 (const_int 6) (const_int 7)])))]
7214 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7216 "&& reload_completed"
7217 [(set (match_dup 0) (match_dup 1))]
7219 if (REG_P (operands[1]))
7220 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
7222 operands[1] = adjust_address (operands[1], V8HImode, 0);
7225 (define_insn "vec_extract_hi_v16hi"
7226 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
7228 (match_operand:V16HI 1 "register_operand" "x,x")
7229 (parallel [(const_int 8) (const_int 9)
7230 (const_int 10) (const_int 11)
7231 (const_int 12) (const_int 13)
7232 (const_int 14) (const_int 15)])))]
7234 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
7235 [(set_attr "type" "sselog")
7236 (set_attr "prefix_extra" "1")
7237 (set_attr "length_immediate" "1")
7238 (set_attr "memory" "none,store")
7239 (set_attr "prefix" "vex")
7240 (set_attr "mode" "OI")])
7242 (define_insn_and_split "vec_extract_lo_v64qi"
7243 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7245 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
7246 (parallel [(const_int 0) (const_int 1)
7247 (const_int 2) (const_int 3)
7248 (const_int 4) (const_int 5)
7249 (const_int 6) (const_int 7)
7250 (const_int 8) (const_int 9)
7251 (const_int 10) (const_int 11)
7252 (const_int 12) (const_int 13)
7253 (const_int 14) (const_int 15)
7254 (const_int 16) (const_int 17)
7255 (const_int 18) (const_int 19)
7256 (const_int 20) (const_int 21)
7257 (const_int 22) (const_int 23)
7258 (const_int 24) (const_int 25)
7259 (const_int 26) (const_int 27)
7260 (const_int 28) (const_int 29)
7261 (const_int 30) (const_int 31)])))]
7262 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7264 "&& reload_completed"
7265 [(set (match_dup 0) (match_dup 1))]
7267 if (REG_P (operands[1]))
7268 operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1]));
7270 operands[1] = adjust_address (operands[1], V32QImode, 0);
7273 (define_insn "vec_extract_hi_v64qi"
7274 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
7276 (match_operand:V64QI 1 "register_operand" "v,v")
7277 (parallel [(const_int 32) (const_int 33)
7278 (const_int 34) (const_int 35)
7279 (const_int 36) (const_int 37)
7280 (const_int 38) (const_int 39)
7281 (const_int 40) (const_int 41)
7282 (const_int 42) (const_int 43)
7283 (const_int 44) (const_int 45)
7284 (const_int 46) (const_int 47)
7285 (const_int 48) (const_int 49)
7286 (const_int 50) (const_int 51)
7287 (const_int 52) (const_int 53)
7288 (const_int 54) (const_int 55)
7289 (const_int 56) (const_int 57)
7290 (const_int 58) (const_int 59)
7291 (const_int 60) (const_int 61)
7292 (const_int 62) (const_int 63)])))]
7294 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
7295 [(set_attr "type" "sselog")
7296 (set_attr "prefix_extra" "1")
7297 (set_attr "length_immediate" "1")
7298 (set_attr "memory" "none,store")
7299 (set_attr "prefix" "evex")
7300 (set_attr "mode" "XI")])
7302 (define_insn_and_split "vec_extract_lo_v32qi"
7303 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7305 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
7306 (parallel [(const_int 0) (const_int 1)
7307 (const_int 2) (const_int 3)
7308 (const_int 4) (const_int 5)
7309 (const_int 6) (const_int 7)
7310 (const_int 8) (const_int 9)
7311 (const_int 10) (const_int 11)
7312 (const_int 12) (const_int 13)
7313 (const_int 14) (const_int 15)])))]
7314 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7316 "&& reload_completed"
7317 [(set (match_dup 0) (match_dup 1))]
7319 if (REG_P (operands[1]))
7320 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
7322 operands[1] = adjust_address (operands[1], V16QImode, 0);
7325 (define_insn "vec_extract_hi_v32qi"
7326 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
7328 (match_operand:V32QI 1 "register_operand" "x,x")
7329 (parallel [(const_int 16) (const_int 17)
7330 (const_int 18) (const_int 19)
7331 (const_int 20) (const_int 21)
7332 (const_int 22) (const_int 23)
7333 (const_int 24) (const_int 25)
7334 (const_int 26) (const_int 27)
7335 (const_int 28) (const_int 29)
7336 (const_int 30) (const_int 31)])))]
7338 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
7339 [(set_attr "type" "sselog")
7340 (set_attr "prefix_extra" "1")
7341 (set_attr "length_immediate" "1")
7342 (set_attr "memory" "none,store")
7343 (set_attr "prefix" "vex")
7344 (set_attr "mode" "OI")])
;; Vector modes for which the vec_extract patterns are provided.
;; Entries are listed per element type, widest vector first.  A quoted
;; string is the target condition that enables that mode; a bare mode
;; name has no additional condition.
(define_mode_iterator VEC_EXTRACT_MODE
  [(V64QI "TARGET_AVX512BW")
   (V32QI "TARGET_AVX")
   V16QI
   (V32HI "TARGET_AVX512BW")
   (V16HI "TARGET_AVX")
   V8HI
   (V16SI "TARGET_AVX512F")
   (V8SI "TARGET_AVX")
   V4SI
   (V8DI "TARGET_AVX512F")
   (V4DI "TARGET_AVX")
   V2DI
   (V16SF "TARGET_AVX512F")
   (V8SF "TARGET_AVX")
   V4SF
   (V8DF "TARGET_AVX512F")
   (V4DF "TARGET_AVX")
   V2DF])
7355 (define_expand "vec_extract<mode>"
7356 [(match_operand:<ssescalarmode> 0 "register_operand")
7357 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
7358 (match_operand 2 "const_int_operand")]
7361 ix86_expand_vector_extract (false, operands[0], operands[1],
7362 INTVAL (operands[2]));
7366 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7368 ;; Parallel double-precision floating point element swizzling
7370 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7372 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
7373 [(set (match_operand:V8DF 0 "register_operand" "=v")
7376 (match_operand:V8DF 1 "register_operand" "v")
7377 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7378 (parallel [(const_int 1) (const_int 9)
7379 (const_int 3) (const_int 11)
7380 (const_int 5) (const_int 13)
7381 (const_int 7) (const_int 15)])))]
7383 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7384 [(set_attr "type" "sselog")
7385 (set_attr "prefix" "evex")
7386 (set_attr "mode" "V8DF")])
7388 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7389 (define_insn "avx_unpckhpd256<mask_name>"
7390 [(set (match_operand:V4DF 0 "register_operand" "=v")
7393 (match_operand:V4DF 1 "register_operand" "v")
7394 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
7395 (parallel [(const_int 1) (const_int 5)
7396 (const_int 3) (const_int 7)])))]
7397 "TARGET_AVX && <mask_avx512vl_condition>"
7398 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7399 [(set_attr "type" "sselog")
7400 (set_attr "prefix" "vex")
7401 (set_attr "mode" "V4DF")])
7403 (define_expand "vec_interleave_highv4df"
7407 (match_operand:V4DF 1 "register_operand" "x")
7408 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7409 (parallel [(const_int 0) (const_int 4)
7410 (const_int 2) (const_int 6)])))
7416 (parallel [(const_int 1) (const_int 5)
7417 (const_int 3) (const_int 7)])))
7418 (set (match_operand:V4DF 0 "register_operand")
7423 (parallel [(const_int 2) (const_int 3)
7424 (const_int 6) (const_int 7)])))]
7427 operands[3] = gen_reg_rtx (V4DFmode);
7428 operands[4] = gen_reg_rtx (V4DFmode);
7432 (define_insn "avx512vl_unpckhpd128_mask"
7433 [(set (match_operand:V2DF 0 "register_operand" "=v")
7437 (match_operand:V2DF 1 "register_operand" "v")
7438 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7439 (parallel [(const_int 1) (const_int 3)]))
7440 (match_operand:V2DF 3 "vector_move_operand" "0C")
7441 (match_operand:QI 4 "register_operand" "Yk")))]
7443 "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7444 [(set_attr "type" "sselog")
7445 (set_attr "prefix" "evex")
7446 (set_attr "mode" "V2DF")])
7448 (define_expand "vec_interleave_highv2df"
7449 [(set (match_operand:V2DF 0 "register_operand")
7452 (match_operand:V2DF 1 "nonimmediate_operand")
7453 (match_operand:V2DF 2 "nonimmediate_operand"))
7454 (parallel [(const_int 1)
7458 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
7459 operands[2] = force_reg (V2DFmode, operands[2]);
7462 (define_insn "*vec_interleave_highv2df"
7463 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
7466 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
7467 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
7468 (parallel [(const_int 1)
7470 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
7472 unpckhpd\t{%2, %0|%0, %2}
7473 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
7474 %vmovddup\t{%H1, %0|%0, %H1}
7475 movlpd\t{%H1, %0|%0, %H1}
7476 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
7477 %vmovhpd\t{%1, %0|%q0, %1}"
7478 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7479 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7480 (set_attr "ssememalign" "64")
7481 (set_attr "prefix_data16" "*,*,*,1,*,1")
7482 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7483 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
7485 (define_expand "avx512f_movddup512<mask_name>"
7486 [(set (match_operand:V8DF 0 "register_operand")
7489 (match_operand:V8DF 1 "nonimmediate_operand")
7491 (parallel [(const_int 0) (const_int 8)
7492 (const_int 2) (const_int 10)
7493 (const_int 4) (const_int 12)
7494 (const_int 6) (const_int 14)])))]
7497 (define_expand "avx512f_unpcklpd512<mask_name>"
7498 [(set (match_operand:V8DF 0 "register_operand")
7501 (match_operand:V8DF 1 "register_operand")
7502 (match_operand:V8DF 2 "nonimmediate_operand"))
7503 (parallel [(const_int 0) (const_int 8)
7504 (const_int 2) (const_int 10)
7505 (const_int 4) (const_int 12)
7506 (const_int 6) (const_int 14)])))]
7509 (define_insn "*avx512f_unpcklpd512<mask_name>"
7510 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
7513 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
7514 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
7515 (parallel [(const_int 0) (const_int 8)
7516 (const_int 2) (const_int 10)
7517 (const_int 4) (const_int 12)
7518 (const_int 6) (const_int 14)])))]
7521 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
7522 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7523 [(set_attr "type" "sselog")
7524 (set_attr "prefix" "evex")
7525 (set_attr "mode" "V8DF")])
7527 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7528 (define_expand "avx_movddup256<mask_name>"
7529 [(set (match_operand:V4DF 0 "register_operand")
7532 (match_operand:V4DF 1 "nonimmediate_operand")
7534 (parallel [(const_int 0) (const_int 4)
7535 (const_int 2) (const_int 6)])))]
7536 "TARGET_AVX && <mask_avx512vl_condition>")
7538 (define_expand "avx_unpcklpd256<mask_name>"
7539 [(set (match_operand:V4DF 0 "register_operand")
7542 (match_operand:V4DF 1 "register_operand")
7543 (match_operand:V4DF 2 "nonimmediate_operand"))
7544 (parallel [(const_int 0) (const_int 4)
7545 (const_int 2) (const_int 6)])))]
7546 "TARGET_AVX && <mask_avx512vl_condition>")
7548 (define_insn "*avx_unpcklpd256<mask_name>"
7549 [(set (match_operand:V4DF 0 "register_operand" "=v,v")
7552 (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
7553 (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
7554 (parallel [(const_int 0) (const_int 4)
7555 (const_int 2) (const_int 6)])))]
7556 "TARGET_AVX && <mask_avx512vl_condition>"
7558 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
7559 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
7560 [(set_attr "type" "sselog")
7561 (set_attr "prefix" "vex")
7562 (set_attr "mode" "V4DF")])
7564 (define_expand "vec_interleave_lowv4df"
7568 (match_operand:V4DF 1 "register_operand" "x")
7569 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7570 (parallel [(const_int 0) (const_int 4)
7571 (const_int 2) (const_int 6)])))
7577 (parallel [(const_int 1) (const_int 5)
7578 (const_int 3) (const_int 7)])))
7579 (set (match_operand:V4DF 0 "register_operand")
7584 (parallel [(const_int 0) (const_int 1)
7585 (const_int 4) (const_int 5)])))]
7588 operands[3] = gen_reg_rtx (V4DFmode);
7589 operands[4] = gen_reg_rtx (V4DFmode);
7592 (define_insn "avx512vl_unpcklpd128_mask"
7593 [(set (match_operand:V2DF 0 "register_operand" "=v")
7597 (match_operand:V2DF 1 "register_operand" "v")
7598 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7599 (parallel [(const_int 0) (const_int 2)]))
7600 (match_operand:V2DF 3 "vector_move_operand" "0C")
7601 (match_operand:QI 4 "register_operand" "Yk")))]
7603 "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7604 [(set_attr "type" "sselog")
7605 (set_attr "prefix" "evex")
7606 (set_attr "mode" "V2DF")])
7608 (define_expand "vec_interleave_lowv2df"
7609 [(set (match_operand:V2DF 0 "register_operand")
7612 (match_operand:V2DF 1 "nonimmediate_operand")
7613 (match_operand:V2DF 2 "nonimmediate_operand"))
7614 (parallel [(const_int 0)
7618 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
7619 operands[1] = force_reg (V2DFmode, operands[1]);
7622 (define_insn "*vec_interleave_lowv2df"
7623 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
7626 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
7627 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
7628 (parallel [(const_int 0)
7630 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
7632 unpcklpd\t{%2, %0|%0, %2}
7633 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7634 %vmovddup\t{%1, %0|%0, %q1}
7635 movhpd\t{%2, %0|%0, %q2}
7636 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
7637 %vmovlpd\t{%2, %H0|%H0, %2}"
7638 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7639 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7640 (set_attr "ssememalign" "64")
7641 (set_attr "prefix_data16" "*,*,*,1,*,1")
7642 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7643 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
7646 [(set (match_operand:V2DF 0 "memory_operand")
7649 (match_operand:V2DF 1 "register_operand")
7651 (parallel [(const_int 0)
7653 "TARGET_SSE3 && reload_completed"
7656 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
7657 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
7658 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
7663 [(set (match_operand:V2DF 0 "register_operand")
7666 (match_operand:V2DF 1 "memory_operand")
7668 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
7669 (match_operand:SI 3 "const_int_operand")])))]
7670 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
7671 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
7673 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
7676 (define_insn "avx512f_vmscalef<mode><round_name>"
7677 [(set (match_operand:VF_128 0 "register_operand" "=v")
7680 [(match_operand:VF_128 1 "register_operand" "v")
7681 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
7686 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
7687 [(set_attr "prefix" "evex")
7688 (set_attr "mode" "<ssescalarmode>")])
7690 (define_insn "<avx512>_scalef<mode><mask_name><round_name>"
7691 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7693 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
7694 (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
7697 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
7698 [(set_attr "prefix" "evex")
7699 (set_attr "mode" "<MODE>")])
7701 (define_expand "<avx512>_vternlog<mode>_maskz"
7702 [(match_operand:VI48_AVX512VL 0 "register_operand")
7703 (match_operand:VI48_AVX512VL 1 "register_operand")
7704 (match_operand:VI48_AVX512VL 2 "register_operand")
7705 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
7706 (match_operand:SI 4 "const_0_to_255_operand")
7707 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7710 emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
7711 operands[0], operands[1], operands[2], operands[3],
7712 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
7716 (define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
7717 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7718 (unspec:VI48_AVX512VL
7719 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7720 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7721 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7722 (match_operand:SI 4 "const_0_to_255_operand")]
7725 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
7726 [(set_attr "type" "sselog")
7727 (set_attr "prefix" "evex")
7728 (set_attr "mode" "<sseinsnmode>")])
7730 (define_insn "<avx512>_vternlog<mode>_mask"
7731 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7732 (vec_merge:VI48_AVX512VL
7733 (unspec:VI48_AVX512VL
7734 [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7735 (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7736 (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7737 (match_operand:SI 4 "const_0_to_255_operand")]
7740 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7742 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
7743 [(set_attr "type" "sselog")
7744 (set_attr "prefix" "evex")
7745 (set_attr "mode" "<sseinsnmode>")])
7747 (define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
7748 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7749 (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7752 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
7753 [(set_attr "prefix" "evex")
7754 (set_attr "mode" "<MODE>")])
7756 (define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
7757 [(set (match_operand:VF_128 0 "register_operand" "=v")
7760 [(match_operand:VF_128 1 "register_operand" "v")
7761 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7766 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}";
7767 [(set_attr "prefix" "evex")
7768 (set_attr "mode" "<ssescalarmode>")])
7770 (define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
7771 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7772 (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
7773 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
7774 (match_operand:SI 3 "const_0_to_255_operand")]
7777 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
7778 [(set_attr "prefix" "evex")
7779 (set_attr "mode" "<sseinsnmode>")])
7781 (define_expand "avx512f_shufps512_mask"
7782 [(match_operand:V16SF 0 "register_operand")
7783 (match_operand:V16SF 1 "register_operand")
7784 (match_operand:V16SF 2 "nonimmediate_operand")
7785 (match_operand:SI 3 "const_0_to_255_operand")
7786 (match_operand:V16SF 4 "register_operand")
7787 (match_operand:HI 5 "register_operand")]
7790 int mask = INTVAL (operands[3]);
7791 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
7792 GEN_INT ((mask >> 0) & 3),
7793 GEN_INT ((mask >> 2) & 3),
7794 GEN_INT (((mask >> 4) & 3) + 16),
7795 GEN_INT (((mask >> 6) & 3) + 16),
7796 GEN_INT (((mask >> 0) & 3) + 4),
7797 GEN_INT (((mask >> 2) & 3) + 4),
7798 GEN_INT (((mask >> 4) & 3) + 20),
7799 GEN_INT (((mask >> 6) & 3) + 20),
7800 GEN_INT (((mask >> 0) & 3) + 8),
7801 GEN_INT (((mask >> 2) & 3) + 8),
7802 GEN_INT (((mask >> 4) & 3) + 24),
7803 GEN_INT (((mask >> 6) & 3) + 24),
7804 GEN_INT (((mask >> 0) & 3) + 12),
7805 GEN_INT (((mask >> 2) & 3) + 12),
7806 GEN_INT (((mask >> 4) & 3) + 28),
7807 GEN_INT (((mask >> 6) & 3) + 28),
7808 operands[4], operands[5]));
7813 (define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
7814 [(match_operand:VF_AVX512VL 0 "register_operand")
7815 (match_operand:VF_AVX512VL 1 "register_operand")
7816 (match_operand:VF_AVX512VL 2 "register_operand")
7817 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7818 (match_operand:SI 4 "const_0_to_255_operand")
7819 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7822 emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7823 operands[0], operands[1], operands[2], operands[3],
7824 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7825 <round_saeonly_expand_operand6>));
7829 (define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
7830 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7832 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7833 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7834 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7835 (match_operand:SI 4 "const_0_to_255_operand")]
7838 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7839 [(set_attr "prefix" "evex")
7840 (set_attr "mode" "<MODE>")])
7842 (define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
7843 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7844 (vec_merge:VF_AVX512VL
7846 [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7847 (match_operand:VF_AVX512VL 2 "register_operand" "v")
7848 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7849 (match_operand:SI 4 "const_0_to_255_operand")]
7852 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7854 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7855 [(set_attr "prefix" "evex")
7856 (set_attr "mode" "<MODE>")])
;; Zero-masking expander for the scalar (VF_128) VFIXUPIMM form.  It
;; forwards operands 0-4 to the corresponding _maskz_1 generator,
;; inserts CONST0_RTX (<MODE>mode) ahead of the mask (operand 5) and
;; appends whatever <round_saeonly_expand_operand6> expands to.
7858 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
7859 [(match_operand:VF_128 0 "register_operand")
7860 (match_operand:VF_128 1 "register_operand")
7861 (match_operand:VF_128 2 "register_operand")
7862 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7863 (match_operand:SI 4 "const_0_to_255_operand")
7864 (match_operand:<avx512fmaskmode> 5 "register_operand")]
7867 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7868 operands[0], operands[1], operands[2], operands[3],
7869 operands[4], CONST0_RTX (<MODE>mode), operands[5]
7870 <round_saeonly_expand_operand6>));
;; Scalar VFIXUPIMM (mnemonic suffix <ssescalarmodesuffix>) without
;; merge masking.  Operand 1 is tied to the destination, operand 2 is a
;; vector register, operand 3 has the matching integer vector mode and
;; operand 4 is an immediate in 0..255.  The "mode" attribute is the
;; scalar mode.
;; The output template is a plain string: no trailing C-style ';' (it
;; would only be read as an empty comment).
7874 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
7875 [(set (match_operand:VF_128 0 "register_operand" "=v")
7878 [(match_operand:VF_128 1 "register_operand" "0")
7879 (match_operand:VF_128 2 "register_operand" "v")
7880 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7881 (match_operand:SI 4 "const_0_to_255_operand")]
7886 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}"
7887 [(set_attr "prefix" "evex")
7888 (set_attr "mode" "<ssescalarmode>")])
;; Scalar VFIXUPIMM with a write mask: operand 5 is a mask register
;; ("Yk") printed as {%5} after the destination.  Operand 1 is tied to
;; the destination and operand 4 is an immediate in 0..255.
;; The output template is a plain string: no trailing C-style ';' (it
;; would only be read as an empty comment).
7890 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
7891 [(set (match_operand:VF_128 0 "register_operand" "=v")
7895 [(match_operand:VF_128 1 "register_operand" "0")
7896 (match_operand:VF_128 2 "register_operand" "v")
7897 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7898 (match_operand:SI 4 "const_0_to_255_operand")]
7903 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7905 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}"
7906 [(set_attr "prefix" "evex")
7907 (set_attr "mode" "<ssescalarmode>")])
;; Packed VRNDSCALE.  Operand 1 is the source (register or memory,
;; subject to <round_saeonly_constraint>) and operand 2 is an immediate
;; in 0..255.  Masking and SAE text in the template come from the
;; <mask_operand3> and <round_saeonly_mask_op3> substitutions.
7909 (define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
7910 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7912 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
7913 (match_operand:SI 2 "const_0_to_255_operand")]
7916 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
7917 [(set_attr "length_immediate" "1")
7918 (set_attr "prefix" "evex")
7919 (set_attr "mode" "<MODE>")])
;; Scalar VRNDSCALE (mnemonic suffix <ssescalarmodesuffix>) on VF_128
;; operands.  Operand 1 is a vector register, operand 2 the second
;; source and operand 3 an immediate in 0..255.
;; NOTE(review): "mode" is <MODE> here, while the scalar sfixupimm
;; patterns use <ssescalarmode> -- confirm this is intended.
7921 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
7922 [(set (match_operand:VF_128 0 "register_operand" "=v")
7925 [(match_operand:VF_128 1 "register_operand" "v")
7926 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7927 (match_operand:SI 3 "const_0_to_255_operand")]
7932 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
7933 [(set_attr "length_immediate" "1")
7934 (set_attr "prefix" "evex")
7935 (set_attr "mode" "<MODE>")])
7937 ;; One bit in mask selects 2 elements.
;; 512-bit VSHUFPS with the sixteen element selectors spelled out as
;; operands 3-18 of the (parallel ...).  The insn condition requires
;; operands 7-10, 11-14 and 15-18 to equal operands 3-6 plus 4, 8 and
;; 12 respectively, so every group of four repeats the same selection.
;; The output code therefore rebuilds the 8-bit immediate from
;; operands 3-6 only (two bits each, operands 5 and 6 rebased by 16)
;; and stores it back into operands[3] for printing as %3.
7938 (define_insn "avx512f_shufps512_1<mask_name>"
7939 [(set (match_operand:V16SF 0 "register_operand" "=v")
7942 (match_operand:V16SF 1 "register_operand" "v")
7943 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7944 (parallel [(match_operand 3 "const_0_to_3_operand")
7945 (match_operand 4 "const_0_to_3_operand")
7946 (match_operand 5 "const_16_to_19_operand")
7947 (match_operand 6 "const_16_to_19_operand")
7948 (match_operand 7 "const_4_to_7_operand")
7949 (match_operand 8 "const_4_to_7_operand")
7950 (match_operand 9 "const_20_to_23_operand")
7951 (match_operand 10 "const_20_to_23_operand")
7952 (match_operand 11 "const_8_to_11_operand")
7953 (match_operand 12 "const_8_to_11_operand")
7954 (match_operand 13 "const_24_to_27_operand")
7955 (match_operand 14 "const_24_to_27_operand")
7956 (match_operand 15 "const_12_to_15_operand")
7957 (match_operand 16 "const_12_to_15_operand")
7958 (match_operand 17 "const_28_to_31_operand")
7959 (match_operand 18 "const_28_to_31_operand")])))]
7961 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7962 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7963 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7964 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
7965 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
7966 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
7967 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
7968 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
7969 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
7970 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
7971 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
7972 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
7975 mask = INTVAL (operands[3]);
7976 mask |= INTVAL (operands[4]) << 2;
7977 mask |= (INTVAL (operands[5]) - 16) << 4;
7978 mask |= (INTVAL (operands[6]) - 16) << 6;
7979 operands[3] = GEN_INT (mask);
7981 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
7983 [(set_attr "type" "sselog")
7984 (set_attr "length_immediate" "1")
7985 (set_attr "prefix" "evex")
7986 (set_attr "mode" "V16SF")])
;; Masked 512-bit VSHUFPD expander.  It decodes the 8-bit immediate in
;; operand 3 bit by bit into explicit element selector constants (for
;; example bit 1 picks 9 or 8, bit 7 picks 15 or 14) and hands them to
;; gen_avx512f_shufpd512_1_mask, passing operands 4 (V8DF) and 5 (QI
;; mask register) through unchanged as the last two arguments.
7988 (define_expand "avx512f_shufpd512_mask"
7989 [(match_operand:V8DF 0 "register_operand")
7990 (match_operand:V8DF 1 "register_operand")
7991 (match_operand:V8DF 2 "nonimmediate_operand")
7992 (match_operand:SI 3 "const_0_to_255_operand")
7993 (match_operand:V8DF 4 "register_operand")
7994 (match_operand:QI 5 "register_operand")]
7997 int mask = INTVAL (operands[3]);
7998 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
8000 GEN_INT (mask & 2 ? 9 : 8),
8001 GEN_INT (mask & 4 ? 3 : 2),
8002 GEN_INT (mask & 8 ? 11 : 10),
8003 GEN_INT (mask & 16 ? 5 : 4),
8004 GEN_INT (mask & 32 ? 13 : 12),
8005 GEN_INT (mask & 64 ? 7 : 6),
8006 GEN_INT (mask & 128 ? 15 : 14),
8007 operands[4], operands[5]));
;; 512-bit VSHUFPD with the eight element selectors as operands 3-10.
;; Each selector predicate admits exactly two adjacent values, so each
;; selector contributes one immediate bit: the output code subtracts
;; the lower bound of each range and shifts the result into bit
;; positions 0-7, then stores the immediate in operands[3] for %3.
8011 (define_insn "avx512f_shufpd512_1<mask_name>"
8012 [(set (match_operand:V8DF 0 "register_operand" "=v")
8015 (match_operand:V8DF 1 "register_operand" "v")
8016 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
8017 (parallel [(match_operand 3 "const_0_to_1_operand")
8018 (match_operand 4 "const_8_to_9_operand")
8019 (match_operand 5 "const_2_to_3_operand")
8020 (match_operand 6 "const_10_to_11_operand")
8021 (match_operand 7 "const_4_to_5_operand")
8022 (match_operand 8 "const_12_to_13_operand")
8023 (match_operand 9 "const_6_to_7_operand")
8024 (match_operand 10 "const_14_to_15_operand")])))]
8028 mask = INTVAL (operands[3]);
8029 mask |= (INTVAL (operands[4]) - 8) << 1;
8030 mask |= (INTVAL (operands[5]) - 2) << 2;
8031 mask |= (INTVAL (operands[6]) - 10) << 3;
8032 mask |= (INTVAL (operands[7]) - 4) << 4;
8033 mask |= (INTVAL (operands[8]) - 12) << 5;
8034 mask |= (INTVAL (operands[9]) - 6) << 6;
8035 mask |= (INTVAL (operands[10]) - 14) << 7;
8036 operands[3] = GEN_INT (mask);
8038 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
8040 [(set_attr "type" "sselog")
8041 (set_attr "length_immediate" "1")
8042 (set_attr "prefix" "evex")
8043 (set_attr "mode" "V8DF")])
;; 256-bit VSHUFPD expander.  It turns the integer immediate in
;; operand 3 into explicit element selector constants (bit 1 picks 5 or
;; 4, bit 2 picks 3 or 2, bit 3 picks 7 or 6) and emits
;; gen_avx_shufpd256_1, appending <mask_expand4_args> for the masked
;; variant.
8045 (define_expand "avx_shufpd256<mask_expand4_name>"
8046 [(match_operand:V4DF 0 "register_operand")
8047 (match_operand:V4DF 1 "register_operand")
8048 (match_operand:V4DF 2 "nonimmediate_operand")
8049 (match_operand:SI 3 "const_int_operand")]
8052 int mask = INTVAL (operands[3]);
8053 emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
8057 GEN_INT (mask & 2 ? 5 : 4),
8058 GEN_INT (mask & 4 ? 3 : 2),
8059 GEN_INT (mask & 8 ? 7 : 6)
8060 <mask_expand4_args>));
;; 256-bit VSHUFPD with explicit selectors in operands 3-6.  The output
;; code packs them into a 4-bit immediate (one bit per selector, after
;; subtracting the lower bound of each predicate range) and stores it
;; in operands[3] for printing as %3.
;; NOTE(review): "prefix" is "vex" even for the <mask_name> variant,
;; whereas vec_interleave_highv2di<mask_name> uses <mask_prefix> --
;; confirm whether that is intended.
8064 (define_insn "avx_shufpd256_1<mask_name>"
8065 [(set (match_operand:V4DF 0 "register_operand" "=v")
8068 (match_operand:V4DF 1 "register_operand" "v")
8069 (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8070 (parallel [(match_operand 3 "const_0_to_1_operand")
8071 (match_operand 4 "const_4_to_5_operand")
8072 (match_operand 5 "const_2_to_3_operand")
8073 (match_operand 6 "const_6_to_7_operand")])))]
8074 "TARGET_AVX && <mask_avx512vl_condition>"
8077 mask = INTVAL (operands[3]);
8078 mask |= (INTVAL (operands[4]) - 4) << 1;
8079 mask |= (INTVAL (operands[5]) - 2) << 2;
8080 mask |= (INTVAL (operands[6]) - 6) << 3;
8081 operands[3] = GEN_INT (mask);
8083 return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
8085 [(set_attr "type" "sseshuf")
8086 (set_attr "length_immediate" "1")
8087 (set_attr "prefix" "vex")
8088 (set_attr "mode" "V4DF")])
;; 128-bit SHUFPD expander.  Bit 0 of the immediate in operand 3
;; becomes the first selector (0 or 1) and bit 1 the second (2 or 3);
;; both are passed to gen_sse2_shufpd_v2df, followed by
;; <mask_expand4_args> for the masked variant.
8090 (define_expand "sse2_shufpd<mask_expand4_name>"
8091 [(match_operand:V2DF 0 "register_operand")
8092 (match_operand:V2DF 1 "register_operand")
8093 (match_operand:V2DF 2 "nonimmediate_operand")
8094 (match_operand:SI 3 "const_int_operand")]
8097 int mask = INTVAL (operands[3]);
8098 emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
8099 operands[2], GEN_INT (mask & 1),
8100 GEN_INT (mask & 2 ? 3 : 2)
8101 <mask_expand4_args>));
;; Masked 128-bit VSHUFPD.  Operands 3 and 4 are the two element
;; selectors; the output code folds them into a 2-bit immediate stored
;; in operands[3].  Operand 5 is the merge source (tied to the
;; destination or constant zero, "0C") and operand 6 the mask register
;; ("Yk").  Both assembler dialects must print the mask as %{%6%}; the
;; Intel half previously had "%{6%}", which printed a literal "{6}"
;; instead of the mask register.
8105 (define_insn "sse2_shufpd_v2df_mask"
8106 [(set (match_operand:V2DF 0 "register_operand" "=v")
8110 (match_operand:V2DF 1 "register_operand" "v")
8111 (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8112 (parallel [(match_operand 3 "const_0_to_1_operand")
8113 (match_operand 4 "const_2_to_3_operand")]))
8114 (match_operand:V2DF 5 "vector_move_operand" "0C")
8115 (match_operand:QI 6 "register_operand" "Yk")))]
8119 mask = INTVAL (operands[3]);
8120 mask |= (INTVAL (operands[4]) - 2) << 1;
8121 operands[3] = GEN_INT (mask);
8123 return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
8125 [(set_attr "type" "sseshuf")
8126 (set_attr "length_immediate" "1")
8127 (set_attr "prefix" "evex")
8128 (set_attr "mode" "V2DF")])
8130 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; 256-bit VPUNPCKHQDQ on V4DI operands; the selector list starts with
;; element 1.  Enabled for TARGET_AVX2 together with
;; <mask_avx512vl_condition>.
;; NOTE(review): "prefix" is "vex" even for the <mask_name> variant,
;; whereas vec_interleave_highv2di<mask_name> uses <mask_prefix> --
;; confirm whether that is intended.
8131 (define_insn "avx2_interleave_highv4di<mask_name>"
8132 [(set (match_operand:V4DI 0 "register_operand" "=v")
8135 (match_operand:V4DI 1 "register_operand" "v")
8136 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8137 (parallel [(const_int 1)
8141 "TARGET_AVX2 && <mask_avx512vl_condition>"
8142 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8143 [(set_attr "type" "sselog")
8144 (set_attr "prefix" "vex")
8145 (set_attr "mode" "OI")])
;; 512-bit VPUNPCKHQDQ on V8DI operands.  The selector takes the
;; odd-numbered elements, alternating between indices 1,3,5,7 and
;; 9,11,13,15.
8147 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
8148 [(set (match_operand:V8DI 0 "register_operand" "=v")
8151 (match_operand:V8DI 1 "register_operand" "v")
8152 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8153 (parallel [(const_int 1) (const_int 9)
8154 (const_int 3) (const_int 11)
8155 (const_int 5) (const_int 13)
8156 (const_int 7) (const_int 15)])))]
8158 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8159 [(set_attr "type" "sselog")
8160 (set_attr "prefix" "evex")
8161 (set_attr "mode" "XI")])
;; 128-bit PUNPCKHQDQ / VPUNPCKHQDQ on V2DI operands.  Alternative 0 is
;; the two-operand non-AVX form with operand 1 tied to the destination;
;; alternative 1 is the three-operand AVX form, which is also the one
;; that carries the optional mask (<mask_operand3>) and therefore takes
;; its "prefix" from <mask_prefix>.
8163 (define_insn "vec_interleave_highv2di<mask_name>"
8164 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8167 (match_operand:V2DI 1 "register_operand" "0,v")
8168 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
8169 (parallel [(const_int 1)
8171 "TARGET_SSE2 && <mask_avx512vl_condition>"
8173 punpckhqdq\t{%2, %0|%0, %2}
8174 vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8175 [(set_attr "isa" "noavx,avx")
8176 (set_attr "type" "sselog")
8177 (set_attr "prefix_data16" "1,*")
8178 (set_attr "prefix" "orig,<mask_prefix>")
8179 (set_attr "mode" "TI")])
;; 256-bit VPUNPCKLQDQ on V4DI operands; the selector list starts with
;; element 0.  Enabled for TARGET_AVX2 together with
;; <mask_avx512vl_condition>.
;; NOTE(review): "prefix" is "vex" even for the <mask_name> variant,
;; whereas vec_interleave_highv2di<mask_name> uses <mask_prefix> --
;; confirm whether that is intended.
8181 (define_insn "avx2_interleave_lowv4di<mask_name>"
8182 [(set (match_operand:V4DI 0 "register_operand" "=v")
8185 (match_operand:V4DI 1 "register_operand" "v")
8186 (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8187 (parallel [(const_int 0)
8191 "TARGET_AVX2 && <mask_avx512vl_condition>"
8192 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8193 [(set_attr "type" "sselog")
8194 (set_attr "prefix" "vex")
8195 (set_attr "mode" "OI")])
;; 512-bit VPUNPCKLQDQ on V8DI operands.  The selector takes the
;; even-numbered elements, alternating between indices 0,2,4,6 and
;; 8,10,12,14.
8197 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
8198 [(set (match_operand:V8DI 0 "register_operand" "=v")
8201 (match_operand:V8DI 1 "register_operand" "v")
8202 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8203 (parallel [(const_int 0) (const_int 8)
8204 (const_int 2) (const_int 10)
8205 (const_int 4) (const_int 12)
8206 (const_int 6) (const_int 14)])))]
8208 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8209 [(set_attr "type" "sselog")
8210 (set_attr "prefix" "evex")
8211 (set_attr "mode" "XI")])
;; 128-bit PUNPCKLQDQ / VPUNPCKLQDQ on V2DI operands.  Alternative 0 is
;; the two-operand non-AVX form with operand 1 tied to the destination;
;; alternative 1 is the three-operand AVX form, which also carries the
;; optional mask (<mask_operand3>).  Its "prefix" therefore comes from
;; <mask_prefix>, matching vec_interleave_highv2di<mask_name>, rather
;; than being hard-coded to "vex".
8213 (define_insn "vec_interleave_lowv2di<mask_name>"
8214 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8217 (match_operand:V2DI 1 "register_operand" "0,v")
8218 (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
8219 (parallel [(const_int 0)
8221 "TARGET_SSE2 && <mask_avx512vl_condition>"
8223 punpcklqdq\t{%2, %0|%0, %2}
8224 vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8225 [(set_attr "isa" "noavx,avx")
8226 (set_attr "type" "sselog")
8227 (set_attr "prefix_data16" "1,*")
8228 (set_attr "prefix" "orig,<mask_prefix>")
8229 (set_attr "mode" "TI")])
;; Unmasked 128-bit SHUFPD / VSHUFPD for the VI8F_128 modes: a
;; vec_select from the concatenation of operands 1 and 2.  Operand 3
;; selects element 0 or 1 and operand 4 element 2 or 3; the output code
;; packs the two choices into a 2-bit immediate stored in operands[3],
;; then returns one of the two templates depending on
;; which_alternative.
8231 (define_insn "sse2_shufpd_<mode>"
8232 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
8233 (vec_select:VI8F_128
8234 (vec_concat:<ssedoublevecmode>
8235 (match_operand:VI8F_128 1 "register_operand" "0,x")
8236 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
8237 (parallel [(match_operand 3 "const_0_to_1_operand")
8238 (match_operand 4 "const_2_to_3_operand")])))]
8242 mask = INTVAL (operands[3]);
8243 mask |= (INTVAL (operands[4]) - 2) << 1;
8244 operands[3] = GEN_INT (mask);
8246 switch (which_alternative)
8249 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
8251 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8256 [(set_attr "isa" "noavx,avx")
8257 (set_attr "type" "sseshuf")
8258 (set_attr "length_immediate" "1")
8259 (set_attr "prefix" "orig,vex")
8260 (set_attr "mode" "V2DF")])
8262 ;; Avoid combining registers from different units in a single alternative,
8263 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 1 of the V2DF operand 1 into the DF operand 0.
;; There are six alternatives: destination in memory, in an SSE
;; register (three variants), in an x87 register or in a general
;; register.  The condition rejects memory-to-memory.  prefix_data16 is
;; keyed on alternative 0 when TARGET_AVX is off.
8264 (define_insn "sse2_storehpd"
8265 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
8267 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
8268 (parallel [(const_int 1)])))]
8269 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8271 %vmovhpd\t{%1, %0|%0, %1}
8273 vunpckhpd\t{%d1, %0|%0, %d1}
8277 [(set_attr "isa" "*,noavx,avx,*,*,*")
8278 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
8279 (set (attr "prefix_data16")
8281 (and (eq_attr "alternative" "0")
8282 (not (match_test "TARGET_AVX")))
8284 (const_string "*")))
8285 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
8286 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (presumably a define_split; its header line is
;; not visible here): extracting element 1 from a V2DF memory operand
;; into a DF register becomes a plain DF move from the same address
;; plus 8 bytes.
8289 [(set (match_operand:DF 0 "register_operand")
8291 (match_operand:V2DF 1 "memory_operand")
8292 (parallel [(const_int 1)])))]
8293 "TARGET_SSE2 && reload_completed"
8294 [(set (match_dup 0) (match_dup 1))]
8295 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Element-1 extraction from V2DF for targets with SSE but without
;; SSE2 (see the condition).  It uses single-precision moves: movhps to
;; memory, movhlps between registers, and movlps from the high half of
;; a memory source (%H1).
8297 (define_insn "*vec_extractv2df_1_sse"
8298 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8300 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
8301 (parallel [(const_int 1)])))]
8302 "!TARGET_SSE2 && TARGET_SSE
8303 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8305 movhps\t{%1, %0|%q0, %1}
8306 movhlps\t{%1, %0|%0, %1}
8307 movlps\t{%H1, %0|%0, %H1}"
8308 [(set_attr "type" "ssemov")
8309 (set_attr "ssememalign" "64")
8310 (set_attr "mode" "V2SF,V4SF,V2SF")])
8312 ;; Avoid combining registers from different units in a single alternative,
8313 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 0 of the V2DF operand 1 into the DF operand 0.
;; There are five alternatives: destination in memory, in an SSE
;; register (from register or memory), in an x87 register or in a
;; general register.  The condition rejects memory-to-memory.
8314 (define_insn "sse2_storelpd"
8315 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
8317 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
8318 (parallel [(const_int 0)])))]
8319 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8321 %vmovlpd\t{%1, %0|%0, %1}
8326 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
8327 (set_attr "prefix_data16" "1,*,*,*,*")
8328 (set_attr "prefix" "maybe_vex")
8329 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; Post-reload rewrite (presumably a define_split; its header line is
;; not visible here): extracting element 0 of a V2DF operand into a DF
;; register becomes a plain DF move.  The preparation code re-expresses
;; a register source as the DFmode REG with the same register number,
;; and narrows what is presumably the memory case to DFmode at
;; offset 0.
8332 [(set (match_operand:DF 0 "register_operand")
8334 (match_operand:V2DF 1 "nonimmediate_operand")
8335 (parallel [(const_int 0)])))]
8336 "TARGET_SSE2 && reload_completed"
8337 [(set (match_dup 0) (match_dup 1))]
8339 if (REG_P (operands[1]))
8340 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
8342 operands[1] = adjust_address (operands[1], DFmode, 0);
;; Element-0 extraction from V2DF for targets with SSE but without
;; SSE2 (see the condition).  It uses movlps for the memory
;; alternatives and movaps for register to register.
8345 (define_insn "*vec_extractv2df_0_sse"
8346 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
8348 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
8349 (parallel [(const_int 0)])))]
8350 "!TARGET_SSE2 && TARGET_SSE
8351 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8353 movlps\t{%1, %0|%0, %1}
8354 movaps\t{%1, %0|%0, %1}
8355 movlps\t{%1, %0|%0, %q1}"
8356 [(set_attr "type" "ssemov")
8357 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapping sse2_loadhpd.  It lets ix86_fixup_binary_operands
;; choose the destination actually used (dst), emits sse2_loadhpd into
;; it, and copies dst to operand 0 when the two differ.
8359 (define_expand "sse2_loadhpd_exp"
8360 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8363 (match_operand:V2DF 1 "nonimmediate_operand")
8364 (parallel [(const_int 0)]))
8365 (match_operand:DF 2 "nonimmediate_operand")))]
8368 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8370 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
8372 /* Fix up the destination if needed. */
8373 if (dst != operands[0])
8374 emit_move_insn (operands[0], dst);
8379 ;; Avoid combining registers from different units in a single alternative,
8380 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines element 0 of the V2DF operand 1 with the DF operand 2.
;; There are seven alternatives: movhpd/vmovhpd when operand 2 is in
;; memory, unpcklpd/vunpcklpd when it is an SSE register, and three
;; further alternatives whose templates are not visible here.  The
;; condition rejects operands 1 and 2 both being memory.
8381 (define_insn "sse2_loadhpd"
8382 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8386 (match_operand:V2DF 1 "nonimmediate_operand"
8388 (parallel [(const_int 0)]))
8389 (match_operand:DF 2 "nonimmediate_operand"
8390 " m,m,x,x,x,*f,r")))]
8391 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8393 movhpd\t{%2, %0|%0, %2}
8394 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8395 unpcklpd\t{%2, %0|%0, %2}
8396 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8400 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
8401 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
8402 (set_attr "ssememalign" "64")
8403 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
8404 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
8405 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; Post-reload rewrite (presumably a define_split; its header line is
;; not visible here): when the V2DF destination is memory and its own
;; element 0 is kept, only the other half changes, so the pattern
;; becomes a plain DF store of operand 1 at the same address plus
;; 8 bytes.
8408 [(set (match_operand:V2DF 0 "memory_operand")
8410 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
8411 (match_operand:DF 1 "register_operand")))]
8412 "TARGET_SSE2 && reload_completed"
8413 [(set (match_dup 0) (match_dup 1))]
8414 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapping sse2_loadlpd.  It lets ix86_fixup_binary_operands
;; choose the destination actually used (dst), emits sse2_loadlpd into
;; it, and copies dst to operand 0 when the two differ.
8416 (define_expand "sse2_loadlpd_exp"
8417 [(set (match_operand:V2DF 0 "nonimmediate_operand")
8419 (match_operand:DF 2 "nonimmediate_operand")
8421 (match_operand:V2DF 1 "nonimmediate_operand")
8422 (parallel [(const_int 1)]))))]
8425 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
8427 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
8429 /* Fix up the destination if needed. */
8430 if (dst != operands[0])
8431 emit_move_insn (operands[0], dst);
8436 ;; Avoid combining registers from different units in a single alternative,
8437 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Combines the DF operand 2 with element 1 of the V2DF operand 1.
;; There are eleven alternatives, numbered from 0.  Alternative 0
;; allows operand 1 to be constant zero ("C").  Alternatives 1-7 cover
;; the SSE/AVX register and memory combinations, with alternative 5
;; using shufpd (type "sselog").  Alternatives 8-10 have a memory
;; destination; 9 and 10 take operand 2 from an x87 or general
;; register (types "fmov" and "imov").
8438 (define_insn "sse2_loadlpd"
8439 [(set (match_operand:V2DF 0 "nonimmediate_operand"
8440 "=x,x,x,x,x,x,x,x,m,m ,m")
8442 (match_operand:DF 2 "nonimmediate_operand"
8443 " m,m,m,x,x,0,0,x,x,*f,r")
8445 (match_operand:V2DF 1 "vector_move_operand"
8446 " C,0,x,0,x,x,o,o,0,0 ,0")
8447 (parallel [(const_int 1)]))))]
8448 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
8450 %vmovsd\t{%2, %0|%0, %2}
8451 movlpd\t{%2, %0|%0, %2}
8452 vmovlpd\t{%2, %1, %0|%0, %1, %2}
8453 movsd\t{%2, %0|%0, %2}
8454 vmovsd\t{%2, %1, %0|%0, %1, %2}
8455 shufpd\t{$2, %1, %0|%0, %1, 2}
8456 movhpd\t{%H1, %0|%0, %H1}
8457 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
8461 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
8463 (cond [(eq_attr "alternative" "5")
8464 (const_string "sselog")
8465 (eq_attr "alternative" "9")
8466 (const_string "fmov")
8467 (eq_attr "alternative" "10")
8468 (const_string "imov")
8470 (const_string "ssemov")))
8471 (set_attr "ssememalign" "64")
8472 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
8473 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
8474 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
8475 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; Post-reload rewrite (presumably a define_split; its header line is
;; not visible here): when the V2DF destination is memory and its own
;; element 1 is kept, the pattern becomes a plain DF store of operand 1
;; at offset 0 of that memory.
8478 [(set (match_operand:V2DF 0 "memory_operand")
8480 (match_operand:DF 1 "register_operand")
8481 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
8482 "TARGET_SSE2 && reload_completed"
8483 [(set (match_dup 0) (match_dup 1))]
8484 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; V2DF pattern combining operands 2 and 1, with nine alternatives.
;; Alternatives 0-1 use movsd/vmovsd between registers; 2-3 use
;; movlpd/vmovlpd from memory; 4 stores with movlpd; 5 uses shufpd
;; (type "sselog"); 6-8 use movhps/vmovhps on the high half (%H) of a
;; memory operand.  prefix_data16 is keyed on alternatives 2 and 4 when
;; TARGET_AVX is off.
8486 (define_insn "sse2_movsd"
8487 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
8489 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
8490 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
8494 movsd\t{%2, %0|%0, %2}
8495 vmovsd\t{%2, %1, %0|%0, %1, %2}
8496 movlpd\t{%2, %0|%0, %q2}
8497 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
8498 %vmovlpd\t{%2, %0|%q0, %2}
8499 shufpd\t{$2, %1, %0|%0, %1, 2}
8500 movhps\t{%H1, %0|%0, %H1}
8501 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
8502 %vmovhps\t{%1, %H0|%H0, %1}"
8503 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
8506 (eq_attr "alternative" "5")
8507 (const_string "sselog")
8508 (const_string "ssemov")))
8509 (set (attr "prefix_data16")
8511 (and (eq_attr "alternative" "2,4")
8512 (not (match_test "TARGET_AVX")))
8514 (const_string "*")))
8515 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
8516 (set_attr "ssememalign" "64")
8517 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
8518 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; V2DF result built from the single DF operand 1.  Alternative 0 is
;; non-AVX only, with operand 1 tied to the destination (its template
;; is not visible here); alternative 1 requires SSE3 and emits
;; (v)movddup from a register or memory source, with the optional mask
;; printed through <mask_operand2>.
8520 (define_insn "vec_dupv2df<mask_name>"
8521 [(set (match_operand:V2DF 0 "register_operand" "=x,v")
8523 (match_operand:DF 1 "nonimmediate_operand" " 0,vm")))]
8524 "TARGET_SSE2 && <mask_avx512vl_condition>"
8527 %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
8528 [(set_attr "isa" "noavx,sse3")
8529 (set_attr "type" "sselog1")
8530 (set_attr "prefix" "orig,maybe_vex")
8531 (set_attr "mode" "V2DF,DF")])
;; V2DF built from the two DF operands 1 and 2, with eight
;; alternatives.  Alternatives 0-1 use unpcklpd/vunpcklpd on registers.
;; Alternative 2 uses movddup when operand 2 duplicates a memory
;; operand 1 (constraint "1"); the condition allows both operands to be
;; memory only when TARGET_SSE3 holds and they are rtx_equal_p.
;; Alternatives 3-4 use movhpd/vmovhpd with operand 2 in memory;
;; alternative 5 uses movsd when operand 2 is constant zero ("C");
;; alternatives 6-7 are the non-AVX movlhps/movhps forms.
8533 (define_insn "*vec_concatv2df"
8534 [(set (match_operand:V2DF 0 "register_operand" "=x,v,v,x,x,v,x,x")
8536 (match_operand:DF 1 "nonimmediate_operand" " 0,v,m,0,x,m,0,0")
8537 (match_operand:DF 2 "vector_move_operand" " x,v,1,m,m,C,x,m")))]
8539 && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
8540 || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
8542 unpcklpd\t{%2, %0|%0, %2}
8543 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
8544 %vmovddup\t{%1, %0|%0, %1}
8545 movhpd\t{%2, %0|%0, %2}
8546 vmovhpd\t{%2, %1, %0|%0, %1, %2}
8547 %vmovsd\t{%1, %0|%0, %1}
8548 movlhps\t{%2, %0|%0, %2}
8549 movhps\t{%2, %0|%0, %2}"
8550 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
8553 (eq_attr "alternative" "0,1,2")
8554 (const_string "sselog")
8555 (const_string "ssemov")))
8556 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
8557 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
8558 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
8560 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8562 ;; Parallel integer down-conversion operations
8564 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Destination modes of the 512-bit-source truncations below, plus
;; attributes mapping each destination mode to its source mode
;; (pmov_src_mode), the lower-case source mode name used in pattern
;; names (pmov_src_lower) and the vpmov mnemonic suffix (pmov_suff_1).
8566 (define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
8567 (define_mode_attr pmov_src_mode
8568 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
8569 (define_mode_attr pmov_src_lower
8570 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
8571 (define_mode_attr pmov_suff_1
8572 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
;; Unmasked vpmov<trunsuffix><pmov_suff_1>: applies each any_truncate
;; code to the <pmov_src_mode> register operand 1.  The destination is
;; either a vector register or memory (alternative 1, "memory" attribute
;; "store").
8574 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
8575 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8576 (any_truncate:PMOV_DST_MODE_1
8577 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
8579 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
8580 [(set_attr "type" "ssemov")
8581 (set_attr "memory" "none,store")
8582 (set_attr "prefix" "evex")
8583 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpmov<trunsuffix><pmov_suff_1>: the truncation result is
;; vec_merge'd with operand 2 under the mask register operand 3.  With
;; a register destination operand 2 may be the destination itself or
;; constant zero ("0C"); with a memory destination it must be the
;; destination ("0").
8585 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
8586 [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
8587 (vec_merge:PMOV_DST_MODE_1
8588 (any_truncate:PMOV_DST_MODE_1
8589 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
8590 (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
8591 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8593 "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8594 [(set_attr "type" "ssemov")
8595 (set_attr "memory" "none,store")
8596 (set_attr "prefix" "evex")
8597 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked truncating store: the destination is a
;; memory operand, operand 1 the <pmov_src_mode> source register and
;; operand 2 the mask register.
8599 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
8600 [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
8601 (vec_merge:PMOV_DST_MODE_1
8602 (any_truncate:PMOV_DST_MODE_1
8603 (match_operand:<pmov_src_mode> 1 "register_operand"))
8605 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Unmasked vpmov<trunsuffix>wb from a V32HI register to a V32QI
;; register or memory destination.
8608 (define_insn "*avx512bw_<code>v32hiv32qi2"
8609 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
8611 (match_operand:V32HI 1 "register_operand" "v,v")))]
8613 "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
8614 [(set_attr "type" "ssemov")
8615 (set_attr "memory" "none,store")
8616 (set_attr "prefix" "evex")
8617 (set_attr "mode" "XI")])
;; Masked vpmov<trunsuffix>wb from V32HI to V32QI.  Operand 2 is the
;; merge source (destination or constant zero for the register
;; alternative, destination only for the memory alternative) and
;; operand 3 is the SImode mask register.
8619 (define_insn "avx512bw_<code>v32hiv32qi2_mask"
8620 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
8623 (match_operand:V32HI 1 "register_operand" "v,v"))
8624 (match_operand:V32QI 2 "vector_move_operand" "0C,0")
8625 (match_operand:SI 3 "register_operand" "Yk,Yk")))]
8627 "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8628 [(set_attr "type" "ssemov")
8629 (set_attr "memory" "none,store")
8630 (set_attr "prefix" "evex")
8631 (set_attr "mode" "XI")])
;; Expander for the masked V32HI -> V32QI truncating store; operand 2
;; is the SImode mask register.
;; NOTE(review): operand 0 uses "nonimmediate_operand" here, while the
;; PMOV_DST_MODE_1 _mask_store expander uses "memory_operand" --
;; confirm which is intended.
8633 (define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
8634 [(set (match_operand:V32QI 0 "nonimmediate_operand")
8637 (match_operand:V32HI 1 "register_operand"))
8639 (match_operand:SI 2 "register_operand")))]
;; Destination modes for the truncations whose source is the
;; <ssedoublemode> of the destination (V16QI only with
;; TARGET_AVX512BW), and the matching vpmov mnemonic suffixes.
8642 (define_mode_iterator PMOV_DST_MODE_2
8643 [V4SI V8HI (V16QI "TARGET_AVX512BW")])
8644 (define_mode_attr pmov_suff_2
8645 [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
;; Unmasked vpmov<trunsuffix><pmov_suff_2>: applies each any_truncate
;; code to the <ssedoublemode> register operand 1, writing a vector
;; register or memory destination.
8647 (define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
8648 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8649 (any_truncate:PMOV_DST_MODE_2
8650 (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
8652 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
8653 [(set_attr "type" "ssemov")
8654 (set_attr "memory" "none,store")
8655 (set_attr "prefix" "evex")
8656 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpmov<trunsuffix><pmov_suff_2>: the truncation result is
;; vec_merge'd with operand 2 under the mask register operand 3.
;; Operand 2 may be the destination or constant zero for the register
;; alternative, and must be the destination for the memory alternative.
8658 (define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
8659 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
8660 (vec_merge:PMOV_DST_MODE_2
8661 (any_truncate:PMOV_DST_MODE_2
8662 (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
8663 (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
8664 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
8666 "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8667 [(set_attr "type" "ssemov")
8668 (set_attr "memory" "none,store")
8669 (set_attr "prefix" "evex")
8670 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked truncating store of the PMOV_DST_MODE_2
;; family: operand 1 is the <ssedoublemode> source register and
;; operand 2 the mask register.
8672 (define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
8673 [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
8674 (vec_merge:PMOV_DST_MODE_2
8675 (any_truncate:PMOV_DST_MODE_2
8676 (match_operand:<ssedoublemode> 1 "register_operand"))
8678 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; Source modes for truncations to QI elements whose result is narrower
;; than 16 bytes (V8HI only with TARGET_AVX512BW).  The attributes map
;; each source mode to the truncated result mode (pmov_dst_3), to a
;; mode whose element count brings the total to 16 QI elements
;; (pmov_dst_zeroed_3) and to the vpmov mnemonic suffix (pmov_suff_3).
8681 (define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
8682 (define_mode_attr pmov_dst_3
8683 [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
8684 (define_mode_attr pmov_dst_zeroed_3
8685 [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
8686 (define_mode_attr pmov_suff_3
8687 [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
;; Register form of vpmov<trunsuffix><pmov_suff_3>.  The V16QI
;; destination is made of the <pmov_dst_3> truncation of operand 1 and
;; an all-zero operand 2 of mode <pmov_dst_zeroed_3> (presumably joined
;; by a vec_concat on a line not visible here).
8689 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
8690 [(set (match_operand:V16QI 0 "register_operand" "=v")
8692 (any_truncate:<pmov_dst_3>
8693 (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
8694 (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
8696 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8697 [(set_attr "type" "ssemov")
8698 (set_attr "prefix" "evex")
8699 (set_attr "mode" "TI")])
;; Store form of vpmov<trunsuffix>qb from a V2DI register to memory.
;; The visible selector lists bytes 2-15, presumably the part of the
;; V16QI memory destination that is left unchanged (the selecting lines
;; are not visible here).
8701 (define_insn "*avx512vl_<code>v2div2qi2_store"
8702 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8705 (match_operand:V2DI 1 "register_operand" "v"))
8708 (parallel [(const_int 2) (const_int 3)
8709 (const_int 4) (const_int 5)
8710 (const_int 6) (const_int 7)
8711 (const_int 8) (const_int 9)
8712 (const_int 10) (const_int 11)
8713 (const_int 12) (const_int 13)
8714 (const_int 14) (const_int 15)]))))]
8716 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
8717 [(set_attr "type" "ssemov")
8718 (set_attr "memory" "store")
8719 (set_attr "prefix" "evex")
8720 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix>qb from V2DI.  Elements 0-1
;; of operand 2 (the destination or constant zero, "0C") are the merge
;; source under the QImode mask register operand 3, and a V14QI
;; all-zero constant vector fills the rest of the V16QI result.
8722 (define_insn "avx512vl_<code>v2div2qi2_mask"
8723 [(set (match_operand:V16QI 0 "register_operand" "=v")
8727 (match_operand:V2DI 1 "register_operand" "v"))
8729 (match_operand:V16QI 2 "vector_move_operand" "0C")
8730 (parallel [(const_int 0) (const_int 1)]))
8731 (match_operand:QI 3 "register_operand" "Yk"))
8732 (const_vector:V14QI [(const_int 0) (const_int 0)
8733 (const_int 0) (const_int 0)
8734 (const_int 0) (const_int 0)
8735 (const_int 0) (const_int 0)
8736 (const_int 0) (const_int 0)
8737 (const_int 0) (const_int 0)
8738 (const_int 0) (const_int 0)])))]
8740 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8741 [(set_attr "type" "ssemov")
8742 (set_attr "prefix" "evex")
8743 (set_attr "mode" "TI")])
;; Masked store form of vpmov<trunsuffix>qb from V2DI to memory under
;; the QImode mask register operand 2.  The pattern uses two selectors:
;; elements 0-1 for the masked part and bytes 2-15 for what is
;; presumably the unchanged remainder of the destination.
8745 (define_insn "avx512vl_<code>v2div2qi2_mask_store"
8746 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8750 (match_operand:V2DI 1 "register_operand" "v"))
8753 (parallel [(const_int 0) (const_int 1)]))
8754 (match_operand:QI 2 "register_operand" "Yk"))
8757 (parallel [(const_int 2) (const_int 3)
8758 (const_int 4) (const_int 5)
8759 (const_int 6) (const_int 7)
8760 (const_int 8) (const_int 9)
8761 (const_int 10) (const_int 11)
8762 (const_int 12) (const_int 13)
8763 (const_int 14) (const_int 15)]))))]
8765 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8766 [(set_attr "type" "ssemov")
8767 (set_attr "memory" "store")
8768 (set_attr "prefix" "evex")
8769 (set_attr "mode" "TI")])
;; Store form of vpmov<trunsuffix><pmov_suff_3> producing four QI
;; elements from a VI4_128_8_256 register.  The visible selector lists
;; bytes 4-15, presumably the part of the V16QI memory destination that
;; is left unchanged.
8771 (define_insn "*avx512vl_<code><mode>v4qi2_store"
8772 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8775 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8778 (parallel [(const_int 4) (const_int 5)
8779 (const_int 6) (const_int 7)
8780 (const_int 8) (const_int 9)
8781 (const_int 10) (const_int 11)
8782 (const_int 12) (const_int 13)
8783 (const_int 14) (const_int 15)]))))]
8785 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8786 [(set_attr "type" "ssemov")
8787 (set_attr "memory" "store")
8788 (set_attr "prefix" "evex")
8789 (set_attr "mode" "TI")])
;; Masked register form of the four-element truncation.  Elements 0-3
;; of operand 2 (the destination or constant zero, "0C") are the merge
;; source under the QImode mask register operand 3, and a V12QI
;; all-zero constant vector fills the rest of the V16QI result.
8791 (define_insn "avx512vl_<code><mode>v4qi2_mask"
8792 [(set (match_operand:V16QI 0 "register_operand" "=v")
8796 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8798 (match_operand:V16QI 2 "vector_move_operand" "0C")
8799 (parallel [(const_int 0) (const_int 1)
8800 (const_int 2) (const_int 3)]))
8801 (match_operand:QI 3 "register_operand" "Yk"))
8802 (const_vector:V12QI [(const_int 0) (const_int 0)
8803 (const_int 0) (const_int 0)
8804 (const_int 0) (const_int 0)
8805 (const_int 0) (const_int 0)
8806 (const_int 0) (const_int 0)
8807 (const_int 0) (const_int 0)])))]
8809 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8810 [(set_attr "type" "ssemov")
8811 (set_attr "prefix" "evex")
8812 (set_attr "mode" "TI")])
;; Masked store form of the four-element truncation under the QImode
;; mask register operand 2.  The pattern uses two selectors: elements
;; 0-3 for the masked part and bytes 4-15 for what is presumably the
;; unchanged remainder of the destination.
8814 (define_insn "avx512vl_<code><mode>v4qi2_mask_store"
8815 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8819 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8822 (parallel [(const_int 0) (const_int 1)
8823 (const_int 2) (const_int 3)]))
8824 (match_operand:QI 2 "register_operand" "Yk"))
8827 (parallel [(const_int 4) (const_int 5)
8828 (const_int 6) (const_int 7)
8829 (const_int 8) (const_int 9)
8830 (const_int 10) (const_int 11)
8831 (const_int 12) (const_int 13)
8832 (const_int 14) (const_int 15)]))))]
8834 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8835 [(set_attr "type" "ssemov")
8836 (set_attr "memory" "store")
8837 (set_attr "prefix" "evex")
8838 (set_attr "mode" "TI")])
;; Source modes for the eight-element truncations to QI below: V8HI (only
;; when TARGET_AVX512BW holds) and V8SI (no extra condition here).
8840 (define_mode_iterator VI2_128_BW_4_256
8841 [(V8HI "TARGET_AVX512BW") V8SI])
;; Unmasked store form of vpmov<trunsuffix><pmov_suff_3> for
;; VI2_128_BW_4_256 sources: operand 0 is V16QI memory, operand 1 the
;; source register.  The trailing parallel lists indices 8..15.
;; NOTE(review): the RTL wrapping the operands and the insn condition are
;; not visible in this excerpt.
8843 (define_insn "*avx512vl_<code><mode>v8qi2_store"
8844 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8847 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8850 (parallel [(const_int 8) (const_int 9)
8851 (const_int 10) (const_int 11)
8852 (const_int 12) (const_int 13)
8853 (const_int 14) (const_int 15)]))))]
8855 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
8856 [(set_attr "type" "ssemov")
8857 (set_attr "memory" "store")
8858 (set_attr "prefix" "evex")
8859 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix><pmov_suff_3> for
;; VI2_128_BW_4_256 sources.  Operand 2 (V16QI, constraint "0C") is
;; selected at indices 0..7, operand 3 is the QI mask printed inside
;; %{...%}, and a V8QI all-zero const_vector accompanies the 8-element
;; result inside the V16QI destination.
;; NOTE(review): the enclosing RTL codes and the insn condition are not
;; visible in this excerpt.
8861 (define_insn "avx512vl_<code><mode>v8qi2_mask"
8862 [(set (match_operand:V16QI 0 "register_operand" "=v")
8866 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8868 (match_operand:V16QI 2 "vector_move_operand" "0C")
8869 (parallel [(const_int 0) (const_int 1)
8870 (const_int 2) (const_int 3)
8871 (const_int 4) (const_int 5)
8872 (const_int 6) (const_int 7)]))
8873 (match_operand:QI 3 "register_operand" "Yk"))
8874 (const_vector:V8QI [(const_int 0) (const_int 0)
8875 (const_int 0) (const_int 0)
8876 (const_int 0) (const_int 0)
8877 (const_int 0) (const_int 0)])))]
8879 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8880 [(set_attr "type" "ssemov")
8881 (set_attr "prefix" "evex")
8882 (set_attr "mode" "TI")])
;; Masked store form of vpmov<trunsuffix><pmov_suff_3> for
;; VI2_128_BW_4_256 sources.  Operand 0 is V16QI memory and operand 2 the
;; QI mask printed inside %{...%}.  The visible parallels list indices
;; 0..7 and 8..15.
;; NOTE(review): several RTL lines and the insn condition are not visible
;; in this excerpt.
8884 (define_insn "avx512vl_<code><mode>v8qi2_mask_store"
8885 [(set (match_operand:V16QI 0 "memory_operand" "=m")
8889 (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
8892 (parallel [(const_int 0) (const_int 1)
8893 (const_int 2) (const_int 3)
8894 (const_int 4) (const_int 5)
8895 (const_int 6) (const_int 7)]))
8896 (match_operand:QI 2 "register_operand" "Yk"))
8899 (parallel [(const_int 8) (const_int 9)
8900 (const_int 10) (const_int 11)
8901 (const_int 12) (const_int 13)
8902 (const_int 14) (const_int 15)]))))]
8904 "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8905 [(set_attr "type" "ssemov")
8906 (set_attr "memory" "store")
8907 (set_attr "prefix" "evex")
8908 (set_attr "mode" "TI")])
;; Source modes for the truncations to HI elements below, plus the
;; per-mode attributes they use:
;;   pmov_dst_4         - mode of the truncated result (V4HI or V2HI);
;;   pmov_dst_zeroed_4  - mode of the zero operand paired with it; the two
;;                        element counts add up to the 8 of a V8HI;
;;   pmov_suff_4        - mnemonic suffix ("qw" for DI sources, "dw" for
;;                        SI sources).
8910 (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
8911 (define_mode_attr pmov_dst_4
8912 [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
8913 (define_mode_attr pmov_dst_zeroed_4
8914 [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
8915 (define_mode_attr pmov_suff_4
8916 [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
;; Unmasked register form of vpmov<trunsuffix><pmov_suff_4>: truncates a
;; PMOV_SRC_MODE_4 register (operand 1) to <pmov_dst_4> and pairs it with
;; operand 2, a <pmov_dst_zeroed_4> constant zero, in a V8HI destination.
;; NOTE(review): the RTL code that combines the two halves and the insn
;; condition are not visible in this excerpt.
8918 (define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
8919 [(set (match_operand:V8HI 0 "register_operand" "=v")
8921 (any_truncate:<pmov_dst_4>
8922 (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
8923 (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
8925 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
8926 [(set_attr "type" "ssemov")
8927 (set_attr "prefix" "evex")
8928 (set_attr "mode" "TI")])
;; Unmasked store form of vpmov<trunsuffix><pmov_suff_4> for
;; VI4_128_8_256 sources: operand 0 is V8HI memory, operand 1 the source
;; register.  The trailing parallel lists indices 4..7.
;; NOTE(review): the RTL wrapping the operands and the insn condition are
;; not visible in this excerpt.
8930 (define_insn "*avx512vl_<code><mode>v4hi2_store"
8931 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8934 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8937 (parallel [(const_int 4) (const_int 5)
8938 (const_int 6) (const_int 7)]))))]
8940 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
8941 [(set_attr "type" "ssemov")
8942 (set_attr "memory" "store")
8943 (set_attr "prefix" "evex")
8944 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix><pmov_suff_4> for
;; VI4_128_8_256 sources.  Operand 2 (V8HI, constraint "0C") is selected
;; at indices 0..3, operand 3 is the QI mask printed inside %{...%}, and a
;; V4HI all-zero const_vector accompanies the 4-element result inside the
;; V8HI destination.
;; NOTE(review): the enclosing RTL codes and the insn condition are not
;; visible in this excerpt.
8946 (define_insn "avx512vl_<code><mode>v4hi2_mask"
8947 [(set (match_operand:V8HI 0 "register_operand" "=v")
8951 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8953 (match_operand:V8HI 2 "vector_move_operand" "0C")
8954 (parallel [(const_int 0) (const_int 1)
8955 (const_int 2) (const_int 3)]))
8956 (match_operand:QI 3 "register_operand" "Yk"))
8957 (const_vector:V4HI [(const_int 0) (const_int 0)
8958 (const_int 0) (const_int 0)])))]
8960 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
8961 [(set_attr "type" "ssemov")
8962 (set_attr "prefix" "evex")
8963 (set_attr "mode" "TI")])
;; Masked store form of vpmov<trunsuffix><pmov_suff_4> for VI4_128_8_256
;; sources.  Operand 0 is V8HI memory and operand 2 the QI mask printed
;; inside %{...%}.  The visible parallels list indices 0..3 and 4..7.
;; NOTE(review): several RTL lines and the insn condition are not visible
;; in this excerpt.
8965 (define_insn "avx512vl_<code><mode>v4hi2_mask_store"
8966 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8970 (match_operand:VI4_128_8_256 1 "register_operand" "v"))
8973 (parallel [(const_int 0) (const_int 1)
8974 (const_int 2) (const_int 3)]))
8975 (match_operand:QI 2 "register_operand" "Yk"))
8978 (parallel [(const_int 4) (const_int 5)
8979 (const_int 6) (const_int 7)]))))]
8981 "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
8982 [(set_attr "type" "ssemov")
8983 (set_attr "memory" "store")
8984 (set_attr "prefix" "evex")
8985 (set_attr "mode" "TI")])
;; Unmasked store form of vpmov<trunsuffix>qw for a V2DI source: operand 0
;; is V8HI memory, operand 1 the source register.  The trailing parallel
;; lists indices 2..7.
;; NOTE(review): the RTL wrapping the operands and the insn condition are
;; not visible in this excerpt.
8987 (define_insn "*avx512vl_<code>v2div2hi2_store"
8988 [(set (match_operand:V8HI 0 "memory_operand" "=m")
8991 (match_operand:V2DI 1 "register_operand" "v"))
8994 (parallel [(const_int 2) (const_int 3)
8995 (const_int 4) (const_int 5)
8996 (const_int 6) (const_int 7)]))))]
8998 "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
8999 [(set_attr "type" "ssemov")
9000 (set_attr "memory" "store")
9001 (set_attr "prefix" "evex")
9002 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix>qw for a V2DI source.
;; Operand 2 (V8HI, constraint "0C") is selected at indices 0..1, operand
;; 3 is the QI mask printed inside %{...%}, and a V6HI all-zero
;; const_vector accompanies the 2-element result inside the V8HI
;; destination.
;; NOTE(review): the enclosing RTL codes and the insn condition are not
;; visible in this excerpt.
9004 (define_insn "avx512vl_<code>v2div2hi2_mask"
9005 [(set (match_operand:V8HI 0 "register_operand" "=v")
9009 (match_operand:V2DI 1 "register_operand" "v"))
9011 (match_operand:V8HI 2 "vector_move_operand" "0C")
9012 (parallel [(const_int 0) (const_int 1)]))
9013 (match_operand:QI 3 "register_operand" "Yk"))
9014 (const_vector:V6HI [(const_int 0) (const_int 0)
9015 (const_int 0) (const_int 0)
9016 (const_int 0) (const_int 0)])))]
9018 "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9019 [(set_attr "type" "ssemov")
9020 (set_attr "prefix" "evex")
9021 (set_attr "mode" "TI")])
;; Masked store form of vpmov<trunsuffix>qw for a V2DI source.  Operand 0
;; is V8HI memory and operand 2 the QI mask printed inside %{...%}.  The
;; visible parallels list indices 0..1 and 2..7.
;; NOTE(review): several RTL lines and the insn condition are not visible
;; in this excerpt.
9023 (define_insn "avx512vl_<code>v2div2hi2_mask_store"
9024 [(set (match_operand:V8HI 0 "memory_operand" "=m")
9028 (match_operand:V2DI 1 "register_operand" "v"))
9031 (parallel [(const_int 0) (const_int 1)]))
9032 (match_operand:QI 2 "register_operand" "Yk"))
9035 (parallel [(const_int 2) (const_int 3)
9036 (const_int 4) (const_int 5)
9037 (const_int 6) (const_int 7)]))))]
9039 "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9040 [(set_attr "type" "ssemov")
9041 (set_attr "memory" "store")
9042 (set_attr "prefix" "evex")
9043 (set_attr "mode" "TI")])
;; Unmasked register form of vpmov<trunsuffix>qd: V2DI source register
;; (operand 1) into a V4SI destination, with operand 2 a V2SI constant
;; zero.
;; NOTE(review): the RTL codes around the operands and the insn condition
;; are not visible in this excerpt.
9045 (define_insn "*avx512vl_<code>v2div2si2"
9046 [(set (match_operand:V4SI 0 "register_operand" "=v")
9049 (match_operand:V2DI 1 "register_operand" "v"))
9050 (match_operand:V2SI 2 "const0_operand")))]
9052 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9053 [(set_attr "type" "ssemov")
9054 (set_attr "prefix" "evex")
9055 (set_attr "mode" "TI")])
;; Unmasked store form of vpmov<trunsuffix>qd for a V2DI source: operand 0
;; is V4SI memory, operand 1 the source register.  The trailing parallel
;; lists indices 2..3.
;; NOTE(review): the RTL wrapping the operands and the insn condition are
;; not visible in this excerpt.
9057 (define_insn "*avx512vl_<code>v2div2si2_store"
9058 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9061 (match_operand:V2DI 1 "register_operand" "v"))
9064 (parallel [(const_int 2) (const_int 3)]))))]
9066 "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
9067 [(set_attr "type" "ssemov")
9068 (set_attr "memory" "store")
9069 (set_attr "prefix" "evex")
9070 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix>qd for a V2DI source.
;; Operand 2 (V4SI, constraint "0C") is selected at indices 0..1, operand
;; 3 is the QI mask printed inside %{...%}, and a V2SI all-zero
;; const_vector accompanies the 2-element result inside the V4SI
;; destination.
;; NOTE(review): the enclosing RTL codes and the insn condition are not
;; visible in this excerpt.
9072 (define_insn "avx512vl_<code>v2div2si2_mask"
9073 [(set (match_operand:V4SI 0 "register_operand" "=v")
9077 (match_operand:V2DI 1 "register_operand" "v"))
9079 (match_operand:V4SI 2 "vector_move_operand" "0C")
9080 (parallel [(const_int 0) (const_int 1)]))
9081 (match_operand:QI 3 "register_operand" "Yk"))
9082 (const_vector:V2SI [(const_int 0) (const_int 0)])))]
9084 "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9085 [(set_attr "type" "ssemov")
9086 (set_attr "prefix" "evex")
9087 (set_attr "mode" "TI")])
;; Masked store form of vpmov<trunsuffix>qd for a V2DI source.  Operand 0
;; is V4SI memory and operand 2 the QI mask printed inside %{...%}.  The
;; visible parallels list indices 0..1 and 2..3.
;; NOTE(review): several RTL lines and the insn condition are not visible
;; in this excerpt.
9089 (define_insn "avx512vl_<code>v2div2si2_mask_store"
9090 [(set (match_operand:V4SI 0 "memory_operand" "=m")
9094 (match_operand:V2DI 1 "register_operand" "v"))
9097 (parallel [(const_int 0) (const_int 1)]))
9098 (match_operand:QI 2 "register_operand" "Yk"))
9101 (parallel [(const_int 2) (const_int 3)]))))]
9103 "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9104 [(set_attr "type" "ssemov")
9105 (set_attr "memory" "store")
9106 (set_attr "prefix" "evex")
9107 (set_attr "mode" "TI")])
;; Unmasked register form of vpmov<trunsuffix>qb: V8DI source register
;; (operand 1) into a V16QI destination, together with a V8QI all-zero
;; const_vector.
;; NOTE(review): the RTL codes around the operands and the insn condition
;; are not visible in this excerpt.
9109 (define_insn "*avx512f_<code>v8div16qi2"
9110 [(set (match_operand:V16QI 0 "register_operand" "=v")
9113 (match_operand:V8DI 1 "register_operand" "v"))
9114 (const_vector:V8QI [(const_int 0) (const_int 0)
9115 (const_int 0) (const_int 0)
9116 (const_int 0) (const_int 0)
9117 (const_int 0) (const_int 0)])))]
9119 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9120 [(set_attr "type" "ssemov")
9121 (set_attr "prefix" "evex")
9122 (set_attr "mode" "TI")])
;; Unmasked store form of vpmov<trunsuffix>qb for a V8DI source: operand 0
;; is V16QI memory, operand 1 the source register.  The trailing parallel
;; lists indices 8..15.
;; NOTE(review): the RTL wrapping the operands and the insn condition are
;; not visible in this excerpt.
9124 (define_insn "*avx512f_<code>v8div16qi2_store"
9125 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9128 (match_operand:V8DI 1 "register_operand" "v"))
9131 (parallel [(const_int 8) (const_int 9)
9132 (const_int 10) (const_int 11)
9133 (const_int 12) (const_int 13)
9134 (const_int 14) (const_int 15)]))))]
9136 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
9137 [(set_attr "type" "ssemov")
9138 (set_attr "memory" "store")
9139 (set_attr "prefix" "evex")
9140 (set_attr "mode" "TI")])
;; Masked register form of vpmov<trunsuffix>qb for a V8DI source.
;; Operand 2 (V16QI, constraint "0C") is selected at indices 0..7, operand
;; 3 is the QI mask printed inside %{...%}, and a V8QI all-zero
;; const_vector accompanies the 8-element result inside the V16QI
;; destination.
;; NOTE(review): the enclosing RTL codes and the insn condition are not
;; visible in this excerpt.
9142 (define_insn "avx512f_<code>v8div16qi2_mask"
9143 [(set (match_operand:V16QI 0 "register_operand" "=v")
9147 (match_operand:V8DI 1 "register_operand" "v"))
9149 (match_operand:V16QI 2 "vector_move_operand" "0C")
9150 (parallel [(const_int 0) (const_int 1)
9151 (const_int 2) (const_int 3)
9152 (const_int 4) (const_int 5)
9153 (const_int 6) (const_int 7)]))
9154 (match_operand:QI 3 "register_operand" "Yk"))
9155 (const_vector:V8QI [(const_int 0) (const_int 0)
9156 (const_int 0) (const_int 0)
9157 (const_int 0) (const_int 0)
9158 (const_int 0) (const_int 0)])))]
9160 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
9161 [(set_attr "type" "ssemov")
9162 (set_attr "prefix" "evex")
9163 (set_attr "mode" "TI")])
;; Masked store form of vpmov<trunsuffix>qb for a V8DI source.  Operand 0
;; is V16QI memory and operand 2 the QI mask printed inside %{...%}.  The
;; visible parallels list indices 0..7 and 8..15.
;; NOTE(review): several RTL lines and the insn condition are not visible
;; in this excerpt.
9165 (define_insn "avx512f_<code>v8div16qi2_mask_store"
9166 [(set (match_operand:V16QI 0 "memory_operand" "=m")
9170 (match_operand:V8DI 1 "register_operand" "v"))
9173 (parallel [(const_int 0) (const_int 1)
9174 (const_int 2) (const_int 3)
9175 (const_int 4) (const_int 5)
9176 (const_int 6) (const_int 7)]))
9177 (match_operand:QI 2 "register_operand" "Yk"))
9180 (parallel [(const_int 8) (const_int 9)
9181 (const_int 10) (const_int 11)
9182 (const_int 12) (const_int 13)
9183 (const_int 14) (const_int 15)]))))]
9185 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
9186 [(set_attr "type" "ssemov")
9187 (set_attr "memory" "store")
9188 (set_attr "prefix" "evex")
9189 (set_attr "mode" "TI")])
9191 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9193 ;; Parallel integral arithmetic
9195 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Vector integer negation expander for VI_AVX2 modes.  The preparation
;; statement forces the zero constant of <MODE>mode into a register and
;; stores it as operands[2] for the template to use.
;; NOTE(review): the RTL code applied to the operands and the expander
;; condition are not visible in this excerpt.
9197 (define_expand "neg<mode>2"
9198 [(set (match_operand:VI_AVX2 0 "register_operand")
9201 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
9203 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Vector integer add/subtract expander for VI_AVX2 modes.  Both inputs may
;; be registers or memory; the preparation statement passes the operands
;; through ix86_fixup_binary_operands_no_copy before matching.
;; NOTE(review): the RTL code line and the expander condition are not
;; visible in this excerpt.
9205 (define_expand "<plusminus_insn><mode>3"
9206 [(set (match_operand:VI_AVX2 0 "register_operand")
9208 (match_operand:VI_AVX2 1 "nonimmediate_operand")
9209 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
9211 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Masked add/subtract expander for VI48_AVX512VL modes: vec_merge of the
;; plusminus result with operand 3 under the mask in operand 4
;; (<avx512fmaskmode>).  Operands are passed through
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the expander condition is not visible in this excerpt.
9213 (define_expand "<plusminus_insn><mode>3_mask"
9214 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
9215 (vec_merge:VI48_AVX512VL
9216 (plusminus:VI48_AVX512VL
9217 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
9218 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
9219 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
9220 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9222 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Masked add/subtract expander for VI12_AVX512VL modes: vec_merge of the
;; plusminus result with operand 3 under the mask in operand 4
;; (<avx512fmaskmode>).  Operands are passed through
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the expander condition is not visible in this excerpt.
9224 (define_expand "<plusminus_insn><mode>3_mask"
9225 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
9226 (vec_merge:VI12_AVX512VL
9227 (plusminus:VI12_AVX512VL
9228 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
9229 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
9230 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
9231 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
9233 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Add/subtract insn for VI_AVX2 modes with two alternatives, selected by
;; the "isa" attribute: a two-operand p<plusminus_mnemonic> form where
;; operand 1 is tied to the destination ("noavx"), and a three-operand
;; vp<plusminus_mnemonic> form ("avx").  The condition requires
;; ix86_binary_operator_ok.
;; NOTE(review): the RTL code line, the start of the condition and the
;; start of the output template are not visible in this excerpt.
9235 (define_insn "*<plusminus_insn><mode>3"
9236 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
9238 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
9239 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9241 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9243 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9244 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9245 [(set_attr "isa" "noavx,avx")
9246 (set_attr "type" "sseiadd")
9247 (set_attr "prefix_data16" "1,*")
9248 (set_attr "prefix" "<mask_prefix3>")
9249 (set_attr "mode" "<sseinsnmode>")])
;; Masked add/subtract insn for VI48_AVX512VL modes.  The plusminus result
;; is vec_merged with operand 3 (constraint "0C") under the mask in
;; operand 4, which the template prints inside %{...%} followed by %N3.
;; The condition requires ix86_binary_operator_ok.
;; NOTE(review): the first line of the condition is not visible in this
;; excerpt.
9251 (define_insn "*<plusminus_insn><mode>3_mask"
9252 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9253 (vec_merge:VI48_AVX512VL
9254 (plusminus:VI48_AVX512VL
9255 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9256 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
9257 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
9258 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9260 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9261 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9262 [(set_attr "type" "sseiadd")
9263 (set_attr "prefix" "evex")
9264 (set_attr "mode" "<sseinsnmode>")])
;; Masked add/subtract insn for VI12_AVX512VL modes, enabled under
;; TARGET_AVX512BW when ix86_binary_operator_ok accepts the operands.  The
;; plusminus result is vec_merged with operand 3 (constraint "0C") under
;; the mask in operand 4, which the template prints inside %{...%}
;; followed by %N3.
9266 (define_insn "*<plusminus_insn><mode>3_mask"
9267 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
9268 (vec_merge:VI12_AVX512VL
9269 (plusminus:VI12_AVX512VL
9270 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
9271 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
9272 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
9273 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
9274 "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9275 "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9276 [(set_attr "type" "sseiadd")
9277 (set_attr "prefix" "evex")
9278 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the sat_plusminus codes on VI12_AVX2 modes, with the
;; optional <mask_name> variant.  Enabled under TARGET_SSE2 together with
;; <mask_mode512bit_condition> and <mask_avx512bw_condition>; operands are
;; passed through ix86_fixup_binary_operands_no_copy.
9280 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9281 [(set (match_operand:VI12_AVX2 0 "register_operand")
9282 (sat_plusminus:VI12_AVX2
9283 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
9284 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
9285 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9286 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the sat_plusminus codes on VI12_AVX2 modes.  Two alternatives,
;; selected by the "isa" attribute: a two-operand p<plusminus_mnemonic>
;; form with operand 1 tied to the destination ("noavx"), and a
;; three-operand vp<plusminus_mnemonic> form that can carry
;; <mask_operand3> ("avx").
;; NOTE(review): the opening line of the output template is not visible
;; in this excerpt.
9288 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
9289 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
9290 (sat_plusminus:VI12_AVX2
9291 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
9292 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9293 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
9294 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9296 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
9297 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9298 [(set_attr "isa" "noavx,avx")
9299 (set_attr "type" "sseiadd")
9300 (set_attr "prefix_data16" "1,*")
9301 (set_attr "prefix" "orig,maybe_evex")
9302 (set_attr "mode" "TI")])
;; Multiply expander for VI1_AVX512 modes.  The preparation code hands the
;; three operands to ix86_expand_vecop_qihi with code MULT.
;; NOTE(review): the rest of the preparation block is not visible in this
;; excerpt.
9304 (define_expand "mul<mode>3<mask_name>"
9305 [(set (match_operand:VI1_AVX512 0 "register_operand")
9306 (mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
9307 (match_operand:VI1_AVX512 2 "register_operand")))]
9308 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9310 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Multiply expander for VI2_AVX2 modes, with the optional <mask_name>
;; variant.  Enabled under TARGET_SSE2 together with
;; <mask_mode512bit_condition> and <mask_avx512bw_condition>; operands are
;; passed through ix86_fixup_binary_operands_no_copy.
9314 (define_expand "mul<mode>3<mask_name>"
9315 [(set (match_operand:VI2_AVX2 0 "register_operand")
9316 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
9317 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
9318 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9319 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Multiply insn for VI2_AVX2 modes: pmullw (two-operand, "noavx") or
;; vpmullw (three-operand, "avx", may carry <mask_operand3>).  Operands 1
;; and 2 are marked commutative with "%".
;; NOTE(review): the first line of the condition and the opening line of
;; the output template are not visible in this excerpt.
9321 (define_insn "*mul<mode>3<mask_name>"
9322 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
9323 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")
9324 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))]
9326 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9327 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9329 pmullw\t{%2, %0|%0, %2}
9330 vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9331 [(set_attr "isa" "noavx,avx")
9332 (set_attr "type" "sseimul")
9333 (set_attr "prefix_data16" "1,*")
9334 (set_attr "prefix" "orig,vex")
9335 (set_attr "mode" "<sseinsnmode>")])
;; High-part multiply expander for VI2_AVX2 modes: both inputs are
;; extended (any_extend) to <ssedoublemode>, multiplied, and the product
;; is logically shifted right.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the outer RTL code, the shift count and the first line
;; of the condition are not visible in this excerpt.
9337 (define_expand "<s>mul<mode>3_highpart<mask_name>"
9338 [(set (match_operand:VI2_AVX2 0 "register_operand")
9340 (lshiftrt:<ssedoublemode>
9341 (mult:<ssedoublemode>
9342 (any_extend:<ssedoublemode>
9343 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
9344 (any_extend:<ssedoublemode>
9345 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
9348 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9349 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; High-part multiply insn for VI2_AVX2 modes, emitted as pmulh<u>w
;; (two-operand, "noavx") or vpmulh<u>w (three-operand, "avx", may carry
;; <mask_operand3>).  The RTL is a right shift of the widened product of
;; the two extended inputs.
;; NOTE(review): the outer RTL code, the shift count, the first line of
;; the condition and the opening line of the output template are not
;; visible in this excerpt.
9351 (define_insn "*<s>mul<mode>3_highpart<mask_name>"
9352 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
9354 (lshiftrt:<ssedoublemode>
9355 (mult:<ssedoublemode>
9356 (any_extend:<ssedoublemode>
9357 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
9358 (any_extend:<ssedoublemode>
9359 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
9362 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
9363 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9365 pmulh<u>w\t{%2, %0|%0, %2}
9366 vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9367 [(set_attr "isa" "noavx,avx")
9368 (set_attr "type" "sseimul")
9369 (set_attr "prefix_data16" "1,*")
9370 (set_attr "prefix" "orig,vex")
9371 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the widening multiply of the even-indexed elements
;; (0, 2, ..., 14) of two V16SI operands into a V8DI result.  Operands are
;; passed through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the mult/extend/vec_select wrapper lines and the
;; expander condition are not visible in this excerpt.
9373 (define_expand "vec_widen_umult_even_v16si<mask_name>"
9374 [(set (match_operand:V8DI 0 "register_operand")
9378 (match_operand:V16SI 1 "nonimmediate_operand")
9379 (parallel [(const_int 0) (const_int 2)
9380 (const_int 4) (const_int 6)
9381 (const_int 8) (const_int 10)
9382 (const_int 12) (const_int 14)])))
9385 (match_operand:V16SI 2 "nonimmediate_operand")
9386 (parallel [(const_int 0) (const_int 2)
9387 (const_int 4) (const_int 6)
9388 (const_int 8) (const_int 10)
9389 (const_int 12) (const_int 14)])))))]
9391 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; vpmuludq on V16SI inputs producing V8DI, using the even-indexed
;; elements (0, 2, ..., 14) of each input.  Enabled under TARGET_AVX512F
;; when ix86_binary_operator_ok accepts the operands; operands 1 and 2 are
;; marked commutative with "%".
;; NOTE(review): the mult/extend/vec_select wrapper lines are not visible
;; in this excerpt.
9393 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
9394 [(set (match_operand:V8DI 0 "register_operand" "=v")
9398 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9399 (parallel [(const_int 0) (const_int 2)
9400 (const_int 4) (const_int 6)
9401 (const_int 8) (const_int 10)
9402 (const_int 12) (const_int 14)])))
9405 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9406 (parallel [(const_int 0) (const_int 2)
9407 (const_int 4) (const_int 6)
9408 (const_int 8) (const_int 10)
9409 (const_int 12) (const_int 14)])))))]
9410 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9411 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9412 [(set_attr "isa" "avx512f")
9413 (set_attr "type" "sseimul")
9414 (set_attr "prefix_extra" "1")
9415 (set_attr "prefix" "evex")
9416 (set_attr "mode" "XI")])
;; Expander for the widening multiply of the even-indexed elements
;; (0, 2, 4, 6) of two V8SI operands into a V4DI result.  Enabled under
;; TARGET_AVX2 && <mask_avx512vl_condition>; operands are passed through
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the mult/extend/vec_select wrapper lines are not visible
;; in this excerpt.
9418 (define_expand "vec_widen_umult_even_v8si<mask_name>"
9419 [(set (match_operand:V4DI 0 "register_operand")
9423 (match_operand:V8SI 1 "nonimmediate_operand")
9424 (parallel [(const_int 0) (const_int 2)
9425 (const_int 4) (const_int 6)])))
9428 (match_operand:V8SI 2 "nonimmediate_operand")
9429 (parallel [(const_int 0) (const_int 2)
9430 (const_int 4) (const_int 6)])))))]
9431 "TARGET_AVX2 && <mask_avx512vl_condition>"
9432 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuludq on V8SI inputs producing V4DI, using the even-indexed elements
;; (0, 2, 4, 6) of each input.  Enabled under TARGET_AVX2 &&
;; <mask_avx512vl_condition> when ix86_binary_operator_ok accepts the
;; operands.
;; NOTE(review): the mult/extend/vec_select wrapper lines are not visible
;; in this excerpt.
9434 (define_insn "*vec_widen_umult_even_v8si<mask_name>"
9435 [(set (match_operand:V4DI 0 "register_operand" "=v")
9439 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9440 (parallel [(const_int 0) (const_int 2)
9441 (const_int 4) (const_int 6)])))
9444 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9445 (parallel [(const_int 0) (const_int 2)
9446 (const_int 4) (const_int 6)])))))]
9447 "TARGET_AVX2 && <mask_avx512vl_condition>
9448 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9449 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9450 [(set_attr "type" "sseimul")
9451 (set_attr "prefix" "maybe_evex")
9452 (set_attr "mode" "OI")])
;; Expander for the widening multiply of elements 0 and 2 of two V4SI
;; operands into a V2DI result.  Enabled under TARGET_SSE2 &&
;; <mask_avx512vl_condition>; operands are passed through
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the mult/extend/vec_select wrapper lines are not visible
;; in this excerpt.
9454 (define_expand "vec_widen_umult_even_v4si<mask_name>"
9455 [(set (match_operand:V2DI 0 "register_operand")
9459 (match_operand:V4SI 1 "nonimmediate_operand")
9460 (parallel [(const_int 0) (const_int 2)])))
9463 (match_operand:V4SI 2 "nonimmediate_operand")
9464 (parallel [(const_int 0) (const_int 2)])))))]
9465 "TARGET_SSE2 && <mask_avx512vl_condition>"
9466 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuludq / vpmuludq on V4SI inputs producing V2DI from elements 0 and 2
;; of each input.  Alternative 0 ("noavx") is the two-operand form with
;; operand 1 tied to the destination; alternative 1 ("avx") is the
;; three-operand form and may carry <mask_operand3>.
;; NOTE(review): the mult/extend/vec_select wrapper lines and the opening
;; line of the output template are not visible in this excerpt.
9468 (define_insn "*vec_widen_umult_even_v4si<mask_name>"
9469 [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9473 (match_operand:V4SI 1 "nonimmediate_operand" "%0,v")
9474 (parallel [(const_int 0) (const_int 2)])))
9477 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")
9478 (parallel [(const_int 0) (const_int 2)])))))]
9479 "TARGET_SSE2 && <mask_avx512vl_condition>
9480 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
9482 pmuludq\t{%2, %0|%0, %2}
9483 vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9484 [(set_attr "isa" "noavx,avx")
9485 (set_attr "type" "sseimul")
9486 (set_attr "prefix_data16" "1,*")
9487 (set_attr "prefix" "orig,maybe_evex")
9488 (set_attr "mode" "TI")])
;; Expander for the signed-named widening multiply of the even-indexed
;; elements (0, 2, ..., 14) of two V16SI operands into a V8DI result.
;; Operands are passed through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the mult/extend/vec_select wrapper lines and the
;; expander condition are not visible in this excerpt.
9490 (define_expand "vec_widen_smult_even_v16si<mask_name>"
9491 [(set (match_operand:V8DI 0 "register_operand")
9495 (match_operand:V16SI 1 "nonimmediate_operand")
9496 (parallel [(const_int 0) (const_int 2)
9497 (const_int 4) (const_int 6)
9498 (const_int 8) (const_int 10)
9499 (const_int 12) (const_int 14)])))
9502 (match_operand:V16SI 2 "nonimmediate_operand")
9503 (parallel [(const_int 0) (const_int 2)
9504 (const_int 4) (const_int 6)
9505 (const_int 8) (const_int 10)
9506 (const_int 12) (const_int 14)])))))]
9508 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; vpmuldq on V16SI inputs producing V8DI, using the even-indexed elements
;; (0, 2, ..., 14) of each input.  Enabled under TARGET_AVX512F when
;; ix86_binary_operator_ok accepts the operands.
;; NOTE(review): the mult/extend/vec_select wrapper lines are not visible
;; in this excerpt.
9510 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
9511 [(set (match_operand:V8DI 0 "register_operand" "=v")
9515 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
9516 (parallel [(const_int 0) (const_int 2)
9517 (const_int 4) (const_int 6)
9518 (const_int 8) (const_int 10)
9519 (const_int 12) (const_int 14)])))
9522 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
9523 (parallel [(const_int 0) (const_int 2)
9524 (const_int 4) (const_int 6)
9525 (const_int 8) (const_int 10)
9526 (const_int 12) (const_int 14)])))))]
9527 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
9528 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9529 [(set_attr "isa" "avx512f")
9530 (set_attr "type" "sseimul")
9531 (set_attr "prefix_extra" "1")
9532 (set_attr "prefix" "evex")
9533 (set_attr "mode" "XI")])
;; Expander for the signed-named widening multiply of the even-indexed
;; elements (0, 2, 4, 6) of two V8SI operands into a V4DI result.  Enabled
;; under TARGET_AVX2 && <mask_avx512vl_condition>; operands are passed
;; through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the mult/extend/vec_select wrapper lines are not visible
;; in this excerpt.
9535 (define_expand "vec_widen_smult_even_v8si<mask_name>"
9536 [(set (match_operand:V4DI 0 "register_operand")
9540 (match_operand:V8SI 1 "nonimmediate_operand")
9541 (parallel [(const_int 0) (const_int 2)
9542 (const_int 4) (const_int 6)])))
9545 (match_operand:V8SI 2 "nonimmediate_operand")
9546 (parallel [(const_int 0) (const_int 2)
9547 (const_int 4) (const_int 6)])))))]
9548 "TARGET_AVX2 && <mask_avx512vl_condition>"
9549 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuldq on V8SI inputs producing V4DI, using the even-indexed elements
;; (0, 2, 4, 6) of each input.  The condition requires
;; ix86_binary_operator_ok.
;; NOTE(review): the "prefix" attribute here is "vex" although the
;; template can carry <mask_operand3>; the unsigned V8SI sibling in this
;; file uses "maybe_evex" -- confirm whether the difference is intended.
;; NOTE(review): the mult/extend/vec_select wrapper lines and the first
;; line of the condition are not visible in this excerpt.
9551 (define_insn "*vec_widen_smult_even_v8si<mask_name>"
9552 [(set (match_operand:V4DI 0 "register_operand" "=v")
9556 (match_operand:V8SI 1 "nonimmediate_operand" "%v")
9557 (parallel [(const_int 0) (const_int 2)
9558 (const_int 4) (const_int 6)])))
9561 (match_operand:V8SI 2 "nonimmediate_operand" "vm")
9562 (parallel [(const_int 0) (const_int 2)
9563 (const_int 4) (const_int 6)])))))]
9565 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
9566 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9567 [(set_attr "type" "sseimul")
9568 (set_attr "prefix_extra" "1")
9569 (set_attr "prefix" "vex")
9570 (set_attr "mode" "OI")])
;; Expander for the widening multiply of elements 0 and 2 of two V4SI
;; operands into a V2DI result.  Enabled under TARGET_SSE4_1 &&
;; <mask_avx512vl_condition>; operands are passed through
;; ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the mult/extend/vec_select wrapper lines are not visible
;; in this excerpt.
9572 (define_expand "sse4_1_mulv2siv2di3<mask_name>"
9573 [(set (match_operand:V2DI 0 "register_operand")
9577 (match_operand:V4SI 1 "nonimmediate_operand")
9578 (parallel [(const_int 0) (const_int 2)])))
9581 (match_operand:V4SI 2 "nonimmediate_operand")
9582 (parallel [(const_int 0) (const_int 2)])))))]
9583 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
9584 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuldq / vpmuldq on V4SI inputs producing V2DI from elements 0 and 2 of
;; each input.  Three alternatives: two "noavx" two-operand forms (they
;; differ only in register constraints, "Yr" vs "*x") and one "avx"
;; three-operand form that may carry <mask_operand3>.
;; NOTE(review): the mult/extend/vec_select wrapper lines and the opening
;; line of the output template are not visible in this excerpt.
9586 (define_insn "*sse4_1_mulv2siv2di3<mask_name>"
9587 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
9591 (match_operand:V4SI 1 "nonimmediate_operand" "%0,0,v")
9592 (parallel [(const_int 0) (const_int 2)])))
9595 (match_operand:V4SI 2 "nonimmediate_operand" "Yrm,*xm,vm")
9596 (parallel [(const_int 0) (const_int 2)])))))]
9597 "TARGET_SSE4_1 && <mask_avx512vl_condition>
9598 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
9600 pmuldq\t{%2, %0|%0, %2}
9601 pmuldq\t{%2, %0|%0, %2}
9602 vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9603 [(set_attr "isa" "noavx,noavx,avx")
9604 (set_attr "type" "sseimul")
9605 (set_attr "prefix_data16" "1,1,*")
9606 (set_attr "prefix_extra" "1")
9607 (set_attr "prefix" "orig,orig,vex")
9608 (set_attr "mode" "TI")])
;; vpmaddwd for the VI2_AVX2 modes, represented as an opaque
;; UNSPEC_PMADDWD512 of the two inputs rather than as explicit arithmetic.
;; The result has mode <sseunpackmode>.  Enabled under TARGET_AVX512BW &&
;; <mask_mode512bit_condition>; the template may carry <mask_operand3>.
;; Operand 1 must be a register, operand 2 may be a register or memory.
;; NOTE(review): "mode" is fixed to "XI" for every iterated mode; confirm
;; that this is intended for the narrower VI2_AVX2 modes.
9610 (define_insn "avx512bw_pmaddwd512<mode><mask_name>"
9611 [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
9612 (unspec:<sseunpackmode>
9613 [(match_operand:VI2_AVX2 1 "register_operand" "v")
9614 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
9615 UNSPEC_PMADDWD512))]
9616 "TARGET_AVX512BW && <mask_mode512bit_condition>"
9617 "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9618 [(set_attr "type" "sseiadd")
9619 (set_attr "prefix" "evex")
9620 (set_attr "mode" "XI")])
;; Expander for the AVX2 pmaddwd pattern: V16HI inputs, V8SI result.  The
;; template selects the even-indexed elements (0, 2, ..., 14) and the
;; odd-indexed elements (1, 3, ..., 15) of each input as V8HI vectors;
;; operands are passed through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the arithmetic codes combining the selected halves and
;; the expander condition are not visible in this excerpt.
9622 (define_expand "avx2_pmaddwd"
9623 [(set (match_operand:V8SI 0 "register_operand")
9628 (match_operand:V16HI 1 "nonimmediate_operand")
9629 (parallel [(const_int 0) (const_int 2)
9630 (const_int 4) (const_int 6)
9631 (const_int 8) (const_int 10)
9632 (const_int 12) (const_int 14)])))
9635 (match_operand:V16HI 2 "nonimmediate_operand")
9636 (parallel [(const_int 0) (const_int 2)
9637 (const_int 4) (const_int 6)
9638 (const_int 8) (const_int 10)
9639 (const_int 12) (const_int 14)]))))
9642 (vec_select:V8HI (match_dup 1)
9643 (parallel [(const_int 1) (const_int 3)
9644 (const_int 5) (const_int 7)
9645 (const_int 9) (const_int 11)
9646 (const_int 13) (const_int 15)])))
9648 (vec_select:V8HI (match_dup 2)
9649 (parallel [(const_int 1) (const_int 3)
9650 (const_int 5) (const_int 7)
9651 (const_int 9) (const_int 11)
9652 (const_int 13) (const_int 15)]))))))]
9654 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; vpmaddwd on V16HI inputs producing V8SI.  The template selects the
;; even-indexed and the odd-indexed elements of each input as V8HI
;; vectors.  Enabled under TARGET_AVX2 when ix86_binary_operator_ok
;; accepts the operands; operands 1 and 2 are marked commutative with "%".
;; NOTE(review): the arithmetic codes combining the selected halves are
;; not visible in this excerpt.
9656 (define_insn "*avx2_pmaddwd"
9657 [(set (match_operand:V8SI 0 "register_operand" "=x")
9662 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
9663 (parallel [(const_int 0) (const_int 2)
9664 (const_int 4) (const_int 6)
9665 (const_int 8) (const_int 10)
9666 (const_int 12) (const_int 14)])))
9669 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
9670 (parallel [(const_int 0) (const_int 2)
9671 (const_int 4) (const_int 6)
9672 (const_int 8) (const_int 10)
9673 (const_int 12) (const_int 14)]))))
9676 (vec_select:V8HI (match_dup 1)
9677 (parallel [(const_int 1) (const_int 3)
9678 (const_int 5) (const_int 7)
9679 (const_int 9) (const_int 11)
9680 (const_int 13) (const_int 15)])))
9682 (vec_select:V8HI (match_dup 2)
9683 (parallel [(const_int 1) (const_int 3)
9684 (const_int 5) (const_int 7)
9685 (const_int 9) (const_int 11)
9686 (const_int 13) (const_int 15)]))))))]
9687 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
9688 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9689 [(set_attr "type" "sseiadd")
9690 (set_attr "prefix" "vex")
9691 (set_attr "mode" "OI")])
;; Expander for the SSE2 pmaddwd pattern: V8HI inputs, V4SI result.  The
;; template selects the even-indexed elements (0, 2, 4, 6) and the
;; odd-indexed elements (1, 3, 5, 7) of each input as V4HI vectors;
;; operands are passed through ix86_fixup_binary_operands_no_copy.
;; NOTE(review): the arithmetic codes combining the selected halves and
;; the expander condition are not visible in this excerpt.
9693 (define_expand "sse2_pmaddwd"
9694 [(set (match_operand:V4SI 0 "register_operand")
9699 (match_operand:V8HI 1 "nonimmediate_operand")
9700 (parallel [(const_int 0) (const_int 2)
9701 (const_int 4) (const_int 6)])))
9704 (match_operand:V8HI 2 "nonimmediate_operand")
9705 (parallel [(const_int 0) (const_int 2)
9706 (const_int 4) (const_int 6)]))))
9709 (vec_select:V4HI (match_dup 1)
9710 (parallel [(const_int 1) (const_int 3)
9711 (const_int 5) (const_int 7)])))
9713 (vec_select:V4HI (match_dup 2)
9714 (parallel [(const_int 1) (const_int 3)
9715 (const_int 5) (const_int 7)]))))))]
9717 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; pmaddwd / vpmaddwd on V8HI inputs producing V4SI.  Alternative 0
;; ("noavx") is the two-operand form with operand 1 tied to the
;; destination; alternative 1 ("avx") is the three-operand form.  Enabled
;; under TARGET_SSE2 when ix86_binary_operator_ok accepts the operands.
;; NOTE(review): the arithmetic codes combining the selected halves and
;; the opening line of the output template are not visible in this
;; excerpt.
9719 (define_insn "*sse2_pmaddwd"
9720 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9725 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
9726 (parallel [(const_int 0) (const_int 2)
9727 (const_int 4) (const_int 6)])))
9730 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
9731 (parallel [(const_int 0) (const_int 2)
9732 (const_int 4) (const_int 6)]))))
9735 (vec_select:V4HI (match_dup 1)
9736 (parallel [(const_int 1) (const_int 3)
9737 (const_int 5) (const_int 7)])))
9739 (vec_select:V4HI (match_dup 2)
9740 (parallel [(const_int 1) (const_int 3)
9741 (const_int 5) (const_int 7)]))))))]
9742 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
9744 pmaddwd\t{%2, %0|%0, %2}
9745 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
9746 [(set_attr "isa" "noavx,avx")
9747 (set_attr "type" "sseiadd")
9748 (set_attr "atom_unit" "simul")
9749 (set_attr "prefix_data16" "1,*")
9750 (set_attr "prefix" "orig,vex")
9751 (set_attr "mode" "TI")])
;; vpmullq for VI8 modes, with the optional <mask_name> variant.  Enabled
;; under TARGET_AVX512DQ && <mask_mode512bit_condition>.  Operand 1 must
;; be a register; operand 2 may be a register or memory.
;; NOTE(review): the RTL code line applied to the two inputs is not
;; visible in this excerpt.
9753 (define_insn "avx512dq_mul<mode>3<mask_name>"
9754 [(set (match_operand:VI8 0 "register_operand" "=v")
9756 (match_operand:VI8 1 "register_operand" "v")
9757 (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
9758 "TARGET_AVX512DQ && <mask_mode512bit_condition>"
9759 "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9760 [(set_attr "type" "sseimul")
9761 (set_attr "prefix" "evex")
9762 (set_attr "mode" "<sseinsnmode>")])
;; Multiply expander for VI4_AVX512F modes; inputs are accepted as
;; general_vector_operand.  The visible preparation code contains two
;; paths: one forces any operand that is not a nonimmediate_operand into a
;; register and then calls ix86_fixup_binary_operands_no_copy; the other
;; calls ix86_expand_sse2_mulv4si3.
;; NOTE(review): the test that chooses between the two paths and the rest
;; of the preparation block are not visible in this excerpt.
9764 (define_expand "mul<mode>3<mask_name>"
9765 [(set (match_operand:VI4_AVX512F 0 "register_operand")
9767 (match_operand:VI4_AVX512F 1 "general_vector_operand")
9768 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
9769 "TARGET_SSE2 && <mask_mode512bit_condition>"
9773 if (!nonimmediate_operand (operands[1], <MODE>mode))
9774 operands[1] = force_reg (<MODE>mode, operands[1]);
9775 if (!nonimmediate_operand (operands[2], <MODE>mode))
9776 operands[2] = force_reg (<MODE>mode, operands[2]);
9777 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
9781 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; pmulld / vpmulld for VI4_AVX512F modes.  Three alternatives: two
;; "noavx" two-operand forms (they differ only in register constraints,
;; "Yr" vs "*x") and one "avx" three-operand form that may carry
;; <mask_operand3>.  Enabled under TARGET_SSE4_1 together with
;; ix86_binary_operator_ok and <mask_mode512bit_condition>.
;; NOTE(review): the RTL code line applied to the two inputs and the
;; opening line of the output template are not visible in this excerpt.
9786 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
9787 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
9789 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,0,v")
9790 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
9791 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
9793 pmulld\t{%2, %0|%0, %2}
9794 pmulld\t{%2, %0|%0, %2}
9795 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9796 [(set_attr "isa" "noavx,noavx,avx")
9797 (set_attr "type" "sseimul")
9798 (set_attr "prefix_extra" "1")
9799 (set_attr "prefix" "<mask_prefix4>")
9800 (set_attr "btver2_decode" "vector,vector,vector")
9801 (set_attr "mode" "<sseinsnmode>")])
;; Multiply expander for VI8_AVX2_AVX512F modes.  All three operands must
;; be registers; the preparation code hands them to
;; ix86_expand_sse2_mulvxdi3.
;; NOTE(review): the expander condition and the rest of the preparation
;; block are not visible in this excerpt.
9803 (define_expand "mul<mode>3"
9804 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
9805 (mult:VI8_AVX2_AVX512F
9806 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
9807 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
9810 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Expander "vec_widen_<s>mult_hi_<mode>" for VI124_AVX2 inputs; the result
;; (operand 0) has mode <sseunpackmode>.  The any_extend wrapper around
;; operand 1 instantiates the pattern once per extension code.
;; Expansion is delegated to ix86_expand_mul_widen_hilo; its arguments after
;; the three operands are on a line not visible in this listing.
9814 (define_expand "vec_widen_<s>mult_hi_<mode>"
9815 [(match_operand:<sseunpackmode> 0 "register_operand")
9816 (any_extend:<sseunpackmode>
9817 (match_operand:VI124_AVX2 1 "register_operand"))
9818 (match_operand:VI124_AVX2 2 "register_operand")]
9821 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Expander "vec_widen_<s>mult_lo_<mode>" for VI124_AVX2 inputs; the result
;; (operand 0) has mode <sseunpackmode>.  The any_extend wrapper around
;; operand 1 instantiates the pattern once per extension code.
;; Expansion is delegated to ix86_expand_mul_widen_hilo; its arguments after
;; the three operands are on a line not visible in this listing.
9826 (define_expand "vec_widen_<s>mult_lo_<mode>"
9827 [(match_operand:<sseunpackmode> 0 "register_operand")
9828 (any_extend:<sseunpackmode>
9829 (match_operand:VI124_AVX2 1 "register_operand"))
9830 (match_operand:VI124_AVX2 2 "register_operand")]
9833 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
9838 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
9839 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; Takes two V4SI nonimmediate operands and produces a V2DI result by
;; calling ix86_expand_mul_widen_evenodd; the arguments after the three
;; operands are on a line not visible in this listing.
9840 (define_expand "vec_widen_smult_even_v4si"
9841 [(match_operand:V2DI 0 "register_operand")
9842 (match_operand:V4SI 1 "nonimmediate_operand")
9843 (match_operand:V4SI 2 "nonimmediate_operand")]
9846 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Expander "vec_widen_<s>mult_odd_<mode>" for VI4_AVX512F inputs accepted
;; by general_vector_operand; the result has mode <sseunpackmode>.
;; Expansion is delegated to ix86_expand_mul_widen_evenodd; the arguments
;; after the three operands are on a line not visible in this listing.
9851 (define_expand "vec_widen_<s>mult_odd_<mode>"
9852 [(match_operand:<sseunpackmode> 0 "register_operand")
9853 (any_extend:<sseunpackmode>
9854 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
9855 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
9858 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Suffix appended to the gen_<sse2_avx2>_pmaddwd generator name used by
;; sdot_prod<mode>: "512v32hi" for V32HI, empty for V16HI and V8HI.
9863 (define_mode_attr SDOT_PMADD_SUF
9864 [(V32HI "512v32hi") (V16HI "") (V8HI "")])
;; Expander "sdot_prod<mode>" for VI2_AVX2 inputs (operands 1 and 2) with a
;; <sseunpackmode> result (operand 0) and a <sseunpackmode> operand 3.
;; Emits pmaddwd of operands 1 and 2 into a fresh <sseunpackmode> temporary,
;; then sets operand 0 to a PLUS in <sseunpackmode>.
;; NOTE(review): the PLUS operands are on lines not visible in this listing
;; -- presumably operand 3 and the temporary; confirm.
9866 (define_expand "sdot_prod<mode>"
9867 [(match_operand:<sseunpackmode> 0 "register_operand")
9868 (match_operand:VI2_AVX2 1 "register_operand")
9869 (match_operand:VI2_AVX2 2 "register_operand")
9870 (match_operand:<sseunpackmode> 3 "register_operand")]
9873 rtx t = gen_reg_rtx (<sseunpackmode>mode);
9874 emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
9875 emit_insn (gen_rtx_SET (operands[0],
9876 gen_rtx_PLUS (<sseunpackmode>mode,
9881 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
9882 ;; back together when madd is available.
;; Emits two XOP generators in sequence: gen_xop_pmacsdqh writes a V2DI
;; temporary from operands 1, 2 and 3, then gen_xop_pmacsdql writes
;; operand 0 from operands 1, 2 and that temporary.
;; NOTE(review): the enabling condition is not visible in this listing --
;; presumably TARGET_XOP; confirm.
9883 (define_expand "sdot_prodv4si"
9884 [(match_operand:V2DI 0 "register_operand")
9885 (match_operand:V4SI 1 "register_operand")
9886 (match_operand:V4SI 2 "register_operand")
9887 (match_operand:V2DI 3 "register_operand")]
9890 rtx t = gen_reg_rtx (V2DImode);
9891 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
9892 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Expander "usadv16qi": emits sse2_psadbw of operands 1 and 2 into a V2DI
;; temporary, moves that into a V4SI temporary with convert_move (third
;; argument 0), and finally emits addv4si3 of that temporary and operand 3
;; into operand 0.
9896 (define_expand "usadv16qi"
9897 [(match_operand:V4SI 0 "register_operand")
9898 (match_operand:V16QI 1 "register_operand")
9899 (match_operand:V16QI 2 "nonimmediate_operand")
9900 (match_operand:V4SI 3 "nonimmediate_operand")]
9903 rtx t1 = gen_reg_rtx (V2DImode);
9904 rtx t2 = gen_reg_rtx (V4SImode);
9905 emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
9906 convert_move (t2, t1, 0);
9907 emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
;; Expander "usadv32qi", the 256-bit counterpart of usadv16qi: emits
;; avx2_psadbw of operands 1 and 2 into a V4DI temporary, moves that into a
;; V8SI temporary with convert_move (third argument 0), and finally emits
;; addv8si3 of that temporary and operand 3 into operand 0.
9911 (define_expand "usadv32qi"
9912 [(match_operand:V8SI 0 "register_operand")
9913 (match_operand:V32QI 1 "register_operand")
9914 (match_operand:V32QI 2 "nonimmediate_operand")
9915 (match_operand:V8SI 3 "nonimmediate_operand")]
9918 rtx t1 = gen_reg_rtx (V4DImode);
9919 rtx t2 = gen_reg_rtx (V8SImode);
9920 emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
9921 convert_move (t2, t1, 0);
9922 emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
;; Insn "ashr<mode>3" for the VI24_AVX2 modes; the count (operand 2) is an
;; SImode nonmemory operand with constraint "xN".  Alternative 0 ("noavx")
;; emits two-operand psra<ssemodesuffix> with operand 1 tied to the
;; destination; alternative 1 ("avx") emits three-operand
;; vpsra<ssemodesuffix>.  length_immediate is chosen by whether operand 2
;; is a const_int.
;; NOTE(review): the RTL operator line and the condition are not visible in
;; this listing.
9926 (define_insn "ashr<mode>3"
9927 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
9929 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
9930 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
9933 psra<ssemodesuffix>\t{%2, %0|%0, %2}
9934 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9935 [(set_attr "isa" "noavx,avx")
9936 (set_attr "type" "sseishft")
9937 (set (attr "length_immediate")
9938 (if_then_else (match_operand 2 "const_int_operand")
9940 (const_string "0")))
9941 (set_attr "prefix_data16" "1,*")
9942 (set_attr "prefix" "orig,vex")
9943 (set_attr "mode" "<sseinsnmode>")])
;; Maskable ashiftrt insn for the VI24_AVX512BW_1 modes, emitting
;; vpsra<ssemodesuffix> with the optional <mask_operand3> suffix.
;; Alternative 0 takes a register source and a register ("v") count;
;; alternative 1 allows a memory source but only with an "N" constant count.
;; NOTE(review): the condition line is not visible in this listing.
9945 (define_insn "<mask_codefor>ashr<mode>3<mask_name>"
9946 [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
9947 (ashiftrt:VI24_AVX512BW_1
9948 (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
9949 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
9951 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9952 [(set_attr "type" "sseishft")
9953 (set (attr "length_immediate")
9954 (if_then_else (match_operand 2 "const_int_operand")
9956 (const_string "0")))
9957 (set_attr "mode" "<sseinsnmode>")])
;; Maskable V2DI shift insn emitting vpsraq with the optional
;; <mask_operand3> suffix.  Unlike the neighbouring ashr patterns the count
;; (operand 2) is DImode.  Alternative 0 takes register source and count;
;; alternative 1 allows a memory source with an "N" constant count.
;; NOTE(review): the RTL operator line and the condition are not visible in
;; this listing.
9959 (define_insn "<mask_codefor>ashrv2di3<mask_name>"
9960 [(set (match_operand:V2DI 0 "register_operand" "=v,v")
9962 (match_operand:V2DI 1 "nonimmediate_operand" "v,vm")
9963 (match_operand:DI 2 "nonmemory_operand" "v,N")))]
9965 "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9966 [(set_attr "type" "sseishft")
9967 (set (attr "length_immediate")
9968 (if_then_else (match_operand 2 "const_int_operand")
9970 (const_string "0")))
9971 (set_attr "mode" "TI")])
;; Maskable ashiftrt insn for the VI248_AVX512BW_AVX512VL modes, emitting
;; vpsra<ssemodesuffix> with the optional <mask_operand3> suffix.
;; Alternative 0 takes a register source and a register ("v") SImode count;
;; alternative 1 allows a memory source with an "N" constant count.
;; NOTE(review): the condition line is not visible in this listing.
9973 (define_insn "ashr<mode>3<mask_name>"
9974 [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
9975 (ashiftrt:VI248_AVX512BW_AVX512VL
9976 (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
9977 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
9979 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9980 [(set_attr "type" "sseishft")
9981 (set (attr "length_immediate")
9982 (if_then_else (match_operand 2 "const_int_operand")
9984 (const_string "0")))
9985 (set_attr "mode" "<sseinsnmode>")])
;; Shift insn, instantiated per any_lshift code, for the VI2_AVX2_AVX512BW
;; modes.  Alternative 0 ("noavx") emits two-operand
;; p<vshift><ssemodesuffix> with operand 1 tied to the destination;
;; alternative 1 ("avx") emits the three-operand vp<vshift> form with the
;; optional <mask_operand3> suffix.  Enabled under TARGET_SSE2 together
;; with <mask_mode512bit_condition> and <mask_avx512bw_condition>.
9987 (define_insn "<shift_insn><mode>3<mask_name>"
9988 [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v")
9989 (any_lshift:VI2_AVX2_AVX512BW
9990 (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v")
9991 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
9992 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
9994 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
9995 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9996 [(set_attr "isa" "noavx,avx")
9997 (set_attr "type" "sseishft")
9998 (set (attr "length_immediate")
9999 (if_then_else (match_operand 2 "const_int_operand")
10001 (const_string "0")))
10002 (set_attr "prefix_data16" "1,*")
10003 (set_attr "prefix" "orig,vex")
10004 (set_attr "mode" "<sseinsnmode>")])
;; Shift insn, instantiated per any_lshift code, for the VI48_AVX2 modes.
;; Alternative 0 ("noavx") emits two-operand p<vshift><ssemodesuffix> with
;; operand 1 tied to the destination; alternative 1 ("avx") emits the
;; three-operand vp<vshift> form with the optional <mask_operand3> suffix.
;; Enabled under TARGET_SSE2 && <mask_mode512bit_condition>.
10006 (define_insn "<shift_insn><mode>3<mask_name>"
10007 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,v")
10008 (any_lshift:VI48_AVX2
10009 (match_operand:VI48_AVX2 1 "register_operand" "0,v")
10010 (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
10011 "TARGET_SSE2 && <mask_mode512bit_condition>"
10013 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
10014 vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10015 [(set_attr "isa" "noavx,avx")
10016 (set_attr "type" "sseishft")
10017 (set (attr "length_immediate")
10018 (if_then_else (match_operand 2 "const_int_operand")
10020 (const_string "0")))
10021 (set_attr "prefix_data16" "1,*")
10022 (set_attr "prefix" "orig,vex")
10023 (set_attr "mode" "<sseinsnmode>")])
;; Shift insn, instantiated per any_lshift code, for the VI48_512 modes,
;; enabled under TARGET_AVX512F && <mask_mode512bit_condition>.  Emits
;; vp<vshift><ssemodesuffix> with the optional <mask_operand3> suffix.
;; Alternative 0 takes a register source with a "vN" count; alternative 1
;; takes a memory source and then accepts only an "N" constant count.
10025 (define_insn "<shift_insn><mode>3<mask_name>"
10026 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
10027 (any_lshift:VI48_512
10028 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
10029 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
10030 "TARGET_AVX512F && <mask_mode512bit_condition>"
10031 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10032 [(set_attr "isa" "avx512f")
10033 (set_attr "type" "sseishft")
10034 (set (attr "length_immediate")
10035 (if_then_else (match_operand 2 "const_int_operand")
10037 (const_string "0")))
10038 (set_attr "prefix" "evex")
10039 (set_attr "mode" "<sseinsnmode>")])
;; Expander "vec_shl_<mode>" for the VI_128 modes.  The preparation code
;; re-views operand 1 as V1TImode, allocates a fresh V1TImode register as
;; operand 3 for the first set, and makes operand 4 the <MODE>mode lowpart
;; of that register so the second set copies it into operand 0.  The count
;; must satisfy const_0_to_255_mul_8_operand.
;; NOTE(review): the RTL operator of the first set and the condition are
;; not visible in this listing.
10042 (define_expand "vec_shl_<mode>"
10043 [(set (match_dup 3)
10045 (match_operand:VI_128 1 "register_operand")
10046 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10047 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10050 operands[1] = gen_lowpart (V1TImode, operands[1]);
10051 operands[3] = gen_reg_rtx (V1TImode);
10052 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Insn "<sse2_avx2>_ashl<mode>3" for the VIMAX_AVX2 modes.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing, then returns two-operand pslldq or three-operand
;; vpslldq depending on which_alternative (any other alternative hits
;; gcc_unreachable).
;; NOTE(review): the RTL operator line, the condition and the case labels
;; are not visible in this listing.
10055 (define_insn "<sse2_avx2>_ashl<mode>3"
10056 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10058 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10059 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10062 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10064 switch (which_alternative)
10067 return "pslldq\t{%2, %0|%0, %2}";
10069 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
10071 gcc_unreachable ();
10074 [(set_attr "isa" "noavx,avx")
10075 (set_attr "type" "sseishft")
10076 (set_attr "length_immediate" "1")
10077 (set_attr "prefix_data16" "1,*")
10078 (set_attr "prefix" "orig,vex")
10079 (set_attr "mode" "<sseinsnmode>")])
;; Expander "vec_shr_<mode>" for the VI_128 modes.  The preparation code
;; re-views operand 1 as V1TImode, allocates a fresh V1TImode register as
;; operand 3 for the first set, and makes operand 4 the <MODE>mode lowpart
;; of that register so the second set copies it into operand 0.  The count
;; must satisfy const_0_to_255_mul_8_operand.
;; NOTE(review): the RTL operator of the first set and the condition are
;; not visible in this listing.
10081 (define_expand "vec_shr_<mode>"
10082 [(set (match_dup 3)
10084 (match_operand:VI_128 1 "register_operand")
10085 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10086 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10089 operands[1] = gen_lowpart (V1TImode, operands[1]);
10090 operands[3] = gen_reg_rtx (V1TImode);
10091 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Insn "<sse2_avx2>_lshr<mode>3": lshiftrt on the VIMAX_AVX2 modes.
;; Operand 2 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing, then returns two-operand psrldq or
;; three-operand vpsrldq depending on which_alternative (any other
;; alternative hits gcc_unreachable).
;; NOTE(review): the condition and the case labels are not visible in this
;; listing.
10094 (define_insn "<sse2_avx2>_lshr<mode>3"
10095 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10096 (lshiftrt:VIMAX_AVX2
10097 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10098 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10101 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10103 switch (which_alternative)
10106 return "psrldq\t{%2, %0|%0, %2}";
10108 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10110 gcc_unreachable ();
10113 [(set_attr "isa" "noavx,avx")
10114 (set_attr "type" "sseishft")
10115 (set_attr "length_immediate" "1")
10116 (set_attr "atom_unit" "sishuf")
10117 (set_attr "prefix_data16" "1,*")
10118 (set_attr "prefix" "orig,vex")
10119 (set_attr "mode" "<sseinsnmode>")])
;; Maskable rotate insn, instantiated per any_rotate code, for the
;; VI48_AVX512VL modes.  The count is itself a vector operand of the same
;; mode (register or memory).  Emits vp<rotate>v<ssemodesuffix> with the
;; optional <mask_operand3> suffix.
;; NOTE(review): the condition line is not visible in this listing.
10121 (define_insn "<avx512>_<rotate>v<mode><mask_name>"
10122 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10123 (any_rotate:VI48_AVX512VL
10124 (match_operand:VI48_AVX512VL 1 "register_operand" "v")
10125 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10127 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10128 [(set_attr "prefix" "evex")
10129 (set_attr "mode" "<sseinsnmode>")])
;; Maskable rotate insn, instantiated per any_rotate code, for the
;; VI48_AVX512VL modes with a constant count: operand 2 must satisfy
;; const_0_to_255_operand, and the source may be a register or memory.
;; Emits vp<rotate><ssemodesuffix> with the optional <mask_operand3> suffix.
;; NOTE(review): the condition line is not visible in this listing.
10131 (define_insn "<avx512>_<rotate><mode><mask_name>"
10132 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10133 (any_rotate:VI48_AVX512VL
10134 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
10135 (match_operand:SI 2 "const_0_to_255_operand")))]
10137 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10138 [(set_attr "prefix" "evex")
10139 (set_attr "mode" "<sseinsnmode>")])
;; Expander "<code><mode>3", instantiated per maxmin code, for the
;; VI124_256_AVX512F_AVX512BW modes.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy (<CODE>, ...).
;; NOTE(review): the condition line is not visible in this listing.
10141 (define_expand "<code><mode>3"
10142 [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
10143 (maxmin:VI124_256_AVX512F_AVX512BW
10144 (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
10145 (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
10147 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Insn for the VI124_256 modes emitting three-operand
;; vp<maxmin_int><ssemodesuffix>.  Requires TARGET_AVX2 and
;; ix86_binary_operator_ok (<CODE>, ...); operand 1 is marked commutative.
;; NOTE(review): the RTL operator line is not visible in this listing --
;; presumably maxmin, given <CODE> and <maxmin_int>; confirm.
10149 (define_insn "*avx2_<code><mode>3"
10150 [(set (match_operand:VI124_256 0 "register_operand" "=v")
10152 (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
10153 (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
10154 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10155 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10156 [(set_attr "type" "sseiadd")
10157 (set_attr "prefix_extra" "1")
10158 (set_attr "prefix" "vex")
10159 (set_attr "mode" "OI")])
;; Masked expander for maxmin on the VI48_AVX512VL modes: a vec_merge of
;; the maxmin result with operand 3 (vector_move_operand) under the mask
;; register in operand 4 (<avx512fmaskmode>).  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy (<CODE>, ...).
;; NOTE(review): the condition line is not visible in this listing.
10161 (define_expand "<code><mode>3_mask"
10162 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10163 (vec_merge:VI48_AVX512VL
10164 (maxmin:VI48_AVX512VL
10165 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10166 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10167 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10168 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10170 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Maskable maxmin insn for the VI48_AVX512VL modes, emitting
;; vp<maxmin_int><ssemodesuffix> with the optional <mask_operand3> suffix.
;; Although the name says avx512bw, the condition tested here is
;; TARGET_AVX512F plus ix86_binary_operator_ok (<CODE>, ...).
;; Operand 1 is marked commutative.
10172 (define_insn "*avx512bw_<code><mode>3<mask_name>"
10173 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10174 (maxmin:VI48_AVX512VL
10175 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
10176 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10177 "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10178 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10179 [(set_attr "type" "sseiadd")
10180 (set_attr "prefix_extra" "1")
10181 (set_attr "prefix" "maybe_evex")
10182 (set_attr "mode" "<sseinsnmode>")])
;; Maskable maxmin insn for the VI12_AVX512VL modes, emitting
;; vp<maxmin_int><ssemodesuffix> with the optional <mask_operand3> suffix.
;; Operand 1 must be a register and is not marked commutative here.
;; NOTE(review): the condition line is not visible in this listing.
10184 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10185 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10186 (maxmin:VI12_AVX512VL
10187 (match_operand:VI12_AVX512VL 1 "register_operand" "v")
10188 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
10190 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10191 [(set_attr "type" "sseiadd")
10192 (set_attr "prefix" "evex")
10193 (set_attr "mode" "<sseinsnmode>")])
;; Expander "<code><mode>3", instantiated per maxmin code, for the
;; VI8_AVX2_AVX512BW modes.
;; One path only calls ix86_fixup_binary_operands_no_copy; its guard is
;; partly visible and includes "<MODE>mode == V8DImode || TARGET_AVX512VL".
;; The other path fills xops[0..5] and calls ix86_expand_int_vcond:
;;   xops[0]          destination
;;   xops[1], xops[2] operands 1, 2 for SMAX/UMAX, swapped otherwise
;;   xops[3]          comparison of operands 1 and 2, GTU for UMAX/UMIN
;;                    and GT otherwise
;;   xops[4], xops[5] operands 1 and 2
;; NOTE(review): the first half of the guard, the declarations and the
;; handling of `ok` are not visible in this listing.
10195 (define_expand "<code><mode>3"
10196 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand")
10197 (maxmin:VI8_AVX2_AVX512BW
10198 (match_operand:VI8_AVX2_AVX512BW 1 "register_operand")
10199 (match_operand:VI8_AVX2_AVX512BW 2 "register_operand")))]
10203 && (<MODE>mode == V8DImode || TARGET_AVX512VL))
10204 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10207 enum rtx_code code;
10212 xops[0] = operands[0];
10214 if (<CODE> == SMAX || <CODE> == UMAX)
10216 xops[1] = operands[1];
10217 xops[2] = operands[2];
10221 xops[1] = operands[2];
10222 xops[2] = operands[1];
10225 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
10227 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
10228 xops[4] = operands[1];
10229 xops[5] = operands[2];
10231 ok = ix86_expand_int_vcond (xops);
;; Expander "<code><mode>3" for the VI124_128 modes; this variant compares
;; with GT and tests for SMAX.
;; When TARGET_SSE4_1 holds or the mode is V8HImode it only calls
;; ix86_fixup_binary_operands_no_copy.  Otherwise both sources are forced
;; into registers and xops[0..5] is filled for ix86_expand_int_vcond:
;; xops[1]/xops[2] are operands 1/2 for SMAX and swapped otherwise,
;; xops[3] is a GT comparison of operands 1 and 2, and xops[4]/xops[5] are
;; operands 1 and 2.
;; NOTE(review): the RTL operator line (presumably a signed max/min
;; iterator), the condition and the handling of `ok` are not visible in
;; this listing.
10237 (define_expand "<code><mode>3"
10238 [(set (match_operand:VI124_128 0 "register_operand")
10240 (match_operand:VI124_128 1 "nonimmediate_operand")
10241 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10244 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
10245 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10251 xops[0] = operands[0];
10252 operands[1] = force_reg (<MODE>mode, operands[1]);
10253 operands[2] = force_reg (<MODE>mode, operands[2]);
10255 if (<CODE> == SMAX)
10257 xops[1] = operands[1];
10258 xops[2] = operands[2];
10262 xops[1] = operands[2];
10263 xops[2] = operands[1];
10266 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
10267 xops[4] = operands[1];
10268 xops[5] = operands[2];
10270 ok = ix86_expand_int_vcond (xops);
;; Max/min insn for the VI14_128 modes.  Alternatives 0 and 1 ("noavx")
;; emit two-operand p<maxmin_int><ssemodesuffix> with operand 1 tied to the
;; destination; alternative 2 ("avx") emits the three-operand vp... form
;; with the optional <mask_operand3> suffix.  The visible part of the
;; condition requires <mask_mode512bit_condition> and
;; ix86_binary_operator_ok (<CODE>, ...).
;; NOTE(review): the RTL operator line and the first term of the condition
;; are not visible in this listing.
10276 (define_insn "*sse4_1_<code><mode>3<mask_name>"
10277 [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
10279 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,0,v")
10280 (match_operand:VI14_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
10282 && <mask_mode512bit_condition>
10283 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10285 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10286 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10287 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10288 [(set_attr "isa" "noavx,noavx,avx")
10289 (set_attr "type" "sseiadd")
10290 (set_attr "prefix_extra" "1,1,*")
10291 (set_attr "prefix" "orig,orig,vex")
10292 (set_attr "mode" "TI")])
;; V8HI max/min insn available with plain TARGET_SSE2 (plus
;; ix86_binary_operator_ok).  Alternative 0 ("noavx") emits two-operand
;; p<maxmin_int>w with operand 1 tied to the destination; alternative 1
;; ("avx") emits three-operand vp<maxmin_int>w.
;; NOTE(review): the RTL operator line is not visible in this listing.
10294 (define_insn "*<code>v8hi3"
10295 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10297 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
10298 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
10299 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
10301 p<maxmin_int>w\t{%2, %0|%0, %2}
10302 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
10303 [(set_attr "isa" "noavx,avx")
10304 (set_attr "type" "sseiadd")
10305 (set_attr "prefix_data16" "1,*")
10306 (set_attr "prefix_extra" "*,1")
10307 (set_attr "prefix" "orig,vex")
10308 (set_attr "mode" "TI")])
;; Expander "<code><mode>3" for the VI124_128 modes; this variant compares
;; with GTU and tests for UMAX.  Three paths are visible:
;;  1. TARGET_SSE4_1, or mode V16QImode: only
;;     ix86_fixup_binary_operands_no_copy.
;;  2. UMAX on V8HImode: emits sse2_ussubv8hi3 (operand 1, operand 2) into
;;     op3 and then addv8hi3 (op3, operand 2) into operand 0.  op3 is the
;;     destination itself unless that equals operand 2, in which case a
;;     fresh V8HI register is used so operand 2 is not overwritten before
;;     the add.
;;  3. Otherwise: both sources are forced into registers and xops[0..5] is
;;     filled for ix86_expand_int_vcond, with xops[1]/xops[2] being
;;     operands 1/2 for UMAX and swapped otherwise, and xops[3] a GTU
;;     comparison of operands 1 and 2.
;; NOTE(review): the RTL operator line, the condition and the handling of
;; `ok` are not visible in this listing.
10310 (define_expand "<code><mode>3"
10311 [(set (match_operand:VI124_128 0 "register_operand")
10313 (match_operand:VI124_128 1 "nonimmediate_operand")
10314 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10317 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
10318 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10319 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
10321 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
10322 operands[1] = force_reg (<MODE>mode, operands[1]);
10323 if (rtx_equal_p (op3, op2))
10324 op3 = gen_reg_rtx (V8HImode);
10325 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
10326 emit_insn (gen_addv8hi3 (op0, op3, op2));
10334 operands[1] = force_reg (<MODE>mode, operands[1]);
10335 operands[2] = force_reg (<MODE>mode, operands[2]);
10337 xops[0] = operands[0];
10339 if (<CODE> == UMAX)
10341 xops[1] = operands[1];
10342 xops[2] = operands[2];
10346 xops[1] = operands[2];
10347 xops[2] = operands[1];
10350 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
10351 xops[4] = operands[1];
10352 xops[5] = operands[2];
10354 ok = ix86_expand_int_vcond (xops);
;; Max/min insn for the VI24_128 modes.  Alternatives 0 and 1 ("noavx")
;; emit two-operand p<maxmin_int><ssemodesuffix> with operand 1 tied to the
;; destination; alternative 2 ("avx") emits the three-operand vp... form
;; with the optional <mask_operand3> suffix.  The visible part of the
;; condition requires <mask_mode512bit_condition> and
;; ix86_binary_operator_ok (<CODE>, ...).
;; NOTE(review): the RTL operator line and the first term of the condition
;; are not visible in this listing.
10360 (define_insn "*sse4_1_<code><mode>3<mask_name>"
10361 [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
10363 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,0,v")
10364 (match_operand:VI24_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
10366 && <mask_mode512bit_condition>
10367 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10369 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10370 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10371 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10372 [(set_attr "isa" "noavx,noavx,avx")
10373 (set_attr "type" "sseiadd")
10374 (set_attr "prefix_extra" "1,1,*")
10375 (set_attr "prefix" "orig,orig,vex")
10376 (set_attr "mode" "TI")])
;; V16QI max/min insn available with plain TARGET_SSE2 (plus
;; ix86_binary_operator_ok).  Alternative 0 ("noavx") emits two-operand
;; p<maxmin_int>b with operand 1 tied to the destination; alternative 1
;; ("avx") emits three-operand vp<maxmin_int>b.
;; NOTE(review): the RTL operator line is not visible in this listing.
10378 (define_insn "*<code>v16qi3"
10379 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10381 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
10382 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
10383 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
10385 p<maxmin_int>b\t{%2, %0|%0, %2}
10386 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
10387 [(set_attr "isa" "noavx,avx")
10388 (set_attr "type" "sseiadd")
10389 (set_attr "prefix_data16" "1,*")
10390 (set_attr "prefix_extra" "*,1")
10391 (set_attr "prefix" "orig,vex")
10392 (set_attr "mode" "TI")])
10394 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10396 ;; Parallel integral comparisons
10398 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander "avx2_eq<mode>3" for the VI_256 modes.  Its only preparation
;; step is ix86_fixup_binary_operands_no_copy (EQ, ...).
;; NOTE(review): the RTL operator line and the condition are not visible in
;; this listing.
10400 (define_expand "avx2_eq<mode>3"
10401 [(set (match_operand:VI_256 0 "register_operand")
10403 (match_operand:VI_256 1 "nonimmediate_operand")
10404 (match_operand:VI_256 2 "nonimmediate_operand")))]
10406 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Equality-compare insn for the VI_256 modes, emitting three-operand
;; vpcmpeq<ssemodesuffix>.  Requires TARGET_AVX2 and
;; ix86_binary_operator_ok (EQ, ...); operand 1 is marked commutative.
10408 (define_insn "*avx2_eq<mode>3"
10409 [(set (match_operand:VI_256 0 "register_operand" "=x")
10411 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
10412 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10413 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10414 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10415 [(set_attr "type" "ssecmp")
10416 (set_attr "prefix_extra" "1")
10417 (set_attr "prefix" "vex")
10418 (set_attr "mode" "OI")])
;; Expander for the mask-producing equality compare on the VI12_AVX512VL
;; modes: the result is an <avx512fmaskmode> register set from an
;; UNSPEC_MASKED_EQ of operands 1 and 2.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy (EQ, ...).
;; NOTE(review): the condition line is not visible in this listing.
10420 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10421 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10422 (unspec:<avx512fmaskmode>
10423 [(match_operand:VI12_AVX512VL 1 "register_operand")
10424 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
10425 UNSPEC_MASKED_EQ))]
10427 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Expander for the mask-producing equality compare on the VI48_AVX512VL
;; modes: the result is an <avx512fmaskmode> register set from an
;; UNSPEC_MASKED_EQ of operands 1 and 2.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy (EQ, ...).
;; NOTE(review): the condition line is not visible in this listing.
10429 (define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10430 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10431 (unspec:<avx512fmaskmode>
10432 [(match_operand:VI48_AVX512VL 1 "register_operand")
10433 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
10434 UNSPEC_MASKED_EQ))]
10436 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Insn for the mask-producing equality compare on the VI12_AVX512VL modes.
;; The destination is a "Yk" mask register; emits vpcmpeq<ssemodesuffix>
;; with the optional <mask_scalar_merge_operand3> suffix.  The condition
;; tested here is TARGET_AVX512F plus ix86_binary_operator_ok (EQ, ...);
;; operand 1 is marked commutative.
10438 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10439 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10440 (unspec:<avx512fmaskmode>
10441 [(match_operand:VI12_AVX512VL 1 "register_operand" "%v")
10442 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
10443 UNSPEC_MASKED_EQ))]
10444 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10445 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10446 [(set_attr "type" "ssecmp")
10447 (set_attr "prefix_extra" "1")
10448 (set_attr "prefix" "evex")
10449 (set_attr "mode" "<sseinsnmode>")])
;; Insn for the mask-producing equality compare on the VI48_AVX512VL modes.
;; The destination is a "Yk" mask register; emits vpcmpeq<ssemodesuffix>
;; with the optional <mask_scalar_merge_operand3> suffix.  Requires
;; TARGET_AVX512F and ix86_binary_operator_ok (EQ, ...); operand 1 is
;; marked commutative.
10451 (define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10452 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10453 (unspec:<avx512fmaskmode>
10454 [(match_operand:VI48_AVX512VL 1 "register_operand" "%v")
10455 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
10456 UNSPEC_MASKED_EQ))]
10457 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10458 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10459 [(set_attr "type" "ssecmp")
10460 (set_attr "prefix_extra" "1")
10461 (set_attr "prefix" "evex")
10462 (set_attr "mode" "<sseinsnmode>")])
;; V2DI equality-compare insn.  Alternatives 0 and 1 ("noavx") emit
;; two-operand pcmpeqq with operand 1 tied to the destination; alternative
;; 2 ("avx") emits three-operand vpcmpeqq.  Requires TARGET_SSE4_1 and
;; ix86_binary_operator_ok (EQ, ...); operand 1 is marked commutative.
10464 (define_insn "*sse4_1_eqv2di3"
10465 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
10467 (match_operand:V2DI 1 "nonimmediate_operand" "%0,0,x")
10468 (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
10469 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
10471 pcmpeqq\t{%2, %0|%0, %2}
10472 pcmpeqq\t{%2, %0|%0, %2}
10473 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
10474 [(set_attr "isa" "noavx,noavx,avx")
10475 (set_attr "type" "ssecmp")
10476 (set_attr "prefix_extra" "1")
10477 (set_attr "prefix" "orig,orig,vex")
10478 (set_attr "mode" "TI")])
;; Equality-compare insn for the VI124_128 modes.  Alternative 0 ("noavx")
;; emits two-operand pcmpeq<ssemodesuffix> with operand 1 tied to the
;; destination; alternative 1 ("avx") emits the three-operand vpcmpeq form.
;; Requires TARGET_SSE2 && !TARGET_XOP and
;; ix86_binary_operator_ok (EQ, ...); operand 1 is marked commutative.
10480 (define_insn "*sse2_eq<mode>3"
10481 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10483 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
10484 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
10485 "TARGET_SSE2 && !TARGET_XOP
10486 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10488 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
10489 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10490 [(set_attr "isa" "noavx,avx")
10491 (set_attr "type" "ssecmp")
10492 (set_attr "prefix_data16" "1,*")
10493 (set_attr "prefix" "orig,vex")
10494 (set_attr "mode" "TI")])
;; Expander "sse2_eq<mode>3" for the VI124_128 modes, enabled under
;; TARGET_SSE2 && !TARGET_XOP.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy (EQ, ...).
10496 (define_expand "sse2_eq<mode>3"
10497 [(set (match_operand:VI124_128 0 "register_operand")
10499 (match_operand:VI124_128 1 "nonimmediate_operand")
10500 (match_operand:VI124_128 2 "nonimmediate_operand")))]
10501 "TARGET_SSE2 && !TARGET_XOP "
10502 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Expander "sse4_1_eqv2di3" for V2DI.  Its only preparation step is
;; ix86_fixup_binary_operands_no_copy (EQ, V2DImode, ...).
;; NOTE(review): the RTL operator line and the condition are not visible in
;; this listing.
10504 (define_expand "sse4_1_eqv2di3"
10505 [(set (match_operand:V2DI 0 "register_operand")
10507 (match_operand:V2DI 1 "nonimmediate_operand")
10508 (match_operand:V2DI 2 "nonimmediate_operand")))]
10510 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI compare insn emitting pcmpgtq (alternatives 0 and 1, "noavx",
;; operand 1 tied to the destination) or three-operand vpcmpgtq
;; (alternative 2, "avx").  Operand 1 must be a register and is not marked
;; commutative.
;; NOTE(review): the RTL operator line and the condition are not visible in
;; this listing.
10512 (define_insn "sse4_2_gtv2di3"
10513 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
10515 (match_operand:V2DI 1 "register_operand" "0,0,x")
10516 (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
10519 pcmpgtq\t{%2, %0|%0, %2}
10520 pcmpgtq\t{%2, %0|%0, %2}
10521 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
10522 [(set_attr "isa" "noavx,noavx,avx")
10523 (set_attr "type" "ssecmp")
10524 (set_attr "prefix_extra" "1")
10525 (set_attr "prefix" "orig,orig,vex")
10526 (set_attr "mode" "TI")])
;; Compare insn for the VI_256 modes emitting three-operand
;; vpcmpgt<ssemodesuffix>.  Operand 1 must be a register and is not marked
;; commutative.
;; NOTE(review): the RTL operator line and the condition are not visible in
;; this listing.
10528 (define_insn "avx2_gt<mode>3"
10529 [(set (match_operand:VI_256 0 "register_operand" "=x")
10531 (match_operand:VI_256 1 "register_operand" "x")
10532 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10534 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10535 [(set_attr "type" "ssecmp")
10536 (set_attr "prefix_extra" "1")
10537 (set_attr "prefix" "vex")
10538 (set_attr "mode" "OI")])
;; Mask-producing compare insn for the VI48_AVX512VL modes: sets a "Yk"
;; mask register from an UNSPEC_MASKED_GT of operands 1 and 2 and emits
;; vpcmpgt<ssemodesuffix> with the optional <mask_scalar_merge_operand3>
;; suffix.  Operand 1 is not marked commutative.
;; NOTE(review): the condition line is not visible in this listing.
10540 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10541 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10542 (unspec:<avx512fmaskmode>
10543 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
10544 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10546 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10547 [(set_attr "type" "ssecmp")
10548 (set_attr "prefix_extra" "1")
10549 (set_attr "prefix" "evex")
10550 (set_attr "mode" "<sseinsnmode>")])
;; Mask-producing compare insn for the VI12_AVX512VL modes: sets a "Yk"
;; mask register from an UNSPEC_MASKED_GT of operands 1 and 2 and emits
;; vpcmpgt<ssemodesuffix> with the optional <mask_scalar_merge_operand3>
;; suffix.  Operand 1 is not marked commutative.
;; NOTE(review): the condition line is not visible in this listing.
10552 (define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10553 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10554 (unspec:<avx512fmaskmode>
10555 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
10556 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10558 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10559 [(set_attr "type" "ssecmp")
10560 (set_attr "prefix_extra" "1")
10561 (set_attr "prefix" "evex")
10562 (set_attr "mode" "<sseinsnmode>")])
;; Compare insn for the VI124_128 modes, enabled under
;; TARGET_SSE2 && !TARGET_XOP.  Alternative 0 ("noavx") emits two-operand
;; pcmpgt<ssemodesuffix> with operand 1 tied to the destination;
;; alternative 1 ("avx") emits the three-operand vpcmpgt form.
;; NOTE(review): the RTL operator line is not visible in this listing.
10564 (define_insn "sse2_gt<mode>3"
10565 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10567 (match_operand:VI124_128 1 "register_operand" "0,x")
10568 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
10569 "TARGET_SSE2 && !TARGET_XOP"
10571 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
10572 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10573 [(set_attr "isa" "noavx,avx")
10574 (set_attr "type" "ssecmp")
10575 (set_attr "prefix_data16" "1,*")
10576 (set_attr "prefix" "orig,vex")
10577 (set_attr "mode" "TI")])
;; Expander "vcond<V_512:mode><VI_512:mode>": an if_then_else that selects
;; between operands 1 and 2 (V_512) according to comparison operator 3
;; applied to the integer vectors in operands 4 and 5 (VI_512).  The
;; visible part of the condition requires both modes to have the same
;; number of units.  Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the first term of the condition and the handling of `ok`
;; are not visible in this listing.
10579 (define_expand "vcond<V_512:mode><VI_512:mode>"
10580 [(set (match_operand:V_512 0 "register_operand")
10581 (if_then_else:V_512
10582 (match_operator 3 ""
10583 [(match_operand:VI_512 4 "nonimmediate_operand")
10584 (match_operand:VI_512 5 "general_operand")])
10585 (match_operand:V_512 1)
10586 (match_operand:V_512 2)))]
10588 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10589 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10591 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcond<V_256:mode><VI_256:mode>": an if_then_else that selects
;; between operands 1 and 2 (V_256) according to comparison operator 3
;; applied to the integer vectors in operands 4 and 5 (VI_256).  The
;; visible part of the condition requires both modes to have the same
;; number of units.  Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the first term of the condition and the handling of `ok`
;; are not visible in this listing.
10596 (define_expand "vcond<V_256:mode><VI_256:mode>"
10597 [(set (match_operand:V_256 0 "register_operand")
10598 (if_then_else:V_256
10599 (match_operator 3 ""
10600 [(match_operand:VI_256 4 "nonimmediate_operand")
10601 (match_operand:VI_256 5 "general_operand")])
10602 (match_operand:V_256 1)
10603 (match_operand:V_256 2)))]
10605 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10606 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10608 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcond<V_128:mode><VI124_128:mode>": an if_then_else that
;; selects between operands 1 and 2 (V_128) according to comparison
;; operator 3 applied to the integer vectors in operands 4 and 5
;; (VI124_128).  The visible part of the condition requires both modes to
;; have the same number of units.  Expansion is delegated to
;; ix86_expand_int_vcond.
;; NOTE(review): the first term of the condition and the handling of `ok`
;; are not visible in this listing.
10613 (define_expand "vcond<V_128:mode><VI124_128:mode>"
10614 [(set (match_operand:V_128 0 "register_operand")
10615 (if_then_else:V_128
10616 (match_operator 3 ""
10617 [(match_operand:VI124_128 4 "nonimmediate_operand")
10618 (match_operand:VI124_128 5 "general_operand")])
10619 (match_operand:V_128 1)
10620 (match_operand:V_128 2)))]
10622 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10623 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10625 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcond<VI8F_128:mode>v2di": an if_then_else that selects
;; between operands 1 and 2 (VI8F_128) according to comparison operator 3
;; applied to the V2DI operands 4 and 5.  Expansion is delegated to
;; ix86_expand_int_vcond.
;; NOTE(review): the condition and the handling of `ok` are not visible in
;; this listing.
10630 (define_expand "vcond<VI8F_128:mode>v2di"
10631 [(set (match_operand:VI8F_128 0 "register_operand")
10632 (if_then_else:VI8F_128
10633 (match_operator 3 ""
10634 [(match_operand:V2DI 4 "nonimmediate_operand")
10635 (match_operand:V2DI 5 "general_operand")])
10636 (match_operand:VI8F_128 1)
10637 (match_operand:VI8F_128 2)))]
10640 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcondu<V_512:mode><VI_512:mode>": an if_then_else that selects
;; between operands 1 and 2 (V_512, general_operand) according to
;; comparison operator 3 applied to the integer vectors in operands 4 and 5
;; (VI_512, both nonimmediate_operand).  The visible part of the condition
;; requires both modes to have the same number of units.  Expansion is
;; delegated to ix86_expand_int_vcond.
;; NOTE(review): the first term of the condition and the handling of `ok`
;; are not visible in this listing.
10645 (define_expand "vcondu<V_512:mode><VI_512:mode>"
10646 [(set (match_operand:V_512 0 "register_operand")
10647 (if_then_else:V_512
10648 (match_operator 3 ""
10649 [(match_operand:VI_512 4 "nonimmediate_operand")
10650 (match_operand:VI_512 5 "nonimmediate_operand")])
10651 (match_operand:V_512 1 "general_operand")
10652 (match_operand:V_512 2 "general_operand")))]
10654 && (GET_MODE_NUNITS (<V_512:MODE>mode)
10655 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
10657 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcondu<V_256:mode><VI_256:mode>": an if_then_else that selects
;; between operands 1 and 2 (V_256, general_operand) according to
;; comparison operator 3 applied to the integer vectors in operands 4 and 5
;; (VI_256, both nonimmediate_operand).  The visible part of the condition
;; requires both modes to have the same number of units.  Expansion is
;; delegated to ix86_expand_int_vcond.
;; NOTE(review): the first term of the condition and the handling of `ok`
;; are not visible in this listing.
10662 (define_expand "vcondu<V_256:mode><VI_256:mode>"
10663 [(set (match_operand:V_256 0 "register_operand")
10664 (if_then_else:V_256
10665 (match_operator 3 ""
10666 [(match_operand:VI_256 4 "nonimmediate_operand")
10667 (match_operand:VI_256 5 "nonimmediate_operand")])
10668 (match_operand:V_256 1 "general_operand")
10669 (match_operand:V_256 2 "general_operand")))]
10671 && (GET_MODE_NUNITS (<V_256:MODE>mode)
10672 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
10674 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcondu<V_128:mode><VI124_128:mode>": an if_then_else that
;; selects between operands 1 and 2 (V_128, general_operand) according to
;; comparison operator 3 applied to the integer vectors in operands 4 and 5
;; (VI124_128, both nonimmediate_operand).  The visible part of the
;; condition requires both modes to have the same number of units.
;; Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the first term of the condition and the handling of `ok`
;; are not visible in this listing.
10679 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
10680 [(set (match_operand:V_128 0 "register_operand")
10681 (if_then_else:V_128
10682 (match_operator 3 ""
10683 [(match_operand:VI124_128 4 "nonimmediate_operand")
10684 (match_operand:VI124_128 5 "nonimmediate_operand")])
10685 (match_operand:V_128 1 "general_operand")
10686 (match_operand:V_128 2 "general_operand")))]
10688 && (GET_MODE_NUNITS (<V_128:MODE>mode)
10689 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
10691 bool ok = ix86_expand_int_vcond (operands);
;; Expander "vcondu<VI8F_128:mode>v2di": an if_then_else that selects
;; between operands 1 and 2 (VI8F_128, general_operand) according to
;; comparison operator 3 applied to the V2DI operands 4 and 5 (both
;; nonimmediate_operand).  Expansion is delegated to ix86_expand_int_vcond.
;; NOTE(review): the condition and the handling of `ok` are not visible in
;; this listing.
10696 (define_expand "vcondu<VI8F_128:mode>v2di"
10697 [(set (match_operand:VI8F_128 0 "register_operand")
10698 (if_then_else:VI8F_128
10699 (match_operator 3 ""
10700 [(match_operand:V2DI 4 "nonimmediate_operand")
10701 (match_operand:V2DI 5 "nonimmediate_operand")])
10702 (match_operand:VI8F_128 1 "general_operand")
10703 (match_operand:VI8F_128 2 "general_operand")))]
10706 bool ok = ix86_expand_int_vcond (operands);
;; Modes iterated by the vec_perm<mode> expander.  The 128-bit modes are
;; unconditional; the 256-bit modes require TARGET_AVX2; the 512-bit SI,
;; DI, SF and DF vectors require TARGET_AVX512F; V32HI requires
;; TARGET_AVX512BW and V64QI requires TARGET_AVX512VBMI.
10711 (define_mode_iterator VEC_PERM_AVX2
10712 [V16QI V8HI V4SI V2DI V4SF V2DF
10713 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10714 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
10715 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
10716 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
10717 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
10718 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
;; Expander "vec_perm<mode>" over the VEC_PERM_AVX2 modes, enabled under
;; TARGET_SSSE3 || TARGET_AVX || TARGET_XOP.  Operands 1 and 2 are the
;; input vectors and operand 3 is a register of the matching
;; <sseintvecmode>.  Expansion is delegated to ix86_expand_vec_perm.
10720 (define_expand "vec_perm<mode>"
10721 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
10722 (match_operand:VEC_PERM_AVX2 1 "register_operand")
10723 (match_operand:VEC_PERM_AVX2 2 "register_operand")
10724 (match_operand:<sseintvecmode> 3 "register_operand")]
10725 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
10727 ix86_expand_vec_perm (operands);
;; Modes iterated by the vec_perm_const<mode> expander, each with its own
;; enabling target flag: TARGET_SSE for V4SF/V4SI/V2DF/V2DI, TARGET_SSE2
;; for V16QI/V8HI, TARGET_AVX for the 256-bit SF/DF/SI/DI vectors,
;; TARGET_AVX2 for V32QI/V16HI, TARGET_AVX512F for the 512-bit SI/DI/SF/DF
;; vectors, and TARGET_AVX512BW for V32HI/V64QI.
10731 (define_mode_iterator VEC_PERM_CONST
10732 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
10733 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
10734 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
10735 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
10736 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
10737 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
10738 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
10739 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
10740 (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
;; Expander "vec_perm_const<mode>" over the VEC_PERM_CONST modes.  Operand
;; 3 has mode <sseintvecmode> and no predicate.  The body tests the result
;; of ix86_expand_vec_perm_const (operands).
;; NOTE(review): the condition and both outcomes of that test are not
;; visible in this listing -- presumably DONE on success and FAIL
;; otherwise; confirm.
10742 (define_expand "vec_perm_const<mode>"
10743 [(match_operand:VEC_PERM_CONST 0 "register_operand")
10744 (match_operand:VEC_PERM_CONST 1 "register_operand")
10745 (match_operand:VEC_PERM_CONST 2 "register_operand")
10746 (match_operand:<sseintvecmode> 3)]
10749 if (ix86_expand_vec_perm_const (operands))
10755 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10757 ;; Parallel bitwise logical operations
10759 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Bitwise complement of an integer vector, expressed as an XOR.
;; The C body builds a CONST_VECTOR whose GET_MODE_NUNITS elements are
;; all constm1_rtx (all bits set), forces it into a register and stores
;; it in operands[2], so that operand 1 is XORed with an all-ones vector.
10761 (define_expand "one_cmpl<mode>2"
10762 [(set (match_operand:VI 0 "register_operand")
10763 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
10767 int i, n = GET_MODE_NUNITS (<MODE>mode);
10768 rtvec v = rtvec_alloc (n);
10770 for (i = 0; i < n; ++i)
10771 RTVEC_ELT (v, i) = constm1_rtx;
10773 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Named expander for the and-not operation on VI_AVX2 modes.
;; Operand 1 (a register) appears complemented, wrapped in (not ...);
;; operand 2 may be a register or memory; operand 0 is the destination.
10776 (define_expand "<sse2_avx2>_andnot<mode>3"
10777 [(set (match_operand:VI_AVX2 0 "register_operand")
10779 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
10780 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
;; Masked and-not expander for the VI48_AVX512VL modes.
;; The result is a vec_merge: the and-not of operands 1 and 2 is merged
;; with operand 3 (a vector_move_operand) under control of the mask
;; register in operand 4, whose mode is <avx512fmaskmode>.
10783 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
10784 [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10785 (vec_merge:VI48_AVX512VL
10788 (match_operand:VI48_AVX512VL 1 "register_operand"))
10789 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10790 (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10791 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
;; Masked and-not expander for the VI12_AVX512VL modes.
;; It has the same shape as the VI48_AVX512VL variant: the and-not of
;; operands 1 and 2 is merged (vec_merge) with operand 3 under the mask
;; register in operand 4.
10794 (define_expand "<sse2_avx2>_andnot<mode>3_mask"
10795 [(set (match_operand:VI12_AVX512VL 0 "register_operand")
10796 (vec_merge:VI12_AVX512VL
10799 (match_operand:VI12_AVX512VL 1 "register_operand"))
10800 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
10801 (match_operand:VI12_AVX512VL 3 "vector_move_operand")
10802 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
;; Unmasked and-not instruction for all VI modes.
;; There are two alternatives.  Alternative 0 (isa "noavx") ties
;; operand 1 to the destination ("0") and prints a two-operand
;; mnemonic.  Alternative 1 (isa "avx") takes operand 1 in any "v"
;; register and prints a three-operand mnemonic with a "v" prefix.
;; The C body works in three steps:
;;   1. It switches on get_attr_mode (insn), asserts the ISA needed for
;;      that mode and stores the base mnemonic in `tmp'.  For the
;;      integer modes it then switches on <MODE>mode and uses the
;;      element-suffixed "pandn<ssemodesuffix>" when TARGET_AVX512F or
;;      TARGET_AVX512VL holds for the cases guarded by those tests;
;;      otherwise it uses "pandnq" under AVX512VL and plain "pandn"
;;      without it.
;;   2. It switches on which_alternative to choose the operand format
;;      string `ops'.
;;   3. It formats `ops' with `tmp' into the function-static `buf' via
;;      snprintf.
10805 (define_insn "*andnot<mode>3"
10806 [(set (match_operand:VI 0 "register_operand" "=x,v")
10808 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
10809 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
10812 static char buf[64];
10816 switch (get_attr_mode (insn))
10819 gcc_assert (TARGET_AVX512F);
10821 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
10823 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
10824 switch (<MODE>mode)
10828 if (TARGET_AVX512F)
10830 tmp = "pandn<ssemodesuffix>";
10837 if (TARGET_AVX512VL)
10839 tmp = "pandn<ssemodesuffix>";
10843 tmp = TARGET_AVX512VL ? "pandnq" : "pandn";
10848 gcc_assert (TARGET_AVX512F);
10850 gcc_assert (TARGET_AVX);
10852 gcc_assert (TARGET_SSE);
10858 gcc_unreachable ();
10861 switch (which_alternative)
10864 ops = "%s\t{%%2, %%0|%%0, %%2}";
10867 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
10870 gcc_unreachable ();
10873 snprintf (buf, sizeof (buf), ops, tmp);
;; Attributes.  prefix_data16 is conditioned on alternative 0 together
;; with mode TI.  The "mode" attribute is computed by a cond: 16-byte
;; modes use <ssePSmode> when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL;
;; otherwise AVX2 uses <sseinsnmode>; plain AVX uses V8SF for modes
;; wider than 16 bytes; V4SF is used without SSE2 or when optimizing
;; for size; the final default is <sseinsnmode>.
10876 [(set_attr "isa" "noavx,avx")
10877 (set_attr "type" "sselog")
10878 (set (attr "prefix_data16")
10880 (and (eq_attr "alternative" "0")
10881 (eq_attr "mode" "TI"))
10883 (const_string "*")))
10884 (set_attr "prefix" "orig,vex")
10886 (cond [(and (match_test "<MODE_SIZE> == 16")
10887 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
10888 (const_string "<ssePSmode>")
10889 (match_test "TARGET_AVX2")
10890 (const_string "<sseinsnmode>")
10891 (match_test "TARGET_AVX")
10893 (match_test "<MODE_SIZE> > 16")
10894 (const_string "V8SF")
10895 (const_string "<sseinsnmode>"))
10896 (ior (not (match_test "TARGET_SSE2"))
10897 (match_test "optimize_function_for_size_p (cfun)"))
10898 (const_string "V4SF")
10900 (const_string "<sseinsnmode>")))])
;; Masked and-not instruction for the VI48_AVX512VL modes.
;; Operand 3 is the merge source: constraint "0" ties it to the
;; destination, "C" allows a constant.  Operand 4 is the mask register
;; ("Yk").  The template prints vpandn with the element-size suffix,
;; the mask in braces (%{%4%}) and the %N3 modifier applied to
;; operand 3.
;; NOTE: the ';' after the template string begins an (empty) md
;; comment, so it has no effect on the pattern.
10902 (define_insn "*andnot<mode>3_mask"
10903 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10904 (vec_merge:VI48_AVX512VL
10907 (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
10908 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
10909 (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
10910 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10912 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
10913 [(set_attr "type" "sselog")
10914 (set_attr "prefix" "evex")
10915 (set_attr "mode" "<sseinsnmode>")])
;; Masked and-not instruction for the VI12_AVX512VL modes.
;; Operands and template have the same layout as the VI48_AVX512VL
;; variant: operand 3 ("0C") is the merge source and operand 4 ("Yk")
;; is the mask register.
;; NOTE: the ';' after the template string begins an (empty) md
;; comment, so it has no effect on the pattern.
10917 (define_insn "*andnot<mode>3_mask"
10918 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10919 (vec_merge:VI12_AVX512VL
10922 (match_operand:VI12_AVX512VL 1 "register_operand" "v"))
10923 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
10924 (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
10925 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
10927 "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
10928 [(set_attr "type" "sselog")
10929 (set_attr "prefix" "evex")
10930 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the logical operations generated from the <code>
;; iterator on VI modes.  Both inputs may be a register, memory or a
;; constant vector.  The C body passes the rtx code, the mode and the
;; operand array to ix86_expand_vector_logical_operator.
10932 (define_expand "<code><mode>3"
10933 [(set (match_operand:VI 0 "register_operand")
10935 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
10936 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
10939 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Logical-operation instruction for all VI modes, optionally masked
;; through the <mask_name> substitution.
;; Operand 1 is marked commutative ("%").  Alternative 0 (isa "noavx")
;; ties it to the destination and prints a two-operand mnemonic;
;; alternative 1 (isa "avx") prints a three-operand mnemonic with a "v"
;; prefix, followed by <mask_operand3_1>.
;; The insn condition requires TARGET_SSE, <mask_mode512bit_condition>
;; and ix86_binary_operator_ok for the operands.
;; The C body works in three steps:
;;   1. It switches on get_attr_mode (insn), asserts the ISA required
;;      for that mode and stores the base mnemonic in `tmp'.  For the
;;      integer modes it uses "p<logic><ssemodesuffix>" where the
;;      TARGET_AVX512F / TARGET_AVX512VL tests succeed; otherwise it
;;      uses "p<logic>q" under AVX512VL and plain "p<logic>" without it.
;;   2. It switches on which_alternative to choose the operand format
;;      string `ops'.
;;   3. It formats `ops' with `tmp' into the function-static `buf' via
;;      snprintf.
10943 (define_insn "<mask_codefor><code><mode>3<mask_name>"
10944 [(set (match_operand:VI 0 "register_operand" "=x,v")
10946 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
10947 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
10948 "TARGET_SSE && <mask_mode512bit_condition>
10949 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10951 static char buf[64];
10955 switch (get_attr_mode (insn))
10958 gcc_assert (TARGET_AVX512F);
10960 gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
10962 gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
10963 switch (<MODE>mode)
10967 if (TARGET_AVX512F)
10969 tmp = "p<logic><ssemodesuffix>";
10976 if (TARGET_AVX512VL)
10978 tmp = "p<logic><ssemodesuffix>";
10982 tmp = TARGET_AVX512VL ? "p<logic>q" : "p<logic>";
10987 gcc_assert (TARGET_AVX512F);
10989 gcc_assert (TARGET_AVX);
10991 gcc_assert (TARGET_SSE);
10997 gcc_unreachable ();
11000 switch (which_alternative)
11003 ops = "%s\t{%%2, %%0|%%0, %%2}";
11006 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
11009 gcc_unreachable ();
11012 snprintf (buf, sizeof (buf), ops, tmp);
;; Attributes.  The "prefix" attribute comes from <mask_prefix3>.  The
;; "mode" attribute is computed by a cond: 16-byte modes use
;; <ssePSmode> when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL; otherwise
;; AVX2 uses <sseinsnmode>; plain AVX uses V8SF for modes wider than
;; 16 bytes; V4SF is used without SSE2 or when optimizing for size; the
;; final default is <sseinsnmode>.
11015 [(set_attr "isa" "noavx,avx")
11016 (set_attr "type" "sselog")
11017 (set (attr "prefix_data16")
11019 (and (eq_attr "alternative" "0")
11020 (eq_attr "mode" "TI"))
11022 (const_string "*")))
11023 (set_attr "prefix" "<mask_prefix3>")
11025 (cond [(and (match_test "<MODE_SIZE> == 16")
11026 (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
11027 (const_string "<ssePSmode>")
11028 (match_test "TARGET_AVX2")
11029 (const_string "<sseinsnmode>")
11030 (match_test "TARGET_AVX")
11032 (match_test "<MODE_SIZE> > 16")
11033 (const_string "V8SF")
11034 (const_string "<sseinsnmode>"))
11035 (ior (not (match_test "TARGET_SSE2"))
11036 (match_test "optimize_function_for_size_p (cfun)"))
11037 (const_string "V4SF")
11039 (const_string "<sseinsnmode>")))])
;; vptestm for the VI12_AVX512VL modes.
;; The result is written to a mask register ("Yk") in
;; <avx512fmaskmode>; it is an unspec over vector operands 1 (register)
;; and 2 (register or memory).  <mask_scalar_merge_name> supplies the
;; optional masked form, printed through <mask_scalar_merge_operand3>.
11041 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11042 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11043 (unspec:<avx512fmaskmode>
11044 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11045 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11048 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11049 [(set_attr "prefix" "evex")
11050 (set_attr "mode" "<sseinsnmode>")])
;; vptestm for the VI48_AVX512VL modes.
;; It has the same shape as the VI12_AVX512VL variant: an unspec over
;; vector operands 1 and 2 producing a mask register ("Yk") in
;; <avx512fmaskmode>.
11052 (define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
11053 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11054 (unspec:<avx512fmaskmode>
11055 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11056 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11059 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11060 [(set_attr "prefix" "evex")
11061 (set_attr "mode" "<sseinsnmode>")])
;; vptestnm for the VI12_AVX512VL modes.
;; The operand layout is identical to the testm patterns: an unspec over
;; vector operands 1 and 2 producing a mask register ("Yk"); only the
;; emitted mnemonic (vptestnm) differs in the visible text.
11063 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11064 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11065 (unspec:<avx512fmaskmode>
11066 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
11067 (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
11070 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11071 [(set_attr "prefix" "evex")
11072 (set_attr "mode" "<sseinsnmode>")])
;; vptestnm for the VI48_AVX512VL modes.
;; It has the same shape as the VI12_AVX512VL variant: an unspec over
;; vector operands 1 and 2 producing a mask register ("Yk") in
;; <avx512fmaskmode>.
11074 (define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
11075 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
11076 (unspec:<avx512fmaskmode>
11077 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
11078 (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
11081 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
11082 [(set_attr "prefix" "evex")
11083 (set_attr "mode" "<sseinsnmode>")])
11085 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11087 ;; Parallel integral element swizzling
11089 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Truncating pack of two wide-element vectors into one vector in
;; <ssepackmode>.
;; Both inputs are reinterpreted in the destination mode with
;; gen_lowpart, and ix86_expand_vec_extract_even_odd is called on them
;; with 0 as its last argument.
11091 (define_expand "vec_pack_trunc_<mode>"
11092 [(match_operand:<ssepackmode> 0 "register_operand")
11093 (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
11094 (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
11097 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
11098 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
11099 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; (v)packsswb: pack with signed saturation into byte elements.
;; The result is the vec_concat of the signed-saturating truncations
;; (ss_truncate) of operands 1 and 2, each taken in <sseunpackmode>.
;; Alternative 0 is the two-operand SSE form with operand 1 tied to the
;; destination; alternative 1 is the three-operand AVX form.
;; Note that both output alternatives use the "x" constraint.
11103 (define_insn "<sse2_avx2>_packsswb<mask_name>"
11104 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
11105 (vec_concat:VI1_AVX512
11106 (ss_truncate:<ssehalfvecmode>
11107 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11108 (ss_truncate:<ssehalfvecmode>
11109 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11110 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11112 packsswb\t{%2, %0|%0, %2}
11113 vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11114 [(set_attr "isa" "noavx,avx")
11115 (set_attr "type" "sselog")
11116 (set_attr "prefix_data16" "1,*")
11117 (set_attr "prefix" "orig,maybe_evex")
11118 (set_attr "mode" "<sseinsnmode>")])
;; (v)packssdw: pack with signed saturation into word elements.
;; The result is the vec_concat of the signed-saturating truncations
;; (ss_truncate) of operands 1 and 2, each taken in <sseunpackmode>.
;; Alternative 0 is the two-operand SSE form with operand 1 tied to the
;; destination; alternative 1 is the three-operand AVX form, whose
;; destination uses the "v" constraint.
11120 (define_insn "<sse2_avx2>_packssdw<mask_name>"
11121 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11122 (vec_concat:VI2_AVX2
11123 (ss_truncate:<ssehalfvecmode>
11124 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11125 (ss_truncate:<ssehalfvecmode>
11126 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11127 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11129 packssdw\t{%2, %0|%0, %2}
11130 vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11131 [(set_attr "isa" "noavx,avx")
11132 (set_attr "type" "sselog")
11133 (set_attr "prefix_data16" "1,*")
11134 (set_attr "prefix" "orig,vex")
11135 (set_attr "mode" "<sseinsnmode>")])
;; (v)packuswb: pack with unsigned saturation into byte elements.
;; The result is the vec_concat of the unsigned-saturating truncations
;; (us_truncate) of operands 1 and 2, each taken in <sseunpackmode>.
;; Alternative 0 is the two-operand SSE form with operand 1 tied to the
;; destination; alternative 1 is the three-operand AVX form.
;; Note that both output alternatives use the "x" constraint.
11137 (define_insn "<sse2_avx2>_packuswb<mask_name>"
11138 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
11139 (vec_concat:VI1_AVX512
11140 (us_truncate:<ssehalfvecmode>
11141 (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
11142 (us_truncate:<ssehalfvecmode>
11143 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
11144 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11146 packuswb\t{%2, %0|%0, %2}
11147 vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11148 [(set_attr "isa" "noavx,avx")
11149 (set_attr "type" "sselog")
11150 (set_attr "prefix_data16" "1,*")
11151 (set_attr "prefix" "orig,vex")
11152 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw on V64QI: interleave the high bytes of each 16-byte lane.
;; The selector lists index pairs (i, i + 64) for i in 8..15, 24..31,
;; 40..47 and 56..63, i.e. the upper eight bytes of each of the four
;; lanes.  Indices of 64 and above presumably address operand 2 in the
;; concatenation of the two inputs.
11154 (define_insn "avx512bw_interleave_highv64qi<mask_name>"
11155 [(set (match_operand:V64QI 0 "register_operand" "=v")
11158 (match_operand:V64QI 1 "register_operand" "v")
11159 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
11160 (parallel [(const_int 8) (const_int 72)
11161 (const_int 9) (const_int 73)
11162 (const_int 10) (const_int 74)
11163 (const_int 11) (const_int 75)
11164 (const_int 12) (const_int 76)
11165 (const_int 13) (const_int 77)
11166 (const_int 14) (const_int 78)
11167 (const_int 15) (const_int 79)
11168 (const_int 24) (const_int 88)
11169 (const_int 25) (const_int 89)
11170 (const_int 26) (const_int 90)
11171 (const_int 27) (const_int 91)
11172 (const_int 28) (const_int 92)
11173 (const_int 29) (const_int 93)
11174 (const_int 30) (const_int 94)
11175 (const_int 31) (const_int 95)
11176 (const_int 40) (const_int 104)
11177 (const_int 41) (const_int 105)
11178 (const_int 42) (const_int 106)
11179 (const_int 43) (const_int 107)
11180 (const_int 44) (const_int 108)
11181 (const_int 45) (const_int 109)
11182 (const_int 46) (const_int 110)
11183 (const_int 47) (const_int 111)
11184 (const_int 56) (const_int 120)
11185 (const_int 57) (const_int 121)
11186 (const_int 58) (const_int 122)
11187 (const_int 59) (const_int 123)
11188 (const_int 60) (const_int 124)
11189 (const_int 61) (const_int 125)
11190 (const_int 62) (const_int 126)
11191 (const_int 63) (const_int 127)])))]
11193 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11194 [(set_attr "type" "sselog")
11195 (set_attr "prefix" "evex")
11196 (set_attr "mode" "XI")])
;; vpunpckhbw on V32QI: interleave the high bytes of each 16-byte lane.
;; The selector lists index pairs (i, i + 32) for i in 8..15 and 24..31.
;; The pattern is enabled under TARGET_AVX2 together with
;; <mask_avx512vl_condition>.
11198 (define_insn "avx2_interleave_highv32qi<mask_name>"
11199 [(set (match_operand:V32QI 0 "register_operand" "=v")
11202 (match_operand:V32QI 1 "register_operand" "v")
11203 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
11204 (parallel [(const_int 8) (const_int 40)
11205 (const_int 9) (const_int 41)
11206 (const_int 10) (const_int 42)
11207 (const_int 11) (const_int 43)
11208 (const_int 12) (const_int 44)
11209 (const_int 13) (const_int 45)
11210 (const_int 14) (const_int 46)
11211 (const_int 15) (const_int 47)
11212 (const_int 24) (const_int 56)
11213 (const_int 25) (const_int 57)
11214 (const_int 26) (const_int 58)
11215 (const_int 27) (const_int 59)
11216 (const_int 28) (const_int 60)
11217 (const_int 29) (const_int 61)
11218 (const_int 30) (const_int 62)
11219 (const_int 31) (const_int 63)])))]
11220 "TARGET_AVX2 && <mask_avx512vl_condition>"
11221 "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11222 [(set_attr "type" "sselog")
11223 (set_attr "prefix" "<mask_prefix>")
11224 (set_attr "mode" "OI")])
;; (v)punpckhbw on V16QI: interleave the high eight bytes of the inputs.
;; The selector lists index pairs (i, i + 16) for i in 8..15.
;; Alternative 0 is the two-operand SSE2 form with operand 1 tied to the
;; destination; alternative 1 is the three-operand AVX form.
11226 (define_insn "vec_interleave_highv16qi<mask_name>"
11227 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
11230 (match_operand:V16QI 1 "register_operand" "0,v")
11231 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
11232 (parallel [(const_int 8) (const_int 24)
11233 (const_int 9) (const_int 25)
11234 (const_int 10) (const_int 26)
11235 (const_int 11) (const_int 27)
11236 (const_int 12) (const_int 28)
11237 (const_int 13) (const_int 29)
11238 (const_int 14) (const_int 30)
11239 (const_int 15) (const_int 31)])))]
11240 "TARGET_SSE2 && <mask_avx512vl_condition>"
11242 punpckhbw\t{%2, %0|%0, %2}
11243 vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11244 [(set_attr "isa" "noavx,avx")
11245 (set_attr "type" "sselog")
11246 (set_attr "prefix_data16" "1,*")
11247 (set_attr "prefix" "orig,<mask_prefix>")
11248 (set_attr "mode" "TI")])
;; vpunpcklbw on V64QI: interleave the low bytes of each 16-byte lane.
;; The selector lists index pairs (i, i + 64) for i in 0..7, 16..23,
;; 32..39 and 48..55, i.e. the lower eight bytes of each of the four
;; lanes.  Indices of 64 and above presumably address operand 2 in the
;; concatenation of the two inputs.
11250 (define_insn "avx512bw_interleave_lowv64qi<mask_name>"
11251 [(set (match_operand:V64QI 0 "register_operand" "=v")
11254 (match_operand:V64QI 1 "register_operand" "v")
11255 (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
11256 (parallel [(const_int 0) (const_int 64)
11257 (const_int 1) (const_int 65)
11258 (const_int 2) (const_int 66)
11259 (const_int 3) (const_int 67)
11260 (const_int 4) (const_int 68)
11261 (const_int 5) (const_int 69)
11262 (const_int 6) (const_int 70)
11263 (const_int 7) (const_int 71)
11264 (const_int 16) (const_int 80)
11265 (const_int 17) (const_int 81)
11266 (const_int 18) (const_int 82)
11267 (const_int 19) (const_int 83)
11268 (const_int 20) (const_int 84)
11269 (const_int 21) (const_int 85)
11270 (const_int 22) (const_int 86)
11271 (const_int 23) (const_int 87)
11272 (const_int 32) (const_int 96)
11273 (const_int 33) (const_int 97)
11274 (const_int 34) (const_int 98)
11275 (const_int 35) (const_int 99)
11276 (const_int 36) (const_int 100)
11277 (const_int 37) (const_int 101)
11278 (const_int 38) (const_int 102)
11279 (const_int 39) (const_int 103)
11280 (const_int 48) (const_int 112)
11281 (const_int 49) (const_int 113)
11282 (const_int 50) (const_int 114)
11283 (const_int 51) (const_int 115)
11284 (const_int 52) (const_int 116)
11285 (const_int 53) (const_int 117)
11286 (const_int 54) (const_int 118)
11287 (const_int 55) (const_int 119)])))]
11289 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11290 [(set_attr "type" "sselog")
11291 (set_attr "prefix" "evex")
11292 (set_attr "mode" "XI")])
;; vpunpcklbw on V32QI: interleave the low bytes of each 16-byte lane.
;; The selector lists index pairs (i, i + 32) for i in 0..7 and 16..23.
;; The pattern is enabled under TARGET_AVX2 together with
;; <mask_avx512vl_condition> and <mask_avx512bw_condition>.
11294 (define_insn "avx2_interleave_lowv32qi<mask_name>"
11295 [(set (match_operand:V32QI 0 "register_operand" "=v")
11298 (match_operand:V32QI 1 "register_operand" "v")
11299 (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
11300 (parallel [(const_int 0) (const_int 32)
11301 (const_int 1) (const_int 33)
11302 (const_int 2) (const_int 34)
11303 (const_int 3) (const_int 35)
11304 (const_int 4) (const_int 36)
11305 (const_int 5) (const_int 37)
11306 (const_int 6) (const_int 38)
11307 (const_int 7) (const_int 39)
11308 (const_int 16) (const_int 48)
11309 (const_int 17) (const_int 49)
11310 (const_int 18) (const_int 50)
11311 (const_int 19) (const_int 51)
11312 (const_int 20) (const_int 52)
11313 (const_int 21) (const_int 53)
11314 (const_int 22) (const_int 54)
11315 (const_int 23) (const_int 55)])))]
11316 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11317 "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11318 [(set_attr "type" "sselog")
11319 (set_attr "prefix" "maybe_vex")
11320 (set_attr "mode" "OI")])
;; (v)punpcklbw on V16QI: interleave the low eight bytes of the inputs.
;; The selector lists index pairs (i, i + 16) for i in 0..7.
;; Alternative 0 is the two-operand SSE2 form with operand 1 tied to the
;; destination; alternative 1 is the three-operand AVX form.
11322 (define_insn "vec_interleave_lowv16qi<mask_name>"
11323 [(set (match_operand:V16QI 0 "register_operand" "=x,v")
11326 (match_operand:V16QI 1 "register_operand" "0,v")
11327 (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
11328 (parallel [(const_int 0) (const_int 16)
11329 (const_int 1) (const_int 17)
11330 (const_int 2) (const_int 18)
11331 (const_int 3) (const_int 19)
11332 (const_int 4) (const_int 20)
11333 (const_int 5) (const_int 21)
11334 (const_int 6) (const_int 22)
11335 (const_int 7) (const_int 23)])))]
11336 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11338 punpcklbw\t{%2, %0|%0, %2}
11339 vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11340 [(set_attr "isa" "noavx,avx")
11341 (set_attr "type" "sselog")
11342 (set_attr "prefix_data16" "1,*")
11343 (set_attr "prefix" "orig,vex")
11344 (set_attr "mode" "TI")])
;; vpunpckhwd on V32HI: interleave the high words of each 16-byte lane.
;; The selector lists index pairs (i, i + 32) for i in 4..7, 12..15,
;; 20..23 and 28..31, i.e. the upper four words of each of the four
;; lanes.
11346 (define_insn "avx512bw_interleave_highv32hi<mask_name>"
11347 [(set (match_operand:V32HI 0 "register_operand" "=v")
11350 (match_operand:V32HI 1 "register_operand" "v")
11351 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11352 (parallel [(const_int 4) (const_int 36)
11353 (const_int 5) (const_int 37)
11354 (const_int 6) (const_int 38)
11355 (const_int 7) (const_int 39)
11356 (const_int 12) (const_int 44)
11357 (const_int 13) (const_int 45)
11358 (const_int 14) (const_int 46)
11359 (const_int 15) (const_int 47)
11360 (const_int 20) (const_int 52)
11361 (const_int 21) (const_int 53)
11362 (const_int 22) (const_int 54)
11363 (const_int 23) (const_int 55)
11364 (const_int 28) (const_int 60)
11365 (const_int 29) (const_int 61)
11366 (const_int 30) (const_int 62)
11367 (const_int 31) (const_int 63)])))]
11369 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11370 [(set_attr "type" "sselog")
11371 (set_attr "prefix" "evex")
11372 (set_attr "mode" "XI")])
;; vpunpckhwd on V16HI: interleave the high words of each 16-byte lane.
;; The selector lists index pairs (i, i + 16) for i in 4..7 and 12..15.
11374 (define_insn "avx2_interleave_highv16hi<mask_name>"
11375 [(set (match_operand:V16HI 0 "register_operand" "=v")
11378 (match_operand:V16HI 1 "register_operand" "v")
11379 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
11380 (parallel [(const_int 4) (const_int 20)
11381 (const_int 5) (const_int 21)
11382 (const_int 6) (const_int 22)
11383 (const_int 7) (const_int 23)
11384 (const_int 12) (const_int 28)
11385 (const_int 13) (const_int 29)
11386 (const_int 14) (const_int 30)
11387 (const_int 15) (const_int 31)])))]
11388 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11389 "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11390 [(set_attr "type" "sselog")
11391 (set_attr "prefix" "maybe_evex")
11392 (set_attr "mode" "OI")])
;; (v)punpckhwd on V8HI: interleave the high four words of the inputs.
;; The selector lists index pairs (i, i + 8) for i in 4..7.
;; Alternative 0 is the two-operand SSE2 form with operand 1 tied to the
;; destination; alternative 1 is the three-operand AVX form.
11394 (define_insn "vec_interleave_highv8hi<mask_name>"
11395 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
11398 (match_operand:V8HI 1 "register_operand" "0,v")
11399 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
11400 (parallel [(const_int 4) (const_int 12)
11401 (const_int 5) (const_int 13)
11402 (const_int 6) (const_int 14)
11403 (const_int 7) (const_int 15)])))]
11404 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11406 punpckhwd\t{%2, %0|%0, %2}
11407 vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11408 [(set_attr "isa" "noavx,avx")
11409 (set_attr "type" "sselog")
11410 (set_attr "prefix_data16" "1,*")
11411 (set_attr "prefix" "orig,maybe_vex")
11412 (set_attr "mode" "TI")])
;; vpunpcklwd on V32HI: interleave the low words of each 16-byte lane.
;; The selector lists index pairs (i, i + 32) for i in 0..3, 8..11,
;; 16..19 and 24..27, i.e. the lower four words of each of the four
;; lanes.
11414 (define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
11415 [(set (match_operand:V32HI 0 "register_operand" "=v")
11418 (match_operand:V32HI 1 "register_operand" "v")
11419 (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11420 (parallel [(const_int 0) (const_int 32)
11421 (const_int 1) (const_int 33)
11422 (const_int 2) (const_int 34)
11423 (const_int 3) (const_int 35)
11424 (const_int 8) (const_int 40)
11425 (const_int 9) (const_int 41)
11426 (const_int 10) (const_int 42)
11427 (const_int 11) (const_int 43)
11428 (const_int 16) (const_int 48)
11429 (const_int 17) (const_int 49)
11430 (const_int 18) (const_int 50)
11431 (const_int 19) (const_int 51)
11432 (const_int 24) (const_int 56)
11433 (const_int 25) (const_int 57)
11434 (const_int 26) (const_int 58)
11435 (const_int 27) (const_int 59)])))]
11437 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11438 [(set_attr "type" "sselog")
11439 (set_attr "prefix" "evex")
11440 (set_attr "mode" "XI")])
;; vpunpcklwd on V16HI: interleave the low words of each 16-byte lane.
;; The selector lists index pairs (i, i + 16) for i in 0..3 and 8..11.
11442 (define_insn "avx2_interleave_lowv16hi<mask_name>"
11443 [(set (match_operand:V16HI 0 "register_operand" "=v")
11446 (match_operand:V16HI 1 "register_operand" "v")
11447 (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
11448 (parallel [(const_int 0) (const_int 16)
11449 (const_int 1) (const_int 17)
11450 (const_int 2) (const_int 18)
11451 (const_int 3) (const_int 19)
11452 (const_int 8) (const_int 24)
11453 (const_int 9) (const_int 25)
11454 (const_int 10) (const_int 26)
11455 (const_int 11) (const_int 27)])))]
11456 "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11457 "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11458 [(set_attr "type" "sselog")
11459 (set_attr "prefix" "maybe_evex")
11460 (set_attr "mode" "OI")])
;; (v)punpcklwd on V8HI: interleave the low four words of the inputs.
;; The selector lists index pairs (i, i + 8) for i in 0..3.
;; Alternative 0 is the two-operand SSE2 form with operand 1 tied to the
;; destination; alternative 1 is the three-operand AVX form.
11462 (define_insn "vec_interleave_lowv8hi<mask_name>"
11463 [(set (match_operand:V8HI 0 "register_operand" "=x,v")
11466 (match_operand:V8HI 1 "register_operand" "0,v")
11467 (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
11468 (parallel [(const_int 0) (const_int 8)
11469 (const_int 1) (const_int 9)
11470 (const_int 2) (const_int 10)
11471 (const_int 3) (const_int 11)])))]
11472 "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11474 punpcklwd\t{%2, %0|%0, %2}
11475 vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11476 [(set_attr "isa" "noavx,avx")
11477 (set_attr "type" "sselog")
11478 (set_attr "prefix_data16" "1,*")
11479 (set_attr "prefix" "orig,maybe_evex")
11480 (set_attr "mode" "TI")])
;; vpunpckhdq on V8SI: interleave the high dwords of each 16-byte lane.
;; The selector lists index pairs (i, i + 8) for i in 2..3 and 6..7.
11482 (define_insn "avx2_interleave_highv8si<mask_name>"
11483 [(set (match_operand:V8SI 0 "register_operand" "=v")
11486 (match_operand:V8SI 1 "register_operand" "v")
11487 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11488 (parallel [(const_int 2) (const_int 10)
11489 (const_int 3) (const_int 11)
11490 (const_int 6) (const_int 14)
11491 (const_int 7) (const_int 15)])))]
11492 "TARGET_AVX2 && <mask_avx512vl_condition>"
11493 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11494 [(set_attr "type" "sselog")
11495 (set_attr "prefix" "maybe_evex")
11496 (set_attr "mode" "OI")])
;; vpunpckhdq on V16SI: interleave the high dwords of each 16-byte lane.
;; The selector lists index pairs (i, i + 16) for i in 2..3, 6..7,
;; 10..11 and 14..15.
11498 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
11499 [(set (match_operand:V16SI 0 "register_operand" "=v")
11502 (match_operand:V16SI 1 "register_operand" "v")
11503 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11504 (parallel [(const_int 2) (const_int 18)
11505 (const_int 3) (const_int 19)
11506 (const_int 6) (const_int 22)
11507 (const_int 7) (const_int 23)
11508 (const_int 10) (const_int 26)
11509 (const_int 11) (const_int 27)
11510 (const_int 14) (const_int 30)
11511 (const_int 15) (const_int 31)])))]
11513 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11514 [(set_attr "type" "sselog")
11515 (set_attr "prefix" "evex")
11516 (set_attr "mode" "XI")])
;; (v)punpckhdq on V4SI: interleave the high two dwords of the inputs.
;; The selector lists index pairs (2, 6) and (3, 7).
;; Alternative 0 is the two-operand SSE2 form with operand 1 tied to the
;; destination; alternative 1 is the three-operand AVX form.
11519 (define_insn "vec_interleave_highv4si<mask_name>"
11520 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11523 (match_operand:V4SI 1 "register_operand" "0,v")
11524 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
11525 (parallel [(const_int 2) (const_int 6)
11526 (const_int 3) (const_int 7)])))]
11527 "TARGET_SSE2 && <mask_avx512vl_condition>"
11529 punpckhdq\t{%2, %0|%0, %2}
11530 vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11531 [(set_attr "isa" "noavx,avx")
11532 (set_attr "type" "sselog")
11533 (set_attr "prefix_data16" "1,*")
11534 (set_attr "prefix" "orig,maybe_vex")
11535 (set_attr "mode" "TI")])
;; vpunpckldq on V8SI: interleave the low dwords of each 16-byte lane.
;; The selector lists index pairs (i, i + 8) for i in 0..1 and 4..5.
11537 (define_insn "avx2_interleave_lowv8si<mask_name>"
11538 [(set (match_operand:V8SI 0 "register_operand" "=v")
11541 (match_operand:V8SI 1 "register_operand" "v")
11542 (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11543 (parallel [(const_int 0) (const_int 8)
11544 (const_int 1) (const_int 9)
11545 (const_int 4) (const_int 12)
11546 (const_int 5) (const_int 13)])))]
11547 "TARGET_AVX2 && <mask_avx512vl_condition>"
11548 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11549 [(set_attr "type" "sselog")
11550 (set_attr "prefix" "maybe_evex")
11551 (set_attr "mode" "OI")])
;; vpunpckldq on V16SI: interleave the low dwords of each 16-byte lane.
;; The selector lists index pairs (i, i + 16) for i in 0..1, 4..5, 8..9
;; and 12..13.
11553 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
11554 [(set (match_operand:V16SI 0 "register_operand" "=v")
11557 (match_operand:V16SI 1 "register_operand" "v")
11558 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11559 (parallel [(const_int 0) (const_int 16)
11560 (const_int 1) (const_int 17)
11561 (const_int 4) (const_int 20)
11562 (const_int 5) (const_int 21)
11563 (const_int 8) (const_int 24)
11564 (const_int 9) (const_int 25)
11565 (const_int 12) (const_int 28)
11566 (const_int 13) (const_int 29)])))]
11568 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11569 [(set_attr "type" "sselog")
11570 (set_attr "prefix" "evex")
11571 (set_attr "mode" "XI")])
;; (v)punpckldq on V4SI: interleave the low two dwords of the inputs.
;; The selector lists index pairs (0, 4) and (1, 5).
;; Alternative 0 is the two-operand SSE2 form with operand 1 tied to the
;; destination; alternative 1 is the three-operand AVX form.
11573 (define_insn "vec_interleave_lowv4si<mask_name>"
11574 [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11577 (match_operand:V4SI 1 "register_operand" "0,v")
11578 (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
11579 (parallel [(const_int 0) (const_int 4)
11580 (const_int 1) (const_int 5)])))]
11581 "TARGET_SSE2 && <mask_avx512vl_condition>"
11583 punpckldq\t{%2, %0|%0, %2}
11584 vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11585 [(set_attr "isa" "noavx,avx")
11586 (set_attr "type" "sselog")
11587 (set_attr "prefix_data16" "1,*")
11588 (set_attr "prefix" "orig,vex")
11589 (set_attr "mode" "TI")])
;; Expander for the full-width interleave-high on 256-bit integer modes.
;; It emits both avx2_interleave_low<mode> (into t1) and
;; avx2_interleave_high<mode> (into t2) on the inputs.  It then combines
;; the V4DI views of t1 and t2 with avx2_permv2ti using the immediate
;; 1 + (3 << 4), and moves the result, viewed in <MODE>mode, into
;; operand 0.
11591 (define_expand "vec_interleave_high<mode>"
11592 [(match_operand:VI_256 0 "register_operand" "=x")
11593 (match_operand:VI_256 1 "register_operand" "x")
11594 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11597 rtx t1 = gen_reg_rtx (<MODE>mode);
11598 rtx t2 = gen_reg_rtx (<MODE>mode);
11599 rtx t3 = gen_reg_rtx (V4DImode);
11600 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11601 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
11602 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11603 gen_lowpart (V4DImode, t2),
11604 GEN_INT (1 + (3 << 4))));
11605 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; Expander for the full-width interleave-low on 256-bit integer modes.
;; It emits both avx2_interleave_low<mode> (into t1) and
;; avx2_interleave_high<mode> (into t2) on the inputs.  It then combines
;; the V4DI views of t1 and t2 with avx2_permv2ti using the immediate
;; 0 + (2 << 4), and moves the result, viewed in <MODE>mode, into
;; operand 0.
11609 (define_expand "vec_interleave_low<mode>"
11610 [(match_operand:VI_256 0 "register_operand" "=x")
11611 (match_operand:VI_256 1 "register_operand" "x")
11612 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11615 rtx t1 = gen_reg_rtx (<MODE>mode);
11616 rtx t2 = gen_reg_rtx (<MODE>mode);
11617 rtx t3 = gen_reg_rtx (V4DImode);
11618 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11619 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
11620 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11621 gen_lowpart (V4DImode, t2),
11622 GEN_INT (0 + (2 << 4))));
11623 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
11627 ;; Modes handled by pinsr patterns.
;; V8HI is listed unconditionally.  V16QI and V4SI require
;; TARGET_SSE4_1, and V2DI additionally requires TARGET_64BIT.
11628 (define_mode_iterator PINSR_MODE
11629 [(V16QI "TARGET_SSE4_1") V8HI
11630 (V4SI "TARGET_SSE4_1")
11631 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Pattern-name prefix for the pinsr patterns, per mode: "sse2" for
;; V8HI and "sse4_1" for V16QI, V4SI and V2DI.
11633 (define_mode_attr sse2p4_1
11634 [(V16QI "sse4_1") (V8HI "sse2")
11635 (V4SI "sse4_1") (V2DI "sse4_1")])
11637 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; (v)pinsr{b,w,d,q}: insert scalar operand 2 into one element of
;; vector operand 1.
;; The RTL is a vec_merge of the duplicated scalar with operand 1.
;; Operand 3 is the vec_merge mask; the insn condition requires it to be
;; a single set bit whose index (exact_log2) is below the number of
;; vector elements.  The C body replaces operands[3] with that bit
;; index before the template is returned.
;; Alternatives 0/1 are the SSE forms (operand 1 tied to the
;; destination), with the scalar in a general register ("r") or in
;; memory ("m").  Alternatives 2/3 are the three-operand AVX forms.
;; The templates using the %k2 modifier are returned when the scalar
;; mode is smaller than SImode.
11638 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
11639 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
11640 (vec_merge:PINSR_MODE
11641 (vec_duplicate:PINSR_MODE
11642 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
11643 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
11644 (match_operand:SI 3 "const_int_operand")))]
11646 && ((unsigned) exact_log2 (INTVAL (operands[3]))
11647 < GET_MODE_NUNITS (<MODE>mode))"
11649 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
11651 switch (which_alternative)
11654 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11655 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
11658 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
11660 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11661 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
11664 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11666 gcc_unreachable ();
;; Attributes.  The prefix_rex, prefix_data16 and prefix_extra settings
;; are each conditioned on "not TARGET_AVX" combined with a mode test:
;; V2DImode for prefix_rex, V8HImode for prefix_data16 and prefix_extra.
11669 [(set_attr "isa" "noavx,noavx,avx,avx")
11670 (set_attr "type" "sselog")
11671 (set (attr "prefix_rex")
11673 (and (not (match_test "TARGET_AVX"))
11674 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
11676 (const_string "*")))
11677 (set (attr "prefix_data16")
11679 (and (not (match_test "TARGET_AVX"))
11680 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11682 (const_string "*")))
11683 (set (attr "prefix_extra")
11685 (and (not (match_test "TARGET_AVX"))
11686 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11688 (const_string "1")))
11689 (set_attr "length_immediate" "1")
11690 (set_attr "prefix" "orig,orig,vex,vex")
11691 (set_attr "mode" "TI")])
;; Masked expander for inserting a quarter-width vector (operand 2, in
;; <ssequartermode>) into operand 1 at the position given by the
;; immediate in operand 3 (0..3).  Operand 4 is the merge source and
;; operand 5 the mask register.
;; The C body turns the position into a vec_merge selector and emits
;; the corresponding ..._1_mask insn:
;;   4-byte elements:  selector = 0xFFFF ^ (0xF000 >> (mask * 4))
;;   otherwise:        selector = 0xFF   ^ (0xC0   >> (mask * 2))
;; NOTE(review): position 0 therefore clears the most significant
;; nibble (respectively bit pair) of the selector.  Confirm that this
;; orientation matches the vec_merge element numbering assumed by the
;; matching insn.
11693 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
11694 [(match_operand:AVX512_VEC 0 "register_operand")
11695 (match_operand:AVX512_VEC 1 "register_operand")
11696 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
11697 (match_operand:SI 3 "const_0_to_3_operand")
11698 (match_operand:AVX512_VEC 4 "register_operand")
11699 (match_operand:<avx512fmaskmode> 5 "register_operand")]
11703 mask = INTVAL (operands[3]);
11704 selector = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4 ?
11705 0xFFFF ^ (0xF000 >> mask * 4)
11706 : 0xFF ^ (0xC0 >> mask * 2);
11707 emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
11708 (operands[0], operands[1], operands[2], GEN_INT (selector),
11709 operands[4], operands[5]));
;; Quarter-width insert instruction (vinsert<shuffletype><extract_suf>).
;; The RTL is a vec_merge of operand 1 with the duplicated
;; quarter-width operand 2, controlled by the constant selector in
;; operand 3.
;; The C body recognises exactly eight selector values, in pairs
;; (16-element / 8-element form): 0xFFF / 0x3F, 0xF0FF / 0xCF,
;; 0xFF0F / 0xF3 and 0xFFF0 / 0xFC.  Any other value reaches
;; gcc_unreachable.  operands[3] is then replaced by the derived
;; immediate `mask' before the template is printed.
;; NOTE(review): the value assigned to `mask' for each selector must
;; stay in sync with the selector formula used by the ..._mask
;; expander.
11713 (define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
11714 [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
11715 (vec_merge:AVX512_VEC
11716 (match_operand:AVX512_VEC 1 "register_operand" "v")
11717 (vec_duplicate:AVX512_VEC
11718 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
11719 (match_operand:SI 3 "const_int_operand" "n")))]
11723 int selector = INTVAL (operands[3]);
11725 if (selector == 0xFFF || selector == 0x3F)
11727 else if ( selector == 0xF0FF || selector == 0xCF)
11729 else if ( selector == 0xFF0F || selector == 0xF3)
11731 else if ( selector == 0xFFF0 || selector == 0xFC)
11734 gcc_unreachable ();
11736 operands[3] = GEN_INT (mask);
11738 return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
11740 [(set_attr "type" "sselog")
11741 (set_attr "length_immediate" "1")
11742 (set_attr "prefix" "evex")
11743 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked insertion of a half-width vector (operand 2)
;; into operand 1.  Operand 3 is restricted to 0..1 and is read into
;; `mask'; the expander emits either vec_set_lo_<mode>_mask or
;; vec_set_hi_<mode>_mask with operands 0, 1, 2, 4 and 5.  Operand 3 is
;; not forwarded: each of the two insns hard-codes its own immediate.
;; NOTE(review): presumably 0 selects the "lo" insn and 1 the "hi" insn --
;; confirm against the branch condition.
11745 (define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
11746 [(match_operand:AVX512_VEC_2 0 "register_operand")
11747 (match_operand:AVX512_VEC_2 1 "register_operand")
11748 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
11749 (match_operand:SI 3 "const_0_to_1_operand")
11750 (match_operand:AVX512_VEC_2 4 "register_operand")
11751 (match_operand:<avx512fmaskmode> 5 "register_operand")]
11754 int mask = INTVAL (operands[3]);
11756 emit_insn (gen_vec_set_lo_<mode>_mask
11757 (operands[0], operands[1], operands[2],
11758 operands[4], operands[5]));
11760 emit_insn (gen_vec_set_hi_<mode>_mask
11761 (operands[0], operands[1], operands[2],
11762 operands[4], operands[5]));
;; Half-vector insert, "lo" form, for the 16-element modes (V16FI).
;; The pattern pairs operand 2 (half-width vector, register or memory)
;; with elements 8..15 selected from operand 1.  It is printed as
;; vinsert<shuffletype>32x8 with the immediate hard-wired to 0.
11766 (define_insn "vec_set_lo_<mode><mask_name>"
11767 [(set (match_operand:V16FI 0 "register_operand" "=v")
11769 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11770 (vec_select:<ssehalfvecmode>
11771 (match_operand:V16FI 1 "register_operand" "v")
11772 (parallel [(const_int 8) (const_int 9)
11773 (const_int 10) (const_int 11)
11774 (const_int 12) (const_int 13)
11775 (const_int 14) (const_int 15)]))))]
11777 "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
11778 [(set_attr "type" "sselog")
11779 (set_attr "length_immediate" "1")
11780 (set_attr "prefix" "evex")
11781 (set_attr "mode" "<sseinsnmode>")])
;; Half-vector insert, "hi" form, for the 16-element modes (V16FI).
;; The pattern pairs operand 2 (half-width vector, register or memory)
;; with elements 0..7 selected from operand 1.  It is printed as
;; vinsert<shuffletype>32x8 with the immediate hard-wired to 1.
;; NOTE(review): operand 2 is listed BEFORE the vec_select, the same order
;; as in vec_set_lo_<mode>; confirm this order is intended for the form
;; that emits immediate 1.
11783 (define_insn "vec_set_hi_<mode><mask_name>"
11784 [(set (match_operand:V16FI 0 "register_operand" "=v")
11786 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11787 (vec_select:<ssehalfvecmode>
11788 (match_operand:V16FI 1 "register_operand" "v")
11789 (parallel [(const_int 0) (const_int 1)
11790 (const_int 2) (const_int 3)
11791 (const_int 4) (const_int 5)
11792 (const_int 6) (const_int 7)]))))]
11794 "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
11795 [(set_attr "type" "sselog")
11796 (set_attr "length_immediate" "1")
11797 (set_attr "prefix" "evex")
11798 (set_attr "mode" "<sseinsnmode>")])
;; Half-vector insert, "lo" form, for the 8-element modes (V8FI).
;; The pattern pairs operand 2 (half-width vector, register or memory)
;; with elements 4..7 selected from operand 1.  It is printed as
;; vinsert<shuffletype>64x4 with the immediate hard-wired to 0.
11800 (define_insn "vec_set_lo_<mode><mask_name>"
11801 [(set (match_operand:V8FI 0 "register_operand" "=v")
11803 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11804 (vec_select:<ssehalfvecmode>
11805 (match_operand:V8FI 1 "register_operand" "v")
11806 (parallel [(const_int 4) (const_int 5)
11807 (const_int 6) (const_int 7)]))))]
11809 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
11810 [(set_attr "type" "sselog")
11811 (set_attr "length_immediate" "1")
11812 (set_attr "prefix" "evex")
11813 (set_attr "mode" "XI")])
;; Half-vector insert, "hi" form, for the 8-element modes (V8FI).
;; The pattern pairs operand 2 (half-width vector, register or memory)
;; with elements 0..3 selected from operand 1.  It is printed as
;; vinsert<shuffletype>64x4 with the immediate hard-wired to 1.
;; NOTE(review): operand 2 is listed BEFORE the vec_select, the same order
;; as in vec_set_lo_<mode>; confirm this order is intended for the form
;; that emits immediate 1.
11815 (define_insn "vec_set_hi_<mode><mask_name>"
11816 [(set (match_operand:V8FI 0 "register_operand" "=v")
11818 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11819 (vec_select:<ssehalfvecmode>
11820 (match_operand:V8FI 1 "register_operand" "v")
11821 (parallel [(const_int 0) (const_int 1)
11822 (const_int 2) (const_int 3)]))))]
11824 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
11825 [(set_attr "type" "sselog")
11826 (set_attr "length_immediate" "1")
11827 (set_attr "prefix" "evex")
11828 (set_attr "mode" "XI")])
;; Expander for the masked 64x2 shuffle on the VI8F_256 modes.
;; Operand 3 is a 2-bit immediate (0..3).  Each bit is expanded into a
;; pair of consecutive element indices for the ..._1_mask insn:
;;   bit 0 -> { 2*b, 2*b + 1 }        (range 0..3)
;;   bit 1 -> { 2*b + 4, 2*b + 5 }    (range 4..7)
;; Operands 4 and 5 (the latter in QImode) are forwarded unchanged.
11830 (define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
11831 [(match_operand:VI8F_256 0 "register_operand")
11832 (match_operand:VI8F_256 1 "register_operand")
11833 (match_operand:VI8F_256 2 "nonimmediate_operand")
11834 (match_operand:SI 3 "const_0_to_3_operand")
11835 (match_operand:VI8F_256 4 "register_operand")
11836 (match_operand:QI 5 "register_operand")]
11839 int mask = INTVAL (operands[3]);
11840 emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
11841 (operands[0], operands[1], operands[2],
11842 GEN_INT (((mask >> 0) & 1) * 2 + 0),
11843 GEN_INT (((mask >> 0) & 1) * 2 + 1),
11844 GEN_INT (((mask >> 1) & 1) * 2 + 4),
11845 GEN_INT (((mask >> 1) & 1) * 2 + 5),
11846 operands[4], operands[5]));
;; 64x2 shuffle insn on the VI8F_256 modes: a vec_select over the
;; vec_concat of operands 1 and 2, with four explicit element indices
;; (operands 3,4 in 0..3 and operands 5,6 in 4..7).
;; The condition requires each pair to be consecutive (op4 == op3 + 1,
;; op6 == op5 + 1).  The output code folds the indices back into the
;; instruction immediate, op3 / 2 in bit 0 and (op5 - 4) / 2 in bit 1,
;; and stores it in operand 3 before printing.
;; NOTE(review): the mode attribute is "XI" although the iterator is a
;; 256-bit one -- confirm this is intended.
11850 (define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
11851 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
11852 (vec_select:VI8F_256
11853 (vec_concat:<ssedoublemode>
11854 (match_operand:VI8F_256 1 "register_operand" "v")
11855 (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
11856 (parallel [(match_operand 3 "const_0_to_3_operand")
11857 (match_operand 4 "const_0_to_3_operand")
11858 (match_operand 5 "const_4_to_7_operand")
11859 (match_operand 6 "const_4_to_7_operand")])))]
11861 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11862 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
11865 mask = INTVAL (operands[3]) / 2;
11866 mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
11867 operands[3] = GEN_INT (mask);
11868 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
11870 [(set_attr "type" "sselog")
11871 (set_attr "length_immediate" "1")
11872 (set_attr "prefix" "evex")
11873 (set_attr "mode" "XI")])
;; Expander for the masked 64x2 shuffle on the V8FI modes.
;; Operand 3 is an 8-bit immediate made of four 2-bit fields.  Each field
;; f is expanded into a pair of consecutive element indices:
;;   fields at bits 0 and 2 -> { 2*f, 2*f + 1 }          (range 0..7)
;;   fields at bits 4 and 6 -> { 2*f + 8, 2*f + 9 }      (range 8..15)
;; The eight indices plus operands 4 and 5 go to the ..._1_mask insn.
11875 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
11876 [(match_operand:V8FI 0 "register_operand")
11877 (match_operand:V8FI 1 "register_operand")
11878 (match_operand:V8FI 2 "nonimmediate_operand")
11879 (match_operand:SI 3 "const_0_to_255_operand")
11880 (match_operand:V8FI 4 "register_operand")
11881 (match_operand:QI 5 "register_operand")]
11884 int mask = INTVAL (operands[3]);
11885 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
11886 (operands[0], operands[1], operands[2],
11887 GEN_INT (((mask >> 0) & 3) * 2),
11888 GEN_INT (((mask >> 0) & 3) * 2 + 1),
11889 GEN_INT (((mask >> 2) & 3) * 2),
11890 GEN_INT (((mask >> 2) & 3) * 2 + 1),
11891 GEN_INT (((mask >> 4) & 3) * 2 + 8),
11892 GEN_INT (((mask >> 4) & 3) * 2 + 9),
11893 GEN_INT (((mask >> 6) & 3) * 2 + 8),
11894 GEN_INT (((mask >> 6) & 3) * 2 + 9),
11895 operands[4], operands[5]));
;; 64x2 shuffle insn on the V8FI modes.  The selection runs over the
;; vec_concat of operands 1 and 2 with eight explicit indices: operands
;; 3..6 in 0..7 and operands 7..10 in 8..15.
;; The condition requires the indices to form four consecutive pairs
;; (op4 == op3 + 1, op6 == op5 + 1, op8 == op7 + 1, op10 == op9 + 1).
;; The output code rebuilds the 8-bit immediate from the first index of
;; each pair -- op3/2, op5/2 << 2, (op7-8)/2 << 4, (op9-8)/2 << 6 -- and
;; stores it in operand 3 before printing.
11899 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
11900 [(set (match_operand:V8FI 0 "register_operand" "=v")
11902 (vec_concat:<ssedoublemode>
11903 (match_operand:V8FI 1 "register_operand" "v")
11904 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
11905 (parallel [(match_operand 3 "const_0_to_7_operand")
11906 (match_operand 4 "const_0_to_7_operand")
11907 (match_operand 5 "const_0_to_7_operand")
11908 (match_operand 6 "const_0_to_7_operand")
11909 (match_operand 7 "const_8_to_15_operand")
11910 (match_operand 8 "const_8_to_15_operand")
11911 (match_operand 9 "const_8_to_15_operand")
11912 (match_operand 10 "const_8_to_15_operand")])))]
11914 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11915 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
11916 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
11917 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
11920 mask = INTVAL (operands[3]) / 2;
11921 mask |= INTVAL (operands[5]) / 2 << 2;
11922 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
11923 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
11924 operands[3] = GEN_INT (mask);
11926 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
11928 [(set_attr "type" "sselog")
11929 (set_attr "length_immediate" "1")
11930 (set_attr "prefix" "evex")
11931 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked 32x4 shuffle on the VI4F_256 modes.
;; Operand 3 is a 2-bit immediate (0..3).  Each bit b is expanded into a
;; run of four consecutive element indices for the ..._1_mask insn:
;;   bit 0 -> 4*b + 0 .. 4*b + 3       (range 0..7)
;;   bit 1 -> 4*b + 8 .. 4*b + 11      (range 8..15)
;; Operands 4 and 5 (the latter in QImode) are forwarded unchanged.
11933 (define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
11934 [(match_operand:VI4F_256 0 "register_operand")
11935 (match_operand:VI4F_256 1 "register_operand")
11936 (match_operand:VI4F_256 2 "nonimmediate_operand")
11937 (match_operand:SI 3 "const_0_to_3_operand")
11938 (match_operand:VI4F_256 4 "register_operand")
11939 (match_operand:QI 5 "register_operand")]
11942 int mask = INTVAL (operands[3]);
11943 emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
11944 (operands[0], operands[1], operands[2],
11945 GEN_INT (((mask >> 0) & 1) * 4 + 0),
11946 GEN_INT (((mask >> 0) & 1) * 4 + 1),
11947 GEN_INT (((mask >> 0) & 1) * 4 + 2),
11948 GEN_INT (((mask >> 0) & 1) * 4 + 3),
11949 GEN_INT (((mask >> 1) & 1) * 4 + 8),
11950 GEN_INT (((mask >> 1) & 1) * 4 + 9),
11951 GEN_INT (((mask >> 1) & 1) * 4 + 10),
11952 GEN_INT (((mask >> 1) & 1) * 4 + 11),
11953 operands[4], operands[5]));
;; 32x4 shuffle insn on the VI4F_256 modes: a vec_select over the
;; vec_concat of operands 1 and 2 with eight explicit indices (operands
;; 3..6 in 0..7, operands 7..10 in 8..15).
;; The condition requires two runs of four consecutive indices, starting
;; at operand 3 and at operand 7.  The output code rebuilds the 2-bit
;; immediate as op3 / 4 in bit 0 and (op7 - 8) / 4 in bit 1 and stores it
;; in operand 3 before printing.
11957 (define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
11958 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
11959 (vec_select:VI4F_256
11960 (vec_concat:<ssedoublemode>
11961 (match_operand:VI4F_256 1 "register_operand" "v")
11962 (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
11963 (parallel [(match_operand 3 "const_0_to_7_operand")
11964 (match_operand 4 "const_0_to_7_operand")
11965 (match_operand 5 "const_0_to_7_operand")
11966 (match_operand 6 "const_0_to_7_operand")
11967 (match_operand 7 "const_8_to_15_operand")
11968 (match_operand 8 "const_8_to_15_operand")
11969 (match_operand 9 "const_8_to_15_operand")
11970 (match_operand 10 "const_8_to_15_operand")])))]
11972 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11973 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
11974 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
11975 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
11976 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
11977 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
11980 mask = INTVAL (operands[3]) / 4;
11981 mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
11982 operands[3] = GEN_INT (mask);
11984 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
11986 [(set_attr "type" "sselog")
11987 (set_attr "length_immediate" "1")
11988 (set_attr "prefix" "evex")
11989 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked 32x4 shuffle on the V16FI modes.
;; Operand 3 is an 8-bit immediate made of four 2-bit fields.  Each field
;; f is expanded into a run of four consecutive element indices:
;;   fields at bits 0 and 2 -> 4*f      .. 4*f + 3       (range 0..15)
;;   fields at bits 4 and 6 -> 4*f + 16 .. 4*f + 19      (range 16..31)
;; The sixteen indices plus operands 4 and 5 (the latter in HImode) go to
;; the ..._1_mask insn.
11991 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
11992 [(match_operand:V16FI 0 "register_operand")
11993 (match_operand:V16FI 1 "register_operand")
11994 (match_operand:V16FI 2 "nonimmediate_operand")
11995 (match_operand:SI 3 "const_0_to_255_operand")
11996 (match_operand:V16FI 4 "register_operand")
11997 (match_operand:HI 5 "register_operand")]
12000 int mask = INTVAL (operands[3]);
12001 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
12002 (operands[0], operands[1], operands[2],
12003 GEN_INT (((mask >> 0) & 3) * 4),
12004 GEN_INT (((mask >> 0) & 3) * 4 + 1),
12005 GEN_INT (((mask >> 0) & 3) * 4 + 2),
12006 GEN_INT (((mask >> 0) & 3) * 4 + 3),
12007 GEN_INT (((mask >> 2) & 3) * 4),
12008 GEN_INT (((mask >> 2) & 3) * 4 + 1),
12009 GEN_INT (((mask >> 2) & 3) * 4 + 2),
12010 GEN_INT (((mask >> 2) & 3) * 4 + 3),
12011 GEN_INT (((mask >> 4) & 3) * 4 + 16),
12012 GEN_INT (((mask >> 4) & 3) * 4 + 17),
12013 GEN_INT (((mask >> 4) & 3) * 4 + 18),
12014 GEN_INT (((mask >> 4) & 3) * 4 + 19),
12015 GEN_INT (((mask >> 6) & 3) * 4 + 16),
12016 GEN_INT (((mask >> 6) & 3) * 4 + 17),
12017 GEN_INT (((mask >> 6) & 3) * 4 + 18),
12018 GEN_INT (((mask >> 6) & 3) * 4 + 19),
12019 operands[4], operands[5]));
;; 32x4 shuffle insn on the V16FI modes.  The selection runs over the
;; vec_concat of operands 1 and 2 with sixteen explicit indices: operands
;; 3..10 in 0..15 and operands 11..18 in 16..31.
;; The condition requires four runs of four consecutive indices, starting
;; at operands 3, 7, 11 and 15.  The output code rebuilds the 8-bit
;; immediate from the first index of each run -- op3/4, op7/4 << 2,
;; (op11-16)/4 << 4, (op15-16)/4 << 6 -- and stores it in operand 3
;; before printing.
12023 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
12024 [(set (match_operand:V16FI 0 "register_operand" "=v")
12026 (vec_concat:<ssedoublemode>
12027 (match_operand:V16FI 1 "register_operand" "v")
12028 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
12029 (parallel [(match_operand 3 "const_0_to_15_operand")
12030 (match_operand 4 "const_0_to_15_operand")
12031 (match_operand 5 "const_0_to_15_operand")
12032 (match_operand 6 "const_0_to_15_operand")
12033 (match_operand 7 "const_0_to_15_operand")
12034 (match_operand 8 "const_0_to_15_operand")
12035 (match_operand 9 "const_0_to_15_operand")
12036 (match_operand 10 "const_0_to_15_operand")
12037 (match_operand 11 "const_16_to_31_operand")
12038 (match_operand 12 "const_16_to_31_operand")
12039 (match_operand 13 "const_16_to_31_operand")
12040 (match_operand 14 "const_16_to_31_operand")
12041 (match_operand 15 "const_16_to_31_operand")
12042 (match_operand 16 "const_16_to_31_operand")
12043 (match_operand 17 "const_16_to_31_operand")
12044 (match_operand 18 "const_16_to_31_operand")])))]
12046 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12047 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12048 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12049 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12050 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12051 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
12052 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
12053 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
12054 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
12055 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
12056 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
12057 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
12060 mask = INTVAL (operands[3]) / 4;
12061 mask |= INTVAL (operands[7]) / 4 << 2;
12062 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
12063 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
12064 operands[3] = GEN_INT (mask);
12066 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
12068 [(set_attr "type" "sselog")
12069 (set_attr "length_immediate" "1")
12070 (set_attr "prefix" "evex")
12071 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked V16SI pshufd.  Operand 2 is an 8-bit immediate
;; made of four 2-bit fields.  The four field values are passed to
;; avx512f_pshufd_1_mask four times over, with 0, 4, 8 and 12 added, so
;; that the same four-element permutation is spelled out for every group
;; of four elements.  Operands 3 and 4 (the latter in HImode) are
;; forwarded unchanged.
12073 (define_expand "avx512f_pshufdv3_mask"
12074 [(match_operand:V16SI 0 "register_operand")
12075 (match_operand:V16SI 1 "nonimmediate_operand")
12076 (match_operand:SI 2 "const_0_to_255_operand")
12077 (match_operand:V16SI 3 "register_operand")
12078 (match_operand:HI 4 "register_operand")]
12081 int mask = INTVAL (operands[2]);
12082 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
12083 GEN_INT ((mask >> 0) & 3),
12084 GEN_INT ((mask >> 2) & 3),
12085 GEN_INT ((mask >> 4) & 3),
12086 GEN_INT ((mask >> 6) & 3),
12087 GEN_INT (((mask >> 0) & 3) + 4),
12088 GEN_INT (((mask >> 2) & 3) + 4),
12089 GEN_INT (((mask >> 4) & 3) + 4),
12090 GEN_INT (((mask >> 6) & 3) + 4),
12091 GEN_INT (((mask >> 0) & 3) + 8),
12092 GEN_INT (((mask >> 2) & 3) + 8),
12093 GEN_INT (((mask >> 4) & 3) + 8),
12094 GEN_INT (((mask >> 6) & 3) + 8),
12095 GEN_INT (((mask >> 0) & 3) + 12),
12096 GEN_INT (((mask >> 2) & 3) + 12),
12097 GEN_INT (((mask >> 4) & 3) + 12),
12098 GEN_INT (((mask >> 6) & 3) + 12),
12099 operands[3], operands[4]));
;; V16SI pshufd insn with all sixteen element indices explicit.
;; Operands 2..5 lie in 0..3, 6..9 in 4..7, 10..13 in 8..11 and 14..17 in
;; 12..15.  The condition requires operands 6..9, 10..13 and 14..17 to
;; equal operands 2..5 plus 4, 8 and 12 respectively, i.e. the same
;; permutation in every group of four.  The output code packs operands
;; 2..5 into an 8-bit immediate (two bits each, operand 2 lowest), stores
;; it in operand 2 and prints vpshufd.
12103 (define_insn "avx512f_pshufd_1<mask_name>"
12104 [(set (match_operand:V16SI 0 "register_operand" "=v")
12106 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
12107 (parallel [(match_operand 2 "const_0_to_3_operand")
12108 (match_operand 3 "const_0_to_3_operand")
12109 (match_operand 4 "const_0_to_3_operand")
12110 (match_operand 5 "const_0_to_3_operand")
12111 (match_operand 6 "const_4_to_7_operand")
12112 (match_operand 7 "const_4_to_7_operand")
12113 (match_operand 8 "const_4_to_7_operand")
12114 (match_operand 9 "const_4_to_7_operand")
12115 (match_operand 10 "const_8_to_11_operand")
12116 (match_operand 11 "const_8_to_11_operand")
12117 (match_operand 12 "const_8_to_11_operand")
12118 (match_operand 13 "const_8_to_11_operand")
12119 (match_operand 14 "const_12_to_15_operand")
12120 (match_operand 15 "const_12_to_15_operand")
12121 (match_operand 16 "const_12_to_15_operand")
12122 (match_operand 17 "const_12_to_15_operand")])))]
12124 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12125 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12126 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12127 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
12128 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
12129 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
12130 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
12131 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
12132 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
12133 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
12134 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
12135 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
12138 mask |= INTVAL (operands[2]) << 0;
12139 mask |= INTVAL (operands[3]) << 2;
12140 mask |= INTVAL (operands[4]) << 4;
12141 mask |= INTVAL (operands[5]) << 6;
12142 operands[2] = GEN_INT (mask);
12144 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
12146 [(set_attr "type" "sselog1")
12147 (set_attr "prefix" "evex")
12148 (set_attr "length_immediate" "1")
12149 (set_attr "mode" "XI")])
;; Expander for the masked V8SI pshufd.  The four 2-bit fields of the
;; 8-bit immediate in operand 2 are passed to avx2_pshufd_1_mask twice,
;; once as is and once with 4 added.  Operands 3 and 4 (the latter in
;; QImode) are forwarded unchanged.
12151 (define_expand "avx512vl_pshufdv3_mask"
12152 [(match_operand:V8SI 0 "register_operand")
12153 (match_operand:V8SI 1 "nonimmediate_operand")
12154 (match_operand:SI 2 "const_0_to_255_operand")
12155 (match_operand:V8SI 3 "register_operand")
12156 (match_operand:QI 4 "register_operand")]
12159 int mask = INTVAL (operands[2]);
12160 emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
12161 GEN_INT ((mask >> 0) & 3),
12162 GEN_INT ((mask >> 2) & 3),
12163 GEN_INT ((mask >> 4) & 3),
12164 GEN_INT ((mask >> 6) & 3),
12165 GEN_INT (((mask >> 0) & 3) + 4),
12166 GEN_INT (((mask >> 2) & 3) + 4),
12167 GEN_INT (((mask >> 4) & 3) + 4),
12168 GEN_INT (((mask >> 6) & 3) + 4),
12169 operands[3], operands[4]));
;; Expander for the unmasked V8SI pshufd.  The four 2-bit fields of the
;; 8-bit immediate in operand 2 are passed to avx2_pshufd_1 twice, once
;; as is (indices 0..3) and once with 4 added (indices 4..7).
12173 (define_expand "avx2_pshufdv3"
12174 [(match_operand:V8SI 0 "register_operand")
12175 (match_operand:V8SI 1 "nonimmediate_operand")
12176 (match_operand:SI 2 "const_0_to_255_operand")]
12179 int mask = INTVAL (operands[2]);
12180 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
12181 GEN_INT ((mask >> 0) & 3),
12182 GEN_INT ((mask >> 2) & 3),
12183 GEN_INT ((mask >> 4) & 3),
12184 GEN_INT ((mask >> 6) & 3),
12185 GEN_INT (((mask >> 0) & 3) + 4),
12186 GEN_INT (((mask >> 2) & 3) + 4),
12187 GEN_INT (((mask >> 4) & 3) + 4),
12188 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8SI pshufd insn with all eight element indices explicit (operands
;; 2..5 in 0..3, operands 6..9 in 4..7).  The condition, which also
;; includes <mask_avx512vl_condition>, requires operands 6..9 to equal
;; operands 2..5 plus 4.  The output code packs operands 2..5 into an
;; 8-bit immediate (two bits each, operand 2 lowest), stores it in
;; operand 2 and prints vpshufd.
12192 (define_insn "avx2_pshufd_1<mask_name>"
12193 [(set (match_operand:V8SI 0 "register_operand" "=v")
12195 (match_operand:V8SI 1 "nonimmediate_operand" "vm")
12196 (parallel [(match_operand 2 "const_0_to_3_operand")
12197 (match_operand 3 "const_0_to_3_operand")
12198 (match_operand 4 "const_0_to_3_operand")
12199 (match_operand 5 "const_0_to_3_operand")
12200 (match_operand 6 "const_4_to_7_operand")
12201 (match_operand 7 "const_4_to_7_operand")
12202 (match_operand 8 "const_4_to_7_operand")
12203 (match_operand 9 "const_4_to_7_operand")])))]
12205 && <mask_avx512vl_condition>
12206 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
12207 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
12208 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
12209 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
12212 mask |= INTVAL (operands[2]) << 0;
12213 mask |= INTVAL (operands[3]) << 2;
12214 mask |= INTVAL (operands[4]) << 4;
12215 mask |= INTVAL (operands[5]) << 6;
12216 operands[2] = GEN_INT (mask);
12218 return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12220 [(set_attr "type" "sselog1")
12221 (set_attr "prefix" "maybe_evex")
12222 (set_attr "length_immediate" "1")
12223 (set_attr "mode" "OI")])
;; Expander for the masked V4SI pshufd.  The four 2-bit fields of the
;; 8-bit immediate in operand 2 become the four element indices passed to
;; sse2_pshufd_1_mask; operands 3 and 4 (the latter in QImode) are
;; forwarded unchanged.
12225 (define_expand "avx512vl_pshufd_mask"
12226 [(match_operand:V4SI 0 "register_operand")
12227 (match_operand:V4SI 1 "nonimmediate_operand")
12228 (match_operand:SI 2 "const_0_to_255_operand")
12229 (match_operand:V4SI 3 "register_operand")
12230 (match_operand:QI 4 "register_operand")]
12233 int mask = INTVAL (operands[2]);
12234 emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
12235 GEN_INT ((mask >> 0) & 3),
12236 GEN_INT ((mask >> 2) & 3),
12237 GEN_INT ((mask >> 4) & 3),
12238 GEN_INT ((mask >> 6) & 3),
12239 operands[3], operands[4]));
;; Expander for the unmasked V4SI pshufd.  Operand 2 only has to satisfy
;; const_int_operand (no range predicate); the code uses its low eight
;; bits, two per element, as the four indices passed to sse2_pshufd_1.
12243 (define_expand "sse2_pshufd"
12244 [(match_operand:V4SI 0 "register_operand")
12245 (match_operand:V4SI 1 "nonimmediate_operand")
12246 (match_operand:SI 2 "const_int_operand")]
12249 int mask = INTVAL (operands[2]);
12250 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
12251 GEN_INT ((mask >> 0) & 3),
12252 GEN_INT ((mask >> 2) & 3),
12253 GEN_INT ((mask >> 4) & 3),
12254 GEN_INT ((mask >> 6) & 3)));
;; V4SI pshufd insn with the four element indices explicit (operands
;; 2..5, each in 0..3).  Enabled under TARGET_SSE2 combined with
;; <mask_avx512vl_condition>.  The output code packs the indices into an
;; 8-bit immediate (two bits each, operand 2 lowest), stores it in
;; operand 2 and prints %vpshufd.
12258 (define_insn "sse2_pshufd_1<mask_name>"
12259 [(set (match_operand:V4SI 0 "register_operand" "=v")
12261 (match_operand:V4SI 1 "nonimmediate_operand" "vm")
12262 (parallel [(match_operand 2 "const_0_to_3_operand")
12263 (match_operand 3 "const_0_to_3_operand")
12264 (match_operand 4 "const_0_to_3_operand")
12265 (match_operand 5 "const_0_to_3_operand")])))]
12266 "TARGET_SSE2 && <mask_avx512vl_condition>"
12269 mask |= INTVAL (operands[2]) << 0;
12270 mask |= INTVAL (operands[3]) << 2;
12271 mask |= INTVAL (operands[4]) << 4;
12272 mask |= INTVAL (operands[5]) << 6;
12273 operands[2] = GEN_INT (mask);
12275 return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12277 [(set_attr "type" "sselog1")
12278 (set_attr "prefix_data16" "1")
12279 (set_attr "prefix" "<mask_prefix2>")
12280 (set_attr "length_immediate" "1")
12281 (set_attr "mode" "TI")])
;; V32HI vpshuflw.  Operand 1 is the source (register or memory) and
;; operand 2 an 8-bit immediate (0..255) that is printed as is; unlike
;; the narrower pshuflw patterns in this file, no per-element indices are
;; spelled out in the RTL.
12283 (define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
12284 [(set (match_operand:V32HI 0 "register_operand" "=v")
12286 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12287 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12290 "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12291 [(set_attr "type" "sselog")
12292 (set_attr "prefix" "evex")
12293 (set_attr "mode" "XI")])
;; Expander for the masked V16HI pshuflw, enabled under
;; TARGET_AVX512VL && TARGET_AVX512BW.  The four 2-bit fields of the
;; 8-bit immediate in operand 2 are passed to avx2_pshuflw_1_mask twice,
;; once as is (indices 0..3) and once with 8 added (indices 8..11).
;; Operands 3 and 4 (the latter in HImode) are forwarded unchanged.
12295 (define_expand "avx512vl_pshuflwv3_mask"
12296 [(match_operand:V16HI 0 "register_operand")
12297 (match_operand:V16HI 1 "nonimmediate_operand")
12298 (match_operand:SI 2 "const_0_to_255_operand")
12299 (match_operand:V16HI 3 "register_operand")
12300 (match_operand:HI 4 "register_operand")]
12301 "TARGET_AVX512VL && TARGET_AVX512BW"
12303 int mask = INTVAL (operands[2]);
12304 emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
12305 GEN_INT ((mask >> 0) & 3),
12306 GEN_INT ((mask >> 2) & 3),
12307 GEN_INT ((mask >> 4) & 3),
12308 GEN_INT ((mask >> 6) & 3),
12309 GEN_INT (((mask >> 0) & 3) + 8),
12310 GEN_INT (((mask >> 2) & 3) + 8),
12311 GEN_INT (((mask >> 4) & 3) + 8),
12312 GEN_INT (((mask >> 6) & 3) + 8),
12313 operands[3], operands[4]));
;; Expander for the unmasked V16HI pshuflw.  The four 2-bit fields of the
;; 8-bit immediate in operand 2 are passed to avx2_pshuflw_1 twice, once
;; as is (indices 0..3) and once with 8 added (indices 8..11).
12317 (define_expand "avx2_pshuflwv3"
12318 [(match_operand:V16HI 0 "register_operand")
12319 (match_operand:V16HI 1 "nonimmediate_operand")
12320 (match_operand:SI 2 "const_0_to_255_operand")]
12323 int mask = INTVAL (operands[2]);
12324 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
12325 GEN_INT ((mask >> 0) & 3),
12326 GEN_INT ((mask >> 2) & 3),
12327 GEN_INT ((mask >> 4) & 3),
12328 GEN_INT ((mask >> 6) & 3),
12329 GEN_INT (((mask >> 0) & 3) + 8),
12330 GEN_INT (((mask >> 2) & 3) + 8),
12331 GEN_INT (((mask >> 4) & 3) + 8),
12332 GEN_INT (((mask >> 6) & 3) + 8)));
;; V16HI pshuflw insn.  Operands 2..5 (each in 0..3) and operands 6..9
;; (each in 8..11) are the variable element indices of the selection.
;; The condition, which also includes <mask_avx512bw_condition> and
;; <mask_avx512vl_condition>, requires operands 6..9 to equal operands
;; 2..5 plus 8, so the same permutation is applied at offset 0 and at
;; offset 8.  The output code packs operands 2..5 into an 8-bit immediate
;; (two bits each, operand 2 lowest), stores it in operand 2 and prints
;; vpshuflw.
12336 (define_insn "avx2_pshuflw_1<mask_name>"
12337 [(set (match_operand:V16HI 0 "register_operand" "=v")
12339 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
12340 (parallel [(match_operand 2 "const_0_to_3_operand")
12341 (match_operand 3 "const_0_to_3_operand")
12342 (match_operand 4 "const_0_to_3_operand")
12343 (match_operand 5 "const_0_to_3_operand")
12348 (match_operand 6 "const_8_to_11_operand")
12349 (match_operand 7 "const_8_to_11_operand")
12350 (match_operand 8 "const_8_to_11_operand")
12351 (match_operand 9 "const_8_to_11_operand")
12355 (const_int 15)])))]
12357 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
12358 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
12359 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
12360 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
12361 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
12364 mask |= INTVAL (operands[2]) << 0;
12365 mask |= INTVAL (operands[3]) << 2;
12366 mask |= INTVAL (operands[4]) << 4;
12367 mask |= INTVAL (operands[5]) << 6;
12368 operands[2] = GEN_INT (mask);
12370 return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12372 [(set_attr "type" "sselog")
12373 (set_attr "prefix" "maybe_evex")
12374 (set_attr "length_immediate" "1")
12375 (set_attr "mode" "OI")])
;; Expander for the masked V8HI pshuflw, enabled under
;; TARGET_AVX512VL && TARGET_AVX512BW.  The four 2-bit fields of the
;; 8-bit immediate in operand 2 become the four indices passed to
;; sse2_pshuflw_1_mask; operands 3 and 4 (the latter in QImode) are
;; forwarded unchanged.
12377 (define_expand "avx512vl_pshuflw_mask"
12378 [(match_operand:V8HI 0 "register_operand")
12379 (match_operand:V8HI 1 "nonimmediate_operand")
12380 (match_operand:SI 2 "const_0_to_255_operand")
12381 (match_operand:V8HI 3 "register_operand")
12382 (match_operand:QI 4 "register_operand")]
12383 "TARGET_AVX512VL && TARGET_AVX512BW"
12385 int mask = INTVAL (operands[2]);
12386 emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
12387 GEN_INT ((mask >> 0) & 3),
12388 GEN_INT ((mask >> 2) & 3),
12389 GEN_INT ((mask >> 4) & 3),
12390 GEN_INT ((mask >> 6) & 3),
12391 operands[3], operands[4]));
;; Expander for the unmasked V8HI pshuflw.  Operand 2 only has to satisfy
;; const_int_operand (no range predicate); the code uses its low eight
;; bits, two per element, as the four indices passed to sse2_pshuflw_1.
12395 (define_expand "sse2_pshuflw"
12396 [(match_operand:V8HI 0 "register_operand")
12397 (match_operand:V8HI 1 "nonimmediate_operand")
12398 (match_operand:SI 2 "const_int_operand")]
12401 int mask = INTVAL (operands[2]);
12402 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
12403 GEN_INT ((mask >> 0) & 3),
12404 GEN_INT ((mask >> 2) & 3),
12405 GEN_INT ((mask >> 4) & 3),
12406 GEN_INT ((mask >> 6) & 3)));
;; V8HI pshuflw insn with four variable element indices (operands 2..5,
;; each in 0..3).  Enabled under TARGET_SSE2 combined with
;; <mask_avx512bw_condition> and <mask_avx512vl_condition>.  The output
;; code packs the indices into an 8-bit immediate (two bits each, operand
;; 2 lowest), stores it in operand 2 and prints %vpshuflw.  The
;; attributes mark the encoding as using a rep prefix and no data16
;; prefix.
12410 (define_insn "sse2_pshuflw_1<mask_name>"
12411 [(set (match_operand:V8HI 0 "register_operand" "=v")
12413 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
12414 (parallel [(match_operand 2 "const_0_to_3_operand")
12415 (match_operand 3 "const_0_to_3_operand")
12416 (match_operand 4 "const_0_to_3_operand")
12417 (match_operand 5 "const_0_to_3_operand")
12422 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
12425 mask |= INTVAL (operands[2]) << 0;
12426 mask |= INTVAL (operands[3]) << 2;
12427 mask |= INTVAL (operands[4]) << 4;
12428 mask |= INTVAL (operands[5]) << 6;
12429 operands[2] = GEN_INT (mask);
12431 return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12433 [(set_attr "type" "sselog")
12434 (set_attr "prefix_data16" "0")
12435 (set_attr "prefix_rep" "1")
12436 (set_attr "prefix" "maybe_vex")
12437 (set_attr "length_immediate" "1")
12438 (set_attr "mode" "TI")])
;; Expander for the unmasked V16HI pshufhw.  The four 2-bit fields of the
;; 8-bit immediate in operand 2 are passed to avx2_pshufhw_1 twice, once
;; with 4 added (indices 4..7) and once with 12 added (indices 12..15).
12440 (define_expand "avx2_pshufhwv3"
12441 [(match_operand:V16HI 0 "register_operand")
12442 (match_operand:V16HI 1 "nonimmediate_operand")
12443 (match_operand:SI 2 "const_0_to_255_operand")]
12446 int mask = INTVAL (operands[2]);
12447 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
12448 GEN_INT (((mask >> 0) & 3) + 4),
12449 GEN_INT (((mask >> 2) & 3) + 4),
12450 GEN_INT (((mask >> 4) & 3) + 4),
12451 GEN_INT (((mask >> 6) & 3) + 4),
12452 GEN_INT (((mask >> 0) & 3) + 12),
12453 GEN_INT (((mask >> 2) & 3) + 12),
12454 GEN_INT (((mask >> 4) & 3) + 12),
12455 GEN_INT (((mask >> 6) & 3) + 12)));
;; V32HI vpshufhw.  Operand 1 is the source (register or memory) and
;; operand 2 an 8-bit immediate (0..255) that is printed as is; unlike
;; the narrower pshufhw patterns in this file, no per-element indices are
;; spelled out in the RTL.
12459 (define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
12460 [(set (match_operand:V32HI 0 "register_operand" "=v")
12462 [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
12463 (match_operand:SI 2 "const_0_to_255_operand" "n")]
12466 "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12467 [(set_attr "type" "sselog")
12468 (set_attr "prefix" "evex")
12469 (set_attr "mode" "XI")])
;; Expander for the masked V16HI pshufhw, enabled under
;; TARGET_AVX512VL && TARGET_AVX512BW.  The four 2-bit fields of the
;; 8-bit immediate in operand 2 are passed to avx2_pshufhw_1_mask twice,
;; once with 4 added (indices 4..7) and once with 12 added (indices
;; 12..15).  Operands 3 and 4 (the latter in HImode) are forwarded
;; unchanged.
12471 (define_expand "avx512vl_pshufhwv3_mask"
12472 [(match_operand:V16HI 0 "register_operand")
12473 (match_operand:V16HI 1 "nonimmediate_operand")
12474 (match_operand:SI 2 "const_0_to_255_operand")
12475 (match_operand:V16HI 3 "register_operand")
12476 (match_operand:HI 4 "register_operand")]
12477 "TARGET_AVX512VL && TARGET_AVX512BW"
12479 int mask = INTVAL (operands[2]);
12480 emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
12481 GEN_INT (((mask >> 0) & 3) + 4),
12482 GEN_INT (((mask >> 2) & 3) + 4),
12483 GEN_INT (((mask >> 4) & 3) + 4),
12484 GEN_INT (((mask >> 6) & 3) + 4),
12485 GEN_INT (((mask >> 0) & 3) + 12),
12486 GEN_INT (((mask >> 2) & 3) + 12),
12487 GEN_INT (((mask >> 4) & 3) + 12),
12488 GEN_INT (((mask >> 6) & 3) + 12),
12489 operands[3], operands[4]));
;; V16HI pshufhw insn.  Operands 2..5 (each in 4..7) and operands 6..9
;; (each in 12..15) are the variable element indices of the selection.
;; The condition, which also includes <mask_avx512bw_condition> and
;; <mask_avx512vl_condition>, requires operands 6..9 to equal operands
;; 2..5 plus 8.  The output code subtracts 4 from each of operands 2..5,
;; packs the results into an 8-bit immediate (two bits each, operand 2
;; lowest), stores it in operand 2 and prints vpshufhw.
12493 (define_insn "avx2_pshufhw_1<mask_name>"
12494 [(set (match_operand:V16HI 0 "register_operand" "=v")
12496 (match_operand:V16HI 1 "nonimmediate_operand" "vm")
12497 (parallel [(const_int 0)
12501 (match_operand 2 "const_4_to_7_operand")
12502 (match_operand 3 "const_4_to_7_operand")
12503 (match_operand 4 "const_4_to_7_operand")
12504 (match_operand 5 "const_4_to_7_operand")
12509 (match_operand 6 "const_12_to_15_operand")
12510 (match_operand 7 "const_12_to_15_operand")
12511 (match_operand 8 "const_12_to_15_operand")
12512 (match_operand 9 "const_12_to_15_operand")])))]
12514 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
12515 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
12516 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
12517 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
12518 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
12521 mask |= (INTVAL (operands[2]) - 4) << 0;
12522 mask |= (INTVAL (operands[3]) - 4) << 2;
12523 mask |= (INTVAL (operands[4]) - 4) << 4;
12524 mask |= (INTVAL (operands[5]) - 4) << 6;
12525 operands[2] = GEN_INT (mask);
12527 return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
12529 [(set_attr "type" "sselog")
12530 (set_attr "prefix" "maybe_evex")
12531 (set_attr "length_immediate" "1")
12532 (set_attr "mode" "OI")])
;; Expander for the masked V8HI pshufhw, enabled under
;; TARGET_AVX512VL && TARGET_AVX512BW.  The four 2-bit fields of the
;; 8-bit immediate in operand 2, each with 4 added, become the four
;; indices (4..7) passed to sse2_pshufhw_1_mask; operands 3 and 4 (the
;; latter in QImode) are forwarded unchanged.
12534 (define_expand "avx512vl_pshufhw_mask"
12535 [(match_operand:V8HI 0 "register_operand")
12536 (match_operand:V8HI 1 "nonimmediate_operand")
12537 (match_operand:SI 2 "const_0_to_255_operand")
12538 (match_operand:V8HI 3 "register_operand")
12539 (match_operand:QI 4 "register_operand")]
12540 "TARGET_AVX512VL && TARGET_AVX512BW"
12542 int mask = INTVAL (operands[2]);
12543 emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
12544 GEN_INT (((mask >> 0) & 3) + 4),
12545 GEN_INT (((mask >> 2) & 3) + 4),
12546 GEN_INT (((mask >> 4) & 3) + 4),
12547 GEN_INT (((mask >> 6) & 3) + 4),
12548 operands[3], operands[4]));
;; Expander for the unmasked V8HI pshufhw.  Operand 2 only has to satisfy
;; const_int_operand (no range predicate); the code uses its low eight
;; bits, two per element and each with 4 added, as the four indices
;; (4..7) passed to sse2_pshufhw_1.
12552 (define_expand "sse2_pshufhw"
12553 [(match_operand:V8HI 0 "register_operand")
12554 (match_operand:V8HI 1 "nonimmediate_operand")
12555 (match_operand:SI 2 "const_int_operand")]
12558 int mask = INTVAL (operands[2]);
12559 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
12560 GEN_INT (((mask >> 0) & 3) + 4),
12561 GEN_INT (((mask >> 2) & 3) + 4),
12562 GEN_INT (((mask >> 4) & 3) + 4),
12563 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI pshufhw insn with four variable element indices (operands 2..5,
;; each in 4..7).  Enabled under TARGET_SSE2 combined with
;; <mask_avx512bw_condition> and <mask_avx512vl_condition>.  The output
;; code subtracts 4 from each index, packs the results into an 8-bit
;; immediate (two bits each, operand 2 lowest), stores it in operand 2
;; and prints %vpshufhw.  The attributes mark the encoding as using a rep
;; prefix and no data16 prefix.
12567 (define_insn "sse2_pshufhw_1<mask_name>"
12568 [(set (match_operand:V8HI 0 "register_operand" "=v")
12570 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
12571 (parallel [(const_int 0)
12575 (match_operand 2 "const_4_to_7_operand")
12576 (match_operand 3 "const_4_to_7_operand")
12577 (match_operand 4 "const_4_to_7_operand")
12578 (match_operand 5 "const_4_to_7_operand")])))]
12579 "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
12582 mask |= (INTVAL (operands[2]) - 4) << 0;
12583 mask |= (INTVAL (operands[3]) - 4) << 2;
12584 mask |= (INTVAL (operands[4]) - 4) << 4;
12585 mask |= (INTVAL (operands[5]) - 4) << 6;
12586 operands[2] = GEN_INT (mask);
12588 return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
12590 [(set_attr "type" "sselog")
12591 (set_attr "prefix_rep" "1")
12592 (set_attr "prefix_data16" "0")
12593 (set_attr "prefix" "maybe_vex")
12594 (set_attr "length_immediate" "1")
12595 (set_attr "mode" "TI")])
;; Expander that builds a V4SI value from the SImode operand 1 (register
;; or memory) via a vec_duplicate.  The preparation statement sets
;; operands[2] to the V4SI zero constant, CONST0_RTX (V4SImode).
;; NOTE(review): operand 2 is presumably the other input of the pattern
;; (the elements not taken from operand 1) -- confirm against the full
;; RTL template.
12597 (define_expand "sse2_loadd"
12598 [(set (match_operand:V4SI 0 "register_operand")
12600 (vec_duplicate:V4SI
12601 (match_operand:SI 1 "nonimmediate_operand"))
12605 "operands[2] = CONST0_RTX (V4SImode);")
;; Insn combining a vec_duplicate of the SImode operand 2 with the V4SI
;; operand 1 (predicate reg_or_0_operand).  Five alternatives:
;;   0, 1: %vmovd from memory / from a general register, operand 1 under
;;         constraint "C"                                  (isa sse2)
;;   2:    movss from memory, operand 1 under "C"          (isa noavx)
;;   3:    movss from an x register, operand 1 tied to the
;;         destination ("0")                               (isa noavx)
;;   4:    three-operand vmovss, operand 1 in any x register (isa avx)
12607 (define_insn "sse2_loadld"
12608 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
12610 (vec_duplicate:V4SI
12611 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
12612 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
12616 %vmovd\t{%2, %0|%0, %2}
12617 %vmovd\t{%2, %0|%0, %2}
12618 movss\t{%2, %0|%0, %2}
12619 movss\t{%2, %0|%0, %2}
12620 vmovss\t{%2, %1, %0|%0, %1, %2}"
12621 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
12622 (set_attr "type" "ssemov")
12623 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
12624 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extract one element of a VI12_128 vector register, selected by immediate
;; operand 2, with (v)pextr<ssemodesuffix>.  Alternative 0 writes a general
;; register (printed as %k0), alternative 1 writes memory.
;; prefix_data16 and prefix_extra get a separate value for alternative 0 in
;; V8HImode.
12626 (define_insn "*vec_extract<mode>"
12627 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
12628 (vec_select:<ssescalarmode>
12629 (match_operand:VI12_128 1 "register_operand" "x,x")
12631 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
12634 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
12635 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12636 [(set_attr "type" "sselog1")
12637 (set (attr "prefix_data16")
12639 (and (eq_attr "alternative" "0")
12640 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12642 (const_string "*")))
12643 (set (attr "prefix_extra")
12645 (and (eq_attr "alternative" "0")
12646 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
12648 (const_string "1")))
12649 (set_attr "length_immediate" "1")
12650 (set_attr "prefix" "maybe_vex")
12651 (set_attr "mode" "TI")])
;; HImode element extraction from V8HI into a general register using the
;; non-VEX pextrw; enabled for SSE2 targets that lack SSE4.1.
12653 (define_insn "*vec_extractv8hi_sse2"
12654 [(set (match_operand:HI 0 "register_operand" "=r")
12656 (match_operand:V8HI 1 "register_operand" "x")
12658 [(match_operand:SI 2 "const_0_to_7_operand")])))]
12659 "TARGET_SSE2 && !TARGET_SSE4_1"
12660 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
12661 [(set_attr "type" "sselog1")
12662 (set_attr "prefix_data16" "1")
12663 (set_attr "length_immediate" "1")
12664 (set_attr "mode" "TI")])
;; (v)pextrb of a V16QI element (index 0..15) into an SWI48-mode general
;; register, printed as %k0.
;; NOTE(review): the "_zext" name suggests the byte is zero-extended, but the
;; wrapping RTL code is not visible in this listing -- confirm.
12666 (define_insn "*vec_extractv16qi_zext"
12667 [(set (match_operand:SWI48 0 "register_operand" "=r")
12670 (match_operand:V16QI 1 "register_operand" "x")
12672 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
12674 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
12675 [(set_attr "type" "sselog1")
12676 (set_attr "prefix_extra" "1")
12677 (set_attr "length_immediate" "1")
12678 (set_attr "prefix" "maybe_vex")
12679 (set_attr "mode" "TI")])
;; (v)pextrw of a V8HI element (index 0..7) into an SWI48-mode general
;; register, printed as %k0.
;; NOTE(review): the "_zext" name suggests the word is zero-extended, but the
;; wrapping RTL code is not visible in this listing -- confirm.
12681 (define_insn "*vec_extractv8hi_zext"
12682 [(set (match_operand:SWI48 0 "register_operand" "=r")
12685 (match_operand:V8HI 1 "register_operand" "x")
12687 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
12689 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
12690 [(set_attr "type" "sselog1")
12691 (set_attr "prefix_data16" "1")
12692 (set_attr "length_immediate" "1")
12693 (set_attr "prefix" "maybe_vex")
12694 (set_attr "mode" "TI")])
;; Variant of element extraction where vector operand 1 lives in offsettable
;; memory ("o") and the result goes to a general register.
12696 (define_insn "*vec_extract<mode>_mem"
12697 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
12698 (vec_select:<ssescalarmode>
12699 (match_operand:VI12_128 1 "memory_operand" "o")
12701 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extraction of element 0 (the parallel selects const_int 0) into an
;; SWI48-mode destination.  Four alternatives; the second is limited to the
;; "sse4" isa.  The condition rejects the memory-to-memory combination.
12705 (define_insn "*vec_extract<ssevecmodelower>_0"
12706 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
12708 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
12709 (parallel [(const_int 0)])))]
12710 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12712 [(set_attr "isa" "*,sse4,*,*")])
;; Element-0 extraction from V4SI into a DImode register on 64-bit SSE2
;; targets with TARGET_INTER_UNIT_MOVES_FROM_VEC.  After reload it is split
;; into a zero_extend:DI of the same hard register re-typed as SImode.
12714 (define_insn_and_split "*vec_extractv4si_0_zext"
12715 [(set (match_operand:DI 0 "register_operand" "=r")
12718 (match_operand:V4SI 1 "register_operand" "x")
12719 (parallel [(const_int 0)]))))]
12720 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
12722 "&& reload_completed"
12723 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12724 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; DImode element-0 extraction from V2DI for 32-bit targets (!TARGET_64BIT);
;; the destination is an SSE register or memory, and memory-to-memory is
;; rejected by the condition.
12726 (define_insn "*vec_extractv2di_0_sse"
12727 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
12729 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
12730 (parallel [(const_int 0)])))]
12731 "TARGET_SSE && !TARGET_64BIT
12732 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; Post-reload split: extracting element 0 from a vector register becomes a
;; plain move from the same hard register viewed in <MODE>mode.
12736 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
12738 (match_operand:<ssevecmode> 1 "register_operand")
12739 (parallel [(const_int 0)])))]
12740 "TARGET_SSE && reload_completed"
12741 [(set (match_dup 0) (match_dup 1))]
12742 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
;; SImode element extraction from a V4SI register.  Per the output code and
;; the attributes, alternative 0 uses (v)pextrd with the element index as
;; immediate, while the other alternatives shift the vector right by
;; index * 4 bytes with psrldq (non-AVX) or vpsrldq (AVX).
12744 (define_insn "*vec_extractv4si"
12745 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,Yr,*x,x")
12747 (match_operand:V4SI 1 "register_operand" "x,0,0,x")
12748 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
12751 switch (which_alternative)
12754 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
12758 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12759 return "psrldq\t{%2, %0|%0, %2}";
12762 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
12763 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
12766 gcc_unreachable ();
12769 [(set_attr "isa" "*,noavx,noavx,avx")
12770 (set_attr "type" "sselog1,sseishft1,sseishft1,sseishft1")
12771 (set_attr "prefix_extra" "1,*,*,*")
12772 (set_attr "length_immediate" "1")
12773 (set_attr "prefix" "maybe_vex,orig,orig,vex")
12774 (set_attr "mode" "TI")])
;; (v)pextrd of a V4SI element into the 32-bit view (%k0) of a DImode
;; register; requires a 64-bit target with SSE4.1.
12776 (define_insn "*vec_extractv4si_zext"
12777 [(set (match_operand:DI 0 "register_operand" "=r")
12780 (match_operand:V4SI 1 "register_operand" "x")
12781 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
12782 "TARGET_64BIT && TARGET_SSE4_1"
12783 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
12784 [(set_attr "type" "sselog1")
12785 (set_attr "prefix_extra" "1")
12786 (set_attr "length_immediate" "1")
12787 (set_attr "prefix" "maybe_vex")
12788 (set_attr "mode" "TI")])
;; SImode element extraction when the V4SI source is in offsettable memory;
;; the destination is an SSE register or a general register.
12790 (define_insn "*vec_extractv4si_mem"
12791 [(set (match_operand:SI 0 "register_operand" "=x,r")
12793 (match_operand:V4SI 1 "memory_operand" "o,o")
12794 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; Zero-extending variant for a V4SI source in memory (64-bit targets).  After
;; reload it becomes zero_extend:DI of an SImode memory reference at byte
;; offset index * 4 from the original address.
12798 (define_insn_and_split "*vec_extractv4si_zext_mem"
12799 [(set (match_operand:DI 0 "register_operand" "=x,r")
12802 (match_operand:V4SI 1 "memory_operand" "o,o")
12803 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
12804 "TARGET_64BIT && TARGET_SSE"
12806 "&& reload_completed"
12807 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
12809 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extraction of element 1 of a V2DI operand, seven alternatives.  The visible
;; templates cover (v)pextrq with immediate 1, (v)movhps, an 8-byte (v)psrldq
;; shift and movhlps; the last two alternatives take the source from
;; offsettable memory.  Memory-to-memory is rejected by the condition.
12812 (define_insn "*vec_extractv2di_1"
12813 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
12815 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
12816 (parallel [(const_int 1)])))]
12817 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
12819 %vpextrq\t{$1, %1, %0|%0, %1, 1}
12820 %vmovhps\t{%1, %0|%0, %1}
12821 psrldq\t{$8, %0|%0, 8}
12822 vpsrldq\t{$8, %1, %0|%0, %1, 8}
12823 movhlps\t{%1, %0|%0, %1}
12826 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
12827 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
12828 (set_attr "length_immediate" "1,*,1,1,*,*,*")
12829 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
12830 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
12831 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
12832 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
;; Post-reload split for element extraction from a VI_128 vector in memory:
;; the extraction is replaced by a scalar move from operand 1 re-addressed at
;; byte offset index * GET_MODE_SIZE (<ssescalarmode>mode).
12835 [(set (match_operand:<ssescalarmode> 0 "register_operand")
12836 (vec_select:<ssescalarmode>
12837 (match_operand:VI_128 1 "memory_operand")
12839 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
12840 "TARGET_SSE && reload_completed"
12841 [(set (match_dup 0) (match_dup 1))]
12843 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
12845 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
12848 ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
12849 ;; vector modes into vec_extract*.
;; Matches, while pseudos can still be created, a move whose source is a
;; byte-offset-0 SUBREG of an integer or float vector register of 16, 32 or
;; 64 bytes, and rewrites it as a vec_select of element 0.  The preparation
;; code narrows wider vectors with gen_vec_extract_lo_* into a temporary and
;; finally takes the <ssevecmode>mode lowpart of the result.
12851 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
12852 (match_operand:SWI48x 1 "register_operand"))]
12853 "can_create_pseudo_p ()
12854 && GET_CODE (operands[1]) == SUBREG
12855 && REG_P (SUBREG_REG (operands[1]))
12856 && (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1]))) == MODE_VECTOR_INT
12857 || (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1])))
12858 == MODE_VECTOR_FLOAT))
12859 && SUBREG_BYTE (operands[1]) == 0
12861 && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 16
12862 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 32
12864 || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 64
12865 && TARGET_AVX512F))
12866 && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
12867 [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
12868 (parallel [(const_int 0)])))]
12871 operands[1] = SUBREG_REG (operands[1]);
12872 switch (GET_MODE_SIZE (GET_MODE (operands[1])))
12875 if (<MODE>mode == SImode)
12877 tmp = gen_reg_rtx (V8SImode);
12878 emit_insn (gen_vec_extract_lo_v16si (tmp,
12879 gen_lowpart (V16SImode,
12884 tmp = gen_reg_rtx (V4DImode);
12885 emit_insn (gen_vec_extract_lo_v8di (tmp,
12886 gen_lowpart (V8DImode,
12892 tmp = gen_reg_rtx (<ssevecmode>mode);
12893 if (<MODE>mode == SImode)
12894 emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
12897 emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
12902 operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
;; Build a V2SI from two SImode values on SSE4.1 targets; the condition
;; rejects two memory inputs.  Alternatives use (v)pinsrd with immediate 1,
;; (v)punpckldq, (v)movd when operand 2 is the "C" constant, and MMX ("y")
;; forms of punpckldq/movd.
12907 (define_insn "*vec_concatv2si_sse4_1"
12908 [(set (match_operand:V2SI 0 "register_operand"
12909 "=Yr,*x,x, Yr,*x,x, x, *y,*y")
12911 (match_operand:SI 1 "nonimmediate_operand"
12912 " 0, 0,x, 0,0, x,rm, 0,rm")
12913 (match_operand:SI 2 "vector_move_operand"
12914 " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
12915 "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12917 pinsrd\t{$1, %2, %0|%0, %2, 1}
12918 pinsrd\t{$1, %2, %0|%0, %2, 1}
12919 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
12920 punpckldq\t{%2, %0|%0, %2}
12921 punpckldq\t{%2, %0|%0, %2}
12922 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
12923 %vmovd\t{%1, %0|%0, %1}
12924 punpckldq\t{%2, %0|%0, %2}
12925 movd\t{%1, %0|%0, %1}"
12926 [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
12927 (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
12928 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*")
12929 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*")
12930 (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
12931 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
12933 ;; ??? In theory we can match memory for the MMX alternative, but allowing
12934 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
12935 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI from two SImode values on SSE targets without SSE4.1.  The
;; first three alternatives are restricted to the "sse2" isa.  Alternatives
;; whose second operand is the "C" constant emit a single movd/movss of
;; operand 1; the others emit punpckldq/unpcklps.
12936 (define_insn "*vec_concatv2si"
12937 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
12939 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
12940 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
12941 "TARGET_SSE && !TARGET_SSE4_1"
12943 punpckldq\t{%2, %0|%0, %2}
12944 movd\t{%1, %0|%0, %1}
12945 movd\t{%1, %0|%0, %1}
12946 unpcklps\t{%2, %0|%0, %2}
12947 movss\t{%1, %0|%0, %1}
12948 punpckldq\t{%2, %0|%0, %2}
12949 movd\t{%1, %0|%0, %1}"
12950 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
12951 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
12952 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
;; Combine two V2SI operands into a V4SI register: (v)punpcklqdq or movlhps
;; when operand 2 is a register, (v)movhps when it is in memory (printed as
;; %q2 in the Intel-syntax half of the template).
12954 (define_insn "*vec_concatv4si"
12955 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
12957 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
12958 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
12961 punpcklqdq\t{%2, %0|%0, %2}
12962 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
12963 movlhps\t{%2, %0|%0, %2}
12964 movhps\t{%2, %0|%0, %q2}
12965 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
12966 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
12967 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
12968 (set_attr "prefix" "orig,vex,orig,orig,vex")
12969 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
12971 ;; movd instead of movq is required to handle broken assemblers.
;; Build a V2DI from two DImode values, eleven alternatives: (v)pinsrq with
;; immediate 1, a move from a general register, (v)movq, movq2dq from an MMX
;; register, (v)punpcklqdq, movlhps and (v)movhps.  Alternative 3 chooses
;; between %vmovq and %vmovd at output time depending on
;; HAVE_AS_IX86_INTERUNIT_MOVQ.
12972 (define_insn "vec_concatv2di"
12973 [(set (match_operand:V2DI 0 "register_operand"
12974 "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x")
12976 (match_operand:DI 1 "nonimmediate_operand"
12977 " 0, 0,x ,r ,xm,*y,0,x,0,0,x")
12978 (match_operand:DI 2 "vector_move_operand"
12979 "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))]
12982 pinsrq\t{$1, %2, %0|%0, %2, 1}
12983 pinsrq\t{$1, %2, %0|%0, %2, 1}
12984 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
12985 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
12986 %vmovq\t{%1, %0|%0, %1}
12987 movq2dq\t{%1, %0|%0, %1}
12988 punpcklqdq\t{%2, %0|%0, %2}
12989 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
12990 movlhps\t{%2, %0|%0, %2}
12991 movhps\t{%2, %0|%0, %2}
12992 vmovhps\t{%2, %1, %0|%0, %1, %2}"
12993 [(set_attr "isa" "x64_sse4_noavx,x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
12996 (eq_attr "alternative" "0,1,2,6,7")
12997 (const_string "sselog")
12998 (const_string "ssemov")))
12999 (set_attr "prefix_rex" "1,1,1,1,*,*,*,*,*,*,*")
13000 (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*,*,*")
13001 (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*,*,*")
13002 (set_attr "prefix" "orig,orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
13003 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander that delegates to ix86_expand_sse_unpack with flag arguments
;; (false, false) and then finishes with DONE.
;; NOTE(review): judging by the pattern name the first flag presumably selects
;; the unsigned variant and the second the high half -- confirm against
;; ix86_expand_sse_unpack.
13005 (define_expand "vec_unpacks_lo_<mode>"
13006 [(match_operand:<sseunpackmode> 0 "register_operand")
13007 (match_operand:VI124_AVX512F 1 "register_operand")]
13009 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
;; Expander that delegates to ix86_expand_sse_unpack with flag arguments
;; (false, true) and then finishes with DONE.
;; NOTE(review): judging by the pattern name the first flag presumably selects
;; the unsigned variant and the second the high half -- confirm against
;; ix86_expand_sse_unpack.
13011 (define_expand "vec_unpacks_hi_<mode>"
13012 [(match_operand:<sseunpackmode> 0 "register_operand")
13013 (match_operand:VI124_AVX512F 1 "register_operand")]
13015 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
;; Expander that delegates to ix86_expand_sse_unpack with flag arguments
;; (true, false) and then finishes with DONE.
;; NOTE(review): judging by the pattern name the first flag presumably selects
;; the unsigned variant and the second the high half -- confirm against
;; ix86_expand_sse_unpack.
13017 (define_expand "vec_unpacku_lo_<mode>"
13018 [(match_operand:<sseunpackmode> 0 "register_operand")
13019 (match_operand:VI124_AVX512F 1 "register_operand")]
13021 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Expander that delegates to ix86_expand_sse_unpack with flag arguments
;; (true, true) and then finishes with DONE.
;; NOTE(review): judging by the pattern name the first flag presumably selects
;; the unsigned variant and the second the high half -- confirm against
;; ix86_expand_sse_unpack.
13023 (define_expand "vec_unpacku_hi_<mode>"
13024 [(match_operand:<sseunpackmode> 0 "register_operand")
13025 (match_operand:VI124_AVX512F 1 "register_operand")]
13027 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
13029 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13033 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unsigned-average pattern.  The visible RTL widens both
;; inputs with zero_extend to <ssedoublemode>, adds them together with one
;; further operand, shifts right and truncates back to the vector mode.
;; The preparation code sets operands[3] to the <MODE>mode vector of ones and
;; canonicalizes the operands with ix86_fixup_binary_operands_no_copy; when
;; masking is applied, operands[5] receives that constant.
13035 (define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
13036 [(set (match_operand:VI12_AVX2 0 "register_operand")
13037 (truncate:VI12_AVX2
13038 (lshiftrt:<ssedoublemode>
13039 (plus:<ssedoublemode>
13040 (plus:<ssedoublemode>
13041 (zero_extend:<ssedoublemode>
13042 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
13043 (zero_extend:<ssedoublemode>
13044 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
13045 (match_dup <mask_expand_op3>))
13047 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13050 if (<mask_applied>)
13052 operands[3] = CONST1_RTX(<MODE>mode);
13053 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
13055 if (<mask_applied>)
13057 operands[5] = operands[3];
;; Insn matching the unsigned-average RTL; the added constant operand must
;; satisfy const1_operand and the operands must pass ix86_binary_operator_ok.
;; Alternative 0 is the two-operand pavg<ssemodesuffix> (non-AVX), alternative
;; 1 the three-operand vpavg<ssemodesuffix>, optionally with a mask operand.
13062 (define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
13063 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
13064 (truncate:VI12_AVX2
13065 (lshiftrt:<ssedoublemode>
13066 (plus:<ssedoublemode>
13067 (plus:<ssedoublemode>
13068 (zero_extend:<ssedoublemode>
13069 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,v"))
13070 (zero_extend:<ssedoublemode>
13071 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))
13072 (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
13074 "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
13075 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
13077 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
13078 vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13079 [(set_attr "isa" "noavx,avx")
13080 (set_attr "type" "sseiadd")
13081 (set_attr "prefix_data16" "1,*")
13082 (set_attr "prefix" "orig,<mask_prefix>")
13083 (set_attr "mode" "<sseinsnmode>")])
13085 ;; The correct representation for this is absolutely enormous, and
13086 ;; surely not generally useful.
;; (v)psadbw modelled as an unspec over two <ssebytemode> inputs instead of
;; explicit RTL arithmetic.  Alternative 0 is the two-operand non-AVX form,
;; alternative 1 the three-operand AVX form.
13087 (define_insn "<sse2_avx2>_psadbw"
13088 [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
13089 (unspec:VI8_AVX2_AVX512BW
13090 [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
13091 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,vm")]
13095 psadbw\t{%2, %0|%0, %2}
13096 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
13097 [(set_attr "isa" "noavx,avx")
13098 (set_attr "type" "sseiadd")
13099 (set_attr "atom_unit" "simul")
13100 (set_attr "prefix_data16" "1,*")
13101 (set_attr "prefix" "orig,maybe_evex")
13102 (set_attr "mode" "<sseinsnmode>")])
;; (v)movmsk<ssemodesuffix>: produces an SImode value in a general register
;; from a VF_128_256 vector register.
13104 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
13105 [(set (match_operand:SI 0 "register_operand" "=r")
13107 [(match_operand:VF_128_256 1 "register_operand" "x")]
13110 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
13111 [(set_attr "type" "ssemov")
13112 (set_attr "prefix" "maybe_vex")
13113 (set_attr "mode" "<MODE>")])
;; vpmovmskb on a V32QI register; the result is an SImode value in a general
;; register.  The "mode" attribute is SI to agree with the SImode destination
;; and with the 128-bit sse2_pmovmskb pattern.
13115 (define_insn "avx2_pmovmskb"
13116 [(set (match_operand:SI 0 "register_operand" "=r")
13117 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
13120 "vpmovmskb\t{%1, %0|%0, %1}"
13121 [(set_attr "type" "ssemov")
13122 (set_attr "prefix" "vex")
13123 (set_attr "mode" "SI")])
;; (v)pmovmskb on a V16QI register; the result is an SImode value in a general
;; register.
13125 (define_insn "sse2_pmovmskb"
13126 [(set (match_operand:SI 0 "register_operand" "=r")
13127 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
13130 "%vpmovmskb\t{%1, %0|%0, %1}"
13131 [(set_attr "type" "ssemov")
13132 (set_attr "prefix_data16" "1")
13133 (set_attr "prefix" "maybe_vex")
13134 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: V16QI memory destination operand 0, with V16QI
;; register operands 1 and 2 as unspec inputs.
13136 (define_expand "sse2_maskmovdqu"
13137 [(set (match_operand:V16QI 0 "memory_operand")
13138 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
13139 (match_operand:V16QI 2 "register_operand")
;; maskmovdqu insn.  The store address is register operand 0, constrained to
;; "D"; the previous memory contents appear as an unspec input alongside
;; operands 1 and 2.  When Pmode differs from word_mode an addr32 prefix is
;; written directly to the assembly output before the mnemonic, and
;; length_address is set from the same test.
13144 (define_insn "*sse2_maskmovdqu"
13145 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
13146 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
13147 (match_operand:V16QI 2 "register_operand" "x")
13148 (mem:V16QI (match_dup 0))]
13152 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
13153 that requires %v to be at the beginning of the opcode name. */
13154 if (Pmode != word_mode)
13155 fputs ("\taddr32", asm_out_file);
13156 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
13158 [(set_attr "type" "ssemov")
13159 (set_attr "prefix_data16" "1")
13160 (set (attr "length_address")
13161 (symbol_ref ("Pmode != word_mode")))
13162 ;; The implicit %rdi operand confuses default length_vex computation.
13163 (set (attr "length_vex")
13164 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
13165 (set_attr "prefix" "maybe_vex")
13166 (set_attr "mode" "TI")])
;; unspec_volatile insn taking an SImode memory operand; its "memory"
;; attribute marks it as a load and atom_sse_attr tags it "mxcsr".
13168 (define_insn "sse_ldmxcsr"
13169 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
13173 [(set_attr "type" "sse")
13174 (set_attr "atom_sse_attr" "mxcsr")
13175 (set_attr "prefix" "maybe_vex")
13176 (set_attr "memory" "load")])
;; Stores the result of unspec_volatile UNSPECV_STMXCSR into an SImode memory
;; operand; its "memory" attribute marks it as a store.
13178 (define_insn "sse_stmxcsr"
13179 [(set (match_operand:SI 0 "memory_operand" "=m")
13180 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
13183 [(set_attr "type" "sse")
13184 (set_attr "atom_sse_attr" "mxcsr")
13185 (set_attr "prefix" "maybe_vex")
13186 (set_attr "memory" "store")])
;; unspec_volatile insn on an address operand ("p"); its memory effect is
;; declared "unknown" and atom_sse_attr tags it "fence".
13188 (define_insn "sse2_clflush"
13189 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
13193 [(set_attr "type" "sse")
13194 (set_attr "atom_sse_attr" "fence")
13195 (set_attr "memory" "unknown")])
;; mwait as an unspec_volatile with two SImode register operands pinned by the
;; "a" and "c" constraints; the insn length is fixed at 3.
13198 (define_insn "sse3_mwait"
13199 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
13200 (match_operand:SI 1 "register_operand" "c")]
13203 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
13204 ;; Since 32bit register operands are implicitly zero extended to 64bit,
13205 ;; we only need to set up 32bit registers.
13207 [(set_attr "length" "3")])
;; monitor as an unspec_volatile: a pointer-mode (P) operand under the "a"
;; constraint plus two SImode operands under "c" and "d".  The length is 3,
;; plus 1 when Pmode differs from word_mode.
13209 (define_insn "sse3_monitor_<mode>"
13210 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
13211 (match_operand:SI 1 "register_operand" "c")
13212 (match_operand:SI 2 "register_operand" "d")]
13215 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
13216 ;; RCX and RDX are used. Since 32bit register operands are implicitly
13217 ;; zero extended to 64bit, we only need to set up 32bit registers.
13219 [(set (attr "length")
13220 (symbol_ref ("(Pmode != word_mode) + 3")))])
13222 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13224 ;; SSSE3 instructions
13226 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over plain and signed-saturating add/subtract; the horizontal
;; word patterns in this section use it as ssse3_plusminus:HI.
13228 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; 256-bit horizontal add/subtract on HImode elements (vph...w).  Each result
;; element is ssse3_plusminus applied to two adjacent elements; as listed, the
;; pairs come from operand 1 (indices 0..15) followed by operand 2 (indices
;; 0..15).
;; NOTE(review): the ymm form of this instruction is documented as operating
;; on each 128-bit lane separately; verify that the element order written
;; here matches the hardware result layout.
13230 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
13231 [(set (match_operand:V16HI 0 "register_operand" "=x")
13236 (ssse3_plusminus:HI
13238 (match_operand:V16HI 1 "register_operand" "x")
13239 (parallel [(const_int 0)]))
13240 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13241 (ssse3_plusminus:HI
13242 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13243 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13245 (ssse3_plusminus:HI
13246 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13247 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
13248 (ssse3_plusminus:HI
13249 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13250 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13253 (ssse3_plusminus:HI
13254 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
13255 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
13256 (ssse3_plusminus:HI
13257 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
13258 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
13260 (ssse3_plusminus:HI
13261 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
13262 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
13263 (ssse3_plusminus:HI
13264 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
13265 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
13269 (ssse3_plusminus:HI
13271 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
13272 (parallel [(const_int 0)]))
13273 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13274 (ssse3_plusminus:HI
13275 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13276 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13278 (ssse3_plusminus:HI
13279 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13280 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
13281 (ssse3_plusminus:HI
13282 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13283 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
13286 (ssse3_plusminus:HI
13287 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
13288 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
13289 (ssse3_plusminus:HI
13290 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
13291 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
13293 (ssse3_plusminus:HI
13294 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
13295 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
13296 (ssse3_plusminus:HI
13297 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
13298 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
13300 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
13301 [(set_attr "type" "sseiadd")
13302 (set_attr "prefix_extra" "1")
13303 (set_attr "prefix" "vex")
13304 (set_attr "mode" "OI")])
;; 128-bit horizontal add/subtract on HImode elements.  Each result element is
;; ssse3_plusminus applied to two adjacent elements, first from operand 1
;; (indices 0..7), then from operand 2 (indices 0..7).  Alternative 0 is the
;; two-operand non-AVX form, alternative 1 the three-operand AVX form.
13306 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
13307 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13311 (ssse3_plusminus:HI
13313 (match_operand:V8HI 1 "register_operand" "0,x")
13314 (parallel [(const_int 0)]))
13315 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13316 (ssse3_plusminus:HI
13317 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13318 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13320 (ssse3_plusminus:HI
13321 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
13322 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
13323 (ssse3_plusminus:HI
13324 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
13325 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
13328 (ssse3_plusminus:HI
13330 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
13331 (parallel [(const_int 0)]))
13332 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13333 (ssse3_plusminus:HI
13334 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13335 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
13337 (ssse3_plusminus:HI
13338 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
13339 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
13340 (ssse3_plusminus:HI
13341 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
13342 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
13345 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
13346 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
13347 [(set_attr "isa" "noavx,avx")
13348 (set_attr "type" "sseiadd")
13349 (set_attr "atom_unit" "complex")
13350 (set_attr "prefix_data16" "1,*")
13351 (set_attr "prefix_extra" "1")
13352 (set_attr "prefix" "orig,vex")
13353 (set_attr "mode" "TI")])
;; 64-bit (V4HI) horizontal add/subtract using "y"-constrained registers.
;; Each result element is ssse3_plusminus applied to two adjacent elements,
;; first from operand 1 (indices 0..3), then from operand 2 (indices 0..3).
;; prefix_rex is computed from x86_extended_reg_mentioned_p.
13355 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
13356 [(set (match_operand:V4HI 0 "register_operand" "=y")
13359 (ssse3_plusminus:HI
13361 (match_operand:V4HI 1 "register_operand" "0")
13362 (parallel [(const_int 0)]))
13363 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
13364 (ssse3_plusminus:HI
13365 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
13366 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
13368 (ssse3_plusminus:HI
13370 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
13371 (parallel [(const_int 0)]))
13372 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
13373 (ssse3_plusminus:HI
13374 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
13375 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
13377 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
13378 [(set_attr "type" "sseiadd")
13379 (set_attr "atom_unit" "complex")
13380 (set_attr "prefix_extra" "1")
13381 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13382 (set_attr "mode" "DI")])
;; 256-bit horizontal add/subtract on SImode elements (vph...d).  The visible
;; selections pair adjacent elements of operand 1 (indices 0..7) and then of
;; operand 2 (indices 0..7); the combining RTL code is not visible in this
;; listing.
;; NOTE(review): the ymm form of this instruction is documented as operating
;; on each 128-bit lane separately; verify that the element order written
;; here matches the hardware result layout.
13384 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
13385 [(set (match_operand:V8SI 0 "register_operand" "=x")
13391 (match_operand:V8SI 1 "register_operand" "x")
13392 (parallel [(const_int 0)]))
13393 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13395 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
13396 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
13399 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
13400 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
13402 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
13403 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
13408 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
13409 (parallel [(const_int 0)]))
13410 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
13412 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
13413 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
13416 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
13417 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
13419 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
13420 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
13422 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
13423 [(set_attr "type" "sseiadd")
13424 (set_attr "prefix_extra" "1")
13425 (set_attr "prefix" "vex")
13426 (set_attr "mode" "OI")])
;; 128-bit horizontal add/subtract on SImode elements.  The visible selections
;; pair adjacent elements of operand 1 (indices 0..3) and then of operand 2
;; (indices 0..3).  Alternative 0 is the two-operand non-AVX form, alternative
;; 1 the three-operand AVX form.
13428 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
13429 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
13434 (match_operand:V4SI 1 "register_operand" "0,x")
13435 (parallel [(const_int 0)]))
13436 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13438 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
13439 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
13443 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
13444 (parallel [(const_int 0)]))
13445 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
13447 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
13448 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
13451 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
13452 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
13453 [(set_attr "isa" "noavx,avx")
13454 (set_attr "type" "sseiadd")
13455 (set_attr "atom_unit" "complex")
13456 (set_attr "prefix_data16" "1,*")
13457 (set_attr "prefix_extra" "1")
13458 (set_attr "prefix" "orig,vex")
13459 (set_attr "mode" "TI")])
;; 64-bit (V2SI) horizontal add/subtract using "y"-constrained registers.  The
;; visible selections pair elements 0 and 1 of operand 1 and then of operand 2.
;; prefix_rex is computed from x86_extended_reg_mentioned_p.
13461 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
13462 [(set (match_operand:V2SI 0 "register_operand" "=y")
13466 (match_operand:V2SI 1 "register_operand" "0")
13467 (parallel [(const_int 0)]))
13468 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
13471 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
13472 (parallel [(const_int 0)]))
13473 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
13475 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
13476 [(set_attr "type" "sseiadd")
13477 (set_attr "atom_unit" "complex")
13478 (set_attr "prefix_extra" "1")
13479 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13480 (set_attr "mode" "DI")])
;; vpmaddubsw on V32QI inputs producing V16HI.  The visible RTL selects the
;; even-indexed bytes (0, 2, ..., 30) and the odd-indexed bytes (1, 3, ..., 31)
;; of each operand; the arithmetic codes that combine these selections are not
;; visible in this listing.
13482 (define_insn "avx2_pmaddubsw256"
13483 [(set (match_operand:V16HI 0 "register_operand" "=x")
13488 (match_operand:V32QI 1 "register_operand" "x")
13489 (parallel [(const_int 0) (const_int 2)
13490 (const_int 4) (const_int 6)
13491 (const_int 8) (const_int 10)
13492 (const_int 12) (const_int 14)
13493 (const_int 16) (const_int 18)
13494 (const_int 20) (const_int 22)
13495 (const_int 24) (const_int 26)
13496 (const_int 28) (const_int 30)])))
13499 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
13500 (parallel [(const_int 0) (const_int 2)
13501 (const_int 4) (const_int 6)
13502 (const_int 8) (const_int 10)
13503 (const_int 12) (const_int 14)
13504 (const_int 16) (const_int 18)
13505 (const_int 20) (const_int 22)
13506 (const_int 24) (const_int 26)
13507 (const_int 28) (const_int 30)]))))
13510 (vec_select:V16QI (match_dup 1)
13511 (parallel [(const_int 1) (const_int 3)
13512 (const_int 5) (const_int 7)
13513 (const_int 9) (const_int 11)
13514 (const_int 13) (const_int 15)
13515 (const_int 17) (const_int 19)
13516 (const_int 21) (const_int 23)
13517 (const_int 25) (const_int 27)
13518 (const_int 29) (const_int 31)])))
13520 (vec_select:V16QI (match_dup 2)
13521 (parallel [(const_int 1) (const_int 3)
13522 (const_int 5) (const_int 7)
13523 (const_int 9) (const_int 11)
13524 (const_int 13) (const_int 15)
13525 (const_int 17) (const_int 19)
13526 (const_int 21) (const_int 23)
13527 (const_int 25) (const_int 27)
13528 (const_int 29) (const_int 31)]))))))]
13530 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
13531 [(set_attr "type" "sseiadd")
13532 (set_attr "prefix_extra" "1")
13533 (set_attr "prefix" "vex")
13534 (set_attr "mode" "OI")])
13536 ;; The correct representation for this is absolutely enormous, and
13537 ;; surely not generally useful.
;; EVEX vpmaddubsw for VI2_AVX512VL destinations, modelled as unspec
;; UNSPEC_PMADDUBSW512 over two <dbpsadbwmode> inputs; the template accepts an
;; optional mask operand.  The output template is a plain quoted string with
;; no trailing ';'.
13538 (define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
13539 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
13540 (unspec:VI2_AVX512VL
13541 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
13542 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
13543 UNSPEC_PMADDUBSW512))]
13545 "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13546 [(set_attr "type" "sseiadd")
13547 (set_attr "prefix" "evex")
13548 (set_attr "mode" "XI")])
;; EVEX vpmulhrsw on V32HI with an optional mask operand.  The visible RTL
;; includes a V32HI constant vector of ones; operand 1 is marked commutative
;; ("%v") and operand 2 may be a register or memory.
13550 (define_insn "avx512bw_umulhrswv32hi3<mask_name>"
13551 [(set (match_operand:V32HI 0 "register_operand" "=v")
13558 (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
13560 (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
13562 (const_vector:V32HI [(const_int 1) (const_int 1)
13563 (const_int 1) (const_int 1)
13564 (const_int 1) (const_int 1)
13565 (const_int 1) (const_int 1)
13566 (const_int 1) (const_int 1)
13567 (const_int 1) (const_int 1)
13568 (const_int 1) (const_int 1)
13569 (const_int 1) (const_int 1)
13570 (const_int 1) (const_int 1)
13571 (const_int 1) (const_int 1)
13572 (const_int 1) (const_int 1)
13573 (const_int 1) (const_int 1)
13574 (const_int 1) (const_int 1)
13575 (const_int 1) (const_int 1)
13576 (const_int 1) (const_int 1)
13577 (const_int 1) (const_int 1)]))
13580 "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13581 [(set_attr "type" "sseimul")
13582 (set_attr "prefix" "evex")
13583 (set_attr "mode" "XI")])
;; 128-bit pmaddubsw: V16QI inputs, V8HI result.  The RTL selects the
;; even-numbered bytes (0,2,...,14) and the odd-numbered bytes (1,3,...,15)
;; of operands 1 and 2 separately.
;; Alternative 0 (isa noavx) is the two-operand form with operand 1 tied to
;; the destination; alternative 1 (isa avx) is the three-operand vpmaddubsw.
13585 (define_insn "ssse3_pmaddubsw128"
13586 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13591 (match_operand:V16QI 1 "register_operand" "0,x")
13592 (parallel [(const_int 0) (const_int 2)
13593 (const_int 4) (const_int 6)
13594 (const_int 8) (const_int 10)
13595 (const_int 12) (const_int 14)])))
13598 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
13599 (parallel [(const_int 0) (const_int 2)
13600 (const_int 4) (const_int 6)
13601 (const_int 8) (const_int 10)
13602 (const_int 12) (const_int 14)]))))
13605 (vec_select:V8QI (match_dup 1)
13606 (parallel [(const_int 1) (const_int 3)
13607 (const_int 5) (const_int 7)
13608 (const_int 9) (const_int 11)
13609 (const_int 13) (const_int 15)])))
13611 (vec_select:V8QI (match_dup 2)
13612 (parallel [(const_int 1) (const_int 3)
13613 (const_int 5) (const_int 7)
13614 (const_int 9) (const_int 11)
13615 (const_int 13) (const_int 15)]))))))]
13618 pmaddubsw\t{%2, %0|%0, %2}
13619 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
13620 [(set_attr "isa" "noavx,avx")
13621 (set_attr "type" "sseiadd")
13622 (set_attr "atom_unit" "simul")
13623 (set_attr "prefix_data16" "1,*")
13624 (set_attr "prefix_extra" "1")
13625 (set_attr "prefix" "orig,vex")
13626 (set_attr "mode" "TI")])
;; 64-bit pmaddubsw on "y"-constrained registers: V8QI inputs, V4HI result.
;; Same even/odd byte selection as the 128-bit pattern, over indices 0..7.
;; Operand 1 is tied to the destination; prefix_rex is computed from
;; x86_extended_reg_mentioned_p (insn).
13628 (define_insn "ssse3_pmaddubsw"
13629 [(set (match_operand:V4HI 0 "register_operand" "=y")
13634 (match_operand:V8QI 1 "register_operand" "0")
13635 (parallel [(const_int 0) (const_int 2)
13636 (const_int 4) (const_int 6)])))
13639 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
13640 (parallel [(const_int 0) (const_int 2)
13641 (const_int 4) (const_int 6)]))))
13644 (vec_select:V4QI (match_dup 1)
13645 (parallel [(const_int 1) (const_int 3)
13646 (const_int 5) (const_int 7)])))
13648 (vec_select:V4QI (match_dup 2)
13649 (parallel [(const_int 1) (const_int 3)
13650 (const_int 5) (const_int 7)]))))))]
13652 "pmaddubsw\t{%2, %0|%0, %2}"
13653 [(set_attr "type" "sseiadd")
13654 (set_attr "atom_unit" "simul")
13655 (set_attr "prefix_extra" "1")
13656 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13657 (set_attr "mode" "DI")])
;; Modes handled by the pmulhrsw expanders below: V4HI and V8HI
;; unconditionally, V16HI only when TARGET_AVX2.
13659 (define_mode_iterator PMULHRSW
13660 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Masked pmulhrsw expander, enabled for TARGET_AVX512BW && TARGET_AVX512VL.
;; The result is a vec_merge of the computed value with operand 3 under the
;; mask register in operand 4.  The preparation code stores
;; CONST1_RTX (<MODE>mode) in operands[5] and then calls
;; ix86_fixup_binary_operands_no_copy for MULT.
13662 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
13663 [(set (match_operand:PMULHRSW 0 "register_operand")
13664 (vec_merge:PMULHRSW
13666 (lshiftrt:<ssedoublemode>
13667 (plus:<ssedoublemode>
13668 (lshiftrt:<ssedoublemode>
13669 (mult:<ssedoublemode>
13670 (sign_extend:<ssedoublemode>
13671 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
13672 (sign_extend:<ssedoublemode>
13673 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
13677 (match_operand:PMULHRSW 3 "register_operand")
13678 (match_operand:<avx512fmaskmode> 4 "register_operand")))]
13679 "TARGET_AVX512BW && TARGET_AVX512VL"
13681 operands[5] = CONST1_RTX(<MODE>mode);
13682 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Unmasked pmulhrsw expander over the PMULHRSW modes.  Both inputs are
;; sign-extended to <ssedoublemode> and multiplied there.  The preparation
;; code stores CONST1_RTX (<MODE>mode) in operands[3] and then calls
;; ix86_fixup_binary_operands_no_copy for MULT.
13685 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
13686 [(set (match_operand:PMULHRSW 0 "register_operand")
13688 (lshiftrt:<ssedoublemode>
13689 (plus:<ssedoublemode>
13690 (lshiftrt:<ssedoublemode>
13691 (mult:<ssedoublemode>
13692 (sign_extend:<ssedoublemode>
13693 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
13694 (sign_extend:<ssedoublemode>
13695 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
13701 operands[3] = CONST1_RTX(<MODE>mode);
13702 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matching insn for pmulhrsw over VI2_AVX2.  Operand 3 must satisfy
;; const1_operand.  The condition additionally requires
;; ix86_binary_operator_ok for MULT.
;; Alternative 0 (isa noavx) prints the two-operand pmulhrsw; alternative 1
;; (isa avx) prints vpmulhrsw with the optional <mask_operand4> suffix.
13705 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
13706 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
13708 (lshiftrt:<ssedoublemode>
13709 (plus:<ssedoublemode>
13710 (lshiftrt:<ssedoublemode>
13711 (mult:<ssedoublemode>
13712 (sign_extend:<ssedoublemode>
13713 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
13714 (sign_extend:<ssedoublemode>
13715 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
13717 (match_operand:VI2_AVX2 3 "const1_operand"))
13719 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
13720 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
13722 pmulhrsw\t{%2, %0|%0, %2}
13723 vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
13724 [(set_attr "isa" "noavx,avx")
13725 (set_attr "type" "sseimul")
13726 (set_attr "prefix_data16" "1,*")
13727 (set_attr "prefix_extra" "1")
13728 (set_attr "prefix" "orig,maybe_evex")
13729 (set_attr "mode" "<sseinsnmode>")])
;; V4HI pmulhrsw on "y"-constrained registers.  Operand 1 is commutative
;; with operand 2 ('%') and tied to the destination; operand 3 must satisfy
;; const1_operand.  Requires TARGET_SSSE3 and ix86_binary_operator_ok.
13731 (define_insn "*ssse3_pmulhrswv4hi3"
13732 [(set (match_operand:V4HI 0 "register_operand" "=y")
13739 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
13741 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
13743 (match_operand:V4HI 3 "const1_operand"))
13745 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
13746 "pmulhrsw\t{%2, %0|%0, %2}"
13747 [(set_attr "type" "sseimul")
13748 (set_attr "prefix_extra" "1")
13749 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13750 (set_attr "mode" "DI")])
;; pshufb / vpshufb over the VI1_AVX512 modes.
;; Alternative 0 (isa noavx) is the two-operand form with operand 1 tied to
;; the destination; alternative 1 (isa avx) is the three-operand form with
;; the optional <mask_operand3> suffix.
13752 (define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
13753 [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,v")
13755 [(match_operand:VI1_AVX512 1 "register_operand" "0,v")
13756 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "xm,vm")]
13758 "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13760 pshufb\t{%2, %0|%0, %2}
13761 vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13762 [(set_attr "isa" "noavx,avx")
13763 (set_attr "type" "sselog1")
13764 (set_attr "prefix_data16" "1,*")
13765 (set_attr "prefix_extra" "1")
13766 (set_attr "prefix" "orig,maybe_evex")
13767 (set_attr "btver2_decode" "vector,vector")
13768 (set_attr "mode" "<sseinsnmode>")])
;; V8QI pshufb on "y"-constrained registers, expressed as an unspec of
;; operands 1 and 2; operand 1 is tied to the destination.
;; The stray ';' that used to follow the output template (it was read as
;; an empty comment) has been dropped.
13770 (define_insn "ssse3_pshufbv8qi3"
13771 [(set (match_operand:V8QI 0 "register_operand" "=y")
13772 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
13773 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
13776 "pshufb\t{%2, %0|%0, %2}"
13777 [(set_attr "type" "sselog1")
13778 (set_attr "prefix_extra" "1")
13779 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13780 (set_attr "mode" "DI")])
;; psign{b,w,d} / vpsign{b,w,d} over the VI124_AVX2 modes; the element
;; suffix comes from <ssemodesuffix>.
;; Alternative 0 (isa noavx) is the two-operand form, alternative 1
;; (isa avx) the three-operand VEX form.
13782 (define_insn "<ssse3_avx2>_psign<mode>3"
13783 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
13785 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
13786 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
13790 psign<ssemodesuffix>\t{%2, %0|%0, %2}
13791 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13792 [(set_attr "isa" "noavx,avx")
13793 (set_attr "type" "sselog1")
13794 (set_attr "prefix_data16" "1,*")
13795 (set_attr "prefix_extra" "1")
13796 (set_attr "prefix" "orig,vex")
13797 (set_attr "mode" "<sseinsnmode>")])
;; psign over the MMXMODEI modes on "y"-constrained registers; the element
;; suffix comes from <mmxvecsize> and operand 1 is tied to the destination.
;; The stray ';' that used to follow the output template (it was read as
;; an empty comment) has been dropped.
13799 (define_insn "ssse3_psign<mode>3"
13800 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
13802 [(match_operand:MMXMODEI 1 "register_operand" "0")
13803 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
13806 "psign<mmxvecsize>\t{%2, %0|%0, %2}"
13807 [(set_attr "type" "sselog1")
13808 (set_attr "prefix_extra" "1")
13809 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13810 (set_attr "mode" "DI")])
;; Masked vpalignr over VI1_AVX512, enabled for TARGET_AVX512BW and either a
;; 64-byte mode or TARGET_AVX512VL.  The result is merged with operand 4
;; under the mask register in operand 5 (printed as %{%5%}%N4).
;; Operand 3 is divided by 8 before it is printed as the immediate; its
;; predicate name indicates a multiple of 8 in 0..255.
13812 (define_insn "<ssse3_avx2>_palignr<mode>_mask"
13813 [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
13814 (vec_merge:VI1_AVX512
13816 [(match_operand:VI1_AVX512 1 "register_operand" "v")
13817 (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
13818 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
13820 (match_operand:VI1_AVX512 4 "vector_move_operand" "0C")
13821 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
13822 "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
13824 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13825 return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
13827 [(set_attr "type" "sseishft")
13828 (set_attr "atom_unit" "sishuf")
13829 (set_attr "prefix_extra" "1")
13830 (set_attr "length_immediate" "1")
13831 (set_attr "prefix" "evex")
13832 (set_attr "mode" "<sseinsnmode>")])
;; Unmasked palignr / vpalignr over SSESCALARMODE.  Operand 3 is divided by
;; 8 before printing.  The C output code switches on which_alternative: one
;; arm returns the two-operand palignr template and the other the
;; three-operand vpalignr template (isa "noavx,avx"); any other value is
;; gcc_unreachable.
13834 (define_insn "<ssse3_avx2>_palignr<mode>"
13835 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v")
13836 (unspec:SSESCALARMODE
13837 [(match_operand:SSESCALARMODE 1 "register_operand" "0,v")
13838 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,vm")
13839 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
13843 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13845 switch (which_alternative)
13848 return "palignr\t{%3, %2, %0|%0, %2, %3}";
13850 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
13852 gcc_unreachable ();
13855 [(set_attr "isa" "noavx,avx")
13856 (set_attr "type" "sseishft")
13857 (set_attr "atom_unit" "sishuf")
13858 (set_attr "prefix_data16" "1,*")
13859 (set_attr "prefix_extra" "1")
13860 (set_attr "length_immediate" "1")
13861 (set_attr "prefix" "orig,vex")
13862 (set_attr "mode" "<sseinsnmode>")])
;; DImode palignr on "y"-constrained registers; operand 1 is tied to the
;; destination.  Operand 3 is divided by 8 before it is printed as the
;; immediate.
13864 (define_insn "ssse3_palignrdi"
13865 [(set (match_operand:DI 0 "register_operand" "=y")
13866 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
13867 (match_operand:DI 2 "nonimmediate_operand" "ym")
13868 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
13872 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
13873 return "palignr\t{%3, %2, %0|%0, %2, %3}";
13875 [(set_attr "type" "sseishft")
13876 (set_attr "atom_unit" "sishuf")
13877 (set_attr "prefix_extra" "1")
13878 (set_attr "length_immediate" "1")
13879 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13880 (set_attr "mode" "DI")])
13882 ;; Mode iterator for the abs patterns.  V2DI and V4DI are enabled only with
13883 ;; AVX512VL, because pre-AVX-512 targets have no abs instruction for them.
;; Per-mode enabling conditions: 512-bit QI/HI vectors need AVX512BW,
;; 512-bit SI/DI vectors need AVX512F, 256-bit QI/HI/SI vectors need AVX2,
;; V4DI and V2DI need AVX512VL; V16QI, V8HI and V4SI are unconditional.
13884 (define_mode_iterator VI1248_AVX512VL_AVX512BW
13885 [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
13886 (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
13887 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
13888 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
;; Unmasked vector abs: prints pabs<ssemodesuffix>, with the %v template
;; prefix selecting the v-prefixed mnemonic when appropriate (prefix
;; "maybe_vex").  The insn condition is not visible in this excerpt.
13890 (define_insn "*abs<mode>2"
13891 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
13892 (abs:VI1248_AVX512VL_AVX512BW
13893 (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand" "vm")))]
13895 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
13896 [(set_attr "type" "sselog1")
13897 (set_attr "prefix_data16" "1")
13898 (set_attr "prefix_extra" "1")
13899 (set_attr "prefix" "maybe_vex")
13900 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpabs for the VI48_AVX512VL modes: the result is merged with
;; operand 2 under the mask register in operand 3 (printed as %{%3%}%N2).
;; The insn condition is not visible in this excerpt.
13902 (define_insn "abs<mode>2_mask"
13903 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13904 (vec_merge:VI48_AVX512VL
13906 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
13907 (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C")
13908 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
13910 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
13911 [(set_attr "type" "sselog1")
13912 (set_attr "prefix" "evex")
13913 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpabs for the VI12_AVX512VL modes; same shape as the VI48 variant
;; above in the file but instantiated over a different mode iterator, so the
;; generated pattern names differ per mode.
;; The insn condition is not visible in this excerpt.
13915 (define_insn "abs<mode>2_mask"
13916 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
13917 (vec_merge:VI12_AVX512VL
13919 (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
13920 (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C")
13921 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
13923 "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
13924 [(set_attr "type" "sselog1")
13925 (set_attr "prefix" "evex")
13926 (set_attr "mode" "<sseinsnmode>")])
;; Named abs expander over VI1248_AVX512VL_AVX512BW.  The visible
;; preparation code calls ix86_expand_sse2_abs (operands[0], operands[1]);
;; the condition guarding that call is not visible in this excerpt.
13928 (define_expand "abs<mode>2"
13929 [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
13930 (abs:VI1248_AVX512VL_AVX512BW
13931 (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand")))]
13936 ix86_expand_sse2_abs (operands[0], operands[1]);
;; pabs over the MMXMODEI modes on "y"-constrained registers; the element
;; suffix comes from <mmxvecsize>.
;; The stray ';' that used to follow the output template (it was read as
;; an empty comment) has been dropped.
13941 (define_insn "abs<mode>2"
13942 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
13944 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
13946 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
13947 [(set_attr "type" "sselog1")
13948 (set_attr "prefix_rep" "0")
13949 (set_attr "prefix_extra" "1")
13950 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
13951 (set_attr "mode" "DI")])
13953 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13955 ;; AMD SSE4A instructions
13957 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; movnt<ssemodesuffix> store of a MODEF value from an "x" register to
;; memory, expressed as an unspec of operand 1.
13959 (define_insn "sse4a_movnt<mode>"
13960 [(set (match_operand:MODEF 0 "memory_operand" "=m")
13962 [(match_operand:MODEF 1 "register_operand" "x")]
13965 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
13966 [(set_attr "type" "ssemov")
13967 (set_attr "mode" "<MODE>")])
;; Same store as sse4a_movnt<mode>, but the stored scalar is element 0 of a
;; VF_128 vector register (vec_select with index 0).
13969 (define_insn "sse4a_vmmovnt<mode>"
13970 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
13971 (unspec:<ssescalarmode>
13972 [(vec_select:<ssescalarmode>
13973 (match_operand:VF_128 1 "register_operand" "x")
13974 (parallel [(const_int 0)]))]
13977 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
13978 [(set_attr "type" "ssemov")
13979 (set_attr "mode" "<ssescalarmode>")])
;; extrq with two immediates (operands 2 and 3, each 0..255 per their
;; predicates); length_immediate is therefore 2.  Operand 1 is tied to the
;; destination.
13981 (define_insn "sse4a_extrqi"
13982 [(set (match_operand:V2DI 0 "register_operand" "=x")
13983 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13984 (match_operand 2 "const_0_to_255_operand")
13985 (match_operand 3 "const_0_to_255_operand")]
13988 "extrq\t{%3, %2, %0|%0, %2, %3}"
13989 [(set_attr "type" "sse")
13990 (set_attr "prefix_data16" "1")
13991 (set_attr "length_immediate" "2")
13992 (set_attr "mode" "TI")])
;; Register form of extrq: the control value is the V16QI register in
;; operand 2 instead of immediates.  Operand 1 is tied to the destination.
13994 (define_insn "sse4a_extrq"
13995 [(set (match_operand:V2DI 0 "register_operand" "=x")
13996 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
13997 (match_operand:V16QI 2 "register_operand" "x")]
14000 "extrq\t{%2, %0|%0, %2}"
14001 [(set_attr "type" "sse")
14002 (set_attr "prefix_data16" "1")
14003 (set_attr "mode" "TI")])
;; insertq with two immediates (operands 3 and 4, each 0..255 per their
;; predicates); length_immediate is therefore 2.  Operand 1 is tied to the
;; destination and operand 2 is the register source.
14005 (define_insn "sse4a_insertqi"
14006 [(set (match_operand:V2DI 0 "register_operand" "=x")
14007 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14008 (match_operand:V2DI 2 "register_operand" "x")
14009 (match_operand 3 "const_0_to_255_operand")
14010 (match_operand 4 "const_0_to_255_operand")]
14013 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
14014 [(set_attr "type" "sseins")
14015 (set_attr "prefix_data16" "0")
14016 (set_attr "prefix_rep" "1")
14017 (set_attr "length_immediate" "2")
14018 (set_attr "mode" "TI")])
;; Register-only form of insertq (no immediates).  Operand 1 is tied to the
;; destination and operand 2 is the register source.
14020 (define_insn "sse4a_insertq"
14021 [(set (match_operand:V2DI 0 "register_operand" "=x")
14022 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
14023 (match_operand:V2DI 2 "register_operand" "x")]
14026 "insertq\t{%2, %0|%0, %2}"
14027 [(set_attr "type" "sseins")
14028 (set_attr "prefix_data16" "0")
14029 (set_attr "prefix_rep" "1")
14030 (set_attr "mode" "TI")])
14032 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14034 ;; Intel SSE4.1 instructions
14036 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14038 ;; Mapping of immediate bits for blend instructions
;; Largest immediate per mode (2^nelts - 1); it is spliced into the
;; predicate name const_0_to_<blendbits>_operand of the blend pattern.
14039 (define_mode_attr blendbits
14040 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
;; Immediate blend as a vec_merge of operand 2 and operand 1 selected by the
;; immediate in operand 3.  Note the RTL lists operand 2 before operand 1.
;; Alternatives 0 and 1 (isa noavx) are the two-operand blendps/blendpd
;; forms; alternative 2 (isa avx) is the three-operand vblend form.
14042 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
14043 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14044 (vec_merge:VF_128_256
14045 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14046 (match_operand:VF_128_256 1 "register_operand" "0,0,x")
14047 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
14050 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14051 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14052 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14053 [(set_attr "isa" "noavx,noavx,avx")
14054 (set_attr "type" "ssemov")
14055 (set_attr "length_immediate" "1")
14056 (set_attr "prefix_data16" "1,1,*")
14057 (set_attr "prefix_extra" "1")
14058 (set_attr "prefix" "orig,orig,vex")
14059 (set_attr "mode" "<MODE>")])
;; Variable blend: the selector is a vector register (operand 3).  In the
;; noavx alternatives operand 3 is constrained to "Yz" and operand 1 is tied
;; to the destination; the avx alternative takes any "x" register and
;; prints the four-operand vblendv form.
14061 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
14062 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14064 [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
14065 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14066 (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
14070 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14071 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14072 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14073 [(set_attr "isa" "noavx,noavx,avx")
14074 (set_attr "type" "ssemov")
14075 (set_attr "length_immediate" "1")
14076 (set_attr "prefix_data16" "1,1,*")
14077 (set_attr "prefix_extra" "1")
14078 (set_attr "prefix" "orig,orig,vex")
14079 (set_attr "btver2_decode" "vector,vector,vector")
14080 (set_attr "mode" "<MODE>")])
;; dpps/dppd and their v-prefixed forms, with an 8-bit immediate in
;; operand 3.  The '%' in the first alternative of operand 1 marks operands
;; 1 and 2 as commutative.  Alternatives 0 and 1 are isa noavx, alternative
;; 2 is isa avx.
14082 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
14083 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
14085 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,x")
14086 (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
14087 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
14091 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14092 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
14093 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14094 [(set_attr "isa" "noavx,noavx,avx")
14095 (set_attr "type" "ssemul")
14096 (set_attr "length_immediate" "1")
14097 (set_attr "prefix_data16" "1,1,*")
14098 (set_attr "prefix_extra" "1")
14099 (set_attr "prefix" "orig,orig,vex")
14100 (set_attr "btver2_decode" "vector,vector,vector")
14101 (set_attr "mode" "<MODE>")])
14103 ;; Mode attribute used by `vmovntdqa' pattern
;; Supplies the name prefix of the movntdqa pattern per mode:
;; V2DI -> sse4_1, V4DI -> avx2, V8DI -> avx512f.
14104 (define_mode_attr vi8_sse4_1_avx2_avx512
14105 [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
;; movntdqa load: operand 1 must be memory in every alternative.  The third
;; alternative uses a "v" destination and is tagged prefix "evex"; the
;; first two are "maybe_vex".
14107 (define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
14108 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x, v")
14109 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m, m")]
14112 "%vmovntdqa\t{%1, %0|%0, %1}"
14113 [(set_attr "type" "ssemov")
14114 (set_attr "prefix_extra" "1,1,*")
14115 (set_attr "prefix" "maybe_vex,maybe_vex,evex")
14116 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw / vmpsadbw over VI1_AVX2 with an 8-bit immediate in operand 3.
;; Alternatives 0 and 1 (isa noavx) tie operand 1 to the destination;
;; alternative 2 (isa avx) is the four-operand VEX form.
14118 (define_insn "<sse4_1_avx2>_mpsadbw"
14119 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
14121 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14122 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
14123 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
14127 mpsadbw\t{%3, %2, %0|%0, %2, %3}
14128 mpsadbw\t{%3, %2, %0|%0, %2, %3}
14129 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14130 [(set_attr "isa" "noavx,noavx,avx")
14131 (set_attr "type" "sselog1")
14132 (set_attr "length_immediate" "1")
14133 (set_attr "prefix_extra" "1")
14134 (set_attr "prefix" "orig,orig,vex")
14135 (set_attr "btver2_decode" "vector,vector,vector")
14136 (set_attr "mode" "<sseinsnmode>")])
;; packusdw: the result is the vec_concat of the unsigned-saturating
;; truncations (us_truncate) of operands 1 and 2, each from <sseunpackmode>
;; to <ssehalfvecmode>.  Alternative 2 (isa avx) prints vpackusdw with the
;; optional <mask_operand3> suffix.
14138 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
14139 [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v")
14140 (vec_concat:VI2_AVX2
14141 (us_truncate:<ssehalfvecmode>
14142 (match_operand:<sseunpackmode> 1 "register_operand" "0,0,v"))
14143 (us_truncate:<ssehalfvecmode>
14144 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "Yrm,*xm,vm"))))]
14145 "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
14147 packusdw\t{%2, %0|%0, %2}
14148 packusdw\t{%2, %0|%0, %2}
14149 vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14150 [(set_attr "isa" "noavx,noavx,avx")
14151 (set_attr "type" "sselog")
14152 (set_attr "prefix_extra" "1")
14153 (set_attr "prefix" "orig,orig,maybe_evex")
14154 (set_attr "mode" "<sseinsnmode>")])
;; Byte-wise variable blend.  In the noavx alternatives the selector
;; (operand 3) is constrained to "Yz" and no immediate is counted
;; (length_immediate "*"); in the avx alternative the selector is any "x"
;; register and length_immediate is 1.
14156 (define_insn "<sse4_1_avx2>_pblendvb"
14157 [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
14159 [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
14160 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
14161 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
14165 pblendvb\t{%3, %2, %0|%0, %2, %3}
14166 pblendvb\t{%3, %2, %0|%0, %2, %3}
14167 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14168 [(set_attr "isa" "noavx,noavx,avx")
14169 (set_attr "type" "ssemov")
14170 (set_attr "prefix_extra" "1")
14171 (set_attr "length_immediate" "*,*,1")
14172 (set_attr "prefix" "orig,orig,vex")
14173 (set_attr "btver2_decode" "vector,vector,vector")
14174 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit pblendw / vpblendw on V8HI with an 8-bit immediate in operand 3.
;; The RTL lists operand 2 before operand 1.  Alternatives 0 and 1 are
;; isa noavx, alternative 2 is isa avx.
14176 (define_insn "sse4_1_pblendw"
14177 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
14179 (match_operand:V8HI 2 "nonimmediate_operand" "Yrm,*xm,xm")
14180 (match_operand:V8HI 1 "register_operand" "0,0,x")
14181 (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
14184 pblendw\t{%3, %2, %0|%0, %2, %3}
14185 pblendw\t{%3, %2, %0|%0, %2, %3}
14186 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14187 [(set_attr "isa" "noavx,noavx,avx")
14188 (set_attr "type" "ssemov")
14189 (set_attr "prefix_extra" "1")
14190 (set_attr "length_immediate" "1")
14191 (set_attr "prefix" "orig,orig,vex")
14192 (set_attr "mode" "TI")])
14194 ;; The builtin uses an 8-bit immediate. Expand that.
;; V16HI pblendw expander.  The preparation code keeps the low 8 bits of
;; operand 3 and copies them into bits 8-15 as well (val << 8 | val), so the
;; RTL mask covers all 16 elements.
14195 (define_expand "avx2_pblendw"
14196 [(set (match_operand:V16HI 0 "register_operand")
14198 (match_operand:V16HI 2 "nonimmediate_operand")
14199 (match_operand:V16HI 1 "register_operand")
14200 (match_operand:SI 3 "const_0_to_255_operand")))]
14203 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
14204 operands[3] = GEN_INT (val << 8 | val);
;; Matching insn for the V16HI pblendw.  Operand 3 must satisfy
;; avx2_pblendw_operand; before printing it is reduced to its low 8 bits,
;; which is the immediate vpblendw receives.
14207 (define_insn "*avx2_pblendw"
14208 [(set (match_operand:V16HI 0 "register_operand" "=x")
14210 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
14211 (match_operand:V16HI 1 "register_operand" "x")
14212 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
14215 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
14216 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14218 [(set_attr "type" "ssemov")
14219 (set_attr "prefix_extra" "1")
14220 (set_attr "length_immediate" "1")
14221 (set_attr "prefix" "vex")
14222 (set_attr "mode" "OI")])
;; vpblendd over VI4_AVX2 as a vec_merge of operand 2 and operand 1 under
;; the 8-bit immediate in operand 3.  VEX-only, single alternative.
14224 (define_insn "avx2_pblendd<mode>"
14225 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
14226 (vec_merge:VI4_AVX2
14227 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
14228 (match_operand:VI4_AVX2 1 "register_operand" "x")
14229 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
14231 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14232 [(set_attr "type" "ssemov")
14233 (set_attr "prefix_extra" "1")
14234 (set_attr "length_immediate" "1")
14235 (set_attr "prefix" "vex")
14236 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw on V8HI, kept as an unspec (UNSPEC_PHMINPOSUW).  The %v
;; template prefix selects the v-prefixed mnemonic when appropriate.
14238 (define_insn "sse4_1_phminposuw"
14239 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x")
14240 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm")]
14241 UNSPEC_PHMINPOSUW))]
14243 "%vphminposuw\t{%1, %0|%0, %1}"
14244 [(set_attr "type" "sselog1")
14245 (set_attr "prefix_extra" "1")
14246 (set_attr "prefix" "maybe_vex")
14247 (set_attr "mode" "TI")])
;; Byte -> word extension, V16QI to V16HI (vpmov<extsuffix>bw).  All 16
;; source bytes are used, so the source is printed unmodified (%1).
;; <code> and <extsuffix> come from a code iterator defined elsewhere.
14249 (define_insn "avx2_<code>v16qiv16hi2<mask_name>"
14250 [(set (match_operand:V16HI 0 "register_operand" "=v")
14252 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14253 "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14254 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14255 [(set_attr "type" "ssemov")
14256 (set_attr "prefix_extra" "1")
14257 (set_attr "prefix" "maybe_evex")
14258 (set_attr "mode" "OI")])
;; Byte -> word extension, V32QI to V32HI (vpmov<extsuffix>bw), EVEX only.
;; The whole V32QI source is used, so it is printed unmodified (%1).
14260 (define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
14261 [(set (match_operand:V32HI 0 "register_operand" "=v")
14263 (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
14265 "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14266 [(set_attr "type" "ssemov")
14267 (set_attr "prefix_extra" "1")
14268 (set_attr "prefix" "evex")
14269 (set_attr "mode" "XI")])
;; Byte -> word extension of the low 8 bytes (indices 0..7) of a V16QI to
;; V8HI.  Only 64 bits of the source are read, hence the %q1 modifier in the
;; Intel dialect and ssememalign "64".
14271 (define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
14272 [(set (match_operand:V8HI 0 "register_operand" "=Yr,*v")
14275 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14276 (parallel [(const_int 0) (const_int 1)
14277 (const_int 2) (const_int 3)
14278 (const_int 4) (const_int 5)
14279 (const_int 6) (const_int 7)]))))]
14280 "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14281 "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14282 [(set_attr "type" "ssemov")
14283 (set_attr "ssememalign" "64")
14284 (set_attr "prefix_extra" "1")
14285 (set_attr "prefix" "maybe_vex")
14286 (set_attr "mode" "TI")])
;; Byte -> dword extension, V16QI to V16SI (vpmov<extsuffix>bd), EVEX only.
;; The RTL extends the whole V16QI operand (no vec_select), so all 128 bits
;; of the source are read.  The Intel-dialect operand is therefore printed
;; unmodified (%1); the former %q1 would have described a 64-bit memory
;; operand.
14288 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
14289 [(set (match_operand:V16SI 0 "register_operand" "=v")
14291 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
14293 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14294 [(set_attr "type" "ssemov")
14295 (set_attr "prefix" "evex")
14296 (set_attr "mode" "XI")])
;; Byte -> dword extension of the low 8 bytes (indices 0..7) of a V16QI to
;; V8SI.  Only 64 bits of the source are read, hence %q1 in the Intel
;; dialect.
14298 (define_insn "avx2_<code>v8qiv8si2<mask_name>"
14299 [(set (match_operand:V8SI 0 "register_operand" "=v")
14302 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14303 (parallel [(const_int 0) (const_int 1)
14304 (const_int 2) (const_int 3)
14305 (const_int 4) (const_int 5)
14306 (const_int 6) (const_int 7)]))))]
14307 "TARGET_AVX2 && <mask_avx512vl_condition>"
14308 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14309 [(set_attr "type" "ssemov")
14310 (set_attr "prefix_extra" "1")
14311 (set_attr "prefix" "maybe_evex")
14312 (set_attr "mode" "OI")])
;; Byte -> dword extension of the low 4 bytes (indices 0..3) of a V16QI to
;; V4SI.  Only 32 bits of the source are read, hence %k1 in the Intel
;; dialect and ssememalign "32".
14314 (define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
14315 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
14318 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14319 (parallel [(const_int 0) (const_int 1)
14320 (const_int 2) (const_int 3)]))))]
14321 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14322 "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14323 [(set_attr "type" "ssemov")
14324 (set_attr "ssememalign" "32")
14325 (set_attr "prefix_extra" "1")
14326 (set_attr "prefix" "maybe_vex")
14327 (set_attr "mode" "TI")])
;; Word -> dword extension, V16HI to V16SI (vpmov<extsuffix>wd), EVEX only.
;; The whole source vector is used, so it is printed unmodified (%1).
14329 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
14330 [(set (match_operand:V16SI 0 "register_operand" "=v")
14332 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
14334 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14335 [(set_attr "type" "ssemov")
14336 (set_attr "prefix" "evex")
14337 (set_attr "mode" "XI")])
;; Word -> dword extension, V8HI to V8SI (vpmov<extsuffix>wd).  The whole
;; source vector is used, so it is printed unmodified (%1).
14339 (define_insn "avx2_<code>v8hiv8si2<mask_name>"
14340 [(set (match_operand:V8SI 0 "register_operand" "=v")
14342 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14343 "TARGET_AVX2 && <mask_avx512vl_condition>"
14344 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14345 [(set_attr "type" "ssemov")
14346 (set_attr "prefix_extra" "1")
14347 (set_attr "prefix" "maybe_evex")
14348 (set_attr "mode" "OI")])
;; Word -> dword extension of the low 4 words (indices 0..3) of a V8HI to
;; V4SI.  Only 64 bits of the source are read, hence %q1 in the Intel
;; dialect and ssememalign "64".
14350 (define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
14351 [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
14354 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
14355 (parallel [(const_int 0) (const_int 1)
14356 (const_int 2) (const_int 3)]))))]
14357 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14358 "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14359 [(set_attr "type" "ssemov")
14360 (set_attr "ssememalign" "64")
14361 (set_attr "prefix_extra" "1")
14362 (set_attr "prefix" "maybe_vex")
14363 (set_attr "mode" "TI")])
;; Byte -> qword extension of the low 8 bytes (indices 0..7) of a V16QI to
;; V8DI (vpmov<extsuffix>bq), EVEX only.  Eight source bytes, i.e. 64 bits,
;; are read, so the Intel-dialect operand uses %q1; the former %k1 would
;; have described a 32-bit memory operand.
14365 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
14366 [(set (match_operand:V8DI 0 "register_operand" "=v")
14369 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14370 (parallel [(const_int 0) (const_int 1)
14371 (const_int 2) (const_int 3)
14372 (const_int 4) (const_int 5)
14373 (const_int 6) (const_int 7)]))))]
14375 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14376 [(set_attr "type" "ssemov")
14377 (set_attr "prefix" "evex")
14378 (set_attr "mode" "XI")])
;; Byte -> qword extension of the low 4 bytes (indices 0..3) of a V16QI to
;; V4DI.  Only 32 bits of the source are read, hence %k1 in the Intel
;; dialect.
14380 (define_insn "avx2_<code>v4qiv4di2<mask_name>"
14381 [(set (match_operand:V4DI 0 "register_operand" "=v")
14384 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
14385 (parallel [(const_int 0) (const_int 1)
14386 (const_int 2) (const_int 3)]))))]
14387 "TARGET_AVX2 && <mask_avx512vl_condition>"
14388 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14389 [(set_attr "type" "ssemov")
14390 (set_attr "prefix_extra" "1")
14391 (set_attr "prefix" "maybe_evex")
14392 (set_attr "mode" "OI")])
;; Byte -> qword extension of the low 2 bytes (indices 0..1) of a V16QI to
;; V2DI.  Only 16 bits of the source are read, hence %w1 in the Intel
;; dialect and ssememalign "16".
14394 (define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
14395 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14398 (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
14399 (parallel [(const_int 0) (const_int 1)]))))]
14400 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14401 "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
14402 [(set_attr "type" "ssemov")
14403 (set_attr "ssememalign" "16")
14404 (set_attr "prefix_extra" "1")
14405 (set_attr "prefix" "maybe_vex")
14406 (set_attr "mode" "TI")])
;; Word -> qword extension, V8HI to V8DI (vpmov<extsuffix>wq), EVEX only.
;; The RTL extends the whole V8HI operand (no vec_select), so all 128 bits
;; of the source are read.  The Intel-dialect operand is therefore printed
;; unmodified (%1); the former %q1 would have described a 64-bit memory
;; operand.
14408 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
14409 [(set (match_operand:V8DI 0 "register_operand" "=v")
14411 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
14413 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14414 [(set_attr "type" "ssemov")
14415 (set_attr "prefix" "evex")
14416 (set_attr "mode" "XI")])
;; Word -> qword extension of the low 4 words (indices 0..3) of a V8HI to
;; V4DI.  Only 64 bits of the source are read, hence %q1 in the Intel
;; dialect.
14418 (define_insn "avx2_<code>v4hiv4di2<mask_name>"
14419 [(set (match_operand:V4DI 0 "register_operand" "=v")
14422 (match_operand:V8HI 1 "nonimmediate_operand" "vm")
14423 (parallel [(const_int 0) (const_int 1)
14424 (const_int 2) (const_int 3)]))))]
14425 "TARGET_AVX2 && <mask_avx512vl_condition>"
14426 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14427 [(set_attr "type" "ssemov")
14428 (set_attr "prefix_extra" "1")
14429 (set_attr "prefix" "maybe_evex")
14430 (set_attr "mode" "OI")])
;; Word -> qword extension of the low 2 words (indices 0..1) of a V8HI to
;; V2DI.  Only 32 bits of the source are read, hence %k1 in the Intel
;; dialect and ssememalign "32".
14432 (define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
14433 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14436 (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
14437 (parallel [(const_int 0) (const_int 1)]))))]
14438 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14439 "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14440 [(set_attr "type" "ssemov")
14441 (set_attr "ssememalign" "32")
14442 (set_attr "prefix_extra" "1")
14443 (set_attr "prefix" "maybe_vex")
14444 (set_attr "mode" "TI")])
;; Dword -> qword extension, V8SI to V8DI (vpmov<extsuffix>dq), EVEX only.
;; The whole source vector is used, so it is printed unmodified (%1).
14446 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
14447 [(set (match_operand:V8DI 0 "register_operand" "=v")
14449 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
14451 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14452 [(set_attr "type" "ssemov")
14453 (set_attr "prefix" "evex")
14454 (set_attr "mode" "XI")])
;; Dword -> qword extension, V4SI to V4DI (vpmov<extsuffix>dq).  The whole
;; source vector is used, so it is printed unmodified (%1).
14456 (define_insn "avx2_<code>v4siv4di2<mask_name>"
14457 [(set (match_operand:V4DI 0 "register_operand" "=v")
14459 (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
14460 "TARGET_AVX2 && <mask_avx512vl_condition>"
14461 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14462 [(set_attr "type" "ssemov")
14463 (set_attr "prefix" "maybe_evex")
14464 (set_attr "prefix_extra" "1")
14465 (set_attr "mode" "OI")])
;; Dword -> qword extension of the low 2 dwords (indices 0..1) of a V4SI to
;; V2DI.  Only 64 bits of the source are read, hence %q1 in the Intel
;; dialect and ssememalign "64".
14467 (define_insn "sse4_1_<code>v2siv2di2<mask_name>"
14468 [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14471 (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*vm")
14472 (parallel [(const_int 0) (const_int 1)]))))]
14473 "TARGET_SSE4_1 && <mask_avx512vl_condition>"
14474 "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14475 [(set_attr "type" "ssemov")
14476 (set_attr "ssememalign" "64")
14477 (set_attr "prefix_extra" "1")
14478 (set_attr "prefix" "maybe_vex")
14479 (set_attr "mode" "TI")])
14481 ;; vtestps/vtestpd are very similar to comiss and ucomiss when
14482 ;; setting FLAGS_REG.  But they are not really compare instructions.
;; vtest<ssemodesuffix>: sets FLAGS_REG (mode CC) from an unspec over
;; two VF_128_256 operands.  Operand 0 must be a register; operand 1
;; may be a register or memory.  Always VEX-encoded.
14483 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
14484 [(set (reg:CC FLAGS_REG)
14485 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
14486 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
14489 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
14490 [(set_attr "type" "ssecomi")
14491 (set_attr "prefix_extra" "1")
14492 (set_attr "prefix" "vex")
14493 (set_attr "mode" "<MODE>")])
14495 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
14496 ;; but it is not really a compare instruction.
;; 256-bit vptest: sets FLAGS_REG (mode CC) from an unspec over two
;; V4DI operands.  Operand 0 must be a register; operand 1 may be a
;; register or memory.  Always VEX-encoded.
14497 (define_insn "avx_ptest256"
14498 [(set (reg:CC FLAGS_REG)
14499 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
14500 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
14503 "vptest\t{%1, %0|%0, %1}"
14504 [(set_attr "type" "ssecomi")
14505 (set_attr "prefix_extra" "1")
14506 (set_attr "prefix" "vex")
14507 (set_attr "btver2_decode" "vector")
14508 (set_attr "mode" "OI")])
;; 128-bit ptest (%vptest, i.e. VEX-encoded when available): sets
;; FLAGS_REG (mode CC) from an unspec over two V2DI operands.  Two
;; alternatives are offered, using the Yr and x register classes;
;; operand 1 may also be memory.
14510 (define_insn "sse4_1_ptest"
14511 [(set (reg:CC FLAGS_REG)
14512 (unspec:CC [(match_operand:V2DI 0 "register_operand" "Yr,*x")
14513 (match_operand:V2DI 1 "nonimmediate_operand" "Yrm,*xm")]
14516 "%vptest\t{%1, %0|%0, %1}"
14517 [(set_attr "type" "ssecomi")
14518 (set_attr "prefix_extra" "1")
14519 (set_attr "prefix" "maybe_vex")
14520 (set_attr "mode" "TI")])
;; Packed %vround<ssemodesuffix> on VF_128_256 vectors.  Operand 1 is
;; the source (register or memory) and operand 2 is an immediate
;; accepted by const_0_to_15_operand.  The prefix_data16 attribute is
;; computed from a TARGET_AVX test rather than being a fixed value.
14522 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
14523 [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x")
14525 [(match_operand:VF_128_256 1 "nonimmediate_operand" "Yrm,*xm")
14526 (match_operand:SI 2 "const_0_to_15_operand" "n,n")]
14529 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14530 [(set_attr "type" "ssecvt")
14531 (set (attr "prefix_data16")
14533 (match_test "TARGET_AVX")
14535 (const_string "1")))
14536 (set_attr "prefix_extra" "1")
14537 (set_attr "length_immediate" "1")
14538 (set_attr "prefix" "maybe_vex")
14539 (set_attr "mode" "<MODE>")])
;; Expander: round, then convert to integer.  Operand 1 (VF1_128_256)
;; is rounded into a fresh <MODE> temporary by the matching
;; <sse4_1>_round pattern, and that temporary is converted into the
;; <sseintvecmode> operand 0 with fix_trunc.  Operand 2 is an immediate
;; accepted by const_0_to_15_operand.
14541 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
14542 [(match_operand:<sseintvecmode> 0 "register_operand")
14543 (match_operand:VF1_128_256 1 "nonimmediate_operand")
14544 (match_operand:SI 2 "const_0_to_15_operand")]
14547 rtx tmp = gen_reg_rtx (<MODE>mode);
14550 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
14553 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Expander for V8DF rounding: forwards operands 0, 1 and 2 unchanged
;; to gen_avx512f_rndscalev8df.  Operand 2 is an immediate accepted by
;; const_0_to_15_operand.
14557 (define_expand "avx512f_roundpd512"
14558 [(match_operand:V8DF 0 "register_operand")
14559 (match_operand:V8DF 1 "nonimmediate_operand")
14560 (match_operand:SI 2 "const_0_to_15_operand")]
14563 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
;; Expander: round two VF2 inputs (operands 1 and 2) with the immediate
;; in operand 3 and produce a packed <ssepackfltmode> integer result in
;; operand 0.
;;
;; For V2DF with TARGET_AVX, !TARGET_PREFER_AVX128 and when optimizing
;; the insn for speed, the two inputs are concatenated into one V4DF
;; register, rounded once with avx_roundpd256 and converted with
;; fix_truncv4dfv4si2.  The other path rounds each input into its own
;; temporary and combines them with vec_pack_sfix_trunc_<mode>.
14567 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
14568 [(match_operand:<ssepackfltmode> 0 "register_operand")
14569 (match_operand:VF2 1 "nonimmediate_operand")
14570 (match_operand:VF2 2 "nonimmediate_operand")
14571 (match_operand:SI 3 "const_0_to_15_operand")]
14576 if (<MODE>mode == V2DFmode
14577 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
14579 rtx tmp2 = gen_reg_rtx (V4DFmode);
14581 tmp0 = gen_reg_rtx (V4DFmode);
14582 tmp1 = force_reg (V2DFmode, operands[1]);
14584 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14585 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
14586 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14590 tmp0 = gen_reg_rtx (<MODE>mode);
14591 tmp1 = gen_reg_rtx (<MODE>mode);
14594 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
14597 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
14600 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar round<ssescalarmodesuffix> on VF_128 registers, with an
;; immediate in operand 3 accepted by const_0_to_15_operand.
;; Alternatives 0 and 1 are the non-AVX two-operand forms (isa noavx),
;; where operand 1 is tied to the destination.  Alternative 2 is the
;; AVX three-operand vround form, with operand 1 in its own register.
14605 (define_insn "sse4_1_round<ssescalarmodesuffix>"
14606 [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x")
14609 [(match_operand:VF_128 2 "register_operand" "Yr,*x,x")
14610 (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")]
14612 (match_operand:VF_128 1 "register_operand" "0,0,x")
14616 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
14617 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
14618 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14619 [(set_attr "isa" "noavx,noavx,avx")
14620 (set_attr "type" "ssecvt")
14621 (set_attr "length_immediate" "1")
14622 (set_attr "prefix_data16" "1,1,*")
14623 (set_attr "prefix_extra" "1")
14624 (set_attr "prefix" "orig,orig,vex")
14625 (set_attr "mode" "<MODE>")])
;; Expander for round<mode>2 on VF vectors, enabled only when
;; TARGET_ROUND && !flag_trapping_math.
;;
;; The preparation code builds the scalar constant nextafter (0.5, 0.0)
;; by subtracting the value produced by
;; real_2expN (..., -(fmt->p) - 1, ...) from dconsthalf.  It broadcasts
;; that constant with ix86_build_const_vector and gives it the sign of
;; operand 1 via copysign, storing the result in operands[3].
;; operands[4] is a fresh temporary for the intermediate value, and
;; operands[5] is the ROUND_TRUNC immediate used by the second set.
14627 (define_expand "round<mode>2"
14628 [(set (match_dup 4)
14630 (match_operand:VF 1 "register_operand")
14632 (set (match_operand:VF 0 "register_operand")
14634 [(match_dup 4) (match_dup 5)]
14636 "TARGET_ROUND && !flag_trapping_math"
14638 machine_mode scalar_mode;
14639 const struct real_format *fmt;
14640 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
14641 rtx half, vec_half;
14643 scalar_mode = GET_MODE_INNER (<MODE>mode);
14645 /* load nextafter (0.5, 0.0) */
14646 fmt = REAL_MODE_FORMAT (scalar_mode);
14647 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
14648 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
14649 half = const_double_from_real_value (pred_half, scalar_mode);
14651 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
14652 vec_half = force_reg (<MODE>mode, vec_half);
14654 operands[3] = gen_reg_rtx (<MODE>mode);
14655 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
14657 operands[4] = gen_reg_rtx (<MODE>mode);
14658 operands[5] = GEN_INT (ROUND_TRUNC);
;; Expander: round<mode>2 followed by conversion to integer.  Operand 1
;; (VF1_128_256) is rounded into a fresh <MODE> temporary, which is
;; converted into the <sseintvecmode> operand 0 with fix_trunc.
;; Enabled only when TARGET_ROUND && !flag_trapping_math.
14661 (define_expand "round<mode>2_sfix"
14662 [(match_operand:<sseintvecmode> 0 "register_operand")
14663 (match_operand:VF1_128_256 1 "register_operand")]
14664 "TARGET_ROUND && !flag_trapping_math"
14666 rtx tmp = gen_reg_rtx (<MODE>mode);
14668 emit_insn (gen_round<mode>2 (tmp, operands[1]));
14671 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Expander: round two VF2 inputs (operands 1 and 2) with round<mode>2
;; and produce a packed <ssepackfltmode> integer result in operand 0.
;; Enabled only when TARGET_ROUND && !flag_trapping_math.
;;
;; For V2DF with TARGET_AVX, !TARGET_PREFER_AVX128 and when optimizing
;; the insn for speed, the inputs are concatenated into one V4DF
;; register, rounded once with roundv4df2 and converted with
;; fix_truncv4dfv4si2.  The other path rounds each input into its own
;; temporary and combines them with vec_pack_sfix_trunc_<mode>.
14675 (define_expand "round<mode>2_vec_pack_sfix"
14676 [(match_operand:<ssepackfltmode> 0 "register_operand")
14677 (match_operand:VF2 1 "register_operand")
14678 (match_operand:VF2 2 "register_operand")]
14679 "TARGET_ROUND && !flag_trapping_math"
14683 if (<MODE>mode == V2DFmode
14684 && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
14686 rtx tmp2 = gen_reg_rtx (V4DFmode);
14688 tmp0 = gen_reg_rtx (V4DFmode);
14689 tmp1 = force_reg (V2DFmode, operands[1]);
14691 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14692 emit_insn (gen_roundv4df2 (tmp2, tmp0));
14693 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14697 tmp0 = gen_reg_rtx (<MODE>mode);
14698 tmp1 = gen_reg_rtx (<MODE>mode);
14700 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
14701 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
14704 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
14709 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14711 ;; Intel SSE4.2 string/text processing instructions
14713 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined pcmpestr pattern with three results: operand 0 (SI,
;; constraint "c"), operand 1 (V16QI, constraint "Yz") and FLAGS_REG.
;; Inputs: operand 2 (V16QI register), operand 3 (SI, "a"), operand 4
;; (V16QI register or memory), operand 5 (SI, "d") and operand 6, an
;; immediate accepted by const_0_to_255_operand.
;;
;; The split, which needs can_create_pseudo_p (), looks at the
;; REG_UNUSED notes on curr_insn to learn which of the three results
;; are live.  It can emit sse4_2_pcmpestri (result in operand 0) and
;; sse4_2_pcmpestrm (result in operand 1) -- presumably only for the
;; results that are live; confirm against the guarding conditions.
;; When only the flags are live it emits sse4_2_pcmpestr_cconly, and
;; when nothing is live it emits a NOTE_INSN_DELETED note.
14715 (define_insn_and_split "sse4_2_pcmpestr"
14716 [(set (match_operand:SI 0 "register_operand" "=c,c")
14718 [(match_operand:V16QI 2 "register_operand" "x,x")
14719 (match_operand:SI 3 "register_operand" "a,a")
14720 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
14721 (match_operand:SI 5 "register_operand" "d,d")
14722 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
14724 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
14732 (set (reg:CC FLAGS_REG)
14741 && can_create_pseudo_p ()"
14746 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14747 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14748 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14751 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14752 operands[3], operands[4],
14753 operands[5], operands[6]));
14755 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14756 operands[3], operands[4],
14757 operands[5], operands[6]));
14758 if (flags && !(ecx || xmm0))
14759 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14760 operands[2], operands[3],
14761 operands[4], operands[5],
14763 if (!(flags || ecx || xmm0))
14764 emit_note (NOTE_INSN_DELETED);
14768 [(set_attr "type" "sselog")
14769 (set_attr "prefix_data16" "1")
14770 (set_attr "prefix_extra" "1")
14771 (set_attr "ssememalign" "8")
14772 (set_attr "length_immediate" "1")
14773 (set_attr "memory" "none,load")
14774 (set_attr "mode" "TI")])
;; Variant of the combined pcmpestr pattern whose operand 4 is a memory
;; operand wrapped in UNSPEC_LOADU.  It has a single alternative, and
;; its memory attribute is always "load".
;;
;; The split logic has the same shape as the register/memory pattern:
;; it reads the REG_UNUSED notes for operand 0, operand 1 and
;; FLAGS_REG.  It emits sse4_2_pcmpestr_cconly when only the flags are
;; live and a NOTE_INSN_DELETED note when nothing is live.  The
;; sse4_2_pcmpestri and sse4_2_pcmpestrm emissions presumably depend on
;; the ecx and xmm0 flags -- confirm against the guarding conditions.
14776 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
14777 [(set (match_operand:SI 0 "register_operand" "=c")
14779 [(match_operand:V16QI 2 "register_operand" "x")
14780 (match_operand:SI 3 "register_operand" "a")
14782 [(match_operand:V16QI 4 "memory_operand" "m")]
14784 (match_operand:SI 5 "register_operand" "d")
14785 (match_operand:SI 6 "const_0_to_255_operand" "n")]
14787 (set (match_operand:V16QI 1 "register_operand" "=Yz")
14791 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
14795 (set (reg:CC FLAGS_REG)
14799 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
14804 && can_create_pseudo_p ()"
14809 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14810 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14811 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14814 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14815 operands[3], operands[4],
14816 operands[5], operands[6]));
14818 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14819 operands[3], operands[4],
14820 operands[5], operands[6]));
14821 if (flags && !(ecx || xmm0))
14822 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14823 operands[2], operands[3],
14824 operands[4], operands[5],
14826 if (!(flags || ecx || xmm0))
14827 emit_note (NOTE_INSN_DELETED);
14831 [(set_attr "type" "sselog")
14832 (set_attr "prefix_data16" "1")
14833 (set_attr "prefix_extra" "1")
14834 (set_attr "ssememalign" "8")
14835 (set_attr "length_immediate" "1")
14836 (set_attr "memory" "load")
14837 (set_attr "mode" "TI")])
;; %vpcmpestri: sets operand 0 (SI, constraint "c") and FLAGS_REG.
;; Operand 1 is a V16QI register and operand 3 a V16QI register or
;; memory; operands 2 and 4 are SI registers with constraints "a" and
;; "d".  Operand 5 is an immediate accepted by const_0_to_255_operand.
;; Only operands 1, 3 and 5 appear in the output template.
14839 (define_insn "sse4_2_pcmpestri"
14840 [(set (match_operand:SI 0 "register_operand" "=c,c")
14842 [(match_operand:V16QI 1 "register_operand" "x,x")
14843 (match_operand:SI 2 "register_operand" "a,a")
14844 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14845 (match_operand:SI 4 "register_operand" "d,d")
14846 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14848 (set (reg:CC FLAGS_REG)
14857 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
14858 [(set_attr "type" "sselog")
14859 (set_attr "prefix_data16" "1")
14860 (set_attr "prefix_extra" "1")
14861 (set_attr "prefix" "maybe_vex")
14862 (set_attr "ssememalign" "8")
14863 (set_attr "length_immediate" "1")
14864 (set_attr "btver2_decode" "vector")
14865 (set_attr "memory" "none,load")
14866 (set_attr "mode" "TI")])
;; %vpcmpestrm: sets operand 0 (V16QI, constraint "Yz") and FLAGS_REG.
;; The input operands have the same layout as sse4_2_pcmpestri: V16QI
;; operands 1 and 3, SI operands 2 ("a") and 4 ("d"), and the immediate
;; in operand 5.  Only operands 1, 3 and 5 appear in the output
;; template.
14868 (define_insn "sse4_2_pcmpestrm"
14869 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
14871 [(match_operand:V16QI 1 "register_operand" "x,x")
14872 (match_operand:SI 2 "register_operand" "a,a")
14873 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14874 (match_operand:SI 4 "register_operand" "d,d")
14875 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14877 (set (reg:CC FLAGS_REG)
14886 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
14887 [(set_attr "type" "sselog")
14888 (set_attr "prefix_data16" "1")
14889 (set_attr "prefix_extra" "1")
14890 (set_attr "ssememalign" "8")
14891 (set_attr "length_immediate" "1")
14892 (set_attr "prefix" "maybe_vex")
14893 (set_attr "btver2_decode" "vector")
14894 (set_attr "memory" "none,load")
14895 (set_attr "mode" "TI")])
;; Flags-only pcmpestr: FLAGS_REG is the only result; operands 0
;; (V16QI) and 1 (SI) are clobbered scratches.  Alternatives 0 and 1
;; emit %vpcmpestrm and need the "Yz" scratch, while alternatives 2 and
;; 3 emit %vpcmpestri and need the "c" scratch.  Within each pair the
;; second alternative takes operand 4 from memory.
14897 (define_insn "sse4_2_pcmpestr_cconly"
14898 [(set (reg:CC FLAGS_REG)
14900 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
14901 (match_operand:SI 3 "register_operand" "a,a,a,a")
14902 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
14903 (match_operand:SI 5 "register_operand" "d,d,d,d")
14904 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
14906 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
14907 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
14910 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
14911 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
14912 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
14913 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
14914 [(set_attr "type" "sselog")
14915 (set_attr "prefix_data16" "1")
14916 (set_attr "prefix_extra" "1")
14917 (set_attr "ssememalign" "8")
14918 (set_attr "length_immediate" "1")
14919 (set_attr "memory" "none,load,none,load")
14920 (set_attr "btver2_decode" "vector,vector,vector,vector")
14921 (set_attr "prefix" "maybe_vex")
14922 (set_attr "mode" "TI")])
;; Combined pcmpistr pattern with three results: operand 0 (SI,
;; constraint "c"), operand 1 (V16QI, constraint "Yz") and FLAGS_REG.
;; Inputs: operand 2 (V16QI register), operand 3 (V16QI register or
;; memory) and operand 4, an immediate accepted by
;; const_0_to_255_operand.
;;
;; The split, which needs can_create_pseudo_p (), looks at the
;; REG_UNUSED notes on curr_insn to learn which of the three results
;; are live.  It can emit sse4_2_pcmpistri (result in operand 0) and
;; sse4_2_pcmpistrm (result in operand 1) -- presumably only for the
;; results that are live; confirm against the guarding conditions.
;; When only the flags are live it emits sse4_2_pcmpistr_cconly, and
;; when nothing is live it emits a NOTE_INSN_DELETED note.
14924 (define_insn_and_split "sse4_2_pcmpistr"
14925 [(set (match_operand:SI 0 "register_operand" "=c,c")
14927 [(match_operand:V16QI 2 "register_operand" "x,x")
14928 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14929 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
14931 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
14937 (set (reg:CC FLAGS_REG)
14944 && can_create_pseudo_p ()"
14949 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14950 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14951 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14954 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
14955 operands[3], operands[4]));
14957 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
14958 operands[3], operands[4]));
14959 if (flags && !(ecx || xmm0))
14960 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
14961 operands[2], operands[3],
14963 if (!(flags || ecx || xmm0))
14964 emit_note (NOTE_INSN_DELETED);
14968 [(set_attr "type" "sselog")
14969 (set_attr "prefix_data16" "1")
14970 (set_attr "prefix_extra" "1")
14971 (set_attr "ssememalign" "8")
14972 (set_attr "length_immediate" "1")
14973 (set_attr "memory" "none,load")
14974 (set_attr "mode" "TI")])
;; Variant of the combined pcmpistr pattern whose operand 3 is a memory
;; operand wrapped in UNSPEC_LOADU.  It has a single alternative, and
;; its memory attribute is always "load".
;;
;; The split logic has the same shape as the register/memory pattern:
;; it reads the REG_UNUSED notes for operand 0, operand 1 and
;; FLAGS_REG.  It emits sse4_2_pcmpistr_cconly when only the flags are
;; live and a NOTE_INSN_DELETED note when nothing is live.  The
;; sse4_2_pcmpistri and sse4_2_pcmpistrm emissions presumably depend on
;; the ecx and xmm0 flags -- confirm against the guarding conditions.
14976 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
14977 [(set (match_operand:SI 0 "register_operand" "=c")
14979 [(match_operand:V16QI 2 "register_operand" "x")
14981 [(match_operand:V16QI 3 "memory_operand" "m")]
14983 (match_operand:SI 4 "const_0_to_255_operand" "n")]
14985 (set (match_operand:V16QI 1 "register_operand" "=Yz")
14988 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
14991 (set (reg:CC FLAGS_REG)
14994 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
14998 && can_create_pseudo_p ()"
15003 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15004 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15005 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15008 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
15009 operands[3], operands[4]));
15011 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
15012 operands[3], operands[4]));
15013 if (flags && !(ecx || xmm0))
15014 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
15015 operands[2], operands[3],
15017 if (!(flags || ecx || xmm0))
15018 emit_note (NOTE_INSN_DELETED);
15022 [(set_attr "type" "sselog")
15023 (set_attr "prefix_data16" "1")
15024 (set_attr "prefix_extra" "1")
15025 (set_attr "ssememalign" "8")
15026 (set_attr "length_immediate" "1")
15027 (set_attr "memory" "load")
15028 (set_attr "mode" "TI")])
;; %vpcmpistri: sets operand 0 (SI, constraint "c") and FLAGS_REG.
;; Operand 1 is a V16QI register, operand 2 a V16QI register or memory,
;; and operand 3 an immediate accepted by const_0_to_255_operand.
15030 (define_insn "sse4_2_pcmpistri"
15031 [(set (match_operand:SI 0 "register_operand" "=c,c")
15033 [(match_operand:V16QI 1 "register_operand" "x,x")
15034 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15035 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15037 (set (reg:CC FLAGS_REG)
15044 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
15045 [(set_attr "type" "sselog")
15046 (set_attr "prefix_data16" "1")
15047 (set_attr "prefix_extra" "1")
15048 (set_attr "ssememalign" "8")
15049 (set_attr "length_immediate" "1")
15050 (set_attr "prefix" "maybe_vex")
15051 (set_attr "memory" "none,load")
15052 (set_attr "btver2_decode" "vector")
15053 (set_attr "mode" "TI")])
;; %vpcmpistrm: sets operand 0 (V16QI, constraint "Yz") and FLAGS_REG.
;; Operand 1 is a V16QI register, operand 2 a V16QI register or memory,
;; and operand 3 an immediate accepted by const_0_to_255_operand.
15055 (define_insn "sse4_2_pcmpistrm"
15056 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
15058 [(match_operand:V16QI 1 "register_operand" "x,x")
15059 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15060 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15062 (set (reg:CC FLAGS_REG)
15069 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
15070 [(set_attr "type" "sselog")
15071 (set_attr "prefix_data16" "1")
15072 (set_attr "prefix_extra" "1")
15073 (set_attr "ssememalign" "8")
15074 (set_attr "length_immediate" "1")
15075 (set_attr "prefix" "maybe_vex")
15076 (set_attr "memory" "none,load")
15077 (set_attr "btver2_decode" "vector")
15078 (set_attr "mode" "TI")])
;; Flags-only pcmpistr: FLAGS_REG is the only result; operands 0
;; (V16QI) and 1 (SI) are clobbered scratches.  Alternatives 0 and 1
;; emit %vpcmpistrm and need the "Yz" scratch, while alternatives 2 and
;; 3 emit %vpcmpistri and need the "c" scratch.  Within each pair the
;; second alternative takes operand 3 from memory.
15080 (define_insn "sse4_2_pcmpistr_cconly"
15081 [(set (reg:CC FLAGS_REG)
15083 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15084 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
15085 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
15087 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15088 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
15091 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15092 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15093 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
15094 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
15095 [(set_attr "type" "sselog")
15096 (set_attr "prefix_data16" "1")
15097 (set_attr "prefix_extra" "1")
15098 (set_attr "ssememalign" "8")
15099 (set_attr "length_immediate" "1")
15100 (set_attr "memory" "none,load,none,load")
15101 (set_attr "prefix" "maybe_vex")
15102 (set_attr "btver2_decode" "vector,vector,vector,vector")
15103 (set_attr "mode" "TI")])
15105 ;; Packed float variants
;; Maps the index vector mode to the single-float vector mode used for
;; the memory reference of the packed-float prefetch patterns:
;; V8DI -> V8SF and V16SI -> V16SF.
15106 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
15107 [(V8DI "V8SF") (V16SI "V16SF")])
;; Expander for the packed-float gather prefetch (UNSPEC_GATHER_PREFETCH).
;; Operand 0 is the mask (register or constant -1), operand 2 the base
;; address, operand 1 the VI48_512 index vector, operand 3 the scale
;; (const1248_operand) and operand 4 a hint accepted by
;; const_2_to_3_operand.  The preparation code wraps operands 2, 1 and
;; 3 into an UNSPEC_VSIBADDR rtx of mode Pmode.
15109 (define_expand "avx512pf_gatherpf<mode>sf"
15111 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15112 (mem:<GATHER_SCATTER_SF_MEM_MODE>
15114 [(match_operand 2 "vsib_address_operand")
15115 (match_operand:VI48_512 1 "register_operand")
15116 (match_operand:SI 3 "const1248_operand")]))
15117 (match_operand:SI 4 "const_2_to_3_operand")]
15118 UNSPEC_GATHER_PREFETCH)]
15122 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15123 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-float gather prefetch.  Operand 0 is the mask register
;; (constraint "Yk") and operand 5 the VSIB memory reference built from
;; operands 2 (base), 1 (index) and 3 (scale).  A switch on the value
;; of operand 4 selects between the vgatherpf0 and vgatherpf1
;; mnemonics; any other value is unreachable.
15126 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
15128 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15129 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15131 [(match_operand:P 2 "vsib_address_operand" "Tv")
15132 (match_operand:VI48_512 1 "register_operand" "v")
15133 (match_operand:SI 3 "const1248_operand" "n")]
15135 (match_operand:SI 4 "const_2_to_3_operand" "n")]
15136 UNSPEC_GATHER_PREFETCH)]
15139 switch (INTVAL (operands[4]))
15142 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15144 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15146 gcc_unreachable ();
15149 [(set_attr "type" "sse")
15150 (set_attr "prefix" "evex")
15151 (set_attr "mode" "XI")])
;; Packed-float gather prefetch whose output template prints no mask
;; operand.  Operand 4 is the VSIB memory reference built from operands
;; 1 (base), 0 (index) and 2 (scale).  A switch on the value of operand
;; 3 selects between the vgatherpf0 and vgatherpf1 mnemonics.
15153 (define_insn "*avx512pf_gatherpf<mode>sf"
15156 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
15158 [(match_operand:P 1 "vsib_address_operand" "Tv")
15159 (match_operand:VI48_512 0 "register_operand" "v")
15160 (match_operand:SI 2 "const1248_operand" "n")]
15162 (match_operand:SI 3 "const_2_to_3_operand" "n")]
15163 UNSPEC_GATHER_PREFETCH)]
15166 switch (INTVAL (operands[3]))
15169 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
15171 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
15173 gcc_unreachable ();
15176 [(set_attr "type" "sse")
15177 (set_attr "prefix" "evex")
15178 (set_attr "mode" "XI")])
15180 ;; Packed double variants
;; Expander for the packed-double gather prefetch
;; (UNSPEC_GATHER_PREFETCH).  Operand 0 is the mask (register or
;; constant -1), operand 2 the base address, operand 1 the
;; VI4_256_8_512 index vector, operand 3 the scale (const1248_operand)
;; and operand 4 a hint accepted by const_2_to_3_operand.  The
;; preparation code wraps operands 2, 1 and 3 into an UNSPEC_VSIBADDR
;; rtx of mode Pmode.
15181 (define_expand "avx512pf_gatherpf<mode>df"
15183 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15186 [(match_operand 2 "vsib_address_operand")
15187 (match_operand:VI4_256_8_512 1 "register_operand")
15188 (match_operand:SI 3 "const1248_operand")]))
15189 (match_operand:SI 4 "const_2_to_3_operand")]
15190 UNSPEC_GATHER_PREFETCH)]
15194 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15195 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-double gather prefetch.  Operand 0 is the mask
;; register (constraint "Yk") and operand 5 the V8DF VSIB memory
;; reference built from operands 2 (base), 1 (index) and 3 (scale).
;; A switch on the value of operand 4 selects between the vgatherpf0
;; and vgatherpf1 mnemonics; any other value is unreachable.
15198 (define_insn "*avx512pf_gatherpf<mode>df_mask"
15200 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15201 (match_operator:V8DF 5 "vsib_mem_operator"
15203 [(match_operand:P 2 "vsib_address_operand" "Tv")
15204 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15205 (match_operand:SI 3 "const1248_operand" "n")]
15207 (match_operand:SI 4 "const_2_to_3_operand" "n")]
15208 UNSPEC_GATHER_PREFETCH)]
15211 switch (INTVAL (operands[4]))
15214 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15216 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15218 gcc_unreachable ();
15221 [(set_attr "type" "sse")
15222 (set_attr "prefix" "evex")
15223 (set_attr "mode" "XI")])
;; Packed-double gather prefetch whose output template prints no mask
;; operand.  Operand 4 is the V8DF VSIB memory reference built from
;; operands 1 (base), 0 (index) and 2 (scale).  A switch on the value
;; of operand 3 selects between the vgatherpf0 and vgatherpf1
;; mnemonics.
15225 (define_insn "*avx512pf_gatherpf<mode>df"
15228 (match_operator:V8DF 4 "vsib_mem_operator"
15230 [(match_operand:P 1 "vsib_address_operand" "Tv")
15231 (match_operand:VI4_256_8_512 0 "register_operand" "v")
15232 (match_operand:SI 2 "const1248_operand" "n")]
15234 (match_operand:SI 3 "const_2_to_3_operand" "n")]
15235 UNSPEC_GATHER_PREFETCH)]
15238 switch (INTVAL (operands[3]))
15241 return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
15243 return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
15245 gcc_unreachable ();
15248 [(set_attr "type" "sse")
15249 (set_attr "prefix" "evex")
15250 (set_attr "mode" "XI")])
15252 ;; Packed float variants
;; Expander for the packed-float scatter prefetch
;; (UNSPEC_SCATTER_PREFETCH).  Operand 0 is the mask (register or
;; constant -1), operand 2 the base address, operand 1 the VI48_512
;; index vector, operand 3 the scale (const1248_operand) and operand 4
;; a hint accepted by const2367_operand.  The preparation code wraps
;; operands 2, 1 and 3 into an UNSPEC_VSIBADDR rtx of mode Pmode.
15253 (define_expand "avx512pf_scatterpf<mode>sf"
15255 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15256 (mem:<GATHER_SCATTER_SF_MEM_MODE>
15258 [(match_operand 2 "vsib_address_operand")
15259 (match_operand:VI48_512 1 "register_operand")
15260 (match_operand:SI 3 "const1248_operand")]))
15261 (match_operand:SI 4 "const2367_operand")]
15262 UNSPEC_SCATTER_PREFETCH)]
15266 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15267 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-float scatter prefetch.  Operand 0 is the mask
;; register (constraint "Yk") and operand 5 the VSIB memory reference
;; built from operands 2 (base), 1 (index) and 3 (scale).  A switch on
;; the value of operand 4 (const2367_operand) selects between the
;; vscatterpf0 and vscatterpf1 mnemonics; any other value is
;; unreachable.
15270 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
15272 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15273 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15275 [(match_operand:P 2 "vsib_address_operand" "Tv")
15276 (match_operand:VI48_512 1 "register_operand" "v")
15277 (match_operand:SI 3 "const1248_operand" "n")]
15279 (match_operand:SI 4 "const2367_operand" "n")]
15280 UNSPEC_SCATTER_PREFETCH)]
15283 switch (INTVAL (operands[4]))
15287 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15290 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15292 gcc_unreachable ();
15295 [(set_attr "type" "sse")
15296 (set_attr "prefix" "evex")
15297 (set_attr "mode" "XI")])
;; Packed-float scatter prefetch whose output template prints no mask
;; operand.  Operand 4 is the VSIB memory reference built from operands
;; 1 (base), 0 (index) and 2 (scale).  A switch on the value of operand
;; 3 (const2367_operand) selects between the vscatterpf0 and
;; vscatterpf1 mnemonics.
15299 (define_insn "*avx512pf_scatterpf<mode>sf"
15302 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
15304 [(match_operand:P 1 "vsib_address_operand" "Tv")
15305 (match_operand:VI48_512 0 "register_operand" "v")
15306 (match_operand:SI 2 "const1248_operand" "n")]
15308 (match_operand:SI 3 "const2367_operand" "n")]
15309 UNSPEC_SCATTER_PREFETCH)]
15312 switch (INTVAL (operands[3]))
15316 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
15319 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
15321 gcc_unreachable ();
15324 [(set_attr "type" "sse")
15325 (set_attr "prefix" "evex")
15326 (set_attr "mode" "XI")])
15328 ;; Packed double variants
;; Expander for the packed-double scatter prefetch
;; (UNSPEC_SCATTER_PREFETCH).  Operand 0 is the mask (register or
;; constant -1), operand 2 the base address, operand 1 the
;; VI4_256_8_512 index vector, operand 3 the scale (const1248_operand)
;; and operand 4 a hint accepted by const2367_operand.  The preparation
;; code wraps operands 2, 1 and 3 into an UNSPEC_VSIBADDR rtx of mode
;; Pmode.
15329 (define_expand "avx512pf_scatterpf<mode>df"
15331 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
15334 [(match_operand 2 "vsib_address_operand")
15335 (match_operand:VI4_256_8_512 1 "register_operand")
15336 (match_operand:SI 3 "const1248_operand")]))
15337 (match_operand:SI 4 "const2367_operand")]
15338 UNSPEC_SCATTER_PREFETCH)]
15342 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15343 operands[3]), UNSPEC_VSIBADDR);
;; Masked packed-double scatter prefetch.  Operand 0 is the mask
;; register (constraint "Yk") and operand 5 the V8DF VSIB memory
;; reference built from operands 2 (base), 1 (index) and 3 (scale).
;; A switch on the value of operand 4 (const2367_operand) selects
;; between the vscatterpf0 and vscatterpf1 mnemonics; any other value
;; is unreachable.
15346 (define_insn "*avx512pf_scatterpf<mode>df_mask"
15348 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15349 (match_operator:V8DF 5 "vsib_mem_operator"
15351 [(match_operand:P 2 "vsib_address_operand" "Tv")
15352 (match_operand:VI4_256_8_512 1 "register_operand" "v")
15353 (match_operand:SI 3 "const1248_operand" "n")]
15355 (match_operand:SI 4 "const2367_operand" "n")]
15356 UNSPEC_SCATTER_PREFETCH)]
15359 switch (INTVAL (operands[4]))
15363 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15366 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15368 gcc_unreachable ();
15371 [(set_attr "type" "sse")
15372 (set_attr "prefix" "evex")
15373 (set_attr "mode" "XI")])
;; Packed-double scatter prefetch whose output template prints no mask
;; operand.  Operand 4 is the V8DF VSIB memory reference built from
;; operands 1 (base), 0 (index) and 2 (scale).  A switch on the value
;; of operand 3 (const2367_operand) selects between the vscatterpf0 and
;; vscatterpf1 mnemonics.
15375 (define_insn "*avx512pf_scatterpf<mode>df"
15378 (match_operator:V8DF 4 "vsib_mem_operator"
15380 [(match_operand:P 1 "vsib_address_operand" "Tv")
15381 (match_operand:VI4_256_8_512 0 "register_operand" "v")
15382 (match_operand:SI 2 "const1248_operand" "n")]
15384 (match_operand:SI 3 "const2367_operand" "n")]
15385 UNSPEC_SCATTER_PREFETCH)]
15388 switch (INTVAL (operands[3]))
15392 return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
15395 return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
15397 gcc_unreachable ();
15400 [(set_attr "type" "sse")
15401 (set_attr "prefix" "evex")
15402 (set_attr "mode" "XI")])
;; vexp2<ssemodesuffix> on VF_512 vectors.  The predicate and
;; constraint of operand 1 come from the <round_saeonly_*> attributes,
;; and the template appends <mask_operand2> to the destination and
;; <round_saeonly_mask_op2> to the source.
15404 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
15405 [(set (match_operand:VF_512 0 "register_operand" "=v")
15407 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15410 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15411 [(set_attr "prefix" "evex")
15412 (set_attr "type" "sse")
15413 (set_attr "mode" "<MODE>")])
;; Packed vrcp28<ssemodesuffix> on VF_512 vectors.  The predicate and
;; constraint of operand 1 come from the <round_saeonly_*> attributes,
;; and the template appends <mask_operand2> to the destination and
;; <round_saeonly_mask_op2> to the source.
15415 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
15416 [(set (match_operand:VF_512 0 "register_operand" "=v")
15418 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15421 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15422 [(set_attr "prefix" "evex")
15423 (set_attr "type" "sse")
15424 (set_attr "mode" "<MODE>")])
;; Scalar vrcp28<ssescalarmodesuffix> on VF_128 operands.  Operand 1 is
;; the source whose predicate and constraint come from the
;; <round_saeonly_*> attributes; operand 2 is a second register source
;; that is printed between operand 1 and the destination.
;; NOTE(review): length_immediate is "1" although the visible output
;; template prints no immediate operand -- confirm this is intended.
15426 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
15427 [(set (match_operand:VF_128 0 "register_operand" "=v")
15430 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15432 (match_operand:VF_128 2 "register_operand" "v")
15435 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
15436 [(set_attr "length_immediate" "1")
15437 (set_attr "prefix" "evex")
15438 (set_attr "type" "sse")
15439 (set_attr "mode" "<MODE>")])
;; Packed vrsqrt28<ssemodesuffix> on VF_512 vectors.  The predicate and
;; constraint of operand 1 come from the <round_saeonly_*> attributes,
;; and the template appends <mask_operand2> to the destination and
;; <round_saeonly_mask_op2> to the source.
15441 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
15442 [(set (match_operand:VF_512 0 "register_operand" "=v")
15444 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15447 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15448 [(set_attr "prefix" "evex")
15449 (set_attr "type" "sse")
15450 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt28<ssescalarmodesuffix> on VF_128 operands.  Operand 1
;; is the source whose predicate and constraint come from the
;; <round_saeonly_*> attributes; operand 2 is a second register source
;; that is printed between operand 1 and the destination.
;; NOTE(review): length_immediate is "1" although the visible output
;; template prints no immediate operand -- confirm this is intended.
15452 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
15453 [(set (match_operand:VF_128 0 "register_operand" "=v")
15456 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15458 (match_operand:VF_128 2 "register_operand" "v")
15461 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
15462 [(set_attr "length_immediate" "1")
15463 (set_attr "type" "sse")
15464 (set_attr "prefix" "evex")
15465 (set_attr "mode" "<MODE>")])
15467 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15469 ;; XOP instructions
15471 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; xop_plus iterates over the plus and ss_plus rtx codes.  The macs and
;; madcs code attributes give the mnemonic stem for each code:
;; plus -> "macs" / "madcs", ss_plus -> "macss" / "madcss".
15473 (define_code_iterator xop_plus [plus ss_plus])
15475 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
15476 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
15478 ;; XOP parallel integer multiply/add instructions.
;; vp<macs><ssemodesuffix><ssemodesuffix> on VI24_128 vectors (type
;; ssemuladd).  Operands 1 and 2 are marked commutative with "%", and
;; only operand 2 may be memory.  Operand 3 is a further register
;; source, presumably the addend -- confirm against the full RTL.
15480 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
15481 [(set (match_operand:VI24_128 0 "register_operand" "=x")
15484 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
15485 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
15486 (match_operand:VI24_128 3 "register_operand" "x")))]
15488 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15489 [(set_attr "type" "ssemuladd")
15490 (set_attr "mode" "TI")])
;; vp<macs>dql (type ssemuladd): uses elements 0 and 2 of the V4SI
;; operands 1 and 2, together with the V2DI register operand 3, to
;; produce the V2DI result in operand 0.
15492 (define_insn "xop_p<macs>dql"
15493 [(set (match_operand:V2DI 0 "register_operand" "=x")
15498 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
15499 (parallel [(const_int 0) (const_int 2)])))
15502 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15503 (parallel [(const_int 0) (const_int 2)]))))
15504 (match_operand:V2DI 3 "register_operand" "x")))]
15506 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15507 [(set_attr "type" "ssemuladd")
15508 (set_attr "mode" "TI")])
;; vp<macs>dqh (type ssemuladd): uses elements 1 and 3 of the V4SI
;; operands 1 and 2, together with the V2DI register operand 3, to
;; produce the V2DI result in operand 0.
15510 (define_insn "xop_p<macs>dqh"
15511 [(set (match_operand:V2DI 0 "register_operand" "=x")
15516 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
15517 (parallel [(const_int 1) (const_int 3)])))
15520 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15521 (parallel [(const_int 1) (const_int 3)]))))
15522 (match_operand:V2DI 3 "register_operand" "x")))]
15524 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15525 [(set_attr "type" "ssemuladd")
15526 (set_attr "mode" "TI")])
15528 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; vp<macs>wd (type ssemuladd): uses the odd elements (1, 3, 5, 7) of
;; the V8HI operands 1 and 2, together with the V4SI register operand
;; 3, to produce the V4SI result in operand 0.
15529 (define_insn "xop_p<macs>wd"
15530 [(set (match_operand:V4SI 0 "register_operand" "=x")
15535 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
15536 (parallel [(const_int 1) (const_int 3)
15537 (const_int 5) (const_int 7)])))
15540 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
15541 (parallel [(const_int 1) (const_int 3)
15542 (const_int 5) (const_int 7)]))))
15543 (match_operand:V4SI 3 "register_operand" "x")))]
15545 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15546 [(set_attr "type" "ssemuladd")
15547 (set_attr "mode" "TI")])
;; vp<madcs>wd (type ssemuladd): the RTL selects both the even elements
;; (0, 2, 4, 6) and the odd elements (1, 3, 5, 7) of the V8HI inputs.
;; These are combined with the V4SI register operand 3 to produce the
;; V4SI result in operand 0.
15549 (define_insn "xop_p<madcs>wd"
15550 [(set (match_operand:V4SI 0 "register_operand" "=x")
15556 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
15557 (parallel [(const_int 0) (const_int 2)
15558 (const_int 4) (const_int 6)])))
15561 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
15562 (parallel [(const_int 0) (const_int 2)
15563 (const_int 4) (const_int 6)]))))
15568 (parallel [(const_int 1) (const_int 3)
15569 (const_int 5) (const_int 7)])))
15573 (parallel [(const_int 1) (const_int 3)
15574 (const_int 5) (const_int 7)])))))
15575 (match_operand:V4SI 3 "register_operand" "x")))]
15577 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15578 [(set_attr "type" "ssemuladd")
15579 (set_attr "mode" "TI")])
15581 ;; XOP parallel XMM conditional moves
;; vpcmov for every mode in the V iterator (type sse4arg).  Operand 1
;; is always a register.  Alternative 0 keeps operand 3 in a register
;; and lets operand 2 be memory; alternative 1 takes operand 3 from
;; memory and keeps operand 2 in a register.
15582 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
15583 [(set (match_operand:V 0 "register_operand" "=x,x")
15585 (match_operand:V 3 "nonimmediate_operand" "x,m")
15586 (match_operand:V 1 "register_operand" "x,x")
15587 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
15589 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15590 [(set_attr "type" "sse4arg")])
15592 ;; XOP horizontal add/subtract instructions
;; vphadd<u>bw (type sseiadd1): V16QI source (operand 1, register or
;; memory) to V8HI result.  The RTL pairs the even-indexed selection
;; (0, 2, ..., 14) with the odd-indexed selection (1, 3, ..., 15).
15593 (define_insn "xop_phadd<u>bw"
15594 [(set (match_operand:V8HI 0 "register_operand" "=x")
15598 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15599 (parallel [(const_int 0) (const_int 2)
15600 (const_int 4) (const_int 6)
15601 (const_int 8) (const_int 10)
15602 (const_int 12) (const_int 14)])))
15606 (parallel [(const_int 1) (const_int 3)
15607 (const_int 5) (const_int 7)
15608 (const_int 9) (const_int 11)
15609 (const_int 13) (const_int 15)])))))]
15611 "vphadd<u>bw\t{%1, %0|%0, %1}"
15612 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bd (type sseiadd1): V16QI source (operand 1, register or
;; memory) to V4SI result.  The RTL combines four strided selections of
;; the source: elements {0,4,8,12}, {1,5,9,13}, {2,6,10,14} and
;; {3,7,11,15}.
15614 (define_insn "xop_phadd<u>bd"
15615 [(set (match_operand:V4SI 0 "register_operand" "=x")
15620 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15621 (parallel [(const_int 0) (const_int 4)
15622 (const_int 8) (const_int 12)])))
15626 (parallel [(const_int 1) (const_int 5)
15627 (const_int 9) (const_int 13)]))))
15632 (parallel [(const_int 2) (const_int 6)
15633 (const_int 10) (const_int 14)])))
15637 (parallel [(const_int 3) (const_int 7)
15638 (const_int 11) (const_int 15)]))))))]
15640 "vphadd<u>bd\t{%1, %0|%0, %1}"
15641 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bq: V16QI source -> V2DI result.
;; Eight two-element index lists {k, k+8} for k = 0..7 pick byte k of each
;; 64-bit half, so each result element combines the eight bytes of one half.
15643 (define_insn "xop_phadd<u>bq"
15644 [(set (match_operand:V2DI 0 "register_operand" "=x")
15650 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15651 (parallel [(const_int 0) (const_int 8)])))
15655 (parallel [(const_int 1) (const_int 9)]))))
15660 (parallel [(const_int 2) (const_int 10)])))
15664 (parallel [(const_int 3) (const_int 11)])))))
15670 (parallel [(const_int 4) (const_int 12)])))
15674 (parallel [(const_int 5) (const_int 13)]))))
15679 (parallel [(const_int 6) (const_int 14)])))
15683 (parallel [(const_int 7) (const_int 15)])))))))]
15685 "vphadd<u>bq\t{%1, %0|%0, %1}"
15686 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wd: V8HI source -> V4SI result.
;; Even-numbered words {0,2,4,6} and odd-numbered words {1,3,5,7} of
;; operand 1 are selected as the two terms of each result element.
15688 (define_insn "xop_phadd<u>wd"
15689 [(set (match_operand:V4SI 0 "register_operand" "=x")
15693 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15694 (parallel [(const_int 0) (const_int 2)
15695 (const_int 4) (const_int 6)])))
15699 (parallel [(const_int 1) (const_int 3)
15700 (const_int 5) (const_int 7)])))))]
15702 "vphadd<u>wd\t{%1, %0|%0, %1}"
15703 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wq: V8HI source -> V2DI result.
;; Index lists {0,4}, {1,5}, {2,6}, {3,7} pick word k of each 64-bit half,
;; so each result element combines the four words of one half.
15705 (define_insn "xop_phadd<u>wq"
15706 [(set (match_operand:V2DI 0 "register_operand" "=x")
15711 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15712 (parallel [(const_int 0) (const_int 4)])))
15716 (parallel [(const_int 1) (const_int 5)]))))
15721 (parallel [(const_int 2) (const_int 6)])))
15725 (parallel [(const_int 3) (const_int 7)]))))))]
15727 "vphadd<u>wq\t{%1, %0|%0, %1}"
15728 [(set_attr "type" "sseiadd1")])
;; vphadd<u>dq: V4SI source -> V2DI result.
;; Even-numbered dwords {0,2} and odd-numbered dwords {1,3} of operand 1
;; are selected as the two terms of each result element.
15730 (define_insn "xop_phadd<u>dq"
15731 [(set (match_operand:V2DI 0 "register_operand" "=x")
15735 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15736 (parallel [(const_int 0) (const_int 2)])))
15740 (parallel [(const_int 1) (const_int 3)])))))]
15742 "vphadd<u>dq\t{%1, %0|%0, %1}"
15743 [(set_attr "type" "sseiadd1")])
;; vphsubbw: V16QI source -> V8HI result.
;; The first index list selects the even-numbered bytes and the second the
;; odd-numbered bytes of operand 1; there is no unsigned (<u>) variant.
15745 (define_insn "xop_phsubbw"
15746 [(set (match_operand:V8HI 0 "register_operand" "=x")
15750 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15751 (parallel [(const_int 0) (const_int 2)
15752 (const_int 4) (const_int 6)
15753 (const_int 8) (const_int 10)
15754 (const_int 12) (const_int 14)])))
15758 (parallel [(const_int 1) (const_int 3)
15759 (const_int 5) (const_int 7)
15760 (const_int 9) (const_int 11)
15761 (const_int 13) (const_int 15)])))))]
15763 "vphsubbw\t{%1, %0|%0, %1}"
15764 [(set_attr "type" "sseiadd1")])
;; vphsubwd: V8HI source -> V4SI result.
;; The first index list selects words {0,2,4,6}, the second words {1,3,5,7}.
15766 (define_insn "xop_phsubwd"
15767 [(set (match_operand:V4SI 0 "register_operand" "=x")
15771 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15772 (parallel [(const_int 0) (const_int 2)
15773 (const_int 4) (const_int 6)])))
15777 (parallel [(const_int 1) (const_int 3)
15778 (const_int 5) (const_int 7)])))))]
15780 "vphsubwd\t{%1, %0|%0, %1}"
15781 [(set_attr "type" "sseiadd1")])
;; vphsubdq: V4SI source -> V2DI result.
;; The first index list selects dwords {0,2}, the second dwords {1,3}.
15783 (define_insn "xop_phsubdq"
15784 [(set (match_operand:V2DI 0 "register_operand" "=x")
15788 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15789 (parallel [(const_int 0) (const_int 2)])))
15793 (parallel [(const_int 1) (const_int 3)])))))]
15795 "vphsubdq\t{%1, %0|%0, %1}"
15796 [(set_attr "type" "sseiadd1")])
15798 ;; XOP permute instructions
;; vpperm on V16QI, modelled as UNSPEC_XOP_PERMUTE over operands 1-3.
;; Operand 1 must be a register; operand 2 or operand 3 may be memory, but
;; the insn condition rejects the case where both are.
15799 (define_insn "xop_pperm"
15800 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15802 [(match_operand:V16QI 1 "register_operand" "x,x")
15803 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15804 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
15805 UNSPEC_XOP_PERMUTE))]
15806 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15807 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15808 [(set_attr "type" "sse4arg")
15809 (set_attr "mode" "TI")])
15811 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into one V4SI result via vpperm.
;; Operand 3 is the V16QI third source of vpperm; it appears in the RTL only
;; as a (use ...), so the pattern relies on the caller to supply a suitable
;; control vector (presumably the byte selector -- confirm at the call site).
;; Operands 2 and 3 may not both be memory.
15812 (define_insn "xop_pperm_pack_v2di_v4si"
15813 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
15816 (match_operand:V2DI 1 "register_operand" "x,x"))
15818 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
15819 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15820 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15821 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15822 [(set_attr "type" "sse4arg")
15823 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into one V8HI result via vpperm.
;; Operand 3 is the V16QI third source of vpperm, present only as a
;; (use ...); the caller is expected to supply a suitable control vector.
;; Operands 2 and 3 may not both be memory.
15825 (define_insn "xop_pperm_pack_v4si_v8hi"
15826 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
15829 (match_operand:V4SI 1 "register_operand" "x,x"))
15831 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
15832 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15833 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15834 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15835 [(set_attr "type" "sse4arg")
15836 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into one V16QI result via vpperm.
;; Operand 3 is the V16QI third source of vpperm, present only as a
;; (use ...); the caller is expected to supply a suitable control vector.
;; Operands 2 and 3 may not both be memory.
15838 (define_insn "xop_pperm_pack_v8hi_v16qi"
15839 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15842 (match_operand:V8HI 1 "register_operand" "x,x"))
15844 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
15845 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15846 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15847 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15848 [(set_attr "type" "sse4arg")
15849 (set_attr "mode" "TI")])
15851 ;; XOP packed rotate instructions
;; Rotate-left expander for the 128-bit integer vector modes (VI_128) with
;; an SImode count in operand 2.
;; If the count is not a constant accepted by
;; const_0_to_<sserotatemax>_operand, the preparation code:
;;   1. converts the count to the element mode when its mode differs,
;;   2. places that same rtx in every element of a PARALLEL and uses
;;      gen_vec_init<mode> to build a vector register from it,
;;   3. emits the variable-count rotate gen_xop_vrotl<mode>3.
;; Otherwise the expander's own RTL template is used for the constant count.
15852 (define_expand "rotl<mode>3"
15853 [(set (match_operand:VI_128 0 "register_operand")
15855 (match_operand:VI_128 1 "nonimmediate_operand")
15856 (match_operand:SI 2 "general_operand")))]
15859 /* If we were given a scalar, convert it to parallel */
15860 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15862 rtvec vs = rtvec_alloc (<ssescalarnum>);
15863 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15864 rtx reg = gen_reg_rtx (<MODE>mode);
15865 rtx op2 = operands[2];
15868 if (GET_MODE (op2) != <ssescalarmode>mode)
15870 op2 = gen_reg_rtx (<ssescalarmode>mode);
15871 convert_move (op2, operands[2], false);
15874 for (i = 0; i < <ssescalarnum>; i++)
15875 RTVEC_ELT (vs, i) = op2;
15877 emit_insn (gen_vec_init<mode> (reg, par));
15878 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for the 128-bit integer vector modes (VI_128) with
;; an SImode count in operand 2.
;; For a count that is not an in-range constant the preparation code
;; broadcasts the (mode-converted) count into a vector register, negates
;; that vector with gen_neg<mode>2, and emits gen_xop_vrotl<mode>3 with the
;; negated counts -- i.e. a right rotate is issued as a left rotate by the
;; negative amount.
15883 (define_expand "rotr<mode>3"
15884 [(set (match_operand:VI_128 0 "register_operand")
15886 (match_operand:VI_128 1 "nonimmediate_operand")
15887 (match_operand:SI 2 "general_operand")))]
15890 /* If we were given a scalar, convert it to parallel */
15891 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15893 rtvec vs = rtvec_alloc (<ssescalarnum>);
15894 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15895 rtx neg = gen_reg_rtx (<MODE>mode);
15896 rtx reg = gen_reg_rtx (<MODE>mode);
15897 rtx op2 = operands[2];
15900 if (GET_MODE (op2) != <ssescalarmode>mode)
15902 op2 = gen_reg_rtx (<ssescalarmode>mode);
15903 convert_move (op2, operands[2], false);
15906 for (i = 0; i < <ssescalarnum>; i++)
15907 RTVEC_ELT (vs, i) = op2;
15909 emit_insn (gen_vec_init<mode> (reg, par));
15910 emit_insn (gen_neg<mode>2 (neg, reg));
15911 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; Rotate left by an immediate: operand 2 must satisfy
;; const_0_to_<sserotatemax>_operand and is printed directly as the
;; vprot<ssemodesuffix> immediate (hence length_immediate = 1).
15916 (define_insn "xop_rotl<mode>3"
15917 [(set (match_operand:VI_128 0 "register_operand" "=x")
15919 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15920 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15922 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15923 [(set_attr "type" "sseishft")
15924 (set_attr "length_immediate" "1")
15925 (set_attr "mode" "TI")])
;; Rotate right by an immediate, emitted as vprot<ssemodesuffix>.
;; The output code computes (element bit size - operand 2) and the template
;; prints that value as %3, so the right rotate is encoded as the
;; complementary left rotate.
;; NOTE(review): the line naming the assignment target (presumably
;; operands[3]) is not visible in this excerpt.
15927 (define_insn "xop_rotr<mode>3"
15928 [(set (match_operand:VI_128 0 "register_operand" "=x")
15930 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15931 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15935 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
15936 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
15938 [(set_attr "type" "sseishft")
15939 (set_attr "length_immediate" "1")
15940 (set_attr "mode" "TI")])
;; Per-element variable rotate right (count vector in operand 2).
;; Negates the count vector into a fresh register and emits
;; gen_xop_vrotl<mode>3 with the negated counts.
15942 (define_expand "vrotr<mode>3"
15943 [(match_operand:VI_128 0 "register_operand")
15944 (match_operand:VI_128 1 "register_operand")
15945 (match_operand:VI_128 2 "register_operand")]
15948 rtx reg = gen_reg_rtx (<MODE>mode);
15949 emit_insn (gen_neg<mode>2 (reg, operands[2]));
15950 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Per-element variable rotate left (count vector in operand 2).
;; Forwards all three operands unchanged to gen_xop_vrotl<mode>3.
15954 (define_expand "vrotl<mode>3"
15955 [(match_operand:VI_128 0 "register_operand")
15956 (match_operand:VI_128 1 "register_operand")
15957 (match_operand:VI_128 2 "register_operand")]
15960 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; vprot<ssemodesuffix> with a per-element count vector (operand 2).
;; The RTL is an if_then_else over operand 2 whose last arm applies the
;; operation with (neg operand 2), which is how the sign of each count picks
;; the direction.  Either operand 1 or operand 2 may be memory, not both.
15964 (define_insn "xop_vrotl<mode>3"
15965 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
15966 (if_then_else:VI_128
15968 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
15971 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
15975 (neg:VI_128 (match_dup 2)))))]
15976 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15977 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15978 [(set_attr "type" "sseishft")
15979 (set_attr "prefix_data16" "0")
15980 (set_attr "prefix_extra" "2")
15981 (set_attr "mode" "TI")])
15983 ;; XOP packed shift instructions.
;; Per-element variable logical shift right for VI12_128 modes.
;; Negates the count vector (operand 2) and emits gen_xop_shl<mode>3 with
;; the negated counts.
15984 (define_expand "vlshr<mode>3"
15985 [(set (match_operand:VI12_128 0 "register_operand")
15987 (match_operand:VI12_128 1 "register_operand")
15988 (match_operand:VI12_128 2 "nonimmediate_operand")))]
15991 rtx neg = gen_reg_rtx (<MODE>mode);
15992 emit_insn (gen_neg<mode>2 (neg, operands[2]));
15993 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Per-element variable logical shift right for VI48_128 modes, available
;; with either AVX2 or XOP.
;; The visible preparation code is the XOP route: negate the count vector
;; and emit gen_xop_shl<mode>3.
;; NOTE(review): the guard deciding when this XOP sequence is used instead of
;; the plain RTL template is not visible in this excerpt.
15997 (define_expand "vlshr<mode>3"
15998 [(set (match_operand:VI48_128 0 "register_operand")
16000 (match_operand:VI48_128 1 "register_operand")
16001 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16002 "TARGET_AVX2 || TARGET_XOP"
16006 rtx neg = gen_reg_rtx (<MODE>mode);
16007 emit_insn (gen_neg<mode>2 (neg, operands[2]));
16008 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; Per-element variable logical shift right for the VI48_512 modes.
;; No preparation code is visible; the expander just produces its RTL
;; template.  (Enabling condition not visible in this excerpt.)
16013 (define_expand "vlshr<mode>3"
16014 [(set (match_operand:VI48_512 0 "register_operand")
16016 (match_operand:VI48_512 1 "register_operand")
16017 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; Per-element variable logical shift right for the VI48_256 modes.
;; No preparation code is visible; the expander just produces its RTL
;; template.  (Enabling condition not visible in this excerpt.)
16020 (define_expand "vlshr<mode>3"
16021 [(set (match_operand:VI48_256 0 "register_operand")
16023 (match_operand:VI48_256 1 "register_operand")
16024 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Per-element variable arithmetic shift right on V8HI, enabled for XOP or
;; for AVX512BW together with AVX512VL.
;; The visible preparation code is the XOP route: negate the count vector
;; and emit gen_xop_shav8hi3.
;; NOTE(review): the guard choosing between the XOP sequence and the plain
;; RTL template is not visible in this excerpt.
16027 (define_expand "vashrv8hi3<mask_name>"
16028 [(set (match_operand:V8HI 0 "register_operand")
16030 (match_operand:V8HI 1 "register_operand")
16031 (match_operand:V8HI 2 "nonimmediate_operand")))]
16032 "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
16036 rtx neg = gen_reg_rtx (V8HImode);
16037 emit_insn (gen_negv8hi2 (neg, operands[2]));
16038 emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
;; Per-element variable arithmetic shift right on V16QI.
;; Negates the count vector (operand 2) and emits gen_xop_shav16qi3 with the
;; negated counts.
16043 (define_expand "vashrv16qi3"
16044 [(set (match_operand:V16QI 0 "register_operand")
16046 (match_operand:V16QI 1 "register_operand")
16047 (match_operand:V16QI 2 "nonimmediate_operand")))]
16050 rtx neg = gen_reg_rtx (V16QImode);
16051 emit_insn (gen_negv16qi2 (neg, operands[2]));
16052 emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
;; Per-element variable arithmetic shift right on V2DI, enabled for XOP or
;; AVX512VL.
;; The visible preparation code is the XOP route: negate the count vector
;; and emit gen_xop_shav2di3.
;; NOTE(review): the guard choosing between the XOP sequence and the plain
;; RTL template is not visible in this excerpt.
16056 (define_expand "vashrv2di3<mask_name>"
16057 [(set (match_operand:V2DI 0 "register_operand")
16059 (match_operand:V2DI 1 "register_operand")
16060 (match_operand:V2DI 2 "nonimmediate_operand")))]
16061 "TARGET_XOP || TARGET_AVX512VL"
16065 rtx neg = gen_reg_rtx (V2DImode);
16066 emit_insn (gen_negv2di2 (neg, operands[2]));
16067 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
;; Per-element variable arithmetic shift right on V4SI, enabled for AVX2 or
;; XOP.
;; The visible preparation code is the XOP route: negate the count vector
;; and emit gen_xop_shav4si3.
;; NOTE(review): the guard choosing between the XOP sequence and the plain
;; ashiftrt template is not visible in this excerpt.
16072 (define_expand "vashrv4si3"
16073 [(set (match_operand:V4SI 0 "register_operand")
16074 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
16075 (match_operand:V4SI 2 "nonimmediate_operand")))]
16076 "TARGET_AVX2 || TARGET_XOP"
16080 rtx neg = gen_reg_rtx (V4SImode);
16081 emit_insn (gen_negv4si2 (neg, operands[2]));
16082 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; Per-element variable arithmetic shift right on V16SI.  Pure RTL template
;; (ashiftrt by a vector count); no preparation code is visible.
;; (Enabling condition not visible in this excerpt.)
16087 (define_expand "vashrv16si3"
16088 [(set (match_operand:V16SI 0 "register_operand")
16089 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
16090 (match_operand:V16SI 2 "nonimmediate_operand")))]
;; Per-element variable arithmetic shift right on V8SI.  Pure RTL template
;; (ashiftrt by a vector count); no preparation code is visible.
;; (Enabling condition not visible in this excerpt.)
16093 (define_expand "vashrv8si3"
16094 [(set (match_operand:V8SI 0 "register_operand")
16095 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
16096 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; Per-element variable shift left for VI12_128 modes.
;; Passes the count vector through unchanged to gen_xop_sha<mode>3 (no
;; negation, unlike the right-shift expanders).
16099 (define_expand "vashl<mode>3"
16100 [(set (match_operand:VI12_128 0 "register_operand")
16102 (match_operand:VI12_128 1 "register_operand")
16103 (match_operand:VI12_128 2 "nonimmediate_operand")))]
16106 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element variable shift left for VI48_128 modes, enabled for AVX2 or
;; XOP.
;; The visible preparation code is the XOP route: force the count vector
;; into a register and emit gen_xop_sha<mode>3 with it.
;; NOTE(review): the guard choosing between the XOP sequence and the plain
;; RTL template is not visible in this excerpt.
16110 (define_expand "vashl<mode>3"
16111 [(set (match_operand:VI48_128 0 "register_operand")
16113 (match_operand:VI48_128 1 "register_operand")
16114 (match_operand:VI48_128 2 "nonimmediate_operand")))]
16115 "TARGET_AVX2 || TARGET_XOP"
16119 operands[2] = force_reg (<MODE>mode, operands[2]);
16120 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; Per-element variable shift left for the VI48_512 modes.
;; No preparation code is visible; the expander just produces its RTL
;; template.  (Enabling condition not visible in this excerpt.)
16125 (define_expand "vashl<mode>3"
16126 [(set (match_operand:VI48_512 0 "register_operand")
16128 (match_operand:VI48_512 1 "register_operand")
16129 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; Per-element variable shift left for the VI48_256 modes.
;; No preparation code is visible; the expander just produces its RTL
;; template.  (Enabling condition not visible in this excerpt.)
16132 (define_expand "vashl<mode>3"
16133 [(set (match_operand:VI48_256 0 "register_operand")
16135 (match_operand:VI48_256 1 "register_operand")
16136 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; vpsha<ssemodesuffix> with a per-element count vector (operand 2).
;; The RTL is an if_then_else over operand 2 whose last arm uses
;; (neg operand 2), so the sign of each count selects the shift direction.
;; Either operand 1 or operand 2 may be memory, not both.
16139 (define_insn "xop_sha<mode>3"
16140 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16141 (if_then_else:VI_128
16143 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16146 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16150 (neg:VI_128 (match_dup 2)))))]
16151 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16152 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16153 [(set_attr "type" "sseishft")
16154 (set_attr "prefix_data16" "0")
16155 (set_attr "prefix_extra" "2")
16156 (set_attr "mode" "TI")])
;; vpshl<ssemodesuffix> with a per-element count vector (operand 2).
;; Same shape as the vpsha pattern: an if_then_else over operand 2 whose
;; last arm uses (neg operand 2).  Either operand 1 or operand 2 may be
;; memory, not both.
16158 (define_insn "xop_shl<mode>3"
16159 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16160 (if_then_else:VI_128
16162 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16165 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16169 (neg:VI_128 (match_dup 2)))))]
16170 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16171 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16172 [(set_attr "type" "sseishft")
16173 (set_attr "prefix_data16" "0")
16174 (set_attr "prefix_extra" "2")
16175 (set_attr "mode" "TI")])
;; Shift of a byte-element vector (VI1_AVX512 modes) by a scalar SImode
;; count, for every code in any_shift.
;; With XOP and V16QImode the count is broadcast into a V16QI register via
;; gen_vec_initv16qi and fed to an XOP variable shift: gen_xop_shlv16qi3
;; for LSHIFTRT, gen_xop_shav16qi3 otherwise.  For codes other than ASHIFT
;; the count has to be negated: a CONST_INT is negated up front with
;; GEN_INT, and gen_negv16qi2 is available for the broadcast vector.
;; NOTE(review): the lines that set and test the `negate' flag are not all
;; visible here -- presumably the vector negate is used only for
;; non-constant counts.
;; The other visible path hands the operation to ix86_expand_vecop_qihi.
16177 (define_expand "<shift_insn><mode>3"
16178 [(set (match_operand:VI1_AVX512 0 "register_operand")
16179 (any_shift:VI1_AVX512
16180 (match_operand:VI1_AVX512 1 "register_operand")
16181 (match_operand:SI 2 "nonmemory_operand")))]
16184 if (TARGET_XOP && <MODE>mode == V16QImode)
16186 bool negate = false;
16187 rtx (*gen) (rtx, rtx, rtx);
16191 if (<CODE> != ASHIFT)
16193 if (CONST_INT_P (operands[2]))
16194 operands[2] = GEN_INT (-INTVAL (operands[2]));
16198 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
16199 for (i = 0; i < 16; i++)
16200 XVECEXP (par, 0, i) = operands[2];
16202 tmp = gen_reg_rtx (V16QImode);
16203 emit_insn (gen_vec_initv16qi (tmp, par));
16206 emit_insn (gen_negv16qi2 (tmp, tmp));
16208 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
16209 emit_insn (gen (operands[0], operands[1], tmp));
16212 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; Arithmetic shift right of V2DI by a scalar DImode count, enabled for XOP
;; or AVX512VL.
;; Without AVX512VL the XOP route is taken: the count is placed in both
;; elements of a V2DI register via gen_vec_initv2di and gen_xop_shav2di3 is
;; emitted.  The count must be negated for a right shift: a CONST_INT is
;; negated up front with GEN_INT, and gen_negv2di2 is available for the
;; broadcast vector.
;; NOTE(review): the lines that set and test the `negate' flag are not all
;; visible here -- presumably the vector negate is used only for
;; non-constant counts.
16216 (define_expand "ashrv2di3"
16217 [(set (match_operand:V2DI 0 "register_operand")
16219 (match_operand:V2DI 1 "register_operand")
16220 (match_operand:DI 2 "nonmemory_operand")))]
16221 "TARGET_XOP || TARGET_AVX512VL"
16223 if (!TARGET_AVX512VL)
16225 rtx reg = gen_reg_rtx (V2DImode);
16227 bool negate = false;
16230 if (CONST_INT_P (operands[2]))
16231 operands[2] = GEN_INT (-INTVAL (operands[2]));
16235 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
16236 for (i = 0; i < 2; i++)
16237 XVECEXP (par, 0, i) = operands[2];
16239 emit_insn (gen_vec_initv2di (reg, par));
16242 emit_insn (gen_negv2di2 (reg, reg));
16244 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
16249 ;; XOP FRCZ support
;; vfrcz<ssemodesuffix> for every mode in FMAMODE: one source operand
;; (register or memory), one register destination.  The insn's "mode"
;; attribute follows the operand mode.
16250 (define_insn "xop_frcz<mode>2"
16251 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
16253 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
16256 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
16257 [(set_attr "type" "ssecvt1")
16258 (set_attr "mode" "<MODE>")])
;; Expander for the scalar ("vm") form of vfrcz on VF_128 modes.
;; Its only preparation step is to set operands[2] to the all-zero vector
;; of the mode; callers therefore pass just operands 0 and 1.
16260 (define_expand "xop_vmfrcz<mode>2"
16261 [(set (match_operand:VF_128 0 "register_operand")
16264 [(match_operand:VF_128 1 "nonimmediate_operand")]
16269 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Matcher for the scalar form of vfrcz (vfrcz<ssescalarmodesuffix>).
;; Operand 2 must be a constant zero vector (const0_operand).  In Intel
;; syntax a memory source is printed with the <iptr> size override.
16271 (define_insn "*xop_vmfrcz<mode>2"
16272 [(set (match_operand:VF_128 0 "register_operand" "=x")
16275 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
16277 (match_operand:VF_128 2 "const0_operand")
16280 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
16281 [(set_attr "type" "ssecvt1")
16282 (set_attr "mode" "<MODE>")])
;; Integer vector compare producing a VI_128 result, emitted as
;; vpcom<cond><ssemodesuffix>.  Operand 1 is the comparison operator itself
;; (any code accepted by ix86_comparison_int_operator); %Y1 prints its
;; condition name into the mnemonic.
;; NOTE(review): "type" is sse4arg here while the unsigned sibling patterns
;; use ssecmp -- confirm whether the difference is intentional.
16284 (define_insn "xop_maskcmp<mode>3"
16285 [(set (match_operand:VI_128 0 "register_operand" "=x")
16286 (match_operator:VI_128 1 "ix86_comparison_int_operator"
16287 [(match_operand:VI_128 2 "register_operand" "x")
16288 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
16290 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16291 [(set_attr "type" "sse4arg")
16292 (set_attr "prefix_data16" "0")
16293 (set_attr "prefix_rep" "0")
16294 (set_attr "prefix_extra" "2")
16295 (set_attr "length_immediate" "1")
16296 (set_attr "mode" "TI")])
;; Unsigned integer vector compare, emitted as vpcom<cond>u<ssemodesuffix>.
;; Operand 1 is the comparison operator (any code accepted by
;; ix86_comparison_uns_operator); %Y1 prints its condition name and the
;; literal "u" in the template selects the unsigned instruction form.
16298 (define_insn "xop_maskcmp_uns<mode>3"
16299 [(set (match_operand:VI_128 0 "register_operand" "=x")
16300 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
16301 [(match_operand:VI_128 2 "register_operand" "x")
16302 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
16304 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16305 [(set_attr "type" "ssecmp")
16306 (set_attr "prefix_data16" "0")
16307 (set_attr "prefix_rep" "0")
16308 (set_attr "prefix_extra" "2")
16309 (set_attr "length_immediate" "1")
16310 (set_attr "mode" "TI")])
16312 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
16313 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
16314 ;; the exact instruction generated for the intrinsic.
;; Same output as the plain unsigned compare (vpcom<cond>u<ssemodesuffix>),
;; but the comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP so the RTL
;; optimizers treat it as opaque and cannot rewrite the comparison code.
16315 (define_insn "xop_maskcmp_uns2<mode>3"
16316 [(set (match_operand:VI_128 0 "register_operand" "=x")
16318 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
16319 [(match_operand:VI_128 2 "register_operand" "x")
16320 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
16321 UNSPEC_XOP_UNSIGNED_CMP))]
16323 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16324 [(set_attr "type" "ssecmp")
16325 (set_attr "prefix_data16" "0")
16326 (set_attr "prefix_extra" "2")
16327 (set_attr "length_immediate" "1")
16328 (set_attr "mode" "TI")])
16330 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
16331 ;; being added here to be complete.
;; vpcomtrue / vpcomfalse, modelled as UNSPEC_XOP_TRUEFALSE.
;; Operand 3 is a compile-time integer flag: nonzero selects
;; vpcomtrue<ssemodesuffix>, zero selects vpcomfalse<ssemodesuffix>.
;; Operand 3 itself is not printed in the assembly.
16332 (define_insn "xop_pcom_tf<mode>3"
16333 [(set (match_operand:VI_128 0 "register_operand" "=x")
16335 [(match_operand:VI_128 1 "register_operand" "x")
16336 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
16337 (match_operand:SI 3 "const_int_operand" "n")]
16338 UNSPEC_XOP_TRUEFALSE))]
16341 return ((INTVAL (operands[3]) != 0)
16342 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16343 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
16345 [(set_attr "type" "ssecmp")
16346 (set_attr "prefix_data16" "0")
16347 (set_attr "prefix_extra" "2")
16348 (set_attr "length_immediate" "1")
16349 (set_attr "mode" "TI")])
;; vpermil2<ssemodesuffix> for 128- and 256-bit float vector modes.
;; Operands 1 and 2 are the two data sources, operand 3 is an integer
;; vector of the matching <sseintvecmode> (register or memory), and
;; operand 4 is an immediate restricted to 0..3.
16351 (define_insn "xop_vpermil2<mode>3"
16352 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
16354 [(match_operand:VF_128_256 1 "register_operand" "x")
16355 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
16356 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
16357 (match_operand:SI 4 "const_0_to_3_operand" "n")]
16360 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
16361 [(set_attr "type" "sse4arg")
16362 (set_attr "length_immediate" "1")
16363 (set_attr "mode" "<MODE>")])
16365 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AES encryption round on V2DI, modelled as an unspec of operands 1 and 2.
;; Alternative 0 (isa noavx): two-operand aesenc, destination tied to
;; operand 1 by the "0" constraint.
;; Alternative 1 (isa avx): three-operand VEX form vaesenc.
16367 (define_insn "aesenc"
16368 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16369 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16370 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16374 aesenc\t{%2, %0|%0, %2}
16375 vaesenc\t{%2, %1, %0|%0, %1, %2}"
16376 [(set_attr "isa" "noavx,avx")
16377 (set_attr "type" "sselog1")
16378 (set_attr "prefix_extra" "1")
16379 (set_attr "prefix" "orig,vex")
16380 (set_attr "btver2_decode" "double,double")
16381 (set_attr "mode" "TI")])
;; Final AES encryption round on V2DI, modelled as UNSPEC_AESENCLAST.
;; Alternative 0 (isa noavx): two-operand aesenclast, destination tied to
;; operand 1.  Alternative 1 (isa avx): three-operand vaesenclast.
16383 (define_insn "aesenclast"
16384 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16385 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16386 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16387 UNSPEC_AESENCLAST))]
16390 aesenclast\t{%2, %0|%0, %2}
16391 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
16392 [(set_attr "isa" "noavx,avx")
16393 (set_attr "type" "sselog1")
16394 (set_attr "prefix_extra" "1")
16395 (set_attr "prefix" "orig,vex")
16396 (set_attr "btver2_decode" "double,double")
16397 (set_attr "mode" "TI")])
;; AES decryption round on V2DI, modelled as an unspec of operands 1 and 2.
;; Alternative 0 (isa noavx): two-operand aesdec, destination tied to
;; operand 1.  Alternative 1 (isa avx): three-operand vaesdec.
16399 (define_insn "aesdec"
16400 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16401 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16402 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16406 aesdec\t{%2, %0|%0, %2}
16407 vaesdec\t{%2, %1, %0|%0, %1, %2}"
16408 [(set_attr "isa" "noavx,avx")
16409 (set_attr "type" "sselog1")
16410 (set_attr "prefix_extra" "1")
16411 (set_attr "prefix" "orig,vex")
16412 (set_attr "btver2_decode" "double,double")
16413 (set_attr "mode" "TI")])
;; Final AES decryption round on V2DI, modelled as UNSPEC_AESDECLAST.
;; Alternative 0 (isa noavx): two-operand aesdeclast, destination tied to
;; operand 1.  Alternative 1 (isa avx): three-operand vaesdeclast.
16415 (define_insn "aesdeclast"
16416 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16417 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16418 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
16419 UNSPEC_AESDECLAST))]
16422 aesdeclast\t{%2, %0|%0, %2}
16423 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
16424 [(set_attr "isa" "noavx,avx")
16425 (set_attr "type" "sselog1")
16426 (set_attr "prefix_extra" "1")
16427 (set_attr "prefix" "orig,vex")
16428 (set_attr "btver2_decode" "double,double")
16429 (set_attr "mode" "TI")])
;; aesimc on V2DI: single source (register or memory), single destination.
;; The %v template prefix together with prefix = maybe_vex lets one
;; alternative serve both the legacy and the VEX encoding.
16431 (define_insn "aesimc"
16432 [(set (match_operand:V2DI 0 "register_operand" "=x")
16433 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
16436 "%vaesimc\t{%1, %0|%0, %1}"
16437 [(set_attr "type" "sselog1")
16438 (set_attr "prefix_extra" "1")
16439 (set_attr "prefix" "maybe_vex")
16440 (set_attr "mode" "TI")])
;; aeskeygenassist on V2DI, modelled as UNSPEC_AESKEYGENASSIST.
;; Operand 2 is an 8-bit immediate (const_0_to_255_operand).  The %v prefix
;; with prefix = maybe_vex covers both legacy and VEX encodings.
16442 (define_insn "aeskeygenassist"
16443 [(set (match_operand:V2DI 0 "register_operand" "=x")
16444 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
16445 (match_operand:SI 2 "const_0_to_255_operand" "n")]
16446 UNSPEC_AESKEYGENASSIST))]
16448 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
16449 [(set_attr "type" "sselog1")
16450 (set_attr "prefix_extra" "1")
16451 (set_attr "length_immediate" "1")
16452 (set_attr "prefix" "maybe_vex")
16453 (set_attr "mode" "TI")])
;; pclmulqdq on V2DI with an 8-bit immediate in operand 3.
;; Alternative 0 (isa noavx): two-operand form, destination tied to
;; operand 1.  Alternative 1 (isa avx): three-operand vpclmulqdq.
16455 (define_insn "pclmulqdq"
16456 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
16457 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
16458 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
16459 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
16463 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
16464 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16465 [(set_attr "isa" "noavx,avx")
16466 (set_attr "type" "sselog1")
16467 (set_attr "prefix_extra" "1")
16468 (set_attr "length_immediate" "1")
16469 (set_attr "prefix" "orig,vex")
16470 (set_attr "mode" "TI")])
;; Builds the PARALLEL that represents vzeroall.
;; Element 0 is an UNSPEC_VOLATILE marker; it is followed by one SET per
;; SSE register (16 in 64-bit mode, 8 otherwise) storing the V8SImode zero
;; constant, so the RTL records that every such register is cleared.
16472 (define_expand "avx_vzeroall"
16473 [(match_par_dup 0 [(const_int 0)])]
16476 int nregs = TARGET_64BIT ? 16 : 8;
16479 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
16481 XVECEXP (operands[0], 0, 0)
16482 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
16485 for (regno = 0; regno < nregs; regno++)
16486 XVECEXP (operands[0], 0, regno + 1)
16487 = gen_rtx_SET (gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
16488 CONST0_RTX (V8SImode));
;; Matcher for the vzeroall PARALLEL: the whole vector must satisfy the
;; vzeroall_operation predicate and start with the UNSPECV_VZEROALL
;; marker.  The insn has no ModRM byte and touches no memory (see attrs).
16491 (define_insn "*avx_vzeroall"
16492 [(match_parallel 0 "vzeroall_operation"
16493 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
16496 [(set_attr "type" "sse")
16497 (set_attr "modrm" "0")
16498 (set_attr "memory" "none")
16499 (set_attr "prefix" "vex")
16500 (set_attr "btver2_decode" "vector")
16501 (set_attr "mode" "OI")])
16503 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
16504 ;; if the upper 128bits are unused.
;; vzeroupper, represented only as the volatile unspec UNSPECV_VZEROUPPER
;; with no operands.  No ModRM byte, no memory access, VEX encoded.
16505 (define_insn "avx_vzeroupper"
16506 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
16509 [(set_attr "type" "sse")
16510 (set_attr "modrm" "0")
16511 (set_attr "memory" "none")
16512 (set_attr "prefix" "vex")
16513 (set_attr "btver2_decode" "vector")
16514 (set_attr "mode" "OI")])
;; vpbroadcast<ssemodesuffix>: replicate element 0 of a 128-bit source
;; (mode <ssexmmmode>, register or memory) across every element of the
;; destination, for all modes in iterator VI.  In Intel syntax a memory
;; source is printed with the <iptr> size override.
16516 (define_insn "avx2_pbroadcast<mode>"
16517 [(set (match_operand:VI 0 "register_operand" "=x")
16519 (vec_select:<ssescalarmode>
16520 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
16521 (parallel [(const_int 0)]))))]
16523 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
16524 [(set_attr "type" "ssemov")
16525 (set_attr "prefix_extra" "1")
16526 (set_attr "prefix" "vex")
16527 (set_attr "mode" "<sseinsnmode>")])
;; vpbroadcast<ssemodesuffix> where the source is itself a 256-bit vector
;; and element 0 is replicated.
;; Alternative 0 (memory source) prints the operand with <iptr>;
;; alternative 1 (register source) prints it with the %x modifier, i.e. as
;; the xmm name of the register.
16529 (define_insn "avx2_pbroadcast<mode>_1"
16530 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
16531 (vec_duplicate:VI_256
16532 (vec_select:<ssescalarmode>
16533 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
16534 (parallel [(const_int 0)]))))]
16537 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
16538 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
16539 [(set_attr "type" "ssemov")
16540 (set_attr "prefix_extra" "1")
16541 (set_attr "prefix" "vex")
16542 (set_attr "mode" "<sseinsnmode>")])
;; Variable permute vperm<ssemodesuffix> for 256/512-bit vectors with
;; 32- or 64-bit elements (VI48F_256_512), with optional masking via
;; <mask_name>.  Operand 1 is the data (register or memory); operand 2 is
;; an integer vector register of the matching <sseintvecmode>.  In the
;; assembly the data comes first in AT&T order (%1, %2, %0).
16544 (define_insn "<avx2_avx512>_permvar<mode><mask_name>"
16545 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
16546 (unspec:VI48F_256_512
16547 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
16548 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16550 "TARGET_AVX2 && <mask_mode512bit_condition>"
16551 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16552 [(set_attr "type" "sselog")
16553 (set_attr "prefix" "<mask_prefix2>")
16554 (set_attr "mode" "<sseinsnmode>")])
;; Variable permute vperm<ssemodesuffix> for byte-element vectors
;; (VI1_AVX512VL), requiring AVX512VBMI; optionally masked via <mask_name>.
;; Operand 1 is the data (register or memory), operand 2 an integer vector
;; register of the matching <sseintvecmode>.
16556 (define_insn "<avx512>_permvar<mode><mask_name>"
16557 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
16558 (unspec:VI1_AVX512VL
16559 [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
16560 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16562 "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
16563 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16564 [(set_attr "type" "sselog")
16565 (set_attr "prefix" "<mask_prefix2>")
16566 (set_attr "mode" "<sseinsnmode>")])
;; Variable permute vperm<ssemodesuffix> for 16-bit-element vectors
;; (VI2_AVX512VL), requiring AVX512BW; optionally masked via <mask_name>.
;; Operand 1 is the data (register or memory), operand 2 an integer vector
;; register of the matching <sseintvecmode>.
16568 (define_insn "<avx512>_permvar<mode><mask_name>"
16569 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
16570 (unspec:VI2_AVX512VL
16571 [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
16572 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
16574 "TARGET_AVX512BW && <mask_mode512bit_condition>"
16575 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
16576 [(set_attr "type" "sselog")
16577 (set_attr "prefix" "<mask_prefix2>")
16578 (set_attr "mode" "<sseinsnmode>")])
;; Immediate-controlled permute of 64-bit-element vectors (VI8F_256_512).
;; Splits the 8-bit immediate in operand 2 into four 2-bit selectors
;; (bits 1:0, 3:2, 5:4, 7:6) and passes them as separate constants to the
;; ..._perm<mode>_1 pattern, which expresses the permute as a vec_select.
16580 (define_expand "<avx2_avx512>_perm<mode>"
16581 [(match_operand:VI8F_256_512 0 "register_operand")
16582 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
16583 (match_operand:SI 2 "const_0_to_255_operand")]
16586 int mask = INTVAL (operands[2]);
16587 emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1],
16588 GEN_INT ((mask >> 0) & 3),
16589 GEN_INT ((mask >> 2) & 3),
16590 GEN_INT ((mask >> 4) & 3),
16591 GEN_INT ((mask >> 6) & 3)));
;; Masked variant of the immediate-controlled permute.
;; Splits the 8-bit immediate (operand 2) into four 2-bit selectors exactly
;; like the unmasked expander, then forwards them together with operand 3
;; (a vector_move_operand) and operand 4 (a register of the mask mode
;; <avx512fmaskmode>) to the ..._perm<mode>_1_mask pattern.
16595 (define_expand "<avx512>_perm<mode>_mask"
16596 [(match_operand:VI8F_256_512 0 "register_operand")
16597 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
16598 (match_operand:SI 2 "const_0_to_255_operand")
16599 (match_operand:VI8F_256_512 3 "vector_move_operand")
16600 (match_operand:<avx512fmaskmode> 4 "register_operand")]
16603 int mask = INTVAL (operands[2]);
16604 emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
16605 GEN_INT ((mask >> 0) & 3),
16606 GEN_INT ((mask >> 2) & 3),
16607 GEN_INT ((mask >> 4) & 3),
16608 GEN_INT ((mask >> 6) & 3),
16609 operands[3], operands[4]));
;; Permute expressed as a vec_select with four constant indices, each
;; restricted to 0..3 (operands 2-5).
;; At output time the four indices are packed back into one 8-bit
;; immediate (2 bits each, operand 2 in the low bits), operands[2] is
;; overwritten with that immediate, and vperm<ssemodesuffix> is printed.
16613 (define_insn "<avx2_avx512>_perm<mode>_1<mask_name>"
16614 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
16615 (vec_select:VI8F_256_512
16616 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
16617 (parallel [(match_operand 2 "const_0_to_3_operand")
16618 (match_operand 3 "const_0_to_3_operand")
16619 (match_operand 4 "const_0_to_3_operand")
16620 (match_operand 5 "const_0_to_3_operand")])))]
16621 "TARGET_AVX2 && <mask_mode512bit_condition>"
16624 mask |= INTVAL (operands[2]) << 0;
16625 mask |= INTVAL (operands[3]) << 2;
16626 mask |= INTVAL (operands[4]) << 4;
16627 mask |= INTVAL (operands[5]) << 6;
16628 operands[2] = GEN_INT (mask);
16629 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
16631 [(set_attr "type" "sselog")
16632 (set_attr "prefix" "<mask_prefix2>")
16633 (set_attr "mode" "<sseinsnmode>")])
;; vperm2i128 on V4DI: two data sources (operand 1 register, operand 2
;; register or memory) and an 8-bit immediate in operand 3.
16635 (define_insn "avx2_permv2ti"
16636 [(set (match_operand:V4DI 0 "register_operand" "=x")
16638 [(match_operand:V4DI 1 "register_operand" "x")
16639 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
16640 (match_operand:SI 3 "const_0_to_255_operand" "n")]
16643 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
16644 [(set_attr "type" "sselog")
16645 (set_attr "prefix" "vex")
16646 (set_attr "mode" "OI")])
;; vbroadcastsd with a register source: element 0 of the V2DF register in
;; operand 1 is replicated into all four elements of the V4DF destination.
;; Only the register form is accepted here (register_operand, "x").
16648 (define_insn "avx2_vec_dupv4df"
16649 [(set (match_operand:V4DF 0 "register_operand" "=x")
16650 (vec_duplicate:V4DF
16652 (match_operand:V2DF 1 "register_operand" "x")
16653 (parallel [(const_int 0)]))))]
16655 "vbroadcastsd\t{%1, %0|%0, %1}"
16656 [(set_attr "type" "sselog1")
16657 (set_attr "prefix" "vex")
16658 (set_attr "mode" "V4DF")])
;; vpbroadcast<ssemodesuffix> of element 0 of a full-width integer vector
;; (VI_AVX512BW), with a register ("v") and a memory ("m") alternative.
;; NOTE(review): three things here look wrong and should be confirmed
;; against the sibling avx2_pbroadcast<mode>_1 pattern:
;;   - the output template spans two lines but does not begin with "@", so
;;     it is one template containing a newline rather than one template
;;     per alternative;
;;   - the line order (the <iptr> memory form first, the %x register form
;;     second) is the reverse of the "v,m" constraint order;
;;   - the vec_select uses the vector mode VI_AVX512BW, whereas selecting a
;;     single element would normally have mode <ssescalarmode>.
16660 (define_insn "<avx512>_vec_dup<mode>_1"
16661 [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
16662 (vec_duplicate:VI_AVX512BW
16663 (vec_select:VI_AVX512BW
16664 (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
16665 (parallel [(const_int 0)]))))]
16667 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
16668 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
16669 [(set_attr "type" "ssemov")
16670 (set_attr "prefix" "evex")
16671 (set_attr "mode" "<sseinsnmode>")])
;; EVEX broadcast of element 0 of a 128-bit source (register or memory) to
;; every element of a 32/64-bit-element vector (V48_AVX512VL), optionally
;; masked.  The mnemonic is assembled from <sseintprefix> and
;; <bcstscalarsuff>, so the same pattern serves integer and float modes.
16673 (define_insn "<avx512>_vec_dup<mode><mask_name>"
16674 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
16675 (vec_duplicate:V48_AVX512VL
16676 (vec_select:<ssescalarmode>
16677 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16678 (parallel [(const_int 0)]))))]
16680 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16681 [(set_attr "type" "ssemov")
16682 (set_attr "prefix" "evex")
16683 (set_attr "mode" "<sseinsnmode>")])
;; EVEX vpbroadcast<bcstscalarsuff> of element 0 of a 128-bit source
;; (register or memory) to every element of an 8/16-bit-element vector
;; (VI12_AVX512VL), optionally masked.
16685 (define_insn "<avx512>_vec_dup<mode><mask_name>"
16686 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
16687 (vec_duplicate:VI12_AVX512VL
16688 (vec_select:<ssescalarmode>
16689 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16690 (parallel [(const_int 0)]))))]
16692 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16693 [(set_attr "type" "ssemov")
16694 (set_attr "prefix" "evex")
16695 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a whole 128-bit vector (<ssexmmmode>) into a 16-element
;; 512-bit vector (V16FI), optionally masked.
;; Alternative 0 (register source): vshuf<shuffletype>32x4 with immediate
;; 0, using the source register printed at full width (%g1) for both inputs.
;; Alternative 1 (memory source): vbroadcast<shuffletype>32x4.
16697 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16698 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
16699 (vec_duplicate:V16FI
16700 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16703 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
16704 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16705 [(set_attr "type" "ssemov")
16706 (set_attr "prefix" "evex")
16707 (set_attr "mode" "<sseinsnmode>")])
;; Replicate a <ssehalfvecmode> operand across a V8FI destination.
;; Alternative "v" (register source): vshuf<shuffletype>64x2 with
;; immediate 0x44 on the %g1 form of operand 1.
;; Alternative "m" (memory source): vbroadcast<shuffletype>64x4.
16709 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
16710 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
16711 (vec_duplicate:V8FI
16712 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
16715 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
16716 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16717 [(set_attr "type" "ssemov")
16718 (set_attr "prefix" "evex")
16719 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a scalar <ssescalarmode> operand to all elements of a
;; VI12_AVX512VL destination.  Alternative "vm" takes the scalar from a
;; vector register or memory; alternative "r" takes it from a general
;; register and prints it with the %k1 operand modifier.
16721 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
16722 [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
16723 (vec_duplicate:VI12_AVX512VL
16724 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
16727 vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
16728 vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
16729 [(set_attr "type" "ssemov")
16730 (set_attr "prefix" "evex")
16731 (set_attr "mode" "<sseinsnmode>")
;; Broadcast a scalar <ssescalarmode> operand to all elements of a
;; V48_AVX512VL destination, from a vector register/memory ("vm") or a
;; general register ("r"), with one template for both alternatives.
;; The "enabled" attribute for alternative 1 (the "r" one) is computed from
;; a test that the scalar mode is of class MODE_INT and that it is not
;; DImode unless TARGET_64BIT.
16733 (define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
16734 [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
16735 (vec_duplicate:V48_AVX512VL
16736 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
16738 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16739 [(set_attr "type" "ssemov")
16740 (set_attr "prefix" "evex")
16741 (set_attr "mode" "<sseinsnmode>")
16742 (set (attr "enabled")
16743 (if_then_else (eq_attr "alternative" "1")
16744 (symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
16745 && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
;; Broadcast an SFmode scalar to all four elements of a V4SF register.
;; Alternatives (see the "isa" attribute: avx, avx, noavx):
;;   0: register source, vshufps with immediate 0;
;;   1: memory source, vbroadcastss;
;;   2: source tied to the destination ("0"), in-place shufps with
;;      immediate 0.
16748 (define_insn "vec_dupv4sf"
16749 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
16750 (vec_duplicate:V4SF
16751 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
16754 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
16755 vbroadcastss\t{%1, %0|%0, %1}
16756 shufps\t{$0, %0, %0|%0, %0, 0}"
16757 [(set_attr "isa" "avx,avx,noavx")
16758 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
16759 (set_attr "length_immediate" "1,0,1")
16760 (set_attr "prefix_extra" "0,1,*")
16761 (set_attr "prefix" "vex,vex,orig")
16762 (set_attr "mode" "V4SF")])
;; Broadcast an SImode scalar to all four elements of a V4SI register.
;; Alternatives (see the "isa" attribute: sse2, avx, noavx):
;;   0: register source, pshufd with immediate 0 (printed with the %v
;;      prefix modifier);
;;   1: memory source, vbroadcastss (executed in V4SF mode per the "mode"
;;      attribute);
;;   2: source tied to the destination ("0"), in-place shufps.
16764 (define_insn "*vec_dupv4si"
16765 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
16766 (vec_duplicate:V4SI
16767 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
16770 %vpshufd\t{$0, %1, %0|%0, %1, 0}
16771 vbroadcastss\t{%1, %0|%0, %1}
16772 shufps\t{$0, %0, %0|%0, %0, 0}"
16773 [(set_attr "isa" "sse2,avx,noavx")
16774 (set_attr "type" "sselog1,ssemov,sselog1")
16775 (set_attr "length_immediate" "1,0,1")
16776 (set_attr "prefix_extra" "0,1,*")
16777 (set_attr "prefix" "maybe_vex,vex,orig")
16778 (set_attr "mode" "TI,V4SF,V4SF")])
;; Broadcast a DImode scalar to both elements of a V2DI register.
;; There are four constraint alternatives, selected per ISA through the
;; "isa" attribute (sse2_noavx, avx, sse3, noavx); the "mode" attribute
;; shows they execute as TI, TI, DF and V4SF operations respectively.
16780 (define_insn "*vec_dupv2di"
16781 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
16782 (vec_duplicate:V2DI
16783 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
16787 vpunpcklqdq\t{%d1, %0|%0, %d1}
16788 %vmovddup\t{%1, %0|%0, %1}
16790 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
16791 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
16792 (set_attr "prefix" "orig,vex,maybe_vex,orig")
16793 (set_attr "mode" "TI,TI,DF,V4SF")])
;; vbroadcasti128: load a <ssehalfvecmode> value from memory (operand 1 is
;; memory-only) into a VI_256 destination register.
16795 (define_insn "avx2_vbroadcasti128_<mode>"
16796 [(set (match_operand:VI_256 0 "register_operand" "=x")
16798 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
16801 "vbroadcasti128\t{%1, %0|%0, %1}"
16802 [(set_attr "type" "ssemov")
16803 (set_attr "prefix_extra" "1")
16804 (set_attr "prefix" "vex")
16805 (set_attr "mode" "OI")])
16807 ;; Modes handled by AVX vec_dup patterns.
;; These are the 256-bit vectors with 32-bit or 64-bit elements, both
;; integer and floating point.
16808 (define_mode_iterator AVX_VEC_DUP_MODE
16809 [V8SI V8SF V4DI V4DF])
16810 ;; Modes handled by AVX2 vec_dup patterns.
;; These are the 128-bit and 256-bit integer vectors with 8-, 16- or
;; 32-bit elements.
16811 (define_mode_iterator AVX2_VEC_DUP_MODE
16812 [V32QI V16QI V16HI V8HI V8SI V4SI])
;; Broadcast a scalar to every element of an AVX2_VEC_DUP_MODE vector.
;; Operand 1 alternatives are memory ("m"), a vector register ("x",
;; printed with %x1) and a general register ("$r").
16814 (define_insn "*vec_dup<mode>"
16815 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi")
16816 (vec_duplicate:AVX2_VEC_DUP_MODE
16817 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
16820 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
16821 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
16823 [(set_attr "type" "ssemov")
16824 (set_attr "prefix_extra" "1")
16825 (set_attr "prefix" "maybe_evex")
16826 (set_attr "mode" "<sseinsnmode>")])
;; Broadcast a scalar to every element of an AVX_VEC_DUP_MODE vector.
;; The four alternatives pair up by ISA (see the "isa" attribute:
;; avx2, noavx2, avx2, noavx2): memory sources for AVX2 and non-AVX2
;; targets, then register sources ("v" and "?x") for AVX2 and non-AVX2
;; targets.  The non-AVX2 alternatives run in V8SF mode per "mode".
16828 (define_insn "vec_dup<mode>"
16829 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,v,x")
16830 (vec_duplicate:AVX_VEC_DUP_MODE
16831 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,v,?x")))]
16834 v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
16835 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
16836 v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
16838 [(set_attr "type" "ssemov")
16839 (set_attr "prefix_extra" "1")
16840 (set_attr "prefix" "maybe_evex")
16841 (set_attr "isa" "avx2,noavx2,avx2,noavx2")
16842 (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,V8SF")])
;; Post-reload handling of an AVX2_VEC_DUP_MODE broadcast whose scalar
;; source ended up in a general register (GENERAL_REG_P).  It first moves
;; the SImode low part of the scalar into the V4SI low part of the
;; destination via gen_vec_setv4si_0 (with a zero vector as the other
;; input), then emits gen_avx2_pbroadcast<mode> on the destination.
;; Not applied when a direct GPR broadcast is available; see the comment
;; in the condition.
16845 [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
16846 (vec_duplicate:AVX2_VEC_DUP_MODE
16847 (match_operand:<ssescalarmode> 1 "register_operand")))]
16849 /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
16850 available, because then we can broadcast from GPRs directly.
16851 For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
16852 for V*SI mode it requires just -mavx512vl. */
16853 && !(TARGET_AVX512VL
16854 && (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
16855 && reload_completed && GENERAL_REG_P (operands[1])"
16858 emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
16859 CONST0_RTX (V4SImode),
16860 gen_lowpart (SImode, operands[1])));
16861 emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
16862 gen_lowpart (<ssexmmmode>mode,
;; Post-reload handling of a register-source AVX_VEC_DUP_MODE broadcast on
;; AVX targets without AVX2.  Operand 2 is created as the
;; <ssehalfvecmode> view of the destination's hard register; the scalar
;; is first duplicated into that half, and the full vector is then
;; formed as the vec_concat of that half with itself.
16868 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
16869 (vec_duplicate:AVX_VEC_DUP_MODE
16870 (match_operand:<ssescalarmode> 1 "register_operand")))]
16871 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
16872 [(set (match_dup 2)
16873 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
16875 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
16876 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; Fill a V_256 destination from a <ssehalfvecmode> operand.
;; Alternatives:
;;   "m"  memory source: vbroadcast<i128>;
;;   "0"  source tied to the destination: vinsert<i128> with immediate 1;
;;   "?x" other register: vperm2<i128> with immediate 0 on the %t1 form
;;        of operand 1.
16878 (define_insn "avx_vbroadcastf128_<mode>"
16879 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
16881 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
16885 vbroadcast<i128>\t{%1, %0|%0, %1}
16886 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
16887 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
16888 [(set_attr "type" "ssemov,sselog1,sselog1")
16889 (set_attr "prefix_extra" "1")
16890 (set_attr "length_immediate" "0,1,1")
16891 (set_attr "prefix" "vex")
16892 (set_attr "mode" "<sseinsnmode>")])
16894 ;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
;; The 256-bit and 128-bit modes are enabled only with TARGET_AVX512VL;
;; V4SF is deliberately absent from the list.
16895 (define_mode_iterator VI4F_BRCST32x2
16896 [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
16897 V16SF (V8SF "TARGET_AVX512VL")])
;; Maps each 64-bit-element vector mode used by the 64x2 broadcast to the
;; matching two-element mode (V2DF for float, V2DI for integer).
16899 (define_mode_attr 64x2mode
16900 [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
;; Maps each 32-bit-element vector mode used by the 32x2 broadcast to the
;; matching two-element mode (V2SF for float, V2SI for integer).
16902 (define_mode_attr 32x2mode
16903 [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
16904 (V8SF "V2SF") (V4SI "V2SI")])
;; vbroadcast<shuffletype>32x2: select elements 0 and 1 of operand 1
;; (mode <ssexmmmode>, register or memory) as a <32x2mode> pair and
;; duplicate that pair across a VI4F_BRCST32x2 destination.
16906 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
16907 [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
16908 (vec_duplicate:VI4F_BRCST32x2
16909 (vec_select:<32x2mode>
16910 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
16911 (parallel [(const_int 0) (const_int 1)]))))]
16913 "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16914 [(set_attr "type" "ssemov")
16915 (set_attr "prefix_extra" "1")
16916 (set_attr "prefix" "evex")
16917 (set_attr "mode" "<sseinsnmode>")])
;; Replicate a <ssexmmmode> operand across a VI4F_256 destination.
;; Alternative "v" (register source): vshuf<shuffletype>32x4 with
;; immediate 0 on the %t1 form of operand 1.
;; Alternative "m" (memory source): vbroadcast<shuffletype>32x4.
16919 (define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
16920 [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
16921 (vec_duplicate:VI4F_256
16922 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
16925 vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
16926 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16927 [(set_attr "type" "ssemov")
16928 (set_attr "prefix_extra" "1")
16929 (set_attr "prefix" "evex")
16930 (set_attr "mode" "<sseinsnmode>")])
;; Replicate a <ssehalfvecmode> operand across a V16FI destination.
;; Alternative "v" (register source): vshuf<shuffletype>32x4 with
;; immediate 0x44 on the %g1 form of operand 1.
;; Alternative "m" (memory source): vbroadcast<shuffletype>32x8.
16932 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
16933 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
16934 (vec_duplicate:V16FI
16935 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
16938 vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
16939 vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16940 [(set_attr "type" "ssemov")
16941 (set_attr "prefix_extra" "1")
16942 (set_attr "prefix" "evex")
16943 (set_attr "mode" "<sseinsnmode>")])
16945 ;; For broadcast[i|f]64x2
;; The 256-bit modes are enabled only with TARGET_AVX512VL.
16946 (define_mode_iterator VI8F_BRCST64x2
16947 [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
;; Replicate a <64x2mode> operand across a VI8F_BRCST64x2 destination.
;; Alternative "v" (register source): vshuf<shuffletype>64x2 with
;; immediate 0, printing operand 1 through the <concat_tg_mode> modifier.
;; Alternative "m" (memory source): vbroadcast<shuffletype>64x2.
16949 (define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
16950 [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
16951 (vec_duplicate:VI8F_BRCST64x2
16952 (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
16955 vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
16956 vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
16957 [(set_attr "type" "ssemov")
16958 (set_attr "prefix_extra" "1")
16959 (set_attr "prefix" "evex")
16960 (set_attr "mode" "<sseinsnmode>")])
;; vpbroadcastmb2q: broadcast a QImode register operand (constraint "Yk",
;; presumably a mask register) to every element of a VI8_AVX512VL vector.
16962 (define_insn "avx512cd_maskb_vec_dup<mode>"
16963 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
16964 (vec_duplicate:VI8_AVX512VL
16966 (match_operand:QI 1 "register_operand" "Yk"))))]
16968 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
16969 [(set_attr "type" "mskmov")
16970 (set_attr "prefix" "evex")
16971 (set_attr "mode" "XI")])
;; vpbroadcastmw2d: broadcast an HImode register operand (constraint "Yk",
;; presumably a mask register) to every element of a VI4_AVX512VL vector.
16973 (define_insn "avx512cd_maskw_vec_dup<mode>"
16974 [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
16975 (vec_duplicate:VI4_AVX512VL
16977 (match_operand:HI 1 "register_operand" "Yk"))))]
16979 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
16980 [(set_attr "type" "mskmov")
16981 (set_attr "prefix" "evex")
16982 (set_attr "mode" "XI")])
16984 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
16985 ;; If it so happens that the input is in memory, use vbroadcast.
16986 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF variant.  Operand 3 is the selected element index (elt).
;; For a memory input the address is advanced by elt * 4 bytes and
;; narrowed to SFmode so vbroadcastss loads that element directly.
;; Otherwise the vpermilps immediate is elt * 0x55, which repeats the
;; 2-bit index in all four selector fields.
16987 (define_insn "*avx_vperm_broadcast_v4sf"
16988 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
16990 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
16991 (match_parallel 2 "avx_vbroadcast_operand"
16992 [(match_operand 3 "const_int_operand" "C,n,n")])))]
16995 int elt = INTVAL (operands[3]);
16996 switch (which_alternative)
17000 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
17001 return "vbroadcastss\t{%1, %0|%0, %k1}";
17003 operands[2] = GEN_INT (elt * 0x55);
17004 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
17006 gcc_unreachable ();
17009 [(set_attr "type" "ssemov,ssemov,sselog1")
17010 (set_attr "prefix_extra" "1")
17011 (set_attr "length_immediate" "0,0,1")
17012 (set_attr "prefix" "vex")
17013 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit (VF_256) broadcast-as-permutation pattern, split after reload
;; (the split is skipped for V4DFmode when TARGET_AVX2).  Operand 3 is the
;; selected element index (elt).
;; The split code visible here:
;;   - with TARGET_AVX2 and elt == 0, emits gen_vec_dup<mode> on the low
;;     scalar part of the source;
;;   - otherwise first uses vpermil to spread the wanted element within
;;     its 128-bit lane, then vperm2f128 to copy that lane to both lanes;
;;   - rewrites operand 1 with adjust_address to address the selected
;;     scalar element (offset elt * element size).
17015 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
17016 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
17018 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
17019 (match_parallel 2 "avx_vbroadcast_operand"
17020 [(match_operand 3 "const_int_operand" "C,n,n")])))]
17023 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
17024 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
17026 rtx op0 = operands[0], op1 = operands[1];
17027 int elt = INTVAL (operands[3]);
17033 if (TARGET_AVX2 && elt == 0)
17035 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
17040 /* Shuffle element we care about into all elements of the 128-bit lane.
17041 The other lane gets shuffled too, but we don't care. */
17042 if (<MODE>mode == V4DFmode)
17043 mask = (elt & 1 ? 15 : 0);
17045 mask = (elt & 3) * 0x55;
17046 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
17048 /* Shuffle the lane we care about into both lanes of the dest. */
17049 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
17050 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
17054 operands[1] = adjust_address (op1, <ssescalarmode>mode,
17055 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Expander for vpermil with an 8-bit immediate on the VF2 modes.
;; It converts the immediate into an explicit element-index PARALLEL:
;; elements are handled in pairs (step 2); bit i of the mask chooses
;; between the two elements of pair i, bit i+1 does so for the second
;; result of the same pair.
17058 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
17059 [(set (match_operand:VF2 0 "register_operand")
17061 (match_operand:VF2 1 "nonimmediate_operand")
17062 (match_operand:SI 2 "const_0_to_255_operand")))]
17063 "TARGET_AVX && <mask_mode512bit_condition>"
17065 int mask = INTVAL (operands[2]);
17066 rtx perm[<ssescalarnum>];
17069 for (i = 0; i < <ssescalarnum>; i = i + 2)
17071 perm[i] = GEN_INT (((mask >> i) & 1) + i);
17072 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
17076 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Expander for vpermil with an 8-bit immediate on the VF1 modes.
;; It converts the immediate into an explicit element-index PARALLEL:
;; elements are handled in groups of four (step 4), and the same four
;; 2-bit fields of the mask (bits 0-1, 2-3, 4-5, 6-7) are reused for
;; every group, offset by the group's base index.
17079 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
17080 [(set (match_operand:VF1 0 "register_operand")
17082 (match_operand:VF1 1 "nonimmediate_operand")
17083 (match_operand:SI 2 "const_0_to_255_operand")))]
17084 "TARGET_AVX && <mask_mode512bit_condition>"
17086 int mask = INTVAL (operands[2]);
17087 rtx perm[<ssescalarnum>];
17090 for (i = 0; i < <ssescalarnum>; i = i + 4)
17092 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
17093 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
17094 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
17095 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
17099 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Matching insn for an immediate vpermil on the VF modes, expressed with
;; an explicit PARALLEL selector (operand 2).
;; avx_vpermilp_parallel is used twice: in the condition, where a nonzero
;; result accepts the selector, and in the output code, where the result
;; minus one becomes the instruction's immediate.
17102 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
17103 [(set (match_operand:VF 0 "register_operand" "=v")
17105 (match_operand:VF 1 "nonimmediate_operand" "vm")
17106 (match_parallel 2 ""
17107 [(match_operand 3 "const_int_operand")])))]
17108 "TARGET_AVX && <mask_mode512bit_condition>
17109 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
17111 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
17112 operands[2] = GEN_INT (mask);
17113 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
17115 [(set_attr "type" "sselog")
17116 (set_attr "prefix_extra" "1")
17117 (set_attr "length_immediate" "1")
17118 (set_attr "prefix" "<mask_prefix>")
17119 (set_attr "mode" "<sseinsnmode>")])
;; Variable-control vpermil on the VF modes: operand 1 is the data vector
;; (register), operand 2 the control vector in the matching integer
;; vector mode <sseintvecmode> (register or memory).
17121 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
17122 [(set (match_operand:VF 0 "register_operand" "=v")
17124 [(match_operand:VF 1 "register_operand" "v")
17125 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
17127 "TARGET_AVX && <mask_mode512bit_condition>"
17128 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17129 [(set_attr "type" "sselog")
17130 (set_attr "prefix_extra" "1")
17131 (set_attr "btver2_decode" "vector")
17132 (set_attr "prefix" "<mask_prefix>")
17133 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking entry point for vpermi2var on the VI48F modes.  It simply
;; forwards operands 0-3 to the ..._maskz_1 generator, inserting a zero
;; vector (CONST0_RTX) ahead of the mask operand 4.
17135 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17136 [(match_operand:VI48F 0 "register_operand" "=v")
17137 (match_operand:VI48F 1 "register_operand" "v")
17138 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17139 (match_operand:VI48F 3 "nonimmediate_operand" "vm")
17140 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17143 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17144 operands[0], operands[1], operands[2], operands[3],
17145 CONST0_RTX (<MODE>mode), operands[4]));
;; Zero-masking entry point for vpermi2var on the VI1_AVX512VL modes,
;; available with TARGET_AVX512VBMI.  It forwards operands 0-3 to the
;; ..._maskz_1 generator, inserting a zero vector (CONST0_RTX) ahead of
;; the mask operand 4.
17149 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17150 [(match_operand:VI1_AVX512VL 0 "register_operand")
17151 (match_operand:VI1_AVX512VL 1 "register_operand")
17152 (match_operand:<sseintvecmode> 2 "register_operand")
17153 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
17154 (match_operand:<avx512fmaskmode> 4 "register_operand")]
17155 "TARGET_AVX512VBMI"
17157 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17158 operands[0], operands[1], operands[2], operands[3],
17159 CONST0_RTX (<MODE>mode), operands[4]));
;; Zero-masking entry point for vpermi2var on the VI2_AVX512VL modes.
;; It forwards operands 0-3 to the ..._maskz_1 generator, inserting a
;; zero vector (CONST0_RTX) ahead of the mask operand 4.
17163 (define_expand "<avx512>_vpermi2var<mode>3_maskz"
17164 [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17165 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17166 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17167 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
17168 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17171 emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
17172 operands[0], operands[1], operands[2], operands[3],
17173 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermi2<suffix> on the VI48F modes.  Operand 2 (the integer index
;; vector, mode <sseintvecmode>) is tied to the destination through
;; constraint "0"; operands 1 and 3 are the two data sources, of which
;; operand 3 may be in memory.
17177 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17178 [(set (match_operand:VI48F 0 "register_operand" "=v")
17180 [(match_operand:VI48F 1 "register_operand" "v")
17181 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17182 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17185 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17186 [(set_attr "type" "sselog")
17187 (set_attr "prefix" "evex")
17188 (set_attr "mode" "<sseinsnmode>")])
;; vpermi2<suffix> on the VI1_AVX512VL modes, available with
;; TARGET_AVX512VBMI.  Operand 2 (index vector) is tied to the
;; destination through constraint "0"; operands 1 and 3 are the two data
;; sources, of which operand 3 may be in memory.
17190 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17191 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17192 (unspec:VI1_AVX512VL
17193 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17194 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17195 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17197 "TARGET_AVX512VBMI"
17198 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17199 [(set_attr "type" "sselog")
17200 (set_attr "prefix" "evex")
17201 (set_attr "mode" "<sseinsnmode>")])
;; vpermi2<suffix> on the VI2_AVX512VL modes.  Operand 2 (index vector)
;; is tied to the destination through constraint "0"; operands 1 and 3
;; are the two data sources, of which operand 3 may be in memory.
17203 (define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
17204 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17205 (unspec:VI2_AVX512VL
17206 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17207 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17208 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17211 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17212 [(set_attr "type" "sselog")
17213 (set_attr "prefix" "evex")
17214 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermi2<suffix> on the VI48F modes: the
;; UNSPEC_VPERMI2_MASK result is combined under mask operand 4
;; (constraint "Yk"), printed as {%4} after the destination.
;; Operand 2 (index vector) is tied to the destination.
17216 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17217 [(set (match_operand:VI48F 0 "register_operand" "=v")
17220 [(match_operand:VI48F 1 "register_operand" "v")
17221 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17222 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17223 UNSPEC_VPERMI2_MASK)
17225 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17227 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17228 [(set_attr "type" "sselog")
17229 (set_attr "prefix" "evex")
17230 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermi2<suffix> on the VI1_AVX512VL modes, available with
;; TARGET_AVX512VBMI: a vec_merge of the UNSPEC_VPERMI2_MASK result under
;; mask operand 4, printed as {%4} after the destination.
;; Operand 2 (index vector) is tied to the destination.
17232 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17233 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17234 (vec_merge:VI1_AVX512VL
17235 (unspec:VI1_AVX512VL
17236 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
17237 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17238 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17239 UNSPEC_VPERMI2_MASK)
17241 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17242 "TARGET_AVX512VBMI"
17243 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17244 [(set_attr "type" "sselog")
17245 (set_attr "prefix" "evex")
17246 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermi2<suffix> on the VI2_AVX512VL modes: a vec_merge of
;; the UNSPEC_VPERMI2_MASK result under mask operand 4, printed as {%4}
;; after the destination.  Operand 2 (index vector) is tied to the
;; destination.
17248 (define_insn "<avx512>_vpermi2var<mode>3_mask"
17249 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17250 (vec_merge:VI2_AVX512VL
17251 (unspec:VI2_AVX512VL
17252 [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
17253 (match_operand:<sseintvecmode> 2 "register_operand" "0")
17254 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17255 UNSPEC_VPERMI2_MASK)
17257 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17259 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17260 [(set_attr "type" "sselog")
17261 (set_attr "prefix" "evex")
17262 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking entry point for vpermt2var on the VI48F modes.  Here the
;; index vector is operand 1 and operand 2 is a data vector.  The
;; expander forwards operands 0-3 to the ..._maskz_1 generator, inserting
;; a zero vector (CONST0_RTX) ahead of the mask operand 4.
17264 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17265 [(match_operand:VI48F 0 "register_operand" "=v")
17266 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17267 (match_operand:VI48F 2 "register_operand" "0")
17268 (match_operand:VI48F 3 "nonimmediate_operand" "vm")
17269 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17272 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17273 operands[0], operands[1], operands[2], operands[3],
17274 CONST0_RTX (<MODE>mode), operands[4]));
;; Zero-masking entry point for vpermt2var on the VI1_AVX512VL modes,
;; available with TARGET_AVX512VBMI.  It forwards operands 0-3 to the
;; ..._maskz_1 generator, inserting a zero vector (CONST0_RTX) ahead of
;; the mask operand 4.
17278 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17279 [(match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17280 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17281 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17282 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")
17283 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17284 "TARGET_AVX512VBMI"
17286 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17287 operands[0], operands[1], operands[2], operands[3],
17288 CONST0_RTX (<MODE>mode), operands[4]));
;; Zero-masking entry point for vpermt2var on the VI2_AVX512VL modes.
;; It forwards operands 0-3 to the ..._maskz_1 generator, inserting a
;; zero vector (CONST0_RTX) ahead of the mask operand 4.
17292 (define_expand "<avx512>_vpermt2var<mode>3_maskz"
17293 [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17294 (match_operand:<sseintvecmode> 1 "register_operand" "v")
17295 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17296 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
17297 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
17300 emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
17301 operands[0], operands[1], operands[2], operands[3],
17302 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermt2<suffix> on the VI48F modes.  Unlike vpermi2var, the operand
;; tied to the destination (constraint "0") is data operand 2, while the
;; integer index vector is operand 1; operand 3 may be in memory.
17306 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17307 [(set (match_operand:VI48F 0 "register_operand" "=v")
17309 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17310 (match_operand:VI48F 2 "register_operand" "0")
17311 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17314 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17315 [(set_attr "type" "sselog")
17316 (set_attr "prefix" "evex")
17317 (set_attr "mode" "<sseinsnmode>")])
;; vpermt2<suffix> on the VI1_AVX512VL modes, available with
;; TARGET_AVX512VBMI.  Data operand 2 is tied to the destination
;; (constraint "0"); the index vector is operand 1; operand 3 may be in
;; memory.
17319 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17320 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17321 (unspec:VI1_AVX512VL
17322 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17323 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17324 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17326 "TARGET_AVX512VBMI"
17327 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17328 [(set_attr "type" "sselog")
17329 (set_attr "prefix" "evex")
17330 (set_attr "mode" "<sseinsnmode>")])
;; vpermt2<suffix> on the VI2_AVX512VL modes.  Data operand 2 is tied to
;; the destination (constraint "0"); the index vector is operand 1;
;; operand 3 may be in memory.
17332 (define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
17333 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17334 (unspec:VI2_AVX512VL
17335 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17336 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17337 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17340 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
17341 [(set_attr "type" "sselog")
17342 (set_attr "prefix" "evex")
17343 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermt2<suffix> on the VI48F modes; mask operand 4
;; (constraint "Yk") is printed as {%4} after the destination.
;; Data operand 2 is tied to the destination.
17345 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17346 [(set (match_operand:VI48F 0 "register_operand" "=v")
17349 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17350 (match_operand:VI48F 2 "register_operand" "0")
17351 (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
17354 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17356 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17357 [(set_attr "type" "sselog")
17358 (set_attr "prefix" "evex")
17359 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermt2<suffix> on the VI1_AVX512VL modes, available with
;; TARGET_AVX512VBMI: a vec_merge of the permutation unspec under mask
;; operand 4, printed as {%4} after the destination.
;; Data operand 2 is tied to the destination.
17361 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17362 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
17363 (vec_merge:VI1_AVX512VL
17364 (unspec:VI1_AVX512VL
17365 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17366 (match_operand:VI1_AVX512VL 2 "register_operand" "0")
17367 (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
17370 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17371 "TARGET_AVX512VBMI"
17372 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17373 [(set_attr "type" "sselog")
17374 (set_attr "prefix" "evex")
17375 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masked vpermt2<suffix> on the VI2_AVX512VL modes: a vec_merge of
;; the permutation unspec under mask operand 4, printed as {%4} after the
;; destination.  Data operand 2 is tied to the destination.
17377 (define_insn "<avx512>_vpermt2var<mode>3_mask"
17378 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17379 (vec_merge:VI2_AVX512VL
17380 (unspec:VI2_AVX512VL
17381 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
17382 (match_operand:VI2_AVX512VL 2 "register_operand" "0")
17383 (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
17386 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
17388 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
17389 [(set_attr "type" "sselog")
17390 (set_attr "prefix" "evex")
17391 (set_attr "mode" "<sseinsnmode>")])
;; Expander for vperm2f128 with an 8-bit immediate (operand 3).
;; When neither bit 7 nor bit 3 of the immediate is set
;; ((mask & 0x88) == 0), the operation is rewritten as a vec_select from
;; the vec_concat of operands 1 and 2: bits 0-1 pick the source half for
;; the low result half and bits 4-5 pick it for the high result half,
;; each expanded to nelt/2 consecutive element indices.
;; Otherwise the UNSPEC_VPERMIL2F128 form in the pattern is kept.
17393 (define_expand "avx_vperm2f128<mode>3"
17394 [(set (match_operand:AVX256MODE2P 0 "register_operand")
17395 (unspec:AVX256MODE2P
17396 [(match_operand:AVX256MODE2P 1 "register_operand")
17397 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
17398 (match_operand:SI 3 "const_0_to_255_operand")]
17399 UNSPEC_VPERMIL2F128))]
17402 int mask = INTVAL (operands[3]);
17403 if ((mask & 0x88) == 0)
17405 rtx perm[<ssescalarnum>], t1, t2;
17406 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
17408 base = (mask & 3) * nelt2;
17409 for (i = 0; i < nelt2; ++i)
17410 perm[i] = GEN_INT (base + i);
17412 base = ((mask >> 4) & 3) * nelt2;
17413 for (i = 0; i < nelt2; ++i)
17414 perm[i + nelt2] = GEN_INT (base + i);
17416 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
17417 operands[1], operands[2]);
17418 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
17419 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
17420 t2 = gen_rtx_SET (operands[0], t2);
17426 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
17427 ;; means that in order to represent this properly in rtl we'd have to
17428 ;; nest *another* vec_concat with a zero operand and do the select from
17429 ;; a 4x wide vector. That doesn't seem very nice.
;; Hence this pattern keeps the operation opaque as UNSPEC_VPERMIL2F128
;; and passes the immediate (operand 3) through to vperm2<i128> unchanged.
17430 (define_insn "*avx_vperm2f128<mode>_full"
17431 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17432 (unspec:AVX256MODE2P
17433 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
17434 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
17435 (match_operand:SI 3 "const_0_to_255_operand" "n")]
17436 UNSPEC_VPERMIL2F128))]
17438 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17439 [(set_attr "type" "sselog")
17440 (set_attr "prefix_extra" "1")
17441 (set_attr "length_immediate" "1")
17442 (set_attr "prefix" "vex")
17443 (set_attr "mode" "<sseinsnmode>")])
;; vperm2f128 without lane zeroing, expressed as a vec_select from the
;; vec_concat of operands 1 and 2 with an explicit PARALLEL (operand 3).
;; avx_vperm2f128_parallel validates the selector in the condition; in the
;; output code its result minus one is the immediate.  The visible
;; returns show that some selectors are emitted as vinsert<i128> (with
;; immediate 0 or 1) on the %x2 form of operand 2; the general case emits
;; vperm2<i128> with the recovered immediate.
17445 (define_insn "*avx_vperm2f128<mode>_nozero"
17446 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
17447 (vec_select:AVX256MODE2P
17448 (vec_concat:<ssedoublevecmode>
17449 (match_operand:AVX256MODE2P 1 "register_operand" "x")
17450 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
17451 (match_parallel 3 ""
17452 [(match_operand 4 "const_int_operand")])))]
17454 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
17456 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
17458 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
17460 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
17461 operands[3] = GEN_INT (mask);
17462 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
17464 [(set_attr "type" "sselog")
17465 (set_attr "prefix_extra" "1")
17466 (set_attr "length_immediate" "1")
17467 (set_attr "prefix" "vex")
17468 (set_attr "mode" "<sseinsnmode>")])
;; palignr recognised from a permutation of a single V_128 register
;; (operand 1 used for both inputs).  Operand 3 is an element count; the
;; output code converts it to a byte count by multiplying with the size
;; of the vector's element mode and stores that as the immediate.
;; Alternative 0 ("isa" noavx) is the two-operand palignr with operand 1
;; tied to the destination; alternative 1 ("isa" avx) is vpalignr.
17470 (define_insn "*ssse3_palignr<mode>_perm"
17471 [(set (match_operand:V_128 0 "register_operand" "=x,x")
17473 (match_operand:V_128 1 "register_operand" "0,x")
17474 (match_parallel 2 "palignr_operand"
17475 [(match_operand 3 "const_int_operand" "n, n")])))]
17478 machine_mode imode = GET_MODE_INNER (GET_MODE (operands[0]));
17479 operands[2] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (imode));
17481 switch (which_alternative)
17484 return "palignr\t{%2, %1, %0|%0, %1, %2}";
17486 return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
17488 gcc_unreachable ();
17491 [(set_attr "isa" "noavx,avx")
17492 (set_attr "type" "sseishft")
17493 (set_attr "atom_unit" "sishuf")
17494 (set_attr "prefix_data16" "1,*")
17495 (set_attr "prefix_extra" "1")
17496 (set_attr "length_immediate" "1")
17497 (set_attr "prefix" "orig,vex")])
;; Masked insert of a half-width vector (operand 2) into a VI48F_256
;; vector (operand 1).  Operand 3 (0 or 1) selects which masked
;; generator is used: gen_vec_set_lo_<mode>_mask or
;; gen_vec_set_hi_<mode>_mask.  Operand 4 is passed as the fourth
;; generator argument; operand 5 has the mask mode <avx512fmaskmode>.
17499 (define_expand "avx512vl_vinsert<mode>"
17500 [(match_operand:VI48F_256 0 "register_operand")
17501 (match_operand:VI48F_256 1 "register_operand")
17502 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
17503 (match_operand:SI 3 "const_0_to_1_operand")
17504 (match_operand:VI48F_256 4 "register_operand")
17505 (match_operand:<avx512fmaskmode> 5 "register_operand")]
17508 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17510 switch (INTVAL (operands[3]))
17513 insn = gen_vec_set_lo_<mode>_mask;
17516 insn = gen_vec_set_hi_<mode>_mask;
17519 gcc_unreachable ();
17522 emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
;; Insert a half-width vector (operand 2) into a V_256 vector
;; (operand 1).  Operand 3 (0 or 1) selects the generator:
;; gen_vec_set_lo_<mode> or gen_vec_set_hi_<mode>, which is then called
;; with operands 0, 1 and 2.
17527 (define_expand "avx_vinsertf128<mode>"
17528 [(match_operand:V_256 0 "register_operand")
17529 (match_operand:V_256 1 "register_operand")
17530 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
17531 (match_operand:SI 3 "const_0_to_1_operand")]
17534 rtx (*insn)(rtx, rtx, rtx);
17536 switch (INTVAL (operands[3]))
17539 insn = gen_vec_set_lo_<mode>;
17542 insn = gen_vec_set_hi_<mode>;
17545 gcc_unreachable ();
17548 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Replace the low half of a VI8F_256 vector: the result is the
;; vec_concat of operand 2 (new low half) and elements 2-3 of operand 1
;; (kept high half).  Emits the vinsert<shuffletype>64x2 form with
;; immediate 0 when TARGET_AVX512VL, else vinsert<i128>.
;; NOTE(review): vinsert{f,i}64x2 is documented by Intel as an AVX512DQ
;; instruction, yet it is selected here on TARGET_AVX512VL alone --
;; confirm the insn condition covers that.
;; NOTE(review): the "prefix" attribute is "vex" even for the EVEX form.
17552 (define_insn "vec_set_lo_<mode><mask_name>"
17553 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17554 (vec_concat:VI8F_256
17555 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
17556 (vec_select:<ssehalfvecmode>
17557 (match_operand:VI8F_256 1 "register_operand" "v")
17558 (parallel [(const_int 2) (const_int 3)]))))]
17561 if (TARGET_AVX512VL)
17562 return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
17564 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
17566 [(set_attr "type" "sselog")
17567 (set_attr "prefix_extra" "1")
17568 (set_attr "length_immediate" "1")
17569 (set_attr "prefix" "vex")
17570 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a VI8F_256 vector: the result is the
;; vec_concat of elements 0-1 of operand 1 (kept low half) and operand 2
;; (new high half).  Emits the vinsert<shuffletype>64x2 form with
;; immediate 1 when TARGET_AVX512VL, else vinsert<i128>.
;; NOTE(review): vinsert{f,i}64x2 is documented by Intel as an AVX512DQ
;; instruction, yet it is selected here on TARGET_AVX512VL alone --
;; confirm the insn condition covers that.
;; NOTE(review): the "prefix" attribute is "vex" even for the EVEX form.
17572 (define_insn "vec_set_hi_<mode><mask_name>"
17573 [(set (match_operand:VI8F_256 0 "register_operand" "=v")
17574 (vec_concat:VI8F_256
17575 (vec_select:<ssehalfvecmode>
17576 (match_operand:VI8F_256 1 "register_operand" "v")
17577 (parallel [(const_int 0) (const_int 1)]))
17578 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
17581 if (TARGET_AVX512VL)
17582 return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
17584 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
17586 [(set_attr "type" "sselog")
17587 (set_attr "prefix_extra" "1")
17588 (set_attr "length_immediate" "1")
17589 (set_attr "prefix" "vex")
17590 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of a VI4F_256 vector: the result is the
;; vec_concat of operand 2 (new low half) and elements 4-7 of operand 1
;; (kept high half).  Emits vinsert<shuffletype>32x4 with immediate 0
;; when TARGET_AVX512VL, else vinsert<i128>.
;; NOTE(review): the "prefix" attribute is "vex" even for the EVEX form.
17592 (define_insn "vec_set_lo_<mode><mask_name>"
17593 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
17594 (vec_concat:VI4F_256
17595 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
17596 (vec_select:<ssehalfvecmode>
17597 (match_operand:VI4F_256 1 "register_operand" "v")
17598 (parallel [(const_int 4) (const_int 5)
17599 (const_int 6) (const_int 7)]))))]
17602 if (TARGET_AVX512VL)
17603 return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
17605 return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
17607 [(set_attr "type" "sselog")
17608 (set_attr "prefix_extra" "1")
17609 (set_attr "length_immediate" "1")
17610 (set_attr "prefix" "vex")
17611 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a VI4F_256 vector: the result is elements
;; 0..3 of operand 1 concatenated with operand 2.  Same instruction
;; choice as the vec_set_lo form, but with immediate 0x1.
17613 (define_insn "vec_set_hi_<mode><mask_name>"
17614 [(set (match_operand:VI4F_256 0 "register_operand" "=v")
17615 (vec_concat:VI4F_256
17616 (vec_select:<ssehalfvecmode>
17617 (match_operand:VI4F_256 1 "register_operand" "v")
17618 (parallel [(const_int 0) (const_int 1)
17619 (const_int 2) (const_int 3)]))
17620 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
17623 if (TARGET_AVX512VL)
17624 return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
17626 return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
17628 [(set_attr "type" "sselog")
17629 (set_attr "prefix_extra" "1")
17630 (set_attr "length_immediate" "1")
17631 (set_attr "prefix" "vex")
17632 (set_attr "mode" "<sseinsnmode>")])
;; V16HI low-half insert: V8HI operand 2 is combined with elements
;; 8..15 of operand 1.  Emitted as vinsert%~128 with immediate 0x0.
17634 (define_insn "vec_set_lo_v16hi"
17635 [(set (match_operand:V16HI 0 "register_operand" "=x")
17637 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
17639 (match_operand:V16HI 1 "register_operand" "x")
17640 (parallel [(const_int 8) (const_int 9)
17641 (const_int 10) (const_int 11)
17642 (const_int 12) (const_int 13)
17643 (const_int 14) (const_int 15)]))))]
17645 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
17646 [(set_attr "type" "sselog")
17647 (set_attr "prefix_extra" "1")
17648 (set_attr "length_immediate" "1")
17649 (set_attr "prefix" "vex")
17650 (set_attr "mode" "OI")])
;; V16HI high-half insert: elements 0..7 of operand 1 are combined with
;; V8HI operand 2.  Emitted as vinsert%~128 with immediate 0x1.
17652 (define_insn "vec_set_hi_v16hi"
17653 [(set (match_operand:V16HI 0 "register_operand" "=x")
17656 (match_operand:V16HI 1 "register_operand" "x")
17657 (parallel [(const_int 0) (const_int 1)
17658 (const_int 2) (const_int 3)
17659 (const_int 4) (const_int 5)
17660 (const_int 6) (const_int 7)]))
17661 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
17663 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
17664 [(set_attr "type" "sselog")
17665 (set_attr "prefix_extra" "1")
17666 (set_attr "length_immediate" "1")
17667 (set_attr "prefix" "vex")
17668 (set_attr "mode" "OI")])
;; V32QI low-half insert: V16QI operand 2 is combined with elements
;; 16..31 of operand 1.  Emitted as vinsert%~128 with immediate 0x0.
17670 (define_insn "vec_set_lo_v32qi"
17671 [(set (match_operand:V32QI 0 "register_operand" "=x")
17673 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
17675 (match_operand:V32QI 1 "register_operand" "x")
17676 (parallel [(const_int 16) (const_int 17)
17677 (const_int 18) (const_int 19)
17678 (const_int 20) (const_int 21)
17679 (const_int 22) (const_int 23)
17680 (const_int 24) (const_int 25)
17681 (const_int 26) (const_int 27)
17682 (const_int 28) (const_int 29)
17683 (const_int 30) (const_int 31)]))))]
17685 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
17686 [(set_attr "type" "sselog")
17687 (set_attr "prefix_extra" "1")
17688 (set_attr "length_immediate" "1")
17689 (set_attr "prefix" "vex")
17690 (set_attr "mode" "OI")])
;; V32QI high-half insert: elements 0..15 of operand 1 are combined with
;; V16QI operand 2.  Emitted as vinsert%~128 with immediate 0x1.
17692 (define_insn "vec_set_hi_v32qi"
17693 [(set (match_operand:V32QI 0 "register_operand" "=x")
17696 (match_operand:V32QI 1 "register_operand" "x")
17697 (parallel [(const_int 0) (const_int 1)
17698 (const_int 2) (const_int 3)
17699 (const_int 4) (const_int 5)
17700 (const_int 6) (const_int 7)
17701 (const_int 8) (const_int 9)
17702 (const_int 10) (const_int 11)
17703 (const_int 12) (const_int 13)
17704 (const_int 14) (const_int 15)]))
17705 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
17707 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
17708 [(set_attr "type" "sselog")
17709 (set_attr "prefix_extra" "1")
17710 (set_attr "length_immediate" "1")
17711 (set_attr "prefix" "vex")
17712 (set_attr "mode" "OI")])
;; Masked vector load.  Operand 0 is the destination register, operand 1
;; the memory source and operand 2 a register in the integer vector mode
;; <sseintvecmode> (the mask, going by the pattern name).  Emitted as
;; v<sseintprefix>maskmov<ssemodesuffix>.
17714 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
17715 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
17717 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
17718 (match_operand:V48_AVX2 1 "memory_operand" "m")]
17721 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
17722 [(set_attr "type" "sselog1")
17723 (set_attr "prefix_extra" "1")
17724 (set_attr "prefix" "vex")
17725 (set_attr "btver2_decode" "vector")
17726 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector store.  Operand 0 is the memory destination; its "+m"
;; constraint marks it as both read and written.  Operand 1 is a
;; register in <sseintvecmode> (the mask, going by the pattern name) and
;; operand 2 is the data register.  Note the operand numbering differs
;; from the maskstore<mode> expander, where the data is operand 1.
17728 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
17729 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
17731 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
17732 (match_operand:V48_AVX2 2 "register_operand" "x")
17736 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17737 [(set_attr "type" "sselog1")
17738 (set_attr "prefix_extra" "1")
17739 (set_attr "prefix" "vex")
17740 (set_attr "btver2_decode" "vector")
17741 (set_attr "mode" "<sseinsnmode>")])
;; Named expander maskload<mode>: operand 0 is the destination register,
;; operand 1 the memory source, operand 2 a <sseintvecmode> register.
17743 (define_expand "maskload<mode>"
17744 [(set (match_operand:V48_AVX2 0 "register_operand")
17746 [(match_operand:<sseintvecmode> 2 "register_operand")
17747 (match_operand:V48_AVX2 1 "memory_operand")]
;; Named expander maskstore<mode>: operand 0 is the memory destination,
;; operand 1 the data register, operand 2 a <sseintvecmode> register.
17751 (define_expand "maskstore<mode>"
17752 [(set (match_operand:V48_AVX2 0 "memory_operand")
17754 [(match_operand:<sseintvecmode> 2 "register_operand")
17755 (match_operand:V48_AVX2 1 "register_operand")
;; Cast from the half-width mode <ssehalfvecmode> (operand 1) to the
;; full AVX256MODE2P mode (operand 0), represented as an unspec.
;; The split condition adds reload_completed; the split code then
;; re-creates an operand as a REG with the same register number in the
;; other operand's mode and emits an ordinary move.
17760 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
17761 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
17762 (unspec:AVX256MODE2P
17763 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
17767 "&& reload_completed"
17770 rtx op0 = operands[0];
17771 rtx op1 = operands[1];
17773 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
17775 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
17776 emit_move_insn (op0, op1);
;; vec_init for the V_256 modes: all work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
17780 (define_expand "vec_init<mode>"
17781 [(match_operand:V_256 0 "register_operand")
17785 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_init for the VF48_I1248 modes: all work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
17789 (define_expand "vec_init<mode>"
17790 [(match_operand:VF48_I1248 0 "register_operand")
17794 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Arithmetic right shift where the shift count (operand 2) is itself a
;; vector of the same mode as the shifted value.  Emitted as
;; vpsrav<ssemodesuffix>.
17798 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
17799 [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
17800 (ashiftrt:VI48_AVX512F_AVX512VL
17801 (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
17802 (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
17803 "TARGET_AVX2 && <mask_mode512bit_condition>"
17804 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17805 [(set_attr "type" "sseishft")
17806 (set_attr "prefix" "maybe_evex")
17807 (set_attr "mode" "<sseinsnmode>")])
;; VI2_AVX512VL variant of the vector-count arithmetic right shift;
;; the mnemonic is fixed to vpsravw.
17809 (define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
17810 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17811 (ashiftrt:VI2_AVX512VL
17812 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17813 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
17815 "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17816 [(set_attr "type" "sseishft")
17817 (set_attr "prefix" "maybe_evex")
17818 (set_attr "mode" "<sseinsnmode>")])
;; Shifts from the any_lshift code iterator where the shift count
;; (operand 2) is a vector of the same mode as the shifted value.
;; Emitted as vp<vshift>v<ssemodesuffix>.
17820 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
17821 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
17822 (any_lshift:VI48_AVX512F
17823 (match_operand:VI48_AVX512F 1 "register_operand" "v")
17824 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
17825 "TARGET_AVX2 && <mask_mode512bit_condition>"
17826 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17827 [(set_attr "type" "sseishft")
17828 (set_attr "prefix" "maybe_evex")
17829 (set_attr "mode" "<sseinsnmode>")])
;; VI2_AVX512VL variant of the vector-count any_lshift pattern; same
;; vp<vshift>v<ssemodesuffix> template.
17831 (define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
17832 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
17833 (any_lshift:VI2_AVX512VL
17834 (match_operand:VI2_AVX512VL 1 "register_operand" "v")
17835 (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
17837 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17838 [(set_attr "type" "sseishft")
17839 (set_attr "prefix" "maybe_evex")
17840 (set_attr "mode" "<sseinsnmode>")])
;; Concatenate two <ssehalfvecmode> values into a V_256_512 vector.
;; There are two alternatives, told apart by which_alternative:
;; operand 2 constrained "xm" (type sselog) and operand 2 constrained
;; "C" (type ssemov).  The output code returns either vinsert<i128>
;; with immediate 0x1, or a vmovaps/vmovapd/vmovdqa of operand 1 into
;; the %t0 or %x0 view of the destination, selected from
;; get_attr_mode (insn).
17842 (define_insn "avx_vec_concat<mode>"
17843 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
17844 (vec_concat:V_256_512
17845 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
17846 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
17849 switch (which_alternative)
17852 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
17854 switch (get_attr_mode (insn))
17857 return "vmovaps\t{%1, %t0|%t0, %1}";
17859 return "vmovapd\t{%1, %t0|%t0, %1}";
17861 return "vmovaps\t{%1, %x0|%x0, %1}";
17863 return "vmovapd\t{%1, %x0|%x0, %1}";
17865 return "vmovdqa\t{%1, %t0|%t0, %1}";
17867 return "vmovdqa\t{%1, %x0|%x0, %1}";
17869 gcc_unreachable ();
17872 gcc_unreachable ();
17875 [(set_attr "type" "sselog,ssemov")
17876 (set_attr "prefix_extra" "1,*")
17877 (set_attr "length_immediate" "1,*")
17878 (set_attr "prefix" "maybe_evex")
17879 (set_attr "mode" "<sseinsnmode>")])
;; Register form of vcvtph2ps producing V4SF: the RTL selects elements
;; 0..3 of a V8SF unspec taken over the V8HI register operand 1.
17881 (define_insn "vcvtph2ps<mask_name>"
17882 [(set (match_operand:V4SF 0 "register_operand" "=v")
17884 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
17886 (parallel [(const_int 0) (const_int 1)
17887 (const_int 2) (const_int 3)])))]
17888 "TARGET_F16C || TARGET_AVX512VL"
17889 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17890 [(set_attr "type" "ssecvt")
17891 (set_attr "prefix" "maybe_evex")
17892 (set_attr "mode" "V4SF")])
;; Memory form of vcvtph2ps producing V4SF: operand 1 is a V4HI memory
;; operand wrapped directly in UNSPEC_VCVTPH2PS.
17894 (define_insn "*vcvtph2ps_load<mask_name>"
17895 [(set (match_operand:V4SF 0 "register_operand" "=v")
17896 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
17897 UNSPEC_VCVTPH2PS))]
17898 "TARGET_F16C || TARGET_AVX512VL"
17899 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17900 [(set_attr "type" "ssecvt")
17901 (set_attr "prefix" "vex")
17902 (set_attr "mode" "V8SF")])
;; vcvtph2ps from a V8HI register or memory operand to a V8SF register.
17904 (define_insn "vcvtph2ps256<mask_name>"
17905 [(set (match_operand:V8SF 0 "register_operand" "=v")
17906 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
17907 UNSPEC_VCVTPH2PS))]
17908 "TARGET_F16C || TARGET_AVX512VL"
17909 "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17910 [(set_attr "type" "ssecvt")
17911 (set_attr "prefix" "vex")
17912 (set_attr "btver2_decode" "double")
17913 (set_attr "mode" "V8SF")])
;; vcvtph2ps from V16HI to V16SF.  The predicate and constraint of
;; operand 1 come from the round_saeonly substitution attributes, and
;; <round_saeonly_mask_op2> is spliced into the output template.
17915 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
17916 [(set (match_operand:V16SF 0 "register_operand" "=v")
17918 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
17919 UNSPEC_VCVTPH2PS))]
17921 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
17922 [(set_attr "type" "ssecvt")
17923 (set_attr "prefix" "evex")
17924 (set_attr "mode" "V16SF")])
;; Expander for the masked V4SF -> half-precision conversion.
;; Operand 2 is the 0..255 immediate, operand 3 a V8HI
;; vector_move_operand and operand 4 a QImode register.  The
;; preparation statement creates operands[5] as the V4HImode zero
;; constant.
17926 (define_expand "vcvtps2ph_mask"
17927 [(set (match_operand:V8HI 0 "register_operand")
17930 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
17931 (match_operand:SI 2 "const_0_to_255_operand")]
17934 (match_operand:V8HI 3 "vector_move_operand")
17935 (match_operand:QI 4 "register_operand")))]
17937 "operands[5] = CONST0_RTX (V4HImode);")
;; Expander for the unmasked V4SF -> half-precision conversion.  The
;; preparation statement creates operands[3] as the V4HImode zero
;; constant.
17939 (define_expand "vcvtps2ph"
17940 [(set (match_operand:V8HI 0 "register_operand")
17942 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
17943 (match_operand:SI 2 "const_0_to_255_operand")]
17947 "operands[3] = CONST0_RTX (V4HImode);")
;; Register form of vcvtps2ph writing a V8HI destination.  Operand 3
;; must satisfy const0_operand in V4HImode; it is not referenced by the
;; output template.
17949 (define_insn "*vcvtps2ph<mask_name>"
17950 [(set (match_operand:V8HI 0 "register_operand" "=v")
17952 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
17953 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17955 (match_operand:V4HI 3 "const0_operand")))]
17956 "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
17957 "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
17958 [(set_attr "type" "ssecvt")
17959 (set_attr "prefix" "maybe_evex")
17960 (set_attr "mode" "V4SF")])
;; Store form of vcvtps2ph: converts V4SF register operand 1 with
;; immediate operand 2 and writes the V4HI result straight to memory.
17962 (define_insn "*vcvtps2ph_store<mask_name>"
17963 [(set (match_operand:V4HI 0 "memory_operand" "=m")
17964 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
17965 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17966 UNSPEC_VCVTPS2PH))]
17967 "TARGET_F16C || TARGET_AVX512VL"
17968 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17969 [(set_attr "type" "ssecvt")
17970 (set_attr "prefix" "maybe_evex")
17971 (set_attr "mode" "V4SF")])
;; vcvtps2ph from a V8SF register to a V8HI register or memory
;; destination, with immediate operand 2.
17973 (define_insn "vcvtps2ph256<mask_name>"
17974 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
17975 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
17976 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17977 UNSPEC_VCVTPS2PH))]
17978 "TARGET_F16C || TARGET_AVX512VL"
17979 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17980 [(set_attr "type" "ssecvt")
17981 (set_attr "prefix" "maybe_evex")
17982 (set_attr "btver2_decode" "vector")
17983 (set_attr "mode" "V8SF")])
;; vcvtps2ph from a V16SF register to a V16HI register or memory
;; destination, with immediate operand 2.
17985 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
17986 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
17988 [(match_operand:V16SF 1 "register_operand" "v")
17989 (match_operand:SI 2 "const_0_to_255_operand" "N")]
17990 UNSPEC_VCVTPS2PH))]
17992 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17993 [(set_attr "type" "ssecvt")
17994 (set_attr "prefix" "evex")
17995 (set_attr "mode" "V16SF")])
17997 ;; For gather* insn patterns
;; VEC_GATHER_MODE is the set of data vector modes the avx2_gather*
;; patterns iterate over.
17998 (define_mode_iterator VEC_GATHER_MODE
17999 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Data mode -> mode of the index vector operand in the *gathersi and
;; *scattersi patterns.
18000 (define_mode_attr VEC_GATHER_IDXSI
18001 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
18002 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
18003 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
18004 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; Data mode -> mode of the index vector operand in the *gatherdi and
;; *scatterdi patterns.
18006 (define_mode_attr VEC_GATHER_IDXDI
18007 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
18008 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
18009 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
18010 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; Data mode -> mode used for the register data operands of the
;; *gatherdi and *scatterdi patterns.  It equals the data mode except
;; for V8SI/V8SF and V16SI/V16SF, which map to the mode with half as
;; many elements.
18012 (define_mode_attr VEC_GATHER_SRCDI
18013 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
18014 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
18015 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
18016 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; Expander for the AVX2 gather with SImode indices.  Operands:
;;   0 destination, 1 register source in the data mode,
;;   2 base address (vsib_address_operand), 3 index vector in
;;   <VEC_GATHER_IDXSI>, 4 register operand in the data mode,
;;   5 scale (const1248_operand), 6 scratch that is clobbered.
;; The preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR.
;; The predicate name for operand 5 is spelled without a trailing blank
;; so that it matches every other use of const1248_operand.
18018 (define_expand "avx2_gathersi<mode>"
18019 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
18020 (unspec:VEC_GATHER_MODE
18021 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
18022 (mem:<ssescalarmode>
18024 [(match_operand 2 "vsib_address_operand")
18025 (match_operand:<VEC_GATHER_IDXSI>
18026 3 "register_operand")
18027 (match_operand:SI 5 "const1248_operand")]))
18028 (mem:BLK (scratch))
18029 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
18031 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
18035 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18036 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn for the SImode-index AVX2 gather.  Operand 2 is tied
;; to the output (constraint "0") and operand 5 is tied to the scratch
;; operand 1 (constraint "1"); both outputs are earlyclobber ("=&x").
;; The template prints operand 1, the memory operator 7 and operand 0.
18039 (define_insn "*avx2_gathersi<mode>"
18040 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18041 (unspec:VEC_GATHER_MODE
18042 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
18043 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18045 [(match_operand:P 3 "vsib_address_operand" "Tv")
18046 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
18047 (match_operand:SI 6 "const1248_operand" "n")]
18049 (mem:BLK (scratch))
18050 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
18052 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18054 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
18055 [(set_attr "type" "ssemov")
18056 (set_attr "prefix" "vex")
18057 (set_attr "mode" "<sseinsnmode>")])
;; Second form of the SImode-index AVX2 gather, with the operands
;; renumbered: address parts are operands 2, 3 and 5, the memory
;; operator is 6, and register operand 4 is tied to the scratch
;; operand 1 (constraint "1").
18059 (define_insn "*avx2_gathersi<mode>_2"
18060 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18061 (unspec:VEC_GATHER_MODE
18063 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18065 [(match_operand:P 2 "vsib_address_operand" "Tv")
18066 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
18067 (match_operand:SI 5 "const1248_operand" "n")]
18069 (mem:BLK (scratch))
18070 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
18072 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18074 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
18075 [(set_attr "type" "ssemov")
18076 (set_attr "prefix" "vex")
18077 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX2 gather with DImode indices.  Operands:
;;   0 destination, 1 register source in <VEC_GATHER_SRCDI>,
;;   2 base address (vsib_address_operand), 3 index vector in
;;   <VEC_GATHER_IDXDI>, 4 register operand in <VEC_GATHER_SRCDI>,
;;   5 scale (const1248_operand), 6 scratch that is clobbered.
;; The preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR.
;; The predicate name for operand 5 is spelled without a trailing blank
;; so that it matches every other use of const1248_operand.
18079 (define_expand "avx2_gatherdi<mode>"
18080 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
18081 (unspec:VEC_GATHER_MODE
18082 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
18083 (mem:<ssescalarmode>
18085 [(match_operand 2 "vsib_address_operand")
18086 (match_operand:<VEC_GATHER_IDXDI>
18087 3 "register_operand")
18088 (match_operand:SI 5 "const1248_operand")]))
18089 (mem:BLK (scratch))
18090 (match_operand:<VEC_GATHER_SRCDI>
18091 4 "register_operand")]
18093 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
18097 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18098 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn for the DImode-index AVX2 gather.  Operand 2 (mode
;; <VEC_GATHER_SRCDI>) is tied to output 0 and operand 5 is tied to the
;; scratch operand 1.  The template prints the tied inputs %5 and %2
;; rather than %1 and %0, so the registers are printed in the
;; <VEC_GATHER_SRCDI> mode of those operands.
18101 (define_insn "*avx2_gatherdi<mode>"
18102 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18103 (unspec:VEC_GATHER_MODE
18104 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
18105 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18107 [(match_operand:P 3 "vsib_address_operand" "Tv")
18108 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
18109 (match_operand:SI 6 "const1248_operand" "n")]
18111 (mem:BLK (scratch))
18112 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
18114 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18116 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
18117 [(set_attr "type" "ssemov")
18118 (set_attr "prefix" "vex")
18119 (set_attr "mode" "<sseinsnmode>")])
;; Second form of the DImode-index AVX2 gather, with the operands
;; renumbered (memory operator 6, tied register operand 4).  When the
;; data mode differs from <VEC_GATHER_SRCDI> the destination is printed
;; through the %x0 modifier; otherwise it is printed as plain %0.
18121 (define_insn "*avx2_gatherdi<mode>_2"
18122 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
18123 (unspec:VEC_GATHER_MODE
18125 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18127 [(match_operand:P 2 "vsib_address_operand" "Tv")
18128 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
18129 (match_operand:SI 5 "const1248_operand" "n")]
18131 (mem:BLK (scratch))
18132 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
18134 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
18137 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
18138 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
18139 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
18141 [(set_attr "type" "ssemov")
18142 (set_attr "prefix" "vex")
18143 (set_attr "mode" "<sseinsnmode>")])
;; DImode-index gather whose result is consumed through a vec_select of
;; elements 0..3, so the destination has mode <VEC_GATHER_SRCDI>.
;; Operand 2 is tied to output 0 and operand 5 to the VI4F_256 scratch
;; operand 1.
18145 (define_insn "*avx2_gatherdi<mode>_3"
18146 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18147 (vec_select:<VEC_GATHER_SRCDI>
18149 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
18150 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
18152 [(match_operand:P 3 "vsib_address_operand" "Tv")
18153 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
18154 (match_operand:SI 6 "const1248_operand" "n")]
18156 (mem:BLK (scratch))
18157 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
18159 (parallel [(const_int 0) (const_int 1)
18160 (const_int 2) (const_int 3)])))
18161 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18163 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
18164 [(set_attr "type" "ssemov")
18165 (set_attr "prefix" "vex")
18166 (set_attr "mode" "<sseinsnmode>")])
;; Renumbered variant of the vec_select form of the DImode-index
;; gather: memory operator 6, register operand 4 tied to the VI4F_256
;; scratch operand 1, result elements 0..3 selected.
18168 (define_insn "*avx2_gatherdi<mode>_4"
18169 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
18170 (vec_select:<VEC_GATHER_SRCDI>
18173 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18175 [(match_operand:P 2 "vsib_address_operand" "Tv")
18176 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
18177 (match_operand:SI 5 "const1248_operand" "n")]
18179 (mem:BLK (scratch))
18180 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
18182 (parallel [(const_int 0) (const_int 1)
18183 (const_int 2) (const_int 3)])))
18184 (clobber (match_scratch:VI4F_256 1 "=&x"))]
18186 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
18187 [(set_attr "type" "ssemov")
18188 (set_attr "prefix" "vex")
18189 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 gather with SImode indices.  Operands:
;;   0 destination, 1 register source in the data mode,
;;   2 base address, 3 index vector in <VEC_GATHER_IDXSI>,
;;   4 mask register in <avx512fmaskmode>, 5 scale,
;;   7 mask-mode scratch that is clobbered.
;; The preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR.
18191 (define_expand "<avx512>_gathersi<mode>"
18192 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18194 [(match_operand:VI48F 1 "register_operand")
18195 (match_operand:<avx512fmaskmode> 4 "register_operand")
18196 (mem:<ssescalarmode>
18198 [(match_operand 2 "vsib_address_operand")
18199 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
18200 (match_operand:SI 5 "const1248_operand")]))]
18202 (clobber (match_scratch:<avx512fmaskmode> 7))])]
18206 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18207 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn for the SImode-index AVX-512 gather.  Operand 1 is
;; tied to output 0 and the mask input (operand 7) is tied to the "Yk"
;; scratch operand 2.  The template prints the memory operator 6 (via
;; %g6 in the Intel dialect) and the destination with {%2} appended.
18210 (define_insn "*avx512f_gathersi<mode>"
18211 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18213 [(match_operand:VI48F 1 "register_operand" "0")
18214 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
18215 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18217 [(match_operand:P 4 "vsib_address_operand" "Tv")
18218 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
18219 (match_operand:SI 5 "const1248_operand" "n")]
18220 UNSPEC_VSIBADDR)])]
18222 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
18224 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
18225 [(set_attr "type" "ssemov")
18226 (set_attr "prefix" "evex")
18227 (set_attr "mode" "<sseinsnmode>")])
;; Renumbered variant of the SImode-index AVX-512 gather: mask input 6
;; is tied to the "Yk" scratch operand 1, the memory operator is 5 and
;; the index vector is operand 2.
18229 (define_insn "*avx512f_gathersi<mode>_2"
18230 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18233 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
18234 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18236 [(match_operand:P 3 "vsib_address_operand" "Tv")
18237 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18238 (match_operand:SI 4 "const1248_operand" "n")]
18239 UNSPEC_VSIBADDR)])]
18241 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
18243 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
18244 [(set_attr "type" "ssemov")
18245 (set_attr "prefix" "evex")
18246 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 gather with DImode indices.  Operands:
;;   0 destination, 1 register source in <VEC_GATHER_SRCDI>,
;;   2 base address, 3 index vector in <VEC_GATHER_IDXDI>,
;;   4 QImode mask register, 5 scale, 7 QImode scratch that is
;;   clobbered.
;; The preparation code wraps operands 2, 3 and 5 into a Pmode
;; UNSPEC_VSIBADDR.
18249 (define_expand "<avx512>_gatherdi<mode>"
18250 [(parallel [(set (match_operand:VI48F 0 "register_operand")
18252 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
18253 (match_operand:QI 4 "register_operand")
18254 (mem:<ssescalarmode>
18256 [(match_operand 2 "vsib_address_operand")
18257 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
18258 (match_operand:SI 5 "const1248_operand")]))]
18260 (clobber (match_scratch:QI 7))])]
18264 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
18265 operands[5]), UNSPEC_VSIBADDR);
;; Matching insn for the DImode-index AVX-512 gather.  Operand 1 (mode
;; <VEC_GATHER_SRCDI>) is tied to output 0 and the QImode mask input
;; (operand 7) is tied to the "Yk" scratch operand 2.  The template
;; prints the destination as %1, i.e. in the <VEC_GATHER_SRCDI> mode of
;; the tied input.
18268 (define_insn "*avx512f_gatherdi<mode>"
18269 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18271 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
18272 (match_operand:QI 7 "register_operand" "2")
18273 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
18275 [(match_operand:P 4 "vsib_address_operand" "Tv")
18276 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
18277 (match_operand:SI 5 "const1248_operand" "n")]
18278 UNSPEC_VSIBADDR)])]
18280 (clobber (match_scratch:QI 2 "=&Yk"))]
18282 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
18283 [(set_attr "type" "ssemov")
18284 (set_attr "prefix" "evex")
18285 (set_attr "mode" "<sseinsnmode>")])
;; Renumbered variant of the DImode-index AVX-512 gather: QImode mask
;; input 6 is tied to the "Yk" scratch operand 1, the memory operator
;; is 5 and the index vector is operand 2.
;; When the data mode differs from <VEC_GATHER_SRCDI> the destination
;; must be printed at the narrower width: through %x0 when
;; <MODE_SIZE> != 64 and through %t0 when it is 64.  Both assembler
;; dialects of a template have to name the same register, so the Intel
;; half of the <MODE_SIZE> != 64 template uses %x0 exactly like the
;; AT&T half.
18287 (define_insn "*avx512f_gatherdi<mode>_2"
18288 [(set (match_operand:VI48F 0 "register_operand" "=&v")
18291 (match_operand:QI 6 "register_operand" "1")
18292 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
18294 [(match_operand:P 3 "vsib_address_operand" "Tv")
18295 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
18296 (match_operand:SI 4 "const1248_operand" "n")]
18297 UNSPEC_VSIBADDR)])]
18299 (clobber (match_scratch:QI 1 "=&Yk"))]
18302 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
18304 if (<MODE_SIZE> != 64)
18305 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %g5}";
18307 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
18309 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
18311 [(set_attr "type" "ssemov")
18312 (set_attr "prefix" "evex")
18313 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 scatter with SImode indices.  Operands:
;;   0 base address, 1 mask register in <avx512fmaskmode>,
;;   2 index vector in <VEC_GATHER_IDXSI>, 3 data register,
;;   4 scale, 6 mask-mode scratch that is clobbered.
;; The preparation code wraps operands 0, 2 and 4 into a Pmode
;; UNSPEC_VSIBADDR.
18315 (define_expand "<avx512>_scattersi<mode>"
18316 [(parallel [(set (mem:VI48F
18318 [(match_operand 0 "vsib_address_operand")
18319 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
18320 (match_operand:SI 4 "const1248_operand")]))
18322 [(match_operand:<avx512fmaskmode> 1 "register_operand")
18323 (match_operand:VI48F 3 "register_operand")]
18325 (clobber (match_scratch:<avx512fmaskmode> 6))])]
18329 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18330 operands[4]), UNSPEC_VSIBADDR);
;; Matching insn for the SImode-index AVX-512 scatter.  The destination
;; is the vsib memory operator 5; mask input 6 is tied to the "Yk"
;; scratch operand 1, and operand 3 is the data register.
18333 (define_insn "*avx512f_scattersi<mode>"
18334 [(set (match_operator:VI48F 5 "vsib_mem_operator"
18336 [(match_operand:P 0 "vsib_address_operand" "Tv")
18337 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
18338 (match_operand:SI 4 "const1248_operand" "n")]
18341 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
18342 (match_operand:VI48F 3 "register_operand" "v")]
18344 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
18346 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18347 [(set_attr "type" "ssemov")
18348 (set_attr "prefix" "evex")
18349 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the AVX-512 scatter with DImode indices.  Operands:
;;   0 base address, 1 QImode mask register,
;;   2 index vector in <VEC_GATHER_IDXDI>,
;;   3 data register in <VEC_GATHER_SRCDI>, 4 scale,
;;   6 QImode scratch that is clobbered.
;; The preparation code wraps operands 0, 2 and 4 into a Pmode
;; UNSPEC_VSIBADDR.
18351 (define_expand "<avx512>_scatterdi<mode>"
18352 [(parallel [(set (mem:VI48F
18354 [(match_operand 0 "vsib_address_operand")
18355 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
18356 (match_operand:SI 4 "const1248_operand")]))
18358 [(match_operand:QI 1 "register_operand")
18359 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
18361 (clobber (match_scratch:QI 6))])]
18365 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
18366 operands[4]), UNSPEC_VSIBADDR);
;; Matching insn for the DImode-index AVX-512 scatter.  The destination
;; is the vsib memory operator 5; QImode mask input 6 is tied to the
;; "Yk" scratch operand 1, and operand 3 is the data register in
;; <VEC_GATHER_SRCDI>.
18369 (define_insn "*avx512f_scatterdi<mode>"
18370 [(set (match_operator:VI48F 5 "vsib_mem_operator"
18372 [(match_operand:P 0 "vsib_address_operand" "Tv")
18373 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
18374 (match_operand:SI 4 "const1248_operand" "n")]
18377 [(match_operand:QI 6 "register_operand" "1")
18378 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
18380 (clobber (match_scratch:QI 1 "=&Yk"))]
18382 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
18383 [(set_attr "type" "ssemov")
18384 (set_attr "prefix" "evex")
18385 (set_attr "mode" "<sseinsnmode>")])
;; Masked register-to-register compress.  Operand 1 is the source,
;; operand 2 is either tied to the destination or a constant matching
;; "C" (constraint "0C"), and operand 3 is the "Yk" mask register
;; printed as {%3}.  %N2 is appended after the mask in the template.
18387 (define_insn "<avx512>_compress<mode>_mask"
18388 [(set (match_operand:VI48F 0 "register_operand" "=v")
18390 [(match_operand:VI48F 1 "register_operand" "v")
18391 (match_operand:VI48F 2 "vector_move_operand" "0C")
18392 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
18395 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18396 [(set_attr "type" "ssemov")
18397 (set_attr "prefix" "evex")
18398 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress with a memory destination (UNSPEC_COMPRESS_STORE).
;; Operand 1 is the source register and operand 2 the "Yk" mask
;; register; the "memory" attribute is set to "store".
18400 (define_insn "<avx512>_compressstore<mode>_mask"
18401 [(set (match_operand:VI48F 0 "memory_operand" "=m")
18403 [(match_operand:VI48F 1 "register_operand" "x")
18405 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
18406 UNSPEC_COMPRESS_STORE))]
18408 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
18409 [(set_attr "type" "ssemov")
18410 (set_attr "prefix" "evex")
18411 (set_attr "memory" "store")
18412 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the _maskz form of expand: the preparation statement
;; overwrites operands[2] with the zero constant of the vector mode.
18414 (define_expand "<avx512>_expand<mode>_maskz"
18415 [(set (match_operand:VI48F 0 "register_operand")
18417 [(match_operand:VI48F 1 "nonimmediate_operand")
18418 (match_operand:VI48F 2 "vector_move_operand")
18419 (match_operand:<avx512fmaskmode> 3 "register_operand")]
18422 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Masked expand.  Alternative 0 takes the source (operand 1) from a
;; register and alternative 1 from memory, as reflected by the "memory"
;; attribute "none,load".  Operand 2 is tied to the destination or a
;; constant matching "C"; operand 3 is the "Yk" mask register.
18424 (define_insn "<avx512>_expand<mode>_mask"
18425 [(set (match_operand:VI48F 0 "register_operand" "=v,v")
18427 [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
18428 (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
18429 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
18432 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
18433 [(set_attr "type" "ssemov")
18434 (set_attr "prefix" "evex")
18435 (set_attr "memory" "none,load")
18436 (set_attr "mode" "<sseinsnmode>")])
;; Packed vrange<ssemodesuffix>.  Operands 1 and 2 are the vector
;; inputs (operand 2's predicate and constraint come from the
;; round_saeonly attributes) and operand 3 is a 0..15 immediate.
18438 (define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
18439 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18440 (unspec:VF_AVX512VL
18441 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
18442 (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18443 (match_operand:SI 3 "const_0_to_15_operand")]
18445 "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
18446 "vrange<ssemodesuffix>\t{<round_saeonly_mask_op4>%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3<round_saeonly_mask_op4>}"
18447 [(set_attr "type" "sse")
18448 (set_attr "prefix" "evex")
18449 (set_attr "mode" "<MODE>")])
;; Scalar vrange<ssescalarmodesuffix> on VF_128 operands.  Operand 3 is
;; a 0..15 immediate; operand 2's predicate and constraint come from
;; the round_saeonly attributes.
18451 (define_insn "avx512dq_ranges<mode><round_saeonly_name>"
18452 [(set (match_operand:VF_128 0 "register_operand" "=v")
18455 [(match_operand:VF_128 1 "register_operand" "v")
18456 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18457 (match_operand:SI 3 "const_0_to_15_operand")]
18462 "vrange<ssescalarmodesuffix>\t{<round_saeonly_op4>%3, %2, %1, %0|%0, %1, %2, %3<round_saeonly_op4>}"
18463 [(set_attr "type" "sse")
18464 (set_attr "prefix" "evex")
18465 (set_attr "mode" "<MODE>")])
;; Packed vfpclass<ssemodesuffix>: writes a mask register ("=Yk") in
;; <avx512fmaskmode> from vector operand 1 and the 0..255 immediate
;; operand 2.
;; NOTE: the `;' that follows the output template starts an (empty)
;; machine-description comment; it is not a statement terminator.
18467 (define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
18468 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
18469 (unspec:<avx512fmaskmode>
18470 [(match_operand:VF_AVX512VL 1 "register_operand" "v")
18471 (match_operand:QI 2 "const_0_to_255_operand" "n")]
18474 "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
18475 [(set_attr "type" "sse")
18476 (set_attr "length_immediate" "1")
18477 (set_attr "prefix" "evex")
18478 (set_attr "mode" "<MODE>")])
;; Scalar vfpclass<ssescalarmodesuffix> on a VF_128 operand: the mask
;; result is the unspec of operand 1 and immediate operand 2 combined
;; through an `and' in <avx512fmaskmode>.
;; NOTE: the `;' that follows the output template starts an (empty)
;; machine-description comment; it is not a statement terminator.
18480 (define_insn "avx512dq_vmfpclass<mode>"
18481 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
18482 (and:<avx512fmaskmode>
18483 (unspec:<avx512fmaskmode>
18484 [(match_operand:VF_128 1 "register_operand" "v")
18485 (match_operand:QI 2 "const_0_to_255_operand" "n")]
18489 "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
18490 [(set_attr "type" "sse")
18491 (set_attr "length_immediate" "1")
18492 (set_attr "prefix" "evex")
18493 (set_attr "mode" "<MODE>")])
;; Packed vgetmant<ssemodesuffix>: operand 1 is the vector input and
;; operand 2 a 0..15 immediate.
;; NOTE: the `;' that follows the output template starts an (empty)
;; machine-description comment; it is not a statement terminator.
18495 (define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
18496 [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
18497 (unspec:VF_AVX512VL
18498 [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
18499 (match_operand:SI 2 "const_0_to_15_operand")]
18502 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
18503 [(set_attr "prefix" "evex")
18504 (set_attr "mode" "<MODE>")])
;; Scalar vgetmant<ssescalarmodesuffix> on VF_128 operands 1 and 2 with
;; a 0..15 immediate operand 3; the "mode" attribute is the scalar mode.
;; NOTE: the `;' that follows the output template starts an (empty)
;; machine-description comment; it is not a statement terminator.
18506 (define_insn "avx512f_vgetmant<mode><round_saeonly_name>"
18507 [(set (match_operand:VF_128 0 "register_operand" "=v")
18510 [(match_operand:VF_128 1 "register_operand" "v")
18511 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
18512 (match_operand:SI 3 "const_0_to_15_operand")]
18517 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}";
18518 [(set_attr "prefix" "evex")
18519 (set_attr "mode" "<ssescalarmode>")])
18521 ;; The correct representation for this is absolutely enormous, and
18522 ;; surely not generally useful.
;; Hence the operation is modelled as an opaque unspec over two
;; <dbpsadbwmode> inputs (operands 1 and 2) and a 0..255 immediate
;; (operand 3), producing a VI2_AVX512VL result.
18523 (define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
18524 [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
18525 (unspec:VI2_AVX512VL
18526 [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
18527 (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
18528 (match_operand:SI 3 "const_0_to_255_operand")]
18531 "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
18532 [(set_attr "isa" "avx")
18533 (set_attr "type" "sselog1")
18534 (set_attr "length_immediate" "1")
18535 (set_attr "prefix" "evex")
18536 (set_attr "mode" "<sseinsnmode>")])
;; Pattern named clz<mode>2 for the VI48_AVX512VL modes; emits
;; vplzcnt<ssemodesuffix> from operand 1 (register or memory) into register
;; operand 0, with an optional mask operand in the <mask_name> variant.
;; NOTE(review): the RTL code applied to operand 1 and the insn condition are
;; not visible in this excerpt.
18538 (define_insn "clz<mode>2<mask_name>"
18539 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18541 (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
18543 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18544 [(set_attr "type" "sse")
18545 (set_attr "prefix" "evex")
18546 (set_attr "mode" "<sseinsnmode>")])
;; Emits vpconflict<ssemodesuffix> for the VI48_AVX512VL modes, modelled as a
;; single-input unspec: operand 1 (register or memory) -> register operand 0,
;; with an optional mask operand in the <mask_name> variant.
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
18548 (define_insn "<mask_codefor>conflict<mode><mask_name>"
18549 [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
18550 (unspec:VI48_AVX512VL
18551 [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
18554 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
18555 [(set_attr "type" "sse")
18556 (set_attr "prefix" "evex")
18557 (set_attr "mode" "<sseinsnmode>")])
;; Emits sha1msg1 on V4SI operands.  Operand 1 is tied to the destination
;; (constraint "0"), so only operands 0 and 2 appear in the output template;
;; operand 2 may be a register or memory ("xm").
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
18559 (define_insn "sha1msg1"
18560 [(set (match_operand:V4SI 0 "register_operand" "=x")
18562 [(match_operand:V4SI 1 "register_operand" "0")
18563 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18566 "sha1msg1\t{%2, %0|%0, %2}"
18567 [(set_attr "type" "sselog1")
18568 (set_attr "mode" "TI")])
;; Emits sha1msg2 on V4SI operands.  Operand 1 is tied to the destination
;; (constraint "0"), so only operands 0 and 2 appear in the output template;
;; operand 2 may be a register or memory ("xm").
;; NOTE(review): the unspec code and the insn condition are not visible in
;; this excerpt.
18570 (define_insn "sha1msg2"
18571 [(set (match_operand:V4SI 0 "register_operand" "=x")
18573 [(match_operand:V4SI 1 "register_operand" "0")
18574 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18577 "sha1msg2\t{%2, %0|%0, %2}"
18578 [(set_attr "type" "sselog1")
18579 (set_attr "mode" "TI")])
;; Emits sha1nexte on V4SI operands, modelled as UNSPEC_SHA1NEXTE.
;; Operand 1 is tied to the destination (constraint "0"); operand 2 may be a
;; register or memory ("xm").
;; NOTE(review): the insn condition is not visible in this excerpt.
18581 (define_insn "sha1nexte"
18582 [(set (match_operand:V4SI 0 "register_operand" "=x")
18584 [(match_operand:V4SI 1 "register_operand" "0")
18585 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18586 UNSPEC_SHA1NEXTE))]
18588 "sha1nexte\t{%2, %0|%0, %2}"
18589 [(set_attr "type" "sselog1")
18590 (set_attr "mode" "TI")])
;; Emits sha1rnds4 on V4SI operands, modelled as UNSPEC_SHA1RNDS4.
;; Operand 1 is tied to the destination (constraint "0"); operand 2 may be a
;; register or memory ("xm"); operand 3 is an SImode immediate accepted by
;; const_0_to_3_operand, accounted for by length_immediate = 1.
;; NOTE(review): the insn condition is not visible in this excerpt.
18592 (define_insn "sha1rnds4"
18593 [(set (match_operand:V4SI 0 "register_operand" "=x")
18595 [(match_operand:V4SI 1 "register_operand" "0")
18596 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18597 (match_operand:SI 3 "const_0_to_3_operand" "n")]
18598 UNSPEC_SHA1RNDS4))]
18600 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
18601 [(set_attr "type" "sselog1")
18602 (set_attr "length_immediate" "1")
18603 (set_attr "mode" "TI")])
;; Emits sha256msg1 on V4SI operands, modelled as UNSPEC_SHA256MSG1.
;; Operand 1 is tied to the destination (constraint "0"); operand 2 may be a
;; register or memory ("xm").
;; NOTE(review): the insn condition is not visible in this excerpt.
18605 (define_insn "sha256msg1"
18606 [(set (match_operand:V4SI 0 "register_operand" "=x")
18608 [(match_operand:V4SI 1 "register_operand" "0")
18609 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18610 UNSPEC_SHA256MSG1))]
18612 "sha256msg1\t{%2, %0|%0, %2}"
18613 [(set_attr "type" "sselog1")
18614 (set_attr "mode" "TI")])
;; Emits sha256msg2 on V4SI operands, modelled as UNSPEC_SHA256MSG2.
;; Operand 1 is tied to the destination (constraint "0"); operand 2 may be a
;; register or memory ("xm").
;; NOTE(review): the insn condition is not visible in this excerpt.
18616 (define_insn "sha256msg2"
18617 [(set (match_operand:V4SI 0 "register_operand" "=x")
18619 [(match_operand:V4SI 1 "register_operand" "0")
18620 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
18621 UNSPEC_SHA256MSG2))]
18623 "sha256msg2\t{%2, %0|%0, %2}"
18624 [(set_attr "type" "sselog1")
18625 (set_attr "mode" "TI")])
;; Emits sha256rnds2 on V4SI operands, modelled as UNSPEC_SHA256RNDS2.
;; Operand 1 is tied to the destination (constraint "0"); operand 2 may be a
;; register or memory ("xm"); operand 3 is a V4SI register with constraint "Yz".
;; NOTE(review): length_immediate is set to "1" although none of the visible
;; operands is an immediate (operand 3 is a register_operand) -- confirm this
;; attribute is intended.  The insn condition is not visible in this excerpt.
18627 (define_insn "sha256rnds2"
18628 [(set (match_operand:V4SI 0 "register_operand" "=x")
18630 [(match_operand:V4SI 1 "register_operand" "0")
18631 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18632 (match_operand:V4SI 3 "register_operand" "Yz")]
18633 UNSPEC_SHA256RNDS2))]
18635 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
18636 [(set_attr "type" "sselog1")
18637 (set_attr "length_immediate" "1")
18638 (set_attr "mode" "TI")])
;; Cast pattern from a <ssequartermode> source (operand 1) to an AVX512MODE2P
;; destination (operand 0), expressed as an unspec and split once
;; reload_completed holds.  Two alternatives: register destination with
;; register-or-memory source, or memory destination with register source.
;; The split body re-creates one side as a hard register in the other side's
;; mode (op0 in <ssequartermode>, or op1 in <MODE>) using its REGNO, then
;; emits a plain move.
;; NOTE(review): lines appear to be missing from this excerpt (unspec code,
;; insn condition, and whatever guard selects between the two gen_rtx_REG
;; rewrites) -- confirm against the full file before relying on this summary.
18640 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
18641 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
18642 (unspec:AVX512MODE2P
18643 [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
18647 "&& reload_completed"
18650 rtx op0 = operands[0];
18651 rtx op1 = operands[1];
18653 op0 = gen_rtx_REG (<ssequartermode>mode, REGNO (op0));
18655 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
18656 emit_move_insn (op0, op1);
;; Cast pattern from a <ssehalfvecmode> source (operand 1) to an AVX512MODE2P
;; destination (operand 0), expressed as an unspec and split once
;; reload_completed holds.  Two alternatives: register destination with
;; register-or-memory source, or memory destination with register source.
;; The split body re-creates one side as a hard register in the other side's
;; mode (op0 in <ssehalfvecmode>, or op1 in <MODE>) using its REGNO, then
;; emits a plain move.
;; NOTE(review): lines appear to be missing from this excerpt (unspec code,
;; insn condition, and whatever guard selects between the two gen_rtx_REG
;; rewrites) -- confirm against the full file before relying on this summary.
18660 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
18661 [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
18662 (unspec:AVX512MODE2P
18663 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
18667 "&& reload_completed"
18670 rtx op0 = operands[0];
18671 rtx op1 = operands[1];
18673 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
18675 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
18676 emit_move_insn (op0, op1);
;; Int iterator over the two VPMADD52 unspec codes (LUQ and HUQ variants);
;; paired with the vpmadd52type attribute to pick the mnemonic suffix.
18680 (define_int_iterator VPMADD52
18681 [UNSPEC_VPMADD52LUQ
18682 UNSPEC_VPMADD52HUQ])
;; Maps each VPMADD52 unspec code to the suffix spliced into pattern names and
;; mnemonics: UNSPEC_VPMADD52LUQ -> "luq", UNSPEC_VPMADD52HUQ -> "huq".
18684 (define_int_attr vpmadd52type
18685 [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
;; Expander for the "huq" variant with a mask operand, enabled under
;; TARGET_AVX512IFMA.  Operands 0-3 are VI8_AVX512VL values (operand 3 may be
;; memory) and operand 4 is the <avx512fmaskmode> mask register.
;; It forwards to gen_vpamdd52huq<mode>_maskz_1, inserting
;; CONST0_RTX (<MODE>mode) as the fifth argument ahead of the mask.
;; NOTE(review): the name is spelled "vpamdd52" (not "vpmadd52"); it is part of
;; the generated gen_* interface, so it must stay in sync with its users.
18687 (define_expand "vpamdd52huq<mode>_maskz"
18688 [(match_operand:VI8_AVX512VL 0 "register_operand")
18689 (match_operand:VI8_AVX512VL 1 "register_operand")
18690 (match_operand:VI8_AVX512VL 2 "register_operand")
18691 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
18692 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18693 "TARGET_AVX512IFMA"
18695 emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
18696 operands[0], operands[1], operands[2], operands[3],
18697 CONST0_RTX (<MODE>mode), operands[4]));
;; Expander for the "luq" variant with a mask operand, enabled under
;; TARGET_AVX512IFMA.  Operands 0-3 are VI8_AVX512VL values (operand 3 may be
;; memory) and operand 4 is the <avx512fmaskmode> mask register.
;; It forwards to gen_vpamdd52luq<mode>_maskz_1, inserting
;; CONST0_RTX (<MODE>mode) as the fifth argument ahead of the mask.
;; NOTE(review): the name is spelled "vpamdd52" (not "vpmadd52"); it is part of
;; the generated gen_* interface, so it must stay in sync with its users.
18701 (define_expand "vpamdd52luq<mode>_maskz"
18702 [(match_operand:VI8_AVX512VL 0 "register_operand")
18703 (match_operand:VI8_AVX512VL 1 "register_operand")
18704 (match_operand:VI8_AVX512VL 2 "register_operand")
18705 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
18706 (match_operand:<avx512fmaskmode> 4 "register_operand")]
18707 "TARGET_AVX512IFMA"
18709 emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
18710 operands[0], operands[1], operands[2], operands[3],
18711 CONST0_RTX (<MODE>mode), operands[4]));
;; Emits vpmadd52<vpmadd52type> (luq/huq) for the VI8_AVX512VL modes under
;; TARGET_AVX512IFMA.  Operand 1 is tied to the destination (constraint "0"),
;; so the template prints only operands 0, 2 and 3; operand 3 may be memory.
;; The <sd_maskz_name> subst supplies the optional mask suffix <sd_mask_op4>.
;; NOTE(review): the pattern name is spelled "vpamdd52" while the mnemonic is
;; "vpmadd52"; the expanders in this file call gen_vpamdd52..., so the spelling
;; is left unchanged.  The unspec code is not visible in this excerpt
;; (presumably the VPMADD52 iterator -- confirm).
18715 (define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
18716 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
18717 (unspec:VI8_AVX512VL
18718 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
18719 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
18720 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
18722 "TARGET_AVX512IFMA"
18723 "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
18724 [(set_attr "type" "ssemuladd")
18725 (set_attr "prefix" "evex")
18726 (set_attr "mode" "<sseinsnmode>")])
;; Masked form of vpmadd52<vpmadd52type> for the VI8_AVX512VL modes under
;; TARGET_AVX512IFMA: the unspec result is combined through vec_merge under
;; the <avx512fmaskmode> mask in operand 4 (constraint "Yk"), printed as
;; %0{%4}.  Operand 1 is tied to the destination (constraint "0").
;; NOTE(review): the unspec code and the second vec_merge input are not
;; visible in this excerpt.  The name is spelled "vpamdd52" (not "vpmadd52").
18728 (define_insn "vpamdd52<vpmadd52type><mode>_mask"
18729 [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
18730 (vec_merge:VI8_AVX512VL
18731 (unspec:VI8_AVX512VL
18732 [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
18733 (match_operand:VI8_AVX512VL 2 "register_operand" "v")
18734 (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
18737 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
18738 "TARGET_AVX512IFMA"
18739 "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
18740 [(set_attr "type" "ssemuladd")
18741 (set_attr "prefix" "evex")
18742 (set_attr "mode" "<sseinsnmode>")])
;; Emits vpmultishiftqb for the VI1_AVX512VL modes under TARGET_AVX512VBMI,
;; modelled as UNSPEC_VPMULTISHIFT over two inputs.
;; Operand 0: register destination.
;; Operand 1: register source.
;; Operand 2: register or memory source ("vm").
;; The <mask_name> subst adds the optional mask operand printed via
;; <mask_operand3>.
18744 (define_insn "vpmultishiftqb<mode><mask_name>"
18745 [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
18746 (unspec:VI1_AVX512VL
18747 [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
18748 (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
18749 UNSPEC_VPMULTISHIFT))]
18750 "TARGET_AVX512VBMI"
18751 "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
18752 [(set_attr "type" "sselog")
18753 (set_attr "prefix" "evex")
18754 (set_attr "mode" "<sseinsnmode>")])