1 ;; GCC machine description for SSE instructions
2 ;; Copyright (C) 2005-2014 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify
7 ;; it under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful,
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;; GNU General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
;; Symbolic names for the unspec operations used by the patterns in this file.
14 (define_c_enum "unspec" [
55 UNSPEC_XOP_UNSIGNED_CMP
66 UNSPEC_AESKEYGENASSIST
87 ;; For AVX512F support
91 UNSPEC_UNSIGNED_FIX_NOTRUNC
106 UNSPEC_COMPRESS_STORE
111 ;; For the embedded rounding feature
112 UNSPEC_EMBEDDED_ROUNDING
114 ;; For AVX512PF support
115 UNSPEC_GATHER_PREFETCH
116 UNSPEC_SCATTER_PREFETCH
118 ;; For AVX512ER support
;; Symbolic names belonging to the "unspecv" enumeration.
133 (define_c_enum "unspecv" [
143 ;; All vector modes including V?TImode, used in move patterns.
;; The 512-bit entries are gated on TARGET_AVX512F and the 256-bit entries
;; on TARGET_AVX; the 128-bit entries are unconditional.
144 (define_mode_iterator VMOVE
145 [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
146 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
147 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
148 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
149 (V2TI "TARGET_AVX") V1TI
150 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
151 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; General vector modes.  Compared with VMOVE this list has no V64QI,
;; V32HI or V?TI entries, and V2DF is gated on TARGET_SSE2.
154 (define_mode_iterator V
155 [(V32QI "TARGET_AVX") V16QI
156 (V16HI "TARGET_AVX") V8HI
157 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
158 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
159 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
160 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
162 ;; All 128bit vector modes
;; Only V2DF carries a condition (TARGET_SSE2).
163 (define_mode_iterator V_128
164 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
166 ;; All 256bit vector modes
;; No per-mode conditions are attached here.
167 (define_mode_iterator V_256
168 [V32QI V16HI V8SI V4DI V8SF V4DF])
170 ;; All 512bit vector modes
;; No per-mode conditions are attached here.
171 (define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
173 ;; All 256bit and 512bit vector modes
;; The 256-bit entries are unconditional; every 512-bit entry is gated on
;; TARGET_AVX512F.
174 (define_mode_iterator V_256_512
175 [V32QI V16HI V8SI V4DI V8SF V4DF
176 (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
177 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
179 ;; All vector float modes
;; V4SF is the only unconditional entry.
180 (define_mode_iterator VF
181 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
182 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
184 ;; 128- and 256-bit float vector modes
;; Same entries as VF minus the 512-bit modes.
185 (define_mode_iterator VF_128_256
186 [(V8SF "TARGET_AVX") V4SF
187 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
189 ;; All SFmode vector float modes
190 (define_mode_iterator VF1
191 [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
193 ;; 128- and 256-bit SF vector modes
194 (define_mode_iterator VF1_128_256
195 [(V8SF "TARGET_AVX") V4SF])
197 ;; All DFmode vector float modes
;; Note that V2DF is unconditional here, unlike in VF.
198 (define_mode_iterator VF2
199 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
201 ;; 128- and 256-bit DF vector modes
202 (define_mode_iterator VF2_128_256
203 [(V4DF "TARGET_AVX") V2DF])
;; 256- and 512-bit DF vector modes
205 (define_mode_iterator VF2_512_256
206 [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")])
208 ;; All 128bit vector float modes
;; V4SF is unconditional; V2DF is gated on TARGET_SSE2.
209 (define_mode_iterator VF_128
210 [V4SF (V2DF "TARGET_SSE2")])
212 ;; All 256bit vector float modes
213 (define_mode_iterator VF_256
216 ;; All 512bit vector float modes
;; Used as the operand mode of avx512f_storeu<ssemodesuffix>512_mask.
217 (define_mode_iterator VF_512
220 ;; All vector integer modes
;; 256-bit entries need TARGET_AVX; the only 512-bit entries are V16SI and
;; V8DI (TARGET_AVX512F).
221 (define_mode_iterator VI
222 [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
223 (V32QI "TARGET_AVX") V16QI
224 (V16HI "TARGET_AVX") V8HI
225 (V8SI "TARGET_AVX") V4SI
226 (V4DI "TARGET_AVX") V2DI])
;; Vector integer modes whose 256-bit entries require TARGET_AVX2 rather
;; than TARGET_AVX.
228 (define_mode_iterator VI_AVX2
229 [(V32QI "TARGET_AVX2") V16QI
230 (V16HI "TARGET_AVX2") V8HI
231 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
232 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
234 ;; All QImode vector integer modes
;; (128- and 256-bit only; V64QI is not listed.)
235 (define_mode_iterator VI1
236 [(V32QI "TARGET_AVX") V16QI])
;; Integer modes accepted by the loaddqu/storedqu patterns: QImode vectors
;; of 128 and 256 bits plus the 512-bit V16SI and V8DI.
238 (define_mode_iterator VI_UNALIGNED_LOADSTORE
239 [(V32QI "TARGET_AVX") V16QI
240 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
242 ;; All DImode vector integer modes
243 (define_mode_iterator VI8
244 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; Element-size specific integer iterators.  In the names below the digits
;; give the element sizes in bytes (1 = QI, 2 = HI, 4 = SI, 8 = DI) and the
;; suffix names the target flag gating the wider modes; the 128-bit mode is
;; unconditional wherever it is listed.
246 (define_mode_iterator VI1_AVX2
247 [(V32QI "TARGET_AVX2") V16QI])
249 (define_mode_iterator VI2_AVX2
250 [(V16HI "TARGET_AVX2") V8HI])
252 (define_mode_iterator VI2_AVX512F
253 [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
255 (define_mode_iterator VI4_AVX
256 [(V8SI "TARGET_AVX") V4SI])
258 (define_mode_iterator VI4_AVX2
259 [(V8SI "TARGET_AVX2") V4SI])
261 (define_mode_iterator VI4_AVX512F
262 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
;; Note: the only DImode entry here is the 512-bit V8DI.
264 (define_mode_iterator VI48_AVX512F
265 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
266 (V8DI "TARGET_AVX512F")])
268 (define_mode_iterator VI8_AVX2
269 [(V4DI "TARGET_AVX2") V2DI])
271 (define_mode_iterator VI8_AVX2_AVX512F
272 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
275 (define_mode_iterator V8FI
279 (define_mode_iterator V16FI
282 ;; ??? We should probably use TImode instead.
;; V2TI is gated on TARGET_AVX2; V1TI is unconditional.
283 (define_mode_iterator VIMAX_AVX2
284 [(V2TI "TARGET_AVX2") V1TI])
286 ;; ??? This should probably be dropped in favor of VIMAX_AVX2.
;; Differs from VIMAX_AVX2 only in listing TI instead of V1TI.
287 (define_mode_iterator SSESCALARMODE
288 [(V2TI "TARGET_AVX2") TI])
;; Combined element-size integer iterators.  Digits in the name list the
;; element sizes in bytes that are covered; 256-bit entries are gated on
;; TARGET_AVX2 and 512-bit entries on TARGET_AVX512F.
290 (define_mode_iterator VI12_AVX2
291 [(V32QI "TARGET_AVX2") V16QI
292 (V16HI "TARGET_AVX2") V8HI])
294 (define_mode_iterator VI24_AVX2
295 [(V16HI "TARGET_AVX2") V8HI
296 (V8SI "TARGET_AVX2") V4SI])
;; QI/HI/SI modes up to 256 bits, plus the 512-bit V16SI and V8DI.
298 (define_mode_iterator VI124_AVX2_48_AVX512F
299 [(V32QI "TARGET_AVX2") V16QI
300 (V16HI "TARGET_AVX2") V8HI
301 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
302 (V8DI "TARGET_AVX512F")])
;; QI/HI/SI modes; 512-bit entries exist for HI and SI only.
304 (define_mode_iterator VI124_AVX512F
305 [(V32QI "TARGET_AVX2") V16QI
306 (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
307 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
309 (define_mode_iterator VI124_AVX2
310 [(V32QI "TARGET_AVX2") V16QI
311 (V16HI "TARGET_AVX2") V8HI
312 (V8SI "TARGET_AVX2") V4SI])
314 (define_mode_iterator VI248_AVX2
315 [(V16HI "TARGET_AVX2") V8HI
316 (V8SI "TARGET_AVX2") V4SI
317 (V4DI "TARGET_AVX2") V2DI])
;; As VI248_AVX2, with the 512-bit V8DI added.
319 (define_mode_iterator VI248_AVX2_8_AVX512F
320 [(V16HI "TARGET_AVX2") V8HI
321 (V8SI "TARGET_AVX2") V4SI
322 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
324 (define_mode_iterator VI48_AVX2_48_AVX512F
325 [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
326 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; In the entries shown, even the 128-bit integer modes are gated on
;; TARGET_AVX2.
328 (define_mode_iterator V48_AVX2
331 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
332 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
;; Per-mode ISA name string ("sse2", "avx" or "avx512f").  It is substituted
;; into pattern names such as <sse2_avx_avx512f>_loaddqu<mode>.
334 (define_mode_attr sse2_avx_avx512f
335 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
336 (V4SI "sse2") (V8SI "avx") (V16SI "avx512f")
338 (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
339 (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
;; ISA name string per integer mode: "sse2" for 128-bit modes, "avx2" for
;; 256-bit modes and "avx512f" for the 512-bit V16SI/V8DI.
341 (define_mode_attr sse2_avx2
342 [(V16QI "sse2") (V32QI "avx2")
343 (V8HI "sse2") (V16HI "avx2")
344 (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
345 (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
346 (V1TI "sse2") (V2TI "avx2")])
;; ISA name string per integer mode: "ssse3" up to 128 bits, "avx2" for
;; 256-bit modes.  Note the 128-bit whole-register entry is TI, not V1TI.
348 (define_mode_attr ssse3_avx2
349 [(V16QI "ssse3") (V32QI "avx2")
350 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2")
351 (V4SI "ssse3") (V8SI "avx2")
352 (V2DI "ssse3") (V4DI "avx2")
353 (TI "ssse3") (V2TI "avx2")])
;; ISA name string per integer mode: "sse4_1" for 128-bit modes, "avx2" for
;; 256-bit modes and "avx512f" for the 512-bit V16SI/V8DI.
355 (define_mode_attr sse4_1_avx2
356 [(V16QI "sse4_1") (V32QI "avx2")
357 (V8HI "sse4_1") (V16HI "avx2")
358 (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
359 (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
;; "avx" for float vector modes, "avx2" for SI/DI integer vector modes
;; (128- and 256-bit).
361 (define_mode_attr avx_avx2
362 [(V4SF "avx") (V2DF "avx")
363 (V8SF "avx") (V4DF "avx")
364 (V4SI "avx2") (V2DI "avx2")
365 (V8SI "avx2") (V4DI "avx2")])
;; "vec" for 128-bit integer modes, "avx2" for 256-bit integer modes.
367 (define_mode_attr vec_avx2
368 [(V16QI "vec") (V32QI "avx2")
369 (V8HI "vec") (V16HI "avx2")
370 (V4SI "vec") (V8SI "avx2")
371 (V2DI "vec") (V4DI "avx2")])
;; "avx512f" for 512-bit modes, "avx2" for every narrower mode listed.
373 (define_mode_attr avx2_avx512f
374 [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
375 (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
376 (V8SF "avx2") (V16SF "avx512f")
377 (V4DF "avx2") (V8DF "avx512f")])
;; Letter spliced into shuffle-style mnemonics, e.g.
;; vextract<shuffletype>64x4 and vbroadcast<shuffletype>32x4 in
;; *mov<mode>_internal: "f" for floating-point element modes and "i" for
;; integer element modes.  Every integer mode, V16HI included, must map to
;; "i"; any other letter yields a mnemonic that does not exist.
379 (define_mode_attr shuffletype
380 [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
381 (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
382 (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
383 (V32QI "i") (V16HI "i") (V16QI "i") (V8HI "i")
384 (V64QI "i") (V1TI "i") (V2TI "i")])
;; Mapping of 512-bit vector modes to the 128-bit mode with the same
;; element type (a quarter of the size).
386 (define_mode_attr ssequartermode
387 [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
;; Mapping to a mode of twice the total size.  The 512-bit entries double
;; the element count; the HI and QI entries keep the element count and
;; double the element width.
389 (define_mode_attr ssedoublemode
390 [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
391 (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI")
392 (V32QI "V32HI") (V16QI "V16HI")])
;; Mapping of DImode vector modes to the QImode vector mode of equal size.
394 (define_mode_attr ssebytemode
395 [(V4DI "V32QI") (V2DI "V16QI")])
397 ;; All 128bit vector integer modes
398 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
400 ;; All 256bit vector integer modes
401 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
403 ;; All 512bit vector integer modes
404 (define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
406 ;; Various 128bit vector integer mode combinations
;; Digits in each name list the element sizes in bytes that are included.
407 (define_mode_iterator VI12_128 [V16QI V8HI])
408 (define_mode_iterator VI14_128 [V16QI V4SI])
409 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
410 (define_mode_iterator VI128_128 [V16QI V8HI V2DI])
411 (define_mode_iterator VI24_128 [V8HI V4SI])
412 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
413 (define_mode_iterator VI48_128 [V4SI V2DI])
415 ;; Various 256bit and 512bit vector integer mode combinations
;; In VI124_256_48_512 the 256-bit entries are unconditional and the
;; 512-bit entries are gated on TARGET_AVX512F.
416 (define_mode_iterator VI124_256_48_512
417 [V32QI V16HI V8SI (V8DI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")])
418 (define_mode_iterator VI48_256 [V8SI V4DI])
419 (define_mode_iterator VI48_512 [V16SI V8DI])
;; The 256-bit SI mode together with the 512-bit DI mode.
420 (define_mode_iterator VI4_256_8_512 [V8SI V8DI])
422 ;; Int-float size matches
;; Each iterator pairs integer and float vector modes that have the same
;; element size and the same total size.
423 (define_mode_iterator VI4F_128 [V4SI V4SF])
424 (define_mode_iterator VI8F_128 [V2DI V2DF])
425 (define_mode_iterator VI4F_256 [V8SI V8SF])
426 (define_mode_iterator VI8F_256 [V4DI V4DF])
;; 256-bit entries are unconditional; 512-bit entries need TARGET_AVX512F.
427 (define_mode_iterator VI8F_256_512
428 [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
429 (define_mode_iterator VI48F_256_512
431 (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
432 (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; Operand mode of the avx512f_load/store/blendm mask patterns.
433 (define_mode_iterator VI48F_512 [V16SI V16SF V8DI V8DF])
435 ;; Mapping from float mode to required SSE level
;; Used in pattern names such as <sse>_loadu... and <sse>_movnt<mode>.
436 (define_mode_attr sse
437 [(SF "sse") (DF "sse2")
438 (V4SF "sse") (V2DF "sse2")
439 (V16SF "avx512f") (V8SF "avx")
440 (V8DF "avx512f") (V4DF "avx")])
;; ISA name string for QI and DI integer vector modes, by vector width.
;; Used in the pattern name <sse2>_movnt<mode>.
442 (define_mode_attr sse2
443 [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
444 (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
;; ISA name string for QImode vectors; used in <sse3>_lddqu<avxsizesuffix>.
446 (define_mode_attr sse3
447 [(V16QI "sse3") (V32QI "avx")])
;; "sse4_1" for 128-bit float modes, "avx" for 256-bit float modes.
449 (define_mode_attr sse4_1
450 [(V4SF "sse4_1") (V2DF "sse4_1")
451 (V8SF "avx") (V4DF "avx")
;; Vector width suffix for pattern names: "512" or "256" for the wide
;; modes, and the empty string for 128-bit modes.
454 (define_mode_attr avxsizesuffix
455 [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
456 (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
457 (V16QI "") (V8HI "") (V4SI "") (V2DI "")
458 (V16SF "512") (V8DF "512")
459 (V8SF "256") (V4DF "256")
460 (V4SF "") (V2DF "")])
462 ;; SSE instruction mode
;; Integer vector modes map to the whole-register integer mode of their
;; width (XI for 512 bits, OI for 256 bits, TI for 128 bits); float vector
;; modes map to themselves.
463 (define_mode_attr sseinsnmode
464 [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI")
465 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
466 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
467 (V16SF "V16SF") (V8DF "V8DF")
468 (V8SF "V8SF") (V4DF "V4DF")
469 (V4SF "V4SF") (V2DF "V2DF")
472 ;; Mapping of vector modes to corresponding mask size
;; In the entries shown, vectors of 16 elements use an HImode mask and
;; vectors of 8 or fewer elements use a QImode mask.
473 (define_mode_attr avx512fmaskmode
475 (V16HI "HI") (V8HI "QI")
476 (V16SI "HI") (V8SI "QI") (V4SI "QI")
477 (V8DI "QI") (V4DI "QI") (V2DI "QI")
478 (V16SF "HI") (V8SF "QI") (V4SF "QI")
479 (V8DF "QI") (V4DF "QI") (V2DF "QI")])
481 ;; Mapping of vector float modes to an integer mode of the same size
;; Integer vector modes map to themselves.
482 (define_mode_attr sseintvecmode
483 [(V16SF "V16SI") (V8DF "V8DI")
484 (V8SF "V8SI") (V4DF "V4DI")
485 (V4SF "V4SI") (V2DF "V2DI")
486 (V16SI "V16SI") (V8DI "V8DI")
487 (V8SI "V8SI") (V4DI "V4DI")
488 (V4SI "V4SI") (V2DI "V2DI")
489 (V16HI "V16HI") (V8HI "V8HI")
490 (V32QI "V32QI") (V16QI "V16QI")])
;; Lower-case spelling of the sseintvecmode mapping.
492 (define_mode_attr sseintvecmodelower
494 (V8SF "v8si") (V4DF "v4di")
495 (V4SF "v4si") (V2DF "v2di")
496 (V8SI "v8si") (V4DI "v4di")
497 (V4SI "v4si") (V2DI "v2di")
498 (V16HI "v16hi") (V8HI "v8hi")
499 (V32QI "v32qi") (V16QI "v16qi")])
501 ;; Mapping of vector modes to a vector mode of double size
;; The element type is kept and the element count is doubled.
502 (define_mode_attr ssedoublevecmode
503 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
504 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
505 (V8SF "V16SF") (V4DF "V8DF")
506 (V4SF "V8SF") (V2DF "V4DF")])
508 ;; Mapping of vector modes to a vector mode of half size
;; The element type is kept and the element count is halved.
509 (define_mode_attr ssehalfvecmode
510 [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
511 (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
512 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI")
513 (V16SF "V8SF") (V8DF "V4DF")
514 (V8SF "V4SF") (V4DF "V2DF")
517 ;; Mapping of vector modes to packed single mode of the same size
;; 512-bit modes map to V16SF, 256-bit modes to V8SF and 128-bit modes to
;; V4SF.  Used as the "mode" attribute value when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL or TARGET_SSE_TYPELESS_STORES
;; applies.
518 (define_mode_attr ssePSmode
519 [(V16SI "V16SF") (V8DF "V16SF")
520 (V16SF "V16SF") (V8DI "V16SF")
521 (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
522 (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
523 (V8SI "V8SF") (V4SI "V4SF")
524 (V4DI "V8SF") (V2DI "V4SF")
525 (V2TI "V8SF") (V1TI "V4SF")
526 (V8SF "V8SF") (V4SF "V4SF")
527 (V4DF "V8SF") (V2DF "V4SF")])
529 ;; Mapping of vector modes back to the scalar modes
;; i.e. each vector mode maps to the mode of one of its elements.
530 (define_mode_attr ssescalarmode
531 [(V64QI "QI") (V32QI "QI") (V16QI "QI")
532 (V32HI "HI") (V16HI "HI") (V8HI "HI")
533 (V16SI "SI") (V8SI "SI") (V4SI "SI")
534 (V8DI "DI") (V4DI "DI") (V2DI "DI")
535 (V16SF "SF") (V8SF "SF") (V4SF "SF")
536 (V8DF "DF") (V4DF "DF") (V2DF "DF")])
538 ;; Mapping of vector modes to the 128bit modes
;; The element type is kept; 128-bit modes map to themselves.
539 (define_mode_attr ssexmmmode
540 [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
541 (V32HI "V8HI") (V16HI "V8HI") (V8HI "V8HI")
542 (V16SI "V4SI") (V8SI "V4SI") (V4SI "V4SI")
543 (V8DI "V2DI") (V4DI "V2DI") (V2DI "V2DI")
544 (V16SF "V4SF") (V8SF "V4SF") (V4SF "V4SF")
545 (V8DF "V2DF") (V4DF "V2DF") (V2DF "V2DF")])
547 ;; Pointer size override for scalar modes (Intel asm dialect)
;; The letter follows the element size: "b" for QI, "w" for HI, "k" for
;; SI/SF and "q" for DI/DF elements.
548 (define_mode_attr iptr
549 [(V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
550 (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
551 (V8SF "k") (V4DF "q")
552 (V4SF "k") (V2DF "q")
555 ;; Number of scalar elements in each vector type
;; The value is a decimal string.  Note V32HI has no entry here.
556 (define_mode_attr ssescalarnum
557 [(V64QI "64") (V16SI "16") (V8DI "8")
558 (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
559 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
560 (V16SF "16") (V8DF "8")
561 (V8SF "8") (V4DF "4")
562 (V4SF "4") (V2DF "2")])
564 ;; Mask of scalar elements in each vector type
;; Each value is the element count minus one; only 128- and 256-bit modes
;; are listed.
565 (define_mode_attr ssescalarnummask
566 [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
567 (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
568 (V8SF "7") (V4DF "3")
569 (V4SF "3") (V2DF "1")])
;; Width in bits of one element.  The masked move patterns append it to
;; vmovdqa/vmovdqu to form the mnemonic (e.g. vmovdqu32, vmovdqa64).
571 (define_mode_attr ssescalarsize
572 [(V8DI "64") (V4DI "64") (V2DI "64")
573 (V32HI "16") (V16HI "16") (V8HI "16")
574 (V16SI "32") (V8SI "32") (V4SI "32")
575 (V16SF "32") (V8DF "64")])
577 ;; SSE prefix for integer vector modes
;; "p" for integer modes and the empty string for float modes; used to
;; form v<sseintprefix>blendm<ssemodesuffix> in avx512f_blendm<mode>.
578 (define_mode_attr sseintprefix
579 [(V2DI "p") (V2DF "")
584 (V16SI "p") (V16SF "")])
586 ;; SSE scalar suffix for vector modes
;; In the entries shown, "ss" goes with 4-byte elements and "sd" with
;; 8-byte elements, for integer and float modes alike.
587 (define_mode_attr ssescalarmodesuffix
589 (V8SF "ss") (V4DF "sd")
590 (V4SF "ss") (V2DF "sd")
591 (V8SI "ss") (V4DI "sd")
594 ;; Pack/unpack vector modes
;; sseunpackmode: same total size, half as many elements, each twice as
;; wide.
595 (define_mode_attr sseunpackmode
596 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
597 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
598 (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
;; ssepackmode is the inverse mapping: same total size, twice as many
;; elements, each half as wide.
600 (define_mode_attr ssepackmode
601 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
602 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
603 (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
605 ;; Mapping of the max integer size for xop rotate immediate constraint
;; Each value is the element width in bits minus one.
606 (define_mode_attr sserotatemax
607 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
609 ;; Mapping of mode to cast intrinsic name
;; Covers the same three modes as the AVX256MODE2P iterator.
610 (define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")])
612 ;; Instruction suffix for sign and zero extensions.
;; This is a code attribute keyed on the RTL code, not on a mode.
613 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
615 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
616 ;; i64x4 or f64x4 for 512bit modes.
;; The 256-bit integer entries hold the output escape "%~128" rather than
;; a literal string.
;; NOTE(review): "%~" is presumably what selects i/f at output time --
;; confirm against the operand printer.
617 (define_mode_attr i128
618 [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
619 (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
620 (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
;; The three 256-bit modes that castmode provides names for.
623 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
625 ;; Mapping of immediate bits for blend instructions
;; Each value is 2^N - 1 for a vector of N elements, i.e. one immediate
;; bit per element.
626 (define_mode_attr blendbits
627 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
629 ;; Mapping suffixes for broadcast
;; Defined for the 512-bit SI/SF/DI/DF modes only.
630 (define_mode_attr bcstscalarsuff
631 [(V16SI "d") (V16SF "ss") (V8DI "q") (V8DF "sd")])
633 ;; Include define_subst patterns for instructions with mask
636 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
638 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
642 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
644 ;; All of these patterns are enabled for SSE1 as well as SSE2.
645 ;; This is essential for maintaining stable calling conventions.
;; Move expander for every mode in VMOVE.  Both operands may be register
;; or memory; the expansion is delegated to ix86_expand_vector_move.
647 (define_expand "mov<mode>"
648 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
649 (match_operand:VMOVE 1 "nonimmediate_operand"))]
652 ix86_expand_vector_move (<MODE>mode, operands);
;; Vector move insn for every mode in VMOVE.
;; Alternatives (see the constraint strings):
;;   0: constant (constraint C) -> register, emitted through
;;      standard_sse_constant_opcode;
;;   1: register or memory -> register;
;;   2: register -> memory.
;; The insn condition requires at least one operand to be a register.
;; With TARGET_AVX512F, a move narrower than 64 bytes that touches a
;; register for which EXT_REX_SSE_REGNO_P holds is emitted as a
;; vextract/vbroadcast (memory cases) or as a move on the %g-modified wider
;; operands (register to register).  Otherwise the mnemonic is picked from
;; the insn's mode attribute, with the unaligned form used when either
;; operand is misaligned.
656 (define_insn "*mov<mode>_internal"
657 [(set (match_operand:VMOVE 0 "nonimmediate_operand" "=v,v ,m")
658 (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" "C ,vm,v"))]
660 && (register_operand (operands[0], <MODE>mode)
661 || register_operand (operands[1], <MODE>mode))"
663 int mode = get_attr_mode (insn);
664 switch (which_alternative)
667 return standard_sse_constant_opcode (insn, operands[1]);
670 /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
671 in avx512f, so we need to use workarounds, to access sse registers
672 16-31, which are evex-only. */
673 if (TARGET_AVX512F && <MODE_SIZE> < 64
674 && ((REG_P (operands[0])
675 && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
676 || (REG_P (operands[1])
677 && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
679 if (memory_operand (operands[0], <MODE>mode))
681 if (<MODE_SIZE> == 32)
682 return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
683 else if (<MODE_SIZE> == 16)
684 return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
688 else if (memory_operand (operands[1], <MODE>mode))
690 if (<MODE_SIZE> == 32)
691 return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
692 else if (<MODE_SIZE> == 16)
693 return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
698 /* Reg -> reg move is always aligned. Just use wider move. */
703 return "vmovaps\t{%g1, %g0|%g0, %g1}";
706 return "vmovapd\t{%g1, %g0|%g0, %g1}";
709 return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
720 && (misaligned_operand (operands[0], <MODE>mode)
721 || misaligned_operand (operands[1], <MODE>mode)))
722 return "vmovups\t{%1, %0|%0, %1}";
724 return "%vmovaps\t{%1, %0|%0, %1}";
730 && (misaligned_operand (operands[0], <MODE>mode)
731 || misaligned_operand (operands[1], <MODE>mode)))
732 return "vmovupd\t{%1, %0|%0, %1}";
734 return "%vmovapd\t{%1, %0|%0, %1}";
739 && (misaligned_operand (operands[0], <MODE>mode)
740 || misaligned_operand (operands[1], <MODE>mode)))
741 return "vmovdqu\t{%1, %0|%0, %1}";
743 return "%vmovdqa\t{%1, %0|%0, %1}";
745 if (misaligned_operand (operands[0], <MODE>mode)
746 || misaligned_operand (operands[1], <MODE>mode))
747 return "vmovdqu64\t{%1, %0|%0, %1}";
749 return "vmovdqa64\t{%1, %0|%0, %1}";
758 [(set_attr "type" "sselog1,ssemov,ssemov")
759 (set_attr "prefix" "maybe_vex")
761 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
762 (const_string "<ssePSmode>")
763 (and (match_test "<MODE_SIZE> == 16")
764 (and (eq_attr "alternative" "2")
765 (match_test "TARGET_SSE_TYPELESS_STORES")))
766 (const_string "<ssePSmode>")
767 (match_test "TARGET_AVX")
768 (const_string "<sseinsnmode>")
769 (ior (not (match_test "TARGET_SSE2"))
770 (match_test "optimize_function_for_size_p (cfun)"))
771 (const_string "V4SF")
772 (and (eq_attr "alternative" "0")
773 (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
776 (const_string "<sseinsnmode>")))])
;; Masked 512-bit load/move.  Operand 1 is the source (register in
;; alternative 0, memory in alternative 1), operand 2 is either tied to the
;; destination or a constant (constraint "0C"), and operand 3 is the mask
;; register (constraint Yk).  The unaligned mnemonic is emitted when
;; operand 1 is misaligned; otherwise the aligned one is used.
778 (define_insn "avx512f_load<mode>_mask"
779 [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
781 (match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
782 (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
783 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
786 switch (MODE_<sseinsnmode>)
790 if (misaligned_operand (operands[1], <MODE>mode))
791 return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
792 return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
794 if (misaligned_operand (operands[1], <MODE>mode))
795 return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
796 return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
799 [(set_attr "type" "ssemov")
800 (set_attr "prefix" "evex")
801 (set_attr "memory" "none,load")
802 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit blend, emitted as v<sseintprefix>blendm<ssemodesuffix>.
;; Note the operand numbering: operand 2 (register or memory) is listed
;; before operand 1 (register only) in the RTL; operand 3 is the mask
;; register (constraint Yk).
804 (define_insn "avx512f_blendm<mode>"
805 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
807 (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
808 (match_operand:VI48F_512 1 "register_operand" "v")
809 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
811 "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
812 [(set_attr "type" "ssemov")
813 (set_attr "prefix" "evex")
814 (set_attr "mode" "<sseinsnmode>")])
;; Masked 512-bit store of register operand 1 to memory operand 0 under
;; mask register operand 2 (constraint Yk).  Only the aligned mnemonics
;; (vmova..., vmovdqa...) appear in the output code shown here.
816 (define_insn "avx512f_store<mode>_mask"
817 [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
819 (match_operand:VI48F_512 1 "register_operand" "v")
821 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
824 switch (MODE_<sseinsnmode>)
828 return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
830 return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
833 [(set_attr "type" "ssemov")
834 (set_attr "prefix" "evex")
835 (set_attr "memory" "store")
836 (set_attr "mode" "<sseinsnmode>")])
;; Emits %vmovq from operand 1 (xmm register or memory) into an xmm
;; register.  The visible RTL selects element 0 of the V2DI source; the
;; Intel-dialect template prints operand 1 with the %q modifier.
838 (define_insn "sse2_movq128"
839 [(set (match_operand:V2DI 0 "register_operand" "=x")
842 (match_operand:V2DI 1 "nonimmediate_operand" "xm")
843 (parallel [(const_int 0)]))
846 "%vmovq\t{%1, %0|%0, %q1}"
847 [(set_attr "type" "ssemov")
848 (set_attr "prefix" "maybe_vex")
849 (set_attr "mode" "TI")])
851 ;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
852 ;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
853 ;; from memory, we'd prefer to load the memory directly into the %xmm
854 ;; register. To facilitate this happy circumstance, this pattern won't
855 ;; split until after register allocation. If the 64-bit value didn't
856 ;; come from memory, this is the best we can do. This is much better
857 ;; than storing %edx:%eax into a stack temporary and loading an %xmm
;;
;; Only enabled for 32-bit targets with SSE2 and
;; TARGET_INTER_UNIT_MOVES_TO_VEC.  Alternative 0 takes the DI value from
;; general registers and needs the V4SI scratch (operand 2); alternative 1
;; takes it from memory and uses no scratch (constraint X).
;; NOTE(review): the memory branch calls gen_reg_rtx although the split
;; condition requires reload_completed; creating a new pseudo register at
;; that point looks unsafe -- confirm, and consider building the V2DI
;; value directly in operand 0 instead.
860 (define_insn_and_split "movdi_to_sse"
862 [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
863 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
864 (clobber (match_scratch:V4SI 2 "=&x,X"))])]
865 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
867 "&& reload_completed"
870 if (register_operand (operands[1], DImode))
872 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
873 Assemble the 64-bit DImode value in an xmm register. */
874 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
875 gen_rtx_SUBREG (SImode, operands[1], 0)));
876 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
877 gen_rtx_SUBREG (SImode, operands[1], 4)));
878 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
881 else if (memory_operand (operands[1], DImode))
883 rtx tmp = gen_reg_rtx (V2DImode);
884 emit_insn (gen_vec_concatv2di (tmp, operands[1], const0_rtx));
885 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp));
;; After reload, rewrite a V4SF load whose source matches
;; zero_extended_scalar_load_operand in terms of its SFmode scalar
;; (operand 1, narrowed with simplify_gen_subreg) and an all-zero V4SF
;; vector (operand 2).
892 [(set (match_operand:V4SF 0 "register_operand")
893 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
894 "TARGET_SSE && reload_completed"
897 (vec_duplicate:V4SF (match_dup 1))
901 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
902 operands[2] = CONST0_RTX (V4SFmode);
;; After reload, rewrite a V2DF load whose source matches
;; zero_extended_scalar_load_operand as a vec_concat of its DFmode scalar
;; (operand 1, narrowed with simplify_gen_subreg) with a DFmode zero
;; (operand 2).
906 [(set (match_operand:V2DF 0 "register_operand")
907 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
908 "TARGET_SSE2 && reload_completed"
909 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
911 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
912 operands[2] = CONST0_RTX (DFmode);
;; Misaligned move expander for every mode in VMOVE; the expansion is
;; delegated to ix86_expand_vector_move_misalign.
915 (define_expand "movmisalign<mode>"
916 [(set (match_operand:VMOVE 0 "nonimmediate_operand")
917 (match_operand:VMOVE 1 "nonimmediate_operand"))]
920 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; Expander for the unaligned float vector load.  When operand 1 really is
;; misaligned, a plain SET is emitted instead of the unspec form; for the
;; masked variant that SET's source is a VEC_MERGE of operand 1 with
;; operands 2 and 3.
924 (define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
925 [(set (match_operand:VF 0 "register_operand")
926 (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
928 "TARGET_SSE && <mask_mode512bit_condition>"
930 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
931 just fine if misaligned_operand is true, and without the UNSPEC it can
932 be combined with arithmetic instructions. If misaligned_operand is
933 false, still emit UNSPEC_LOADU insn to honor user's request for
936 && misaligned_operand (operands[1], <MODE>mode))
938 rtx src = operands[1];
940 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
941 operands[2 * <mask_applied>],
942 operands[3 * <mask_applied>]);
943 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Unaligned float vector load insn.  Depending on the insn's mode
;; attribute it emits either %vmovups or %vmovu<ssemodesuffix>;
;; <mask_operand2> appends the mask operand for the masked variant.
;; The mode attribute falls back to <ssePSmode> or V4SF under the tuning
;; and size conditions listed in the cond below.
948 (define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
949 [(set (match_operand:VF 0 "register_operand" "=v")
951 [(match_operand:VF 1 "nonimmediate_operand" "vm")]
953 "TARGET_SSE && <mask_mode512bit_condition>"
955 switch (get_attr_mode (insn))
960 return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
962 return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
965 [(set_attr "type" "ssemov")
966 (set_attr "movu" "1")
967 (set_attr "ssememalign" "8")
968 (set_attr "prefix" "maybe_vex")
970 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
971 (const_string "<ssePSmode>")
972 (match_test "TARGET_AVX")
973 (const_string "<MODE>")
974 (match_test "optimize_function_for_size_p (cfun)")
975 (const_string "V4SF")
977 (const_string "<MODE>")))])
;; Unaligned float vector store of register operand 1 to memory operand 0.
;; Emits %vmovups or %vmovu<ssemodesuffix> depending on the insn's mode
;; attribute.  Unlike the load, the <ssePSmode> choice here applies only to
;; 16-byte modes and is also taken for TARGET_SSE_TYPELESS_STORES.
979 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
980 [(set (match_operand:VF 0 "memory_operand" "=m")
982 [(match_operand:VF 1 "register_operand" "v")]
986 switch (get_attr_mode (insn))
991 return "%vmovups\t{%1, %0|%0, %1}";
993 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
996 [(set_attr "type" "ssemov")
997 (set_attr "movu" "1")
998 (set_attr "ssememalign" "8")
999 (set_attr "prefix" "maybe_vex")
1001 (cond [(and (match_test "<MODE_SIZE> == 16")
1002 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1003 (match_test "TARGET_SSE_TYPELESS_STORES")))
1004 (const_string "<ssePSmode>")
1005 (match_test "TARGET_AVX")
1006 (const_string "<MODE>")
1007 (match_test "optimize_function_for_size_p (cfun)")
1008 (const_string "V4SF")
1010 (const_string "<MODE>")))])
;; Masked unaligned 512-bit float store of register operand 1 to memory
;; operand 0 under mask register operand 2 (constraint Yk).  Emits vmovups
;; or vmovu<ssemodesuffix> depending on the insn's mode attribute.
1012 (define_insn "avx512f_storeu<ssemodesuffix>512_mask"
1013 [(set (match_operand:VF_512 0 "memory_operand" "=m")
1016 [(match_operand:VF_512 1 "register_operand" "v")]
1019 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1022 switch (get_attr_mode (insn))
1025 return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1027 return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1030 [(set_attr "type" "ssemov")
1031 (set_attr "movu" "1")
1032 (set_attr "memory" "store")
1033 (set_attr "prefix" "evex")
1034 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the unaligned integer vector load; the integer counterpart
;; of the <sse>_loadu expander.  When operand 1 really is misaligned, a
;; plain SET is emitted instead of the unspec form; for the masked variant
;; that SET's source is a VEC_MERGE of operand 1 with operands 2 and 3.
1036 (define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1037 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand")
1038 (unspec:VI_UNALIGNED_LOADSTORE
1039 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand")]
1041 "TARGET_SSE2 && <mask_mode512bit_condition>"
1043 /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1044 just fine if misaligned_operand is true, and without the UNSPEC it can
1045 be combined with arithmetic instructions. If misaligned_operand is
1046 false, still emit UNSPEC_LOADU insn to honor user's request for
1049 && misaligned_operand (operands[1], <MODE>mode))
1051 rtx src = operands[1];
1053 src = gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
1054 operands[2 * <mask_applied>],
1055 operands[3 * <mask_applied>]);
1056 emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
;; Unaligned integer vector load insn.  The mnemonic depends on the insn's
;; mode attribute: %vmovups, vmovdqu64 (for V8DI) or vmovdqu32 with the
;; optional <mask_operand2> suffix, or %vmovdqu.  Only the vmovdqu64 and
;; vmovdqu32 templates carry the mask operand.
1061 (define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
1062 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
1063 (unspec:VI_UNALIGNED_LOADSTORE
1064 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
1066 "TARGET_SSE2 && <mask_mode512bit_condition>"
1068 switch (get_attr_mode (insn))
1072 return "%vmovups\t{%1, %0|%0, %1}";
1074 if (<MODE>mode == V8DImode)
1075 return "vmovdqu64\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1077 return "vmovdqu32\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
1079 return "%vmovdqu\t{%1, %0|%0, %1}";
1082 [(set_attr "type" "ssemov")
1083 (set_attr "movu" "1")
1084 (set_attr "ssememalign" "8")
1085 (set (attr "prefix_data16")
1087 (match_test "TARGET_AVX")
1089 (const_string "1")))
1090 (set_attr "prefix" "maybe_vex")
1092 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1093 (const_string "<ssePSmode>")
1094 (match_test "TARGET_AVX")
1095 (const_string "<sseinsnmode>")
1096 (match_test "optimize_function_for_size_p (cfun)")
1097 (const_string "V4SF")
1099 (const_string "<sseinsnmode>")))])
;; Unaligned integer vector store of register operand 1 to memory
;; operand 0.  The mnemonic depends on the insn's mode attribute:
;; %vmovups, vmovdqu64 (for V8DI) or vmovdqu32, or %vmovdqu.  As with the
;; float store, <ssePSmode> is chosen only for 16-byte modes.
1101 (define_insn "<sse2_avx_avx512f>_storedqu<mode>"
1102 [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "memory_operand" "=m")
1103 (unspec:VI_UNALIGNED_LOADSTORE
1104 [(match_operand:VI_UNALIGNED_LOADSTORE 1 "register_operand" "v")]
1108 switch (get_attr_mode (insn))
1113 return "%vmovups\t{%1, %0|%0, %1}";
1115 if (<MODE>mode == V8DImode)
1116 return "vmovdqu64\t{%1, %0|%0, %1}";
1118 return "vmovdqu32\t{%1, %0|%0, %1}";
1120 return "%vmovdqu\t{%1, %0|%0, %1}";
1123 [(set_attr "type" "ssemov")
1124 (set_attr "movu" "1")
1125 (set_attr "ssememalign" "8")
1126 (set (attr "prefix_data16")
1128 (match_test "TARGET_AVX")
1130 (const_string "1")))
1131 (set_attr "prefix" "maybe_vex")
1133 (cond [(and (match_test "<MODE_SIZE> == 16")
1134 (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
1135 (match_test "TARGET_SSE_TYPELESS_STORES")))
1136 (const_string "<ssePSmode>")
1137 (match_test "TARGET_AVX")
1138 (const_string "<sseinsnmode>")
1139 (match_test "optimize_function_for_size_p (cfun)")
1140 (const_string "V4SF")
1142 (const_string "<sseinsnmode>")))])
;; Masked unaligned 512-bit integer store of register operand 1 to memory
;; operand 0 under mask register operand 2 (constraint Yk): vmovdqu64 for
;; V8DI, vmovdqu32 for the other mode in VI48_512 (V16SI).
1144 (define_insn "avx512f_storedqu<mode>_mask"
1145 [(set (match_operand:VI48_512 0 "memory_operand" "=m")
1148 [(match_operand:VI48_512 1 "register_operand" "v")]
1151 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1154 if (<MODE>mode == V8DImode)
1155 return "vmovdqu64\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1157 return "vmovdqu32\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1159 [(set_attr "type" "ssemov")
1160 (set_attr "movu" "1")
1161 (set_attr "memory" "store")
1162 (set_attr "prefix" "evex")
1163 (set_attr "mode" "<sseinsnmode>")])
;; %vlddqu load of a QImode vector (V16QI, or V32QI with TARGET_AVX).
;; Operand 1 must be a memory operand; the destination uses the "x"
;; register constraint.
1165 (define_insn "<sse3>_lddqu<avxsizesuffix>"
1166 [(set (match_operand:VI1 0 "register_operand" "=x")
1167 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1170 "%vlddqu\t{%1, %0|%0, %1}"
1171 [(set_attr "type" "ssemov")
1172 (set_attr "movu" "1")
1173 (set_attr "ssememalign" "8")
1174 (set (attr "prefix_data16")
1176 (match_test "TARGET_AVX")
1178 (const_string "0")))
1179 (set (attr "prefix_rep")
1181 (match_test "TARGET_AVX")
1183 (const_string "1")))
1184 (set_attr "prefix" "maybe_vex")
1185 (set_attr "mode" "<sseinsnmode>")])
;; movnti store of a general-purpose register (operand 1, "r") to memory
;; operand 0, for the SWI48 integer modes.
1187 (define_insn "sse2_movnti<mode>"
1188 [(set (match_operand:SWI48 0 "memory_operand" "=m")
1189 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1192 "movnti\t{%1, %0|%0, %1}"
1193 [(set_attr "type" "ssemov")
1194 (set_attr "prefix_data16" "0")
1195 (set_attr "mode" "<MODE>")])
;; %vmovnt<ssemodesuffix> store of a VF vector register (operand 1, "v")
;; to memory operand 0.
1197 (define_insn "<sse>_movnt<mode>"
1198 [(set (match_operand:VF 0 "memory_operand" "=m")
1200 [(match_operand:VF 1 "register_operand" "v")]
1203 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1204 [(set_attr "type" "ssemov")
1205 (set_attr "prefix" "maybe_vex")
1206 (set_attr "mode" "<MODE>")])
;; %vmovntdq store of a VI8 vector register (operand 1, "v") to memory
;; operand 0.  The prefix_data16 attribute is conditional on TARGET_AVX.
;; NOTE(review): "type" is "ssecvt" here while the sibling movnti and
;; movnt<mode> patterns use "ssemov" -- confirm this is intentional.
1208 (define_insn "<sse2>_movnt<mode>"
1209 [(set (match_operand:VI8 0 "memory_operand" "=m")
1210 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1213 "%vmovntdq\t{%1, %0|%0, %1}"
1214 [(set_attr "type" "ssecvt")
1215 (set (attr "prefix_data16")
1217 (match_test "TARGET_AVX")
1219 (const_string "1")))
1220 (set_attr "prefix" "maybe_vex")
1221 (set_attr "mode" "<sseinsnmode>")])
1223 ; Expand patterns for non-temporal stores. At the moment, only those
1224 ; that directly map to insns are defined; it would be possible to
1225 ; define patterns for other modes that would expand to several insns.
1227 ;; Modes handled by storent patterns.
;; Per-mode enabling conditions: DI needs TARGET_SSE2 && TARGET_64BIT,
;; SI needs TARGET_SSE2, SF and DF need TARGET_SSE4A; the 512-bit vector
;; modes need TARGET_AVX512F, the 256-bit ones TARGET_AVX, V2DI and V2DF
;; need TARGET_SSE2, and V4SF carries no extra condition.
1228 (define_mode_iterator STORENT_MODE
1229 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1230 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1231 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1232 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1233 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; Named expander "storent<mode>" over STORENT_MODE: sets memory operand 0
;; to an unspec of register operand 1.  The unspec name and the expander
;; condition are not visible in this excerpt.
1235 (define_expand "storent<mode>"
1236 [(set (match_operand:STORENT_MODE 0 "memory_operand")
1237 (unspec:STORENT_MODE
1238 [(match_operand:STORENT_MODE 1 "register_operand")]
1242 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1244 ;; Parallel floating point arithmetic
1246 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unary <code> expander for VF modes.  All work is delegated to
;; ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands), after
;; which the expander finishes with DONE.
1248 (define_expand "<code><mode>2"
1249 [(set (match_operand:VF 0 "register_operand")
1251 (match_operand:VF 1 "register_operand")))]
1253 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
;; Abs/neg of a VF vector, matched through "absneg_operator" (operand 3)
;; with an extra (use ...) of operand 2.  Split after reload into one
;; logical operation: XOR when operand 3 is NEG, AND otherwise.
;; When operand 1 is a MEM the two inputs are swapped, so op1 is
;; operands[2] and op2 is operands[1].
;; The handling after the rtx_equal_p (operands[0], operands[1]) test is
;; not visible in this excerpt.
1255 (define_insn_and_split "*absneg<mode>2"
1256 [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1257 (match_operator:VF 3 "absneg_operator"
1258 [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
1259 (use (match_operand:VF 2 "nonimmediate_operand" "xm, 0, vm,v"))]
1262 "&& reload_completed"
1265 enum rtx_code absneg_op;
1271 if (MEM_P (operands[1]))
1272 op1 = operands[2], op2 = operands[1];
1274 op1 = operands[1], op2 = operands[2];
1279 if (rtx_equal_p (operands[0], operands[1]))
1285 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND;
1286 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2);
1287 t = gen_rtx_SET (VOIDmode, operands[0], t);
1291 [(set_attr "isa" "noavx,noavx,avx,avx")])
;; Vector add/sub expander for VF modes, with the <mask_name> and
;; <round_name> substitutions.  Enabled under TARGET_SSE plus the two
;; 512-bit mode conditions; the preparation statement calls
;; ix86_fixup_binary_operands_no_copy on the operands.
1293 (define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1294 [(set (match_operand:VF 0 "register_operand")
1296 (match_operand:VF 1 "<round_nimm_predicate>")
1297 (match_operand:VF 2 "<round_nimm_predicate>")))]
1298 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1299 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Vector add/sub insn for VF modes.  Alternative 0 (isa noavx) is the
;; two-operand form with operand 1 tied to the destination; alternative 1
;; (isa avx) is the three-operand v-prefixed form that also prints the
;; mask and rounding operands.  The condition additionally requires
;; ix86_binary_operator_ok.
1301 (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1302 [(set (match_operand:VF 0 "register_operand" "=x,v")
1304 (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1305 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1306 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1308 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1309 v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1310 [(set_attr "isa" "noavx,avx")
1311 (set_attr "type" "sseadd")
1312 (set_attr "prefix" "<mask_prefix3>")
1313 (set_attr "mode" "<ssescalarmode>")])
;; Scalar ("vm") add/sub on VF_128 operands, emitted with the
;; <ssescalarmodesuffix> mnemonic.  Alternative 0 is the two-operand
;; legacy form, alternative 1 the three-operand v-prefixed form with the
;; rounding operand.  The "mode" attribute is <ssescalarmode>.
1315 (define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
1316 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1319 (match_operand:VF_128 1 "register_operand" "0,v")
1320 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1325 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1326 v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1327 [(set_attr "isa" "noavx,avx")
1328 (set_attr "type" "sseadd")
1329 (set_attr "prefix" "<round_prefix>")
1330 (set_attr "mode" "<ssescalarmode>")])
;; Vector multiply expander for VF modes, with the <mask_name> and
;; <round_name> substitutions.  The preparation statement calls
;; ix86_fixup_binary_operands_no_copy with MULT.
1332 (define_expand "mul<mode>3<mask_name><round_name>"
1333 [(set (match_operand:VF 0 "register_operand")
1335 (match_operand:VF 1 "<round_nimm_predicate>")
1336 (match_operand:VF 2 "<round_nimm_predicate>")))]
1337 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1338 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Vector multiply insn for VF modes.  Operand 1 carries the "%"
;; (commutative) modifier in its first alternative.  Alternative 0 emits
;; mul<ssemodesuffix>, alternative 1 the three-operand
;; vmul<ssemodesuffix> with mask and rounding operands.
1340 (define_insn "*mul<mode>3<mask_name><round_name>"
1341 [(set (match_operand:VF 0 "register_operand" "=x,v")
1343 (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1344 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1345 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1347 mul<ssemodesuffix>\t{%2, %0|%0, %2}
1348 vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1349 [(set_attr "isa" "noavx,avx")
1350 (set_attr "type" "ssemul")
1351 (set_attr "prefix" "<mask_prefix3>")
1352 (set_attr "btver2_decode" "direct,double")
1353 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") multiply/divide on VF_128 operands; the mnemonic comes
;; from <multdiv_mnemonic> and the "type" attribute is built as
;; sse<multdiv_mnemonic>.  Alternative 0 is the two-operand legacy form,
;; alternative 1 the three-operand v-prefixed form with rounding operand.
1355 (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
1356 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1359 (match_operand:VF_128 1 "register_operand" "0,v")
1360 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
1365 <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1366 v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
1367 [(set_attr "isa" "noavx,avx")
1368 (set_attr "type" "sse<multdiv_mnemonic>")
1369 (set_attr "prefix" "<round_prefix>")
1370 (set_attr "btver2_decode" "direct,double")
1371 (set_attr "mode" "<ssescalarmode>")])
;; Vector divide expander for the VF2 modes.  Operand 1 must be a
;; register, operand 2 may be any nonimmediate operand; the preparation
;; statement calls ix86_fixup_binary_operands_no_copy with DIV.
1373 (define_expand "div<mode>3"
1374 [(set (match_operand:VF2 0 "register_operand")
1375 (div:VF2 (match_operand:VF2 1 "register_operand")
1376 (match_operand:VF2 2 "nonimmediate_operand")))]
1378 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
;; Vector divide expander for the VF1 modes.  After fixing up the
;; operands it may instead emit a software division through
;; ix86_emit_swdivsf.  The visible guards for that path are
;; TARGET_RECIP_VEC_DIV, not optimizing the insn for size,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations; the first part of the condition is
;; not visible in this excerpt.
1380 (define_expand "div<mode>3"
1381 [(set (match_operand:VF1 0 "register_operand")
1382 (div:VF1 (match_operand:VF1 1 "register_operand")
1383 (match_operand:VF1 2 "nonimmediate_operand")))]
1386 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1389 && TARGET_RECIP_VEC_DIV
1390 && !optimize_insn_for_size_p ()
1391 && flag_finite_math_only && !flag_trapping_math
1392 && flag_unsafe_math_optimizations)
1394 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
;; Vector divide insn for VF modes.  Operand 1 is always a register and
;; is tied to the destination in alternative 0.  Alternative 0 emits
;; div<ssemodesuffix>, alternative 1 the three-operand
;; vdiv<ssemodesuffix> with mask and rounding operands.
1399 (define_insn "<sse>_div<mode>3<mask_name><round_name>"
1400 [(set (match_operand:VF 0 "register_operand" "=x,v")
1402 (match_operand:VF 1 "register_operand" "0,v")
1403 (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
1404 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1406 div<ssemodesuffix>\t{%2, %0|%0, %2}
1407 vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1408 [(set_attr "isa" "noavx,avx")
1409 (set_attr "type" "ssediv")
1410 (set_attr "prefix" "<mask_prefix3>")
1411 (set_attr "mode" "<MODE>")])
;; Packed reciprocal (UNSPEC_RCP) for the VF1_128_256 modes, emitted as
;; %vrcpps from a register-or-memory operand 1 into register operand 0.
1413 (define_insn "<sse>_rcp<mode>2"
1414 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1416 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
1418 "%vrcpps\t{%1, %0|%0, %1}"
1419 [(set_attr "type" "sse")
1420 (set_attr "atom_sse_attr" "rcp")
1421 (set_attr "btver2_sse_attr" "rcp")
1422 (set_attr "prefix" "maybe_vex")
1423 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal on V4SF.  Operand 1 is the source of the unspec and
;; operand 2 is a second register input, tied to the destination in the
;; noavx alternative.  Emits rcpss (two operands) or vrcpss (three
;; operands); operand 1 is printed with %k1 in Intel syntax.
1425 (define_insn "sse_vmrcpv4sf2"
1426 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1428 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1430 (match_operand:V4SF 2 "register_operand" "0,x")
1434 rcpss\t{%1, %0|%0, %k1}
1435 vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1436 [(set_attr "isa" "noavx,avx")
1437 (set_attr "type" "sse")
1438 (set_attr "ssememalign" "32")
1439 (set_attr "atom_sse_attr" "rcp")
1440 (set_attr "btver2_sse_attr" "rcp")
1441 (set_attr "prefix" "orig,vex")
1442 (set_attr "mode" "SF")])
;; vrcp14<ssemodesuffix> for the VF_512 modes, with the <mask_name>
;; substitution supplying the optional mask operand.  EVEX-prefixed.
1444 (define_insn "<mask_codefor>rcp14<mode><mask_name>"
1445 [(set (match_operand:VF_512 0 "register_operand" "=v")
1447 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
1450 "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1451 [(set_attr "type" "sse")
1452 (set_attr "prefix" "evex")
1453 (set_attr "mode" "<MODE>")])
;; Scalar vrcp14<ssescalarmodesuffix> on VF_128 operands: operand 1 is
;; the register-or-memory source, operand 2 a second register input.
;; NOTE(review): the "mode" attribute is <MODE> although a scalar
;; mnemonic is emitted; other scalar patterns in this file use
;; <ssescalarmode> or SF -- confirm this is intended.
1455 (define_insn "srcp14<mode>"
1456 [(set (match_operand:VF_128 0 "register_operand" "=v")
1459 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1461 (match_operand:VF_128 2 "register_operand" "v")
1464 "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1465 [(set_attr "type" "sse")
1466 (set_attr "prefix" "evex")
1467 (set_attr "mode" "<MODE>")])
;; Vector square-root expander for the VF2 modes; the pattern is a plain
;; (sqrt ...) of operand 1.  Its condition is not visible in this excerpt.
1469 (define_expand "sqrt<mode>2"
1470 [(set (match_operand:VF2 0 "register_operand")
1471 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
;; Vector square-root expander for the VF1 modes.  It may emit a software
;; sequence through ix86_emit_swsqrtsf (..., false).  The visible guards
;; are TARGET_RECIP_VEC_SQRT, not optimizing the insn for size,
;; flag_finite_math_only, !flag_trapping_math and
;; flag_unsafe_math_optimizations; the first part of the condition is
;; not visible in this excerpt.
1474 (define_expand "sqrt<mode>2"
1475 [(set (match_operand:VF1 0 "register_operand")
1476 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
1480 && TARGET_RECIP_VEC_SQRT
1481 && !optimize_insn_for_size_p ()
1482 && flag_finite_math_only && !flag_trapping_math
1483 && flag_unsafe_math_optimizations)
1485 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
;; Vector square-root insn for VF modes with the <mask_name> and
;; <round_name> substitutions.  A single alternative emits
;; %vsqrt<ssemodesuffix> and prints the mask and rounding operands.
1490 (define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1491 [(set (match_operand:VF 0 "register_operand" "=v")
1492 (sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))]
1493 "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1494 "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1495 [(set_attr "type" "sse")
1496 (set_attr "atom_sse_attr" "sqrt")
1497 (set_attr "btver2_sse_attr" "sqrt")
1498 (set_attr "prefix" "maybe_vex")
1499 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") square root on VF_128 operands.  Operand 1 is the
;; source and operand 2 a second register input, tied to the destination
;; in the noavx alternative.  Emits sqrt<ssescalarmodesuffix> or the
;; three-operand vsqrt<ssescalarmodesuffix> with the rounding operand.
1501 (define_insn "<sse>_vmsqrt<mode>2<round_name>"
1502 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1505 (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
1506 (match_operand:VF_128 2 "register_operand" "0,v")
1510 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1511 vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1512 [(set_attr "isa" "noavx,avx")
1513 (set_attr "type" "sse")
1514 (set_attr "atom_sse_attr" "sqrt")
1515 (set_attr "prefix" "<round_prefix>")
1516 (set_attr "btver2_sse_attr" "sqrt")
1517 (set_attr "mode" "<ssescalarmode>")])
;; Reciprocal square-root expander (UNSPEC_RSQRT) for the VF1_128_256
;; modes.  The body calls ix86_emit_swsqrtsf with its last argument true.
;; The expander condition is not visible in this excerpt.
1519 (define_expand "rsqrt<mode>2"
1520 [(set (match_operand:VF1_128_256 0 "register_operand")
1522 [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1525 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
;; Packed reciprocal square root (UNSPEC_RSQRT) for the VF1_128_256
;; modes, emitted as %vrsqrtps.
1529 (define_insn "<sse>_rsqrt<mode>2"
1530 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1532 [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1534 "%vrsqrtps\t{%1, %0|%0, %1}"
1535 [(set_attr "type" "sse")
1536 (set_attr "prefix" "maybe_vex")
1537 (set_attr "mode" "<MODE>")])
;; vrsqrt14<ssemodesuffix> for the VF_512 modes, with the <mask_name>
;; substitution supplying the optional mask operand.  EVEX-prefixed.
1539 (define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1540 [(set (match_operand:VF_512 0 "register_operand" "=v")
1542 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
1545 "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1546 [(set_attr "type" "sse")
1547 (set_attr "prefix" "evex")
1548 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt14<ssescalarmodesuffix> on VF_128 operands: operand 1 is
;; the register-or-memory source, operand 2 a second register input.
;; NOTE(review): the "mode" attribute is <MODE> although a scalar
;; mnemonic is emitted -- confirm this is intended.
1550 (define_insn "rsqrt14<mode>"
1551 [(set (match_operand:VF_128 0 "register_operand" "=v")
1554 [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1556 (match_operand:VF_128 2 "register_operand" "v")
1559 "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
1560 [(set_attr "type" "sse")
1561 (set_attr "prefix" "evex")
1562 (set_attr "mode" "<MODE>")])
;; Scalar reciprocal square root on V4SF.  Operand 1 is the source of the
;; unspec and operand 2 a second register input, tied to the destination
;; in the noavx alternative.  Emits rsqrtss or the three-operand vrsqrtss.
1564 (define_insn "sse_vmrsqrtv4sf2"
1565 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1567 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1569 (match_operand:V4SF 2 "register_operand" "0,x")
1573 rsqrtss\t{%1, %0|%0, %k1}
1574 vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1575 [(set_attr "isa" "noavx,avx")
1576 (set_attr "type" "sse")
1577 (set_attr "ssememalign" "32")
1578 (set_attr "prefix" "orig,vex")
1579 (set_attr "mode" "SF")])
1581 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1582 ;; isn't really correct, as those rtl operators aren't defined when
1583 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; Expander for vector min/max on VF modes.  When flag_finite_math_only
;; is off, operand 1 is forced into a register before the operands are
;; fixed up.
1585 (define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1586 [(set (match_operand:VF 0 "register_operand")
1588 (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1589 (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1590 "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1592 if (!flag_finite_math_only)
1593 operands[1] = force_reg (<MODE>mode, operands[1]);
1594 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
;; Vector min/max insn used when flag_finite_math_only is set.  Operand 1
;; carries the "%" (commutative) modifier and may be a nonimmediate
;; operand; the condition also requires ix86_binary_operator_ok.
1597 (define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
1598 [(set (match_operand:VF 0 "register_operand" "=x,v")
1600 (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1601 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1602 "TARGET_SSE && flag_finite_math_only
1603 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1604 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1606 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1607 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1608 [(set_attr "isa" "noavx,avx")
1609 (set_attr "type" "sseadd")
1610 (set_attr "btver2_sse_attr" "maxmin")
1611 (set_attr "prefix" "<mask_prefix3>")
1612 (set_attr "mode" "<MODE>")])
;; Vector min/max insn used when flag_finite_math_only is NOT set.
;; Unlike the _finite variant, operand 1 must be a register and has no
;; "%" commutative modifier.
1614 (define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1615 [(set (match_operand:VF 0 "register_operand" "=x,v")
1617 (match_operand:VF 1 "register_operand" "0,v")
1618 (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1619 "TARGET_SSE && !flag_finite_math_only
1620 && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1622 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1623 v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1624 [(set_attr "isa" "noavx,avx")
1625 (set_attr "type" "sseadd")
1626 (set_attr "btver2_sse_attr" "maxmin")
1627 (set_attr "prefix" "<mask_prefix3>")
1628 (set_attr "mode" "<MODE>")])
;; Scalar ("vm") min/max on VF_128 operands, emitted with the
;; <maxmin_float><ssescalarmodesuffix> mnemonic.  Alternative 0 is the
;; two-operand legacy form, alternative 1 the three-operand v-prefixed
;; form with the SAE operand.
1630 (define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
1631 [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1634 (match_operand:VF_128 1 "register_operand" "0,v")
1635 (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
1640 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1641 v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
1642 [(set_attr "isa" "noavx,avx")
1643 (set_attr "type" "sse")
1644 (set_attr "btver2_sse_attr" "maxmin")
1645 (set_attr "prefix" "<round_saeonly_prefix>")
1646 (set_attr "mode" "<ssescalarmode>")])
1648 ;; These versions of the min/max patterns implement exactly the operations
1649 ;; min = (op1 < op2 ? op1 : op2)
1650 ;; max = (!(op1 < op2) ? op1 : op2)
1651 ;; Their operands are not commutative, and thus they may be used in the
1652 ;; presence of -0.0 and NaN.
;; Non-commutative vector min on VF modes: operand 1 must be a register
;; (no "%" modifier) and operand 2 may be register or memory.  Emits
;; min<ssemodesuffix> or the three-operand vmin<ssemodesuffix>.
;; NOTE(review): both alternatives use the "v" constraint while "isa" is
;; "noavx,avx" and "prefix" is "orig,vex" -- confirm "v" is wanted for
;; the noavx alternative.
1654 (define_insn "*ieee_smin<mode>3"
1655 [(set (match_operand:VF 0 "register_operand" "=v,v")
1657 [(match_operand:VF 1 "register_operand" "0,v")
1658 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1662 min<ssemodesuffix>\t{%2, %0|%0, %2}
1663 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1664 [(set_attr "isa" "noavx,avx")
1665 (set_attr "type" "sseadd")
1666 (set_attr "prefix" "orig,vex")
1667 (set_attr "mode" "<MODE>")])
;; Non-commutative vector max on VF modes: operand 1 must be a register
;; (no "%" modifier) and operand 2 may be register or memory.  Emits
;; max<ssemodesuffix> or the three-operand vmax<ssemodesuffix>.
;; NOTE(review): both alternatives use the "v" constraint while "isa" is
;; "noavx,avx" and "prefix" is "orig,vex" -- confirm "v" is wanted for
;; the noavx alternative.
1669 (define_insn "*ieee_smax<mode>3"
1670 [(set (match_operand:VF 0 "register_operand" "=v,v")
1672 [(match_operand:VF 1 "register_operand" "0,v")
1673 (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1677 max<ssemodesuffix>\t{%2, %0|%0, %2}
1678 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1679 [(set_attr "isa" "noavx,avx")
1680 (set_attr "type" "sseadd")
1681 (set_attr "prefix" "orig,vex")
1682 (set_attr "mode" "<MODE>")])
;; vaddsubpd on V4DF.  The pattern combines an operation on operands 1
;; and 2 with (minus (match_dup 1) (match_dup 2)); the combining rtx and
;; its selector are not visible in this excerpt.
1684 (define_insn "avx_addsubv4df3"
1685 [(set (match_operand:V4DF 0 "register_operand" "=x")
1688 (match_operand:V4DF 1 "register_operand" "x")
1689 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
1690 (minus:V4DF (match_dup 1) (match_dup 2))
1693 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1694 [(set_attr "type" "sseadd")
1695 (set_attr "prefix" "vex")
1696 (set_attr "mode" "V4DF")])
;; addsubpd / vaddsubpd on V2DF.  Alternative 0 (noavx) ties operand 1 to
;; the destination; alternative 1 (avx) is the three-operand form.
1698 (define_insn "sse3_addsubv2df3"
1699 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1702 (match_operand:V2DF 1 "register_operand" "0,x")
1703 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
1704 (minus:V2DF (match_dup 1) (match_dup 2))
1708 addsubpd\t{%2, %0|%0, %2}
1709 vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
1710 [(set_attr "isa" "noavx,avx")
1711 (set_attr "type" "sseadd")
1712 (set_attr "atom_unit" "complex")
1713 (set_attr "prefix" "orig,vex")
1714 (set_attr "mode" "V2DF")])
;; vaddsubps on V8SF.  The pattern combines an operation on operands 1
;; and 2 with (minus (match_dup 1) (match_dup 2)); the combining rtx and
;; its selector are not visible in this excerpt.
1716 (define_insn "avx_addsubv8sf3"
1717 [(set (match_operand:V8SF 0 "register_operand" "=x")
1720 (match_operand:V8SF 1 "register_operand" "x")
1721 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
1722 (minus:V8SF (match_dup 1) (match_dup 2))
1725 "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1726 [(set_attr "type" "sseadd")
1727 (set_attr "prefix" "vex")
1728 (set_attr "mode" "V8SF")])
;; addsubps / vaddsubps on V4SF.  Alternative 0 (noavx) ties operand 1 to
;; the destination and sets prefix_rep to 1; alternative 1 (avx) is the
;; three-operand form.
1730 (define_insn "sse3_addsubv4sf3"
1731 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1734 (match_operand:V4SF 1 "register_operand" "0,x")
1735 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
1736 (minus:V4SF (match_dup 1) (match_dup 2))
1740 addsubps\t{%2, %0|%0, %2}
1741 vaddsubps\t{%2, %1, %0|%0, %1, %2}"
1742 [(set_attr "isa" "noavx,avx")
1743 (set_attr "type" "sseadd")
1744 (set_attr "prefix" "orig,vex")
1745 (set_attr "prefix_rep" "1,*")
1746 (set_attr "mode" "V4SF")])
;; Horizontal add/sub on V4DF, emitted as vh<plusminus_mnemonic>pd.
;; The RTL pairs adjacent DF elements: (0,1) of operand 1, (0,1) of
;; operand 2, then (2,3) of operand 1 and (2,3) of operand 2.
1748 (define_insn "avx_h<plusminus_insn>v4df3"
1749 [(set (match_operand:V4DF 0 "register_operand" "=x")
1754 (match_operand:V4DF 1 "register_operand" "x")
1755 (parallel [(const_int 0)]))
1756 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1759 (match_operand:V4DF 2 "nonimmediate_operand" "xm")
1760 (parallel [(const_int 0)]))
1761 (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
1764 (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
1765 (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
1767 (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
1768 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
1770 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
1771 [(set_attr "type" "sseadd")
1772 (set_attr "prefix" "vex")
1773 (set_attr "mode" "V4DF")])
;; Expander for V2DF horizontal add with fixed element indices: elements
;; 0 and 1 of operand 1 are paired, and elements 0 and 1 of operand 2.
;; The matching insn below accepts the indices in either order.
1775 (define_expand "sse3_haddv2df3"
1776 [(set (match_operand:V2DF 0 "register_operand")
1780 (match_operand:V2DF 1 "register_operand")
1781 (parallel [(const_int 0)]))
1782 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1785 (match_operand:V2DF 2 "nonimmediate_operand")
1786 (parallel [(const_int 0)]))
1787 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
;; haddpd / vhaddpd on V2DF.  The element indices are operands 3..6
;; (each const_0_to_1_operand); the condition requires operand 3 to
;; differ from operand 4 and operand 5 to differ from operand 6, so
;; either ordering of the two elements matches.
1790 (define_insn "*sse3_haddv2df3"
1791 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1795 (match_operand:V2DF 1 "register_operand" "0,x")
1796 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
1799 (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
1802 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1803 (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
1806 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
1808 && INTVAL (operands[3]) != INTVAL (operands[4])
1809 && INTVAL (operands[5]) != INTVAL (operands[6])"
1811 haddpd\t{%2, %0|%0, %2}
1812 vhaddpd\t{%2, %1, %0|%0, %1, %2}"
1813 [(set_attr "isa" "noavx,avx")
1814 (set_attr "type" "sseadd")
1815 (set_attr "prefix" "orig,vex")
1816 (set_attr "mode" "V2DF")])
;; hsubpd / vhsubpd on V2DF.  The element indices are fixed constants:
;; element 0 is paired with element 1 for operand 1 and for operand 2.
1818 (define_insn "sse3_hsubv2df3"
1819 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1823 (match_operand:V2DF 1 "register_operand" "0,x")
1824 (parallel [(const_int 0)]))
1825 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1828 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
1829 (parallel [(const_int 0)]))
1830 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1833 hsubpd\t{%2, %0|%0, %2}
1834 vhsubpd\t{%2, %1, %0|%0, %1, %2}"
1835 [(set_attr "isa" "noavx,avx")
1836 (set_attr "type" "sseadd")
1837 (set_attr "prefix" "orig,vex")
1838 (set_attr "mode" "V2DF")])
;; DF-result variant of haddpd: combines two elements of the single V2DF
;; operand 1, selected by operands 2 and 3, which must differ.  The insn
;; uses the same register for both sources (haddpd %0, %0 or
;; vhaddpd %1, %1, %0).
1840 (define_insn "*sse3_haddv2df3_low"
1841 [(set (match_operand:DF 0 "register_operand" "=x,x")
1844 (match_operand:V2DF 1 "register_operand" "0,x")
1845 (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
1848 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
1850 && INTVAL (operands[2]) != INTVAL (operands[3])"
1852 haddpd\t{%0, %0|%0, %0}
1853 vhaddpd\t{%1, %1, %0|%0, %1, %1}"
1854 [(set_attr "isa" "noavx,avx")
1855 (set_attr "type" "sseadd1")
1856 (set_attr "prefix" "orig,vex")
1857 (set_attr "mode" "V2DF")])
;; DF-result variant of hsubpd: combines element 0 and element 1 of the
;; single V2DF operand 1 (fixed indices).  The insn uses the same
;; register for both sources (hsubpd %0, %0 or vhsubpd %1, %1, %0).
1859 (define_insn "*sse3_hsubv2df3_low"
1860 [(set (match_operand:DF 0 "register_operand" "=x,x")
1863 (match_operand:V2DF 1 "register_operand" "0,x")
1864 (parallel [(const_int 0)]))
1867 (parallel [(const_int 1)]))))]
1870 hsubpd\t{%0, %0|%0, %0}
1871 vhsubpd\t{%1, %1, %0|%0, %1, %1}"
1872 [(set_attr "isa" "noavx,avx")
1873 (set_attr "type" "sseadd1")
1874 (set_attr "prefix" "orig,vex")
1875 (set_attr "mode" "V2DF")])
;; Horizontal add/sub on V8SF, emitted as vh<plusminus_mnemonic>ps.
;; The RTL pairs adjacent SF elements in this order: (0,1) and (2,3) of
;; operand 1, (0,1) and (2,3) of operand 2, then (4,5) and (6,7) of
;; operand 1, and (4,5) and (6,7) of operand 2.
1877 (define_insn "avx_h<plusminus_insn>v8sf3"
1878 [(set (match_operand:V8SF 0 "register_operand" "=x")
1884 (match_operand:V8SF 1 "register_operand" "x")
1885 (parallel [(const_int 0)]))
1886 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1888 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1889 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1893 (match_operand:V8SF 2 "nonimmediate_operand" "xm")
1894 (parallel [(const_int 0)]))
1895 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1897 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1898 (vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
1902 (vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
1903 (vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
1905 (vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
1906 (vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
1909 (vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
1910 (vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
1912 (vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
1913 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
1915 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1916 [(set_attr "type" "sseadd")
1917 (set_attr "prefix" "vex")
1918 (set_attr "mode" "V8SF")])
;; Horizontal add/sub on V4SF, emitted as h<plusminus_mnemonic>ps or the
;; three-operand vh<plusminus_mnemonic>ps.  The RTL pairs SF elements
;; (0,1) and (2,3) of operand 1, then (0,1) and (2,3) of operand 2.
1920 (define_insn "sse3_h<plusminus_insn>v4sf3"
1921 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1926 (match_operand:V4SF 1 "register_operand" "0,x")
1927 (parallel [(const_int 0)]))
1928 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
1930 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
1931 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
1935 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
1936 (parallel [(const_int 0)]))
1937 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
1939 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1940 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1943 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
1944 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
1945 [(set_attr "isa" "noavx,avx")
1946 (set_attr "type" "sseadd")
1947 (set_attr "atom_unit" "complex")
1948 (set_attr "prefix" "orig,vex")
1949 (set_attr "prefix_rep" "1,*")
1950 (set_attr "mode" "V4SF")])
;; Sum reduction of a V8DF vector: delegates to ix86_expand_reduc with
;; gen_addv8df3 as the combining generator.
1952 (define_expand "reduc_splus_v8df"
1953 [(match_operand:V8DF 0 "register_operand")
1954 (match_operand:V8DF 1 "register_operand")]
1957 ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
;; Sum reduction of a V4DF vector in three insns: a horizontal add of
;; operand 1 with itself into tmp, a vperm2f128 of tmp with itself
;; (immediate 1) into tmp2, then tmp + tmp2 into operand 0.
1961 (define_expand "reduc_splus_v4df"
1962 [(match_operand:V4DF 0 "register_operand")
1963 (match_operand:V4DF 1 "register_operand")]
1966 rtx tmp = gen_reg_rtx (V4DFmode);
1967 rtx tmp2 = gen_reg_rtx (V4DFmode);
1968 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
1969 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
1970 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
;; Sum reduction of a V2DF vector: one horizontal add of operand 1 with
;; itself, written directly to operand 0.
1974 (define_expand "reduc_splus_v2df"
1975 [(match_operand:V2DF 0 "register_operand")
1976 (match_operand:V2DF 1 "register_operand")]
1979 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
;; Sum reduction of a V16SF vector: delegates to ix86_expand_reduc with
;; gen_addv16sf3 as the combining generator.
1983 (define_expand "reduc_splus_v16sf"
1984 [(match_operand:V16SF 0 "register_operand")
1985 (match_operand:V16SF 1 "register_operand")]
1988 ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
;; Sum reduction of a V8SF vector: two successive horizontal adds
;; (operand 1 with itself into tmp, then tmp with itself into tmp2), a
;; vperm2f128 of tmp2 with itself (immediate 1) that overwrites tmp, and
;; a final tmp + tmp2 into operand 0.
1992 (define_expand "reduc_splus_v8sf"
1993 [(match_operand:V8SF 0 "register_operand")
1994 (match_operand:V8SF 1 "register_operand")]
1997 rtx tmp = gen_reg_rtx (V8SFmode);
1998 rtx tmp2 = gen_reg_rtx (V8SFmode);
1999 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2000 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2001 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2002 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
;; Sum reduction of a V4SF vector.  Two code paths are visible: one emits
;; two horizontal adds (operand 1 with itself into tmp, then tmp with
;; itself into operand 0); the other calls ix86_expand_reduc with
;; gen_addv4sf3.  The condition choosing between them is not visible in
;; this excerpt.
2006 (define_expand "reduc_splus_v4sf"
2007 [(match_operand:V4SF 0 "register_operand")
2008 (match_operand:V4SF 1 "register_operand")]
2013 rtx tmp = gen_reg_rtx (V4SFmode);
2014 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2015 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
2018 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
2022 ;; Modes handled by reduc_sm{in,ax}* patterns.
;; The 256-bit integer modes require TARGET_AVX2, V8SF and V4DF require
;; TARGET_AVX, V4SF requires TARGET_SSE, and the 512-bit modes require
;; TARGET_AVX512F.
2023 (define_mode_iterator REDUC_SMINMAX_MODE
2024 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2025 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2026 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2027 (V4SF "TARGET_SSE") (V16SI "TARGET_AVX512F")
2028 (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2029 (V8DF "TARGET_AVX512F")])
;; Signed min/max reduction over REDUC_SMINMAX_MODE: delegates to
;; ix86_expand_reduc with gen_<code><mode>3 as the combining generator.
2031 (define_expand "reduc_<code>_<mode>"
2032 [(smaxmin:REDUC_SMINMAX_MODE
2033 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
2034 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2037 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander over the VI48_512 modes: delegates to
;; ix86_expand_reduc with gen_<code><mode>3.  The code iterator on the
;; pattern's first line is not visible in this excerpt.
2041 (define_expand "reduc_<code>_<mode>"
2043 (match_operand:VI48_512 0 "register_operand")
2044 (match_operand:VI48_512 1 "register_operand"))]
2047 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Reduction expander over the VI_256 modes: delegates to
;; ix86_expand_reduc with gen_<code><mode>3.  The code iterator on the
;; pattern's first line is not visible in this excerpt.
2051 (define_expand "reduc_<code>_<mode>"
2053 (match_operand:VI_256 0 "register_operand")
2054 (match_operand:VI_256 1 "register_operand"))]
2057 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
;; Unsigned-min reduction of a V8HI vector: delegates to
;; ix86_expand_reduc with gen_uminv8hi3 as the combining generator.
2061 (define_expand "reduc_umin_v8hi"
2063 (match_operand:V8HI 0 "register_operand")
2064 (match_operand:V8HI 1 "register_operand"))]
2067 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
2071 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2073 ;; Parallel floating point comparisons
2075 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vcmp<ssemodesuffix> on VF_128_256 operands.  Operand 3 is the
;; comparison immediate (const_0_to_31_operand, constraint "n") and is
;; printed as the first AT&T operand / last Intel operand.
2077 (define_insn "avx_cmp<mode>3"
2078 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2080 [(match_operand:VF_128_256 1 "register_operand" "x")
2081 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2082 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2085 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2086 [(set_attr "type" "ssecmp")
2087 (set_attr "length_immediate" "1")
2088 (set_attr "prefix" "vex")
2089 (set_attr "mode" "<MODE>")])
;; Scalar vcmp<ssescalarmodesuffix> on VF_128 operands with a comparison
;; immediate in operand 3 (const_0_to_31_operand).  The "mode" attribute
;; is <ssescalarmode>.
2091 (define_insn "avx_vmcmp<mode>3"
2092 [(set (match_operand:VF_128 0 "register_operand" "=x")
2095 [(match_operand:VF_128 1 "register_operand" "x")
2096 (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2097 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2102 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2103 [(set_attr "type" "ssecmp")
2104 (set_attr "length_immediate" "1")
2105 (set_attr "prefix" "vex")
2106 (set_attr "mode" "<ssescalarmode>")])
;; Vector compare matched through "sse_comparison_operator" (operand 3),
;; restricted by its condition to comparison codes whose rtx class is
;; RTX_COMM_COMPARE.  Operand 1 carries the "%" commutative modifier.
;; The predicate suffix is printed with %D3.
2108 (define_insn "*<sse>_maskcmp<mode>3_comm"
2109 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2110 (match_operator:VF_128_256 3 "sse_comparison_operator"
2111 [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2112 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2114 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2116 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2117 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2118 [(set_attr "isa" "noavx,avx")
2119 (set_attr "type" "ssecmp")
2120 (set_attr "length_immediate" "1")
2121 (set_attr "prefix" "orig,vex")
2122 (set_attr "mode" "<MODE>")])
;; Vector compare matched through "sse_comparison_operator" (operand 3)
;; without a commutative modifier on operand 1.  Emits
;; cmp%D3<ssemodesuffix> or the three-operand vcmp%D3<ssemodesuffix>.
2124 (define_insn "<sse>_maskcmp<mode>3"
2125 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2126 (match_operator:VF_128_256 3 "sse_comparison_operator"
2127 [(match_operand:VF_128_256 1 "register_operand" "0,x")
2128 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2131 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2132 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2133 [(set_attr "isa" "noavx,avx")
2134 (set_attr "type" "ssecmp")
2135 (set_attr "length_immediate" "1")
2136 (set_attr "prefix" "orig,vex")
2137 (set_attr "mode" "<MODE>")])
;; Scalar compare on VF_128 operands matched through
;; "sse_comparison_operator" (operand 3).  Emits
;; cmp%D3<ssescalarmodesuffix> or the three-operand v-prefixed form.
;; length_immediate is "1,*" here, i.e. set explicitly only for the
;; noavx alternative.
2139 (define_insn "<sse>_vmmaskcmp<mode>3"
2140 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2142 (match_operator:VF_128 3 "sse_comparison_operator"
2143 [(match_operand:VF_128 1 "register_operand" "0,x")
2144 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2149 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2150 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2151 [(set_attr "isa" "noavx,avx")
2152 (set_attr "type" "ssecmp")
2153 (set_attr "length_immediate" "1,*")
2154 (set_attr "prefix" "orig,vex")
2155 (set_attr "mode" "<ssescalarmode>")])
;; Predicate used for the comparison immediate of avx512f_cmp<mode>3:
;; const_0_to_31_operand for the float modes V16SF and V8DF,
;; const_0_to_7_operand for the integer modes V16SI and V8DI.
2157 (define_mode_attr cmp_imm_predicate
2158 [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
2159 (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")])
;; AVX-512 compare for the VI48F_512 modes, producing a mask register
;; ("Yk") result.  Operand 3 is the comparison immediate, validated by
;; <cmp_imm_predicate>.  Emits v<sseintprefix>cmp<ssemodesuffix> with the
;; optional merge mask and SAE operands.
2161 (define_insn "avx512f_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2162 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2163 (unspec:<avx512fmaskmode>
2164 [(match_operand:VI48F_512 1 "register_operand" "v")
2165 (match_operand:VI48F_512 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2166 (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2168 "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2169 "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2170 [(set_attr "type" "ssecmp")
2171 (set_attr "length_immediate" "1")
2172 (set_attr "prefix" "evex")
2173 (set_attr "mode" "<sseinsnmode>")])
;; AVX-512 unsigned integer compare (UNSPEC_UNSIGNED_PCMP) for the
;; VI48_512 modes, producing a mask register ("Yk") result.  Operand 3
;; is the comparison immediate (const_0_to_7_operand).  Emits
;; vpcmpu<ssemodesuffix>.
2175 (define_insn "avx512f_ucmp<mode>3<mask_scalar_merge_name>"
2176 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2177 (unspec:<avx512fmaskmode>
2178 [(match_operand:VI48_512 1 "register_operand" "v")
2179 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
2180 (match_operand:SI 3 "const_0_to_7_operand" "n")]
2181 UNSPEC_UNSIGNED_PCMP))]
2183 "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2184 [(set_attr "type" "ssecmp")
2185 (set_attr "length_immediate" "1")
2186 (set_attr "prefix" "evex")
2187 (set_attr "mode" "<sseinsnmode>")])
;; VF_128 compare into a mask register (Yk) using
;; vcmp<ssescalarmodesuffix>.  Operand 3 is the comparison immediate
;; (const_0_to_31_operand).  The unspec result is combined through an
;; (and ...) in the mask mode.
2189 (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2190 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2191 (and:<avx512fmaskmode>
2192 (unspec:<avx512fmaskmode>
2193 [(match_operand:VF_128 1 "register_operand" "v")
2194 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2195 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2199 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
2200 [(set_attr "type" "ssecmp")
2201 (set_attr "length_immediate" "1")
2202 (set_attr "prefix" "evex")
2203 (set_attr "mode" "<ssescalarmode>")])
;; Masked form of avx512f_vmcmp<mode>3: the compare result is additionally
;; and-ed with mask register operand 4, which is printed as {%4} after
;; the destination in the assembly template.
2205 (define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2206 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2207 (and:<avx512fmaskmode>
2208 (unspec:<avx512fmaskmode>
2209 [(match_operand:VF_128 1 "register_operand" "v")
2210 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2211 (match_operand:SI 3 "const_0_to_31_operand" "n")]
2213 (and:<avx512fmaskmode>
2214 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2217 "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
2218 [(set_attr "type" "ssecmp")
2219 (set_attr "length_immediate" "1")
2220 (set_attr "prefix" "evex")
2221 (set_attr "mode" "<ssescalarmode>")])
;; Compare of two VF operands into a mask register (Yk).  The condition
;; is operator 3 (sse_comparison_operator) and is printed through %D3
;; as part of the vcmp mnemonic.
2223 (define_insn "avx512f_maskcmp<mode>3"
2224 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2225 (match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2226 [(match_operand:VF 1 "register_operand" "v")
2227 (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2229 "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2230 [(set_attr "type" "ssecmp")
2231 (set_attr "length_immediate" "1")
2232 (set_attr "prefix" "evex")
2233 (set_attr "mode" "<sseinsnmode>")])
;; Sets FLAGS_REG in CCFP mode from operands 0 and 1, each accessed
;; through a (parallel [(const_int 0)]) selector, using [v]comi.
;; Enabled when SSE_FLOAT_MODE_P holds for the mode; prefix_data16 is
;; chosen according to whether the insn "mode" attribute is DF.
2235 (define_insn "<sse>_comi<round_saeonly_name>"
2236 [(set (reg:CCFP FLAGS_REG)
2239 (match_operand:<ssevecmode> 0 "register_operand" "v")
2240 (parallel [(const_int 0)]))
2242 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2243 (parallel [(const_int 0)]))))]
2244 "SSE_FLOAT_MODE_P (<MODE>mode)"
2245 "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2246 [(set_attr "type" "ssecomi")
2247 (set_attr "prefix" "maybe_vex")
2248 (set_attr "prefix_rep" "0")
2249 (set (attr "prefix_data16")
2250 (if_then_else (eq_attr "mode" "DF")
2252 (const_string "0")))
2253 (set_attr "mode" "<MODE>")])
;; Same shape as <sse>_comi, but the flags are set in CCFPU mode and the
;; emitted mnemonic is [v]ucomi.
2255 (define_insn "<sse>_ucomi<round_saeonly_name>"
2256 [(set (reg:CCFPU FLAGS_REG)
2259 (match_operand:<ssevecmode> 0 "register_operand" "v")
2260 (parallel [(const_int 0)]))
2262 (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2263 (parallel [(const_int 0)]))))]
2264 "SSE_FLOAT_MODE_P (<MODE>mode)"
2265 "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2266 [(set_attr "type" "ssecomi")
2267 (set_attr "prefix" "maybe_vex")
2268 (set_attr "prefix_rep" "0")
2269 (set (attr "prefix_data16")
2270 (if_then_else (eq_attr "mode" "DF")
2272 (const_string "0")))
2273 (set_attr "mode" "<MODE>")])
;; Vector conditional expander for 512-bit modes: operator 3 compares
;; VF_512 operands 4 and 5, and operands 1 and 2 are the V_512 values to
;; choose between.  The condition requires both modes to have the same
;; number of units; expansion is delegated to ix86_expand_fp_vcond.
2275 (define_expand "vcond<V_512:mode><VF_512:mode>"
2276 [(set (match_operand:V_512 0 "register_operand")
2278 (match_operator 3 ""
2279 [(match_operand:VF_512 4 "nonimmediate_operand")
2280 (match_operand:VF_512 5 "nonimmediate_operand")])
2281 (match_operand:V_512 1 "general_operand")
2282 (match_operand:V_512 2 "general_operand")))]
2284 && (GET_MODE_NUNITS (<V_512:MODE>mode)
2285 == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2287 bool ok = ix86_expand_fp_vcond (operands);
;; Vector conditional expander for 256-bit modes: operator 3 compares
;; VF_256 operands 4 and 5, and operands 1 and 2 are the V_256 values to
;; choose between.  The condition requires both modes to have the same
;; number of units; expansion is delegated to ix86_expand_fp_vcond.
2292 (define_expand "vcond<V_256:mode><VF_256:mode>"
2293 [(set (match_operand:V_256 0 "register_operand")
2295 (match_operator 3 ""
2296 [(match_operand:VF_256 4 "nonimmediate_operand")
2297 (match_operand:VF_256 5 "nonimmediate_operand")])
2298 (match_operand:V_256 1 "general_operand")
2299 (match_operand:V_256 2 "general_operand")))]
2301 && (GET_MODE_NUNITS (<V_256:MODE>mode)
2302 == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2304 bool ok = ix86_expand_fp_vcond (operands);
;; Vector conditional expander for 128-bit modes: operator 3 compares
;; VF_128 operands 4 and 5, and operands 1 and 2 are the V_128 values to
;; choose between.  The condition requires both modes to have the same
;; number of units; expansion is delegated to ix86_expand_fp_vcond.
2309 (define_expand "vcond<V_128:mode><VF_128:mode>"
2310 [(set (match_operand:V_128 0 "register_operand")
2312 (match_operator 3 ""
2313 [(match_operand:VF_128 4 "nonimmediate_operand")
2314 (match_operand:VF_128 5 "nonimmediate_operand")])
2315 (match_operand:V_128 1 "general_operand")
2316 (match_operand:V_128 2 "general_operand")))]
2318 && (GET_MODE_NUNITS (<V_128:MODE>mode)
2319 == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2321 bool ok = ix86_expand_fp_vcond (operands);
2326 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2328 ;; Parallel floating point logical operations
2330 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; And-not on VF vectors.  The assembly string is built at output time
;; with snprintf into a static buffer: "andn%s" or "vandn%s" depending on
;; the alternative, switching to "vpandn%s" when <MODE_SIZE> is 64.  The
;; suffix comes from the insn "mode" attribute, which is <ssePSmode> when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, <MODE> under TARGET_AVX, V4SF
;; when optimizing for size, and <MODE> otherwise.
2332 (define_insn "<sse>_andnot<mode>3"
2333 [(set (match_operand:VF 0 "register_operand" "=x,v")
2336 (match_operand:VF 1 "register_operand" "0,v"))
2337 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2340 static char buf[32];
2344 switch (get_attr_mode (insn))
2351 suffix = "<ssemodesuffix>";
2354 switch (which_alternative)
2357 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2360 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2366 /* There is no vandnp[sd]. Use vpandnq. */
2367 if (<MODE_SIZE> == 64)
2370 ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2373 snprintf (buf, sizeof (buf), ops, suffix);
2376 [(set_attr "isa" "noavx,avx")
2377 (set_attr "type" "sselog")
2378 (set_attr "prefix" "orig,maybe_evex")
2380 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2381 (const_string "<ssePSmode>")
2382 (match_test "TARGET_AVX")
2383 (const_string "<MODE>")
2384 (match_test "optimize_function_for_size_p (cfun)")
2385 (const_string "V4SF")
2387 (const_string "<MODE>")))])
;; Expander for the any_logic operations on VF_128_256 vectors.  Both
;; inputs may be memory; ix86_fixup_binary_operands_no_copy adjusts the
;; operands before the pattern is emitted.
2389 (define_expand "<code><mode>3"
2390 [(set (match_operand:VF_128_256 0 "register_operand")
2391 (any_logic:VF_128_256
2392 (match_operand:VF_128_256 1 "nonimmediate_operand")
2393 (match_operand:VF_128_256 2 "nonimmediate_operand")))]
2395 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; VF_512 counterpart of the logic expander above it in the file; the
;; operands are likewise passed through
;; ix86_fixup_binary_operands_no_copy.
2397 (define_expand "<code><mode>3"
2398 [(set (match_operand:VF_512 0 "register_operand")
2400 (match_operand:VF_512 1 "nonimmediate_operand")
2401 (match_operand:VF_512 2 "nonimmediate_operand")))]
2403 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Logic insn on VF vectors (commutative: operand 1 carries the "%"
;; modifier).  The assembly string is built with snprintf into a static
;; buffer: "<logic>%s" or "v<logic>%s" depending on the alternative,
;; switching to "vp<logic>%s" when <MODE_SIZE> is 64.  The "mode"
;; attribute follows the same cond as <sse>_andnot<mode>3.
2405 (define_insn "*<code><mode>3"
2406 [(set (match_operand:VF 0 "register_operand" "=x,v")
2408 (match_operand:VF 1 "nonimmediate_operand" "%0,v")
2409 (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
2410 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2412 static char buf[32];
2416 switch (get_attr_mode (insn))
2423 suffix = "<ssemodesuffix>";
2426 switch (which_alternative)
2429 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2432 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2438 /* There is no v<logic>p[sd]. Use vp<logic>q. */
2439 if (<MODE_SIZE> == 64)
2442 ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2445 snprintf (buf, sizeof (buf), ops, suffix);
2448 [(set_attr "isa" "noavx,avx")
2449 (set_attr "type" "sselog")
2450 (set_attr "prefix" "orig,maybe_evex")
2452 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2453 (const_string "<ssePSmode>")
2454 (match_test "TARGET_AVX")
2455 (const_string "<MODE>")
2456 (match_test "optimize_function_for_size_p (cfun)")
2457 (const_string "V4SF")
2459 (const_string "<MODE>")))])
;; copysign expander for VF vectors.  Operand 3 is a sign-bit mask built
;; by ix86_build_signbit_mask.  Operand 1 is and-ed with the inverted
;; mask, operand 2 with the mask itself, and the two temporaries
;; (operands 4 and 5, fresh pseudos) are ior-ed into operand 0.
2461 (define_expand "copysign<mode>3"
2464 (not:VF (match_dup 3))
2465 (match_operand:VF 1 "nonimmediate_operand")))
2467 (and:VF (match_dup 3)
2468 (match_operand:VF 2 "nonimmediate_operand")))
2469 (set (match_operand:VF 0 "register_operand")
2470 (ior:VF (match_dup 4) (match_dup 5)))]
2473 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2475 operands[4] = gen_reg_rtx (<MODE>mode);
2476 operands[5] = gen_reg_rtx (<MODE>mode);
2479 ;; Also define scalar versions. These are used for abs, neg, and
2480 ;; conditional move. Using subregs into vector modes causes register
2481 ;; allocation lossage. These patterns do not allow memory operands
2482 ;; because the native instructions read the full 128 bits.
;; Scalar (MODEF) and-not; all operands are registers.  The mnemonic
;; suffix is "ps" when the insn "mode" attribute is V4SF and
;; <ssevecmodesuffix> otherwise; the string is assembled with snprintf
;; into a static buffer.
2484 (define_insn "*andnot<mode>3"
2485 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2488 (match_operand:MODEF 1 "register_operand" "0,x"))
2489 (match_operand:MODEF 2 "register_operand" "x,x")))]
2490 "SSE_FLOAT_MODE_P (<MODE>mode)"
2492 static char buf[32];
2495 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2497 switch (which_alternative)
2500 ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2503 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2509 snprintf (buf, sizeof (buf), ops, suffix);
2512 [(set_attr "isa" "noavx,avx")
2513 (set_attr "type" "sselog")
2514 (set_attr "prefix" "orig,vex")
2516 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2517 (const_string "V4SF")
2518 (match_test "TARGET_AVX")
2519 (const_string "<ssevecmode>")
2520 (match_test "optimize_function_for_size_p (cfun)")
2521 (const_string "V4SF")
2523 (const_string "<ssevecmode>")))])
;; TFmode and-not.  The mnemonic is "andnps" when the insn "mode"
;; attribute is V4SF and "pandn" otherwise, with a "v" prefix in the AVX
;; alternative.  The "mode" attribute falls back to V4SF when SSE2 is
;; unavailable or when optimizing for size, and is TI by default.
2525 (define_insn "*andnottf3"
2526 [(set (match_operand:TF 0 "register_operand" "=x,x")
2528 (not:TF (match_operand:TF 1 "register_operand" "0,x"))
2529 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2532 static char buf[32];
2535 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
2537 switch (which_alternative)
2540 ops = "%s\t{%%2, %%0|%%0, %%2}";
2543 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2549 snprintf (buf, sizeof (buf), ops, tmp);
2552 [(set_attr "isa" "noavx,avx")
2553 (set_attr "type" "sselog")
2554 (set (attr "prefix_data16")
2556 (and (eq_attr "alternative" "0")
2557 (eq_attr "mode" "TI"))
2559 (const_string "*")))
2560 (set_attr "prefix" "orig,vex")
2562 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2563 (const_string "V4SF")
2564 (match_test "TARGET_AVX")
2566 (ior (not (match_test "TARGET_SSE2"))
2567 (match_test "optimize_function_for_size_p (cfun)"))
2568 (const_string "V4SF")
2570 (const_string "TI")))])
;; Scalar (MODEF) logic insn; all operands are registers and operand 1
;; is marked commutative ("%").  The suffix is "ps" when the insn "mode"
;; attribute is V4SF and <ssevecmodesuffix> otherwise.
2572 (define_insn "*<code><mode>3"
2573 [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2575 (match_operand:MODEF 1 "register_operand" "%0,x")
2576 (match_operand:MODEF 2 "register_operand" "x,x")))]
2577 "SSE_FLOAT_MODE_P (<MODE>mode)"
2579 static char buf[32];
2582 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2584 switch (which_alternative)
2587 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2590 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2596 snprintf (buf, sizeof (buf), ops, suffix);
2599 [(set_attr "isa" "noavx,avx")
2600 (set_attr "type" "sselog")
2601 (set_attr "prefix" "orig,vex")
2603 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2604 (const_string "V4SF")
2605 (match_test "TARGET_AVX")
2606 (const_string "<ssevecmode>")
2607 (match_test "optimize_function_for_size_p (cfun)")
2608 (const_string "V4SF")
2610 (const_string "<ssevecmode>")))])
;; TFmode logic expander; operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
2612 (define_expand "<code>tf3"
2613 [(set (match_operand:TF 0 "register_operand")
2615 (match_operand:TF 1 "nonimmediate_operand")
2616 (match_operand:TF 2 "nonimmediate_operand")))]
2618 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
;; TFmode logic insn.  The mnemonic is "<logic>ps" when the insn "mode"
;; attribute is V4SF and "p<logic>" otherwise, with a "v" prefix in the
;; AVX alternative.  The condition includes ix86_binary_operator_ok.
2620 (define_insn "*<code>tf3"
2621 [(set (match_operand:TF 0 "register_operand" "=x,x")
2623 (match_operand:TF 1 "nonimmediate_operand" "%0,x")
2624 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2626 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
2628 static char buf[32];
2631 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
2633 switch (which_alternative)
2636 ops = "%s\t{%%2, %%0|%%0, %%2}";
2639 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2645 snprintf (buf, sizeof (buf), ops, tmp);
2648 [(set_attr "isa" "noavx,avx")
2649 (set_attr "type" "sselog")
2650 (set (attr "prefix_data16")
2652 (and (eq_attr "alternative" "0")
2653 (eq_attr "mode" "TI"))
2655 (const_string "*")))
2656 (set_attr "prefix" "orig,vex")
2658 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
2659 (const_string "V4SF")
2660 (match_test "TARGET_AVX")
2662 (ior (not (match_test "TARGET_SSE2"))
2663 (match_test "optimize_function_for_size_p (cfun)"))
2664 (const_string "V4SF")
2666 (const_string "TI")))])
2668 ;; There is no floating-point xor for V16SF and V8DF in AVX512F,
2669 ;; but we need one for negation. Instead we use the integer versions
2670 ;; of xor. Maybe there could be a better way to do that.
;; Suffix appended to the vp<logic> mnemonic in avx512f_<logic><mode>:
;; "d" for V16SF and "q" for V8DF.
2672 (define_mode_attr avx512flogicsuff
2673 [(V16SF "d") (V8DF "q")])
;; Logic operation on VF_512 operands, emitted as
;; vp<logic><avx512flogicsuff> with an EVEX prefix.
2675 (define_insn "avx512f_<logic><mode>"
2676 [(set (match_operand:VF_512 0 "register_operand" "=v")
2678 (match_operand:VF_512 1 "register_operand" "v")
2679 (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2681 "vp<logic><avx512flogicsuff>\t{%2, %1, %0|%0, %1, %2}"
2682 [(set_attr "type" "sselog")
2683 (set_attr "prefix" "evex")])
2685 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2687 ;; FMA floating point multiply/accumulate instructions. These include
2688 ;; scalar versions of the instructions as well as vector versions.
2690 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2692 ;; The standard names for scalar FMA are only available with SSE math enabled.
2693 ;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma. It doesn't
2694 ;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
2695 ;; and TARGET_FMA4 are both false.
2696 ;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
2697 ;; one must force the EVEX encoding of the fma insns. Ideally we'd improve
2698 ;; GAS to allow proper prefix selection. However, for the moment all hardware
2699 ;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-name FMA expanders.  SF and DF additionally
;; require TARGET_SSE_MATH; the 128/256-bit vector modes need TARGET_FMA
;; or TARGET_FMA4; the 512-bit modes need TARGET_AVX512F.
2700 (define_mode_iterator FMAMODEM
2701 [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2702 (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
2703 (V4SF "TARGET_FMA || TARGET_FMA4")
2704 (V2DF "TARGET_FMA || TARGET_FMA4")
2705 (V8SF "TARGET_FMA || TARGET_FMA4")
2706 (V4DF "TARGET_FMA || TARGET_FMA4")
2707 (V16SF "TARGET_AVX512F")
2708 (V8DF "TARGET_AVX512F")])
;; Standard-name expander over FMAMODEM: register destination (operand 0)
;; and three nonimmediate inputs (operands 1-3), none negated.
2710 (define_expand "fma<mode>4"
2711 [(set (match_operand:FMAMODEM 0 "register_operand")
2713 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2714 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2715 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-name expander over FMAMODEM in which operand 3 is wrapped in
;; (neg ...).
2717 (define_expand "fms<mode>4"
2718 [(set (match_operand:FMAMODEM 0 "register_operand")
2720 (match_operand:FMAMODEM 1 "nonimmediate_operand")
2721 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2722 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
;; Standard-name expander over FMAMODEM in which operand 1 is wrapped in
;; (neg ...).
2724 (define_expand "fnma<mode>4"
2725 [(set (match_operand:FMAMODEM 0 "register_operand")
2727 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2728 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2729 (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
;; Standard-name expander over FMAMODEM in which both operand 1 and
;; operand 3 are wrapped in (neg ...).
2731 (define_expand "fnms<mode>4"
2732 [(set (match_operand:FMAMODEM 0 "register_operand")
2734 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
2735 (match_operand:FMAMODEM 2 "nonimmediate_operand")
2736 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
2738 ;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same mode list as FMAMODEM, except that the SF and DF entries do not
;; require TARGET_SSE_MATH.
2739 (define_mode_iterator FMAMODE
2740 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2741 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2742 (V4SF "TARGET_FMA || TARGET_FMA4")
2743 (V2DF "TARGET_FMA || TARGET_FMA4")
2744 (V8SF "TARGET_FMA || TARGET_FMA4")
2745 (V4DF "TARGET_FMA || TARGET_FMA4")
2746 (V16SF "TARGET_AVX512F")
2747 (V8DF "TARGET_AVX512F")])
;; Expander over FMAMODE with a register destination (operand 0) and
;; three nonimmediate inputs (operands 1-3).
2749 (define_expand "fma4i_fmadd_<mode>"
2750 [(set (match_operand:FMAMODE 0 "register_operand")
2752 (match_operand:FMAMODE 1 "nonimmediate_operand")
2753 (match_operand:FMAMODE 2 "nonimmediate_operand")
2754 (match_operand:FMAMODE 3 "nonimmediate_operand")))])
;; VF_512 fmadd expander taking a mask register as operand 4.  It
;; forwards to gen_fma_fmadd_<mode>_maskz_1, inserting CONST0_RTX of the
;; vector mode between the three inputs and the mask.
2756 (define_expand "avx512f_fmadd_<mode>_maskz<round_expand_name>"
2757 [(match_operand:VF_512 0 "register_operand")
2758 (match_operand:VF_512 1 "<round_expand_nimm_predicate>")
2759 (match_operand:VF_512 2 "<round_expand_nimm_predicate>")
2760 (match_operand:VF_512 3 "<round_expand_nimm_predicate>")
2761 (match_operand:<avx512fmaskmode> 4 "register_operand")]
2764 emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
2765 operands[0], operands[1], operands[2], operands[3],
2766 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; FMAMODE without the 512-bit vector modes (V16SF and V8DF).
2770 (define_mode_iterator FMAMODE_NOVF512
2771 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2772 (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
2773 (V4SF "TARGET_FMA || TARGET_FMA4")
2774 (V2DF "TARGET_FMA || TARGET_FMA4")
2775 (V8SF "TARGET_FMA || TARGET_FMA4")
2776 (V4DF "TARGET_FMA || TARGET_FMA4")])
;; fmadd insn for FMAMODE_NOVF512.  Alternatives 0-2 (isa fma_avx512f)
;; emit the 132, 213 and 231 forms, chosen by which input is tied to the
;; destination; alternatives 3-4 (isa fma4) emit the four-operand
;; vfmadd form.
2778 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
2779 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
2780 (fma:FMAMODE_NOVF512
2781 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")
2782 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
2783 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))]
2784 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2786 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2787 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2788 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
2789 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2790 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2791 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2792 (set_attr "type" "ssemuladd")
2793 (set_attr "mode" "<MODE>")])
;; VF_512 counterpart of fma_fmadd: only the 132, 213 and 231
;; alternatives (isa fma_avx512f) are provided.
2795 (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
2796 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
2798 (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
2799 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
2800 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
2801 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2803 vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2804 vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2805 vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
2806 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
2807 (set_attr "type" "ssemuladd")
2808 (set_attr "mode" "<MODE>")])
;; Masked VF_512 fmadd.  Operand 1 is tied to the destination in both
;; alternatives (132 and 213 forms); mask register operand 4 (Yk) is
;; printed as {%4} after the destination.
2810 (define_insn "avx512f_fmadd_<mode>_mask<round_name>"
2811 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2814 (match_operand:VF_512 1 "register_operand" "0,0")
2815 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
2816 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))
2818 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
2821 vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
2822 vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
2823 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2824 (set_attr "type" "ssemuladd")
2825 (set_attr "mode" "<MODE>")])
;; Masked VF_512 fmadd, 231 form: operand 3 is tied to the destination
;; ("0") and mask register operand 4 (Yk) is printed as {%4} after the
;; destination.  Operands 0 and 1 use the "v" constraint, like the
;; fmsub/fnmadd/fnmsub _mask3 patterns, so the register allocator is not
;; restricted to the narrower "x" register class for this EVEX-only insn.
2827 (define_insn "avx512f_fmadd_<mode>_mask3<round_name>"
2828 [(set (match_operand:VF_512 0 "register_operand" "=v")
2831 (match_operand:VF_512 1 "register_operand" "v")
2832 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
2833 (match_operand:VF_512 3 "register_operand" "0"))
2835 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
2837 "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
2838 [(set_attr "isa" "fma_avx512f")
2839 (set_attr "type" "ssemuladd")
2840 (set_attr "mode" "<MODE>")])
;; fmsub insn for FMAMODE_NOVF512: an fma whose operand 3 is negated.
;; Alternatives 0-2 (isa fma_avx512f) emit the 132, 213 and 231 forms;
;; alternatives 3-4 (isa fma4) emit the four-operand vfmsub form.
2842 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
2843 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
2844 (fma:FMAMODE_NOVF512
2845 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x")
2846 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
2847 (neg:FMAMODE_NOVF512
2848 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))]
2849 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2851 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2852 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2853 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
2854 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2855 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2856 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2857 (set_attr "type" "ssemuladd")
2858 (set_attr "mode" "<MODE>")])
;; VF_512 counterpart of fma_fmsub: only the vfmsub132/213/231
;; alternatives (isa fma_avx512f) are provided.
2860 (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
2861 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
2863 (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
2864 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
2866 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
2867 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2869 vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2870 vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2871 vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
2872 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
2873 (set_attr "type" "ssemuladd")
2874 (set_attr "mode" "<MODE>")])
;; Masked VF_512 fmsub.  Operand 1 is tied to the destination in both
;; alternatives (132 and 213 forms); mask register operand 4 (Yk) is
;; printed as {%4} after the destination.
2876 (define_insn "avx512f_fmsub_<mode>_mask<round_name>"
2877 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2880 (match_operand:VF_512 1 "register_operand" "0,0")
2881 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
2883 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")))
2885 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
2888 vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
2889 vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
2890 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2891 (set_attr "type" "ssemuladd")
2892 (set_attr "mode" "<MODE>")])
;; Masked VF_512 fmsub, 231 form: operand 3 is tied to the destination
;; ("0") and mask register operand 4 (Yk) is printed as {%4} after the
;; destination.
2894 (define_insn "avx512f_fmsub_<mode>_mask3<round_name>"
2895 [(set (match_operand:VF_512 0 "register_operand" "=v")
2898 (match_operand:VF_512 1 "register_operand" "v")
2899 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
2901 (match_operand:VF_512 3 "register_operand" "0")))
2903 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
2905 "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
2906 [(set_attr "isa" "fma_avx512f")
2907 (set_attr "type" "ssemuladd")
2908 (set_attr "mode" "<MODE>")])
;; fnmadd insn for FMAMODE_NOVF512: an fma whose operand 1 is negated.
;; Alternatives 0-2 (isa fma_avx512f) emit the 132, 213 and 231 forms;
;; alternatives 3-4 (isa fma4) emit the four-operand vfnmadd form.
2910 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
2911 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
2912 (fma:FMAMODE_NOVF512
2913 (neg:FMAMODE_NOVF512
2914 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x"))
2915 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
2916 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")))]
2917 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2919 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2920 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2921 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
2922 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2923 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2924 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2925 (set_attr "type" "ssemuladd")
2926 (set_attr "mode" "<MODE>")])
;; VF_512 counterpart of fma_fnmadd: only the vfnmadd132/213/231
;; alternatives (isa fma_avx512f) are provided.
2928 (define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
2929 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
2932 (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v"))
2933 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
2934 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
2935 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2937 vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2938 vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2939 vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
2940 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
2941 (set_attr "type" "ssemuladd")
2942 (set_attr "mode" "<MODE>")])
;; Masked VF_512 fnmadd.  Operand 1 is tied to the destination in both
;; alternatives (132 and 213 forms); mask register operand 4 (Yk) is
;; printed as {%4} after the destination.
2944 (define_insn "avx512f_fnmadd_<mode>_mask<round_name>"
2945 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
2949 (match_operand:VF_512 1 "register_operand" "0,0"))
2950 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
2951 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))
2953 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
2956 vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
2957 vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
2958 [(set_attr "isa" "fma_avx512f,fma_avx512f")
2959 (set_attr "type" "ssemuladd")
2960 (set_attr "mode" "<MODE>")])
;; Masked VF_512 fnmadd, 231 form: operand 3 is tied to the destination
;; ("0") and mask register operand 4 (Yk) is printed as {%4} after the
;; destination.
2962 (define_insn "avx512f_fnmadd_<mode>_mask3<round_name>"
2963 [(set (match_operand:VF_512 0 "register_operand" "=v")
2967 (match_operand:VF_512 1 "register_operand" "v"))
2968 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
2969 (match_operand:VF_512 3 "register_operand" "0"))
2971 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
2973 "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
2974 [(set_attr "isa" "fma_avx512f")
2975 (set_attr "type" "ssemuladd")
2976 (set_attr "mode" "<MODE>")])
;; fnmsub insn for FMAMODE_NOVF512: an fma whose operands 1 and 3 are
;; both negated.  Alternatives 0-2 (isa fma_avx512f) emit the 132, 213
;; and 231 forms; alternatives 3-4 (isa fma4) emit the four-operand
;; vfnmsub form.
2978 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
2979 [(set (match_operand:FMAMODE_NOVF512 0 "register_operand" "=v,v,v,x,x")
2980 (fma:FMAMODE_NOVF512
2981 (neg:FMAMODE_NOVF512
2982 (match_operand:FMAMODE_NOVF512 1 "<round_nimm_predicate>" "%0,0,v,x,x"))
2983 (match_operand:FMAMODE_NOVF512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
2984 (neg:FMAMODE_NOVF512
2985 (match_operand:FMAMODE_NOVF512 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))))]
2986 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
2988 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
2989 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
2990 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
2991 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
2992 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2993 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
2994 (set_attr "type" "ssemuladd")
2995 (set_attr "mode" "<MODE>")])
;; VF_512 counterpart of fma_fnmsub: only the vfnmsub132/213/231
;; alternatives (isa fma_avx512f) are provided.
2997 (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
2998 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
3001 (match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v"))
3002 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3004 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3005 "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3007 vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3008 vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3009 vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3010 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3011 (set_attr "type" "ssemuladd")
3012 (set_attr "mode" "<MODE>")])
;; Masked VF_512 fnmsub.  Operand 1 is tied to the destination in both
;; alternatives (132 and 213 forms); mask register operand 4 (Yk) is
;; printed as {%4} after the destination.
3014 (define_insn "avx512f_fnmsub_<mode>_mask<round_name>"
3015 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3019 (match_operand:VF_512 1 "register_operand" "0,0"))
3020 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3022 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")))
3024 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3027 vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3028 vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3029 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3030 (set_attr "type" "ssemuladd")
3031 (set_attr "mode" "<MODE>")])
;; Masked VF_512 fnmsub, 231 form: operand 3 is tied to the destination
;; ("0") and mask register operand 4 (Yk) is printed as {%4} after the
;; destination.
3033 (define_insn "avx512f_fnmsub_<mode>_mask3<round_name>"
3034 [(set (match_operand:VF_512 0 "register_operand" "=v")
3038 (match_operand:VF_512 1 "register_operand" "v"))
3039 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3041 (match_operand:VF_512 3 "register_operand" "0")))
3043 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3045 "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3046 [(set_attr "isa" "fma_avx512f")
3047 (set_attr "type" "ssemuladd")
3048 (set_attr "mode" "<MODE>")])
3050 ;; FMA parallel floating point multiply addsub and subadd operations.
3052 ;; It would be possible to represent these without the UNSPEC as
3055 ;; (fma op1 op2 op3)
3056 ;; (fma op1 op2 (neg op3))
3059 ;; But this doesn't seem useful in practice.
;; fmaddsub expander over VF modes with three nonimmediate inputs.
;; Available when any of TARGET_FMA, TARGET_FMA4 or TARGET_AVX512F is
;; set.
3061 (define_expand "fmaddsub_<mode>"
3062 [(set (match_operand:VF 0 "register_operand")
3064 [(match_operand:VF 1 "nonimmediate_operand")
3065 (match_operand:VF 2 "nonimmediate_operand")
3066 (match_operand:VF 3 "nonimmediate_operand")]
3068 "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
;; VF_512 fmaddsub expander taking a mask register as operand 4.  It
;; forwards to gen_fma_fmaddsub_<mode>_maskz_1, inserting CONST0_RTX of
;; the vector mode between the three inputs and the mask.
3070 (define_expand "avx512f_fmaddsub_<mode>_maskz<round_expand_name>"
3071 [(match_operand:VF_512 0 "register_operand")
3072 (match_operand:VF_512 1 "<round_expand_nimm_predicate>")
3073 (match_operand:VF_512 2 "<round_expand_nimm_predicate>")
3074 (match_operand:VF_512 3 "<round_expand_nimm_predicate>")
3075 (match_operand:<avx512fmaskmode> 4 "register_operand")]
3078 emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3079 operands[0], operands[1], operands[2], operands[3],
3080 CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
;; fmaddsub for the VF_128_256 modes, five alternatives:
;;   0-2: vfmaddsub132 / 213 / 231, three-operand forms (isa "fma_avx512f");
;;        one input is tied to the destination via constraint "0".
;;   3-4: four-operand vfmaddsub on "x" registers (isa "fma4"), with the
;;        memory operand allowed in either operand 3 or operand 2.
3084 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3085 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3087 [(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x")
3088 (match_operand:VF_128_256 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3089 (match_operand:VF_128_256 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x")]
3091 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3093 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3094 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3095 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3096 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3097 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3098 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3099 (set_attr "type" "ssemuladd")
3100 (set_attr "mode" "<MODE>")])
;; fmaddsub for the VF_512 modes; requires TARGET_AVX512F.  Three
;; alternatives emit vfmaddsub132 / 213 / 231, each with one input tied to
;; the destination (constraint "0"); there are no four-operand forms here.
3102 (define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3103 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
3105 [(match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
3106 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3107 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
3109 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3111 vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3112 vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3113 vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3114 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3115 (set_attr "type" "ssemuladd")
3116 (set_attr "mode" "<MODE>")])
;; Masked fmaddsub for the VF_512 modes.  Operand 1 is tied to the
;; destination in both alternatives and operand 4 is the mask register
;; ("Yk").  The alternatives emit vfmaddsub132 or vfmaddsub213 with the
;; %{%4%} mask on operand 0, depending on which of operands 2/3 takes the
;; <round_constraint> slot.
3118 (define_insn "avx512f_fmaddsub_<mode>_mask<round_name>"
3119 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3122 [(match_operand:VF_512 1 "register_operand" "0,0")
3123 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3124 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>")]
3127 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3130 vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3131 vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3132 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3133 (set_attr "type" "ssemuladd")
3134 (set_attr "mode" "<MODE>")])
;; Masked fmaddsub for the VF_512 modes, "mask3" form: operand 3 is tied to
;; the destination (constraint "0"), operand 4 is the mask register ("Yk"),
;; and the single template emits vfmaddsub231 with the %{%4%} mask.
3136 (define_insn "avx512f_fmaddsub_<mode>_mask3<round_name>"
3137 [(set (match_operand:VF_512 0 "register_operand" "=v")
3140 [(match_operand:VF_512 1 "register_operand" "v")
3141 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3142 (match_operand:VF_512 3 "register_operand" "0")]
3145 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3147 "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3148 [(set_attr "isa" "fma_avx512f")
3149 (set_attr "type" "ssemuladd")
3150 (set_attr "mode" "<MODE>")])
;; fmsubadd for the VF_128_256 modes, five alternatives:
;;   0-2: vfmsubadd132 / 213 / 231, three-operand forms (isa "fma_avx512f");
;;        one input is tied to the destination via constraint "0".
;;   3-4: four-operand vfmsubadd on "x" registers (isa "fma4"), with the
;;        memory operand allowed in either operand 3 or operand 2.
3152 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3153 [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3155 [(match_operand:VF_128_256 1 "<round_nimm_predicate>" "%0,0,v,x,x")
3156 (match_operand:VF_128_256 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>,x,m")
3158 (match_operand:VF_128_256 3 "<round_nimm_predicate>" "v,<round_constraint>,0,xm,x"))]
3160 "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3162 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3163 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3164 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3165 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3166 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3167 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
3168 (set_attr "type" "ssemuladd")
3169 (set_attr "mode" "<MODE>")])
;; fmsubadd for the VF_512 modes; requires TARGET_AVX512F.  Three
;; alternatives emit vfmsubadd132 / 213 / 231, each with one input tied to
;; the destination (constraint "0").
3171 (define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3172 [(set (match_operand:VF_512 0 "register_operand" "=v,v,v")
3174 [(match_operand:VF_512 1 "<round_nimm_predicate>" "%0,0,v")
3175 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3177 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
3179 "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3181 vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3182 vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3183 vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3184 [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f")
3185 (set_attr "type" "ssemuladd")
3186 (set_attr "mode" "<MODE>")])
;; Masked fmsubadd for the VF_512 modes.  Operand 1 is tied to the
;; destination in both alternatives and operand 4 is the mask register
;; ("Yk").  The alternatives emit vfmsubadd132 or vfmsubadd213 with the
;; %{%4%} mask on operand 0.
3188 (define_insn "avx512f_fmsubadd_<mode>_mask<round_name>"
3189 [(set (match_operand:VF_512 0 "register_operand" "=v,v")
3192 [(match_operand:VF_512 1 "register_operand" "0,0")
3193 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>,v")
3195 (match_operand:VF_512 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
3198 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3201 vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3202 vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3203 [(set_attr "isa" "fma_avx512f,fma_avx512f")
3204 (set_attr "type" "ssemuladd")
3205 (set_attr "mode" "<MODE>")])
;; Masked fmsubadd for the VF_512 modes, "mask3" form: operand 3 is tied to
;; the destination (constraint "0"), operand 4 is the mask register ("Yk"),
;; and the single template emits vfmsubadd231 with the %{%4%} mask.
3207 (define_insn "avx512f_fmsubadd_<mode>_mask3<round_name>"
3208 [(set (match_operand:VF_512 0 "register_operand" "=v")
3211 [(match_operand:VF_512 1 "register_operand" "v")
3212 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")
3214 (match_operand:VF_512 3 "register_operand" "0"))]
3217 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3219 "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3220 [(set_attr "isa" "fma_avx512f")
3221 (set_attr "type" "ssemuladd")
3222 (set_attr "mode" "<MODE>")])
3224 ;; FMA3 floating point scalar intrinsics. These merge result with
3225 ;; high-order elements from the destination register.
;; Named expander for the scalar fmadd intrinsic over the VF_128 modes.
;; It takes a register destination and three inputs checked with
;; <round_nimm_predicate>.
3227 (define_expand "fmai_vmfmadd_<mode><round_name>"
3228 [(set (match_operand:VF_128 0 "register_operand")
3231 (match_operand:VF_128 1 "<round_nimm_predicate>")
3232 (match_operand:VF_128 2 "<round_nimm_predicate>")
3233 (match_operand:VF_128 3 "<round_nimm_predicate>"))
;; Scalar fmadd on the VF_128 modes, enabled for TARGET_FMA or
;; TARGET_AVX512F.  Operand 1 is tied to the destination in both
;; alternatives; the templates emit vfmadd132 or vfmadd213 with the scalar
;; mode suffix, depending on which of operands 2/3 takes <round_constraint>.
;; NOTE(review): the pattern name carries no <round_name> suffix although
;; the operands and templates use <round_*> substitutions, unlike the
;; *fmai_fnmadd / *fmai_fnmsub patterns -- confirm this is intended.
3238 (define_insn "*fmai_fmadd_<mode>"
3239 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3242 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3243 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
3244 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
3247 "TARGET_FMA || TARGET_AVX512F"
3249 vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3250 vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3251 [(set_attr "type" "ssemuladd")
3252 (set_attr "mode" "<MODE>")])
;; Scalar fmsub on the VF_128 modes, enabled for TARGET_FMA or
;; TARGET_AVX512F.  Operand 1 is tied to the destination in both
;; alternatives; the templates emit vfmsub132 or vfmsub213 with the scalar
;; mode suffix.
;; NOTE(review): the pattern name carries no <round_name> suffix although
;; the operands and templates use <round_*> substitutions, unlike the
;; *fmai_fnmadd / *fmai_fnmsub patterns -- confirm this is intended.
3254 (define_insn "*fmai_fmsub_<mode>"
3255 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3258 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3259 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v")
3261 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3264 "TARGET_FMA || TARGET_AVX512F"
3266 vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3267 vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3268 [(set_attr "type" "ssemuladd")
3269 (set_attr "mode" "<MODE>")])
;; Scalar fnmadd on the VF_128 modes, enabled for TARGET_FMA or
;; TARGET_AVX512F.  Operand 2 is listed first in the pattern; operand 1 is
;; tied to the destination in both alternatives.  The templates emit
;; vfnmadd132 or vfnmadd213 with the scalar mode suffix.
3271 (define_insn "*fmai_fnmadd_<mode><round_name>"
3272 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3276 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
3277 (match_operand:VF_128 1 "<round_nimm_predicate>" "0,0")
3278 (match_operand:VF_128 3 "<round_nimm_predicate>" "v,<round_constraint>"))
3281 "TARGET_FMA || TARGET_AVX512F"
3283 vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3284 vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3285 [(set_attr "type" "ssemuladd")
3286 (set_attr "mode" "<MODE>")])
;; Scalar fnmsub on the VF_128 modes, enabled for TARGET_FMA or
;; TARGET_AVX512F.  Operand 2 is listed first in the pattern; operand 1 is
;; tied to the destination in both alternatives.  The templates emit
;; vfnmsub132 or vfnmsub213 with the scalar mode suffix.
3288 (define_insn "*fmai_fnmsub_<mode><round_name>"
3289 [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3293 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
3294 (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3296 (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3299 "TARGET_FMA || TARGET_AVX512F"
3301 vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3302 vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3303 [(set_attr "type" "ssemuladd")
3304 (set_attr "mode" "<MODE>")])
3306 ;; FMA4 floating point scalar intrinsics. These write the
3307 ;; entire destination register, with the high-order elements zeroed.
;; Named expander for the FMA4 scalar fmadd intrinsic over the VF_128
;; modes.  Its preparation statement sets operands[4] to the zero vector
;; CONST0_RTX (<MODE>mode) before the pattern is emitted.
3309 (define_expand "fma4i_vmfmadd_<mode>"
3310 [(set (match_operand:VF_128 0 "register_operand")
3313 (match_operand:VF_128 1 "nonimmediate_operand")
3314 (match_operand:VF_128 2 "nonimmediate_operand")
3315 (match_operand:VF_128 3 "nonimmediate_operand"))
3319 "operands[4] = CONST0_RTX (<MODE>mode);")
;; FMA4 scalar fmadd on the VF_128 modes.  Two "x"-register alternatives
;; allow the memory operand in either operand 3 or operand 2; operand 4 must
;; satisfy const0_operand.  Emits the four-operand vfmadd with the scalar
;; mode suffix.
3321 (define_insn "*fma4i_vmfmadd_<mode>"
3322 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3325 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3326 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3327 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3328 (match_operand:VF_128 4 "const0_operand")
3331 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3332 [(set_attr "type" "ssemuladd")
3333 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fmsub on the VF_128 modes.  Two "x"-register alternatives
;; allow the memory operand in either operand 3 or operand 2; operand 4 must
;; satisfy const0_operand.  Emits the four-operand vfmsub with the scalar
;; mode suffix.
3335 (define_insn "*fma4i_vmfmsub_<mode>"
3336 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3339 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3340 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3342 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3343 (match_operand:VF_128 4 "const0_operand")
3346 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3347 [(set_attr "type" "ssemuladd")
3348 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fnmadd on the VF_128 modes.  Two "x"-register alternatives
;; allow the memory operand in either operand 3 or operand 2; operand 4 must
;; satisfy const0_operand.  Emits the four-operand vfnmadd with the scalar
;; mode suffix.
3350 (define_insn "*fma4i_vmfnmadd_<mode>"
3351 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3355 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3356 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3357 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3358 (match_operand:VF_128 4 "const0_operand")
3361 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3362 [(set_attr "type" "ssemuladd")
3363 (set_attr "mode" "<MODE>")])
;; FMA4 scalar fnmsub on the VF_128 modes.  Two "x"-register alternatives
;; allow the memory operand in either operand 3 or operand 2; operand 4 must
;; satisfy const0_operand.  Emits the four-operand vfnmsub with the scalar
;; mode suffix.
3365 (define_insn "*fma4i_vmfnmsub_<mode>"
3366 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3370 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3371 (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3373 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3374 (match_operand:VF_128 4 "const0_operand")
3377 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3378 [(set_attr "type" "ssemuladd")
3379 (set_attr "mode" "<MODE>")])
3381 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3383 ;; Parallel single-precision floating point conversion operations
3385 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2ps: converts the V2SI operand 2 (constraint "ym") with float:V2SF.
;; Operand 1 is a V4SF register tied to the destination (constraint "0").
3387 (define_insn "sse_cvtpi2ps"
3388 [(set (match_operand:V4SF 0 "register_operand" "=x")
3391 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3392 (match_operand:V4SF 1 "register_operand" "0")
3395 "cvtpi2ps\t{%2, %0|%0, %2}"
3396 [(set_attr "type" "ssecvt")
3397 (set_attr "mode" "V4SF")])
;; cvtps2pi: applies UNSPEC_FIX_NOTRUNC (unspec:V4SI) to the V4SF operand 1
;; and keeps elements 0 and 1 (the visible parallel) as the V2SI result in
;; a "y" register.  Tagged with unit "mmx".
3399 (define_insn "sse_cvtps2pi"
3400 [(set (match_operand:V2SI 0 "register_operand" "=y")
3402 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3404 (parallel [(const_int 0) (const_int 1)])))]
3406 "cvtps2pi\t{%1, %0|%0, %q1}"
3407 [(set_attr "type" "ssecvt")
3408 (set_attr "unit" "mmx")
3409 (set_attr "mode" "DI")])
;; cvttps2pi: applies fix:V4SI to the V4SF operand 1 and keeps elements 0
;; and 1 (the visible parallel) as the V2SI result in a "y" register.
;; Tagged with unit "mmx" and prefix_rep "0".
3411 (define_insn "sse_cvttps2pi"
3412 [(set (match_operand:V2SI 0 "register_operand" "=y")
3414 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3415 (parallel [(const_int 0) (const_int 1)])))]
3417 "cvttps2pi\t{%1, %0|%0, %q1}"
3418 [(set_attr "type" "ssecvt")
3419 (set_attr "unit" "mmx")
3420 (set_attr "prefix_rep" "0")
3421 (set_attr "mode" "SF")])
;; cvtsi2ss: converts the SI operand 2 with float:SF; operand 1 is the V4SF
;; source register.  Alternatives 0-1 (isa "noavx") take operand 2 from "r"
;; or "m", tie operand 1 to the destination and emit two-operand cvtsi2ss.
;; Alternative 2 (isa "avx") emits three-operand vcvtsi2ss with the
;; <round_op3> substitution.  The *_decode attributes are per-alternative.
3423 (define_insn "sse_cvtsi2ss<round_name>"
3424 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3427 (float:SF (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3428 (match_operand:V4SF 1 "register_operand" "0,0,v")
3432 cvtsi2ss\t{%2, %0|%0, %2}
3433 cvtsi2ss\t{%2, %0|%0, %2}
3434 vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3435 [(set_attr "isa" "noavx,noavx,avx")
3436 (set_attr "type" "sseicvt")
3437 (set_attr "athlon_decode" "vector,double,*")
3438 (set_attr "amdfam10_decode" "vector,double,*")
3439 (set_attr "bdver1_decode" "double,direct,*")
3440 (set_attr "btver2_decode" "double,double,double")
3441 (set_attr "prefix" "orig,orig,maybe_evex")
3442 (set_attr "mode" "SF")])
;; cvtsi2ssq: DImode-source counterpart of sse_cvtsi2ss; requires
;; TARGET_SSE && TARGET_64BIT.  Alternatives 0-1 (isa "noavx") emit
;; two-operand cvtsi2ssq with operand 1 tied to the destination;
;; alternative 2 (isa "avx") emits three-operand vcvtsi2ssq.  prefix_rex is
;; "1" for the non-AVX alternatives and length_vex is "4" for the AVX one.
3444 (define_insn "sse_cvtsi2ssq<round_name>"
3445 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3448 (float:SF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3449 (match_operand:V4SF 1 "register_operand" "0,0,v")
3451 "TARGET_SSE && TARGET_64BIT"
3453 cvtsi2ssq\t{%2, %0|%0, %2}
3454 cvtsi2ssq\t{%2, %0|%0, %2}
3455 vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3456 [(set_attr "isa" "noavx,noavx,avx")
3457 (set_attr "type" "sseicvt")
3458 (set_attr "athlon_decode" "vector,double,*")
3459 (set_attr "amdfam10_decode" "vector,double,*")
3460 (set_attr "bdver1_decode" "double,direct,*")
3461 (set_attr "btver2_decode" "double,double,double")
3462 (set_attr "length_vex" "*,*,4")
3463 (set_attr "prefix_rex" "1,1,*")
3464 (set_attr "prefix" "orig,orig,maybe_evex")
3465 (set_attr "mode" "SF")])
;; cvtss2si: UNSPEC_FIX_NOTRUNC conversion of element 0 (the visible
;; parallel) of the V4SF operand 1 into an SI general register.  The "%v"
;; in the template together with prefix "maybe_vex" covers both the legacy
;; and the v-prefixed mnemonic.
3467 (define_insn "sse_cvtss2si<round_name>"
3468 [(set (match_operand:SI 0 "register_operand" "=r,r")
3471 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3472 (parallel [(const_int 0)]))]
3473 UNSPEC_FIX_NOTRUNC))]
3475 "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3476 [(set_attr "type" "sseicvt")
3477 (set_attr "athlon_decode" "double,vector")
3478 (set_attr "bdver1_decode" "double,double")
3479 (set_attr "prefix_rep" "1")
3480 (set_attr "prefix" "maybe_vex")
3481 (set_attr "mode" "SI")])
;; Variant of cvtss2si whose input is a scalar SF operand (register "v" or
;; memory "m") rather than a vector element: unspec:SI with
;; UNSPEC_FIX_NOTRUNC into an SI general register.
3483 (define_insn "sse_cvtss2si_2"
3484 [(set (match_operand:SI 0 "register_operand" "=r,r")
3485 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3486 UNSPEC_FIX_NOTRUNC))]
3488 "%vcvtss2si\t{%1, %0|%0, %k1}"
3489 [(set_attr "type" "sseicvt")
3490 (set_attr "athlon_decode" "double,vector")
3491 (set_attr "amdfam10_decode" "double,double")
3492 (set_attr "bdver1_decode" "double,double")
3493 (set_attr "prefix_rep" "1")
3494 (set_attr "prefix" "maybe_vex")
3495 (set_attr "mode" "SI")])
;; 64-bit cvtss2si: UNSPEC_FIX_NOTRUNC conversion of element 0 (the visible
;; parallel) of the V4SF operand 1 into a DI general register.  Requires
;; TARGET_SSE && TARGET_64BIT; the template spells the mnemonic with a {q}
;; suffix.
3497 (define_insn "sse_cvtss2siq<round_name>"
3498 [(set (match_operand:DI 0 "register_operand" "=r,r")
3501 (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3502 (parallel [(const_int 0)]))]
3503 UNSPEC_FIX_NOTRUNC))]
3504 "TARGET_SSE && TARGET_64BIT"
3505 "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3506 [(set_attr "type" "sseicvt")
3507 (set_attr "athlon_decode" "double,vector")
3508 (set_attr "bdver1_decode" "double,double")
3509 (set_attr "prefix_rep" "1")
3510 (set_attr "prefix" "maybe_vex")
3511 (set_attr "mode" "DI")])
;; 64-bit variant of cvtss2si whose input is a scalar SF operand (register
;; "v" or memory "m"): unspec:DI with UNSPEC_FIX_NOTRUNC into a DI general
;; register.  Requires TARGET_SSE && TARGET_64BIT.
3513 (define_insn "sse_cvtss2siq_2"
3514 [(set (match_operand:DI 0 "register_operand" "=r,r")
3515 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3516 UNSPEC_FIX_NOTRUNC))]
3517 "TARGET_SSE && TARGET_64BIT"
3518 "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3519 [(set_attr "type" "sseicvt")
3520 (set_attr "athlon_decode" "double,vector")
3521 (set_attr "amdfam10_decode" "double,double")
3522 (set_attr "bdver1_decode" "double,double")
3523 (set_attr "prefix_rep" "1")
3524 (set_attr "prefix" "maybe_vex")
3525 (set_attr "mode" "DI")])
;; cvttss2si: converts element 0 (the visible parallel) of the V4SF
;; operand 1 into an SI general register.  Uses the <round_saeonly_*>
;; substitutions for its name, predicate, constraint and template.
3527 (define_insn "sse_cvttss2si<round_saeonly_name>"
3528 [(set (match_operand:SI 0 "register_operand" "=r,r")
3531 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3532 (parallel [(const_int 0)]))))]
3534 "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3535 [(set_attr "type" "sseicvt")
3536 (set_attr "athlon_decode" "double,vector")
3537 (set_attr "amdfam10_decode" "double,double")
3538 (set_attr "bdver1_decode" "double,double")
3539 (set_attr "prefix_rep" "1")
3540 (set_attr "prefix" "maybe_vex")
3541 (set_attr "mode" "SI")])
;; 64-bit cvttss2si: converts element 0 (the visible parallel) of the V4SF
;; operand 1 into a DI general register.  Requires TARGET_SSE &&
;; TARGET_64BIT; the template spells the mnemonic with a {q} suffix.
;; NOTE(review): the second alternative uses <round_saeonly_constraint>
;; where the SImode sibling sse_cvttss2si uses <round_saeonly_constraint2>
;; -- confirm the difference is intended.
3543 (define_insn "sse_cvttss2siq<round_saeonly_name>"
3544 [(set (match_operand:DI 0 "register_operand" "=r,r")
3547 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint>")
3548 (parallel [(const_int 0)]))))]
3549 "TARGET_SSE && TARGET_64BIT"
3550 "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3551 [(set_attr "type" "sseicvt")
3552 (set_attr "athlon_decode" "double,vector")
3553 (set_attr "amdfam10_decode" "double,double")
3554 (set_attr "bdver1_decode" "double,double")
3555 (set_attr "prefix_rep" "1")
3556 (set_attr "prefix" "maybe_vex")
3557 (set_attr "mode" "DI")])
;; vcvtusi2ss/sd from a 32-bit source: unsigned_float of the SI operand 2
;; to the scalar mode, wrapped in vec_duplicate over the VF_128 mode;
;; operand 1 is a VF_128 register.  Requires TARGET_AVX512F together with
;; <round_modev4sf_condition>; prefix is "evex".
3559 (define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
3560 [(set (match_operand:VF_128 0 "register_operand" "=v")
3562 (vec_duplicate:VF_128
3563 (unsigned_float:<ssescalarmode>
3564 (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3565 (match_operand:VF_128 1 "register_operand" "v")
3567 "TARGET_AVX512F && <round_modev4sf_condition>"
3568 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3569 [(set_attr "type" "sseicvt")
3570 (set_attr "prefix" "evex")
3571 (set_attr "mode" "<ssescalarmode>")])
;; vcvtusi2ss/sd from a 64-bit source: unsigned_float of the DI operand 2
;; to the scalar mode, wrapped in vec_duplicate over the VF_128 mode;
;; operand 1 is a VF_128 register.  Requires TARGET_AVX512F &&
;; TARGET_64BIT; prefix is "evex".
3573 (define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
3574 [(set (match_operand:VF_128 0 "register_operand" "=v")
3576 (vec_duplicate:VF_128
3577 (unsigned_float:<ssescalarmode>
3578 (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3579 (match_operand:VF_128 1 "register_operand" "v")
3581 "TARGET_AVX512F && TARGET_64BIT"
3582 "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3583 [(set_attr "type" "sseicvt")
3584 (set_attr "prefix" "evex")
3585 (set_attr "mode" "<ssescalarmode>")])
;; Vector integer-to-float conversion for the VF1 modes: operand 1 has the
;; matching <sseintvecmode> and the template emits (v)cvtdq2ps.  Requires
;; TARGET_SSE2 plus the mask/round 512-bit conditions.
3587 (define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
3588 [(set (match_operand:VF1 0 "register_operand" "=v")
3590 (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))]
3591 "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
3592 "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3593 [(set_attr "type" "ssecvt")
3594 (set_attr "prefix" "maybe_vex")
3595 (set_attr "mode" "<sseinsnmode>")])
;; Unsigned V16SI -> V16SF conversion (unsigned_float:V16SF); emits
;; vcvtudq2ps with the mask/round substitutions.  Prefix is "evex".
3597 (define_insn "ufloatv16siv16sf2<mask_name><round_name>"
3598 [(set (match_operand:V16SF 0 "register_operand" "=v")
3599 (unsigned_float:V16SF
3600 (match_operand:V16SI 1 "<round_nimm_predicate>" "<round_constraint>")))]
3602 "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3603 [(set_attr "type" "ssecvt")
3604 (set_attr "prefix" "evex")
3605 (set_attr "mode" "V16SF")])
;; Expander for unsigned integer-vector to float-vector conversion over the
;; VF1 modes.  Enabled for TARGET_SSE2 when the mode is V4SFmode or
;; TARGET_AVX2 is set.  V16SFmode is handled by emitting
;; gen_ufloatv16siv16sf2; ix86_expand_vector_convert_uns_vsivsf is also
;; called (presumably for the remaining modes -- TODO confirm).
3607 (define_expand "floatuns<sseintvecmodelower><mode>2"
3608 [(match_operand:VF1 0 "register_operand")
3609 (match_operand:<sseintvecmode> 1 "register_operand")]
3610 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
3612 if (<MODE>mode == V16SFmode)
3613 emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
3615 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
;; Maps each SI vector mode to the lowercase name of the SF vector mode
;; with the same element count (V16SI -> v16sf, V8SI -> v8sf, V4SI -> v4sf).
3621 ;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
3622 (define_mode_attr sf2simodelower
3623 [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
;; Packed float -> int conversion via UNSPEC_FIX_NOTRUNC for the VI4_AVX
;; modes: operand 1 has the matching <ssePSmode> and the template emits
;; (v)cvtps2dq.  The prefix_data16 attribute is computed from a
;; TARGET_AVX test.
3625 (define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode>"
3626 [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
3628 [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
3629 UNSPEC_FIX_NOTRUNC))]
3631 "%vcvtps2dq\t{%1, %0|%0, %1}"
3632 [(set_attr "type" "ssecvt")
3633 (set (attr "prefix_data16")
3635 (match_test "TARGET_AVX")
3637 (const_string "1")))
3638 (set_attr "prefix" "maybe_vex")
3639 (set_attr "mode" "<sseinsnmode>")])
;; V16SF -> V16SI conversion via UNSPEC_FIX_NOTRUNC; emits vcvtps2dq with
;; the mask/round substitutions.  Prefix "evex", mode "XI".
3641 (define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
3642 [(set (match_operand:V16SI 0 "register_operand" "=v")
3644 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
3645 UNSPEC_FIX_NOTRUNC))]
3647 "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3648 [(set_attr "type" "ssecvt")
3649 (set_attr "prefix" "evex")
3650 (set_attr "mode" "XI")])
;; V16SF -> V16SI conversion via UNSPEC_UNSIGNED_FIX_NOTRUNC; emits
;; vcvtps2udq with the mask/round substitutions.  Prefix "evex", mode "XI".
3652 (define_insn "<mask_codefor>avx512f_ufix_notruncv16sfv16si<mask_name><round_name>"
3653 [(set (match_operand:V16SI 0 "register_operand" "=v")
3655 [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
3656 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3658 "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
3659 [(set_attr "type" "ssecvt")
3660 (set_attr "prefix" "evex")
3661 (set_attr "mode" "XI")])
;; V16SF -> V16SI conversion, parameterized by <fixsuffix>, which appears
;; in both the pattern name and the vcvttps2<fixsuffix>dq mnemonic.  Uses
;; the mask and <round_saeonly_*> substitutions.  Prefix "evex", mode "XI".
3663 (define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
3664 [(set (match_operand:V16SI 0 "register_operand" "=v")
3666 (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
3668 "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
3669 [(set_attr "type" "ssecvt")
3670 (set_attr "prefix" "evex")
3671 (set_attr "mode" "XI")])
;; V8SF -> V8SI conversion (fix:V8SI) on "x" registers, with the source
;; allowed in memory; emits vcvttps2dq.  Prefix "vex", mode "OI".
3673 (define_insn "fix_truncv8sfv8si2"
3674 [(set (match_operand:V8SI 0 "register_operand" "=x")
3675 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
3677 "vcvttps2dq\t{%1, %0|%0, %1}"
3678 [(set_attr "type" "ssecvt")
3679 (set_attr "prefix" "vex")
3680 (set_attr "mode" "OI")])
;; V4SF -> V4SI conversion (fix:V4SI) on "x" registers, with the source
;; allowed in memory; emits (v)cvttps2dq.  prefix_rep and prefix_data16 are
;; computed from TARGET_AVX tests.
;; NOTE(review): "prefix_data16" is set twice in the attribute list below --
;; once through the TARGET_AVX-conditional (set (attr ...)) form and again
;; through the plain (set_attr "prefix_data16" "0").  One of the two looks
;; redundant; confirm which is intended before removing either.
3682 (define_insn "fix_truncv4sfv4si2"
3683 [(set (match_operand:V4SI 0 "register_operand" "=x")
3684 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
3686 "%vcvttps2dq\t{%1, %0|%0, %1}"
3687 [(set_attr "type" "ssecvt")
3688 (set (attr "prefix_rep")
3690 (match_test "TARGET_AVX")
3692 (const_string "1")))
3693 (set (attr "prefix_data16")
3695 (match_test "TARGET_AVX")
3697 (const_string "0")))
3698 (set_attr "prefix_data16" "0")
3699 (set_attr "prefix" "maybe_vex")
3700 (set_attr "mode" "TI")])
;; Expander for unsigned float-vector to integer-vector truncation over the
;; VF1 modes.  V16SFmode is handled by emitting gen_ufix_truncv16sfv16si2.
;; The other visible steps:
;;   1. ix86_expand_adjust_ufix_to_sfix_si yields tmp[0] and fills tmp[2];
;;   2. a signed fix_trunc of tmp[0] is emitted into the fresh register
;;      tmp[1];
;;   3. tmp[1] is XORed with tmp[2] into operands[0].
3702 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
3703 [(match_operand:<sseintvecmode> 0 "register_operand")
3704 (match_operand:VF1 1 "register_operand")]
3707 if (<MODE>mode == V16SFmode)
3708 emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
3713 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
3714 tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
3715 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
3716 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
3721 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3723 ;; Parallel double-precision floating point conversion operations
3725 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: float:V2DF of the V2SI operand 1, which may be in a "y"
;; register or in memory.  The unit and prefix_data16 attributes are set
;; ("mmx", "1") only for the "y"-register alternative.
3727 (define_insn "sse2_cvtpi2pd"
3728 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
3729 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
3731 "cvtpi2pd\t{%1, %0|%0, %1}"
3732 [(set_attr "type" "ssecvt")
3733 (set_attr "unit" "mmx,*")
3734 (set_attr "prefix_data16" "1,*")
3735 (set_attr "mode" "V2DF")])
;; cvtpd2pi: UNSPEC_FIX_NOTRUNC conversion of the V2DF operand 1 (register
;; or memory) into a V2SI "y" register.  Tagged with unit "mmx".
3737 (define_insn "sse2_cvtpd2pi"
3738 [(set (match_operand:V2SI 0 "register_operand" "=y")
3739 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
3740 UNSPEC_FIX_NOTRUNC))]
3742 "cvtpd2pi\t{%1, %0|%0, %1}"
3743 [(set_attr "type" "ssecvt")
3744 (set_attr "unit" "mmx")
3745 (set_attr "bdver1_decode" "double")
3746 (set_attr "btver2_decode" "direct")
3747 (set_attr "prefix_data16" "1")
3748 (set_attr "mode" "DI")])
;; cvttpd2pi: fix:V2SI conversion of the V2DF operand 1 (register or
;; memory) into a V2SI "y" register.  Tagged with unit "mmx".
3750 (define_insn "sse2_cvttpd2pi"
3751 [(set (match_operand:V2SI 0 "register_operand" "=y")
3752 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
3754 "cvttpd2pi\t{%1, %0|%0, %1}"
3755 [(set_attr "type" "ssecvt")
3756 (set_attr "unit" "mmx")
3757 (set_attr "bdver1_decode" "double")
3758 (set_attr "prefix_data16" "1")
3759 (set_attr "mode" "TI")])
;; cvtsi2sd: converts the SI operand 2 with float:DF; operand 1 is the V2DF
;; source register.  Alternatives 0-1 (isa "noavx") take operand 2 from "r"
;; or "m", tie operand 1 to the destination and emit two-operand cvtsi2sd.
;; Alternative 2 (isa "avx") emits three-operand vcvtsi2sd.
3761 (define_insn "sse2_cvtsi2sd"
3762 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
3765 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
3766 (match_operand:V2DF 1 "register_operand" "0,0,x")
3770 cvtsi2sd\t{%2, %0|%0, %2}
3771 cvtsi2sd\t{%2, %0|%0, %2}
3772 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
3773 [(set_attr "isa" "noavx,noavx,avx")
3774 (set_attr "type" "sseicvt")
3775 (set_attr "athlon_decode" "double,direct,*")
3776 (set_attr "amdfam10_decode" "vector,double,*")
3777 (set_attr "bdver1_decode" "double,direct,*")
3778 (set_attr "btver2_decode" "double,double,double")
3779 (set_attr "prefix" "orig,orig,vex")
3780 (set_attr "mode" "DF")])
;; cvtsi2sdq: DImode-source counterpart of sse2_cvtsi2sd; requires
;; TARGET_SSE2 && TARGET_64BIT.  Alternatives 0-1 (isa "noavx") emit
;; two-operand cvtsi2sdq with operand 1 tied to the destination;
;; alternative 2 (isa "avx") emits three-operand vcvtsi2sdq with the
;; <round_op3> substitution.
3782 (define_insn "sse2_cvtsi2sdq<round_name>"
3783 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
3786 (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3787 (match_operand:V2DF 1 "register_operand" "0,0,v")
3789 "TARGET_SSE2 && TARGET_64BIT"
3791 cvtsi2sdq\t{%2, %0|%0, %2}
3792 cvtsi2sdq\t{%2, %0|%0, %2}
3793 vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
3794 [(set_attr "isa" "noavx,noavx,avx")
3795 (set_attr "type" "sseicvt")
3796 (set_attr "athlon_decode" "double,direct,*")
3797 (set_attr "amdfam10_decode" "vector,double,*")
3798 (set_attr "bdver1_decode" "double,direct,*")
3799 (set_attr "length_vex" "*,*,4")
3800 (set_attr "prefix_rex" "1,1,*")
3801 (set_attr "prefix" "orig,orig,maybe_evex")
3802 (set_attr "mode" "DF")])
;; vcvtss2usi: UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of element 0 (the
;; visible parallel) of the V4SF operand 1 into an SI general register.
;; Prefix "evex".
3804 (define_insn "avx512f_vcvtss2usi<round_name>"
3805 [(set (match_operand:SI 0 "register_operand" "=r")
3808 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
3809 (parallel [(const_int 0)]))]
3810 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3812 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
3813 [(set_attr "type" "sseicvt")
3814 (set_attr "prefix" "evex")
3815 (set_attr "mode" "SI")])
;; 64-bit vcvtss2usi: UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of element 0
;; (the visible parallel) of the V4SF operand 1 into a DI general register.
;; Requires TARGET_AVX512F && TARGET_64BIT; prefix "evex".
3817 (define_insn "avx512f_vcvtss2usiq<round_name>"
3818 [(set (match_operand:DI 0 "register_operand" "=r")
3821 (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
3822 (parallel [(const_int 0)]))]
3823 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3824 "TARGET_AVX512F && TARGET_64BIT"
3825 "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
3826 [(set_attr "type" "sseicvt")
3827 (set_attr "prefix" "evex")
3828 (set_attr "mode" "DI")])
;; vcvttss2usi: converts element 0 (the visible parallel) of the V4SF
;; operand 1 into an SI general register, using the <round_saeonly_*>
;; substitutions.  Prefix "evex".
3830 (define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
3831 [(set (match_operand:SI 0 "register_operand" "=r")
3834 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3835 (parallel [(const_int 0)]))))]
3837 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
3838 [(set_attr "type" "sseicvt")
3839 (set_attr "prefix" "evex")
3840 (set_attr "mode" "SI")])
;; 64-bit vcvttss2usi: converts element 0 (the visible parallel) of the
;; V4SF operand 1 into a DI general register.  Requires TARGET_AVX512F &&
;; TARGET_64BIT; prefix "evex".
3842 (define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
3843 [(set (match_operand:DI 0 "register_operand" "=r")
3846 (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3847 (parallel [(const_int 0)]))))]
3848 "TARGET_AVX512F && TARGET_64BIT"
3849 "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
3850 [(set_attr "type" "sseicvt")
3851 (set_attr "prefix" "evex")
3852 (set_attr "mode" "DI")])
;; vcvtsd2usi: UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of element 0 (the
;; visible parallel) of the V2DF operand 1 into an SI general register.
;; Prefix "evex".
3854 (define_insn "avx512f_vcvtsd2usi<round_name>"
3855 [(set (match_operand:SI 0 "register_operand" "=r")
3858 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
3859 (parallel [(const_int 0)]))]
3860 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3862 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
3863 [(set_attr "type" "sseicvt")
3864 (set_attr "prefix" "evex")
3865 (set_attr "mode" "SI")])
;; 64-bit vcvtsd2usi: UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of element 0
;; (the visible parallel) of the V2DF operand 1 into a DI general register.
;; Requires TARGET_AVX512F && TARGET_64BIT; prefix "evex".
3867 (define_insn "avx512f_vcvtsd2usiq<round_name>"
3868 [(set (match_operand:DI 0 "register_operand" "=r")
3871 (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
3872 (parallel [(const_int 0)]))]
3873 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
3874 "TARGET_AVX512F && TARGET_64BIT"
3875 "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
3876 [(set_attr "type" "sseicvt")
3877 (set_attr "prefix" "evex")
3878 (set_attr "mode" "DI")])
;; vcvttsd2usi: converts element 0 (the visible parallel) of the V2DF
;; operand 1 into an SI general register, using the <round_saeonly_*>
;; substitutions.  Prefix "evex".
3880 (define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
3881 [(set (match_operand:SI 0 "register_operand" "=r")
3884 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3885 (parallel [(const_int 0)]))))]
3887 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
3888 [(set_attr "type" "sseicvt")
3889 (set_attr "prefix" "evex")
3890 (set_attr "mode" "SI")])
;; 64-bit vcvttsd2usi: converts element 0 (the visible parallel) of the
;; V2DF operand 1 into a DI general register.  Requires TARGET_AVX512F &&
;; TARGET_64BIT; prefix "evex".
3892 (define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
3893 [(set (match_operand:DI 0 "register_operand" "=r")
3896 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
3897 (parallel [(const_int 0)]))))]
3898 "TARGET_AVX512F && TARGET_64BIT"
3899 "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
3900 [(set_attr "type" "sseicvt")
3901 (set_attr "prefix" "evex")
3902 (set_attr "mode" "DI")])
;; cvtsd2si: UNSPEC_FIX_NOTRUNC conversion of element 0 (the visible
;; parallel) of the V2DF operand 1 into an SI general register.  The "%v"
;; in the template together with prefix "maybe_vex" covers both the legacy
;; and the v-prefixed mnemonic.
3904 (define_insn "sse2_cvtsd2si<round_name>"
3905 [(set (match_operand:SI 0 "register_operand" "=r,r")
3908 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3909 (parallel [(const_int 0)]))]
3910 UNSPEC_FIX_NOTRUNC))]
3912 "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
3913 [(set_attr "type" "sseicvt")
3914 (set_attr "athlon_decode" "double,vector")
3915 (set_attr "bdver1_decode" "double,double")
3916 (set_attr "btver2_decode" "double,double")
3917 (set_attr "prefix_rep" "1")
3918 (set_attr "prefix" "maybe_vex")
3919 (set_attr "mode" "SI")])
;; Variant of cvtsd2si whose input is a scalar DF operand (register "v" or
;; memory "m") rather than a vector element: unspec:SI with
;; UNSPEC_FIX_NOTRUNC into an SI general register.
3921 (define_insn "sse2_cvtsd2si_2"
3922 [(set (match_operand:SI 0 "register_operand" "=r,r")
3923 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
3924 UNSPEC_FIX_NOTRUNC))]
3926 "%vcvtsd2si\t{%1, %0|%0, %q1}"
3927 [(set_attr "type" "sseicvt")
3928 (set_attr "athlon_decode" "double,vector")
3929 (set_attr "amdfam10_decode" "double,double")
3930 (set_attr "bdver1_decode" "double,double")
3931 (set_attr "prefix_rep" "1")
3932 (set_attr "prefix" "maybe_vex")
3933 (set_attr "mode" "SI")])
;; 64-bit cvtsd2si: UNSPEC_FIX_NOTRUNC conversion of element 0 (the visible
;; parallel) of the V2DF operand 1 into a DI general register.  Requires
;; TARGET_SSE2 && TARGET_64BIT; the template spells the mnemonic with a {q}
;; suffix.
3935 (define_insn "sse2_cvtsd2siq<round_name>"
3936 [(set (match_operand:DI 0 "register_operand" "=r,r")
3939 (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3940 (parallel [(const_int 0)]))]
3941 UNSPEC_FIX_NOTRUNC))]
3942 "TARGET_SSE2 && TARGET_64BIT"
3943 "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
3944 [(set_attr "type" "sseicvt")
3945 (set_attr "athlon_decode" "double,vector")
3946 (set_attr "bdver1_decode" "double,double")
3947 (set_attr "prefix_rep" "1")
3948 (set_attr "prefix" "maybe_vex")
3949 (set_attr "mode" "DI")])
;; DImode UNSPEC_FIX_NOTRUNC conversion of a scalar DFmode operand
;; (register or memory).  Requires TARGET_SSE2 && TARGET_64BIT; emits
;; (v)cvtsd2si{q}.
3951 (define_insn "sse2_cvtsd2siq_2"
3952 [(set (match_operand:DI 0 "register_operand" "=r,r")
3953 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
3954 UNSPEC_FIX_NOTRUNC))]
3955 "TARGET_SSE2 && TARGET_64BIT"
3956 "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
3957 [(set_attr "type" "sseicvt")
3958 (set_attr "athlon_decode" "double,vector")
3959 (set_attr "amdfam10_decode" "double,double")
3960 (set_attr "bdver1_decode" "double,double")
3961 (set_attr "prefix_rep" "1")
3962 (set_attr "prefix" "maybe_vex")
3963 (set_attr "mode" "DI")])
;; Emits (v)cvttsd2si on element 0 of a V2DF operand, producing an SImode
;; integer register.  The <round_saeonly_*> placeholders allow a substituted
;; variant to print an additional operand in the template.
3965 (define_insn "sse2_cvttsd2si<round_saeonly_name>"
3966 [(set (match_operand:SI 0 "register_operand" "=r,r")
3969 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3970 (parallel [(const_int 0)]))))]
3972 "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
3973 [(set_attr "type" "sseicvt")
3974 (set_attr "athlon_decode" "double,vector")
3975 (set_attr "amdfam10_decode" "double,double")
3976 (set_attr "bdver1_decode" "double,double")
3977 (set_attr "btver2_decode" "double,double")
3978 (set_attr "prefix_rep" "1")
3979 (set_attr "prefix" "maybe_vex")
3980 (set_attr "mode" "SI")])
;; DImode counterpart: emits (v)cvttsd2si{q} on element 0 of a V2DF
;; operand.  Only enabled for TARGET_SSE2 && TARGET_64BIT.
3982 (define_insn "sse2_cvttsd2siq<round_saeonly_name>"
3983 [(set (match_operand:DI 0 "register_operand" "=r,r")
3986 (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3987 (parallel [(const_int 0)]))))]
3988 "TARGET_SSE2 && TARGET_64BIT"
3989 "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
3990 [(set_attr "type" "sseicvt")
3991 (set_attr "athlon_decode" "double,vector")
3992 (set_attr "amdfam10_decode" "double,double")
3993 (set_attr "bdver1_decode" "double,double")
3994 (set_attr "prefix_rep" "1")
3995 (set_attr "prefix" "maybe_vex")
3996 (set_attr "mode" "DI")])
3998 ;; Mode attributes for the float<si2dfmodelower><mode>2 insn pattern:
;; si2dfmode maps a DF vector mode to the SI vector mode with the same
;; element count (used as an operand mode); si2dfmodelower is the same
;; mapping in lower case (used inside the pattern name).
3999 (define_mode_attr si2dfmode
4000 [(V8DF "V8SI") (V4DF "V4SI")])
4001 (define_mode_attr si2dfmodelower
4002 [(V8DF "v8si") (V4DF "v4si")])
;; Signed integer vector -> double vector conversion (RTL `float') for the
;; VF2_512_256 modes; the source operand has mode <si2dfmode>.  Emits
;; vcvtdq2pd, with optional masking printed through <mask_operand2>.
4004 (define_insn "float<si2dfmodelower><mode>2<mask_name>"
4005 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4006 (float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
4007 "TARGET_AVX && <mask_mode512bit_condition>"
4008 "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4009 [(set_attr "type" "ssecvt")
4010 (set_attr "prefix" "maybe_vex")
4011 (set_attr "mode" "<MODE>")])
;; Unsigned V8SI -> V8DF conversion (RTL `unsigned_float'); emits the
;; EVEX-encoded vcvtudq2pd, with optional masking via <mask_operand2>.
4013 (define_insn "ufloatv8siv8df<mask_name>"
4014 [(set (match_operand:V8DF 0 "register_operand" "=v")
4015 (unsigned_float:V8DF
4016 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
4018 "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4019 [(set_attr "type" "ssecvt")
4020 (set_attr "prefix" "evex")
4021 (set_attr "mode" "V8DF")])
;; Produce a V8DF result from elements 0..7 of a V16SI operand.  Emits
;; vcvtdq2pd with operand 1 printed through the %t modifier.
4023 (define_insn "avx512f_cvtdq2pd512_2"
4024 [(set (match_operand:V8DF 0 "register_operand" "=v")
4027 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
4028 (parallel [(const_int 0) (const_int 1)
4029 (const_int 2) (const_int 3)
4030 (const_int 4) (const_int 5)
4031 (const_int 6) (const_int 7)]))))]
4033 "vcvtdq2pd\t{%t1, %0|%0, %t1}"
4034 [(set_attr "type" "ssecvt")
4035 (set_attr "prefix" "evex")
4036 (set_attr "mode" "V8DF")])
;; Produce a V4DF result from elements 0..3 of a V8SI operand.  Emits
;; vcvtdq2pd with operand 1 printed through the %x modifier.
4038 (define_insn "avx_cvtdq2pd256_2"
4039 [(set (match_operand:V4DF 0 "register_operand" "=x")
4042 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
4043 (parallel [(const_int 0) (const_int 1)
4044 (const_int 2) (const_int 3)]))))]
4046 "vcvtdq2pd\t{%x1, %0|%0, %x1}"
4047 [(set_attr "type" "ssecvt")
4048 (set_attr "prefix" "vex")
4049 (set_attr "mode" "V4DF")])
;; Produce a V2DF result from elements 0 and 1 of a V4SI operand.  Emits
;; (v)cvtdq2pd; the ssememalign attribute is set to 64.
4051 (define_insn "sse2_cvtdq2pd"
4052 [(set (match_operand:V2DF 0 "register_operand" "=x")
4055 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
4056 (parallel [(const_int 0) (const_int 1)]))))]
4058 "%vcvtdq2pd\t{%1, %0|%0, %q1}"
4059 [(set_attr "type" "ssecvt")
4060 (set_attr "prefix" "maybe_vex")
4061 (set_attr "ssememalign" "64")
4062 (set_attr "mode" "V2DF")])
;; V8DF -> V8SI conversion expressed through UNSPEC_FIX_NOTRUNC.  Emits the
;; EVEX-encoded vcvtpd2dq; masking and rounding operands are printed via the
;; <mask_operand2> / <round_mask_op2> placeholders.
4064 (define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
4065 [(set (match_operand:V8SI 0 "register_operand" "=v")
4067 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4068 UNSPEC_FIX_NOTRUNC))]
4070 "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4071 [(set_attr "type" "ssecvt")
4072 (set_attr "prefix" "evex")
4073 (set_attr "mode" "OI")])
;; V4DF -> V4SI conversion expressed through UNSPEC_FIX_NOTRUNC.  Emits
;; vcvtpd2dq with the {y} suffix in AT&T syntax.
4075 (define_insn "avx_cvtpd2dq256"
4076 [(set (match_operand:V4SI 0 "register_operand" "=x")
4077 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
4078 UNSPEC_FIX_NOTRUNC))]
4080 "vcvtpd2dq{y}\t{%1, %0|%0, %1}"
4081 [(set_attr "type" "ssecvt")
4082 (set_attr "prefix" "vex")
4083 (set_attr "mode" "OI")])
;; Expander with a V8SI destination built around the V4DF -> V4SI unspec
;; conversion.  The preparation statement supplies operands[2] as an
;; all-zero V4SI constant (NOTE(review): presumably the upper half of the
;; result; the combining RTL is not visible in this excerpt).
4085 (define_expand "avx_cvtpd2dq256_2"
4086 [(set (match_operand:V8SI 0 "register_operand")
4088 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
4092 "operands[2] = CONST0_RTX (V4SImode);")
;; Matching insn for the V8SI-destination form: the V4DF -> V4SI unspec
;; conversion paired with a const0 V4SI operand 2.  Emits vcvtpd2dq{y}
;; with the destination printed through the %x modifier.
4094 (define_insn "*avx_cvtpd2dq256_2"
4095 [(set (match_operand:V8SI 0 "register_operand" "=x")
4097 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
4099 (match_operand:V4SI 2 "const0_operand")))]
4101 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
4102 [(set_attr "type" "ssecvt")
4103 (set_attr "prefix" "vex")
4104 (set_attr "btver2_decode" "vector")
4105 (set_attr "mode" "OI")])
;; Expander with a V4SI destination built around a V2DF -> V2SI unspec
;; conversion.  The preparation statement supplies operands[2] as an
;; all-zero V2SI constant.
4107 (define_expand "sse2_cvtpd2dq"
4108 [(set (match_operand:V4SI 0 "register_operand")
4110 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand")]
4114 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for sse2_cvtpd2dq: V2DF -> V2SI unspec conversion paired
;; with a const0 V2SI operand 2, giving a V4SI result.  The output code
;; returns either the VEX form vcvtpd2dq{x} or the legacy cvtpd2dq (the
;; selecting condition is not visible in this excerpt).
4116 (define_insn "*sse2_cvtpd2dq"
4117 [(set (match_operand:V4SI 0 "register_operand" "=x")
4119 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
4121 (match_operand:V2SI 2 "const0_operand")))]
4125 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
4127 return "cvtpd2dq\t{%1, %0|%0, %1}";
4129 [(set_attr "type" "ssecvt")
4130 (set_attr "prefix_rep" "1")
4131 (set_attr "prefix_data16" "0")
4132 (set_attr "prefix" "maybe_vex")
4133 (set_attr "mode" "TI")
4134 (set_attr "amdfam10_decode" "double")
4135 (set_attr "athlon_decode" "vector")
4136 (set_attr "bdver1_decode" "double")])
;; V8DF -> V8SI conversion expressed through UNSPEC_UNSIGNED_FIX_NOTRUNC.
;; Emits the EVEX-encoded vcvtpd2udq with optional mask/rounding operands.
4138 (define_insn "avx512f_ufix_notruncv8dfv8si<mask_name><round_name>"
4139 [(set (match_operand:V8SI 0 "register_operand" "=v")
4141 [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
4142 UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4144 "vcvtpd2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4145 [(set_attr "type" "ssecvt")
4146 (set_attr "prefix" "evex")
4147 (set_attr "mode" "OI")])
;; V8DF -> V8SI conversion emitting vcvttpd2<fixsuffix>dq; <fixsuffix> is
;; spliced into both the pattern name and the mnemonic (NOTE(review):
;; presumably selecting the signed or unsigned variant -- confirm against
;; the code iterator, which is not visible here).
4149 (define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
4150 [(set (match_operand:V8SI 0 "register_operand" "=v")
4152 (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4154 "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4155 [(set_attr "type" "ssecvt")
4156 (set_attr "prefix" "evex")
4157 (set_attr "mode" "OI")])
;; V4DF -> V4SI conversion (RTL `fix'); emits vcvttpd2dq with the {y}
;; suffix in AT&T syntax.
4159 (define_insn "fix_truncv4dfv4si2"
4160 [(set (match_operand:V4SI 0 "register_operand" "=x")
4161 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
4163 "vcvttpd2dq{y}\t{%1, %0|%0, %1}"
4164 [(set_attr "type" "ssecvt")
4165 (set_attr "prefix" "vex")
4166 (set_attr "mode" "OI")])
;; Expander with a V8SI destination built around (fix:V4SI V4DF).  The
;; preparation statement supplies operands[2] as an all-zero V4SI constant.
4168 (define_expand "avx_cvttpd2dq256_2"
4169 [(set (match_operand:V8SI 0 "register_operand")
4171 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
4174 "operands[2] = CONST0_RTX (V4SImode);")
;; Matching insn for avx_cvttpd2dq256_2: (fix:V4SI V4DF) paired with a
;; const0 V4SI operand 2, giving a V8SI result.  Emits vcvttpd2dq{y} with
;; the destination printed through the %x modifier.
4176 (define_insn "*avx_cvttpd2dq256_2"
4177 [(set (match_operand:V8SI 0 "register_operand" "=x")
4179 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm"))
4180 (match_operand:V4SI 2 "const0_operand")))]
4182 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}"
4183 [(set_attr "type" "ssecvt")
4184 (set_attr "prefix" "vex")
4185 (set_attr "btver2_decode" "vector")
4186 (set_attr "mode" "OI")])
;; Expander with a V4SI destination built around (fix:V2SI V2DF).  The
;; preparation statement supplies operands[2] as an all-zero V2SI constant.
4188 (define_expand "sse2_cvttpd2dq"
4189 [(set (match_operand:V4SI 0 "register_operand")
4191 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand"))
4194 "operands[2] = CONST0_RTX (V2SImode);")
;; Matching insn for sse2_cvttpd2dq: (fix:V2SI V2DF) paired with a const0
;; V2SI operand 2, giving a V4SI result.  The output code returns either
;; the VEX form vcvttpd2dq{x} or the legacy cvttpd2dq (the selecting
;; condition is not visible in this excerpt).
4196 (define_insn "*sse2_cvttpd2dq"
4197 [(set (match_operand:V4SI 0 "register_operand" "=x")
4199 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
4200 (match_operand:V2SI 2 "const0_operand")))]
4204 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
4206 return "cvttpd2dq\t{%1, %0|%0, %1}";
4208 [(set_attr "type" "ssecvt")
4209 (set_attr "amdfam10_decode" "double")
4210 (set_attr "athlon_decode" "vector")
4211 (set_attr "bdver1_decode" "double")
4212 (set_attr "prefix" "maybe_vex")
4213 (set_attr "mode" "TI")])
;; Scalar double -> single conversion: (float_truncate:V2SF) of V2DF
;; operand 2, combined with V4SF operand 1 into a V4SF result.  Three
;; alternatives: two legacy SSE forms (register / memory source, operand 1
;; tied to the destination) and one AVX three-operand vcvtsd2ss that can
;; also print a rounding operand through <round_op3>.
4215 (define_insn "sse2_cvtsd2ss<round_name>"
4216 [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
4219 (float_truncate:V2SF
4220 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
4221 (match_operand:V4SF 1 "register_operand" "0,0,v")
4225 cvtsd2ss\t{%2, %0|%0, %2}
4226 cvtsd2ss\t{%2, %0|%0, %q2}
4227 vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
4228 [(set_attr "isa" "noavx,noavx,avx")
4229 (set_attr "type" "ssecvt")
4230 (set_attr "athlon_decode" "vector,double,*")
4231 (set_attr "amdfam10_decode" "vector,double,*")
4232 (set_attr "bdver1_decode" "direct,direct,*")
4233 (set_attr "btver2_decode" "double,double,double")
4234 (set_attr "prefix" "orig,orig,<round_prefix>")
4235 (set_attr "mode" "SF")])
;; Scalar single -> double conversion: elements 0 and 1 are selected from
;; V4SF operand 2 and the converted value is combined with V2DF operand 1
;; into a V2DF result.  Three alternatives: two legacy SSE cvtss2sd forms
;; (operand 1 tied to the destination) and one AVX three-operand
;; vcvtss2sd.
4237 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
4238 [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
4242 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,<round_saeonly_constraint>")
4243 (parallel [(const_int 0) (const_int 1)])))
4244 (match_operand:V2DF 1 "register_operand" "0,0,v")
4248 cvtss2sd\t{%2, %0|%0, %2}
4249 cvtss2sd\t{%2, %0|%0, %k2}
4250 vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
4251 [(set_attr "isa" "noavx,noavx,avx")
4252 (set_attr "type" "ssecvt")
4253 (set_attr "amdfam10_decode" "vector,double,*")
4254 (set_attr "athlon_decode" "direct,direct,*")
4255 (set_attr "bdver1_decode" "direct,direct,*")
4256 (set_attr "btver2_decode" "double,double,double")
4257 (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
4258 (set_attr "mode" "DF")])
;; V8DF -> V8SF narrowing (RTL `float_truncate'); emits the EVEX-encoded
;; vcvtpd2ps with optional mask/rounding operands.
4260 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
4261 [(set (match_operand:V8SF 0 "register_operand" "=v")
4262 (float_truncate:V8SF
4263 (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
4265 "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4266 [(set_attr "type" "ssecvt")
4267 (set_attr "prefix" "evex")
4268 (set_attr "mode" "V8SF")])
;; V4DF -> V4SF narrowing (RTL `float_truncate'); emits vcvtpd2ps with the
;; {y} suffix in AT&T syntax.
4270 (define_insn "avx_cvtpd2ps256"
4271 [(set (match_operand:V4SF 0 "register_operand" "=x")
4272 (float_truncate:V4SF
4273 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
4275 "vcvtpd2ps{y}\t{%1, %0|%0, %1}"
4276 [(set_attr "type" "ssecvt")
4277 (set_attr "prefix" "vex")
4278 (set_attr "btver2_decode" "vector")
4279 (set_attr "mode" "V4SF")])
;; Expander with a V4SF destination built around
;; (float_truncate:V2SF V2DF).  The preparation statement supplies
;; operands[2] as an all-zero V2SF constant.
4281 (define_expand "sse2_cvtpd2ps"
4282 [(set (match_operand:V4SF 0 "register_operand")
4284 (float_truncate:V2SF
4285 (match_operand:V2DF 1 "nonimmediate_operand"))
4288 "operands[2] = CONST0_RTX (V2SFmode);")
;; Matching insn for sse2_cvtpd2ps: (float_truncate:V2SF V2DF) paired with
;; a const0 V2SF operand 2, giving a V4SF result.  The output code returns
;; either the VEX form vcvtpd2ps{x} or the legacy cvtpd2ps (the selecting
;; condition is not visible in this excerpt).
4290 (define_insn "*sse2_cvtpd2ps"
4291 [(set (match_operand:V4SF 0 "register_operand" "=x")
4293 (float_truncate:V2SF
4294 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
4295 (match_operand:V2SF 2 "const0_operand")))]
4299 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
4301 return "cvtpd2ps\t{%1, %0|%0, %1}";
4303 [(set_attr "type" "ssecvt")
4304 (set_attr "amdfam10_decode" "double")
4305 (set_attr "athlon_decode" "vector")
4306 (set_attr "bdver1_decode" "double")
4307 (set_attr "prefix_data16" "1")
4308 (set_attr "prefix" "maybe_vex")
4309 (set_attr "mode" "V4SF")])
4311 ;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
;; Maps a DF vector mode to the SF vector mode with the same element count.
4312 (define_mode_attr sf2dfmode
4313 [(V8DF "V8SF") (V4DF "V4SF")])
;; Single -> double vector widening (RTL `float_extend') for the
;; VF2_512_256 modes; the source operand has mode <sf2dfmode>.  Emits
;; vcvtps2pd with optional mask / SAE operands printed via placeholders.
4315 (define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
4316 [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
4317 (float_extend:VF2_512_256
4318 (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
4319 "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
4320 "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
4321 [(set_attr "type" "ssecvt")
4322 (set_attr "prefix" "maybe_vex")
4323 (set_attr "mode" "<MODE>")])
;; Produce a V4DF result from elements 0..3 of a V8SF operand.  Emits
;; vcvtps2pd with operand 1 printed through the %x modifier.
4325 (define_insn "*avx_cvtps2pd256_2"
4326 [(set (match_operand:V4DF 0 "register_operand" "=x")
4329 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
4330 (parallel [(const_int 0) (const_int 1)
4331 (const_int 2) (const_int 3)]))))]
4333 "vcvtps2pd\t{%x1, %0|%0, %x1}"
4334 [(set_attr "type" "ssecvt")
4335 (set_attr "prefix" "vex")
4336 (set_attr "mode" "V4DF")])
;; Produce a V8DF result from elements 0..7 of a V16SF operand.  Emits
;; the EVEX-encoded vcvtps2pd with operand 1 printed through %t.
4338 (define_insn "vec_unpacks_lo_v16sf"
4339 [(set (match_operand:V8DF 0 "register_operand" "=v")
4342 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
4343 (parallel [(const_int 0) (const_int 1)
4344 (const_int 2) (const_int 3)
4345 (const_int 4) (const_int 5)
4346 (const_int 6) (const_int 7)]))))]
4348 "vcvtps2pd\t{%t1, %0|%0, %t1}"
4349 [(set_attr "type" "ssecvt")
4350 (set_attr "prefix" "evex")
4351 (set_attr "mode" "V8DF")])
;; Produce a V2DF result from elements 0 and 1 of a V4SF operand.  Emits
;; (v)cvtps2pd.
4353 (define_insn "sse2_cvtps2pd"
4354 [(set (match_operand:V2DF 0 "register_operand" "=x")
4357 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
4358 (parallel [(const_int 0) (const_int 1)]))))]
4360 "%vcvtps2pd\t{%1, %0|%0, %q1}"
4361 [(set_attr "type" "ssecvt")
4362 (set_attr "amdfam10_decode" "direct")
4363 (set_attr "athlon_decode" "double")
4364 (set_attr "bdver1_decode" "double")
4365 (set_attr "prefix_data16" "0")
4366 (set_attr "prefix" "maybe_vex")
4367 (set_attr "mode" "V2DF")])
;; Two-step expander for the high half of a V4SF operand.  Step one
;; performs a selection with indices 6,7,2,3 into a fresh V4SF temporary
;; (operands[2]); step two forms the V2DF result from elements 0 and 1.
4369 (define_expand "vec_unpacks_hi_v4sf"
4374 (match_operand:V4SF 1 "nonimmediate_operand"))
4375 (parallel [(const_int 6) (const_int 7)
4376 (const_int 2) (const_int 3)])))
4377 (set (match_operand:V2DF 0 "register_operand")
4381 (parallel [(const_int 0) (const_int 1)]))))]
4383 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander for the high half of a V8SF operand: elements 4..7
;; are selected (operands[2] is a fresh V4SF temporary), then the V4DF
;; destination is set.
4385 (define_expand "vec_unpacks_hi_v8sf"
4388 (match_operand:V8SF 1 "nonimmediate_operand")
4389 (parallel [(const_int 4) (const_int 5)
4390 (const_int 6) (const_int 7)])))
4391 (set (match_operand:V4DF 0 "register_operand")
4395 "operands[2] = gen_reg_rtx (V4SFmode);")
;; Two-step expander for the high half of a V16SF operand: elements 8..15
;; are selected (operands[2] is a fresh V8SF temporary), then the V8DF
;; destination is set.
4397 (define_expand "vec_unpacks_hi_v16sf"
4400 (match_operand:V16SF 1 "nonimmediate_operand")
4401 (parallel [(const_int 8) (const_int 9)
4402 (const_int 10) (const_int 11)
4403 (const_int 12) (const_int 13)
4404 (const_int 14) (const_int 15)])))
4405 (set (match_operand:V8DF 0 "register_operand")
4409 "operands[2] = gen_reg_rtx (V8SFmode);")
;; Expander for the low half of a V4SF operand: the V2DF destination is
;; formed from elements 0 and 1.
4411 (define_expand "vec_unpacks_lo_v4sf"
4412 [(set (match_operand:V2DF 0 "register_operand")
4415 (match_operand:V4SF 1 "nonimmediate_operand")
4416 (parallel [(const_int 0) (const_int 1)]))))]
;; Expander for the low half of a V8SF operand: the V4DF destination is
;; formed from elements 0..3.
4419 (define_expand "vec_unpacks_lo_v8sf"
4420 [(set (match_operand:V4DF 0 "register_operand")
4423 (match_operand:V8SF 1 "nonimmediate_operand")
4424 (parallel [(const_int 0) (const_int 1)
4425 (const_int 2) (const_int 3)]))))]
;; Maps an integer vector mode to the floating-point vector mode with half
;; as many, twice-as-wide elements (HI vectors -> SF vectors, SI vectors
;; -> DF vectors).
4428 (define_mode_attr sseunpackfltmode
4429 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
4430 (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
;; For VI2_AVX512F modes: unpack the high half with
;; gen_vec_unpacks_hi_<mode> into a <sseunpackmode> temporary, then set
;; operand 0 to the FLOAT of that temporary in <sseunpackfltmode>.
4432 (define_expand "vec_unpacks_float_hi_<mode>"
4433 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4434 (match_operand:VI2_AVX512F 1 "register_operand")]
4437 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4439 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
4440 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4441 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; For VI2_AVX512F modes: unpack the low half with
;; gen_vec_unpacks_lo_<mode> into a <sseunpackmode> temporary, then set
;; operand 0 to the FLOAT of that temporary in <sseunpackfltmode>.
4445 (define_expand "vec_unpacks_float_lo_<mode>"
4446 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4447 (match_operand:VI2_AVX512F 1 "register_operand")]
4450 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4452 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
4453 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4454 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; For VI2_AVX512F modes: unpack the high half with
;; gen_vec_unpacku_hi_<mode> into a <sseunpackmode> temporary, then set
;; operand 0 to the FLOAT of that temporary.  A signed FLOAT is used after
;; the unsigned unpack (NOTE(review): presumably safe because the widened
;; values are non-negative -- confirm).
4458 (define_expand "vec_unpacku_float_hi_<mode>"
4459 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4460 (match_operand:VI2_AVX512F 1 "register_operand")]
4463 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4465 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
4466 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4467 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; For VI2_AVX512F modes: unpack the low half with
;; gen_vec_unpacku_lo_<mode> into a <sseunpackmode> temporary, then set
;; operand 0 to the FLOAT of that temporary.  A signed FLOAT is used after
;; the unsigned unpack (NOTE(review): presumably safe because the widened
;; values are non-negative -- confirm).
4471 (define_expand "vec_unpacku_float_lo_<mode>"
4472 [(match_operand:<sseunpackfltmode> 0 "register_operand")
4473 (match_operand:VI2_AVX512F 1 "register_operand")]
4476 rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
4478 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
4479 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
4480 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
;; Two-step expander for the high half of a V4SI operand.  Step one
;; selects elements 2,3,2,3 into a fresh V4SI temporary (operands[2]);
;; step two forms the V2DF result from elements 0 and 1.
4484 (define_expand "vec_unpacks_float_hi_v4si"
4487 (match_operand:V4SI 1 "nonimmediate_operand")
4488 (parallel [(const_int 2) (const_int 3)
4489 (const_int 2) (const_int 3)])))
4490 (set (match_operand:V2DF 0 "register_operand")
4494 (parallel [(const_int 0) (const_int 1)]))))]
4496 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander for the low half of a V4SI operand: the V2DF destination is
;; formed from elements 0 and 1.
4498 (define_expand "vec_unpacks_float_lo_v4si"
4499 [(set (match_operand:V2DF 0 "register_operand")
4502 (match_operand:V4SI 1 "nonimmediate_operand")
4503 (parallel [(const_int 0) (const_int 1)]))))]
;; Two-step expander for the high half of a V8SI operand: elements 4..7
;; are selected (operands[2] is a fresh V4SI temporary), then the V4DF
;; destination is set.
4506 (define_expand "vec_unpacks_float_hi_v8si"
4509 (match_operand:V8SI 1 "nonimmediate_operand")
4510 (parallel [(const_int 4) (const_int 5)
4511 (const_int 6) (const_int 7)])))
4512 (set (match_operand:V4DF 0 "register_operand")
4516 "operands[2] = gen_reg_rtx (V4SImode);")
;; Expander for the low half of a V8SI operand: the V4DF destination is
;; formed from elements 0..3.
4518 (define_expand "vec_unpacks_float_lo_v8si"
4519 [(set (match_operand:V4DF 0 "register_operand")
4522 (match_operand:V8SI 1 "nonimmediate_operand")
4523 (parallel [(const_int 0) (const_int 1)
4524 (const_int 2) (const_int 3)]))))]
;; Two-step expander for the high half of a V16SI operand: elements 8..15
;; are selected (operands[2] is a fresh V8SI temporary), then the V8DF
;; destination is set.
4527 (define_expand "vec_unpacks_float_hi_v16si"
4530 (match_operand:V16SI 1 "nonimmediate_operand")
4531 (parallel [(const_int 8) (const_int 9)
4532 (const_int 10) (const_int 11)
4533 (const_int 12) (const_int 13)
4534 (const_int 14) (const_int 15)])))
4535 (set (match_operand:V8DF 0 "register_operand")
4539 "operands[2] = gen_reg_rtx (V8SImode);")
;; Expander for the low half of a V16SI operand: the V8DF destination is
;; formed from elements 0..7.
4541 (define_expand "vec_unpacks_float_lo_v16si"
4542 [(set (match_operand:V8DF 0 "register_operand")
4545 (match_operand:V16SI 1 "nonimmediate_operand")
4546 (parallel [(const_int 0) (const_int 1)
4547 (const_int 2) (const_int 3)
4548 (const_int 4) (const_int 5)
4549 (const_int 6) (const_int 7)]))))]
;; Unsigned conversion of the high half of a V4SI operand to V2DF.
;; Elements 2,3,2,3 are selected into a V4SI temporary (operands[5]) and
;; elements 0,1 of that feed the conversion.  The fix-up then compares
;; operand 6 against zero (operands[3]) with `lt', ANDs the comparison
;; result with a vector of 2^32 (operands[4], built with real_ldexp) and
;; adds that to operand 6 -- i.e. lanes that came out negative get 2^32
;; added.  operands[6..8] are fresh V2DF temporaries.
;; NOTE(review): operand 6 is presumably the signed conversion result; the
;; RTL line that sets it is not visible in this excerpt.
4552 (define_expand "vec_unpacku_float_hi_v4si"
4555 (match_operand:V4SI 1 "nonimmediate_operand")
4556 (parallel [(const_int 2) (const_int 3)
4557 (const_int 2) (const_int 3)])))
4562 (parallel [(const_int 0) (const_int 1)]))))
4564 (lt:V2DF (match_dup 6) (match_dup 3)))
4566 (and:V2DF (match_dup 7) (match_dup 4)))
4567 (set (match_operand:V2DF 0 "register_operand")
4568 (plus:V2DF (match_dup 6) (match_dup 8)))]
4571 REAL_VALUE_TYPE TWO32r;
4575 real_ldexp (&TWO32r, &dconst1, 32);
4576 x = const_double_from_real_value (TWO32r, DFmode);
4578 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
4579 operands[4] = force_reg (V2DFmode,
4580 ix86_build_const_vector (V2DFmode, 1, x));
4582 operands[5] = gen_reg_rtx (V4SImode);
4584 for (i = 6; i < 9; i++)
4585 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned conversion of the low half (elements 0,1) of a V4SI operand to
;; V2DF.  The fix-up compares operand 5 against zero (operands[3]) with
;; `lt', ANDs the comparison result with a vector of 2^32 (operands[4],
;; built with real_ldexp) and adds that to operand 5 -- lanes that came
;; out negative get 2^32 added.  operands[5..7] are fresh V2DF temporaries.
;; NOTE(review): operand 5 is presumably the signed conversion result; the
;; RTL line that sets it is not visible in this excerpt.
4588 (define_expand "vec_unpacku_float_lo_v4si"
4592 (match_operand:V4SI 1 "nonimmediate_operand")
4593 (parallel [(const_int 0) (const_int 1)]))))
4595 (lt:V2DF (match_dup 5) (match_dup 3)))
4597 (and:V2DF (match_dup 6) (match_dup 4)))
4598 (set (match_operand:V2DF 0 "register_operand")
4599 (plus:V2DF (match_dup 5) (match_dup 7)))]
4602 REAL_VALUE_TYPE TWO32r;
4606 real_ldexp (&TWO32r, &dconst1, 32);
4607 x = const_double_from_real_value (TWO32r, DFmode);
4609 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
4610 operands[4] = force_reg (V2DFmode,
4611 ix86_build_const_vector (V2DFmode, 1, x));
4613 for (i = 5; i < 8; i++)
4614 operands[i] = gen_reg_rtx (V2DFmode);
;; Unsigned conversion of the high half of a V8SI register to V4DF, done
;; entirely in the preparation code:
;;   tmp[5] = high V4SI half of operand 1 (gen_vec_extract_hi_v8si)
;;   tmp[2] = signed conversion of tmp[5] (gen_floatv4siv4df2)
;;   tmp[3] = tmp[2] < 0                  (tmp[0] is the zero vector)
;;   tmp[4] = tmp[3] & 2^32               (tmp[1] is the 2^32 vector)
;;   operand 0 = tmp[2] + tmp[4]
4617 (define_expand "vec_unpacku_float_hi_v8si"
4618 [(match_operand:V4DF 0 "register_operand")
4619 (match_operand:V8SI 1 "register_operand")]
4622 REAL_VALUE_TYPE TWO32r;
4626 real_ldexp (&TWO32r, &dconst1, 32);
4627 x = const_double_from_real_value (TWO32r, DFmode);
4629 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
4630 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
4631 tmp[5] = gen_reg_rtx (V4SImode);
4633 for (i = 2; i < 5; i++)
4634 tmp[i] = gen_reg_rtx (V4DFmode);
4635 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
4636 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
4637 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
4638 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
4639 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
4640 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned conversion of the high half of a V16SI register to V8DF:
;;   tmp[3] = high V8SI half of operand 1 (gen_vec_extract_hi_v16si)
;;   tmp[2] = signed conversion of tmp[3] (gen_floatv8siv8df2)
;;   k      = tmp[2] < 0, held in a QImode register
;;   tmp[2] = masked add of the 2^32 vector (tmp[1]) under k
;;            (gen_addv8df3_mask, with tmp[2] also passed as the fourth
;;            argument)
;;   operand 0 = tmp[2]
4644 (define_expand "vec_unpacku_float_hi_v16si"
4645 [(match_operand:V8DF 0 "register_operand")
4646 (match_operand:V16SI 1 "register_operand")]
4649 REAL_VALUE_TYPE TWO32r;
4652 real_ldexp (&TWO32r, &dconst1, 32);
4653 x = const_double_from_real_value (TWO32r, DFmode);
4655 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
4656 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
4657 tmp[2] = gen_reg_rtx (V8DFmode);
4658 tmp[3] = gen_reg_rtx (V8SImode);
4659 k = gen_reg_rtx (QImode);
4661 emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
4662 emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
4663 emit_insn (gen_rtx_SET (VOIDmode, k,
4664 gen_rtx_LT (QImode, tmp[2], tmp[0])));
4665 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
4666 emit_move_insn (operands[0], tmp[2]);
;; Unsigned conversion of the low half of a V8SI operand to V4DF:
;;   tmp[2] = gen_avx_cvtdq2pd256_2 applied to operand 1
;;   tmp[3] = tmp[2] < 0                  (tmp[0] is the zero vector)
;;   tmp[4] = tmp[3] & 2^32               (tmp[1] is the 2^32 vector)
;;   operand 0 = tmp[2] + tmp[4]
4670 (define_expand "vec_unpacku_float_lo_v8si"
4671 [(match_operand:V4DF 0 "register_operand")
4672 (match_operand:V8SI 1 "nonimmediate_operand")]
4675 REAL_VALUE_TYPE TWO32r;
4679 real_ldexp (&TWO32r, &dconst1, 32);
4680 x = const_double_from_real_value (TWO32r, DFmode);
4682 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
4683 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
4685 for (i = 2; i < 5; i++)
4686 tmp[i] = gen_reg_rtx (V4DFmode);
4687 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
4688 emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
4689 gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
4690 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
4691 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
;; Unsigned conversion of the low half of a V16SI operand to V8DF:
;;   tmp[2] = gen_avx512f_cvtdq2pd512_2 applied to operand 1
;;   k      = tmp[2] < 0, held in a QImode register
;;   tmp[2] = masked add of the 2^32 vector (tmp[1]) under k
;;   operand 0 = tmp[2]
4695 (define_expand "vec_unpacku_float_lo_v16si"
4696 [(match_operand:V8DF 0 "register_operand")
4697 (match_operand:V16SI 1 "nonimmediate_operand")]
4700 REAL_VALUE_TYPE TWO32r;
4703 real_ldexp (&TWO32r, &dconst1, 32);
4704 x = const_double_from_real_value (TWO32r, DFmode);
4706 tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
4707 tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
4708 tmp[2] = gen_reg_rtx (V8DFmode);
4709 k = gen_reg_rtx (QImode);
4711 emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
4712 emit_insn (gen_rtx_SET (VOIDmode, k,
4713 gen_rtx_LT (QImode, tmp[2], tmp[0])));
4714 emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
4715 emit_move_insn (operands[0], tmp[2]);
;; Pack two VF2_512_256 operands into one <ssePSmode> vector: each input
;; is narrowed with float_truncate to <sf2dfmode> (operands[3] and
;; operands[4] are fresh temporaries of that mode) and the result is
;; formed with vec_concat.
4719 (define_expand "vec_pack_trunc_<mode>"
4721 (float_truncate:<sf2dfmode>
4722 (match_operand:VF2_512_256 1 "nonimmediate_operand")))
4724 (float_truncate:<sf2dfmode>
4725 (match_operand:VF2_512_256 2 "nonimmediate_operand")))
4726 (set (match_operand:<ssePSmode> 0 "register_operand")
4727 (vec_concat:<ssePSmode>
4732 operands[3] = gen_reg_rtx (<sf2dfmode>mode);
4733 operands[4] = gen_reg_rtx (<sf2dfmode>mode);
;; Pack two V2DF operands into one V4SF.  Under
;; TARGET_AVX && !TARGET_PREFER_AVX128 the inputs are concatenated into a
;; V4DF temporary and narrowed with avx_cvtpd2ps256.  The remaining
;; statements narrow each input separately with sse2_cvtpd2ps and merge
;; the two results with sse_movlhps.
4736 (define_expand "vec_pack_trunc_v2df"
4737 [(match_operand:V4SF 0 "register_operand")
4738 (match_operand:V2DF 1 "nonimmediate_operand")
4739 (match_operand:V2DF 2 "nonimmediate_operand")]
4744 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4746 tmp0 = gen_reg_rtx (V4DFmode);
4747 tmp1 = force_reg (V2DFmode, operands[1]);
4749 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4750 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
4754 tmp0 = gen_reg_rtx (V4SFmode);
4755 tmp1 = gen_reg_rtx (V4SFmode);
4757 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
4758 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
4759 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
;; Pack two V8DF operands into one V16SI: each input is converted with
;; gen_fix_truncv8dfv8si2 into a V8SI temporary and the two halves are
;; joined with gen_avx_vec_concatv16si.
4764 (define_expand "vec_pack_sfix_trunc_v8df"
4765 [(match_operand:V16SI 0 "register_operand")
4766 (match_operand:V8DF 1 "nonimmediate_operand")
4767 (match_operand:V8DF 2 "nonimmediate_operand")]
4772 r1 = gen_reg_rtx (V8SImode);
4773 r2 = gen_reg_rtx (V8SImode);
4775 emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
4776 emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
4777 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
;; Pack two V4DF operands into one V8SI: each input is converted with
;; gen_fix_truncv4dfv4si2 into a V4SI temporary and the two halves are
;; joined with gen_avx_vec_concatv8si.
4781 (define_expand "vec_pack_sfix_trunc_v4df"
4782 [(match_operand:V8SI 0 "register_operand")
4783 (match_operand:V4DF 1 "nonimmediate_operand")
4784 (match_operand:V4DF 2 "nonimmediate_operand")]
4789 r1 = gen_reg_rtx (V4SImode);
4790 r2 = gen_reg_rtx (V4SImode);
4792 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
4793 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
4794 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Pack two V2DF operands into one V4SI.  Under
;; TARGET_AVX && !TARGET_PREFER_AVX128 the inputs are concatenated into a
;; V4DF temporary and converted with fix_truncv4dfv4si2.  The remaining
;; statements convert each input with sse2_cvttpd2dq and merge the low
;; 64-bit halves through vec_interleave_lowv2di on V2DI views of the
;; temporaries, then move the V4SI view of that into operand 0.
4798 (define_expand "vec_pack_sfix_trunc_v2df"
4799 [(match_operand:V4SI 0 "register_operand")
4800 (match_operand:V2DF 1 "nonimmediate_operand")
4801 (match_operand:V2DF 2 "nonimmediate_operand")]
4804 rtx tmp0, tmp1, tmp2;
4806 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4808 tmp0 = gen_reg_rtx (V4DFmode);
4809 tmp1 = force_reg (V2DFmode, operands[1]);
4811 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4812 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
4816 tmp0 = gen_reg_rtx (V4SImode);
4817 tmp1 = gen_reg_rtx (V4SImode);
4818 tmp2 = gen_reg_rtx (V2DImode);
4820 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
4821 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
4822 emit_insn (gen_vec_interleave_lowv2di (tmp2,
4823 gen_lowpart (V2DImode, tmp0),
4824 gen_lowpart (V2DImode, tmp1)));
4825 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
;; Maps a DF vector mode to the SI vector mode with twice as many elements
;; (the result mode when two such DF vectors are packed).
4830 (define_mode_attr ssepackfltmode
4831 [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
;; Pack two VF2 registers into one <ssepackfltmode> vector using an
;; unsigned conversion.
;; V8DF: each input goes through gen_ufix_truncv8dfv8si2 and the halves
;; are joined with gen_avx_vec_concatv16si.
;; Other modes: ix86_expand_adjust_ufix_to_sfix_si adjusts each input
;; (returning a second value through tmp[2] / tmp[3]), the adjusted inputs
;; are packed with the signed vec_pack_sfix_trunc_<mode>, tmp[2] and tmp[3]
;; are combined by ix86_expand_vec_extract_even_odd (directly when the
;; result mode is V4SImode or TARGET_AVX2 holds, otherwise through V8SF
;; views), and the packed value is XORed with that to give operand 0.
;; NOTE(review): the final argument 0 presumably selects the even
;; elements -- confirm against ix86_expand_vec_extract_even_odd.
4833 (define_expand "vec_pack_ufix_trunc_<mode>"
4834 [(match_operand:<ssepackfltmode> 0 "register_operand")
4835 (match_operand:VF2 1 "register_operand")
4836 (match_operand:VF2 2 "register_operand")]
4839 if (<MODE>mode == V8DFmode)
4843 r1 = gen_reg_rtx (V8SImode);
4844 r2 = gen_reg_rtx (V8SImode);
4846 emit_insn (gen_ufix_truncv8dfv8si2 (r1, operands[1]));
4847 emit_insn (gen_ufix_truncv8dfv8si2 (r2, operands[2]));
4848 emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
4853 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
4854 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
4855 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
4856 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
4857 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
4859 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
4860 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
4864 tmp[5] = gen_reg_rtx (V8SFmode);
4865 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
4866 gen_lowpart (V8SFmode, tmp[3]), 0);
4867 tmp[5] = gen_lowpart (V8SImode, tmp[5]);
4869 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
4870 operands[0], 0, OPTAB_DIRECT);
4871 if (tmp[6] != operands[0])
4872 emit_move_insn (operands[0], tmp[6]);
;; Pack two V4DF operands into one V8SI: each input is converted with
;; gen_avx_cvtpd2dq256 into a V4SI temporary and the two halves are joined
;; with gen_avx_vec_concatv8si.
4878 (define_expand "vec_pack_sfix_v4df"
4879 [(match_operand:V8SI 0 "register_operand")
4880 (match_operand:V4DF 1 "nonimmediate_operand")
4881 (match_operand:V4DF 2 "nonimmediate_operand")]
4886 r1 = gen_reg_rtx (V4SImode);
4887 r2 = gen_reg_rtx (V4SImode);
4889 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
4890 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
4891 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
;; Pack two V2DF operands into one V4SI.  Under
;; TARGET_AVX && !TARGET_PREFER_AVX128 the inputs are concatenated into a
;; V4DF temporary and converted with avx_cvtpd2dq256.  The remaining
;; statements convert each input with sse2_cvtpd2dq and merge the low
;; 64-bit halves through vec_interleave_lowv2di on V2DI views of the
;; temporaries, then move the V4SI view of that into operand 0.
4895 (define_expand "vec_pack_sfix_v2df"
4896 [(match_operand:V4SI 0 "register_operand")
4897 (match_operand:V2DF 1 "nonimmediate_operand")
4898 (match_operand:V2DF 2 "nonimmediate_operand")]
4901 rtx tmp0, tmp1, tmp2;
4903 if (TARGET_AVX && !TARGET_PREFER_AVX128)
4905 tmp0 = gen_reg_rtx (V4DFmode);
4906 tmp1 = force_reg (V2DFmode, operands[1]);
4908 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
4909 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
4913 tmp0 = gen_reg_rtx (V4SImode);
4914 tmp1 = gen_reg_rtx (V4SImode);
4915 tmp2 = gen_reg_rtx (V2DImode);
4917 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
4918 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
4919 emit_insn (gen_vec_interleave_lowv2di (tmp2,
4920 gen_lowpart (V2DImode, tmp0),
4921 gen_lowpart (V2DImode, tmp1)));
4922 emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
4927 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4929 ;; Parallel single-precision floating point element swizzling
4931 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander wrapper around sse_movhlps that accepts nonimmediate operands.
;; ix86_fixup_binary_operands picks the destination actually used for the
;; insn; if that differs from operands[0], the result is copied back with
;; a move.
4933 (define_expand "sse_movhlps_exp"
4934 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4937 (match_operand:V4SF 1 "nonimmediate_operand")
4938 (match_operand:V4SF 2 "nonimmediate_operand"))
4939 (parallel [(const_int 6)
4945 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4947 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
4949 /* Fix up the destination if needed. */
4950 if (dst != operands[0])
4951 emit_move_insn (operands[0], dst);
;; V4SF selection from operands 1 and 2 (first selected index is 6).
;; Rejected when operands 1 and 2 are both memory.  Five alternatives:
;;   0/1: register sources          -> movhlps / vmovhlps
;;   2/3: offsettable memory source -> (v)movlps reading %H2
;;   4:   memory destination        -> (v)movhps storing operand 2
4956 (define_insn "sse_movhlps"
4957 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
4960 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
4961 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
4962 (parallel [(const_int 6)
4966 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4968 movhlps\t{%2, %0|%0, %2}
4969 vmovhlps\t{%2, %1, %0|%0, %1, %2}
4970 movlps\t{%H2, %0|%0, %H2}
4971 vmovlps\t{%H2, %1, %0|%0, %1, %H2}
4972 %vmovhps\t{%2, %0|%q0, %2}"
4973 [(set_attr "isa" "noavx,avx,noavx,avx,*")
4974 (set_attr "type" "ssemov")
4975 (set_attr "ssememalign" "64")
4976 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
4977 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Expander wrapper around sse_movlhps that accepts nonimmediate operands.
;; ix86_fixup_binary_operands picks the destination actually used for the
;; insn; if that differs from operands[0], the result is copied back with
;; a move.
4979 (define_expand "sse_movlhps_exp"
4980 [(set (match_operand:V4SF 0 "nonimmediate_operand")
4983 (match_operand:V4SF 1 "nonimmediate_operand")
4984 (match_operand:V4SF 2 "nonimmediate_operand"))
4985 (parallel [(const_int 0)
4991 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
4993 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
4995 /* Fix up the destination if needed. */
4996 if (dst != operands[0])
4997 emit_move_insn (operands[0], dst);
;; V4SF selection from operands 1 and 2 (first selected index is 0).
;; Guarded by ix86_binary_operator_ok.  Five alternatives:
;;   0/1: register sources               -> movlhps / vmovlhps
;;   2/3: memory source                  -> (v)movhps loading operand 2
;;   4:   offsettable memory destination -> (v)movlps storing to %H0
5002 (define_insn "sse_movlhps"
5003 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5006 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5007 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
5008 (parallel [(const_int 0)
5012 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
5014 movlhps\t{%2, %0|%0, %2}
5015 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5016 movhps\t{%2, %0|%0, %q2}
5017 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5018 %vmovlps\t{%2, %H0|%H0, %2}"
5019 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5020 (set_attr "type" "ssemov")
5021 (set_attr "ssememalign" "64")
5022 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5023 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; 512-bit vunpckhps: selects, from V16SF operands 1 and 2, the index pairs
;; (2,18) (3,19) (6,22) (7,23) (10,26) (11,27) (14,30) (15,31), i.e.
;; elements 2 and 3 of each group of four, alternating between the two
;; sources (indices 16 and up refer to operand 2, assuming the two
;; operands are concatenated by the RTL not visible here).
5025 (define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
5026 [(set (match_operand:V16SF 0 "register_operand" "=v")
5029 (match_operand:V16SF 1 "register_operand" "v")
5030 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5031 (parallel [(const_int 2) (const_int 18)
5032 (const_int 3) (const_int 19)
5033 (const_int 6) (const_int 22)
5034 (const_int 7) (const_int 23)
5035 (const_int 10) (const_int 26)
5036 (const_int 11) (const_int 27)
5037 (const_int 14) (const_int 30)
5038 (const_int 15) (const_int 31)])))]
5040 "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5041 [(set_attr "type" "sselog")
5042 (set_attr "prefix" "evex")
5043 (set_attr "mode" "V16SF")])
;; 256-bit vunpckhps on V8SF.  The selector pairs positions 2,3 and 6,7 of
;; operand 1 with indices 8 higher, i.e. the upper two elements of each
;; four-element group; it emits the VEX three-operand form only.
5045 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
5046 (define_insn "avx_unpckhps256"
5047 [(set (match_operand:V8SF 0 "register_operand" "=x")
5050 (match_operand:V8SF 1 "register_operand" "x")
5051 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5052 (parallel [(const_int 2) (const_int 10)
5053 (const_int 3) (const_int 11)
5054 (const_int 6) (const_int 14)
5055 (const_int 7) (const_int 15)])))]
5057 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
5058 [(set_attr "type" "sselog")
5059 (set_attr "prefix" "vex")
5060 (set_attr "mode" "V8SF")])
;; Expander built from three selections: one with the unpcklps-style
;; indices (0,8,1,9,4,12,5,13), one with the unpckhps-style indices
;; (2,10,3,11,6,14,7,15), and a final one that picks elements 4-7 and 12-15
;; into operand 0.  operands[3] and operands[4] are fresh V8SF pseudos,
;; presumably holding the two intermediate results (the lines that
;; reference them are not visible in this excerpt).
5062 (define_expand "vec_interleave_highv8sf"
5066 (match_operand:V8SF 1 "register_operand" "x")
5067 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5068 (parallel [(const_int 0) (const_int 8)
5069 (const_int 1) (const_int 9)
5070 (const_int 4) (const_int 12)
5071 (const_int 5) (const_int 13)])))
5077 (parallel [(const_int 2) (const_int 10)
5078 (const_int 3) (const_int 11)
5079 (const_int 6) (const_int 14)
5080 (const_int 7) (const_int 15)])))
5081 (set (match_operand:V8SF 0 "register_operand")
5086 (parallel [(const_int 4) (const_int 5)
5087 (const_int 6) (const_int 7)
5088 (const_int 12) (const_int 13)
5089 (const_int 14) (const_int 15)])))]
5092 operands[3] = gen_reg_rtx (V8SFmode);
5093 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit unpckhps on V4SF: selects indices 2,6,3,7.  Alternative 0 is the
;; two-operand legacy form (operand 1 tied to the destination); alternative
;; 1 is the VEX three-operand form.
5096 (define_insn "vec_interleave_highv4sf"
5097 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5100 (match_operand:V4SF 1 "register_operand" "0,x")
5101 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
5102 (parallel [(const_int 2) (const_int 6)
5103 (const_int 3) (const_int 7)])))]
5106 unpckhps\t{%2, %0|%0, %2}
5107 vunpckhps\t{%2, %1, %0|%0, %1, %2}"
5108 [(set_attr "isa" "noavx,avx")
5109 (set_attr "type" "sselog")
5110 (set_attr "prefix" "orig,vex")
5111 (set_attr "mode" "V4SF")])
;; 512-bit vunpcklps on V16SF, with optional masking supplied by the
;; <mask_name> substitution.  Within every group of four elements the
;; selector takes positions 0 and 1 of operand 1 paired with indices 16
;; higher (presumably the same positions of operand 2; the vec_concat line
;; is not visible in this excerpt).
5113 (define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
5114 [(set (match_operand:V16SF 0 "register_operand" "=v")
5117 (match_operand:V16SF 1 "register_operand" "v")
5118 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
5119 (parallel [(const_int 0) (const_int 16)
5120 (const_int 1) (const_int 17)
5121 (const_int 4) (const_int 20)
5122 (const_int 5) (const_int 21)
5123 (const_int 8) (const_int 24)
5124 (const_int 9) (const_int 25)
5125 (const_int 12) (const_int 28)
5126 (const_int 13) (const_int 29)])))]
5128 "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
5129 [(set_attr "type" "sselog")
5130 (set_attr "prefix" "evex")
5131 (set_attr "mode" "V16SF")])
;; 256-bit vunpcklps on V8SF.  The selector pairs positions 0,1 and 4,5 of
;; operand 1 with indices 8 higher, i.e. the lower two elements of each
;; four-element group; it emits the VEX three-operand form only.
5133 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
5134 (define_insn "avx_unpcklps256"
5135 [(set (match_operand:V8SF 0 "register_operand" "=x")
5138 (match_operand:V8SF 1 "register_operand" "x")
5139 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5140 (parallel [(const_int 0) (const_int 8)
5141 (const_int 1) (const_int 9)
5142 (const_int 4) (const_int 12)
5143 (const_int 5) (const_int 13)])))]
5145 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5146 [(set_attr "type" "sselog")
5147 (set_attr "prefix" "vex")
5148 (set_attr "mode" "V8SF")])
;; Expander built from three selections: one with the unpcklps-style
;; indices (0,8,1,9,4,12,5,13), one with the unpckhps-style indices
;; (2,10,3,11,6,14,7,15), and a final one that picks elements 0-3 and 8-11
;; into operand 0.  operands[3] and operands[4] are fresh V8SF pseudos,
;; presumably holding the two intermediate results (the lines that
;; reference them are not visible in this excerpt).
5150 (define_expand "vec_interleave_lowv8sf"
5154 (match_operand:V8SF 1 "register_operand" "x")
5155 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5156 (parallel [(const_int 0) (const_int 8)
5157 (const_int 1) (const_int 9)
5158 (const_int 4) (const_int 12)
5159 (const_int 5) (const_int 13)])))
5165 (parallel [(const_int 2) (const_int 10)
5166 (const_int 3) (const_int 11)
5167 (const_int 6) (const_int 14)
5168 (const_int 7) (const_int 15)])))
5169 (set (match_operand:V8SF 0 "register_operand")
5174 (parallel [(const_int 0) (const_int 1)
5175 (const_int 2) (const_int 3)
5176 (const_int 8) (const_int 9)
5177 (const_int 10) (const_int 11)])))]
5180 operands[3] = gen_reg_rtx (V8SFmode);
5181 operands[4] = gen_reg_rtx (V8SFmode);
;; 128-bit unpcklps on V4SF: selects indices 0,4,1,5.  Alternative 0 is the
;; two-operand legacy form (operand 1 tied to the destination); alternative
;; 1 is the VEX three-operand form.
5184 (define_insn "vec_interleave_lowv4sf"
5185 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5188 (match_operand:V4SF 1 "register_operand" "0,x")
5189 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
5190 (parallel [(const_int 0) (const_int 4)
5191 (const_int 1) (const_int 5)])))]
5194 unpcklps\t{%2, %0|%0, %2}
5195 vunpcklps\t{%2, %1, %0|%0, %1, %2}"
5196 [(set_attr "isa" "noavx,avx")
5197 (set_attr "type" "sselog")
5198 (set_attr "prefix" "orig,vex")
5199 (set_attr "mode" "V4SF")])
;; 256-bit vmovshdup: every odd-indexed element of operand 1 is selected
;; twice (1,1,3,3,5,5,7,7).  The source may be a register or memory.
5201 ;; These are modeled with the same vec_concat as the others so that we
5202 ;; capture users of shufps that can use the new instructions
5203 (define_insn "avx_movshdup256"
5204 [(set (match_operand:V8SF 0 "register_operand" "=x")
5207 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
5209 (parallel [(const_int 1) (const_int 1)
5210 (const_int 3) (const_int 3)
5211 (const_int 5) (const_int 5)
5212 (const_int 7) (const_int 7)])))]
5214 "vmovshdup\t{%1, %0|%0, %1}"
5215 [(set_attr "type" "sse")
5216 (set_attr "prefix" "vex")
5217 (set_attr "mode" "V8SF")])
;; 128-bit movshdup on V4SF.  The template uses the %v prefix, so the same
;; pattern serves the legacy and VEX encodings (prefix "maybe_vex"); the
;; legacy encoding carries a REP prefix (prefix_rep "1").  Only the start of
;; the selector is visible in this excerpt.
5219 (define_insn "sse3_movshdup"
5220 [(set (match_operand:V4SF 0 "register_operand" "=x")
5223 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
5225 (parallel [(const_int 1)
5230 "%vmovshdup\t{%1, %0|%0, %1}"
5231 [(set_attr "type" "sse")
5232 (set_attr "prefix_rep" "1")
5233 (set_attr "prefix" "maybe_vex")
5234 (set_attr "mode" "V4SF")])
;; 512-bit vmovshdup on V16SF with optional masking from the <mask_name>
;; substitution: every odd-indexed element of operand 1 is selected twice
;; (1,1,3,3,...,15,15).
5236 (define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
5237 [(set (match_operand:V16SF 0 "register_operand" "=v")
5240 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5242 (parallel [(const_int 1) (const_int 1)
5243 (const_int 3) (const_int 3)
5244 (const_int 5) (const_int 5)
5245 (const_int 7) (const_int 7)
5246 (const_int 9) (const_int 9)
5247 (const_int 11) (const_int 11)
5248 (const_int 13) (const_int 13)
5249 (const_int 15) (const_int 15)])))]
5251 "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5252 [(set_attr "type" "sse")
5253 (set_attr "prefix" "evex")
5254 (set_attr "mode" "V16SF")])
;; 256-bit vmovsldup: every even-indexed element of operand 1 is selected
;; twice (0,0,2,2,4,4,6,6).  The source may be a register or memory.
5256 (define_insn "avx_movsldup256"
5257 [(set (match_operand:V8SF 0 "register_operand" "=x")
5260 (match_operand:V8SF 1 "nonimmediate_operand" "xm")
5262 (parallel [(const_int 0) (const_int 0)
5263 (const_int 2) (const_int 2)
5264 (const_int 4) (const_int 4)
5265 (const_int 6) (const_int 6)])))]
5267 "vmovsldup\t{%1, %0|%0, %1}"
5268 [(set_attr "type" "sse")
5269 (set_attr "prefix" "vex")
5270 (set_attr "mode" "V8SF")])
;; 128-bit movsldup on V4SF.  The template uses the %v prefix, so the same
;; pattern serves the legacy and VEX encodings (prefix "maybe_vex"); the
;; legacy encoding carries a REP prefix (prefix_rep "1").  Only the start of
;; the selector is visible in this excerpt.
5272 (define_insn "sse3_movsldup"
5273 [(set (match_operand:V4SF 0 "register_operand" "=x")
5276 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
5278 (parallel [(const_int 0)
5283 "%vmovsldup\t{%1, %0|%0, %1}"
5284 [(set_attr "type" "sse")
5285 (set_attr "prefix_rep" "1")
5286 (set_attr "prefix" "maybe_vex")
5287 (set_attr "mode" "V4SF")])
;; 512-bit vmovsldup on V16SF with optional masking from the <mask_name>
;; substitution: every even-indexed element of operand 1 is selected twice
;; (0,0,2,2,...,14,14).
5289 (define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
5290 [(set (match_operand:V16SF 0 "register_operand" "=v")
5293 (match_operand:V16SF 1 "nonimmediate_operand" "vm")
5295 (parallel [(const_int 0) (const_int 0)
5296 (const_int 2) (const_int 2)
5297 (const_int 4) (const_int 4)
5298 (const_int 6) (const_int 6)
5299 (const_int 8) (const_int 8)
5300 (const_int 10) (const_int 10)
5301 (const_int 12) (const_int 12)
5302 (const_int 14) (const_int 14)])))]
5304 "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5305 [(set_attr "type" "sse")
5306 (set_attr "prefix" "evex")
5307 (set_attr "mode" "V16SF")])
;; Expander that turns the 8-bit shufps immediate (operand 3) into the eight
;; explicit element indices taken by avx_shufps256_1.  Each 2-bit field of
;; the immediate is used twice: once as-is and once with 4 added (the second
;; group of four arguments).  The two upper fields additionally get 8 added,
;; which moves them into the index range 8-15.
5309 (define_expand "avx_shufps256"
5310 [(match_operand:V8SF 0 "register_operand")
5311 (match_operand:V8SF 1 "register_operand")
5312 (match_operand:V8SF 2 "nonimmediate_operand")
5313 (match_operand:SI 3 "const_int_operand")]
5316 int mask = INTVAL (operands[3]);
5317 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2],
5318 GEN_INT ((mask >> 0) & 3),
5319 GEN_INT ((mask >> 2) & 3),
5320 GEN_INT (((mask >> 4) & 3) + 8),
5321 GEN_INT (((mask >> 6) & 3) + 8),
5322 GEN_INT (((mask >> 0) & 3) + 4),
5323 GEN_INT (((mask >> 2) & 3) + 4),
5324 GEN_INT (((mask >> 4) & 3) + 12),
5325 GEN_INT (((mask >> 6) & 3) + 12)));
;; 256-bit vshufps with the selector spelled out as eight index operands.
;; The visible part of the condition requires operands 7-10 to equal
;; operands 3-6 plus 4, so both groups of four use the same selectors.
;; The output code folds operands 3-6 back into one 8-bit immediate
;; (2 bits per selector, operands 5 and 6 rebased by 8) and stores it in
;; operands[3] for the template.
5329 ;; One bit in mask selects 2 elements.
5330 (define_insn "avx_shufps256_1"
5331 [(set (match_operand:V8SF 0 "register_operand" "=x")
5334 (match_operand:V8SF 1 "register_operand" "x")
5335 (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
5336 (parallel [(match_operand 3 "const_0_to_3_operand" )
5337 (match_operand 4 "const_0_to_3_operand" )
5338 (match_operand 5 "const_8_to_11_operand" )
5339 (match_operand 6 "const_8_to_11_operand" )
5340 (match_operand 7 "const_4_to_7_operand" )
5341 (match_operand 8 "const_4_to_7_operand" )
5342 (match_operand 9 "const_12_to_15_operand")
5343 (match_operand 10 "const_12_to_15_operand")])))]
5345 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
5346 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
5347 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
5348 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
5351 mask = INTVAL (operands[3]);
5352 mask |= INTVAL (operands[4]) << 2;
5353 mask |= (INTVAL (operands[5]) - 8) << 4;
5354 mask |= (INTVAL (operands[6]) - 8) << 6;
5355 operands[3] = GEN_INT (mask);
5357 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5359 [(set_attr "type" "sseshuf")
5360 (set_attr "length_immediate" "1")
5361 (set_attr "prefix" "vex")
5362 (set_attr "mode" "V8SF")])
;; Expander that turns the 8-bit shufps immediate (operand 3) into the four
;; explicit element indices taken by sse_shufps_v4sf.  The two lower 2-bit
;; fields are passed unchanged; the two upper fields get 4 added, which
;; moves them into the index range 4-7.
5364 (define_expand "sse_shufps"
5365 [(match_operand:V4SF 0 "register_operand")
5366 (match_operand:V4SF 1 "register_operand")
5367 (match_operand:V4SF 2 "nonimmediate_operand")
5368 (match_operand:SI 3 "const_int_operand")]
5371 int mask = INTVAL (operands[3]);
5372 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2],
5373 GEN_INT ((mask >> 0) & 3),
5374 GEN_INT ((mask >> 2) & 3),
5375 GEN_INT (((mask >> 4) & 3) + 4),
5376 GEN_INT (((mask >> 6) & 3) + 4)));
;; shufps over the VI4F_128 modes, expressed as a vec_select from the
;; concatenation of operands 1 and 2.  Operands 3 and 4 index into operand 1
;; (0-3), operands 5 and 6 into operand 2 (4-7).  The output code packs the
;; four selectors into one 8-bit immediate (2 bits each, operands 5 and 6
;; rebased by 4), stores it in operands[3], and picks the legacy or VEX
;; template according to which_alternative.
5380 (define_insn "sse_shufps_<mode>"
5381 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
5382 (vec_select:VI4F_128
5383 (vec_concat:<ssedoublevecmode>
5384 (match_operand:VI4F_128 1 "register_operand" "0,x")
5385 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
5386 (parallel [(match_operand 3 "const_0_to_3_operand")
5387 (match_operand 4 "const_0_to_3_operand")
5388 (match_operand 5 "const_4_to_7_operand")
5389 (match_operand 6 "const_4_to_7_operand")])))]
5393 mask |= INTVAL (operands[3]) << 0;
5394 mask |= INTVAL (operands[4]) << 2;
5395 mask |= (INTVAL (operands[5]) - 4) << 4;
5396 mask |= (INTVAL (operands[6]) - 4) << 6;
5397 operands[3] = GEN_INT (mask);
5399 switch (which_alternative)
5402 return "shufps\t{%3, %2, %0|%0, %2, %3}";
5404 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5409 [(set_attr "isa" "noavx,avx")
5410 (set_attr "type" "sseshuf")
5411 (set_attr "length_immediate" "1")
5412 (set_attr "prefix" "orig,vex")
5413 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of a V4SF into a V2SF destination.
;;   0: register source to memory, movhps store
;;   1: register to register, movhlps
;;   2: offsettable memory source, movlps load through the %H1 modifier
;; All templates use the %v prefix (legacy or VEX encoding).
5415 (define_insn "sse_storehps"
5416 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5418 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
5419 (parallel [(const_int 2) (const_int 3)])))]
5422 %vmovhps\t{%1, %0|%q0, %1}
5423 %vmovhlps\t{%1, %d0|%d0, %1}
5424 %vmovlps\t{%H1, %d0|%d0, %H1}"
5425 [(set_attr "type" "ssemov")
5426 (set_attr "ssememalign" "64")
5427 (set_attr "prefix" "maybe_vex")
5428 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadhps.  The operands are first run through
;; ix86_fixup_binary_operands; the insn is emitted into the destination that
;; helper returns, and the result is copied back into operand 0 when the
;; helper picked a different destination.
5430 (define_expand "sse_loadhps_exp"
5431 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5434 (match_operand:V4SF 1 "nonimmediate_operand")
5435 (parallel [(const_int 0) (const_int 1)]))
5436 (match_operand:V2SF 2 "nonimmediate_operand")))]
5439 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5441 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
5443 /* Fix up the destination if needed. */
5444 if (dst != operands[0])
5445 emit_move_insn (operands[0], dst);
;; Combines elements 0 and 1 of operand 1 with the V2SF operand 2
;; (presumably through a vec_concat whose line is not visible here).
;;   0/1: memory operand 2, movhps / vmovhps
;;   2/3: register operand 2, movlhps / vmovlhps
;;   4:   offsettable memory destination, movlps store through %H0
5450 (define_insn "sse_loadhps"
5451 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o")
5454 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
5455 (parallel [(const_int 0) (const_int 1)]))
5456 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))]
5459 movhps\t{%2, %0|%0, %q2}
5460 vmovhps\t{%2, %1, %0|%0, %1, %q2}
5461 movlhps\t{%2, %0|%0, %2}
5462 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5463 %vmovlps\t{%2, %H0|%H0, %2}"
5464 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5465 (set_attr "type" "ssemov")
5466 (set_attr "ssememalign" "64")
5467 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5468 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of a V4SF into a V2SF destination.
;;   0: register source to memory, movlps store
;;   1: register to register, movaps
;;   2: memory source to register, movlps load
;; All templates use the %v prefix (legacy or VEX encoding).
5470 (define_insn "sse_storelps"
5471 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
5473 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
5474 (parallel [(const_int 0) (const_int 1)])))]
5477 %vmovlps\t{%1, %0|%q0, %1}
5478 %vmovaps\t{%1, %0|%0, %1}
5479 %vmovlps\t{%1, %d0|%d0, %q1}"
5480 [(set_attr "type" "ssemov")
5481 (set_attr "prefix" "maybe_vex")
5482 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse_loadlps.  The operands are first run through
;; ix86_fixup_binary_operands; the insn is emitted into the destination that
;; helper returns, and the result is copied back into operand 0 when the
;; helper picked a different destination.
5484 (define_expand "sse_loadlps_exp"
5485 [(set (match_operand:V4SF 0 "nonimmediate_operand")
5487 (match_operand:V2SF 2 "nonimmediate_operand")
5489 (match_operand:V4SF 1 "nonimmediate_operand")
5490 (parallel [(const_int 2) (const_int 3)]))))]
5493 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
5495 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
5497 /* Fix up the destination if needed. */
5498 if (dst != operands[0])
5499 emit_move_insn (operands[0], dst);
;; Combines the V2SF operand 2 with elements 2 and 3 of operand 1
;; (presumably through a vec_concat whose line is not visible here).
;;   0/1: register operand 2, shufps / vshufps with immediate 0xe4
;;   2/3: memory operand 2, movlps / vmovlps load
;;   4:   memory destination, movlps store of operand 2
5504 (define_insn "sse_loadlps"
5505 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m")
5507 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x")
5509 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
5510 (parallel [(const_int 2) (const_int 3)]))))]
5513 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
5514 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
5515 movlps\t{%2, %0|%0, %q2}
5516 vmovlps\t{%2, %1, %0|%0, %1, %q2}
5517 %vmovlps\t{%2, %0|%q0, %2}"
5518 [(set_attr "isa" "noavx,avx,noavx,avx,*")
5519 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
5520 (set_attr "ssememalign" "64")
5521 (set_attr "length_immediate" "1,1,*,*,*")
5522 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
5523 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
;; Register-to-register movss on V4SF operands.  Alternative 0 is the legacy
;; two-operand form with operand 1 tied to the destination; alternative 1 is
;; the VEX three-operand vmovss.  The insn is attributed scalar mode SF.
5525 (define_insn "sse_movss"
5526 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5528 (match_operand:V4SF 2 "register_operand" " x,x")
5529 (match_operand:V4SF 1 "register_operand" " 0,x")
5533 movss\t{%2, %0|%0, %2}
5534 vmovss\t{%2, %1, %0|%0, %1, %2}"
5535 [(set_attr "isa" "noavx,avx")
5536 (set_attr "type" "ssemov")
5537 (set_attr "prefix" "orig,vex")
5538 (set_attr "mode" "SF")])
;; Broadcasts element 0 of a V4SF register to every element of a
;; VF1_128_256 destination using the register-source vbroadcastss.
5540 (define_insn "avx2_vec_dup<mode>"
5541 [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
5542 (vec_duplicate:VF1_128_256
5544 (match_operand:V4SF 1 "register_operand" "x")
5545 (parallel [(const_int 0)]))))]
5547 "vbroadcastss\t{%1, %0|%0, %1}"
5548 [(set_attr "type" "sselog1")
5549 (set_attr "prefix" "vex")
5550 (set_attr "mode" "<MODE>")])
;; Broadcasts element 0 of a V8SF register to a V8SF destination.  The
;; source is printed with the %x modifier in the vbroadcastss template.
5552 (define_insn "avx2_vec_dupv8sf_1"
5553 [(set (match_operand:V8SF 0 "register_operand" "=x")
5556 (match_operand:V8SF 1 "register_operand" "x")
5557 (parallel [(const_int 0)]))))]
5559 "vbroadcastss\t{%x1, %0|%0, %x1}"
5560 [(set_attr "type" "sselog1")
5561 (set_attr "prefix" "vex")
5562 (set_attr "mode" "V8SF")])
;; Duplicates an SF scalar into all elements of a V4SF register.
;;   0: AVX, register source, vshufps with immediate 0
;;   1: AVX, memory source, vbroadcastss
;;   2: non-AVX, source tied to the destination, shufps with immediate 0
5564 (define_insn "vec_dupv4sf"
5565 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
5567 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
5570 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
5571 vbroadcastss\t{%1, %0|%0, %1}
5572 shufps\t{$0, %0, %0|%0, %0, 0}"
5573 [(set_attr "isa" "avx,avx,noavx")
5574 (set_attr "type" "sseshuf1,ssemov,sseshuf1")
5575 (set_attr "length_immediate" "1,0,1")
5576 (set_attr "prefix_extra" "0,1,*")
5577 (set_attr "prefix" "vex,vex,orig")
5578 (set_attr "mode" "V4SF")])
;; Builds a V2SF from two SF operands.  Seven alternatives:
;;   0/1: both in SSE registers, unpcklps / vunpcklps
;;   2/3: operand 2 in memory, insertps / vinsertps with immediate 0x10
;;   4:   operand 1 from memory with operand 2 matching constraint "C",
;;        movss load
;;   5/6: MMX register destination, punpckldq or movd
5580 ;; Although insertps takes register source, we prefer
5581 ;; unpcklps with register source since it is shorter.
5582 (define_insn "*vec_concatv2sf_sse4_1"
5583 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y")
5585 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m")
5586 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))]
5589 unpcklps\t{%2, %0|%0, %2}
5590 vunpcklps\t{%2, %1, %0|%0, %1, %2}
5591 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
5592 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
5593 %vmovss\t{%1, %0|%0, %1}
5594 punpckldq\t{%2, %0|%0, %2}
5595 movd\t{%1, %0|%0, %1}"
5596 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
5597 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
5598 (set_attr "prefix_data16" "*,*,1,*,*,*,*")
5599 (set_attr "prefix_extra" "*,*,1,1,*,*,*")
5600 (set_attr "length_immediate" "*,*,1,1,*,*,*")
5601 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
5602 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
;; Builds a V2SF from two SF operands without SSE4.1 instructions.
;;   0: both in SSE registers, unpcklps
;;   1: operand 1 from memory with operand 2 matching constraint "C", movss
;;   2/3: MMX register destination, punpckldq or movd
5604 ;; ??? In theory we can match memory for the MMX alternative, but allowing
5605 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
5606 ;; alternatives pretty much forces the MMX alternative to be chosen.
5607 (define_insn "*vec_concatv2sf_sse"
5608 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
5610 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
5611 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
5614 unpcklps\t{%2, %0|%0, %2}
5615 movss\t{%1, %0|%0, %1}
5616 punpckldq\t{%2, %0|%0, %2}
5617 movd\t{%1, %0|%0, %1}"
5618 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
5619 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF operands.
;;   0/1: operand 2 in a register, movlhps / vmovlhps
;;   2/3: operand 2 in memory, movhps / vmovhps
5621 (define_insn "*vec_concatv4sf"
5622 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x")
5624 (match_operand:V2SF 1 "register_operand" " 0,x,0,x")
5625 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
5628 movlhps\t{%2, %0|%0, %2}
5629 vmovlhps\t{%2, %1, %0|%0, %1, %2}
5630 movhps\t{%2, %0|%0, %q2}
5631 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
5632 [(set_attr "isa" "noavx,avx,noavx,avx")
5633 (set_attr "type" "ssemov")
5634 (set_attr "prefix" "orig,vex,orig,vex")
5635 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; Vector initialization expander for the V_128 modes.  All work is
;; delegated to ix86_expand_vector_init, called with `false' as its first
;; argument, the destination register, and operand 1.
5637 (define_expand "vec_init<mode>"
5638 [(match_operand:V_128 0 "register_operand")
5642 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Sets element 0 of a VI4F_128 vector from scalar operand 2, keeping the
;; remaining elements from operand 1 (constraint "C" in alternatives 0-3,
;; a register otherwise).  Eleven alternatives; the visible templates cover
;; the first eight (insertps, scalar move, movd, movss, vmovss, pinsrd,
;; vpinsrd).  The last three have a memory destination with operand 1 tied
;; to it; their templates are not visible in this excerpt.
5646 ;; Avoid combining registers from different units in a single alternative,
5647 ;; see comment above inline_secondary_memory_needed function in i386.c
5648 (define_insn "vec_set<mode>_0"
5649 [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
5650 "=x,x,x ,x,x,x,x ,x ,m ,m ,m")
5652 (vec_duplicate:VI4F_128
5653 (match_operand:<ssescalarmode> 2 "general_operand"
5654 " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF"))
5655 (match_operand:VI4F_128 1 "vector_move_operand"
5656 " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0")
5660 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
5661 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
5662 %vmovd\t{%2, %0|%0, %2}
5663 movss\t{%2, %0|%0, %2}
5664 movss\t{%2, %0|%0, %2}
5665 vmovss\t{%2, %1, %0|%0, %1, %2}
5666 pinsrd\t{$0, %2, %0|%0, %2, 0}
5667 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
5671 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*")
5673 (cond [(eq_attr "alternative" "0,6,7")
5674 (const_string "sselog")
5675 (eq_attr "alternative" "9")
5676 (const_string "imov")
5677 (eq_attr "alternative" "10")
5678 (const_string "fmov")
5680 (const_string "ssemov")))
5681 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*")
5682 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*")
5683 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*")
5684 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")])
;; Inserts SF operand 2 into one element of V4SF operand 1 with insertps.
;; Operand 3 must be a power of two whose log2 is below the number of V4SF
;; elements; the output code rewrites it as (log2 << 4), the value passed as
;; the insertps immediate, and then picks the legacy or VEX template by
;; which_alternative.
5686 ;; A subset is vec_setv4sf.
5687 (define_insn "*vec_setv4sf_sse4_1"
5688 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5691 (match_operand:SF 2 "nonimmediate_operand" "xm,xm"))
5692 (match_operand:V4SF 1 "register_operand" "0,x")
5693 (match_operand:SI 3 "const_int_operand")))]
5695 && ((unsigned) exact_log2 (INTVAL (operands[3]))
5696 < GET_MODE_NUNITS (V4SFmode))"
5698 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
5699 switch (which_alternative)
5702 return "insertps\t{%3, %2, %0|%0, %2, %3}";
5704 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5709 [(set_attr "isa" "noavx,avx")
5710 (set_attr "type" "sselog")
5711 (set_attr "prefix_data16" "1,*")
5712 (set_attr "prefix_extra" "1")
5713 (set_attr "length_immediate" "1")
5714 (set_attr "prefix" "orig,vex")
5715 (set_attr "mode" "V4SF")])
;; insertps with an arbitrary 8-bit immediate, modeled as an unspec.
;; When operand 2 is in memory, the output code takes the top two bits of
;; the immediate (count_s), folds them into the address as an SFmode
;; reference at byte offset count_s * 4, and clears them from the immediate
;; before printing.
5717 (define_insn "sse4_1_insertps"
5718 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
5719 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
5720 (match_operand:V4SF 1 "register_operand" "0,x")
5721 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
5725 if (MEM_P (operands[2]))
5727 unsigned count_s = INTVAL (operands[3]) >> 6;
5729 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
5730 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
5732 switch (which_alternative)
5735 return "insertps\t{%3, %2, %0|%0, %2, %3}";
5737 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
5742 [(set_attr "isa" "noavx,avx")
5743 (set_attr "type" "sselog")
5744 (set_attr "prefix_data16" "1,*")
5745 (set_attr "prefix_extra" "1")
5746 (set_attr "length_immediate" "1")
5747 (set_attr "prefix" "orig,vex")
5748 (set_attr "mode" "V4SF")])
;; Post-reload split (TARGET_SSE): a VI4F_128 memory destination written
;; from a vec_duplicate of a non-memory scalar is replaced by a plain scalar
;; store to the same address (offset 0, scalar mode).  The remaining
;; operands of the matched pattern are not visible in this excerpt.
5751 [(set (match_operand:VI4F_128 0 "memory_operand")
5753 (vec_duplicate:VI4F_128
5754 (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
5757 "TARGET_SSE && reload_completed"
5758 [(set (match_dup 0) (match_dup 1))]
5759 "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
;; Element-set expander for the V modes: operand 0 is the vector register,
;; operand 1 the scalar register, operand 2 a constant that is passed on as
;; an integer.  All work is delegated to ix86_expand_vector_set, called with
;; `false' as its first argument.
5761 (define_expand "vec_set<mode>"
5762 [(match_operand:V 0 "register_operand")
5763 (match_operand:<ssescalarmode> 1 "register_operand")
5764 (match_operand 2 "const_int_operand")]
5767 ix86_expand_vector_set (false, operands[0], operands[1],
5768 INTVAL (operands[2]));
;; Extraction of element 0 from a V4SF.  Memory-to-memory is rejected by the
;; condition.  After reload the insn is split into a plain SF move: a
;; register source is re-expressed as the same hard register in SFmode,
;; while the other adjustment re-addresses the source as SFmode at offset 0
;; (the `else' separating the two is not visible in this excerpt).
5772 (define_insn_and_split "*vec_extractv4sf_0"
5773 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
5775 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
5776 (parallel [(const_int 0)])))]
5777 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
5779 "&& reload_completed"
5780 [(set (match_dup 0) (match_dup 1))]
5782 if (REG_P (operands[1]))
5783 operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
5785 operands[1] = adjust_address (operands[1], SFmode, 0);
;; Extraction of an arbitrary V4SF element (operand 2 in 0..3).
;; Alternative 0 (general register or memory destination) emits extractps
;; directly.  The SSE-register alternatives are split after reload: the
;; destination is viewed as a V4SF register and filled either by a shufps
;; that replicates the requested element or by vec_interleave_highv4sf of
;; the source with itself.  The case labels selecting between the two are
;; not visible in this excerpt.
5788 (define_insn_and_split "*sse4_1_extractps"
5789 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x")
5791 (match_operand:V4SF 1 "register_operand" "x,0,x")
5792 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))]
5795 %vextractps\t{%2, %1, %0|%0, %1, %2}
5798 "&& reload_completed && SSE_REG_P (operands[0])"
5801 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
5802 switch (INTVAL (operands[2]))
5806 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
5807 operands[2], operands[2],
5808 GEN_INT (INTVAL (operands[2]) + 4),
5809 GEN_INT (INTVAL (operands[2]) + 4)));
5812 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
5815 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */
5820 [(set_attr "isa" "*,noavx,avx")
5821 (set_attr "type" "sselog,*,*")
5822 (set_attr "prefix_data16" "1,*,*")
5823 (set_attr "prefix_extra" "1,*,*")
5824 (set_attr "length_immediate" "1,*,*")
5825 (set_attr "prefix" "maybe_vex,*,*")
5826 (set_attr "mode" "V4SF,*,*")])
;; Extraction of element N (0..3) from a V4SF in offsettable memory into an
;; SSE, general or x87 register.  After reload it is split into a plain SF
;; load from the source address plus N * 4 bytes.
5828 (define_insn_and_split "*vec_extractv4sf_mem"
5829 [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
5831 (match_operand:V4SF 1 "memory_operand" "o,o,o")
5832 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
5835 "&& reload_completed"
5836 [(set (match_dup 0) (match_dup 1))]
5838 operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
;; Masked extraction of one 128-bit quarter (operand 2 in 0..3) from a
;; V16FI register.  Operand 3 supplies the merge value, operand 4 the mask.
;; The quarter index is expanded into the four consecutive element indices
;; expected by the ..._32x4_1_mask insn.
;;
;; When the destination is memory and the merge value is a constant vector,
;; the extract is performed into a fresh pseudo and then stored, because the
;; masked-store form needs the merge operand to be the destination itself.
;; The previous code called force_reg on the destination, which loaded the
;; old memory contents into a pseudo and never wrote the result back.
5841 (define_expand "avx512f_vextract<shuffletype>32x4_mask"
5842 [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
5843 (match_operand:V16FI 1 "register_operand")
5844 (match_operand:SI 2 "const_0_to_3_operand")
5845 (match_operand:<ssequartermode> 3 "nonimmediate_operand")
5846 (match_operand:QI 4 "register_operand")]
  rtx dest = operands[0];
5849 if (MEM_P (dest) && GET_CODE (operands[3]) == CONST_VECTOR)
5850 dest = gen_reg_rtx (<ssequartermode>mode);
5851 switch (INTVAL (operands[2]))
5854 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
5855 operands[1], GEN_INT (0), GEN_INT (1), GEN_INT (2),
5856 GEN_INT (3), operands[3], operands[4]));
  /* Store the result if it was computed into a temporary.  */
  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
5859 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
5860 operands[1], GEN_INT (4), GEN_INT (5), GEN_INT (6),
5861 GEN_INT (7), operands[3], operands[4]));
  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
5864 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
5865 operands[1], GEN_INT (8), GEN_INT (9), GEN_INT (10),
5866 GEN_INT (11), operands[3], operands[4]));
  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
5869 emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
5870 operands[1], GEN_INT (12), GEN_INT (13), GEN_INT (14),
5871 GEN_INT (15), operands[3], operands[4]));
  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
;; Masked-store form of vextract{f,i}32x4: the selected quarter of operand 1
;; is merged under mask operand 7 into the memory destination, which is also
;; the merge source (operand 6 is tied to operand 0).
;;
;; The condition requires operands 2-5 to be four consecutive element
;; indices starting at a multiple of four, because the output code derives
;; the instruction immediate as operands[2] >> 2.  The comparisons must be
;; `=='; the previous `=' assigned through INTVAL, which modified the
;; CONST_INT operands and made the test succeed for any nonzero value.
5879 (define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
5880 [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
5881 (vec_merge:<ssequartermode>
5882 (vec_select:<ssequartermode>
5883 (match_operand:V16FI 1 "register_operand" "v")
5884 (parallel [(match_operand 2 "const_0_to_15_operand")
5885 (match_operand 3 "const_0_to_15_operand")
5886 (match_operand 4 "const_0_to_15_operand")
5887 (match_operand 5 "const_0_to_15_operand")]))
5888 (match_operand:<ssequartermode> 6 "memory_operand" "0")
5889 (match_operand:QI 7 "register_operand" "Yk")))]
5890 "TARGET_AVX512F && INTVAL (operands[2]) % 4 == 0
 && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
5891 && (INTVAL (operands[3]) == INTVAL (operands[4]) - 1)
5892 && (INTVAL (operands[4]) == INTVAL (operands[5]) - 1)"
5894 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
5895 return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
5897 [(set_attr "type" "sselog")
5898 (set_attr "prefix_extra" "1")
5899 (set_attr "length_immediate" "1")
5900 (set_attr "memory" "store")
5901 (set_attr "prefix" "evex")
5902 (set_attr "mode" "<sseinsnmode>")])
;; vextract{f,i}32x4 of one 128-bit quarter of a V16FI register, with
;; optional masking supplied by the <mask_name> substitution.
;;
;; The condition requires operands 2-5 to be four consecutive element
;; indices starting at a multiple of four, because the output code derives
;; the instruction immediate as operands[2] >> 2.  The comparisons must be
;; `=='; the previous `=' assigned through INTVAL, which modified the
;; CONST_INT operands and made the test succeed for any nonzero value.
5904 (define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
5905 [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
5906 (vec_select:<ssequartermode>
5907 (match_operand:V16FI 1 "register_operand" "v")
5908 (parallel [(match_operand 2 "const_0_to_15_operand")
5909 (match_operand 3 "const_0_to_15_operand")
5910 (match_operand 4 "const_0_to_15_operand")
5911 (match_operand 5 "const_0_to_15_operand")])))]
5912 "TARGET_AVX512F && INTVAL (operands[2]) % 4 == 0
 && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
5913 && (INTVAL (operands[3]) == INTVAL (operands[4]) - 1)
5914 && (INTVAL (operands[4]) == INTVAL (operands[5]) - 1)"
5916 operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
5917 return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
5919 [(set_attr "type" "sselog")
5920 (set_attr "prefix_extra" "1")
5921 (set_attr "length_immediate" "1")
5922 (set (attr "memory")
5923 (if_then_else (match_test "MEM_P (operands[0])")
5924 (const_string "store")
5925 (const_string "none")))
5926 (set_attr "prefix" "evex")
5927 (set_attr "mode" "<sseinsnmode>")])
;; Masked extraction of one 256-bit half (operand 2 in 0..1) from a V8FI
;; register.  Operand 3 supplies the merge value, operand 4 the mask.  The
;; half index selects between the vec_extract_lo/hi ..._mask generators.
;;
;; When the destination is memory and the merge value is a constant vector,
;; the extract is performed into a fresh pseudo of the destination's mode
;; (<ssehalfvecmode>) and then stored.  The previous code called force_reg
;; with <ssequartermode>, which does not match operand 0's declared mode,
;; and never wrote the result back to memory.
5929 (define_expand "avx512f_vextract<shuffletype>64x4_mask"
5930 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
5931 (match_operand:V8FI 1 "register_operand")
5932 (match_operand:SI 2 "const_0_to_1_operand")
5933 (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
5934 (match_operand:QI 4 "register_operand")]
5937 rtx (*insn)(rtx, rtx, rtx, rtx);
  rtx dest = operands[0];
5939 if (MEM_P (dest) && GET_CODE (operands[3]) == CONST_VECTOR)
5940 dest = gen_reg_rtx (<ssehalfvecmode>mode);
5942 switch (INTVAL (operands[2]))
5945 insn = gen_vec_extract_lo_<mode>_mask;
5948 insn = gen_vec_extract_hi_<mode>_mask;
5954 emit_insn (insn (dest, operands[1], operands[3], operands[4]));
  /* Store the result if it was computed into a temporary.  */
  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
;; Post-reload split (TARGET_AVX512F): extracting elements 0-3 of a V8FI
;; value becomes a plain half-width move.  The source is re-expressed in
;; <ssehalfvecmode> either as the same hard register or through gen_lowpart;
;; the test choosing between the two is not visible in this excerpt.
;; Memory-to-memory is excluded by the condition.
5959 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
5960 (vec_select:<ssehalfvecmode>
5961 (match_operand:V8FI 1 "nonimmediate_operand")
5962 (parallel [(const_int 0) (const_int 1)
5963 (const_int 2) (const_int 3)])))]
5964 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
5965 && reload_completed"
5968 rtx op1 = operands[1];
5970 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
5972 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
5973 emit_move_insn (operands[0], op1);
;; Masked-store form of vextract{f,i}64x4 with immediate 0: elements 0-3 of
;; V8FI operand 1 are merged under mask operand 3 into the memory
;; destination, which is also the merge source (operand 2 is tied to
;; operand 0).  The "memory" attribute is stated explicitly as "store" to
;; agree with the sibling ..._maskm patterns.
5977 (define_insn "vec_extract_lo_<mode>_maskm"
5978 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
5979 (vec_merge:<ssehalfvecmode>
5980 (vec_select:<ssehalfvecmode>
5981 (match_operand:V8FI 1 "register_operand" "v")
5982 (parallel [(const_int 0) (const_int 1)
5983 (const_int 2) (const_int 3)]))
5984 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
5985 (match_operand:QI 3 "register_operand" "Yk")))]
5987 "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
5988 [(set_attr "type" "sselog")
5989 (set_attr "prefix_extra" "1")
5990 (set_attr "length_immediate" "1")
 (set_attr "memory" "store")
5991 (set_attr "prefix" "evex")
5992 (set_attr "mode" "<sseinsnmode>")])
;; Extraction of elements 0-3 of a V8FI operand, with optional masking from
;; the <mask_name> substitution.  Memory-to-memory is rejected by the
;; condition.  The visible return statement emits vextract{f,i}64x4 with
;; immediate 0; any other branch of the output code is not visible in this
;; excerpt.  The "memory" attribute is "store" when operand 0 is a MEM.
5994 (define_insn "vec_extract_lo_<mode><mask_name>"
5995 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
5996 (vec_select:<ssehalfvecmode>
5997 (match_operand:V8FI 1 "nonimmediate_operand" "vm")
5998 (parallel [(const_int 0) (const_int 1)
5999 (const_int 2) (const_int 3)])))]
6000 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6003 return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
6007 [(set_attr "type" "sselog")
6008 (set_attr "prefix_extra" "1")
6009 (set_attr "length_immediate" "1")
6010 (set (attr "memory")
6011 (if_then_else (match_test "MEM_P (operands[0])")
6012 (const_string "store")
6013 (const_string "none")))
6014 (set_attr "prefix" "evex")
6015 (set_attr "mode" "<sseinsnmode>")])
;; Masked-store form of vextract{f,i}64x4 with immediate 1: elements 4-7 of
;; V8FI operand 1 are merged under mask operand 3 into the memory
;; destination, which is also the merge source (operand 2 is tied to
;; operand 0).
6017 (define_insn "vec_extract_hi_<mode>_maskm"
6018 [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
6019 (vec_merge:<ssehalfvecmode>
6020 (vec_select:<ssehalfvecmode>
6021 (match_operand:V8FI 1 "register_operand" "v")
6022 (parallel [(const_int 4) (const_int 5)
6023 (const_int 6) (const_int 7)]))
6024 (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
6025 (match_operand:QI 3 "register_operand" "Yk")))]
6027 "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
6028 [(set_attr "type" "sselog")
6029 (set_attr "prefix_extra" "1")
6030 (set_attr "length_immediate" "1")
6031 (set_attr "memory" "store")
6032 (set_attr "prefix" "evex")
6033 (set_attr "mode" "<sseinsnmode>")])
;; Extraction of elements 4-7 of a V8FI register via vextract{f,i}64x4 with
;; immediate 1, with optional masking from the <mask_name> substitution.
;; The "memory" attribute is "store" when operand 0 is a MEM.
6035 (define_insn "vec_extract_hi_<mode><mask_name>"
6036 [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
6037 (vec_select:<ssehalfvecmode>
6038 (match_operand:V8FI 1 "register_operand" "v")
6039 (parallel [(const_int 4) (const_int 5)
6040 (const_int 6) (const_int 7)])))]
6042 "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
6043 [(set_attr "type" "sselog")
6044 (set_attr "prefix_extra" "1")
6045 (set_attr "length_immediate" "1")
6046 (set (attr "memory")
6047 (if_then_else (match_test "MEM_P (operands[0])")
6048 (const_string "store")
6049 (const_string "none")))
6050 (set_attr "prefix" "evex")
6051 (set_attr "mode" "<sseinsnmode>")])
;; Expander for extracting one 128-bit half (operand 2 in 0..1) from a V_256
;; register.  The half index selects between gen_vec_extract_lo_<mode> and
;; gen_vec_extract_hi_<mode>, which is then emitted with the destination and
;; the source.
6053 (define_expand "avx_vextractf128<mode>"
6054 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
6055 (match_operand:V_256 1 "register_operand")
6056 (match_operand:SI 2 "const_0_to_1_operand")]
6059 rtx (*insn)(rtx, rtx);
6061 switch (INTVAL (operands[2]))
6064 insn = gen_vec_extract_lo_<mode>;
6067 insn = gen_vec_extract_hi_<mode>;
6073 emit_insn (insn (operands[0], operands[1]));
;; Extraction of elements 0-7 of a V16FI operand.  Memory-to-memory is
;; rejected by the condition.  After reload the insn is split into a plain
;; half-width move: the source is re-expressed in <ssehalfvecmode> either as
;; the same hard register or through gen_lowpart (the test choosing between
;; the two is not visible in this excerpt).
6077 (define_insn_and_split "vec_extract_lo_<mode>"
6078 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
6079 (vec_select:<ssehalfvecmode>
6080 (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
6081 (parallel [(const_int 0) (const_int 1)
6082 (const_int 2) (const_int 3)
6083 (const_int 4) (const_int 5)
6084 (const_int 6) (const_int 7)])))]
6085 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6087 "&& reload_completed"
6090 rtx op1 = operands[1];
6092 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
6094 op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
6095 emit_move_insn (operands[0], op1);
;; Select elements 8-15 of a V16FI operand into a register (alternative 0)
;; or memory (alternative 1) with vextracti64x4, immediate 0x1.  The
;; integer-named instruction and mode attribute XI are used for every mode
;; of the iterator.
6099 (define_insn "vec_extract_hi_<mode>"
6100 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
6101 (vec_select:<ssehalfvecmode>
6102 (match_operand:V16FI 1 "nonimmediate_operand" "v,v")
6103 (parallel [(const_int 8) (const_int 9)
6104 (const_int 10) (const_int 11)
6105 (const_int 12) (const_int 13)
6106 (const_int 14) (const_int 15)])))]
6108 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6109 [(set_attr "type" "sselog")
6110 (set_attr "prefix_extra" "1")
6111 (set_attr "length_immediate" "1")
6112 (set_attr "memory" "none,store")
6113 (set_attr "prefix" "evex")
6114 (set_attr "mode" "XI")])
;; Select elements 0-1 of a VI8F_256 operand.  Matches under TARGET_AVX
;; unless both operands are memory.  After reload it splits into a plain
;; half-width move: a register source becomes a half-mode register with
;; the same REGNO; the other visible rewrite uses adjust_address at
;; offset 0.
6116 (define_insn_and_split "vec_extract_lo_<mode>"
6117 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6118 (vec_select:<ssehalfvecmode>
6119 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
6120 (parallel [(const_int 0) (const_int 1)])))]
6121 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6123 "&& reload_completed"
6124 [(set (match_dup 0) (match_dup 1))]
6126 if (REG_P (operands[1]))
6127 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
6129 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; Select elements 2-3 of a VI8F_256 register into a register
;; (alternative 0) or memory (alternative 1) with vextract<i128>,
;; immediate 0x1.
6132 (define_insn "vec_extract_hi_<mode>"
6133 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6134 (vec_select:<ssehalfvecmode>
6135 (match_operand:VI8F_256 1 "register_operand" "x,x")
6136 (parallel [(const_int 2) (const_int 3)])))]
6138 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
6139 [(set_attr "type" "sselog")
6140 (set_attr "prefix_extra" "1")
6141 (set_attr "length_immediate" "1")
6142 (set_attr "memory" "none,store")
6143 (set_attr "prefix" "vex")
6144 (set_attr "mode" "<sseinsnmode>")])
;; Select elements 0-3 of a VI4F_256 operand.  Matches under TARGET_AVX
;; unless both operands are memory.  After reload it splits into a plain
;; half-width move: a register source becomes a half-mode register with
;; the same REGNO; the other visible rewrite uses adjust_address at
;; offset 0.
6146 (define_insn_and_split "vec_extract_lo_<mode>"
6147 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6148 (vec_select:<ssehalfvecmode>
6149 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x")
6150 (parallel [(const_int 0) (const_int 1)
6151 (const_int 2) (const_int 3)])))]
6152 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6154 "&& reload_completed"
6155 [(set (match_dup 0) (match_dup 1))]
6157 if (REG_P (operands[1]))
6158 operands[1] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[1]));
6160 operands[1] = adjust_address (operands[1], <ssehalfvecmode>mode, 0);
;; Select elements 4-7 of a VI4F_256 register into a register
;; (alternative 0) or memory (alternative 1) with vextract<i128>,
;; immediate 0x1.
6163 (define_insn "vec_extract_hi_<mode>"
6164 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
6165 (vec_select:<ssehalfvecmode>
6166 (match_operand:VI4F_256 1 "register_operand" "x,x")
6167 (parallel [(const_int 4) (const_int 5)
6168 (const_int 6) (const_int 7)])))]
6170 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}"
6171 [(set_attr "type" "sselog")
6172 (set_attr "prefix_extra" "1")
6173 (set_attr "length_immediate" "1")
6174 (set_attr "memory" "none,store")
6175 (set_attr "prefix" "vex")
6176 (set_attr "mode" "<sseinsnmode>")])
;; Select elements 0-15 of a V32HI operand as a V16HI value.  Matches
;; under TARGET_AVX512F unless both operands are memory.  After reload it
;; splits into a plain V16HI move: a register source becomes a V16HImode
;; register with the same REGNO; the other visible rewrite uses
;; adjust_address at offset 0.
6178 (define_insn_and_split "vec_extract_lo_v32hi"
6179 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
6181 (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
6182 (parallel [(const_int 0) (const_int 1)
6183 (const_int 2) (const_int 3)
6184 (const_int 4) (const_int 5)
6185 (const_int 6) (const_int 7)
6186 (const_int 8) (const_int 9)
6187 (const_int 10) (const_int 11)
6188 (const_int 12) (const_int 13)
6189 (const_int 14) (const_int 15)])))]
6190 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6192 "&& reload_completed"
6193 [(set (match_dup 0) (match_dup 1))]
6195 if (REG_P (operands[1]))
6196 operands[1] = gen_rtx_REG (V16HImode, REGNO (operands[1]));
6198 operands[1] = adjust_address (operands[1], V16HImode, 0);
;; Select elements 16-31 of a V32HI operand as a V16HI value, into a
;; register (alternative 0) or memory (alternative 1), with vextracti64x4,
;; immediate 0x1.
6201 (define_insn "vec_extract_hi_v32hi"
6202 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
6204 (match_operand:V32HI 1 "nonimmediate_operand" "v,v")
6205 (parallel [(const_int 16) (const_int 17)
6206 (const_int 18) (const_int 19)
6207 (const_int 20) (const_int 21)
6208 (const_int 22) (const_int 23)
6209 (const_int 24) (const_int 25)
6210 (const_int 26) (const_int 27)
6211 (const_int 28) (const_int 29)
6212 (const_int 30) (const_int 31)])))]
6214 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6215 [(set_attr "type" "sselog")
6216 (set_attr "prefix_extra" "1")
6217 (set_attr "length_immediate" "1")
6218 (set_attr "memory" "none,store")
6219 (set_attr "prefix" "evex")
6220 (set_attr "mode" "XI")])
;; Select elements 0-7 of a V16HI operand as a V8HI value.  Matches under
;; TARGET_AVX unless both operands are memory.  After reload it splits
;; into a plain V8HI move: a register source becomes a V8HImode register
;; with the same REGNO; the other visible rewrite uses adjust_address at
;; offset 0.
6222 (define_insn_and_split "vec_extract_lo_v16hi"
6223 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
6225 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
6226 (parallel [(const_int 0) (const_int 1)
6227 (const_int 2) (const_int 3)
6228 (const_int 4) (const_int 5)
6229 (const_int 6) (const_int 7)])))]
6230 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6232 "&& reload_completed"
6233 [(set (match_dup 0) (match_dup 1))]
6235 if (REG_P (operands[1]))
6236 operands[1] = gen_rtx_REG (V8HImode, REGNO (operands[1]));
6238 operands[1] = adjust_address (operands[1], V8HImode, 0);
;; Select elements 8-15 of a V16HI register as a V8HI value, into a
;; register (alternative 0) or memory (alternative 1), with
;; vextract%~128, immediate 0x1.
6241 (define_insn "vec_extract_hi_v16hi"
6242 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
6244 (match_operand:V16HI 1 "register_operand" "x,x")
6245 (parallel [(const_int 8) (const_int 9)
6246 (const_int 10) (const_int 11)
6247 (const_int 12) (const_int 13)
6248 (const_int 14) (const_int 15)])))]
6250 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
6251 [(set_attr "type" "sselog")
6252 (set_attr "prefix_extra" "1")
6253 (set_attr "length_immediate" "1")
6254 (set_attr "memory" "none,store")
6255 (set_attr "prefix" "vex")
6256 (set_attr "mode" "OI")])
;; Select elements 0-31 of a V64QI operand as a V32QI value.  Matches
;; under TARGET_AVX512F unless both operands are memory.  After reload it
;; splits into a plain V32QI move: a register source becomes a V32QImode
;; register with the same REGNO; the other visible rewrite uses
;; adjust_address at offset 0.
6258 (define_insn_and_split "vec_extract_lo_v64qi"
6259 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6261 (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
6262 (parallel [(const_int 0) (const_int 1)
6263 (const_int 2) (const_int 3)
6264 (const_int 4) (const_int 5)
6265 (const_int 6) (const_int 7)
6266 (const_int 8) (const_int 9)
6267 (const_int 10) (const_int 11)
6268 (const_int 12) (const_int 13)
6269 (const_int 14) (const_int 15)
6270 (const_int 16) (const_int 17)
6271 (const_int 18) (const_int 19)
6272 (const_int 20) (const_int 21)
6273 (const_int 22) (const_int 23)
6274 (const_int 24) (const_int 25)
6275 (const_int 26) (const_int 27)
6276 (const_int 28) (const_int 29)
6277 (const_int 30) (const_int 31)])))]
6278 "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6280 "&& reload_completed"
6281 [(set (match_dup 0) (match_dup 1))]
6283 if (REG_P (operands[1]))
6284 operands[1] = gen_rtx_REG (V32QImode, REGNO (operands[1]));
6286 operands[1] = adjust_address (operands[1], V32QImode, 0);
;; Select elements 32-63 of a V64QI operand as a V32QI value, into a
;; register (alternative 0) or memory (alternative 1), with vextracti64x4,
;; immediate 0x1.
6289 (define_insn "vec_extract_hi_v64qi"
6290 [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
6292 (match_operand:V64QI 1 "nonimmediate_operand" "v,v")
6293 (parallel [(const_int 32) (const_int 33)
6294 (const_int 34) (const_int 35)
6295 (const_int 36) (const_int 37)
6296 (const_int 38) (const_int 39)
6297 (const_int 40) (const_int 41)
6298 (const_int 42) (const_int 43)
6299 (const_int 44) (const_int 45)
6300 (const_int 46) (const_int 47)
6301 (const_int 48) (const_int 49)
6302 (const_int 50) (const_int 51)
6303 (const_int 52) (const_int 53)
6304 (const_int 54) (const_int 55)
6305 (const_int 56) (const_int 57)
6306 (const_int 58) (const_int 59)
6307 (const_int 60) (const_int 61)
6308 (const_int 62) (const_int 63)])))]
6310 "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
6311 [(set_attr "type" "sselog")
6312 (set_attr "prefix_extra" "1")
6313 (set_attr "length_immediate" "1")
6314 (set_attr "memory" "none,store")
6315 (set_attr "prefix" "evex")
6316 (set_attr "mode" "XI")])
;; Select elements 0-15 of a V32QI operand as a V16QI value.  Matches
;; under TARGET_AVX unless both operands are memory.  After reload it
;; splits into a plain V16QI move: a register source becomes a V16QImode
;; register with the same REGNO; the other visible rewrite uses
;; adjust_address at offset 0.
6318 (define_insn_and_split "vec_extract_lo_v32qi"
6319 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
6321 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
6322 (parallel [(const_int 0) (const_int 1)
6323 (const_int 2) (const_int 3)
6324 (const_int 4) (const_int 5)
6325 (const_int 6) (const_int 7)
6326 (const_int 8) (const_int 9)
6327 (const_int 10) (const_int 11)
6328 (const_int 12) (const_int 13)
6329 (const_int 14) (const_int 15)])))]
6330 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
6332 "&& reload_completed"
6333 [(set (match_dup 0) (match_dup 1))]
6335 if (REG_P (operands[1]))
6336 operands[1] = gen_rtx_REG (V16QImode, REGNO (operands[1]));
6338 operands[1] = adjust_address (operands[1], V16QImode, 0);
;; Select elements 16-31 of a V32QI register as a V16QI value, into a
;; register (alternative 0) or memory (alternative 1), with
;; vextract%~128, immediate 0x1.
6341 (define_insn "vec_extract_hi_v32qi"
6342 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
6344 (match_operand:V32QI 1 "register_operand" "x,x")
6345 (parallel [(const_int 16) (const_int 17)
6346 (const_int 18) (const_int 19)
6347 (const_int 20) (const_int 21)
6348 (const_int 22) (const_int 23)
6349 (const_int 24) (const_int 25)
6350 (const_int 26) (const_int 27)
6351 (const_int 28) (const_int 29)
6352 (const_int 30) (const_int 31)])))]
6354 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
6355 [(set_attr "type" "sselog")
6356 (set_attr "prefix_extra" "1")
6357 (set_attr "length_immediate" "1")
6358 (set_attr "memory" "none,store")
6359 (set_attr "prefix" "vex")
6360 (set_attr "mode" "OI")])
6362 ;; Modes handled by vec_extract patterns.
;; The 128-bit modes are listed unconditionally; 256-bit modes are gated
;; on TARGET_AVX and the 512-bit SI/DI/SF/DF modes on TARGET_AVX512F.
;; No 512-bit QI or HI vector mode is listed.
6363 (define_mode_iterator VEC_EXTRACT_MODE
6364 [(V32QI "TARGET_AVX") V16QI
6365 (V16HI "TARGET_AVX") V8HI
6366 (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
6367 (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
6368 (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
6369 (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; Expander for extracting a scalar element: operand 0 receives the
;; element of vector operand 1 whose index is the constant in operand 2.
;; All work is delegated to ix86_expand_vector_extract, called with
;; `false' as its first argument.
6371 (define_expand "vec_extract<mode>"
6372 [(match_operand:<ssescalarmode> 0 "register_operand")
6373 (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
6374 (match_operand 2 "const_int_operand")]
6377 ix86_expand_vector_extract (false, operands[0], operands[1],
6378 INTVAL (operands[2]));
6382 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6384 ;; Parallel double-precision floating point element swizzling
6386 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 512-bit vunpckhpd: the result takes elements 1,9,3,11,5,13,7,15 of the
;; combination of operands 1 and 2.
;; NOTE(review): presumably indices 0-7 refer to operand 1 and 8-15 to
;; operand 2 -- confirm.
;; The optional mask suffix comes from <mask_operand3>.
6388 (define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
6389 [(set (match_operand:V8DF 0 "register_operand" "=v")
6392 (match_operand:V8DF 1 "nonimmediate_operand" "v")
6393 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
6394 (parallel [(const_int 1) (const_int 9)
6395 (const_int 3) (const_int 11)
6396 (const_int 5) (const_int 13)
6397 (const_int 7) (const_int 15)])))]
6399 "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6400 [(set_attr "type" "sselog")
6401 (set_attr "prefix" "evex")
6402 (set_attr "mode" "V8DF")])
6404 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; 256-bit vunpckhpd: the result takes elements 1,5,3,7 of the combination
;; of operands 1 and 2.
;; NOTE(review): presumably indices 0-3 refer to operand 1 and 4-7 to
;; operand 2 -- confirm.
6405 (define_insn "avx_unpckhpd256"
6406 [(set (match_operand:V4DF 0 "register_operand" "=x")
6409 (match_operand:V4DF 1 "register_operand" "x")
6410 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6411 (parallel [(const_int 1) (const_int 5)
6412 (const_int 3) (const_int 7)])))]
6414 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
6415 [(set_attr "type" "sselog")
6416 (set_attr "prefix" "vex")
6417 (set_attr "mode" "V4DF")])
;; Interleave-high for V4DF, expanded as a three-step sequence.  Two
;; shuffles with index sets {0,4,2,6} and {1,5,3,7} come first, followed
;; by a final selection {2,3,6,7} into operand 0.  The preparation code
;; allocates two fresh V4DF pseudos as operands 3 and 4.
;; NOTE(review): presumably those pseudos are the destinations of the
;; first two steps -- confirm.
6419 (define_expand "vec_interleave_highv4df"
6423 (match_operand:V4DF 1 "register_operand" "x")
6424 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6425 (parallel [(const_int 0) (const_int 4)
6426 (const_int 2) (const_int 6)])))
6432 (parallel [(const_int 1) (const_int 5)
6433 (const_int 3) (const_int 7)])))
6434 (set (match_operand:V4DF 0 "register_operand")
6439 (parallel [(const_int 2) (const_int 3)
6440 (const_int 6) (const_int 7)])))]
6443 operands[3] = gen_reg_rtx (V4DFmode);
6444 operands[4] = gen_reg_rtx (V4DFmode);
;; Interleave-high expander for V2DF.  Both inputs may be registers or
;; memory.  When ix86_vec_interleave_v2df_operator_ok (operands, 1)
;; rejects the operand combination, operand 2 is forced into a register.
6448 (define_expand "vec_interleave_highv2df"
6449 [(set (match_operand:V2DF 0 "register_operand")
6452 (match_operand:V2DF 1 "nonimmediate_operand")
6453 (match_operand:V2DF 2 "nonimmediate_operand"))
6454 (parallel [(const_int 1)
6458 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
6459 operands[2] = force_reg (V2DFmode, operands[2]);
;; Matching insn for V2DF interleave-high, with six alternatives.  Valid
;; under TARGET_SSE2 when ix86_vec_interleave_v2df_operator_ok
;; (operands, 1) accepts the operands.  The alternatives, in order, emit
;; unpckhpd, vunpckhpd, movddup from %H1, movlpd from %H1, vmovlpd from
;; %H1, and a movhpd store to a memory destination.  The "isa" attribute
;; restricts each alternative (noavx, avx, sse3, noavx, avx, any).
6462 (define_insn "*vec_interleave_highv2df"
6463 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
6466 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
6467 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
6468 (parallel [(const_int 1)
6470 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
6472 unpckhpd\t{%2, %0|%0, %2}
6473 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
6474 %vmovddup\t{%H1, %0|%0, %H1}
6475 movlpd\t{%H1, %0|%0, %H1}
6476 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
6477 %vmovhpd\t{%1, %0|%q0, %1}"
6478 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
6479 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
6480 (set_attr "ssememalign" "64")
6481 (set_attr "prefix_data16" "*,*,*,1,*,1")
6482 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
6483 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Expander for the 512-bit movddup form: a single V8DF source (register
;; or memory) with selection indices 0,8,2,10,4,12,6,14, the same index
;; set used by the unpcklpd512 patterns.
6485 (define_expand "avx512f_movddup512<mask_name>"
6486 [(set (match_operand:V8DF 0 "register_operand")
6489 (match_operand:V8DF 1 "nonimmediate_operand")
6491 (parallel [(const_int 0) (const_int 8)
6492 (const_int 2) (const_int 10)
6493 (const_int 4) (const_int 12)
6494 (const_int 6) (const_int 14)])))]
;; Expander for 512-bit unpcklpd: register operand 1 and register-or-memory
;; operand 2, with selection indices 0,8,2,10,4,12,6,14.
6497 (define_expand "avx512f_unpcklpd512<mask_name>"
6498 [(set (match_operand:V8DF 0 "register_operand")
6501 (match_operand:V8DF 1 "register_operand")
6502 (match_operand:V8DF 2 "nonimmediate_operand"))
6503 (parallel [(const_int 0) (const_int 8)
6504 (const_int 2) (const_int 10)
6505 (const_int 4) (const_int 12)
6506 (const_int 6) (const_int 14)])))]
;; Matching insn for the 512-bit low-unpack selection
;; 0,8,2,10,4,12,6,14.  Alternative 0 ties operand 2 to operand 1
;; (constraint "1") and emits vmovddup from that single source;
;; alternative 1 takes two distinct inputs and emits vunpcklpd.  The
;; optional mask suffix comes from <mask_operand3>.
6509 (define_insn "*avx512f_unpcklpd512<mask_name>"
6510 [(set (match_operand:V8DF 0 "register_operand" "=v,v")
6513 (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
6514 (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
6515 (parallel [(const_int 0) (const_int 8)
6516 (const_int 2) (const_int 10)
6517 (const_int 4) (const_int 12)
6518 (const_int 6) (const_int 14)])))]
6521 vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
6522 vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
6523 [(set_attr "type" "sselog")
6524 (set_attr "prefix" "evex")
6525 (set_attr "mode" "V8DF")])
6527 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for the 256-bit movddup form: a single V4DF source (register
;; or memory) with selection indices 0,4,2,6.
6528 (define_expand "avx_movddup256"
6529 [(set (match_operand:V4DF 0 "register_operand")
6532 (match_operand:V4DF 1 "nonimmediate_operand")
6534 (parallel [(const_int 0) (const_int 4)
6535 (const_int 2) (const_int 6)])))]
;; Expander for 256-bit unpcklpd: register operand 1 and register-or-memory
;; operand 2, with selection indices 0,4,2,6.
6538 (define_expand "avx_unpcklpd256"
6539 [(set (match_operand:V4DF 0 "register_operand")
6542 (match_operand:V4DF 1 "register_operand")
6543 (match_operand:V4DF 2 "nonimmediate_operand"))
6544 (parallel [(const_int 0) (const_int 4)
6545 (const_int 2) (const_int 6)])))]
;; Matching insn for the 256-bit low-unpack selection 0,4,2,6.
;; Alternative 0 takes two distinct inputs and emits vunpcklpd;
;; alternative 1 has a memory operand 1 with operand 2 tied to it
;; (constraint "1") and emits vmovddup from that single source.
6548 (define_insn "*avx_unpcklpd256"
6549 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
6552 (match_operand:V4DF 1 "nonimmediate_operand" " x,m")
6553 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1"))
6554 (parallel [(const_int 0) (const_int 4)
6555 (const_int 2) (const_int 6)])))]
6558 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
6559 vmovddup\t{%1, %0|%0, %1}"
6560 [(set_attr "type" "sselog")
6561 (set_attr "prefix" "vex")
6562 (set_attr "mode" "V4DF")])
;; Interleave-low for V4DF, expanded as a three-step sequence.  Two
;; shuffles with index sets {0,4,2,6} and {1,5,3,7} come first, followed
;; by a final selection {0,1,4,5} into operand 0.  The preparation code
;; allocates two fresh V4DF pseudos as operands 3 and 4.
;; NOTE(review): presumably those pseudos are the destinations of the
;; first two steps -- confirm.
6564 (define_expand "vec_interleave_lowv4df"
6568 (match_operand:V4DF 1 "register_operand" "x")
6569 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
6570 (parallel [(const_int 0) (const_int 4)
6571 (const_int 2) (const_int 6)])))
6577 (parallel [(const_int 1) (const_int 5)
6578 (const_int 3) (const_int 7)])))
6579 (set (match_operand:V4DF 0 "register_operand")
6584 (parallel [(const_int 0) (const_int 1)
6585 (const_int 4) (const_int 5)])))]
6588 operands[3] = gen_reg_rtx (V4DFmode);
6589 operands[4] = gen_reg_rtx (V4DFmode);
;; Interleave-low expander for V2DF.  Both inputs may be registers or
;; memory.  When ix86_vec_interleave_v2df_operator_ok (operands, 0)
;; rejects the operand combination, operand 1 is forced into a register.
6592 (define_expand "vec_interleave_lowv2df"
6593 [(set (match_operand:V2DF 0 "register_operand")
6596 (match_operand:V2DF 1 "nonimmediate_operand")
6597 (match_operand:V2DF 2 "nonimmediate_operand"))
6598 (parallel [(const_int 0)
6602 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
6603 operands[1] = force_reg (V2DFmode, operands[1]);
;; Matching insn for V2DF interleave-low, with six alternatives.  Valid
;; under TARGET_SSE2 when ix86_vec_interleave_v2df_operator_ok
;; (operands, 0) accepts the operands.  The alternatives, in order, emit
;; unpcklpd, vunpcklpd, movddup of operand 1, movhpd from operand 2,
;; vmovhpd from operand 2, and a movlpd store of operand 2 to %H0 of a
;; memory destination.  The "isa" attribute restricts each alternative
;; (noavx, avx, sse3, noavx, avx, any).
6606 (define_insn "*vec_interleave_lowv2df"
6607 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o")
6610 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
6611 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
6612 (parallel [(const_int 0)
6614 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
6616 unpcklpd\t{%2, %0|%0, %2}
6617 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
6618 %vmovddup\t{%1, %0|%0, %q1}
6619 movhpd\t{%2, %0|%0, %q2}
6620 vmovhpd\t{%2, %1, %0|%0, %1, %q2}
6621 %vmovlpd\t{%2, %H0|%H0, %2}"
6622 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
6623 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
6624 (set_attr "ssememalign" "64")
6625 (set_attr "prefix_data16" "*,*,*,1,*,1")
6626 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
6627 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
;; Split for a V2DF value built from register operand 1 and stored to
;; memory operand 0, applied under TARGET_SSE3 once reload has completed.
;; The DFmode view of operand 1's register is stored twice, at byte
;; offsets 0 and 8 of the destination.
6630 [(set (match_operand:V2DF 0 "memory_operand")
6633 (match_operand:V2DF 1 "register_operand")
6635 (parallel [(const_int 0)
6637 "TARGET_SSE3 && reload_completed"
6640 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
6641 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
6642 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Split for a V2DF selection from memory operand 1 whose two indices
;; satisfy operands[3] == operands[2] + 2, applied under TARGET_SSE3.
;; The selection is rewritten as a vec_duplicate of the single DF element
;; at byte offset operands[2] * 8 in the source.
6647 [(set (match_operand:V2DF 0 "register_operand")
6650 (match_operand:V2DF 1 "memory_operand")
6652 (parallel [(match_operand:SI 2 "const_0_to_1_operand")
6653 (match_operand:SI 3 "const_int_operand")])))]
6654 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
6655 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
6657 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
;; vscalef on 128-bit vectors (VF_128), emitted with the scalar mnemonic
;; suffix <ssescalarmodesuffix>.  Operand 2's predicate and constraint,
;; and the optional rounding operand in the template, come from the
;; <round_*> attributes.
6660 (define_insn "avx512f_vmscalef<mode><round_name>"
6661 [(set (match_operand:VF_128 0 "register_operand" "=v")
6664 [(match_operand:VF_128 1 "register_operand" "v")
6665 (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
6670 "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
6671 [(set_attr "prefix" "evex")
6672 (set_attr "mode" "<ssescalarmode>")])
;; vscalef on 512-bit float vectors (VF_512).  Operand 2's predicate and
;; constraint, the optional rounding operand and the optional mask suffix
;; come from the <round_*> and <mask_operand3> attributes.
6674 (define_insn "avx512f_scalef<mode><mask_name><round_name>"
6675 [(set (match_operand:VF_512 0 "register_operand" "=v")
6677 [(match_operand:VF_512 1 "register_operand" "v")
6678 (match_operand:VF_512 2 "<round_nimm_predicate>" "<round_constraint>")]
6681 "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
6682 [(set_attr "prefix" "evex")
6683 (set_attr "mode" "<MODE>")])
;; Expander for the maskz form of vpternlog.  It forwards operands 0-4 to
;; gen_avx512f_vternlog<mode>_maskz_1, inserting an all-zero vector
;; (CONST0_RTX) ahead of the mask register in operand 5.
6685 (define_expand "avx512f_vternlog<mode>_maskz"
6686 [(match_operand:VI48_512 0 "register_operand")
6687 (match_operand:VI48_512 1 "register_operand")
6688 (match_operand:VI48_512 2 "register_operand")
6689 (match_operand:VI48_512 3 "nonimmediate_operand")
6690 (match_operand:SI 4 "const_0_to_255_operand")
6691 (match_operand:<avx512fmaskmode> 5 "register_operand")]
6694 emit_insn (gen_avx512f_vternlog<mode>_maskz_1 (
6695 operands[0], operands[1], operands[2], operands[3],
6696 operands[4], CONST0_RTX (<MODE>mode), operands[5]));
;; vpternlog on VI48_512 vectors.  Operand 1 is tied to the destination
;; (constraint "0"), so the template prints only %0, %2, %3 and the 8-bit
;; immediate in operand 4.  The optional mask suffix comes from
;; <sd_mask_op5>.
6700 (define_insn "avx512f_vternlog<mode><sd_maskz_name>"
6701 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6703 [(match_operand:VI48_512 1 "register_operand" "0")
6704 (match_operand:VI48_512 2 "register_operand" "v")
6705 (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
6706 (match_operand:SI 4 "const_0_to_255_operand")]
6709 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
6710 [(set_attr "type" "sselog")
6711 (set_attr "prefix" "evex")
6712 (set_attr "mode" "<sseinsnmode>")])
;; Masked vpternlog on VI48_512 vectors.  Operand 1 is tied to the
;; destination (constraint "0"), and the mask register in operand 5
;; (constraint "Yk") is printed as the {%5} suffix on the destination.
6714 (define_insn "avx512f_vternlog<mode>_mask"
6715 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6718 [(match_operand:VI48_512 1 "register_operand" "0")
6719 (match_operand:VI48_512 2 "register_operand" "v")
6720 (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
6721 (match_operand:SI 4 "const_0_to_255_operand")]
6724 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
6726 "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
6727 [(set_attr "type" "sselog")
6728 (set_attr "prefix" "evex")
6729 (set_attr "mode" "<sseinsnmode>")])
;; vgetexp on 512-bit float vectors (VF_512): one source (operand 1) and
;; a register destination.  The source predicate and constraint, the
;; optional SAE operand and the optional mask suffix come from the
;; <round_saeonly_*> and <mask_operand2> attributes.
6731 (define_insn "avx512f_getexp<mode><mask_name><round_saeonly_name>"
6732 [(set (match_operand:VF_512 0 "register_operand" "=v")
6733 (unspec:VF_512 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
6736 "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6737 [(set_attr "prefix" "evex")
6738 (set_attr "mode" "<MODE>")])
;; vgetexp on 128-bit vectors (VF_128), emitted with the scalar mnemonic
;; suffix <ssescalarmodesuffix>.  It takes two sources; operand 2's
;; predicate and constraint and the optional SAE operand come from the
;; <round_saeonly_*> attributes.
6740 (define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
6741 [(set (match_operand:VF_128 0 "register_operand" "=v")
6744 [(match_operand:VF_128 1 "register_operand" "v")
6745 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
6750 "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}"
6751 [(set_attr "prefix" "evex")
6752 (set_attr "mode" "<ssescalarmode>")])
;; valign on VI48_512 vectors: register operand 1, register-or-memory
;; operand 2 and an 8-bit immediate in operand 3.  The optional mask
;; suffix comes from <mask_operand4>.
6754 (define_insn "<mask_codefor>avx512f_align<mode><mask_name>"
6755 [(set (match_operand:VI48_512 0 "register_operand" "=v")
6756 (unspec:VI48_512 [(match_operand:VI48_512 1 "register_operand" "v")
6757 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
6758 (match_operand:SI 3 "const_0_to_255_operand")]
6761 "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
6762 [(set_attr "prefix" "evex")
6763 (set_attr "mode" "<sseinsnmode>")])
;; Expander for masked 512-bit shufps.  The 8-bit immediate in operand 3
;; holds four 2-bit fields.  They are expanded into the sixteen explicit
;; element selectors expected by gen_avx512f_shufps512_1_mask: for each
;; group of four results (base offsets 0, 4, 8, 12) the first two
;; selectors are field0/field1 plus the base, and the last two are
;; field2/field3 plus the base plus 16.  Operand 4 and the HImode mask in
;; operand 5 are passed through unchanged.
6765 (define_expand "avx512f_shufps512_mask"
6766 [(match_operand:V16SF 0 "register_operand")
6767 (match_operand:V16SF 1 "register_operand")
6768 (match_operand:V16SF 2 "nonimmediate_operand")
6769 (match_operand:SI 3 "const_0_to_255_operand")
6770 (match_operand:V16SF 4 "register_operand")
6771 (match_operand:HI 5 "register_operand")]
6774 int mask = INTVAL (operands[3]);
6775 emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
6776 GEN_INT ((mask >> 0) & 3),
6777 GEN_INT ((mask >> 2) & 3),
6778 GEN_INT (((mask >> 4) & 3) + 16),
6779 GEN_INT (((mask >> 6) & 3) + 16),
6780 GEN_INT (((mask >> 0) & 3) + 4),
6781 GEN_INT (((mask >> 2) & 3) + 4),
6782 GEN_INT (((mask >> 4) & 3) + 20),
6783 GEN_INT (((mask >> 6) & 3) + 20),
6784 GEN_INT (((mask >> 0) & 3) + 8),
6785 GEN_INT (((mask >> 2) & 3) + 8),
6786 GEN_INT (((mask >> 4) & 3) + 24),
6787 GEN_INT (((mask >> 6) & 3) + 24),
6788 GEN_INT (((mask >> 0) & 3) + 12),
6789 GEN_INT (((mask >> 2) & 3) + 12),
6790 GEN_INT (((mask >> 4) & 3) + 28),
6791 GEN_INT (((mask >> 6) & 3) + 28),
6792 operands[4], operands[5]));
;; Expander for the maskz form of vfixupimm on VF_512.  It forwards
;; operands 0-4 to gen_avx512f_fixupimm<mode>_maskz_1<...>, inserting an
;; all-zero vector (CONST0_RTX) ahead of the mask register in operand 5.
;; <round_saeonly_expand_operand6> appends the extra SAE operand when that
;; variant is instantiated.
6797 (define_expand "avx512f_fixupimm<mode>_maskz<round_saeonly_expand_name>"
6798 [(match_operand:VF_512 0 "register_operand")
6799 (match_operand:VF_512 1 "register_operand")
6800 (match_operand:VF_512 2 "register_operand")
6801 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
6802 (match_operand:SI 4 "const_0_to_255_operand")
6803 (match_operand:<avx512fmaskmode> 5 "register_operand")]
6806 emit_insn (gen_avx512f_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
6807 operands[0], operands[1], operands[2], operands[3],
6808 operands[4], CONST0_RTX (<MODE>mode), operands[5]
6809 <round_saeonly_expand_operand6>));
;; vfixupimm on VF_512 vectors.  Operand 1 is tied to the destination
;; (constraint "0"); operand 3 is an integer vector of mode
;; <sseintvecmode>; operand 4 is an 8-bit immediate.  The optional SAE
;; operand and mask suffix come from <round_saeonly_sd_mask_op5> and
;; <sd_mask_op5>.
6813 (define_insn "avx512f_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
6814 [(set (match_operand:VF_512 0 "register_operand" "=v")
6816 [(match_operand:VF_512 1 "register_operand" "0")
6817 (match_operand:VF_512 2 "register_operand" "v")
6818 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6819 (match_operand:SI 4 "const_0_to_255_operand")]
6822 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}"
6823 [(set_attr "prefix" "evex")
6824 (set_attr "mode" "<MODE>")])
;; Masked vfixupimm on VF_512 vectors.  Operand 1 is tied to the
;; destination (constraint "0"); the mask register in operand 5
;; (constraint "Yk") is printed as the {%5} suffix on the destination.
;; The optional SAE operand comes from <round_saeonly_op6>.
6826 (define_insn "avx512f_fixupimm<mode>_mask<round_saeonly_name>"
6827 [(set (match_operand:VF_512 0 "register_operand" "=v")
6830 [(match_operand:VF_512 1 "register_operand" "0")
6831 (match_operand:VF_512 2 "register_operand" "v")
6832 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6833 (match_operand:SI 4 "const_0_to_255_operand")]
6836 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
6838 "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}"
6839 [(set_attr "prefix" "evex")
6840 (set_attr "mode" "<MODE>")])
;; Expander for the maskz form of vfixupimm on VF_128.  It forwards
;; operands 0-4 to gen_avx512f_sfixupimm<mode>_maskz_1<...>, inserting an
;; all-zero vector (CONST0_RTX) ahead of the mask register in operand 5.
;; <round_saeonly_expand_operand6> appends the extra SAE operand when that
;; variant is instantiated.
6842 (define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
6843 [(match_operand:VF_128 0 "register_operand")
6844 (match_operand:VF_128 1 "register_operand")
6845 (match_operand:VF_128 2 "register_operand")
6846 (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
6847 (match_operand:SI 4 "const_0_to_255_operand")
6848 (match_operand:<avx512fmaskmode> 5 "register_operand")]
6851 emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
6852 operands[0], operands[1], operands[2], operands[3],
6853 operands[4], CONST0_RTX (<MODE>mode), operands[5]
6854 <round_saeonly_expand_operand6>));
;; vfixupimm on VF_128 vectors, emitted with the scalar mnemonic suffix
;; <ssescalarmodesuffix>.  Operand 1 is tied to the destination
;; (constraint "0"); operand 3 is an integer vector of mode
;; <sseintvecmode>; operand 4 is an 8-bit immediate.  The optional SAE
;; operand and mask suffix come from <round_saeonly_sd_mask_op5> and
;; <sd_mask_op5>.
6858 (define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
6859 [(set (match_operand:VF_128 0 "register_operand" "=v")
6862 [(match_operand:VF_128 1 "register_operand" "0")
6863 (match_operand:VF_128 2 "register_operand" "v")
6864 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6865 (match_operand:SI 4 "const_0_to_255_operand")]
6870 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}"
6871 [(set_attr "prefix" "evex")
6872 (set_attr "mode" "<ssescalarmode>")])
;; Masked vfixupimm on VF_128 vectors, emitted with the scalar mnemonic
;; suffix <ssescalarmodesuffix>.  Operand 1 is tied to the destination
;; (constraint "0"); the mask register in operand 5 (constraint "Yk") is
;; printed as the {%5} suffix on the destination.  The optional SAE
;; operand comes from <round_saeonly_op6>.
6874 (define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
6875 [(set (match_operand:VF_128 0 "register_operand" "=v")
6879 [(match_operand:VF_128 1 "register_operand" "0")
6880 (match_operand:VF_128 2 "register_operand" "v")
6881 (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6882 (match_operand:SI 4 "const_0_to_255_operand")]
6887 (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
6889 "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}"
6890 [(set_attr "prefix" "evex")
6891 (set_attr "mode" "<ssescalarmode>")])
;; vrndscale on 512-bit float vectors (VF_512): one vector source
;; (operand 1) and an 8-bit immediate (operand 2).  The source predicate
;; and constraint, the optional SAE operand and the optional mask suffix
;; come from the <round_saeonly_*> and <mask_operand3> attributes.
6893 (define_insn "avx512f_rndscale<mode><mask_name><round_saeonly_name>"
6894 [(set (match_operand:VF_512 0 "register_operand" "=v")
6896 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6897 (match_operand:SI 2 "const_0_to_255_operand")]
6900 "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
6901 [(set_attr "length_immediate" "1")
6902 (set_attr "prefix" "evex")
6903 (set_attr "mode" "<MODE>")])
;; vrndscale on 128-bit vectors (VF_128), emitted with the scalar mnemonic
;; suffix <ssescalarmodesuffix>: two vector sources (operands 1 and 2) and
;; an 8-bit immediate (operand 3).  Operand 2's predicate and constraint
;; and the optional SAE operand come from the <round_saeonly_*>
;; attributes.
;; NOTE(review): the "mode" attribute here is <MODE>, whereas the other
;; VF_128 scalar-suffix patterns nearby use <ssescalarmode> -- confirm
;; this is intended.
6905 (define_insn "avx512f_rndscale<mode><round_saeonly_name>"
6906 [(set (match_operand:VF_128 0 "register_operand" "=v")
6909 [(match_operand:VF_128 1 "register_operand" "v")
6910 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
6911 (match_operand:SI 3 "const_0_to_255_operand")]
6916 "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
6917 [(set_attr "length_immediate" "1")
6918 (set_attr "prefix" "evex")
6919 (set_attr "mode" "<MODE>")])
6921 ;; One bit in mask selects 2 elements.
;; 512-bit vshufps expressed with sixteen explicit element selectors
;; (operands 3-18).  The insn condition requires the selectors of the
;; second, third and fourth groups of four (operands 7-10, 11-14, 15-18)
;; to equal the first group's selectors (operands 3-6) plus 4, 8 and 12
;; respectively.  The output code rebuilds the 8-bit immediate from
;; operands 3-6 -- subtracting 16 from operands 5 and 6 -- and stores it
;; back into operands[3] for printing.  The optional mask suffix comes
;; from <mask_operand19>.
6922 (define_insn "avx512f_shufps512_1<mask_name>"
6923 [(set (match_operand:V16SF 0 "register_operand" "=v")
6926 (match_operand:V16SF 1 "register_operand" "v")
6927 (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
6928 (parallel [(match_operand 3 "const_0_to_3_operand")
6929 (match_operand 4 "const_0_to_3_operand")
6930 (match_operand 5 "const_16_to_19_operand")
6931 (match_operand 6 "const_16_to_19_operand")
6932 (match_operand 7 "const_4_to_7_operand")
6933 (match_operand 8 "const_4_to_7_operand")
6934 (match_operand 9 "const_20_to_23_operand")
6935 (match_operand 10 "const_20_to_23_operand")
6936 (match_operand 11 "const_8_to_11_operand")
6937 (match_operand 12 "const_8_to_11_operand")
6938 (match_operand 13 "const_24_to_27_operand")
6939 (match_operand 14 "const_24_to_27_operand")
6940 (match_operand 15 "const_12_to_15_operand")
6941 (match_operand 16 "const_12_to_15_operand")
6942 (match_operand 17 "const_28_to_31_operand")
6943 (match_operand 18 "const_28_to_31_operand")])))]
6945 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
6946 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
6947 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
6948 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
6949 && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
6950 && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
6951 && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
6952 && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
6953 && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
6954 && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
6955 && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
6956 && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
6959 mask = INTVAL (operands[3]);
6960 mask |= INTVAL (operands[4]) << 2;
6961 mask |= (INTVAL (operands[5]) - 16) << 4;
6962 mask |= (INTVAL (operands[6]) - 16) << 6;
6963 operands[3] = GEN_INT (mask);
6965 return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
6967 [(set_attr "type" "sselog")
6968 (set_attr "length_immediate" "1")
6969 (set_attr "prefix" "evex")
6970 (set_attr "mode" "V16SF")])
;; Expander for masked 512-bit shufpd.  Each bit of the 8-bit immediate in
;; operand 3 is turned into an explicit element selector for
;; gen_avx512f_shufpd512_1_mask: a set bit picks the higher-numbered
;; element of the pair listed on that line, a clear bit the lower one.
;; Operand 4 and the QImode mask in operand 5 are passed through
;; unchanged.
6972 (define_expand "avx512f_shufpd512_mask"
6973 [(match_operand:V8DF 0 "register_operand")
6974 (match_operand:V8DF 1 "register_operand")
6975 (match_operand:V8DF 2 "nonimmediate_operand")
6976 (match_operand:SI 3 "const_0_to_255_operand")
6977 (match_operand:V8DF 4 "register_operand")
6978 (match_operand:QI 5 "register_operand")]
6981 int mask = INTVAL (operands[3]);
6982 emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
6984 GEN_INT (mask & 2 ? 9 : 8),
6985 GEN_INT (mask & 4 ? 3 : 2),
6986 GEN_INT (mask & 8 ? 11 : 10),
6987 GEN_INT (mask & 16 ? 5 : 4),
6988 GEN_INT (mask & 32 ? 13 : 12),
6989 GEN_INT (mask & 64 ? 7 : 6),
6990 GEN_INT (mask & 128 ? 15 : 14),
6991 operands[4], operands[5]));
;; 512-bit vshufpd expressed with eight explicit element selectors
;; (operands 3-10), each restricted by its predicate to one pair of
;; adjacent indices.  The output code rebuilds the 8-bit immediate: bit k
;; is selector k minus the lower index of its pair.  The result is stored
;; back into operands[3] for printing.  The optional mask suffix comes
;; from <mask_operand11>.
6995 (define_insn "avx512f_shufpd512_1<mask_name>"
6996 [(set (match_operand:V8DF 0 "register_operand" "=v")
6999 (match_operand:V8DF 1 "register_operand" "v")
7000 (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
7001 (parallel [(match_operand 3 "const_0_to_1_operand")
7002 (match_operand 4 "const_8_to_9_operand")
7003 (match_operand 5 "const_2_to_3_operand")
7004 (match_operand 6 "const_10_to_11_operand")
7005 (match_operand 7 "const_4_to_5_operand")
7006 (match_operand 8 "const_12_to_13_operand")
7007 (match_operand 9 "const_6_to_7_operand")
7008 (match_operand 10 "const_14_to_15_operand")])))]
7012 mask = INTVAL (operands[3]);
7013 mask |= (INTVAL (operands[4]) - 8) << 1;
7014 mask |= (INTVAL (operands[5]) - 2) << 2;
7015 mask |= (INTVAL (operands[6]) - 10) << 3;
7016 mask |= (INTVAL (operands[7]) - 4) << 4;
7017 mask |= (INTVAL (operands[8]) - 12) << 5;
7018 mask |= (INTVAL (operands[9]) - 6) << 6;
7019 mask |= (INTVAL (operands[10]) - 14) << 7;
7020 operands[3] = GEN_INT (mask);
7022 return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
7024 [(set_attr "type" "sselog")
7025 (set_attr "length_immediate" "1")
7026 (set_attr "prefix" "evex")
7027 (set_attr "mode" "V8DF")])
;; Expander for 256-bit shufpd.  Bits of the immediate in operand 3 are
;; turned into explicit element selectors for gen_avx_shufpd256_1: a set
;; bit picks the higher-numbered element of the pair listed on that line,
;; a clear bit the lower one.
7029 (define_expand "avx_shufpd256"
7030 [(match_operand:V4DF 0 "register_operand")
7031 (match_operand:V4DF 1 "register_operand")
7032 (match_operand:V4DF 2 "nonimmediate_operand")
7033 (match_operand:SI 3 "const_int_operand")]
7036 int mask = INTVAL (operands[3]);
7037 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2],
7039 GEN_INT (mask & 2 ? 5 : 4),
7040 GEN_INT (mask & 4 ? 3 : 2),
7041 GEN_INT (mask & 8 ? 7 : 6)));
;; vshufpd on V4DF with explicit selectors in operands 3..6.  The output
;; code packs them back into a 4-bit immediate (one bit per selector) and
;; stores it in operands[3] for the template.
7045 (define_insn "avx_shufpd256_1"
7046 [(set (match_operand:V4DF 0 "register_operand" "=x")
7049 (match_operand:V4DF 1 "register_operand" "x")
7050 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7051 (parallel [(match_operand 3 "const_0_to_1_operand")
7052 (match_operand 4 "const_4_to_5_operand")
7053 (match_operand 5 "const_2_to_3_operand")
7054 (match_operand 6 "const_6_to_7_operand")])))]
7058 mask = INTVAL (operands[3]);
7059 mask |= (INTVAL (operands[4]) - 4) << 1;
7060 mask |= (INTVAL (operands[5]) - 2) << 2;
7061 mask |= (INTVAL (operands[6]) - 6) << 3;
7062 operands[3] = GEN_INT (mask);
7064 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7066 [(set_attr "type" "sseshuf")
7067 (set_attr "length_immediate" "1")
7068 (set_attr "prefix" "vex")
7069 (set_attr "mode" "V4DF")])
;; Expander for the V2DF shufpd form.  Reads the integer in operand 3 and
;; emits gen_sse2_shufpd_v2df; bit 1 of the mask chooses index 3 or 2 for
;; the last selector argument.
7071 (define_expand "sse2_shufpd"
7072 [(match_operand:V2DF 0 "register_operand")
7073 (match_operand:V2DF 1 "register_operand")
7074 (match_operand:V2DF 2 "nonimmediate_operand")
7075 (match_operand:SI 3 "const_int_operand")]
7078 int mask = INTVAL (operands[3]);
7079 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2],
7081 GEN_INT (mask & 2 ? 3 : 2)));
7085 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; V4DI pattern emitted as vpunpckhqdq (VEX prefix, OI mode); the element
;; selection list begins with index 1.
7086 (define_insn "avx2_interleave_highv4di"
7087 [(set (match_operand:V4DI 0 "register_operand" "=x")
7090 (match_operand:V4DI 1 "register_operand" "x")
7091 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
7092 (parallel [(const_int 1)
7097 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
7098 [(set_attr "type" "sselog")
7099 (set_attr "prefix" "vex")
7100 (set_attr "mode" "OI")])
;; V8DI pattern emitted as vpunpckhqdq (EVEX prefix, XI mode).  The
;; selected indices are 1,9,3,11,5,13,7,15, i.e. the odd elements paired
;; across operands 1 and 2.  The template carries the <mask_operand3>
;; substitution.
7102 (define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
7103 [(set (match_operand:V8DI 0 "register_operand" "=v")
7106 (match_operand:V8DI 1 "register_operand" "v")
7107 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
7108 (parallel [(const_int 1) (const_int 9)
7109 (const_int 3) (const_int 11)
7110 (const_int 5) (const_int 13)
7111 (const_int 7) (const_int 15)])))]
7113 "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7114 [(set_attr "type" "sselog")
7115 (set_attr "prefix" "evex")
7116 (set_attr "mode" "XI")])
;; V2DI high interleave.  Two alternatives: the two-operand punpckhqdq
;; (destination tied to operand 1, isa noavx) and the three-operand
;; vpunpckhqdq (isa avx).
7118 (define_insn "vec_interleave_highv2di"
7119 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7122 (match_operand:V2DI 1 "register_operand" "0,x")
7123 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
7124 (parallel [(const_int 1)
7128 punpckhqdq\t{%2, %0|%0, %2}
7129 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
7130 [(set_attr "isa" "noavx,avx")
7131 (set_attr "type" "sselog")
7132 (set_attr "prefix_data16" "1,*")
7133 (set_attr "prefix" "orig,vex")
7134 (set_attr "mode" "TI")])
;; V4DI pattern emitted as vpunpcklqdq (VEX prefix, OI mode); the element
;; selection list begins with index 0.
7136 (define_insn "avx2_interleave_lowv4di"
7137 [(set (match_operand:V4DI 0 "register_operand" "=x")
7140 (match_operand:V4DI 1 "register_operand" "x")
7141 (match_operand:V4DI 2 "nonimmediate_operand" "xm"))
7142 (parallel [(const_int 0)
7147 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
7148 [(set_attr "type" "sselog")
7149 (set_attr "prefix" "vex")
7150 (set_attr "mode" "OI")])
;; V8DI pattern emitted as vpunpcklqdq (EVEX prefix, XI mode).  The
;; selected indices are 0,8,2,10,4,12,6,14, i.e. the even elements paired
;; across operands 1 and 2.  The template carries the <mask_operand3>
;; substitution.
7152 (define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
7153 [(set (match_operand:V8DI 0 "register_operand" "=v")
7156 (match_operand:V8DI 1 "register_operand" "v")
7157 (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
7158 (parallel [(const_int 0) (const_int 8)
7159 (const_int 2) (const_int 10)
7160 (const_int 4) (const_int 12)
7161 (const_int 6) (const_int 14)])))]
7163 "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7164 [(set_attr "type" "sselog")
7165 (set_attr "prefix" "evex")
7166 (set_attr "mode" "XI")])
;; V2DI low interleave.  Two alternatives: the two-operand punpcklqdq
;; (destination tied to operand 1, isa noavx) and the three-operand
;; vpunpcklqdq (isa avx).
7168 (define_insn "vec_interleave_lowv2di"
7169 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7172 (match_operand:V2DI 1 "register_operand" "0,x")
7173 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm"))
7174 (parallel [(const_int 0)
7178 punpcklqdq\t{%2, %0|%0, %2}
7179 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
7180 [(set_attr "isa" "noavx,avx")
7181 (set_attr "type" "sselog")
7182 (set_attr "prefix_data16" "1,*")
7183 (set_attr "prefix" "orig,vex")
7184 (set_attr "mode" "TI")])
;; shufpd over the VI8F_128 modes: a vec_select from the concatenation of
;; operands 1 and 2 using selectors in operands 3 (0..1) and 4 (2..3).
;; The output code folds both selectors into a 2-bit immediate stored in
;; operands[3], then picks the shufpd or vshufpd template according to
;; which_alternative.
7186 (define_insn "sse2_shufpd_<mode>"
7187 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x")
7188 (vec_select:VI8F_128
7189 (vec_concat:<ssedoublevecmode>
7190 (match_operand:VI8F_128 1 "register_operand" "0,x")
7191 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
7192 (parallel [(match_operand 3 "const_0_to_1_operand")
7193 (match_operand 4 "const_2_to_3_operand")])))]
7197 mask = INTVAL (operands[3]);
7198 mask |= (INTVAL (operands[4]) - 2) << 1;
7199 operands[3] = GEN_INT (mask);
7201 switch (which_alternative)
7204 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
7206 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7211 [(set_attr "isa" "noavx,avx")
7212 (set_attr "type" "sseshuf")
7213 (set_attr "length_immediate" "1")
7214 (set_attr "prefix" "orig,vex")
7215 (set_attr "mode" "V2DF")])
7217 ;; Avoid combining registers from different units in a single alternative,
7218 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 1 of V2DF operand 1 into DF operand 0.  Requires
;; TARGET_SSE2 and rejects the memory-to-memory combination.  Six
;; alternatives; the type attribute classifies them as ssemov, sselog1,
;; fmov and imov, and prefix_data16 is computed from alternative 0 and
;; TARGET_AVX.
7219 (define_insn "sse2_storehpd"
7220 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r")
7222 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
7223 (parallel [(const_int 1)])))]
7224 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7226 %vmovhpd\t{%1, %0|%0, %1}
7228 vunpckhpd\t{%d1, %0|%0, %d1}
7232 [(set_attr "isa" "*,noavx,avx,*,*,*")
7233 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
7234 (set (attr "prefix_data16")
7236 (and (eq_attr "alternative" "0")
7237 (not (match_test "TARGET_AVX")))
7239 (const_string "*")))
7240 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
7241 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
;; After reload (TARGET_SSE2), an element-1 extraction from a V2DF memory
;; operand into a DF register is replaced by a plain move from the same
;; memory re-addressed in DFmode at byte offset 8.
;; NOTE(review): the line that opens this form is not present directly
;; above; it reads as a post-reload split -- confirm.
7244 [(set (match_operand:DF 0 "register_operand")
7246 (match_operand:V2DF 1 "memory_operand")
7247 (parallel [(const_int 1)])))]
7248 "TARGET_SSE2 && reload_completed"
7249 [(set (match_dup 0) (match_dup 1))]
7250 "operands[1] = adjust_address (operands[1], DFmode, 8);")
;; Element-1 extraction from V2DF for targets with SSE but without SSE2
;; (see the condition).  Uses movhps, movhlps or movlps on the high half
;; (%H1) depending on the alternative; memory-to-memory is rejected.
7252 (define_insn "*vec_extractv2df_1_sse"
7253 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
7255 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
7256 (parallel [(const_int 1)])))]
7257 "!TARGET_SSE2 && TARGET_SSE
7258 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7260 movhps\t{%1, %0|%q0, %1}
7261 movhlps\t{%1, %0|%0, %1}
7262 movlps\t{%H1, %0|%0, %H1}"
7263 [(set_attr "type" "ssemov")
7264 (set_attr "ssememalign" "64")
7265 (set_attr "mode" "V2SF,V4SF,V2SF")])
7267 ;; Avoid combining registers from different units in a single alternative,
7268 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Extracts element 0 of V2DF operand 1 into DF operand 0.  Requires
;; TARGET_SSE2 and rejects the memory-to-memory combination.  Five
;; alternatives, typed ssemov (three), fmov and imov; only the first
;; template (%vmovlpd) is visible among the output lines here.
7269 (define_insn "sse2_storelpd"
7270 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r")
7272 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
7273 (parallel [(const_int 0)])))]
7274 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7276 %vmovlpd\t{%1, %0|%0, %1}
7281 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
7282 (set_attr "prefix_data16" "1,*,*,*,*")
7283 (set_attr "prefix" "maybe_vex")
7284 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
;; After reload (TARGET_SSE2), an element-0 extraction from V2DF into a
;; DF register becomes a plain move.  A register source is re-expressed
;; as a DFmode register with the same register number; the other visible
;; assignment re-addresses the operand in DFmode at byte offset 0.
;; NOTE(review): the line that opens this form is not present directly
;; above; it reads as a post-reload split -- confirm.
7287 [(set (match_operand:DF 0 "register_operand")
7289 (match_operand:V2DF 1 "nonimmediate_operand")
7290 (parallel [(const_int 0)])))]
7291 "TARGET_SSE2 && reload_completed"
7292 [(set (match_dup 0) (match_dup 1))]
7294 if (REG_P (operands[1]))
7295 operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));
7297 operands[1] = adjust_address (operands[1], DFmode, 0);
;; Element-0 extraction from V2DF for targets with SSE but without SSE2
;; (see the condition).  Uses movlps for the memory alternatives and
;; movaps for register-to-register; memory-to-memory is rejected.
7300 (define_insn "*vec_extractv2df_0_sse"
7301 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
7303 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
7304 (parallel [(const_int 0)])))]
7305 "!TARGET_SSE2 && TARGET_SSE
7306 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7308 movlps\t{%1, %0|%0, %1}
7309 movaps\t{%1, %0|%0, %1}
7310 movlps\t{%1, %0|%0, %q1}"
7311 [(set_attr "type" "ssemov")
7312 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Expander wrapper around sse2_loadhpd.  It calls
;; ix86_fixup_binary_operands to obtain the destination, emits
;; gen_sse2_loadhpd into it, and copies the result to operands[0] when
;; the returned destination differs from it.
7314 (define_expand "sse2_loadhpd_exp"
7315 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7318 (match_operand:V2DF 1 "nonimmediate_operand")
7319 (parallel [(const_int 0)]))
7320 (match_operand:DF 2 "nonimmediate_operand")))]
7323 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
7325 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
7327 /* Fix up the destination if needed. */
7328 if (dst != operands[0])
7329 emit_move_insn (operands[0], dst);
7334 ;; Avoid combining registers from different units in a single alternative,
7335 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Builds a V2DF from element 0 of operand 1 and DF operand 2.  Requires
;; TARGET_SSE2 and rejects operands 1 and 2 both being memory.  Seven
;; alternatives; the visible templates are movhpd/vmovhpd and
;; unpcklpd/vunpcklpd, and the last three alternatives are typed ssemov,
;; fmov and imov.
;; NOTE(review): the rtx line that combines the two pieces is not visible
;; here; presumably operand 2 becomes the high element -- confirm.
7336 (define_insn "sse2_loadhpd"
7337 [(set (match_operand:V2DF 0 "nonimmediate_operand"
7341 (match_operand:V2DF 1 "nonimmediate_operand"
7343 (parallel [(const_int 0)]))
7344 (match_operand:DF 2 "nonimmediate_operand"
7345 " m,m,x,x,x,*f,r")))]
7346 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7348 movhpd\t{%2, %0|%0, %2}
7349 vmovhpd\t{%2, %1, %0|%0, %1, %2}
7350 unpcklpd\t{%2, %0|%0, %2}
7351 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7355 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
7356 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
7357 (set_attr "ssememalign" "64")
7358 (set_attr "prefix_data16" "1,*,*,*,*,*,*")
7359 (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
7360 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
;; After reload (TARGET_SSE2), when the V2DF destination is memory and
;; its own element 0 is kept, the operation is reduced to a DF store of
;; register operand 1 at byte offset 8 of that memory.
;; NOTE(review): the line that opens this form is not present directly
;; above; it reads as a post-reload split -- confirm.
7363 [(set (match_operand:V2DF 0 "memory_operand")
7365 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
7366 (match_operand:DF 1 "register_operand")))]
7367 "TARGET_SSE2 && reload_completed"
7368 [(set (match_dup 0) (match_dup 1))]
7369 "operands[0] = adjust_address (operands[0], DFmode, 8);")
;; Expander wrapper around sse2_loadlpd.  It calls
;; ix86_fixup_binary_operands to obtain the destination, emits
;; gen_sse2_loadlpd into it, and copies the result to operands[0] when
;; the returned destination differs from it.
7371 (define_expand "sse2_loadlpd_exp"
7372 [(set (match_operand:V2DF 0 "nonimmediate_operand")
7374 (match_operand:DF 2 "nonimmediate_operand")
7376 (match_operand:V2DF 1 "nonimmediate_operand")
7377 (parallel [(const_int 1)]))))]
7380 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
7382 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
7384 /* Fix up the destination if needed. */
7385 if (dst != operands[0])
7386 emit_move_insn (operands[0], dst);
7391 ;; Avoid combining registers from different units in a single alternative,
7392 ;; see comment above inline_secondary_memory_needed function in i386.c
;; Builds a V2DF from DF operand 2 and element 1 of operand 1 (operand 1
;; may also be the constant matched by constraint C).  Requires
;; TARGET_SSE2 and rejects operands 1 and 2 both being memory.  Eleven
;; alternatives: the visible templates cover movsd/movlpd/shufpd/movhpd
;; and their VEX forms; alternative 5 is typed sselog, 9 fmov, 10 imov,
;; and the rest ssemov.
;; NOTE(review): the rtx line that combines the two pieces is not visible
;; here; presumably operand 2 becomes the low element -- confirm.
7393 (define_insn "sse2_loadlpd"
7394 [(set (match_operand:V2DF 0 "nonimmediate_operand"
7395 "=x,x,x,x,x,x,x,x,m,m ,m")
7397 (match_operand:DF 2 "nonimmediate_operand"
7398 " m,m,m,x,x,0,0,x,x,*f,r")
7400 (match_operand:V2DF 1 "vector_move_operand"
7401 " C,0,x,0,x,x,o,o,0,0 ,0")
7402 (parallel [(const_int 1)]))))]
7403 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7405 %vmovsd\t{%2, %0|%0, %2}
7406 movlpd\t{%2, %0|%0, %2}
7407 vmovlpd\t{%2, %1, %0|%0, %1, %2}
7408 movsd\t{%2, %0|%0, %2}
7409 vmovsd\t{%2, %1, %0|%0, %1, %2}
7410 shufpd\t{$2, %1, %0|%0, %1, 2}
7411 movhpd\t{%H1, %0|%0, %H1}
7412 vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
7416 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
7418 (cond [(eq_attr "alternative" "5")
7419 (const_string "sselog")
7420 (eq_attr "alternative" "9")
7421 (const_string "fmov")
7422 (eq_attr "alternative" "10")
7423 (const_string "imov")
7425 (const_string "ssemov")))
7426 (set_attr "ssememalign" "64")
7427 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
7428 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
7429 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
7430 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
;; After reload (TARGET_SSE2), when the V2DF destination is memory and
;; its own element 1 is kept, the operation is reduced to a DF store of
;; register operand 1 at byte offset 0 of that memory.
;; NOTE(review): the line that opens this form is not present directly
;; above; it reads as a post-reload split -- confirm.
7433 [(set (match_operand:V2DF 0 "memory_operand")
7435 (match_operand:DF 1 "register_operand")
7436 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
7437 "TARGET_SSE2 && reload_completed"
7438 [(set (match_dup 0) (match_dup 1))]
7439 "operands[0] = adjust_address (operands[0], DFmode, 0);")
;; Nine-alternative pattern over V2DF operands 1 and 2.  The templates
;; use movsd, movlpd, shufpd $2 and movhps forms, with VEX variants
;; selected through the isa attribute.  Alternative 5 is typed sselog,
;; the others ssemov; prefix_data16 depends on alternatives 2 and 4 and
;; on TARGET_AVX.
;; NOTE(review): the rtx code that merges operands 1 and 2 and the insn
;; condition are on lines not visible here.
7441 (define_insn "sse2_movsd"
7442 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o")
7444 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
7445 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
7449 movsd\t{%2, %0|%0, %2}
7450 vmovsd\t{%2, %1, %0|%0, %1, %2}
7451 movlpd\t{%2, %0|%0, %q2}
7452 vmovlpd\t{%2, %1, %0|%0, %1, %q2}
7453 %vmovlpd\t{%2, %0|%q0, %2}
7454 shufpd\t{$2, %1, %0|%0, %1, 2}
7455 movhps\t{%H1, %0|%0, %H1}
7456 vmovhps\t{%H1, %2, %0|%0, %2, %H1}
7457 %vmovhps\t{%1, %H0|%H0, %1}"
7458 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
7461 (eq_attr "alternative" "5")
7462 (const_string "sselog")
7463 (const_string "ssemov")))
7464 (set (attr "prefix_data16")
7466 (and (eq_attr "alternative" "2,4")
7467 (not (match_test "TARGET_AVX")))
7469 (const_string "*")))
7470 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
7471 (set_attr "ssememalign" "64")
7472 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
7473 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
;; Produces a V2DF from DF operand 1.  Two alternatives: the first ties
;; the destination to operand 1 (isa noavx, template line not visible
;; here); the second accepts register or memory and emits %vmovddup
;; (isa sse3).
7475 (define_insn "vec_dupv2df"
7476 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
7478 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
7482 %vmovddup\t{%1, %0|%0, %1}"
7483 [(set_attr "isa" "noavx,sse3")
7484 (set_attr "type" "sselog1")
7485 (set_attr "prefix" "orig,maybe_vex")
7486 (set_attr "mode" "V2DF,DF")])
;; Forms a V2DF from DF operands 1 and 2 (operand 2 may also be the
;; constant matched by constraint C).  Eight alternatives, gated by the
;; isa attribute: unpcklpd/vunpcklpd, %vmovddup when operand 2 matches
;; operand 1, movhpd/vmovhpd, %vmovsd, and movlhps/movhps for the noavx
;; fallbacks.  Alternatives 0-2 are typed sselog, the rest ssemov.
7488 (define_insn "*vec_concatv2df"
7489 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
7491 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
7492 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
7495 unpcklpd\t{%2, %0|%0, %2}
7496 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7497 %vmovddup\t{%1, %0|%0, %1}
7498 movhpd\t{%2, %0|%0, %2}
7499 vmovhpd\t{%2, %1, %0|%0, %1, %2}
7500 %vmovsd\t{%1, %0|%0, %1}
7501 movlhps\t{%2, %0|%0, %2}
7502 movhps\t{%2, %0|%0, %2}"
7503 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
7506 (eq_attr "alternative" "0,1,2")
7507 (const_string "sselog")
7508 (const_string "ssemov")))
7509 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
7510 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
7511 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
7513 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7515 ;; Parallel integer down-conversion operations
7517 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Destination modes of the down-conversion patterns below, with three
;; per-mode attributes: the source vector mode (pmov_src_mode), its
;; lower-case spelling for pattern names (pmov_src_lower), and the
;; two-letter suffix appended to the vpmov mnemonic (pmov_suff).
7519 (define_mode_iterator PMOV_DST_MODE [V16QI V16HI V8SI V8HI])
7520 (define_mode_attr pmov_src_mode
7521 [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
7522 (define_mode_attr pmov_src_lower
7523 [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
7524 (define_mode_attr pmov_suff
7525 [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
;; Unmasked down-conversion: applies an any_truncate code to register
;; operand 1 (mode <pmov_src_mode>) and writes a PMOV_DST_MODE register or
;; memory destination using vpmov<trunsuffix><pmov_suff>.  The memory
;; attribute marks the second alternative as a store.
7527 (define_insn "*avx512f_<code><pmov_src_lower><mode>2"
7528 [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
7529 (any_truncate:PMOV_DST_MODE
7530 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
7532 "vpmov<trunsuffix><pmov_suff>\t{%1, %0|%0, %1}"
7533 [(set_attr "type" "ssemov")
7534 (set_attr "memory" "none,store")
7535 (set_attr "prefix" "evex")
7536 (set_attr "mode" "<sseinsnmode>")])
;; Masked down-conversion: vec_merge of the truncated operand 1 with
;; operand 2 under mask register operand 3.  Operand 2 is either tied to
;; the destination or, in the register alternative, the constant matched
;; by C; the template prints the mask as {%3} followed by %N2.
7538 (define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
7539 [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
7540 (vec_merge:PMOV_DST_MODE
7541 (any_truncate:PMOV_DST_MODE
7542 (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
7543 (match_operand:PMOV_DST_MODE 2 "vector_move_operand" "0C,0")
7544 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
7546 "vpmov<trunsuffix><pmov_suff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7547 [(set_attr "type" "ssemov")
7548 (set_attr "memory" "none,store")
7549 (set_attr "prefix" "evex")
7550 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked down-converting store: memory destination
;; operand 0, register source operand 1, mask register operand 2.
;; NOTE(review): the second vec_merge arm and the condition are on lines
;; not visible here.
7552 (define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
7553 [(set (match_operand:PMOV_DST_MODE 0 "memory_operand")
7554 (vec_merge:PMOV_DST_MODE
7555 (any_truncate:PMOV_DST_MODE
7556 (match_operand:<pmov_src_mode> 1 "register_operand"))
7558 (match_operand:<avx512fmaskmode> 2 "register_operand")))]
;; V8DI to byte down-conversion into a V16QI register, emitted as
;; vpmov<trunsuffix>qb.  The pattern pairs the converted operand 1 with
;; an all-zero V8QI constant vector.
7561 (define_insn "*avx512f_<code>v8div16qi2"
7562 [(set (match_operand:V16QI 0 "register_operand" "=v")
7565 (match_operand:V8DI 1 "register_operand" "v"))
7566 (const_vector:V8QI [(const_int 0) (const_int 0)
7567 (const_int 0) (const_int 0)
7568 (const_int 0) (const_int 0)
7569 (const_int 0) (const_int 0)])))]
7571 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
7572 [(set_attr "type" "ssemov")
7573 (set_attr "prefix" "evex")
7574 (set_attr "mode" "TI")])
;; Store form of the V8DI to byte down-conversion (vpmov<trunsuffix>qb to
;; a V16QI memory destination).  The second half of the stored value is a
;; selection of elements 8..15.
;; NOTE(review): the operand those elements are selected from is on a
;; line not visible here; presumably the destination itself -- confirm.
7576 (define_insn "*avx512f_<code>v8div16qi2_store"
7577 [(set (match_operand:V16QI 0 "memory_operand" "=m")
7580 (match_operand:V8DI 1 "register_operand" "v"))
7583 (parallel [(const_int 8) (const_int 9)
7584 (const_int 10) (const_int 11)
7585 (const_int 12) (const_int 13)
7586 (const_int 14) (const_int 15)]))))]
7588 "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
7589 [(set_attr "type" "ssemov")
7590 (set_attr "memory" "store")
7591 (set_attr "prefix" "evex")
7592 (set_attr "mode" "TI")])
;; Masked V8DI to byte down-conversion into a V16QI register.  Under QI
;; mask operand 3 the converted bytes are merged with elements 0..7 of
;; operand 2 (tied to the destination or the constant matched by C), and
;; the result is paired with an all-zero V8QI constant vector.
7594 (define_insn "avx512f_<code>v8div16qi2_mask"
7595 [(set (match_operand:V16QI 0 "register_operand" "=v")
7599 (match_operand:V8DI 1 "register_operand" "v"))
7601 (match_operand:V16QI 2 "vector_move_operand" "0C")
7602 (parallel [(const_int 0) (const_int 1)
7603 (const_int 2) (const_int 3)
7604 (const_int 4) (const_int 5)
7605 (const_int 6) (const_int 7)]))
7606 (match_operand:QI 3 "register_operand" "Yk"))
7607 (const_vector:V8QI [(const_int 0) (const_int 0)
7608 (const_int 0) (const_int 0)
7609 (const_int 0) (const_int 0)
7610 (const_int 0) (const_int 0)])))]
7612 "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
7613 [(set_attr "type" "ssemov")
7614 (set_attr "prefix" "evex")
7615 (set_attr "mode" "TI")])
;; Masked store form of the V8DI to byte down-conversion: V16QI memory
;; destination, register source operand 1, QI mask operand 2.  The
;; pattern involves one selection of elements 0..7 (merged under the
;; mask) and one of elements 8..15.
;; NOTE(review): the operands those selections read are on lines not
;; visible here; presumably the destination itself -- confirm.
7617 (define_insn "avx512f_<code>v8div16qi2_mask_store"
7618 [(set (match_operand:V16QI 0 "memory_operand" "=m")
7622 (match_operand:V8DI 1 "register_operand" "v"))
7625 (parallel [(const_int 0) (const_int 1)
7626 (const_int 2) (const_int 3)
7627 (const_int 4) (const_int 5)
7628 (const_int 6) (const_int 7)]))
7629 (match_operand:QI 2 "register_operand" "Yk"))
7632 (parallel [(const_int 8) (const_int 9)
7633 (const_int 10) (const_int 11)
7634 (const_int 12) (const_int 13)
7635 (const_int 14) (const_int 15)]))))]
7637 "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
7638 [(set_attr "type" "ssemov")
7639 (set_attr "memory" "store")
7640 (set_attr "prefix" "evex")
7641 (set_attr "mode" "TI")])
7643 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7645 ;; Parallel integral arithmetic
7647 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Negation expander for the VI_AVX2 modes.  The preparation statement
;; sets operands[2] to a register holding the zero constant of the mode.
;; NOTE(review): the rtx that consumes operand 2 is on lines not visible
;; here; presumably a subtraction from zero -- confirm.
7649 (define_expand "neg<mode>2"
7650 [(set (match_operand:VI_AVX2 0 "register_operand")
7653 (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
7655 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Expander for vector integer add/subtract over the VI_AVX2 modes
;; (optionally masked via <mask_name>).  Requires TARGET_SSE2 plus the
;; mask mode condition; operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
7657 (define_expand "<plusminus_insn><mode>3<mask_name>"
7658 [(set (match_operand:VI_AVX2 0 "register_operand")
7660 (match_operand:VI_AVX2 1 "nonimmediate_operand")
7661 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
7662 "TARGET_SSE2 && <mask_mode512bit_condition>"
7663 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Vector integer add/subtract instruction.  Alternative 0 is the
;; two-operand p<op> form with the destination tied to operand 1 (isa
;; noavx); alternative 1 is the three-operand vp<op> form, which also
;; prints the optional mask operand.  The condition additionally checks
;; ix86_binary_operator_ok.
7665 (define_insn "*<plusminus_insn><mode>3<mask_name>"
7666 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
7668 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
7669 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
7670 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>"
7672 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
7673 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7674 [(set_attr "isa" "noavx,avx")
7675 (set_attr "type" "sseiadd")
7676 (set_attr "prefix_data16" "1,*")
7677 (set_attr "prefix" "<mask_prefix3>")
7678 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the sat_plusminus codes over the VI12_AVX2 modes;
;; operands are legitimized with ix86_fixup_binary_operands_no_copy.
7680 (define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
7681 [(set (match_operand:VI12_AVX2 0 "register_operand")
7682 (sat_plusminus:VI12_AVX2
7683 (match_operand:VI12_AVX2 1 "nonimmediate_operand")
7684 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
7686 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Instruction for the sat_plusminus codes over the VI12_AVX2 modes.
;; Alternative 0 is the two-operand p<op> form with the destination tied
;; to operand 1 (isa noavx); alternative 1 is the three-operand vp<op>
;; form (isa avx).  Requires TARGET_SSE2 and ix86_binary_operator_ok.
7688 (define_insn "*<sse2_avx2>_<plusminus_insn><mode>3"
7689 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
7690 (sat_plusminus:VI12_AVX2
7691 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
7692 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
7693 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
7695 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
7696 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
7697 [(set_attr "isa" "noavx,avx")
7698 (set_attr "type" "sseiadd")
7699 (set_attr "prefix_data16" "1,*")
7700 (set_attr "prefix" "orig,vex")
7701 (set_attr "mode" "TI")])
;; Multiplication expander for the VI1_AVX2 modes.  It emits no pattern
;; of its own here; the work is delegated to ix86_expand_vecop_qihi with
;; code MULT.
7703 (define_expand "mul<mode>3"
7704 [(set (match_operand:VI1_AVX2 0 "register_operand")
7705 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand")
7706 (match_operand:VI1_AVX2 2 "register_operand")))]
7709 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
;; Multiplication expander for the VI2_AVX2 modes; operands are
;; legitimized with ix86_fixup_binary_operands_no_copy.
7713 (define_expand "mul<mode>3"
7714 [(set (match_operand:VI2_AVX2 0 "register_operand")
7715 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
7716 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
7718 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; Multiplication instruction for the VI2_AVX2 modes: pmullw with the
;; destination tied to operand 1 (isa noavx) or three-operand vpmullw
;; (isa avx).  Operand 1 carries the % commutative modifier.
7720 (define_insn "*mul<mode>3"
7721 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
7722 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
7723 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
7724 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
7726 pmullw\t{%2, %0|%0, %2}
7727 vpmullw\t{%2, %1, %0|%0, %1, %2}"
7728 [(set_attr "isa" "noavx,avx")
7729 (set_attr "type" "sseimul")
7730 (set_attr "prefix_data16" "1,*")
7731 (set_attr "prefix" "orig,vex")
7732 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the high-part multiply over the VI2_AVX2 modes: both
;; operands are extended (any_extend) to <ssedoublemode>, multiplied, and
;; the product is shifted right logically.  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
7734 (define_expand "<s>mul<mode>3_highpart"
7735 [(set (match_operand:VI2_AVX2 0 "register_operand")
7737 (lshiftrt:<ssedoublemode>
7738 (mult:<ssedoublemode>
7739 (any_extend:<ssedoublemode>
7740 (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
7741 (any_extend:<ssedoublemode>
7742 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
7745 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
;; High-part multiply instruction for the VI2_AVX2 modes: pmulh<u>w with
;; the destination tied to operand 1 (isa noavx) or three-operand
;; vpmulh<u>w (isa avx).  Operand 1 carries the % commutative modifier.
7747 (define_insn "*<s>mul<mode>3_highpart"
7748 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
7750 (lshiftrt:<ssedoublemode>
7751 (mult:<ssedoublemode>
7752 (any_extend:<ssedoublemode>
7753 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
7754 (any_extend:<ssedoublemode>
7755 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
7757 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
7759 pmulh<u>w\t{%2, %0|%0, %2}
7760 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}"
7761 [(set_attr "isa" "noavx,avx")
7762 (set_attr "type" "sseimul")
7763 (set_attr "prefix_data16" "1,*")
7764 (set_attr "prefix" "orig,vex")
7765 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the widening multiply of the even-indexed elements
;; (0,2,...,14) of two V16SI operands into V8DI.  Operands are
;; legitimized with ix86_fixup_binary_operands_no_copy.
7767 (define_expand "vec_widen_umult_even_v16si<mask_name>"
7768 [(set (match_operand:V8DI 0 "register_operand")
7772 (match_operand:V16SI 1 "nonimmediate_operand")
7773 (parallel [(const_int 0) (const_int 2)
7774 (const_int 4) (const_int 6)
7775 (const_int 8) (const_int 10)
7776 (const_int 12) (const_int 14)])))
7779 (match_operand:V16SI 2 "nonimmediate_operand")
7780 (parallel [(const_int 0) (const_int 2)
7781 (const_int 4) (const_int 6)
7782 (const_int 8) (const_int 10)
7783 (const_int 12) (const_int 14)])))))]
7785 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; vpmuludq on the even-indexed elements of two V16SI operands, producing
;; V8DI (EVEX, optional mask operand).  Requires TARGET_AVX512F and
;; ix86_binary_operator_ok; operand 1 carries the % commutative modifier.
7787 (define_insn "*vec_widen_umult_even_v16si<mask_name>"
7788 [(set (match_operand:V8DI 0 "register_operand" "=v")
7792 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
7793 (parallel [(const_int 0) (const_int 2)
7794 (const_int 4) (const_int 6)
7795 (const_int 8) (const_int 10)
7796 (const_int 12) (const_int 14)])))
7799 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
7800 (parallel [(const_int 0) (const_int 2)
7801 (const_int 4) (const_int 6)
7802 (const_int 8) (const_int 10)
7803 (const_int 12) (const_int 14)])))))]
7804 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
7805 "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7806 [(set_attr "isa" "avx512f")
7807 (set_attr "type" "sseimul")
7808 (set_attr "prefix_extra" "1")
7809 (set_attr "prefix" "evex")
7810 (set_attr "mode" "XI")])
;; Expander for the widening multiply of the even-indexed elements
;; (0,2,4,6) of two V8SI operands into V4DI.  Operands are legitimized
;; with ix86_fixup_binary_operands_no_copy.
7812 (define_expand "vec_widen_umult_even_v8si"
7813 [(set (match_operand:V4DI 0 "register_operand")
7817 (match_operand:V8SI 1 "nonimmediate_operand")
7818 (parallel [(const_int 0) (const_int 2)
7819 (const_int 4) (const_int 6)])))
7822 (match_operand:V8SI 2 "nonimmediate_operand")
7823 (parallel [(const_int 0) (const_int 2)
7824 (const_int 4) (const_int 6)])))))]
7826 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuludq on the even-indexed elements of two V8SI operands, producing
;; V4DI.  Requires TARGET_AVX2 and ix86_binary_operator_ok; operand 1
;; carries the % commutative modifier.
7828 (define_insn "*vec_widen_umult_even_v8si"
7829 [(set (match_operand:V4DI 0 "register_operand" "=x")
7833 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
7834 (parallel [(const_int 0) (const_int 2)
7835 (const_int 4) (const_int 6)])))
7838 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
7839 (parallel [(const_int 0) (const_int 2)
7840 (const_int 4) (const_int 6)])))))]
7841 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
7842 "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
7843 [(set_attr "type" "sseimul")
7844 (set_attr "prefix" "vex")
7845 (set_attr "mode" "OI")])
;; Expander for the widening multiply of elements 0 and 2 of two V4SI
;; operands into V2DI.  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
7847 (define_expand "vec_widen_umult_even_v4si"
7848 [(set (match_operand:V2DI 0 "register_operand")
7852 (match_operand:V4SI 1 "nonimmediate_operand")
7853 (parallel [(const_int 0) (const_int 2)])))
7856 (match_operand:V4SI 2 "nonimmediate_operand")
7857 (parallel [(const_int 0) (const_int 2)])))))]
7859 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuludq / vpmuludq on elements 0 and 2 of two V4SI operands, producing
;; V2DI.  Requires TARGET_SSE2 and ix86_binary_operator_ok; operand 1
;; carries the % commutative modifier and is tied to the destination in
;; the noavx alternative.
7861 (define_insn "*vec_widen_umult_even_v4si"
7862 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7866 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
7867 (parallel [(const_int 0) (const_int 2)])))
7870 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7871 (parallel [(const_int 0) (const_int 2)])))))]
7872 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
7874 pmuludq\t{%2, %0|%0, %2}
7875 vpmuludq\t{%2, %1, %0|%0, %1, %2}"
7876 [(set_attr "isa" "noavx,avx")
7877 (set_attr "type" "sseimul")
7878 (set_attr "prefix_data16" "1,*")
7879 (set_attr "prefix" "orig,vex")
7880 (set_attr "mode" "TI")])
;; Expander for the widening multiply (smult variant) of the even-indexed
;; elements (0,2,...,14) of two V16SI operands into V8DI.  Operands are
;; legitimized with ix86_fixup_binary_operands_no_copy.
7882 (define_expand "vec_widen_smult_even_v16si<mask_name>"
7883 [(set (match_operand:V8DI 0 "register_operand")
7887 (match_operand:V16SI 1 "nonimmediate_operand")
7888 (parallel [(const_int 0) (const_int 2)
7889 (const_int 4) (const_int 6)
7890 (const_int 8) (const_int 10)
7891 (const_int 12) (const_int 14)])))
7894 (match_operand:V16SI 2 "nonimmediate_operand")
7895 (parallel [(const_int 0) (const_int 2)
7896 (const_int 4) (const_int 6)
7897 (const_int 8) (const_int 10)
7898 (const_int 12) (const_int 14)])))))]
7900 "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
;; vpmuldq on the even-indexed elements of two V16SI operands, producing
;; V8DI (EVEX, optional mask operand).  Requires TARGET_AVX512F and
;; ix86_binary_operator_ok; operand 1 carries the % commutative modifier.
7902 (define_insn "*vec_widen_smult_even_v16si<mask_name>"
7903 [(set (match_operand:V8DI 0 "register_operand" "=v")
7907 (match_operand:V16SI 1 "nonimmediate_operand" "%v")
7908 (parallel [(const_int 0) (const_int 2)
7909 (const_int 4) (const_int 6)
7910 (const_int 8) (const_int 10)
7911 (const_int 12) (const_int 14)])))
7914 (match_operand:V16SI 2 "nonimmediate_operand" "vm")
7915 (parallel [(const_int 0) (const_int 2)
7916 (const_int 4) (const_int 6)
7917 (const_int 8) (const_int 10)
7918 (const_int 12) (const_int 14)])))))]
7919 "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
7920 "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7921 [(set_attr "isa" "avx512f")
7922 (set_attr "type" "sseimul")
7923 (set_attr "prefix_extra" "1")
7924 (set_attr "prefix" "evex")
7925 (set_attr "mode" "XI")])
;; Expander for the widening multiply (smult variant) of the even-indexed
;; elements (0,2,4,6) of two V8SI operands into V4DI.  Operands are
;; legitimized with ix86_fixup_binary_operands_no_copy.
7927 (define_expand "vec_widen_smult_even_v8si"
7928 [(set (match_operand:V4DI 0 "register_operand")
7932 (match_operand:V8SI 1 "nonimmediate_operand")
7933 (parallel [(const_int 0) (const_int 2)
7934 (const_int 4) (const_int 6)])))
7937 (match_operand:V8SI 2 "nonimmediate_operand")
7938 (parallel [(const_int 0) (const_int 2)
7939 (const_int 4) (const_int 6)])))))]
7941 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
;; vpmuldq on the even-indexed elements (0,2,4,6) of two V8SI operands,
;; producing V4DI.  Requires TARGET_AVX2 and ix86_binary_operator_ok.
;; Operand 1 is marked commutative with operand 2 ("%"), matching the
;; other widening-multiply insns: the predicate is nonimmediate_operand
;; and the condition accepts either operand order for MULT, so the
;; modifier lets a memory operand 1 be swapped into the "xm" slot instead
;; of being reloaded into a register.
7943 (define_insn "*vec_widen_smult_even_v8si"
7944 [(set (match_operand:V4DI 0 "register_operand" "=x")
7948 (match_operand:V8SI 1 "nonimmediate_operand" "%x")
7949 (parallel [(const_int 0) (const_int 2)
7950 (const_int 4) (const_int 6)])))
7953 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
7954 (parallel [(const_int 0) (const_int 2)
7955 (const_int 4) (const_int 6)])))))]
7956 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)"
7957 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
7958 [(set_attr "type" "sseimul")
7959 (set_attr "prefix_extra" "1")
7960 (set_attr "prefix" "vex")
7961 (set_attr "mode" "OI")])
;; Expander for the widening multiply of elements 0 and 2 of two V4SI
;; operands into V2DI (SSE4.1 naming).  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
7963 (define_expand "sse4_1_mulv2siv2di3"
7964 [(set (match_operand:V2DI 0 "register_operand")
7968 (match_operand:V4SI 1 "nonimmediate_operand")
7969 (parallel [(const_int 0) (const_int 2)])))
7972 (match_operand:V4SI 2 "nonimmediate_operand")
7973 (parallel [(const_int 0) (const_int 2)])))))]
7975 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
;; pmuldq / vpmuldq on elements 0 and 2 of two V4SI operands, producing
;; V2DI.  Requires TARGET_SSE4_1 and ix86_binary_operator_ok; operand 1
;; carries the % commutative modifier and is tied to the destination in
;; the noavx alternative.
7977 (define_insn "*sse4_1_mulv2siv2di3"
7978 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7982 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x")
7983 (parallel [(const_int 0) (const_int 2)])))
7986 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
7987 (parallel [(const_int 0) (const_int 2)])))))]
7988 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
7990 pmuldq\t{%2, %0|%0, %2}
7991 vpmuldq\t{%2, %1, %0|%0, %1, %2}"
7992 [(set_attr "isa" "noavx,avx")
7993 (set_attr "type" "sseimul")
7994 (set_attr "prefix_data16" "1,*")
7995 (set_attr "prefix_extra" "1")
7996 (set_attr "prefix" "orig,vex")
7997 (set_attr "mode" "TI")])
;; Expander for the V16HI multiply-add producing V8SI.  The pattern uses
;; the even-indexed elements (0,2,...,14) of operands 1 and 2 in one
;; term and the odd-indexed elements (1,3,...,15) of the same operands in
;; the other.  Operands are legitimized with
;; ix86_fixup_binary_operands_no_copy.
7999 (define_expand "avx2_pmaddwd"
8000 [(set (match_operand:V8SI 0 "register_operand")
8005 (match_operand:V16HI 1 "nonimmediate_operand")
8006 (parallel [(const_int 0) (const_int 2)
8007 (const_int 4) (const_int 6)
8008 (const_int 8) (const_int 10)
8009 (const_int 12) (const_int 14)])))
8012 (match_operand:V16HI 2 "nonimmediate_operand")
8013 (parallel [(const_int 0) (const_int 2)
8014 (const_int 4) (const_int 6)
8015 (const_int 8) (const_int 10)
8016 (const_int 12) (const_int 14)]))))
8019 (vec_select:V8HI (match_dup 1)
8020 (parallel [(const_int 1) (const_int 3)
8021 (const_int 5) (const_int 7)
8022 (const_int 9) (const_int 11)
8023 (const_int 13) (const_int 15)])))
8025 (vec_select:V8HI (match_dup 2)
8026 (parallel [(const_int 1) (const_int 3)
8027 (const_int 5) (const_int 7)
8028 (const_int 9) (const_int 11)
8029 (const_int 13) (const_int 15)]))))))]
8031 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
;; vpmaddwd on V16HI operands producing V8SI.  Matches the even-element
;; and odd-element selections of operands 1 and 2; requires TARGET_AVX2
;; and ix86_binary_operator_ok.  Operand 1 carries the % commutative
;; modifier.
8033 (define_insn "*avx2_pmaddwd"
8034 [(set (match_operand:V8SI 0 "register_operand" "=x")
8039 (match_operand:V16HI 1 "nonimmediate_operand" "%x")
8040 (parallel [(const_int 0) (const_int 2)
8041 (const_int 4) (const_int 6)
8042 (const_int 8) (const_int 10)
8043 (const_int 12) (const_int 14)])))
8046 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
8047 (parallel [(const_int 0) (const_int 2)
8048 (const_int 4) (const_int 6)
8049 (const_int 8) (const_int 10)
8050 (const_int 12) (const_int 14)]))))
8053 (vec_select:V8HI (match_dup 1)
8054 (parallel [(const_int 1) (const_int 3)
8055 (const_int 5) (const_int 7)
8056 (const_int 9) (const_int 11)
8057 (const_int 13) (const_int 15)])))
8059 (vec_select:V8HI (match_dup 2)
8060 (parallel [(const_int 1) (const_int 3)
8061 (const_int 5) (const_int 7)
8062 (const_int 9) (const_int 11)
8063 (const_int 13) (const_int 15)]))))))]
8064 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
8065 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
8066 [(set_attr "type" "sseiadd")
8067 (set_attr "prefix" "vex")
8068 (set_attr "mode" "OI")])
;; Expander sse2_pmaddwd: V4SI destination, two V8HI nonimmediate inputs.
;; The template selects the even elements (0,2,4,6) and the odd elements
;; (1,3,5,7) of both inputs; operands are canonicalized through
;; ix86_fixup_binary_operands_no_copy (MULT, V8HImode, ...).
8070 (define_expand "sse2_pmaddwd"
8071 [(set (match_operand:V4SI 0 "register_operand")
8076 (match_operand:V8HI 1 "nonimmediate_operand")
8077 (parallel [(const_int 0) (const_int 2)
8078 (const_int 4) (const_int 6)])))
8081 (match_operand:V8HI 2 "nonimmediate_operand")
8082 (parallel [(const_int 0) (const_int 2)
8083 (const_int 4) (const_int 6)]))))
8086 (vec_select:V4HI (match_dup 1)
8087 (parallel [(const_int 1) (const_int 3)
8088 (const_int 5) (const_int 7)])))
8090 (vec_select:V4HI (match_dup 2)
8091 (parallel [(const_int 1) (const_int 3)
8092 (const_int 5) (const_int 7)]))))))]
8094 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Matching insn for the 128-bit pmaddwd shape.  Two alternatives, tagged
;; noavx/avx by the isa attribute: the first ties operand 1 to the destination
;; ("%0") and emits two-operand pmaddwd, the second emits three-operand
;; vpmaddwd.  Requires TARGET_SSE2 and a valid MULT operand combination.
8096 (define_insn "*sse2_pmaddwd"
8097 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
8102 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
8103 (parallel [(const_int 0) (const_int 2)
8104 (const_int 4) (const_int 6)])))
8107 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
8108 (parallel [(const_int 0) (const_int 2)
8109 (const_int 4) (const_int 6)]))))
8112 (vec_select:V4HI (match_dup 1)
8113 (parallel [(const_int 1) (const_int 3)
8114 (const_int 5) (const_int 7)])))
8116 (vec_select:V4HI (match_dup 2)
8117 (parallel [(const_int 1) (const_int 3)
8118 (const_int 5) (const_int 7)]))))))]
8119 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8121 pmaddwd\t{%2, %0|%0, %2}
8122 vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
8123 [(set_attr "isa" "noavx,avx")
8124 (set_attr "type" "sseiadd")
8125 (set_attr "atom_unit" "simul")
8126 (set_attr "prefix_data16" "1,*")
8127 (set_attr "prefix" "orig,vex")
8128 (set_attr "mode" "TI")])
;; Expander for multiplication over the VI4_AVX512F modes (optionally masked).
;; Inputs are accepted as general_vector_operand.  The preparation code has
;; two paths: one forces any input that is not a nonimmediate_operand into a
;; register and then calls ix86_fixup_binary_operands_no_copy; the other
;; hands the three operands to ix86_expand_sse2_mulv4si3.
8130 (define_expand "mul<mode>3<mask_name>"
8131 [(set (match_operand:VI4_AVX512F 0 "register_operand")
8133 (match_operand:VI4_AVX512F 1 "general_vector_operand")
8134 (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
8135 "TARGET_SSE2 && <mask_mode512bit_condition>"
8139 if (!nonimmediate_operand (operands[1], <MODE>mode))
8140 operands[1] = force_reg (<MODE>mode, operands[1]);
8141 if (!nonimmediate_operand (operands[2], <MODE>mode))
8142 operands[2] = force_reg (<MODE>mode, operands[2]);
8143 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
8147 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
;; Matching insn for VI4_AVX512F multiplication.  Requires TARGET_SSE4_1, a
;; valid MULT operand combination and the mask 512-bit condition.  The noavx
;; alternative emits two-operand pmulld; the avx alternative emits
;; three-operand vpmulld with the optional mask operand appended.
8152 (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
8153 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v")
8155 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v")
8156 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
8157 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
8159 pmulld\t{%2, %0|%0, %2}
8160 vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8161 [(set_attr "isa" "noavx,avx")
8162 (set_attr "type" "sseimul")
8163 (set_attr "prefix_extra" "1")
8164 (set_attr "prefix" "<mask_prefix3>")
8165 (set_attr "btver2_decode" "vector,vector")
8166 (set_attr "mode" "<sseinsnmode>")])
;; Expander for multiplication over the VI8_AVX2_AVX512F modes.  All three
;; operands must be registers; the expansion is delegated to
;; ix86_expand_sse2_mulvxdi3.
8168 (define_expand "mul<mode>3"
8169 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
8170 (mult:VI8_AVX2_AVX512F
8171 (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
8172 (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
8175 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
;; Widening multiply (hi variant) over VI124_AVX2 register inputs; the result
;; has mode <sseunpackmode>.  Generated for each any_extend code and expanded
;; by a call to ix86_expand_mul_widen_hilo.
8179 (define_expand "vec_widen_<s>mult_hi_<mode>"
8180 [(match_operand:<sseunpackmode> 0 "register_operand")
8181 (any_extend:<sseunpackmode>
8182 (match_operand:VI124_AVX2 1 "register_operand"))
8183 (match_operand:VI124_AVX2 2 "register_operand")]
8186 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
;; Widening multiply (lo variant) over VI124_AVX2 register inputs; the result
;; has mode <sseunpackmode>.  Generated for each any_extend code and expanded
;; by a call to ix86_expand_mul_widen_hilo.
8191 (define_expand "vec_widen_<s>mult_lo_<mode>"
8192 [(match_operand:<sseunpackmode> 0 "register_operand")
8193 (any_extend:<sseunpackmode>
8194 (match_operand:VI124_AVX2 1 "register_operand"))
8195 (match_operand:VI124_AVX2 2 "register_operand")]
8198 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
8203 ;; Most widen_<s>mult_even_<mode> can be handled directly from other
8204 ;; named patterns, but signed V4SI needs special help for plain SSE2.
;; Operands: V2DI register destination and two V4SI nonimmediate inputs;
;; the expansion is delegated to ix86_expand_mul_widen_evenodd.
8205 (define_expand "vec_widen_smult_even_v4si"
8206 [(match_operand:V2DI 0 "register_operand")
8207 (match_operand:V4SI 1 "nonimmediate_operand")
8208 (match_operand:V4SI 2 "nonimmediate_operand")]
8211 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Widening multiply (odd variant) over VI4_AVX512F inputs accepted as
;; general_vector_operand; the result has mode <sseunpackmode>.  Generated for
;; each any_extend code and expanded by ix86_expand_mul_widen_evenodd.
8216 (define_expand "vec_widen_<s>mult_odd_<mode>"
8217 [(match_operand:<sseunpackmode> 0 "register_operand")
8218 (any_extend:<sseunpackmode>
8219 (match_operand:VI4_AVX512F 1 "general_vector_operand"))
8220 (match_operand:VI4_AVX512F 2 "general_vector_operand")]
8223 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
;; Dot-product expander for VI2_AVX2 inputs with an <sseunpackmode>
;; accumulator (operand 3) and destination (operand 0).  It emits the
;; pmaddwd pattern into a fresh temporary, then emits a SET of operand 0
;; from a PLUS in <sseunpackmode>.
8228 (define_expand "sdot_prod<mode>"
8229 [(match_operand:<sseunpackmode> 0 "register_operand")
8230 (match_operand:VI2_AVX2 1 "register_operand")
8231 (match_operand:VI2_AVX2 2 "register_operand")
8232 (match_operand:<sseunpackmode> 3 "register_operand")]
8235 rtx t = gen_reg_rtx (<sseunpackmode>mode);
8236 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2]));
8237 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
8238 gen_rtx_PLUS (<sseunpackmode>mode,
8243 ;; Normally we use widen_mul_even/odd, but combine can't quite get it all
8244 ;; back together when madd is available.
;; Emits two XOP multiply-accumulate insns in sequence: xop_pmacsdqh
;; accumulates operand 3 into a V2DI temporary, and xop_pmacsdql then
;; accumulates that temporary into operand 0.
8245 (define_expand "sdot_prodv4si"
8246 [(match_operand:V2DI 0 "register_operand")
8247 (match_operand:V4SI 1 "register_operand")
8248 (match_operand:V4SI 2 "register_operand")
8249 (match_operand:V2DI 3 "register_operand")]
8252 rtx t = gen_reg_rtx (V2DImode);
8253 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
8254 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
;; Named pattern ashr<mode>3 for the VI24_AVX2 modes.  The SImode count
;; (operand 2) uses constraint "xN".  The noavx alternative emits two-operand
;; psra, the avx alternative three-operand vpsra.  length_immediate is chosen
;; by testing whether operand 2 is a const_int_operand.
8258 (define_insn "ashr<mode>3"
8259 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
8261 (match_operand:VI24_AVX2 1 "register_operand" "0,x")
8262 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
8265 psra<ssemodesuffix>\t{%2, %0|%0, %2}
8266 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8267 [(set_attr "isa" "noavx,avx")
8268 (set_attr "type" "sseishft")
8269 (set (attr "length_immediate")
8270 (if_then_else (match_operand 2 "const_int_operand")
8272 (const_string "0")))
8273 (set_attr "prefix_data16" "1,*")
8274 (set_attr "prefix" "orig,vex")
8275 (set_attr "mode" "<sseinsnmode>")])
;; Named pattern ashr<mode>3 (optionally masked) for the VI48_512 modes;
;; requires TARGET_AVX512F.  Alternative 0 takes a register source and a
;; register count ("v"); alternative 1 allows a memory source with an
;; immediate count ("N").  Emits vpsra with the optional mask operand.
8277 (define_insn "ashr<mode>3<mask_name>"
8278 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
8280 (match_operand:VI48_512 1 "nonimmediate_operand" "v,vm")
8281 (match_operand:SI 2 "nonmemory_operand" "v,N")))]
8282 "TARGET_AVX512F && <mask_mode512bit_condition>"
8283 "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8284 [(set_attr "type" "sseishft")
8285 (set (attr "length_immediate")
8286 (if_then_else (match_operand 2 "const_int_operand")
8288 (const_string "0")))
8289 (set_attr "mode" "<sseinsnmode>")])
;; Shift patterns generated for each any_lshift code over the VI248_AVX2
;; modes.  The SImode count uses constraint "xN".  The noavx alternative ties
;; operand 1 to the destination and emits the two-operand form; the avx
;; alternative emits the three-operand v-prefixed form.
8291 (define_insn "<shift_insn><mode>3"
8292 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
8293 (any_lshift:VI248_AVX2
8294 (match_operand:VI248_AVX2 1 "register_operand" "0,x")
8295 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
8298 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
8299 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8300 [(set_attr "isa" "noavx,avx")
8301 (set_attr "type" "sseishft")
8302 (set (attr "length_immediate")
8303 (if_then_else (match_operand 2 "const_int_operand")
8305 (const_string "0")))
8306 (set_attr "prefix_data16" "1,*")
8307 (set_attr "prefix" "orig,vex")
8308 (set_attr "mode" "<sseinsnmode>")])
;; Shift patterns (optionally masked) generated for each any_lshift code over
;; the VI48_512 modes; requires TARGET_AVX512F.  Alternative 0 takes a
;; register source with a register or immediate count ("vN"); alternative 1
;; takes a memory source with an immediate count ("N").  EVEX-prefixed.
8310 (define_insn "<shift_insn><mode>3<mask_name>"
8311 [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
8312 (any_lshift:VI48_512
8313 (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
8314 (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
8315 "TARGET_AVX512F && <mask_mode512bit_condition>"
8316 "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8317 [(set_attr "isa" "avx512f")
8318 (set_attr "type" "sseishft")
8319 (set (attr "length_immediate")
8320 (if_then_else (match_operand 2 "const_int_operand")
8322 (const_string "0")))
8323 (set_attr "prefix" "evex")
8324 (set_attr "mode" "<sseinsnmode>")])
;; Expander vec_shl_<mode> for the VI_128 modes; the count must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code reinterprets
;; operand 1 as V1TImode, allocates a V1TImode temporary (operand 3) and sets
;; operand 4 to that temporary viewed in <MODE>mode, which the second SET
;; copies into operand 0.
8327 (define_expand "vec_shl_<mode>"
8330 (match_operand:VI_128 1 "register_operand")
8331 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
8332 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
8335 operands[1] = gen_lowpart (V1TImode, operands[1]);
8336 operands[3] = gen_reg_rtx (V1TImode);
8337 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Shift pattern over the VIMAX_AVX2 modes, emitted as pslldq / vpslldq.
;; The count operand must satisfy const_0_to_255_mul_8_operand; the output
;; code divides it by 8 before printing, then picks the two-operand or
;; three-operand mnemonic from which_alternative.
8340 (define_insn "<sse2_avx2>_ashl<mode>3"
8341 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
8343 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
8344 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
8347 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
8349 switch (which_alternative)
8352 return "pslldq\t{%2, %0|%0, %2}";
8354 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
8359 [(set_attr "isa" "noavx,avx")
8360 (set_attr "type" "sseishft")
8361 (set_attr "length_immediate" "1")
8362 (set_attr "prefix_data16" "1,*")
8363 (set_attr "prefix" "orig,vex")
8364 (set_attr "mode" "<sseinsnmode>")])
;; Expander vec_shr_<mode> for the VI_128 modes; the count must satisfy
;; const_0_to_255_mul_8_operand.  The preparation code reinterprets
;; operand 1 as V1TImode, allocates a V1TImode temporary (operand 3) and sets
;; operand 4 to that temporary viewed in <MODE>mode, which the second SET
;; copies into operand 0.
8366 (define_expand "vec_shr_<mode>"
8369 (match_operand:VI_128 1 "register_operand")
8370 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
8371 (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
8374 operands[1] = gen_lowpart (V1TImode, operands[1]);
8375 operands[3] = gen_reg_rtx (V1TImode);
8376 operands[4] = gen_lowpart (<MODE>mode, operands[3]);
;; Logical right shift (lshiftrt) over the VIMAX_AVX2 modes, emitted as
;; psrldq / vpsrldq.  The count operand must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing, then picks the mnemonic from which_alternative.
8379 (define_insn "<sse2_avx2>_lshr<mode>3"
8380 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x")
8381 (lshiftrt:VIMAX_AVX2
8382 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x")
8383 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
8386 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
8388 switch (which_alternative)
8391 return "psrldq\t{%2, %0|%0, %2}";
8393 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
8398 [(set_attr "isa" "noavx,avx")
8399 (set_attr "type" "sseishft")
8400 (set_attr "length_immediate" "1")
8401 (set_attr "atom_unit" "sishuf")
8402 (set_attr "prefix_data16" "1,*")
8403 (set_attr "prefix" "orig,vex")
8404 (set_attr "mode" "<sseinsnmode>")])
;; Rotate patterns (any_rotate) over the VI48_512 modes where the rotate
;; count is itself a VI48_512 vector (operand 2, register or memory).
;; Emits the "v"-suffixed rotate mnemonic with the optional mask operand.
8406 (define_insn "avx512f_<rotate>v<mode><mask_name>"
8407 [(set (match_operand:VI48_512 0 "register_operand" "=v")
8408 (any_rotate:VI48_512
8409 (match_operand:VI48_512 1 "register_operand" "v")
8410 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")))]
8412 "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8413 [(set_attr "prefix" "evex")
8414 (set_attr "mode" "<sseinsnmode>")])
;; Rotate patterns (any_rotate) over the VI48_512 modes where the rotate
;; count is an SImode constant satisfying const_0_to_255_operand.  The source
;; (operand 1) may be a register or memory.
8416 (define_insn "avx512f_<rotate><mode><mask_name>"
8417 [(set (match_operand:VI48_512 0 "register_operand" "=v")
8418 (any_rotate:VI48_512
8419 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")
8420 (match_operand:SI 2 "const_0_to_255_operand")))]
8422 "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8423 [(set_attr "prefix" "evex")
8424 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the maxmin codes over the VI124_256_48_512 modes, with the
;; optional mask and rounding substitutions.  Requires TARGET_AVX2 plus the
;; mask/round 512-bit conditions; operands are canonicalized through
;; ix86_fixup_binary_operands_no_copy.
8426 (define_expand "<code><mode>3<mask_name><round_name>"
8427 [(set (match_operand:VI124_256_48_512 0 "register_operand")
8428 (maxmin:VI124_256_48_512
8429 (match_operand:VI124_256_48_512 1 "<round_nimm_predicate>")
8430 (match_operand:VI124_256_48_512 2 "<round_nimm_predicate>")))]
8431 "TARGET_AVX2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
8432 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
;; Matching insn for the maxmin codes over the VI124_256_48_512 modes, with
;; the optional mask and rounding substitutions.  Operand 1 is commutative
;; ("%v").  Requires TARGET_AVX2, a valid operand combination for <CODE> and
;; the mask/round 512-bit conditions.
;; The "mode" attribute follows the iterator via <sseinsnmode> rather than a
;; fixed "OI", because the iterator also covers 512-bit vector modes.
8434 (define_insn "*avx2_<code><mode>3<mask_name><round_name>"
8435 [(set (match_operand:VI124_256_48_512 0 "register_operand" "=v")
8436 (maxmin:VI124_256_48_512
8437 (match_operand:VI124_256_48_512 1 "<round_nimm_predicate>" "%v")
8438 (match_operand:VI124_256_48_512 2 "<round_nimm_predicate>" "<round_constraint>")))]
8439 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
8440 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
8441 "vp<maxmin_int><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
8442 [(set_attr "type" "sseiadd")
8443 (set_attr "prefix_extra" "1")
8444 (set_attr "prefix" "maybe_evex")
8445 (set_attr "mode" "<sseinsnmode>")])
;; Expander for min/max over the VI8_AVX2 modes, lowered to a vector
;; conditional.  It fills an xops array for ix86_expand_int_vcond: for
;; SMAX/UMAX the value operands are (operands[1], operands[2]), otherwise
;; they are swapped.  The comparison is GTU for UMAX/UMIN and GT otherwise,
;; applied to operands[1] and operands[2].
8447 (define_expand "<code><mode>3"
8448 [(set (match_operand:VI8_AVX2 0 "register_operand")
8450 (match_operand:VI8_AVX2 1 "register_operand")
8451 (match_operand:VI8_AVX2 2 "register_operand")))]
8458 xops[0] = operands[0];
8460 if (<CODE> == SMAX || <CODE> == UMAX)
8462 xops[1] = operands[1];
8463 xops[2] = operands[2];
8467 xops[1] = operands[2];
8468 xops[2] = operands[1];
8471 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
8473 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
8474 xops[4] = operands[1];
8475 xops[5] = operands[2];
8477 ok = ix86_expand_int_vcond (xops);
;; Expander for min/max over the VI124_128 modes using a GT comparison.
;; When TARGET_SSE4_1 holds or the mode is V8HImode the operands are only
;; canonicalized via ix86_fixup_binary_operands_no_copy.  The remaining code
;; forces both inputs into registers, fills xops (value operands in either
;; order, a GT comparison of operands[1] and operands[2]) and calls
;; ix86_expand_int_vcond.
8482 (define_expand "<code><mode>3"
8483 [(set (match_operand:VI124_128 0 "register_operand")
8485 (match_operand:VI124_128 1 "nonimmediate_operand")
8486 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8489 if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
8490 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
8496 xops[0] = operands[0];
8497 operands[1] = force_reg (<MODE>mode, operands[1]);
8498 operands[2] = force_reg (<MODE>mode, operands[2]);
8502 xops[1] = operands[1];
8503 xops[2] = operands[2];
8507 xops[1] = operands[2];
8508 xops[2] = operands[1];
8511 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
8512 xops[4] = operands[1];
8513 xops[5] = operands[2];
8515 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 min/max insn over the VI14_128 modes.  Requires TARGET_SSE4_1 and a
;; valid operand combination for <CODE>.  The noavx alternative emits the
;; two-operand form, the avx alternative the three-operand v-prefixed form.
8521 (define_insn "*sse4_1_<code><mode>3"
8522 [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
8524 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x")
8525 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))]
8526 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8528 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
8529 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8530 [(set_attr "isa" "noavx,avx")
8531 (set_attr "type" "sseiadd")
8532 (set_attr "prefix_extra" "1,*")
8533 (set_attr "prefix" "orig,vex")
8534 (set_attr "mode" "TI")])
;; SSE2 min/max insn for V8HI, emitted with the "w" element suffix.
;; Requires TARGET_SSE2 and a valid operand combination for <CODE>.
;; Alternatives are tagged noavx (two-operand) and avx (three-operand).
8536 (define_insn "*<code>v8hi3"
8537 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
8539 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
8540 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
8541 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
8543 p<maxmin_int>w\t{%2, %0|%0, %2}
8544 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
8545 [(set_attr "isa" "noavx,avx")
8546 (set_attr "type" "sseiadd")
8547 (set_attr "prefix_data16" "1,*")
8548 (set_attr "prefix_extra" "*,1")
8549 (set_attr "prefix" "orig,vex")
8550 (set_attr "mode" "TI")])
;; Expander for min/max over the VI124_128 modes using a GTU comparison.
;; When TARGET_SSE4_1 holds or the mode is V16QImode the operands are only
;; canonicalized.  UMAX on V8HImode is built from gen_sse2_ussubv8hi3
;; followed by gen_addv8hi3, using a fresh temporary when the destination
;; equals operand 2.  The remaining code forces both inputs into registers,
;; fills xops with a GTU comparison and calls ix86_expand_int_vcond.
8552 (define_expand "<code><mode>3"
8553 [(set (match_operand:VI124_128 0 "register_operand")
8555 (match_operand:VI124_128 1 "nonimmediate_operand")
8556 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8559 if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
8560 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
8561 else if (<CODE> == UMAX && <MODE>mode == V8HImode)
8563 rtx op0 = operands[0], op2 = operands[2], op3 = op0;
8564 operands[1] = force_reg (<MODE>mode, operands[1]);
8565 if (rtx_equal_p (op3, op2))
8566 op3 = gen_reg_rtx (V8HImode);
8567 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
8568 emit_insn (gen_addv8hi3 (op0, op3, op2));
8576 operands[1] = force_reg (<MODE>mode, operands[1]);
8577 operands[2] = force_reg (<MODE>mode, operands[2]);
8579 xops[0] = operands[0];
8583 xops[1] = operands[1];
8584 xops[2] = operands[2];
8588 xops[1] = operands[2];
8589 xops[2] = operands[1];
8592 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
8593 xops[4] = operands[1];
8594 xops[5] = operands[2];
8596 ok = ix86_expand_int_vcond (xops);
;; SSE4.1 min/max insn over the VI24_128 modes.  Requires TARGET_SSE4_1 and a
;; valid operand combination for <CODE>.  The noavx alternative emits the
;; two-operand form, the avx alternative the three-operand v-prefixed form.
8602 (define_insn "*sse4_1_<code><mode>3"
8603 [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
8605 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x")
8606 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))]
8607 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
8609 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
8610 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8611 [(set_attr "isa" "noavx,avx")
8612 (set_attr "type" "sseiadd")
8613 (set_attr "prefix_extra" "1,*")
8614 (set_attr "prefix" "orig,vex")
8615 (set_attr "mode" "TI")])
;; SSE2 min/max insn for V16QI, emitted with the "b" element suffix.
;; Requires TARGET_SSE2 and a valid operand combination for <CODE>.
;; Alternatives are tagged noavx (two-operand) and avx (three-operand).
8617 (define_insn "*<code>v16qi3"
8618 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
8620 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
8621 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
8622 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
8624 p<maxmin_int>b\t{%2, %0|%0, %2}
8625 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
8626 [(set_attr "isa" "noavx,avx")
8627 (set_attr "type" "sseiadd")
8628 (set_attr "prefix_data16" "1,*")
8629 (set_attr "prefix_extra" "*,1")
8630 (set_attr "prefix" "orig,vex")
8631 (set_attr "mode" "TI")])
8633 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8635 ;; Parallel integral comparisons
8637 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander avx2_eq<mode>3 over the VI_256 modes; both inputs may be
;; registers or memory and are canonicalized through
;; ix86_fixup_binary_operands_no_copy (EQ, ...).
8639 (define_expand "avx2_eq<mode>3"
8640 [(set (match_operand:VI_256 0 "register_operand")
8642 (match_operand:VI_256 1 "nonimmediate_operand")
8643 (match_operand:VI_256 2 "nonimmediate_operand")))]
8645 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Matching insn for 256-bit integer equality compare (VI_256).  Operand 1 is
;; commutative ("%x").  Requires TARGET_AVX2 and a valid EQ operand
;; combination; emits three-operand vpcmpeq with a VEX prefix.
8647 (define_insn "*avx2_eq<mode>3"
8648 [(set (match_operand:VI_256 0 "register_operand" "=x")
8650 (match_operand:VI_256 1 "nonimmediate_operand" "%x")
8651 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
8652 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8653 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8654 [(set_attr "type" "ssecmp")
8655 (set_attr "prefix_extra" "1")
8656 (set_attr "prefix" "vex")
8657 (set_attr "mode" "OI")])
;; Expander for the AVX-512 equality compare over the VI48_512 modes.  The
;; destination has mode <avx512fmaskmode> and the comparison is wrapped in an
;; unspec.  Operands are canonicalized through
;; ix86_fixup_binary_operands_no_copy (EQ, ...).
8659 (define_expand "avx512f_eq<mode>3<mask_scalar_merge_name>"
8660 [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
8661 (unspec:<avx512fmaskmode>
8662 [(match_operand:VI48_512 1 "register_operand")
8663 (match_operand:VI48_512 2 "nonimmediate_operand")]
8666 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Matching insn for the AVX-512 equality compare over the VI48_512 modes.
;; The destination uses constraint "Yk" and mode <avx512fmaskmode>.  Requires
;; TARGET_AVX512F and a valid EQ operand combination; emits vpcmpeq with the
;; optional scalar-merge mask operand.
8668 (define_insn "avx512f_eq<mode>3<mask_scalar_merge_name>_1"
8669 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
8670 (unspec:<avx512fmaskmode>
8671 [(match_operand:VI48_512 1 "register_operand" "%v")
8672 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
8674 "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8675 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
8676 [(set_attr "type" "ssecmp")
8677 (set_attr "prefix_extra" "1")
8678 (set_attr "prefix" "evex")
8679 (set_attr "mode" "<sseinsnmode>")])
;; Matching insn for V2DI equality compare.  Requires TARGET_SSE4_1 and a
;; valid EQ operand combination.  The noavx alternative emits two-operand
;; pcmpeqq, the avx alternative three-operand vpcmpeqq.
8681 (define_insn "*sse4_1_eqv2di3"
8682 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8684 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x")
8685 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
8686 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
8688 pcmpeqq\t{%2, %0|%0, %2}
8689 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
8690 [(set_attr "isa" "noavx,avx")
8691 (set_attr "type" "ssecmp")
8692 (set_attr "prefix_extra" "1")
8693 (set_attr "prefix" "orig,vex")
8694 (set_attr "mode" "TI")])
;; Matching insn for equality compare over the VI124_128 modes.  Enabled
;; only for TARGET_SSE2 without TARGET_XOP, with a valid EQ operand
;; combination.  Emits pcmpeq (noavx) or vpcmpeq (avx).
8696 (define_insn "*sse2_eq<mode>3"
8697 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
8699 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
8700 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
8701 "TARGET_SSE2 && !TARGET_XOP
8702 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
8704 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
8705 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8706 [(set_attr "isa" "noavx,avx")
8707 (set_attr "type" "ssecmp")
8708 (set_attr "prefix_data16" "1,*")
8709 (set_attr "prefix" "orig,vex")
8710 (set_attr "mode" "TI")])
;; Expander sse2_eq<mode>3 over the VI124_128 modes.  Enabled only for
;; TARGET_SSE2 without TARGET_XOP; operands are canonicalized through
;; ix86_fixup_binary_operands_no_copy (EQ, ...).
8712 (define_expand "sse2_eq<mode>3"
8713 [(set (match_operand:VI124_128 0 "register_operand")
8715 (match_operand:VI124_128 1 "nonimmediate_operand")
8716 (match_operand:VI124_128 2 "nonimmediate_operand")))]
8717 "TARGET_SSE2 && !TARGET_XOP "
8718 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
;; Expander sse4_1_eqv2di3 for V2DI; operands are canonicalized through
;; ix86_fixup_binary_operands_no_copy (EQ, V2DImode, ...).
8720 (define_expand "sse4_1_eqv2di3"
8721 [(set (match_operand:V2DI 0 "register_operand")
8723 (match_operand:V2DI 1 "nonimmediate_operand")
8724 (match_operand:V2DI 2 "nonimmediate_operand")))]
8726 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
;; V2DI greater-than compare, emitted as pcmpgtq (noavx) or vpcmpgtq (avx).
;; Operand 1 must be a register and carries no commutative marker; operand 2
;; may be a register or memory.
8728 (define_insn "sse4_2_gtv2di3"
8729 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
8731 (match_operand:V2DI 1 "register_operand" "0,x")
8732 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))]
8735 pcmpgtq\t{%2, %0|%0, %2}
8736 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
8737 [(set_attr "isa" "noavx,avx")
8738 (set_attr "type" "ssecmp")
8739 (set_attr "prefix_extra" "1")
8740 (set_attr "prefix" "orig,vex")
8741 (set_attr "mode" "TI")])
;; Greater-than compare over the VI_256 modes, emitted as three-operand
;; vpcmpgt with a VEX prefix.  Operand 1 must be a register; operand 2 may be
;; a register or memory.
8743 (define_insn "avx2_gt<mode>3"
8744 [(set (match_operand:VI_256 0 "register_operand" "=x")
8746 (match_operand:VI_256 1 "register_operand" "x")
8747 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
8749 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8750 [(set_attr "type" "ssecmp")
8751 (set_attr "prefix_extra" "1")
8752 (set_attr "prefix" "vex")
8753 (set_attr "mode" "OI")])
;; AVX-512 greater-than compare over the VI48_512 modes, represented as
;; UNSPEC_MASKED_GT.  The destination uses constraint "Yk" and mode
;; <avx512fmaskmode>.  Emits vpcmpgt with the optional scalar-merge mask
;; operand.
8755 (define_insn "avx512f_gt<mode>3<mask_scalar_merge_name>"
8756 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
8757 (unspec:<avx512fmaskmode>
8758 [(match_operand:VI48_512 1 "register_operand" "v")
8759 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
8761 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
8762 [(set_attr "type" "ssecmp")
8763 (set_attr "prefix_extra" "1")
8764 (set_attr "prefix" "evex")
8765 (set_attr "mode" "<sseinsnmode>")])
;; Greater-than compare over the VI124_128 modes.  Enabled only for
;; TARGET_SSE2 without TARGET_XOP.  Emits pcmpgt (noavx, destination tied to
;; operand 1) or vpcmpgt (avx, three operands).
8767 (define_insn "sse2_gt<mode>3"
8768 [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
8770 (match_operand:VI124_128 1 "register_operand" "0,x")
8771 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
8772 "TARGET_SSE2 && !TARGET_XOP"
8774 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
8775 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
8776 [(set_attr "isa" "noavx,avx")
8777 (set_attr "type" "ssecmp")
8778 (set_attr "prefix_data16" "1,*")
8779 (set_attr "prefix" "orig,vex")
8780 (set_attr "mode" "TI")])
;; vcond expander with V_512 data operands (0, 1, 2) and a comparison
;; (operator 3) between VI_512 operands 4 and 5.  The condition requires both
;; modes to have the same GET_MODE_NUNITS; expansion is delegated to
;; ix86_expand_int_vcond.
8782 (define_expand "vcond<V_512:mode><VI_512:mode>"
8783 [(set (match_operand:V_512 0 "register_operand")
8785 (match_operator 3 ""
8786 [(match_operand:VI_512 4 "nonimmediate_operand")
8787 (match_operand:VI_512 5 "general_operand")])
8788 (match_operand:V_512 1)
8789 (match_operand:V_512 2)))]
8791 && (GET_MODE_NUNITS (<V_512:MODE>mode)
8792 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
8794 bool ok = ix86_expand_int_vcond (operands);
;; vcond expander with V_256 data operands (0, 1, 2) and a comparison
;; (operator 3) between VI_256 operands 4 and 5.  The condition requires both
;; modes to have the same GET_MODE_NUNITS; expansion is delegated to
;; ix86_expand_int_vcond.
8799 (define_expand "vcond<V_256:mode><VI_256:mode>"
8800 [(set (match_operand:V_256 0 "register_operand")
8802 (match_operator 3 ""
8803 [(match_operand:VI_256 4 "nonimmediate_operand")
8804 (match_operand:VI_256 5 "general_operand")])
8805 (match_operand:V_256 1)
8806 (match_operand:V_256 2)))]
8808 && (GET_MODE_NUNITS (<V_256:MODE>mode)
8809 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
8811 bool ok = ix86_expand_int_vcond (operands);
;; vcond expander with V_128 data operands (0, 1, 2) and a comparison
;; (operator 3) between VI124_128 operands 4 and 5.  The condition requires
;; both modes to have the same GET_MODE_NUNITS; expansion is delegated to
;; ix86_expand_int_vcond.
8816 (define_expand "vcond<V_128:mode><VI124_128:mode>"
8817 [(set (match_operand:V_128 0 "register_operand")
8819 (match_operator 3 ""
8820 [(match_operand:VI124_128 4 "nonimmediate_operand")
8821 (match_operand:VI124_128 5 "general_operand")])
8822 (match_operand:V_128 1)
8823 (match_operand:V_128 2)))]
8825 && (GET_MODE_NUNITS (<V_128:MODE>mode)
8826 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
8828 bool ok = ix86_expand_int_vcond (operands);
;; vcond expander with VI8F_128 data operands (0, 1, 2) and a comparison
;; (operator 3) between V2DI operands 4 and 5; expansion is delegated to
;; ix86_expand_int_vcond.
8833 (define_expand "vcond<VI8F_128:mode>v2di"
8834 [(set (match_operand:VI8F_128 0 "register_operand")
8835 (if_then_else:VI8F_128
8836 (match_operator 3 ""
8837 [(match_operand:V2DI 4 "nonimmediate_operand")
8838 (match_operand:V2DI 5 "general_operand")])
8839 (match_operand:VI8F_128 1)
8840 (match_operand:VI8F_128 2)))]
8843 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander with V_512 data operands and a comparison (operator 3)
;; between VI_512 operands 4 and 5, both nonimmediate.  The value operands
;; 1 and 2 are general_operand.  The condition requires both modes to have
;; the same GET_MODE_NUNITS; expansion is delegated to ix86_expand_int_vcond.
8848 (define_expand "vcondu<V_512:mode><VI_512:mode>"
8849 [(set (match_operand:V_512 0 "register_operand")
8851 (match_operator 3 ""
8852 [(match_operand:VI_512 4 "nonimmediate_operand")
8853 (match_operand:VI_512 5 "nonimmediate_operand")])
8854 (match_operand:V_512 1 "general_operand")
8855 (match_operand:V_512 2 "general_operand")))]
8857 && (GET_MODE_NUNITS (<V_512:MODE>mode)
8858 == GET_MODE_NUNITS (<VI_512:MODE>mode))"
8860 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander with V_256 data operands and a comparison (operator 3)
;; between VI_256 operands 4 and 5, both nonimmediate.  The value operands
;; 1 and 2 are general_operand.  The condition requires both modes to have
;; the same GET_MODE_NUNITS; expansion is delegated to ix86_expand_int_vcond.
8865 (define_expand "vcondu<V_256:mode><VI_256:mode>"
8866 [(set (match_operand:V_256 0 "register_operand")
8868 (match_operator 3 ""
8869 [(match_operand:VI_256 4 "nonimmediate_operand")
8870 (match_operand:VI_256 5 "nonimmediate_operand")])
8871 (match_operand:V_256 1 "general_operand")
8872 (match_operand:V_256 2 "general_operand")))]
8874 && (GET_MODE_NUNITS (<V_256:MODE>mode)
8875 == GET_MODE_NUNITS (<VI_256:MODE>mode))"
8877 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander with V_128 data operands and a comparison (operator 3)
;; between VI124_128 operands 4 and 5, both nonimmediate.  The value operands
;; 1 and 2 are general_operand.  The condition requires both modes to have
;; the same GET_MODE_NUNITS; expansion is delegated to ix86_expand_int_vcond.
8882 (define_expand "vcondu<V_128:mode><VI124_128:mode>"
8883 [(set (match_operand:V_128 0 "register_operand")
8885 (match_operator 3 ""
8886 [(match_operand:VI124_128 4 "nonimmediate_operand")
8887 (match_operand:VI124_128 5 "nonimmediate_operand")])
8888 (match_operand:V_128 1 "general_operand")
8889 (match_operand:V_128 2 "general_operand")))]
8891 && (GET_MODE_NUNITS (<V_128:MODE>mode)
8892 == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
8894 bool ok = ix86_expand_int_vcond (operands);
;; vcondu expander with VI8F_128 data operands and a comparison (operator 3)
;; between V2DI operands 4 and 5, both nonimmediate; expansion is delegated
;; to ix86_expand_int_vcond.
8899 (define_expand "vcondu<VI8F_128:mode>v2di"
8900 [(set (match_operand:VI8F_128 0 "register_operand")
8901 (if_then_else:VI8F_128
8902 (match_operator 3 ""
8903 [(match_operand:V2DI 4 "nonimmediate_operand")
8904 (match_operand:V2DI 5 "nonimmediate_operand")])
8905 (match_operand:VI8F_128 1 "general_operand")
8906 (match_operand:VI8F_128 2 "general_operand")))]
8909 bool ok = ix86_expand_int_vcond (operands);
;; Modes accepted by the vec_perm expander: the six 128-bit modes are listed
;; unconditionally, the 256-bit modes are gated on TARGET_AVX2 and the
;; 512-bit modes on TARGET_AVX512F.
8914 (define_mode_iterator VEC_PERM_AVX2
8915 [V16QI V8HI V4SI V2DI V4SF V2DF
8916 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
8917 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
8918 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
8919 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
8920 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
;; Variable permutation expander over the VEC_PERM_AVX2 modes.  Operand 3 is
;; the selector, a register of mode <sseintvecmode>.  Enabled when any of
;; TARGET_SSSE3, TARGET_AVX or TARGET_XOP holds; the work is done by
;; ix86_expand_vec_perm.
8922 (define_expand "vec_perm<mode>"
8923 [(match_operand:VEC_PERM_AVX2 0 "register_operand")
8924 (match_operand:VEC_PERM_AVX2 1 "register_operand")
8925 (match_operand:VEC_PERM_AVX2 2 "register_operand")
8926 (match_operand:<sseintvecmode> 3 "register_operand")]
8927 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
8929 ix86_expand_vec_perm (operands);
;; Modes accepted by the vec_perm_const expander.  Each mode carries its own
;; target condition, ranging from TARGET_SSE for V4SF/V4SI/V2DF/V2DI up to
;; TARGET_AVX512F for the 512-bit modes.
8933 (define_mode_iterator VEC_PERM_CONST
8934 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE")
8935 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE")
8936 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
8937 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
8938 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
8939 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
8940 (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
8941 (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; Constant permutation expander over the VEC_PERM_CONST modes.  Operand 3
;; (mode <sseintvecmode>) has no predicate.  The outcome depends on the
;; return value of ix86_expand_vec_perm_const.
8943 (define_expand "vec_perm_const<mode>"
8944 [(match_operand:VEC_PERM_CONST 0 "register_operand")
8945 (match_operand:VEC_PERM_CONST 1 "register_operand")
8946 (match_operand:VEC_PERM_CONST 2 "register_operand")
8947 (match_operand:<sseintvecmode> 3)]
8950 if (ix86_expand_vec_perm_const (operands))
8956 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8958 ;; Parallel bitwise logical operations
8960 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement over the VI modes, expressed as an XOR.  The preparation
;; code builds a constant vector with every element set to constm1_rtx,
;; forces it into a register and stores it as operand 2.
8962 (define_expand "one_cmpl<mode>2"
8963 [(set (match_operand:VI 0 "register_operand")
8964 (xor:VI (match_operand:VI 1 "nonimmediate_operand")
8968 int i, n = GET_MODE_NUNITS (<MODE>mode);
8969 rtvec v = rtvec_alloc (n);
8971 for (i = 0; i < n; ++i)
8972 RTVEC_ELT (v, i) = constm1_rtx;
8974 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Expander for and-not over the VI_AVX2 modes (optionally masked): operand 1
;; is complemented (not) and must be a register, operand 2 may be a register
;; or memory.  Requires TARGET_SSE2 and the mask 512-bit condition; there is
;; no preparation code.
8977 (define_expand "<sse2_avx2>_andnot<mode>3<mask_name>"
8978 [(set (match_operand:VI_AVX2 0 "register_operand")
8980 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
8981 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
8982 "TARGET_SSE2 && <mask_mode512bit_condition>")
;; Matching insn for and-not over the VI modes (optionally masked).  The
;; output code picks a mnemonic from get_attr_mode (insn), asserting the
;; required target level in each case, then picks a two-operand or
;; three-operand (v-prefixed) operand template from which_alternative and
;; formats the result with snprintf into a static 64-byte buffer.  The "mode"
;; attribute is computed from target tests such as
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, TARGET_AVX2, TARGET_AVX, TARGET_SSE2
;; and optimize_function_for_size_p.
8984 (define_insn "*andnot<mode>3<mask_name>"
8985 [(set (match_operand:VI 0 "register_operand" "=x,v")
8987 (not:VI (match_operand:VI 1 "register_operand" "0,v"))
8988 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
8989 "TARGET_SSE && <mask_mode512bit_condition>"
8991 static char buf[64];
8995 switch (get_attr_mode (insn))
8998 gcc_assert (TARGET_AVX512F);
9000 tmp = "pandn<ssemodesuffix>";
9004 gcc_assert (TARGET_AVX2);
9006 gcc_assert (TARGET_SSE2);
9012 gcc_assert (TARGET_AVX512F);
9014 gcc_assert (TARGET_AVX);
9016 gcc_assert (TARGET_SSE);
9025 switch (which_alternative)
9028 ops = "%s\t{%%2, %%0|%%0, %%2}";
9031 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
9037 snprintf (buf, sizeof (buf), ops, tmp);
9040 [(set_attr "isa" "noavx,avx")
9041 (set_attr "type" "sselog")
9042 (set (attr "prefix_data16")
9044 (and (eq_attr "alternative" "0")
9045 (eq_attr "mode" "TI"))
9047 (const_string "*")))
9048 (set_attr "prefix" "<mask_prefix3>")
9050 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
9051 (const_string "<ssePSmode>")
9052 (match_test "TARGET_AVX2")
9053 (const_string "<sseinsnmode>")
9054 (match_test "TARGET_AVX")
9056 (match_test "<MODE_SIZE> > 16")
9057 (const_string "V8SF")
9058 (const_string "<sseinsnmode>"))
9059 (ior (not (match_test "TARGET_SSE2"))
9060 (match_test "optimize_function_for_size_p (cfun)"))
9061 (const_string "V4SF")
9063 (const_string "<sseinsnmode>")))])
;; Expander for the logical codes over the VI modes.  Inputs may be
;; registers, memory or constant vectors
;; (nonimmediate_or_const_vector_operand); the expansion is delegated to
;; ix86_expand_vector_logical_operator.
9065 (define_expand "<code><mode>3"
9066 [(set (match_operand:VI 0 "register_operand")
9068 (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
9069 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
9072 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
;; Matching insn for the logical codes over the VI modes (optionally masked).
;; Requires TARGET_SSE, the mask 512-bit condition and a valid operand
;; combination for <CODE>.  The output code picks a mnemonic from
;; get_attr_mode (insn), asserting the required target level in each case,
;; then picks a two-operand or three-operand (v-prefixed) operand template
;; from which_alternative and formats the result with snprintf into a static
;; 64-byte buffer.  The "mode" attribute is computed from the same target
;; tests as the and-not insn.
9076 (define_insn "<mask_codefor><code><mode>3<mask_name>"
9077 [(set (match_operand:VI 0 "register_operand" "=x,v")
9079 (match_operand:VI 1 "nonimmediate_operand" "%0,v")
9080 (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
9081 "TARGET_SSE && <mask_mode512bit_condition>
9082 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
9084 static char buf[64];
9088 switch (get_attr_mode (insn))
9091 gcc_assert (TARGET_AVX512F);
9093 tmp = "p<logic><ssemodesuffix>";
9097 gcc_assert (TARGET_AVX2);
9099 gcc_assert (TARGET_SSE2);
9105 gcc_assert (TARGET_AVX512F);
9107 gcc_assert (TARGET_AVX);
9109 gcc_assert (TARGET_SSE);
9118 switch (which_alternative)
9121 ops = "%s\t{%%2, %%0|%%0, %%2}";
9124 ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
9130 snprintf (buf, sizeof (buf), ops, tmp);
9133 [(set_attr "isa" "noavx,avx")
9134 (set_attr "type" "sselog")
9135 (set (attr "prefix_data16")
9137 (and (eq_attr "alternative" "0")
9138 (eq_attr "mode" "TI"))
9140 (const_string "*")))
9141 (set_attr "prefix" "<mask_prefix3>")
9143 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
9144 (const_string "<ssePSmode>")
9145 (match_test "TARGET_AVX2")
9146 (const_string "<sseinsnmode>")
9147 (match_test "TARGET_AVX")
9149 (match_test "<MODE_SIZE> > 16")
9150 (const_string "V8SF")
9151 (const_string "<sseinsnmode>"))
9152 (ior (not (match_test "TARGET_SSE2"))
9153 (match_test "optimize_function_for_size_p (cfun)"))
9154 (const_string "V4SF")
9156 (const_string "<sseinsnmode>")))])
;; Emits vptestm<ssemodesuffix>.  Operand 0 is a mask register (constraint
;; Yk) in <avx512fmaskmode>, set from an unspec of two VI48_512 operands:
;; operand 1 a vector register, operand 2 a vector register or memory.
9158 (define_insn "avx512f_testm<mode>3<mask_scalar_merge_name>"
9159 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
9160 (unspec:<avx512fmaskmode>
9161 [(match_operand:VI48_512 1 "register_operand" "v")
9162 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
9165 "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
9166 [(set_attr "prefix" "evex")
9167 (set_attr "mode" "<sseinsnmode>")])
;; Emits vptestnm<ssemodesuffix>.  Same operand layout as the testm pattern:
;; a mask-register destination (constraint Yk) set from an unspec of two
;; VI48_512 operands, the second of which may be in memory.
9169 (define_insn "avx512f_testnm<mode>3<mask_scalar_merge_name>"
9170 [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
9171 (unspec:<avx512fmaskmode>
9172 [(match_operand:VI48_512 1 "register_operand" "v")
9173 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
9176 "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
9177 [(set_attr "prefix" "evex")
9178 (set_attr "mode" "<sseinsnmode>")])
9180 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9182 ;; Parallel integral element swizzling
9184 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for vec_pack_trunc_<mode>.  Both inputs are reinterpreted in
;; <ssepackmode> with gen_lowpart and handed to
;; ix86_expand_vec_extract_even_odd with a final argument of 0, writing the
;; result to operand 0.
9186 (define_expand "vec_pack_trunc_<mode>"
9187 [(match_operand:<ssepackmode> 0 "register_operand")
9188 (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
9189 (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
9192 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
9193 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
9194 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
;; packsswb / vpacksswb: operand 0 is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operands 1 and 2.
;; Alternative 0 is the two-operand non-AVX form (destination tied to
;; operand 1); alternative 1 is the three-operand VEX form.
9198 (define_insn "<sse2_avx2>_packsswb"
9199 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9200 (vec_concat:VI1_AVX2
9201 (ss_truncate:<ssehalfvecmode>
9202 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
9203 (ss_truncate:<ssehalfvecmode>
9204 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
9207 packsswb\t{%2, %0|%0, %2}
9208 vpacksswb\t{%2, %1, %0|%0, %1, %2}"
9209 [(set_attr "isa" "noavx,avx")
9210 (set_attr "type" "sselog")
9211 (set_attr "prefix_data16" "1,*")
9212 (set_attr "prefix" "orig,vex")
9213 (set_attr "mode" "<sseinsnmode>")])
;; packssdw / vpackssdw: operand 0 is the concatenation of the
;; signed-saturating truncations (ss_truncate) of operands 1 and 2, for the
;; VI2_AVX2 result modes.  Alternative 0 is the two-operand non-AVX form;
;; alternative 1 is the three-operand VEX form.
9215 (define_insn "<sse2_avx2>_packssdw"
9216 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
9217 (vec_concat:VI2_AVX2
9218 (ss_truncate:<ssehalfvecmode>
9219 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
9220 (ss_truncate:<ssehalfvecmode>
9221 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
9224 packssdw\t{%2, %0|%0, %2}
9225 vpackssdw\t{%2, %1, %0|%0, %1, %2}"
9226 [(set_attr "isa" "noavx,avx")
9227 (set_attr "type" "sselog")
9228 (set_attr "prefix_data16" "1,*")
9229 (set_attr "prefix" "orig,vex")
9230 (set_attr "mode" "<sseinsnmode>")])
;; packuswb / vpackuswb: operand 0 is the concatenation of the
;; unsigned-saturating truncations (us_truncate) of operands 1 and 2.
;; Alternative 0 is the two-operand non-AVX form; alternative 1 is the
;; three-operand VEX form.
9232 (define_insn "<sse2_avx2>_packuswb"
9233 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
9234 (vec_concat:VI1_AVX2
9235 (us_truncate:<ssehalfvecmode>
9236 (match_operand:<sseunpackmode> 1 "register_operand" "0,x"))
9237 (us_truncate:<ssehalfvecmode>
9238 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))]
9241 packuswb\t{%2, %0|%0, %2}
9242 vpackuswb\t{%2, %1, %0|%0, %1, %2}"
9243 [(set_attr "isa" "noavx,avx")
9244 (set_attr "type" "sselog")
9245 (set_attr "prefix_data16" "1,*")
9246 (set_attr "prefix" "orig,vex")
9247 (set_attr "mode" "<sseinsnmode>")])
;; vpunpckhbw on V32QI.  The selection list pairs index i with index i+32
;; for i = 8..15 and i = 24..31, i.e. the upper eight bytes of each group of
;; sixteen.  Single VEX alternative; operand 2 may be in memory.
9249 (define_insn "avx2_interleave_highv32qi"
9250 [(set (match_operand:V32QI 0 "register_operand" "=x")
9253 (match_operand:V32QI 1 "register_operand" "x")
9254 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
9255 (parallel [(const_int 8) (const_int 40)
9256 (const_int 9) (const_int 41)
9257 (const_int 10) (const_int 42)
9258 (const_int 11) (const_int 43)
9259 (const_int 12) (const_int 44)
9260 (const_int 13) (const_int 45)
9261 (const_int 14) (const_int 46)
9262 (const_int 15) (const_int 47)
9263 (const_int 24) (const_int 56)
9264 (const_int 25) (const_int 57)
9265 (const_int 26) (const_int 58)
9266 (const_int 27) (const_int 59)
9267 (const_int 28) (const_int 60)
9268 (const_int 29) (const_int 61)
9269 (const_int 30) (const_int 62)
9270 (const_int 31) (const_int 63)])))]
9272 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
9273 [(set_attr "type" "sselog")
9274 (set_attr "prefix" "vex")
9275 (set_attr "mode" "OI")])
;; punpckhbw / vpunpckhbw on V16QI.  The selection list pairs index i with
;; index i+16 for i = 8..15.  Alternative 0 is the two-operand non-AVX form
;; (destination tied to operand 1); alternative 1 is the VEX form.
9277 (define_insn "vec_interleave_highv16qi"
9278 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9281 (match_operand:V16QI 1 "register_operand" "0,x")
9282 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
9283 (parallel [(const_int 8) (const_int 24)
9284 (const_int 9) (const_int 25)
9285 (const_int 10) (const_int 26)
9286 (const_int 11) (const_int 27)
9287 (const_int 12) (const_int 28)
9288 (const_int 13) (const_int 29)
9289 (const_int 14) (const_int 30)
9290 (const_int 15) (const_int 31)])))]
9293 punpckhbw\t{%2, %0|%0, %2}
9294 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
9295 [(set_attr "isa" "noavx,avx")
9296 (set_attr "type" "sselog")
9297 (set_attr "prefix_data16" "1,*")
9298 (set_attr "prefix" "orig,vex")
9299 (set_attr "mode" "TI")])
;; vpunpcklbw on V32QI.  The selection list pairs index i with index i+32
;; for i = 0..7 and i = 16..23, i.e. the lower eight bytes of each group of
;; sixteen.  Single VEX alternative; operand 2 may be in memory.
9301 (define_insn "avx2_interleave_lowv32qi"
9302 [(set (match_operand:V32QI 0 "register_operand" "=x")
9305 (match_operand:V32QI 1 "register_operand" "x")
9306 (match_operand:V32QI 2 "nonimmediate_operand" "xm"))
9307 (parallel [(const_int 0) (const_int 32)
9308 (const_int 1) (const_int 33)
9309 (const_int 2) (const_int 34)
9310 (const_int 3) (const_int 35)
9311 (const_int 4) (const_int 36)
9312 (const_int 5) (const_int 37)
9313 (const_int 6) (const_int 38)
9314 (const_int 7) (const_int 39)
9315 (const_int 16) (const_int 48)
9316 (const_int 17) (const_int 49)
9317 (const_int 18) (const_int 50)
9318 (const_int 19) (const_int 51)
9319 (const_int 20) (const_int 52)
9320 (const_int 21) (const_int 53)
9321 (const_int 22) (const_int 54)
9322 (const_int 23) (const_int 55)])))]
9324 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
9325 [(set_attr "type" "sselog")
9326 (set_attr "prefix" "vex")
9327 (set_attr "mode" "OI")])
;; punpcklbw / vpunpcklbw on V16QI.  The selection list pairs index i with
;; index i+16 for i = 0..7.  Alternative 0 is the two-operand non-AVX form
;; (destination tied to operand 1); alternative 1 is the VEX form.
9329 (define_insn "vec_interleave_lowv16qi"
9330 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
9333 (match_operand:V16QI 1 "register_operand" "0,x")
9334 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm"))
9335 (parallel [(const_int 0) (const_int 16)
9336 (const_int 1) (const_int 17)
9337 (const_int 2) (const_int 18)
9338 (const_int 3) (const_int 19)
9339 (const_int 4) (const_int 20)
9340 (const_int 5) (const_int 21)
9341 (const_int 6) (const_int 22)
9342 (const_int 7) (const_int 23)])))]
9345 punpcklbw\t{%2, %0|%0, %2}
9346 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
9347 [(set_attr "isa" "noavx,avx")
9348 (set_attr "type" "sselog")
9349 (set_attr "prefix_data16" "1,*")
9350 (set_attr "prefix" "orig,vex")
9351 (set_attr "mode" "TI")])
;; vpunpckhwd on V16HI.  The selection list pairs index i with index i+16
;; for i = 4..7 and i = 12..15, i.e. the upper four words of each group of
;; eight.  Single VEX alternative.
9353 (define_insn "avx2_interleave_highv16hi"
9354 [(set (match_operand:V16HI 0 "register_operand" "=x")
9357 (match_operand:V16HI 1 "register_operand" "x")
9358 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
9359 (parallel [(const_int 4) (const_int 20)
9360 (const_int 5) (const_int 21)
9361 (const_int 6) (const_int 22)
9362 (const_int 7) (const_int 23)
9363 (const_int 12) (const_int 28)
9364 (const_int 13) (const_int 29)
9365 (const_int 14) (const_int 30)
9366 (const_int 15) (const_int 31)])))]
9368 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
9369 [(set_attr "type" "sselog")
9370 (set_attr "prefix" "vex")
9371 (set_attr "mode" "OI")])
;; punpckhwd / vpunpckhwd on V8HI.  The selection list pairs index i with
;; index i+8 for i = 4..7.  Alternative 0 is the two-operand non-AVX form;
;; alternative 1 is the VEX form.
9373 (define_insn "vec_interleave_highv8hi"
9374 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9377 (match_operand:V8HI 1 "register_operand" "0,x")
9378 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
9379 (parallel [(const_int 4) (const_int 12)
9380 (const_int 5) (const_int 13)
9381 (const_int 6) (const_int 14)
9382 (const_int 7) (const_int 15)])))]
9385 punpckhwd\t{%2, %0|%0, %2}
9386 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
9387 [(set_attr "isa" "noavx,avx")
9388 (set_attr "type" "sselog")
9389 (set_attr "prefix_data16" "1,*")
9390 (set_attr "prefix" "orig,vex")
9391 (set_attr "mode" "TI")])
;; vpunpcklwd on V16HI.  The selection list pairs index i with index i+16
;; for i = 0..3 and i = 8..11, i.e. the lower four words of each group of
;; eight.  Single VEX alternative.
9393 (define_insn "avx2_interleave_lowv16hi"
9394 [(set (match_operand:V16HI 0 "register_operand" "=x")
9397 (match_operand:V16HI 1 "register_operand" "x")
9398 (match_operand:V16HI 2 "nonimmediate_operand" "xm"))
9399 (parallel [(const_int 0) (const_int 16)
9400 (const_int 1) (const_int 17)
9401 (const_int 2) (const_int 18)
9402 (const_int 3) (const_int 19)
9403 (const_int 8) (const_int 24)
9404 (const_int 9) (const_int 25)
9405 (const_int 10) (const_int 26)
9406 (const_int 11) (const_int 27)])))]
9408 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
9409 [(set_attr "type" "sselog")
9410 (set_attr "prefix" "vex")
9411 (set_attr "mode" "OI")])
;; punpcklwd / vpunpcklwd on V8HI.  The selection list pairs index i with
;; index i+8 for i = 0..3.  Alternative 0 is the two-operand non-AVX form;
;; alternative 1 is the VEX form.
9413 (define_insn "vec_interleave_lowv8hi"
9414 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
9417 (match_operand:V8HI 1 "register_operand" "0,x")
9418 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm"))
9419 (parallel [(const_int 0) (const_int 8)
9420 (const_int 1) (const_int 9)
9421 (const_int 2) (const_int 10)
9422 (const_int 3) (const_int 11)])))]
9425 punpcklwd\t{%2, %0|%0, %2}
9426 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
9427 [(set_attr "isa" "noavx,avx")
9428 (set_attr "type" "sselog")
9429 (set_attr "prefix_data16" "1,*")
9430 (set_attr "prefix" "orig,vex")
9431 (set_attr "mode" "TI")])
;; vpunpckhdq on V8SI.  The selection list pairs index i with index i+8 for
;; i = 2, 3, 6, 7, i.e. the upper two dwords of each group of four.
;; Single VEX alternative.
9433 (define_insn "avx2_interleave_highv8si"
9434 [(set (match_operand:V8SI 0 "register_operand" "=x")
9437 (match_operand:V8SI 1 "register_operand" "x")
9438 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
9439 (parallel [(const_int 2) (const_int 10)
9440 (const_int 3) (const_int 11)
9441 (const_int 6) (const_int 14)
9442 (const_int 7) (const_int 15)])))]
9444 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
9445 [(set_attr "type" "sselog")
9446 (set_attr "prefix" "vex")
9447 (set_attr "mode" "OI")])
;; vpunpckhdq on V16SI (EVEX, "v" register constraints, with the
;; <mask_operand3> hook in the template).  The selection list pairs index i
;; with index i+16 for i = 2, 3, 6, 7, 10, 11, 14, 15, i.e. the upper two
;; dwords of each group of four.
9449 (define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
9450 [(set (match_operand:V16SI 0 "register_operand" "=v")
9453 (match_operand:V16SI 1 "register_operand" "v")
9454 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
9455 (parallel [(const_int 2) (const_int 18)
9456 (const_int 3) (const_int 19)
9457 (const_int 6) (const_int 22)
9458 (const_int 7) (const_int 23)
9459 (const_int 10) (const_int 26)
9460 (const_int 11) (const_int 27)
9461 (const_int 14) (const_int 30)
9462 (const_int 15) (const_int 31)])))]
9464 "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9465 [(set_attr "type" "sselog")
9466 (set_attr "prefix" "evex")
9467 (set_attr "mode" "XI")])
;; punpckhdq / vpunpckhdq on V4SI.  The selection list is 2, 6, 3, 7 (index
;; i paired with i+4 for i = 2, 3).  Alternative 0 is the two-operand
;; non-AVX form; alternative 1 is the VEX form.
9470 (define_insn "vec_interleave_highv4si"
9471 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9474 (match_operand:V4SI 1 "register_operand" "0,x")
9475 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
9476 (parallel [(const_int 2) (const_int 6)
9477 (const_int 3) (const_int 7)])))]
9480 punpckhdq\t{%2, %0|%0, %2}
9481 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
9482 [(set_attr "isa" "noavx,avx")
9483 (set_attr "type" "sselog")
9484 (set_attr "prefix_data16" "1,*")
9485 (set_attr "prefix" "orig,vex")
9486 (set_attr "mode" "TI")])
;; vpunpckldq on V8SI.  The selection list pairs index i with index i+8 for
;; i = 0, 1, 4, 5, i.e. the lower two dwords of each group of four.
;; Single VEX alternative.
9488 (define_insn "avx2_interleave_lowv8si"
9489 [(set (match_operand:V8SI 0 "register_operand" "=x")
9492 (match_operand:V8SI 1 "register_operand" "x")
9493 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))
9494 (parallel [(const_int 0) (const_int 8)
9495 (const_int 1) (const_int 9)
9496 (const_int 4) (const_int 12)
9497 (const_int 5) (const_int 13)])))]
9499 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
9500 [(set_attr "type" "sselog")
9501 (set_attr "prefix" "vex")
9502 (set_attr "mode" "OI")])
;; vpunpckldq on V16SI (EVEX, "v" register constraints, with the
;; <mask_operand3> hook in the template).  The selection list pairs index i
;; with index i+16 for i = 0, 1, 4, 5, 8, 9, 12, 13, i.e. the lower two
;; dwords of each group of four.
9504 (define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
9505 [(set (match_operand:V16SI 0 "register_operand" "=v")
9508 (match_operand:V16SI 1 "register_operand" "v")
9509 (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
9510 (parallel [(const_int 0) (const_int 16)
9511 (const_int 1) (const_int 17)
9512 (const_int 4) (const_int 20)
9513 (const_int 5) (const_int 21)
9514 (const_int 8) (const_int 24)
9515 (const_int 9) (const_int 25)
9516 (const_int 12) (const_int 28)
9517 (const_int 13) (const_int 29)])))]
9519 "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9520 [(set_attr "type" "sselog")
9521 (set_attr "prefix" "evex")
9522 (set_attr "mode" "XI")])
;; punpckldq / vpunpckldq on V4SI.  The selection list is 0, 4, 1, 5 (index
;; i paired with i+4 for i = 0, 1).  Alternative 0 is the two-operand
;; non-AVX form; alternative 1 is the VEX form.
9524 (define_insn "vec_interleave_lowv4si"
9525 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
9528 (match_operand:V4SI 1 "register_operand" "0,x")
9529 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))
9530 (parallel [(const_int 0) (const_int 4)
9531 (const_int 1) (const_int 5)])))]
9534 punpckldq\t{%2, %0|%0, %2}
9535 vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
9536 [(set_attr "isa" "noavx,avx")
9537 (set_attr "type" "sselog")
9538 (set_attr "prefix_data16" "1,*")
9539 (set_attr "prefix" "orig,vex")
9540 (set_attr "mode" "TI")])
;; Expander for vec_interleave_high<mode> on the VI_256 modes.  It emits
;; both avx2_interleave_low<mode> (into t1) and avx2_interleave_high<mode>
;; (into t2), then combines them, viewed as V4DI, with avx2_permv2ti using
;; the immediate 1 + (3 << 4), and finally copies the result back to
;; operand 0 in <MODE>mode.
9542 (define_expand "vec_interleave_high<mode>"
9543 [(match_operand:VI_256 0 "register_operand" "=x")
9544 (match_operand:VI_256 1 "register_operand" "x")
9545 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
9548 rtx t1 = gen_reg_rtx (<MODE>mode);
9549 rtx t2 = gen_reg_rtx (<MODE>mode);
9550 rtx t3 = gen_reg_rtx (V4DImode);
9551 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
9552 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
9553 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
9554 gen_lowpart (V4DImode, t2),
9555 GEN_INT (1 + (3 << 4))));
9556 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
;; Expander for vec_interleave_low<mode> on the VI_256 modes.  Same
;; sequence as the "high" expander (avx2_interleave_low<mode> into t1,
;; avx2_interleave_high<mode> into t2, then avx2_permv2ti on the V4DI
;; views), but with the immediate 0 + (2 << 4).
9560 (define_expand "vec_interleave_low<mode>"
9561 [(match_operand:VI_256 0 "register_operand" "=x")
9562 (match_operand:VI_256 1 "register_operand" "x")
9563 (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
9566 rtx t1 = gen_reg_rtx (<MODE>mode);
9567 rtx t2 = gen_reg_rtx (<MODE>mode);
9568 rtx t3 = gen_reg_rtx (V4DImode);
9569 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
9570 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
9571 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
9572 gen_lowpart (V4DImode, t2),
9573 GEN_INT (0 + (2 << 4))));
9574 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
9578 ;; Modes handled by pinsr patterns.
;; V8HI is unconditional; V16QI and V4SI require TARGET_SSE4_1; V2DI
;; requires both TARGET_SSE4_1 and TARGET_64BIT.
9579 (define_mode_iterator PINSR_MODE
9580 [(V16QI "TARGET_SSE4_1") V8HI
9581 (V4SI "TARGET_SSE4_1")
9582 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
;; Name prefix for the pinsr pattern of each PINSR_MODE mode: "sse2" for
;; V8HI and "sse4_1" for V16QI, V4SI and V2DI.
9584 (define_mode_attr sse2p4_1
9585 [(V16QI "sse4_1") (V8HI "sse2")
9586 (V4SI "sse4_1") (V2DI "sse4_1")])
9588 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; pinsr / vpinsr: vec_merge of the duplicated scalar operand 2 into vector
;; operand 1.  Operand 3 arrives as a single-bit mask; the condition
;; requires exact_log2 of it to be a valid element index, and the output
;; code rewrites operand 3 to that index before printing.  The four
;; alternatives are register/memory sources for the non-AVX and AVX forms.
;; Where the scalar mode is narrower than SImode, the template shown prints
;; operand 2 with the %k modifier.
9589 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
9590 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
9591 (vec_merge:PINSR_MODE
9592 (vec_duplicate:PINSR_MODE
9593 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
9594 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
9595 (match_operand:SI 3 "const_int_operand")))]
9597 && ((unsigned) exact_log2 (INTVAL (operands[3]))
9598 < GET_MODE_NUNITS (<MODE>mode))"
9600 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
9602 switch (which_alternative)
9605 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
9606 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
9609 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
9611 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
9612 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
9615 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
9620 [(set_attr "isa" "noavx,noavx,avx,avx")
9621 (set_attr "type" "sselog")
9622 (set (attr "prefix_rex")
9624 (and (not (match_test "TARGET_AVX"))
9625 (eq (const_string "<MODE>mode") (const_string "V2DImode")))
9627 (const_string "*")))
9628 (set (attr "prefix_data16")
9630 (and (not (match_test "TARGET_AVX"))
9631 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
9633 (const_string "*")))
9634 (set (attr "prefix_extra")
9636 (and (not (match_test "TARGET_AVX"))
9637 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
9639 (const_string "1")))
9640 (set_attr "length_immediate" "1")
9641 (set_attr "prefix" "orig,orig,vex,vex")
9642 (set_attr "mode" "TI")])
;; Expander for the masked vinsert<shuffletype>32x4 builtin form.
;; It switches on the immediate operand 3 (0..3) and emits the _1_mask insn
;; with one of four selector constants: 0xFFF, 0xF0FF, 0xFF0F or 0xFFF0.
;; Operand 4 and the mask register operand 5 are passed through.
;; NOTE(review): if the insn wraps its operands in a vec_merge with operand
;; 1 first, a set selector bit keeps operand 1's element, so 0xFFF would
;; replace elements 12-15 rather than 0-3.  Confirm the pairing of
;; immediate and selector against the case labels and the matching insn.
9644 (define_expand "avx512f_vinsert<shuffletype>32x4_mask"
9645 [(match_operand:V16FI 0 "register_operand")
9646 (match_operand:V16FI 1 "register_operand")
9647 (match_operand:<ssequartermode> 2 "nonimmediate_operand")
9648 (match_operand:SI 3 "const_0_to_3_operand")
9649 (match_operand:V16FI 4 "register_operand")
9650 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9653 switch (INTVAL (operands[3]))
9656 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9657 operands[1], operands[2], GEN_INT (0xFFF), operands[4],
9661 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9662 operands[1], operands[2], GEN_INT (0xF0FF), operands[4],
9666 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9667 operands[1], operands[2], GEN_INT (0xFF0F), operands[4],
9671 emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
9672 operands[1], operands[2], GEN_INT (0xFFF0), operands[4],
;; vinsert<shuffletype>32x4: combines vector operand 1 with a vec_duplicate
;; of the quarter-width operand 2 under the constant selector operand 3.
;; The output code recognizes the selector values 0xFFF, 0xF0FF, 0xFF0F and
;; 0xFFF0, and replaces operand 3 with the instruction immediate ("mask")
;; before printing the template.
9682 (define_insn "<mask_codefor>avx512f_vinsert<shuffletype>32x4_1<mask_name>"
9683 [(set (match_operand:V16FI 0 "register_operand" "=v")
9685 (match_operand:V16FI 1 "register_operand" "v")
9686 (vec_duplicate:V16FI
9687 (match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
9688 (match_operand:SI 3 "const_int_operand" "n")))]
9692 if (INTVAL (operands[3]) == 0xFFF)
9694 else if ( INTVAL (operands[3]) == 0xF0FF)
9696 else if ( INTVAL (operands[3]) == 0xFF0F)
9698 else if ( INTVAL (operands[3]) == 0xFFF0)
9703 operands[3] = GEN_INT (mask);
9705 return "vinsert<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
9707 [(set_attr "type" "sselog")
9708 (set_attr "length_immediate" "1")
9709 (set_attr "prefix" "evex")
9710 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked vinsert<shuffletype>64x4 builtin form.  It reads
;; the immediate operand 3 (0 or 1) and emits either vec_set_lo_<mode>_mask
;; or vec_set_hi_<mode>_mask, passing operands 0, 1, 2, 4 and 5.  The
;; selecting test is presumably on that immediate; it is not visible here.
9712 (define_expand "avx512f_vinsert<shuffletype>64x4_mask"
9713 [(match_operand:V8FI 0 "register_operand")
9714 (match_operand:V8FI 1 "register_operand")
9715 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
9716 (match_operand:SI 3 "const_0_to_1_operand")
9717 (match_operand:V8FI 4 "register_operand")
9718 (match_operand:<avx512fmaskmode> 5 "register_operand")]
9721 int mask = INTVAL (operands[3]);
9723 emit_insn (gen_vec_set_lo_<mode>_mask
9724 (operands[0], operands[1], operands[2],
9725 operands[4], operands[5]));
9727 emit_insn (gen_vec_set_hi_<mode>_mask
9728 (operands[0], operands[1], operands[2],
9729 operands[4], operands[5]));
;; vinsert<shuffletype>64x4 with immediate 0: the half-width operand 2 is
;; combined with elements 4-7 selected from operand 1.
;; The output template has two dialect alternatives, {att|intel}.  The `$'
;; immediate prefix is AT&T-only, so the Intel alternative writes the
;; immediate as plain 0x0.
9733 (define_insn "vec_set_lo_<mode><mask_name>"
9734 [(set (match_operand:V8FI 0 "register_operand" "=v")
9736 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
9737 (vec_select:<ssehalfvecmode>
9738 (match_operand:V8FI 1 "register_operand" "v")
9739 (parallel [(const_int 4) (const_int 5)
9740 (const_int 6) (const_int 7)]))))]
9742 "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
9743 [(set_attr "type" "sselog")
9744 (set_attr "length_immediate" "1")
9745 (set_attr "prefix" "evex")
9746 (set_attr "mode" "XI")])
;; vinsert<shuffletype>64x4 with immediate 1: the half-width operand 2 is
;; combined with elements 0-3 selected from operand 1.
;; The output template has two dialect alternatives, {att|intel}.  The `$'
;; immediate prefix is AT&T-only, so the Intel alternative writes the
;; immediate as plain 0x1.
9748 (define_insn "vec_set_hi_<mode><mask_name>"
9749 [(set (match_operand:V8FI 0 "register_operand" "=v")
9751 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
9752 (vec_select:<ssehalfvecmode>
9753 (match_operand:V8FI 1 "register_operand" "v")
9754 (parallel [(const_int 0) (const_int 1)
9755 (const_int 2) (const_int 3)]))))]
9757 "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
9758 [(set_attr "type" "sselog")
9759 (set_attr "length_immediate" "1")
9760 (set_attr "prefix" "evex")
9761 (set_attr "mode" "XI")])
;; Expander for the masked vshuf<shuffletype>64x2 builtin form.  The 8-bit
;; immediate (operand 3) is split into four 2-bit fields; each field f is
;; turned into a pair of adjacent element indices (2*f, 2*f + 1).  The two
;; low fields produce indices in 0..7 and the two high fields add 8,
;; producing indices in 8..15.  The resulting eight indices, plus operands
;; 4 and 5, are passed to the _1_mask insn.
9763 (define_expand "avx512f_shuf_<shuffletype>64x2_mask"
9764 [(match_operand:V8FI 0 "register_operand")
9765 (match_operand:V8FI 1 "register_operand")
9766 (match_operand:V8FI 2 "nonimmediate_operand")
9767 (match_operand:SI 3 "const_0_to_255_operand")
9768 (match_operand:V8FI 4 "register_operand")
9769 (match_operand:QI 5 "register_operand")]
9772 int mask = INTVAL (operands[3]);
9773 emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
9774 (operands[0], operands[1], operands[2],
9775 GEN_INT (((mask >> 0) & 3) * 2),
9776 GEN_INT (((mask >> 0) & 3) * 2 + 1),
9777 GEN_INT (((mask >> 2) & 3) * 2),
9778 GEN_INT (((mask >> 2) & 3) * 2 + 1),
9779 GEN_INT (((mask >> 4) & 3) * 2 + 8),
9780 GEN_INT (((mask >> 4) & 3) * 2 + 9),
9781 GEN_INT (((mask >> 6) & 3) * 2 + 8),
9782 GEN_INT (((mask >> 6) & 3) * 2 + 9),
9783 operands[4], operands[5]));
;; vshuf<shuffletype>64x2: selection from the concatenation of operands 1
;; and 2 using eight index operands (3..6 in 0..7, 7..10 in 8..15).  The
;; condition requires each index pair to be consecutive (operand 4 ==
;; operand 3 + 1, and so on).  The output code rebuilds the 8-bit immediate
;; from the first index of each pair and stores it in operand 3.
9787 (define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
9788 [(set (match_operand:V8FI 0 "register_operand" "=v")
9790 (vec_concat:<ssedoublemode>
9791 (match_operand:V8FI 1 "register_operand" "v")
9792 (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
9793 (parallel [(match_operand 3 "const_0_to_7_operand")
9794 (match_operand 4 "const_0_to_7_operand")
9795 (match_operand 5 "const_0_to_7_operand")
9796 (match_operand 6 "const_0_to_7_operand")
9797 (match_operand 7 "const_8_to_15_operand")
9798 (match_operand 8 "const_8_to_15_operand")
9799 (match_operand 9 "const_8_to_15_operand")
9800 (match_operand 10 "const_8_to_15_operand")])))]
9802 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
9803 && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
9804 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
9805 && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
9808 mask = INTVAL (operands[3]) / 2;
9809 mask |= INTVAL (operands[5]) / 2 << 2;
9810 mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
9811 mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
9812 operands[3] = GEN_INT (mask);
9814 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
9816 [(set_attr "type" "sselog")
9817 (set_attr "length_immediate" "1")
9818 (set_attr "prefix" "evex")
9819 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked vshuf<shuffletype>32x4 builtin form.  The 8-bit
;; immediate (operand 3) is split into four 2-bit fields; each field f is
;; turned into four adjacent element indices 4*f .. 4*f + 3.  The two low
;; fields produce indices in 0..15 and the two high fields add 16.  The
;; sixteen indices, plus operands 4 and 5, are passed to the _1_mask insn.
9821 (define_expand "avx512f_shuf_<shuffletype>32x4_mask"
9822 [(match_operand:V16FI 0 "register_operand")
9823 (match_operand:V16FI 1 "register_operand")
9824 (match_operand:V16FI 2 "nonimmediate_operand")
9825 (match_operand:SI 3 "const_0_to_255_operand")
9826 (match_operand:V16FI 4 "register_operand")
9827 (match_operand:HI 5 "register_operand")]
9830 int mask = INTVAL (operands[3]);
9831 emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
9832 (operands[0], operands[1], operands[2],
9833 GEN_INT (((mask >> 0) & 3) * 4),
9834 GEN_INT (((mask >> 0) & 3) * 4 + 1),
9835 GEN_INT (((mask >> 0) & 3) * 4 + 2),
9836 GEN_INT (((mask >> 0) & 3) * 4 + 3),
9837 GEN_INT (((mask >> 2) & 3) * 4),
9838 GEN_INT (((mask >> 2) & 3) * 4 + 1),
9839 GEN_INT (((mask >> 2) & 3) * 4 + 2),
9840 GEN_INT (((mask >> 2) & 3) * 4 + 3),
9841 GEN_INT (((mask >> 4) & 3) * 4 + 16),
9842 GEN_INT (((mask >> 4) & 3) * 4 + 17),
9843 GEN_INT (((mask >> 4) & 3) * 4 + 18),
9844 GEN_INT (((mask >> 4) & 3) * 4 + 19),
9845 GEN_INT (((mask >> 6) & 3) * 4 + 16),
9846 GEN_INT (((mask >> 6) & 3) * 4 + 17),
9847 GEN_INT (((mask >> 6) & 3) * 4 + 18),
9848 GEN_INT (((mask >> 6) & 3) * 4 + 19),
9849 operands[4], operands[5]));
;; vshuf<shuffletype>32x4: selection from the concatenation of operands 1
;; and 2 using sixteen index operands (3..10 in 0..15, 11..18 in 16..31).
;; The condition requires every group of four indices to be consecutive
;; (for example operands 4, 5, 6 == operand 3 + 1, + 2, + 3).  The output
;; code rebuilds the 8-bit immediate from the first index of each group and
;; stores it in operand 3.
9853 (define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
9854 [(set (match_operand:V16FI 0 "register_operand" "=v")
9856 (vec_concat:<ssedoublemode>
9857 (match_operand:V16FI 1 "register_operand" "v")
9858 (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
9859 (parallel [(match_operand 3 "const_0_to_15_operand")
9860 (match_operand 4 "const_0_to_15_operand")
9861 (match_operand 5 "const_0_to_15_operand")
9862 (match_operand 6 "const_0_to_15_operand")
9863 (match_operand 7 "const_0_to_15_operand")
9864 (match_operand 8 "const_0_to_15_operand")
9865 (match_operand 9 "const_0_to_15_operand")
9866 (match_operand 10 "const_0_to_15_operand")
9867 (match_operand 11 "const_16_to_31_operand")
9868 (match_operand 12 "const_16_to_31_operand")
9869 (match_operand 13 "const_16_to_31_operand")
9870 (match_operand 14 "const_16_to_31_operand")
9871 (match_operand 15 "const_16_to_31_operand")
9872 (match_operand 16 "const_16_to_31_operand")
9873 (match_operand 17 "const_16_to_31_operand")
9874 (match_operand 18 "const_16_to_31_operand")])))]
9876 && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
9877 && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
9878 && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
9879 && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
9880 && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
9881 && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
9882 && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
9883 && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
9884 && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
9885 && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
9886 && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
9887 && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
9890 mask = INTVAL (operands[3]) / 4;
9891 mask |= INTVAL (operands[7]) / 4 << 2;
9892 mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
9893 mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
9894 operands[3] = GEN_INT (mask);
9896 return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
9898 [(set_attr "type" "sselog")
9899 (set_attr "length_immediate" "1")
9900 (set_attr "prefix" "evex")
9901 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the masked V16SI pshufd builtin form.  The 8-bit immediate
;; (operand 2) is split into four 2-bit selectors, and the same four
;; selectors are repeated with offsets 0, 4, 8 and 12 to form sixteen
;; element indices.  These, plus operands 3 and 4, are passed to
;; avx512f_pshufd_1_mask.
9903 (define_expand "avx512f_pshufdv3_mask"
9904 [(match_operand:V16SI 0 "register_operand")
9905 (match_operand:V16SI 1 "nonimmediate_operand")
9906 (match_operand:SI 2 "const_0_to_255_operand")
9907 (match_operand:V16SI 3 "register_operand")
9908 (match_operand:HI 4 "register_operand")]
9911 int mask = INTVAL (operands[2]);
9912 emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
9913 GEN_INT ((mask >> 0) & 3),
9914 GEN_INT ((mask >> 2) & 3),
9915 GEN_INT ((mask >> 4) & 3),
9916 GEN_INT ((mask >> 6) & 3),
9917 GEN_INT (((mask >> 0) & 3) + 4),
9918 GEN_INT (((mask >> 2) & 3) + 4),
9919 GEN_INT (((mask >> 4) & 3) + 4),
9920 GEN_INT (((mask >> 6) & 3) + 4),
9921 GEN_INT (((mask >> 0) & 3) + 8),
9922 GEN_INT (((mask >> 2) & 3) + 8),
9923 GEN_INT (((mask >> 4) & 3) + 8),
9924 GEN_INT (((mask >> 6) & 3) + 8),
9925 GEN_INT (((mask >> 0) & 3) + 12),
9926 GEN_INT (((mask >> 2) & 3) + 12),
9927 GEN_INT (((mask >> 4) & 3) + 12),
9928 GEN_INT (((mask >> 6) & 3) + 12),
9929 operands[3], operands[4]));
;; vpshufd on V16SI (EVEX).  Sixteen index operands select from operand 1;
;; the condition requires indices 6..17 to equal indices 2..5 plus 4, 8 and
;; 12 respectively, so all four groups of four use the same selectors.  The
;; output code packs operands 2..5 into the 8-bit immediate (2 bits each)
;; and stores it in operand 2.
9933 (define_insn "avx512f_pshufd_1<mask_name>"
9934 [(set (match_operand:V16SI 0 "register_operand" "=v")
9936 (match_operand:V16SI 1 "nonimmediate_operand" "vm")
9937 (parallel [(match_operand 2 "const_0_to_3_operand")
9938 (match_operand 3 "const_0_to_3_operand")
9939 (match_operand 4 "const_0_to_3_operand")
9940 (match_operand 5 "const_0_to_3_operand")
9941 (match_operand 6 "const_4_to_7_operand")
9942 (match_operand 7 "const_4_to_7_operand")
9943 (match_operand 8 "const_4_to_7_operand")
9944 (match_operand 9 "const_4_to_7_operand")
9945 (match_operand 10 "const_8_to_11_operand")
9946 (match_operand 11 "const_8_to_11_operand")
9947 (match_operand 12 "const_8_to_11_operand")
9948 (match_operand 13 "const_8_to_11_operand")
9949 (match_operand 14 "const_12_to_15_operand")
9950 (match_operand 15 "const_12_to_15_operand")
9951 (match_operand 16 "const_12_to_15_operand")
9952 (match_operand 17 "const_12_to_15_operand")])))]
9954 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
9955 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
9956 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
9957 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
9958 && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
9959 && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
9960 && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
9961 && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
9962 && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
9963 && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
9964 && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
9965 && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
9968 mask |= INTVAL (operands[2]) << 0;
9969 mask |= INTVAL (operands[3]) << 2;
9970 mask |= INTVAL (operands[4]) << 4;
9971 mask |= INTVAL (operands[5]) << 6;
9972 operands[2] = GEN_INT (mask);
9974 return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
9976 [(set_attr "type" "sselog1")
9977 (set_attr "prefix" "evex")
9978 (set_attr "length_immediate" "1")
9979 (set_attr "mode" "XI")])
;; Expander for the V8SI pshufd builtin form.  The 8-bit immediate (operand
;; 2) is split into four 2-bit selectors, repeated with offsets 0 and 4 to
;; form eight element indices passed to avx2_pshufd_1.
9981 (define_expand "avx2_pshufdv3"
9982 [(match_operand:V8SI 0 "register_operand")
9983 (match_operand:V8SI 1 "nonimmediate_operand")
9984 (match_operand:SI 2 "const_0_to_255_operand")]
9987 int mask = INTVAL (operands[2]);
9988 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
9989 GEN_INT ((mask >> 0) & 3),
9990 GEN_INT ((mask >> 2) & 3),
9991 GEN_INT ((mask >> 4) & 3),
9992 GEN_INT ((mask >> 6) & 3),
9993 GEN_INT (((mask >> 0) & 3) + 4),
9994 GEN_INT (((mask >> 2) & 3) + 4),
9995 GEN_INT (((mask >> 4) & 3) + 4),
9996 GEN_INT (((mask >> 6) & 3) + 4)));
;; vpshufd on V8SI (VEX).  Eight index operands select from operand 1; the
;; condition requires operands 6..9 to equal operands 2..5 plus 4, so both
;; groups of four use the same selectors.  The output code packs operands
;; 2..5 into the 8-bit immediate (2 bits each) and stores it in operand 2.
10000 (define_insn "avx2_pshufd_1"
10001 [(set (match_operand:V8SI 0 "register_operand" "=x")
10003 (match_operand:V8SI 1 "nonimmediate_operand" "xm")
10004 (parallel [(match_operand 2 "const_0_to_3_operand")
10005 (match_operand 3 "const_0_to_3_operand")
10006 (match_operand 4 "const_0_to_3_operand")
10007 (match_operand 5 "const_0_to_3_operand")
10008 (match_operand 6 "const_4_to_7_operand")
10009 (match_operand 7 "const_4_to_7_operand")
10010 (match_operand 8 "const_4_to_7_operand")
10011 (match_operand 9 "const_4_to_7_operand")])))]
10013 && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
10014 && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
10015 && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
10016 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
10019 mask |= INTVAL (operands[2]) << 0;
10020 mask |= INTVAL (operands[3]) << 2;
10021 mask |= INTVAL (operands[4]) << 4;
10022 mask |= INTVAL (operands[5]) << 6;
10023 operands[2] = GEN_INT (mask);
10025 return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
10027 [(set_attr "type" "sselog1")
10028 (set_attr "prefix" "vex")
10029 (set_attr "length_immediate" "1")
10030 (set_attr "mode" "OI")])
;; Expander for the V4SI pshufd builtin form.  The immediate (operand 2) is
;; split into four 2-bit selectors (bits 0-1, 2-3, 4-5, 6-7), which are
;; passed as separate index operands to sse2_pshufd_1.
10032 (define_expand "sse2_pshufd"
10033 [(match_operand:V4SI 0 "register_operand")
10034 (match_operand:V4SI 1 "nonimmediate_operand")
10035 (match_operand:SI 2 "const_int_operand")]
10038 int mask = INTVAL (operands[2]);
10039 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
10040 GEN_INT ((mask >> 0) & 3),
10041 GEN_INT ((mask >> 2) & 3),
10042 GEN_INT ((mask >> 4) & 3),
10043 GEN_INT ((mask >> 6) & 3)));
;; pshufd / vpshufd on V4SI (the %v template prefix and "maybe_vex" prefix
;; attribute cover both encodings).  Four index operands in 0..3 select
;; from operand 1; the output code packs them into the 8-bit immediate
;; (2 bits each) and stores it in operand 2.
10047 (define_insn "sse2_pshufd_1"
10048 [(set (match_operand:V4SI 0 "register_operand" "=x")
10050 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10051 (parallel [(match_operand 2 "const_0_to_3_operand")
10052 (match_operand 3 "const_0_to_3_operand")
10053 (match_operand 4 "const_0_to_3_operand")
10054 (match_operand 5 "const_0_to_3_operand")])))]
10058 mask |= INTVAL (operands[2]) << 0;
10059 mask |= INTVAL (operands[3]) << 2;
10060 mask |= INTVAL (operands[4]) << 4;
10061 mask |= INTVAL (operands[5]) << 6;
10062 operands[2] = GEN_INT (mask);
10064 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
10066 [(set_attr "type" "sselog1")
10067 (set_attr "prefix_data16" "1")
10068 (set_attr "prefix" "maybe_vex")
10069 (set_attr "length_immediate" "1")
10070 (set_attr "mode" "TI")])
;; Expander for the V16HI pshuflw builtin form.  The 8-bit immediate
;; (operand 2) is split into four 2-bit selectors, repeated with offsets 0
;; and 8 to form eight element indices passed to avx2_pshuflw_1.
10072 (define_expand "avx2_pshuflwv3"
10073 [(match_operand:V16HI 0 "register_operand")
10074 (match_operand:V16HI 1 "nonimmediate_operand")
10075 (match_operand:SI 2 "const_0_to_255_operand")]
10078 int mask = INTVAL (operands[2]);
10079 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
10080 GEN_INT ((mask >> 0) & 3),
10081 GEN_INT ((mask >> 2) & 3),
10082 GEN_INT ((mask >> 4) & 3),
10083 GEN_INT ((mask >> 6) & 3),
10084 GEN_INT (((mask >> 0) & 3) + 8),
10085 GEN_INT (((mask >> 2) & 3) + 8),
10086 GEN_INT (((mask >> 4) & 3) + 8),
10087 GEN_INT (((mask >> 6) & 3) + 8)));
;; V16HI shuffle insn.  Operands 2-5 are selectors in 0..3 and operands
;; 6-9 are selectors in 8..11; the insn condition additionally requires
;; each of operands 6-9 to equal the matching one of operands 2-5 plus 8.
;; The vpshuflw immediate is built from operands 2-5 only (two bits
;; each, operand 2 lowest) and stored back into operands[2].
10091 (define_insn "avx2_pshuflw_1"
10092 [(set (match_operand:V16HI 0 "register_operand" "=x")
10094 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
10095 (parallel [(match_operand 2 "const_0_to_3_operand")
10096 (match_operand 3 "const_0_to_3_operand")
10097 (match_operand 4 "const_0_to_3_operand")
10098 (match_operand 5 "const_0_to_3_operand")
10103 (match_operand 6 "const_8_to_11_operand")
10104 (match_operand 7 "const_8_to_11_operand")
10105 (match_operand 8 "const_8_to_11_operand")
10106 (match_operand 9 "const_8_to_11_operand")
10110 (const_int 15)])))]
10112 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
10113 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
10114 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
10115 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
10118 mask |= INTVAL (operands[2]) << 0;
10119 mask |= INTVAL (operands[3]) << 2;
10120 mask |= INTVAL (operands[4]) << 4;
10121 mask |= INTVAL (operands[5]) << 6;
10122 operands[2] = GEN_INT (mask);
10124 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}";
10126 [(set_attr "type" "sselog")
10127 (set_attr "prefix" "vex")
10128 (set_attr "length_immediate" "1")
10129 (set_attr "mode" "OI")])
;; Expander for pshuflw on V8HI.  Splits the integer immediate in
;; operand 2 into four 2-bit selectors and emits sse2_pshuflw_1.
10131 (define_expand "sse2_pshuflw"
10132 [(match_operand:V8HI 0 "register_operand")
10133 (match_operand:V8HI 1 "nonimmediate_operand")
10134 (match_operand:SI 2 "const_int_operand")]
10137 int mask = INTVAL (operands[2]);
10138 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
10139 GEN_INT ((mask >> 0) & 3),
10140 GEN_INT ((mask >> 2) & 3),
10141 GEN_INT ((mask >> 4) & 3),
10142 GEN_INT ((mask >> 6) & 3)));
;; V8HI shuffle insn with four variable selectors (operands 2-5, each a
;; constant in 0..3).  The output code packs them, two bits each with
;; operand 2 lowest, into the immediate placed in operands[2] and emits
;; %vpshuflw.
10146 (define_insn "sse2_pshuflw_1"
10147 [(set (match_operand:V8HI 0 "register_operand" "=x")
10149 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10150 (parallel [(match_operand 2 "const_0_to_3_operand")
10151 (match_operand 3 "const_0_to_3_operand")
10152 (match_operand 4 "const_0_to_3_operand")
10153 (match_operand 5 "const_0_to_3_operand")
10161 mask |= INTVAL (operands[2]) << 0;
10162 mask |= INTVAL (operands[3]) << 2;
10163 mask |= INTVAL (operands[4]) << 4;
10164 mask |= INTVAL (operands[5]) << 6;
10165 operands[2] = GEN_INT (mask);
10167 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
10169 [(set_attr "type" "sselog")
10170 (set_attr "prefix_data16" "0")
10171 (set_attr "prefix_rep" "1")
10172 (set_attr "prefix" "maybe_vex")
10173 (set_attr "length_immediate" "1")
10174 (set_attr "mode" "TI")])
;; Expander for the V16HI pshufhw.  Decodes the 0..255 immediate in
;; operand 2 into four 2-bit selectors and passes them to
;; avx2_pshufhw_1 twice: biased by 4 and biased by 12.
10176 (define_expand "avx2_pshufhwv3"
10177 [(match_operand:V16HI 0 "register_operand")
10178 (match_operand:V16HI 1 "nonimmediate_operand")
10179 (match_operand:SI 2 "const_0_to_255_operand")]
10182 int mask = INTVAL (operands[2]);
10183 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
10184 GEN_INT (((mask >> 0) & 3) + 4),
10185 GEN_INT (((mask >> 2) & 3) + 4),
10186 GEN_INT (((mask >> 4) & 3) + 4),
10187 GEN_INT (((mask >> 6) & 3) + 4),
10188 GEN_INT (((mask >> 0) & 3) + 12),
10189 GEN_INT (((mask >> 2) & 3) + 12),
10190 GEN_INT (((mask >> 4) & 3) + 12),
10191 GEN_INT (((mask >> 6) & 3) + 12)));
;; V16HI shuffle insn.  Operands 2-5 are selectors in 4..7 and operands
;; 6-9 are selectors in 12..15; the insn condition requires each of
;; operands 6-9 to equal the matching one of operands 2-5 plus 8.
;; The vpshufhw immediate is built from operands 2-5 after subtracting
;; the bias of 4 (two bits each, operand 2 lowest).
10195 (define_insn "avx2_pshufhw_1"
10196 [(set (match_operand:V16HI 0 "register_operand" "=x")
10198 (match_operand:V16HI 1 "nonimmediate_operand" "xm")
10199 (parallel [(const_int 0)
10203 (match_operand 2 "const_4_to_7_operand")
10204 (match_operand 3 "const_4_to_7_operand")
10205 (match_operand 4 "const_4_to_7_operand")
10206 (match_operand 5 "const_4_to_7_operand")
10211 (match_operand 6 "const_12_to_15_operand")
10212 (match_operand 7 "const_12_to_15_operand")
10213 (match_operand 8 "const_12_to_15_operand")
10214 (match_operand 9 "const_12_to_15_operand")])))]
10216 && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
10217 && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
10218 && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
10219 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
10222 mask |= (INTVAL (operands[2]) - 4) << 0;
10223 mask |= (INTVAL (operands[3]) - 4) << 2;
10224 mask |= (INTVAL (operands[4]) - 4) << 4;
10225 mask |= (INTVAL (operands[5]) - 4) << 6;
10226 operands[2] = GEN_INT (mask);
10228 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}";
10230 [(set_attr "type" "sselog")
10231 (set_attr "prefix" "vex")
10232 (set_attr "length_immediate" "1")
10233 (set_attr "mode" "OI")])
;; Expander for pshufhw on V8HI.  Splits the integer immediate in
;; operand 2 into four 2-bit selectors, adds 4 to each, and emits
;; sse2_pshufhw_1.
10235 (define_expand "sse2_pshufhw"
10236 [(match_operand:V8HI 0 "register_operand")
10237 (match_operand:V8HI 1 "nonimmediate_operand")
10238 (match_operand:SI 2 "const_int_operand")]
10241 int mask = INTVAL (operands[2]);
10242 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
10243 GEN_INT (((mask >> 0) & 3) + 4),
10244 GEN_INT (((mask >> 2) & 3) + 4),
10245 GEN_INT (((mask >> 4) & 3) + 4),
10246 GEN_INT (((mask >> 6) & 3) + 4)));
;; V8HI shuffle insn with four variable selectors (operands 2-5, each a
;; constant in 4..7).  The output code subtracts 4 from each selector,
;; packs the results two bits each (operand 2 lowest) into the immediate
;; placed in operands[2], and emits %vpshufhw.
10250 (define_insn "sse2_pshufhw_1"
10251 [(set (match_operand:V8HI 0 "register_operand" "=x")
10253 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10254 (parallel [(const_int 0)
10258 (match_operand 2 "const_4_to_7_operand")
10259 (match_operand 3 "const_4_to_7_operand")
10260 (match_operand 4 "const_4_to_7_operand")
10261 (match_operand 5 "const_4_to_7_operand")])))]
10265 mask |= (INTVAL (operands[2]) - 4) << 0;
10266 mask |= (INTVAL (operands[3]) - 4) << 2;
10267 mask |= (INTVAL (operands[4]) - 4) << 4;
10268 mask |= (INTVAL (operands[5]) - 4) << 6;
10269 operands[2] = GEN_INT (mask);
10271 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
10273 [(set_attr "type" "sselog")
10274 (set_attr "prefix_rep" "1")
10275 (set_attr "prefix_data16" "0")
10276 (set_attr "prefix" "maybe_vex")
10277 (set_attr "length_immediate" "1")
10278 (set_attr "mode" "TI")])
;; Expander whose preparation statement sets operands[2] to the all-zero
;; V4SI constant.  The pattern duplicates the SI value in operand 1
;; into the V4SI destination.
10280 (define_expand "sse2_loadd"
10281 [(set (match_operand:V4SI 0 "register_operand")
10283 (vec_duplicate:V4SI
10284 (match_operand:SI 1 "nonimmediate_operand"))
10288 "operands[2] = CONST0_RTX (V4SImode);")
;; Five-alternative insn taking the SI value in operand 2 and a V4SI
;; register-or-zero in operand 1.
;;   alt 0: memory source, zero operand 1           -> %vmovd
;;   alt 1: general-register source, zero operand 1 -> %vmovd
;;   alt 2: memory source, zero operand 1           -> movss  (noavx)
;;   alt 3: SSE-register source, operand 1 tied to 0 -> movss (noavx)
;;   alt 4: SSE-register source, separate operand 1 -> vmovss (avx)
10290 (define_insn "sse2_loadld"
10291 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x")
10293 (vec_duplicate:V4SI
10294 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
10295 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x")
10299 %vmovd\t{%2, %0|%0, %2}
10300 %vmovd\t{%2, %0|%0, %2}
10301 movss\t{%2, %0|%0, %2}
10302 movss\t{%2, %0|%0, %2}
10303 vmovss\t{%2, %1, %0|%0, %1, %2}"
10304 [(set_attr "isa" "sse2,*,noavx,noavx,avx")
10305 (set_attr "type" "ssemov")
10306 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
10307 (set_attr "mode" "TI,TI,V4SF,SF,SF")])
;; Extracts the element selected by constant operand 2 from a VI12_128
;; register into a scalar.  Alternative 0 targets a general register
;; (printed with the k modifier); alternative 1 targets memory.  Both
;; emit %vpextr<ssemodesuffix>.  The prefix_data16 and prefix_extra
;; attributes are special-cased for alternative 0 in V8HImode.
10309 (define_insn "*vec_extract<mode>"
10310 [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
10311 (vec_select:<ssescalarmode>
10312 (match_operand:VI12_128 1 "register_operand" "x,x")
10314 [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
10317 %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
10318 %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10319 [(set_attr "type" "sselog1")
10320 (set (attr "prefix_data16")
10322 (and (eq_attr "alternative" "0")
10323 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10325 (const_string "*")))
10326 (set (attr "prefix_extra")
10328 (and (eq_attr "alternative" "0")
10329 (eq (const_string "<MODE>mode") (const_string "V8HImode")))
10331 (const_string "1")))
10332 (set_attr "length_immediate" "1")
10333 (set_attr "prefix" "maybe_vex")
10334 (set_attr "mode" "TI")])
;; HI element extraction from a V8HI register into a general register,
;; enabled only when SSE2 is available and SSE4.1 is not.  Emits the
;; non-VEX pextrw with operand 0 printed using the k modifier.
10336 (define_insn "*vec_extractv8hi_sse2"
10337 [(set (match_operand:HI 0 "register_operand" "=r")
10339 (match_operand:V8HI 1 "register_operand" "x")
10341 [(match_operand:SI 2 "const_0_to_7_operand")])))]
10342 "TARGET_SSE2 && !TARGET_SSE4_1"
10343 "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
10344 [(set_attr "type" "sselog1")
10345 (set_attr "prefix_data16" "1")
10346 (set_attr "length_immediate" "1")
10347 (set_attr "mode" "TI")])
;; Byte extraction from a V16QI register (index 0..15 in operand 2) into
;; an SWI48 general register via %vpextrb; operand 0 is printed with the
;; k modifier.  The "_zext" name suggests the result is zero-extended --
;; NOTE(review): the extension rtx is not visible in this excerpt.
10349 (define_insn "*vec_extractv16qi_zext"
10350 [(set (match_operand:SWI48 0 "register_operand" "=r")
10353 (match_operand:V16QI 1 "register_operand" "x")
10355 [(match_operand:SI 2 "const_0_to_15_operand")]))))]
10357 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
10358 [(set_attr "type" "sselog1")
10359 (set_attr "prefix_extra" "1")
10360 (set_attr "length_immediate" "1")
10361 (set_attr "prefix" "maybe_vex")
10362 (set_attr "mode" "TI")])
;; Word extraction from a V8HI register (index 0..7 in operand 2) into
;; an SWI48 general register via %vpextrw; operand 0 is printed with the
;; k modifier.  The "_zext" name suggests the result is zero-extended --
;; NOTE(review): the extension rtx is not visible in this excerpt.
10364 (define_insn "*vec_extractv8hi_zext"
10365 [(set (match_operand:SWI48 0 "register_operand" "=r")
10368 (match_operand:V8HI 1 "register_operand" "x")
10370 [(match_operand:SI 2 "const_0_to_7_operand")]))))]
10372 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
10373 [(set_attr "type" "sselog1")
10374 (set_attr "prefix_data16" "1")
10375 (set_attr "length_immediate" "1")
10376 (set_attr "prefix" "maybe_vex")
10377 (set_attr "mode" "TI")])
;; Element extraction from a VI12_128 vector that lives in memory
;; (constraint "o") into a general register, with the element index in
;; operand 2.
10379 (define_insn "*vec_extract<mode>_mem"
10380 [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
10381 (vec_select:<ssescalarmode>
10382 (match_operand:VI12_128 1 "memory_operand" "o")
10384 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
;; Extraction of element 0 from an <ssevecmode> vector into an SWI48
;; destination.  Four alternatives; the condition rejects the case where
;; both operands are memory.  Alternative 1 is restricted to the "sse4"
;; isa.
10388 (define_insn "*vec_extract<ssevecmodelower>_0"
10389 [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r ,r,x ,m")
10391 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
10392 (parallel [(const_int 0)])))]
10393 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10395 [(set_attr "isa" "*,sse4,*,*")])
;; Element-0 extraction from a V4SI register into a DI general register,
;; available for 64-bit SSE2 targets that allow inter-unit moves from
;; vector registers.  After reload it is split into a zero_extend:DI of
;; operand 1 re-expressed as an SImode REG with the same register number.
10397 (define_insn_and_split "*vec_extractv4si_0_zext"
10398 [(set (match_operand:DI 0 "register_operand" "=r")
10401 (match_operand:V4SI 1 "register_operand" "x")
10402 (parallel [(const_int 0)]))))]
10403 "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
10405 "&& reload_completed"
10406 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
10407 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));")
;; Element-0 extraction from a V2DI operand into a DI destination for
;; 32-bit (non-TARGET_64BIT) SSE targets.  Two alternatives: SSE register
;; destination from register-or-memory, or memory destination from
;; register; the condition rejects memory-to-memory.
10409 (define_insn "*vec_extractv2di_0_sse"
10410 [(set (match_operand:DI 0 "nonimmediate_operand" "=x,m")
10412 (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
10413 (parallel [(const_int 0)])))]
10414 "TARGET_SSE && !TARGET_64BIT
10415 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
;; Post-reload rewrite (TARGET_SSE && reload_completed): an element-0
;; extraction from a vector register becomes a plain scalar move, with
;; operand 1 re-created as a <MODE>mode REG using the same register
;; number.
10419 [(set (match_operand:SWI48x 0 "nonimmediate_operand")
10421 (match_operand:<ssevecmode> 1 "register_operand")
10422 (parallel [(const_int 0)])))]
10423 "TARGET_SSE && reload_completed"
10424 [(set (match_dup 0) (match_dup 1))]
10425 "operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));")
;; SI element extraction from a V4SI register, index 0..3 in operand 2.
;; The output code switches on the chosen alternative:
;;   - %vpextrd straight into a register or memory destination;
;;   - psrldq (noavx, source tied to destination) or vpsrldq (avx),
;;     where operand 2 is first multiplied by 4 to form the immediate
;;     used by the shift.
10427 (define_insn "*vec_extractv4si"
10428 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,x,x")
10430 (match_operand:V4SI 1 "register_operand" "x,0,x")
10431 (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
10434 switch (which_alternative)
10437 return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
10440 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
10441 return "psrldq\t{%2, %0|%0, %2}";
10444 operands [2] = GEN_INT (INTVAL (operands[2]) * 4);
10445 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10448 gcc_unreachable ();
10451 [(set_attr "isa" "*,noavx,avx")
10452 (set_attr "type" "sselog1,sseishft1,sseishft1")
10453 (set_attr "prefix_extra" "1,*,*")
10454 (set_attr "length_immediate" "1")
10455 (set_attr "prefix" "maybe_vex,orig,vex")
10456 (set_attr "mode" "TI")])
;; SI element extraction from a V4SI register into a DI general
;; register on 64-bit SSE4.1 targets.  Emits %vpextrd with operand 0
;; printed using the k modifier.
10458 (define_insn "*vec_extractv4si_zext"
10459 [(set (match_operand:DI 0 "register_operand" "=r")
10462 (match_operand:V4SI 1 "register_operand" "x")
10463 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
10464 "TARGET_64BIT && TARGET_SSE4_1"
10465 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
10466 [(set_attr "type" "sselog1")
10467 (set_attr "prefix_extra" "1")
10468 (set_attr "length_immediate" "1")
10469 (set_attr "prefix" "maybe_vex")
10470 (set_attr "mode" "TI")])
;; SI element extraction from a V4SI vector in memory (constraint "o")
;; into either an SSE register or a general register.
10472 (define_insn "*vec_extractv4si_mem"
10473 [(set (match_operand:SI 0 "register_operand" "=x,r")
10475 (match_operand:V4SI 1 "memory_operand" "o,o")
10476 (parallel [(match_operand 2 "const_0_to_3_operand")])))]
;; Element extraction from a V4SI vector in memory into a DI register on
;; 64-bit SSE targets.  After reload it is split into a zero_extend:DI
;; of an SImode memory reference at byte offset (operand 2 * 4) from the
;; original address.
10480 (define_insn_and_split "*vec_extractv4si_zext_mem"
10481 [(set (match_operand:DI 0 "register_operand" "=x,r")
10484 (match_operand:V4SI 1 "memory_operand" "o,o")
10485 (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
10486 "TARGET_64BIT && TARGET_SSE"
10488 "&& reload_completed"
10489 [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
10491 operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
;; Extraction of element 1 from a V2DI operand into a DI destination.
;; Seven alternatives; memory-to-memory is rejected by the condition.
;; Visible templates, in alternative order: %vpextrq $1, %vmovhps,
;; psrldq $8 (source tied to destination), vpsrldq $8 and movhlps.  The
;; last two alternatives read from offsettable memory and are typed
;; ssemov / imov.
10494 (define_insn "*vec_extractv2di_1"
10495 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,m,x,x,x,x,r")
10497 (match_operand:V2DI 1 "nonimmediate_operand" "x ,x,0,x,x,o,o")
10498 (parallel [(const_int 1)])))]
10499 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10501 %vpextrq\t{$1, %1, %0|%0, %1, 1}
10502 %vmovhps\t{%1, %0|%0, %1}
10503 psrldq\t{$8, %0|%0, 8}
10504 vpsrldq\t{$8, %1, %0|%0, %1, 8}
10505 movhlps\t{%1, %0|%0, %1}
10508 [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
10509 (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
10510 (set_attr "length_immediate" "1,*,1,1,*,*,*")
10511 (set_attr "prefix_rex" "1,*,*,*,*,*,*")
10512 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
10513 (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
10514 (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
;; Post-reload rewrite (TARGET_SSE && reload_completed): extracting
;; element N of a VI_128 vector that lives in memory becomes a scalar
;; load from the same address plus N * GET_MODE_SIZE of the element
;; mode.
10517 [(set (match_operand:<ssescalarmode> 0 "register_operand")
10518 (vec_select:<ssescalarmode>
10519 (match_operand:VI_128 1 "memory_operand")
10521 [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
10522 "TARGET_SSE && reload_completed"
10523 [(set (match_dup 0) (match_dup 1))]
10525 int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
10527 operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
;; Broadcast of an SI scalar to every element of a V4SI register.
;;   alt 0 (sse2):  SSE-register source           -> %vpshufd $0
;;   alt 1 (avx):   memory source                 -> vbroadcastss
;;   alt 2 (noavx): source tied to destination    -> shufps $0
10530 (define_insn "*vec_dupv4si"
10531 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
10532 (vec_duplicate:V4SI
10533 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
10536 %vpshufd\t{$0, %1, %0|%0, %1, 0}
10537 vbroadcastss\t{%1, %0|%0, %1}
10538 shufps\t{$0, %0, %0|%0, %0, 0}"
10539 [(set_attr "isa" "sse2,avx,noavx")
10540 (set_attr "type" "sselog1,ssemov,sselog1")
10541 (set_attr "length_immediate" "1,0,1")
10542 (set_attr "prefix_extra" "0,1,*")
10543 (set_attr "prefix" "maybe_vex,vex,orig")
10544 (set_attr "mode" "TI,V4SF,V4SF")])
;; Broadcast of a DI scalar to both elements of a V2DI register.  Four
;; alternatives (isa: sse2_noavx, avx, sse3, noavx).  Of the templates
;; visible here, vpunpcklqdq prints operand 1 with the d modifier and
;; %vmovddup loads the scalar directly.
10546 (define_insn "*vec_dupv2di"
10547 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
10548 (vec_duplicate:V2DI
10549 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
10553 vpunpcklqdq\t{%d1, %0|%0, %d1}
10554 %vmovddup\t{%1, %0|%0, %1}
10556 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
10557 (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
10558 (set_attr "prefix" "orig,vex,maybe_vex,orig")
10559 (set_attr "mode" "TI,TI,DF,V4SF")])
;; Builds a V2SI from two SI values (operand 1, operand 2).  Seven
;; alternatives, in order:
;;   0/1: pinsrd / vpinsrd $1 with operand 2 from register or memory;
;;   2/3: punpckldq / vpunpckldq with both values in SSE registers;
;;   4:   %vmovd of operand 1 when operand 2 matches constraint "C";
;;   5:   MMX punpckldq;
;;   6:   MMX movd of operand 1 when operand 2 matches constraint "C".
10561 (define_insn "*vec_concatv2si_sse4_1"
10562 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
10564 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm")
10565 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))]
10568 pinsrd\t{$1, %2, %0|%0, %2, 1}
10569 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
10570 punpckldq\t{%2, %0|%0, %2}
10571 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
10572 %vmovd\t{%1, %0|%0, %1}
10573 punpckldq\t{%2, %0|%0, %2}
10574 movd\t{%1, %0|%0, %1}"
10575 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
10576 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
10577 (set_attr "prefix_extra" "1,1,*,*,*,*,*")
10578 (set_attr "length_immediate" "1,1,*,*,*,*,*")
10579 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
10580 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")])
10582 ;; ??? In theory we can match memory for the MMX alternative, but allowing
10583 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
10584 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SI from two SI values on SSE targets without SSE4.1.
;; Operand 2 is a register or zero.  Alternatives 0-2 require SSE2
;; (punpckldq, movd to an SSE register, movd to an MMX register);
;; alternatives 3-4 use unpcklps / movss; alternatives 5-6 are the MMX
;; punpckldq / movd forms.
10585 (define_insn "*vec_concatv2si"
10586 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,x,x,*y,*y")
10588 (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
10589 (match_operand:SI 2 "reg_or_0_operand" " x,C ,C, x,C,*y,C")))]
10590 "TARGET_SSE && !TARGET_SSE4_1"
10592 punpckldq\t{%2, %0|%0, %2}
10593 movd\t{%1, %0|%0, %1}
10594 movd\t{%1, %0|%0, %1}
10595 unpcklps\t{%2, %0|%0, %2}
10596 movss\t{%1, %0|%0, %1}
10597 punpckldq\t{%2, %0|%0, %2}
10598 movd\t{%1, %0|%0, %1}"
10599 [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
10600 (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
10601 (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
;; Builds a V4SI from two V2SI halves (operand 1, operand 2).
;;   alt 0/1: punpcklqdq / vpunpcklqdq, both halves in registers;
;;   alt 2:   movlhps (noavx), both halves in registers;
;;   alt 3/4: movhps / vmovhps with operand 2 in memory; the Intel-syntax
;;            form prints it with the q modifier.
10603 (define_insn "*vec_concatv4si"
10604 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x")
10606 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x")
10607 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
10610 punpcklqdq\t{%2, %0|%0, %2}
10611 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
10612 movlhps\t{%2, %0|%0, %2}
10613 movhps\t{%2, %0|%0, %q2}
10614 vmovhps\t{%2, %1, %0|%0, %1, %q2}"
10615 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
10616 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
10617 (set_attr "prefix" "orig,vex,orig,orig,vex")
10618 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
10620 ;; movd instead of movq is required to handle broken assemblers.
;; Builds a V2DI from two DI values (operand 1, operand 2).  Ten
;; alternatives, in order:
;;   0/1: pinsrq / vpinsrq $1;
;;   2:   general register -> vector, emitted as %vmovq when
;;        HAVE_AS_IX86_INTERUNIT_MOVQ is set and as %vmovd otherwise;
;;   3:   %vmovq from SSE register or memory;
;;   4:   movq2dq from an MMX register;
;;   5/6: punpcklqdq / vpunpcklqdq;
;;   7:   movlhps;
;;   8/9: movhps / vmovhps with operand 2 in memory.
;; Alternatives 2-4 have operand 2 constrained to "C".  The "type"
;; attribute is sselog for alternatives 0, 1, 5 and 6, ssemov otherwise.
10621 (define_insn "vec_concatv2di"
10622 [(set (match_operand:V2DI 0 "register_operand"
10623 "=x,x ,Yi,x ,!x,x,x,x,x,x")
10625 (match_operand:DI 1 "nonimmediate_operand"
10626 " 0,x ,r ,xm,*y,0,x,0,0,x")
10627 (match_operand:DI 2 "vector_move_operand"
10628 "rm,rm,C ,C ,C ,x,x,x,m,m")))]
10631 pinsrq\t{$1, %2, %0|%0, %2, 1}
10632 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
10633 * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
10634 %vmovq\t{%1, %0|%0, %1}
10635 movq2dq\t{%1, %0|%0, %1}
10636 punpcklqdq\t{%2, %0|%0, %2}
10637 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
10638 movlhps\t{%2, %0|%0, %2}
10639 movhps\t{%2, %0|%0, %2}
10640 vmovhps\t{%2, %1, %0|%0, %1, %2}"
10641 [(set_attr "isa" "x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
10644 (eq_attr "alternative" "0,1,5,6")
10645 (const_string "sselog")
10646 (const_string "ssemov")))
10647 (set_attr "prefix_rex" "1,1,1,*,*,*,*,*,*,*")
10648 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*,*")
10649 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*,*")
10650 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
10651 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Expander that delegates entirely to ix86_expand_sse_unpack with flags
;; (false, false).  Presumably the third argument selects unsigned
;; extension and the fourth the high half, judging by how the four
;; vec_unpack{s,u}_{lo,hi} expanders differ -- confirm against the
;; helper's definition.
10653 (define_expand "vec_unpacks_lo_<mode>"
10654 [(match_operand:<sseunpackmode> 0 "register_operand")
10655 (match_operand:VI124_AVX512F 1 "register_operand")]
10657 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
;; Expander that delegates entirely to ix86_expand_sse_unpack with flags
;; (false, true).  Presumably the third argument selects unsigned
;; extension and the fourth the high half -- confirm against the
;; helper's definition.
10659 (define_expand "vec_unpacks_hi_<mode>"
10660 [(match_operand:<sseunpackmode> 0 "register_operand")
10661 (match_operand:VI124_AVX512F 1 "register_operand")]
10663 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
;; Expander that delegates entirely to ix86_expand_sse_unpack with flags
;; (true, false).  Presumably the third argument selects unsigned
;; extension and the fourth the high half -- confirm against the
;; helper's definition.
10665 (define_expand "vec_unpacku_lo_<mode>"
10666 [(match_operand:<sseunpackmode> 0 "register_operand")
10667 (match_operand:VI124_AVX512F 1 "register_operand")]
10669 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
;; Expander that delegates entirely to ix86_expand_sse_unpack with flags
;; (true, true).  Presumably the third argument selects unsigned
;; extension and the fourth the high half -- confirm against the
;; helper's definition.
10671 (define_expand "vec_unpacku_hi_<mode>"
10672 [(match_operand:<sseunpackmode> 0 "register_operand")
10673 (match_operand:VI124_AVX512F 1 "register_operand")]
10675 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
10677 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10681 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Expander for the unsigned average of two VI12_AVX2 vectors.  The
;; pattern zero-extends both inputs to <ssedoublemode>, sums them inside
;; a further plus, shifts right and truncates back.  The preparation
;; code sets operands[3] to the vector constant 1 and lets
;; ix86_fixup_binary_operands_no_copy adjust the operands for PLUS.
10683 (define_expand "<sse2_avx2>_uavg<mode>3"
10684 [(set (match_operand:VI12_AVX2 0 "register_operand")
10685 (truncate:VI12_AVX2
10686 (lshiftrt:<ssedoublemode>
10687 (plus:<ssedoublemode>
10688 (plus:<ssedoublemode>
10689 (zero_extend:<ssedoublemode>
10690 (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
10691 (zero_extend:<ssedoublemode>
10692 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
10697 operands[3] = CONST1_RTX(<MODE>mode);
10698 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
;; Insn matching the unsigned-average pattern: truncate of a right shift
;; of (zext op1 + zext op2) plus operand 3, where operand 3 must satisfy
;; const1_operand.  Operand 1 is marked commutative ("%").  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok for PLUS.  Emits
;; pavg<suffix> (noavx, operand 1 tied to destination) or vpavg<suffix>
;; (avx).
10701 (define_insn "*<sse2_avx2>_uavg<mode>3"
10702 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x")
10703 (truncate:VI12_AVX2
10704 (lshiftrt:<ssedoublemode>
10705 (plus:<ssedoublemode>
10706 (plus:<ssedoublemode>
10707 (zero_extend:<ssedoublemode>
10708 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,x"))
10709 (zero_extend:<ssedoublemode>
10710 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))
10711 (match_operand:VI12_AVX2 3 "const1_operand"))
10713 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
10715 pavg<ssemodesuffix>\t{%2, %0|%0, %2}
10716 vpavg<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10717 [(set_attr "isa" "noavx,avx")
10718 (set_attr "type" "sseiadd")
10719 (set_attr "prefix_data16" "1,*")
10720 (set_attr "prefix" "orig,vex")
10721 (set_attr "mode" "<sseinsnmode>")])
10723 ;; The correct representation for this is absolutely enormous, and
10724 ;; surely not generally useful.
;; psadbw / vpsadbw: takes two <ssebytemode> inputs and produces a
;; VI8_AVX2 result.  Alternative 0 (noavx) ties operand 1 to the
;; destination; alternative 1 (avx) uses the three-operand form.
10725 (define_insn "<sse2_avx2>_psadbw"
10726 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x")
10728 [(match_operand:<ssebytemode> 1 "register_operand" "0,x")
10729 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")]
10733 psadbw\t{%2, %0|%0, %2}
10734 vpsadbw\t{%2, %1, %0|%0, %1, %2}"
10735 [(set_attr "isa" "noavx,avx")
10736 (set_attr "type" "sseiadd")
10737 (set_attr "atom_unit" "simul")
10738 (set_attr "prefix_data16" "1,*")
10739 (set_attr "prefix" "orig,vex")
10740 (set_attr "mode" "<sseinsnmode>")])
;; %vmovmsk<suffix>: moves from a VF_128_256 vector register into an SI
;; general register.
10742 (define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
10743 [(set (match_operand:SI 0 "register_operand" "=r")
10745 [(match_operand:VF_128_256 1 "register_operand" "x")]
10748 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
10749 [(set_attr "type" "ssemov")
10750 (set_attr "prefix" "maybe_vex")
10751 (set_attr "mode" "<MODE>")])
;; vpmovmskb from a V32QI register into an SI general register.
;; NOTE(review): the "mode" attribute is DI here although the
;; destination is SI and sse2_pmovmskb uses "SI" -- confirm this is
;; intentional.
10753 (define_insn "avx2_pmovmskb"
10754 [(set (match_operand:SI 0 "register_operand" "=r")
10755 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
10758 "vpmovmskb\t{%1, %0|%0, %1}"
10759 [(set_attr "type" "ssemov")
10760 (set_attr "prefix" "vex")
10761 (set_attr "mode" "DI")])
;; %vpmovmskb from a V16QI register into an SI general register.
10763 (define_insn "sse2_pmovmskb"
10764 [(set (match_operand:SI 0 "register_operand" "=r")
10765 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
10768 "%vpmovmskb\t{%1, %0|%0, %1}"
10769 [(set_attr "type" "ssemov")
10770 (set_attr "prefix_data16" "1")
10771 (set_attr "prefix" "maybe_vex")
10772 (set_attr "mode" "SI")])
;; Expander for maskmovdqu: a V16QI memory destination (operand 0) set
;; from an unspec over two V16QI register operands (1 and 2).
10774 (define_expand "sse2_maskmovdqu"
10775 [(set (match_operand:V16QI 0 "memory_operand")
10776 (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
10777 (match_operand:V16QI 2 "register_operand")
;; maskmovdqu insn.  The store address is operand 0, a Pmode register
;; with constraint "D"; the unspec also reads the previous memory
;; contents at that address.  When Pmode differs from word_mode the
;; output code writes an "addr32" prefix straight to asm_out_file before
;; returning the %vmaskmovdqu template, and length_address is set from
;; the same test.  length_vex is computed by hand from operand 2.
10782 (define_insn "*sse2_maskmovdqu"
10783 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
10784 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
10785 (match_operand:V16QI 2 "register_operand" "x")
10786 (mem:V16QI (match_dup 0))]
10790 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
10791 that requires %v to be at the beginning of the opcode name. */
10792 if (Pmode != word_mode)
10793 fputs ("\taddr32", asm_out_file);
10794 return "%vmaskmovdqu\t{%2, %1|%1, %2}";
10796 [(set_attr "type" "ssemov")
10797 (set_attr "prefix_data16" "1")
10798 (set (attr "length_address")
10799 (symbol_ref ("Pmode != word_mode")))
10800 ;; The implicit %rdi operand confuses default length_vex computation.
10801 (set (attr "length_vex")
10802 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
10803 (set_attr "prefix" "maybe_vex")
10804 (set_attr "mode" "TI")])
;; ldmxcsr: a volatile unspec over an SI memory operand.  Marked as a
;; memory load with atom_sse_attr "mxcsr".
10806 (define_insn "sse_ldmxcsr"
10807 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
10811 [(set_attr "type" "sse")
10812 (set_attr "atom_sse_attr" "mxcsr")
10813 (set_attr "prefix" "maybe_vex")
10814 (set_attr "memory" "load")])
;; stmxcsr: sets an SI memory operand from the volatile unspec
;; UNSPECV_STMXCSR.  Marked as a memory store with atom_sse_attr
;; "mxcsr".
10816 (define_insn "sse_stmxcsr"
10817 [(set (match_operand:SI 0 "memory_operand" "=m")
10818 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
10821 [(set_attr "type" "sse")
10822 (set_attr "atom_sse_attr" "mxcsr")
10823 (set_attr "prefix" "maybe_vex")
10824 (set_attr "memory" "store")])
;; clflush: a volatile unspec over an address operand (constraint "p").
;; Its memory effect is declared "unknown" and atom_sse_attr is "fence".
10826 (define_insn "sse2_clflush"
10827 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
10831 [(set_attr "type" "sse")
10832 (set_attr "atom_sse_attr" "fence")
10833 (set_attr "memory" "unknown")])
;; mwait: a volatile unspec over two SI register operands pinned by
;; constraints "a" and "c".  The insn length is fixed at 3.
10836 (define_insn "sse3_mwait"
10837 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
10838 (match_operand:SI 1 "register_operand" "c")]
10841 ;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
10842 ;; Since 32bit register operands are implicitly zero extended to 64bit,
10843 ;; we only need to set up 32bit registers.
10845 [(set_attr "length" "3")])
;; monitor: a volatile unspec over a pointer-mode (P) register operand
;; with constraint "a" and two SI register operands with constraints "c"
;; and "d".  The length is 3, plus 1 when Pmode differs from word_mode.
10847 (define_insn "sse3_monitor_<mode>"
10848 [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
10849 (match_operand:SI 1 "register_operand" "c")
10850 (match_operand:SI 2 "register_operand" "d")]
10853 ;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
10854 ;; RCX and RDX are used. Since 32bit register operands are implicitly
10855 ;; zero extended to 64bit, we only need to set up 32bit registers.
10857 [(set (attr "length")
10858 (symbol_ref ("(Pmode != word_mode) + 3")))])
10860 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10862 ;; SSSE3 instructions
10864 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code iterator over the four rtx codes used by the horizontal
;; add/subtract patterns: plain and signed-saturating plus and minus.
10866 (define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
;; 256-bit horizontal add/subtract of HI elements (vph<op>w).  Each
;; result element applies the ssse3_plusminus code to an adjacent pair
;; (2k, 2k+1) taken from operand 1 or operand 2.
;; NOTE(review): the selects visible here list all pairs of operand 1
;; (elements 0-15) before any pair of operand 2; confirm that this
;; ordering matches the instruction's per-128-bit-lane behaviour.
10868 (define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
10869 [(set (match_operand:V16HI 0 "register_operand" "=x")
10874 (ssse3_plusminus:HI
10876 (match_operand:V16HI 1 "register_operand" "x")
10877 (parallel [(const_int 0)]))
10878 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
10879 (ssse3_plusminus:HI
10880 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
10881 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
10883 (ssse3_plusminus:HI
10884 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
10885 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
10886 (ssse3_plusminus:HI
10887 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
10888 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
10891 (ssse3_plusminus:HI
10892 (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
10893 (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
10894 (ssse3_plusminus:HI
10895 (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
10896 (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
10898 (ssse3_plusminus:HI
10899 (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
10900 (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
10901 (ssse3_plusminus:HI
10902 (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
10903 (vec_select:HI (match_dup 1) (parallel [(const_int 15)]))))))
10907 (ssse3_plusminus:HI
10909 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
10910 (parallel [(const_int 0)]))
10911 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
10912 (ssse3_plusminus:HI
10913 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
10914 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
10916 (ssse3_plusminus:HI
10917 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
10918 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
10919 (ssse3_plusminus:HI
10920 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
10921 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))
10924 (ssse3_plusminus:HI
10925 (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
10926 (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
10927 (ssse3_plusminus:HI
10928 (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
10929 (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
10931 (ssse3_plusminus:HI
10932 (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
10933 (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
10934 (ssse3_plusminus:HI
10935 (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
10936 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
10938 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
10939 [(set_attr "type" "sseiadd")
10940 (set_attr "prefix_extra" "1")
10941 (set_attr "prefix" "vex")
10942 (set_attr "mode" "OI")])
;; 128-bit horizontal add/subtract of HI elements.  Result elements are
;; the ssse3_plusminus code applied to adjacent pairs (0,1), (2,3),
;; (4,5), (6,7) of operand 1, followed by the same pairs of operand 2.
;; Alternative 0 (noavx) ties operand 1 to the destination and emits
;; ph<op>w; alternative 1 (avx) emits the three-operand vph<op>w.
10944 (define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
10945 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10949 (ssse3_plusminus:HI
10951 (match_operand:V8HI 1 "register_operand" "0,x")
10952 (parallel [(const_int 0)]))
10953 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
10954 (ssse3_plusminus:HI
10955 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
10956 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
10958 (ssse3_plusminus:HI
10959 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
10960 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
10961 (ssse3_plusminus:HI
10962 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
10963 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
10966 (ssse3_plusminus:HI
10968 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
10969 (parallel [(const_int 0)]))
10970 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
10971 (ssse3_plusminus:HI
10972 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
10973 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
10975 (ssse3_plusminus:HI
10976 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
10977 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
10978 (ssse3_plusminus:HI
10979 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
10980 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
10983 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
10984 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
10985 [(set_attr "isa" "noavx,avx")
10986 (set_attr "type" "sseiadd")
10987 (set_attr "atom_unit" "complex")
10988 (set_attr "prefix_data16" "1,*")
10989 (set_attr "prefix_extra" "1")
10990 (set_attr "prefix" "orig,vex")
10991 (set_attr "mode" "TI")])
;; 64-bit (MMX register, constraint "y") horizontal add/subtract of HI
;; elements.  Result elements are the ssse3_plusminus code applied to
;; pairs (0,1) and (2,3) of operand 1, then the same pairs of operand 2.
;; Operand 1 is tied to the destination.  prefix_rex is derived from
;; x86_extended_reg_mentioned_p.
10993 (define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
10994 [(set (match_operand:V4HI 0 "register_operand" "=y")
10997 (ssse3_plusminus:HI
10999 (match_operand:V4HI 1 "register_operand" "0")
11000 (parallel [(const_int 0)]))
11001 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
11002 (ssse3_plusminus:HI
11003 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
11004 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
11006 (ssse3_plusminus:HI
11008 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
11009 (parallel [(const_int 0)]))
11010 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
11011 (ssse3_plusminus:HI
11012 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
11013 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
11015 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
11016 [(set_attr "type" "sseiadd")
11017 (set_attr "atom_unit" "complex")
11018 (set_attr "prefix_extra" "1")
11019 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11020 (set_attr "mode" "DI")])
;; 256-bit horizontal add/subtract of SI elements (vph<op>d), combining
;; adjacent pairs (2k, 2k+1) of operand 1 and of operand 2.
;; NOTE(review): the selects visible here list all pairs of operand 1
;; (elements 0-7) before any pair of operand 2; confirm that this
;; ordering matches the instruction's per-128-bit-lane behaviour.
11022 (define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
11023 [(set (match_operand:V8SI 0 "register_operand" "=x")
11029 (match_operand:V8SI 1 "register_operand" "x")
11030 (parallel [(const_int 0)]))
11031 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
11033 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
11034 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
11037 (vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
11038 (vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
11040 (vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
11041 (vec_select:SI (match_dup 1) (parallel [(const_int 7)])))))
11046 (match_operand:V8SI 2 "nonimmediate_operand" "xm")
11047 (parallel [(const_int 0)]))
11048 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
11050 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
11051 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))
11054 (vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
11055 (vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
11057 (vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
11058 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
11060 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
11061 [(set_attr "type" "sseiadd")
11062 (set_attr "prefix_extra" "1")
11063 (set_attr "prefix" "vex")
11064 (set_attr "mode" "OI")])
;; 128-bit (V4SI) pattern: pairs SImode elements (0,1) and (2,3) of
;; operand 1, then the same pairs of operand 2.
;; Alternative 0 (isa noavx) ties operand 1 to the destination and emits the
;; two-operand ph<plusminus_mnemonic>d; alternative 1 (isa avx) emits the
;; three-operand vph<plusminus_mnemonic>d.
11066 (define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
11067 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
11072 (match_operand:V4SI 1 "register_operand" "0,x")
11073 (parallel [(const_int 0)]))
11074 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
11076 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
11077 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
11081 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
11082 (parallel [(const_int 0)]))
11083 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
11085 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
11086 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
11089 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
11090 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
11091 [(set_attr "isa" "noavx,avx")
11092 (set_attr "type" "sseiadd")
11093 (set_attr "atom_unit" "complex")
11094 (set_attr "prefix_data16" "1,*")
11095 (set_attr "prefix_extra" "1")
11096 (set_attr "prefix" "orig,vex")
11097 (set_attr "mode" "TI")])
;; 64-bit (V2SI) pattern using "y"-constraint registers: pairs elements
;; (0,1) of operand 1 and (0,1) of operand 2.  Operand 1 is tied to the
;; destination, so only the two-operand ph<plusminus_mnemonic>d is emitted.
;; prefix_rex is computed per insn via x86_extended_reg_mentioned_p.
11099 (define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
11100 [(set (match_operand:V2SI 0 "register_operand" "=y")
11104 (match_operand:V2SI 1 "register_operand" "0")
11105 (parallel [(const_int 0)]))
11106 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
11109 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
11110 (parallel [(const_int 0)]))
11111 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
11113 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
11114 [(set_attr "type" "sseiadd")
11115 (set_attr "atom_unit" "complex")
11116 (set_attr "prefix_extra" "1")
11117 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11118 (set_attr "mode" "DI")])
;; AVX2 vpmaddubsw: V32QI operands 1 and 2 produce a V16HI result.
;; The RTL selects the even-indexed bytes (0, 2, ..., 30) of each operand
;; and then the odd-indexed bytes (1, 3, ..., 31) of each operand, i.e. the
;; two halves of every adjacent byte pair are handled as separate V16QI
;; selections.
11120 (define_insn "avx2_pmaddubsw256"
11121 [(set (match_operand:V16HI 0 "register_operand" "=x")
11126 (match_operand:V32QI 1 "register_operand" "x")
11127 (parallel [(const_int 0) (const_int 2)
11128 (const_int 4) (const_int 6)
11129 (const_int 8) (const_int 10)
11130 (const_int 12) (const_int 14)
11131 (const_int 16) (const_int 18)
11132 (const_int 20) (const_int 22)
11133 (const_int 24) (const_int 26)
11134 (const_int 28) (const_int 30)])))
11137 (match_operand:V32QI 2 "nonimmediate_operand" "xm")
11138 (parallel [(const_int 0) (const_int 2)
11139 (const_int 4) (const_int 6)
11140 (const_int 8) (const_int 10)
11141 (const_int 12) (const_int 14)
11142 (const_int 16) (const_int 18)
11143 (const_int 20) (const_int 22)
11144 (const_int 24) (const_int 26)
11145 (const_int 28) (const_int 30)]))))
11148 (vec_select:V16QI (match_dup 1)
11149 (parallel [(const_int 1) (const_int 3)
11150 (const_int 5) (const_int 7)
11151 (const_int 9) (const_int 11)
11152 (const_int 13) (const_int 15)
11153 (const_int 17) (const_int 19)
11154 (const_int 21) (const_int 23)
11155 (const_int 25) (const_int 27)
11156 (const_int 29) (const_int 31)])))
11158 (vec_select:V16QI (match_dup 2)
11159 (parallel [(const_int 1) (const_int 3)
11160 (const_int 5) (const_int 7)
11161 (const_int 9) (const_int 11)
11162 (const_int 13) (const_int 15)
11163 (const_int 17) (const_int 19)
11164 (const_int 21) (const_int 23)
11165 (const_int 25) (const_int 27)
11166 (const_int 29) (const_int 31)]))))))]
11168 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
11169 [(set_attr "type" "sseiadd")
11170 (set_attr "prefix_extra" "1")
11171 (set_attr "prefix" "vex")
11172 (set_attr "mode" "OI")])
;; 128-bit pmaddubsw: V16QI operands 1 and 2 produce a V8HI result.
;; Even-indexed bytes (0..14) and odd-indexed bytes (1..15) of each operand
;; are selected separately as V8QI vectors.
;; Alternative 0 (noavx) ties operand 1 to the destination and emits
;; pmaddubsw; alternative 1 (avx) emits the three-operand vpmaddubsw.
11174 (define_insn "ssse3_pmaddubsw128"
11175 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11180 (match_operand:V16QI 1 "register_operand" "0,x")
11181 (parallel [(const_int 0) (const_int 2)
11182 (const_int 4) (const_int 6)
11183 (const_int 8) (const_int 10)
11184 (const_int 12) (const_int 14)])))
11187 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
11188 (parallel [(const_int 0) (const_int 2)
11189 (const_int 4) (const_int 6)
11190 (const_int 8) (const_int 10)
11191 (const_int 12) (const_int 14)]))))
11194 (vec_select:V8QI (match_dup 1)
11195 (parallel [(const_int 1) (const_int 3)
11196 (const_int 5) (const_int 7)
11197 (const_int 9) (const_int 11)
11198 (const_int 13) (const_int 15)])))
11200 (vec_select:V8QI (match_dup 2)
11201 (parallel [(const_int 1) (const_int 3)
11202 (const_int 5) (const_int 7)
11203 (const_int 9) (const_int 11)
11204 (const_int 13) (const_int 15)]))))))]
11207 pmaddubsw\t{%2, %0|%0, %2}
11208 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
11209 [(set_attr "isa" "noavx,avx")
11210 (set_attr "type" "sseiadd")
11211 (set_attr "atom_unit" "simul")
11212 (set_attr "prefix_data16" "1,*")
11213 (set_attr "prefix_extra" "1")
11214 (set_attr "prefix" "orig,vex")
11215 (set_attr "mode" "TI")])
;; 64-bit pmaddubsw on "y"-constraint registers: V8QI operands 1 and 2
;; produce a V4HI result.  Even-indexed bytes (0..6) and odd-indexed bytes
;; (1..7) of each operand are selected separately as V4QI vectors.
;; Operand 1 is tied to the destination.
11217 (define_insn "ssse3_pmaddubsw"
11218 [(set (match_operand:V4HI 0 "register_operand" "=y")
11223 (match_operand:V8QI 1 "register_operand" "0")
11224 (parallel [(const_int 0) (const_int 2)
11225 (const_int 4) (const_int 6)])))
11228 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
11229 (parallel [(const_int 0) (const_int 2)
11230 (const_int 4) (const_int 6)]))))
11233 (vec_select:V4QI (match_dup 1)
11234 (parallel [(const_int 1) (const_int 3)
11235 (const_int 5) (const_int 7)])))
11237 (vec_select:V4QI (match_dup 2)
11238 (parallel [(const_int 1) (const_int 3)
11239 (const_int 5) (const_int 7)]))))))]
11241 "pmaddubsw\t{%2, %0|%0, %2}"
11242 [(set_attr "type" "sseiadd")
11243 (set_attr "atom_unit" "simul")
11244 (set_attr "prefix_extra" "1")
11245 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11246 (set_attr "mode" "DI")])
;; Modes accepted by the pmulhrsw expander: V4HI and V8HI unconditionally,
;; V16HI only when TARGET_AVX2 holds.
11248 (define_mode_iterator PMULHRSW
11249 [V4HI V8HI (V16HI "TARGET_AVX2")])
;; Expander for pmulhrsw over the PMULHRSW modes.  The RTL multiplies the
;; sign-extended operands in <ssedoublemode>, then applies a logical right
;; shift, an addition and a second logical right shift.
;; The preparation code stores CONST1_RTX (<MODE>mode) into operands[3] and
;; calls ix86_fixup_binary_operands_no_copy for the MULT on the operands.
11251 (define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
11252 [(set (match_operand:PMULHRSW 0 "register_operand")
11254 (lshiftrt:<ssedoublemode>
11255 (plus:<ssedoublemode>
11256 (lshiftrt:<ssedoublemode>
11257 (mult:<ssedoublemode>
11258 (sign_extend:<ssedoublemode>
11259 (match_operand:PMULHRSW 1 "nonimmediate_operand"))
11260 (sign_extend:<ssedoublemode>
11261 (match_operand:PMULHRSW 2 "nonimmediate_operand")))
11267 operands[3] = CONST1_RTX(<MODE>mode);
11268 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
;; Matching insn for pmulhrsw over VI2_AVX2 modes.  Operand 1 carries the
;; "%" commutativity marker and operand 3 must satisfy const1_operand.
;; Enabled under TARGET_SSSE3 when ix86_binary_operator_ok accepts the
;; MULT operands.  Alternative 0 (noavx) emits pmulhrsw with operand 1 tied
;; to the destination; alternative 1 (avx) emits three-operand vpmulhrsw.
11271 (define_insn "*<ssse3_avx2>_pmulhrsw<mode>3"
11272 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
11274 (lshiftrt:<ssedoublemode>
11275 (plus:<ssedoublemode>
11276 (lshiftrt:<ssedoublemode>
11277 (mult:<ssedoublemode>
11278 (sign_extend:<ssedoublemode>
11279 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x"))
11280 (sign_extend:<ssedoublemode>
11281 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))
11283 (match_operand:VI2_AVX2 3 "const1_operand"))
11285 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
11287 pmulhrsw\t{%2, %0|%0, %2}
11288 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
11289 [(set_attr "isa" "noavx,avx")
11290 (set_attr "type" "sseimul")
11291 (set_attr "prefix_data16" "1,*")
11292 (set_attr "prefix_extra" "1")
11293 (set_attr "prefix" "orig,vex")
11294 (set_attr "mode" "<sseinsnmode>")])
;; V4HI pmulhrsw on "y"-constraint registers.  Operand 1 is commutative
;; ("%") and tied to the destination; operand 3 must satisfy const1_operand.
;; Enabled under TARGET_SSSE3 when ix86_binary_operator_ok accepts the
;; MULT operands in V4HImode.
11296 (define_insn "*ssse3_pmulhrswv4hi3"
11297 [(set (match_operand:V4HI 0 "register_operand" "=y")
11304 (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
11306 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
11308 (match_operand:V4HI 3 "const1_operand"))
11310 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
11311 "pmulhrsw\t{%2, %0|%0, %2}"
11312 [(set_attr "type" "sseimul")
11313 (set_attr "prefix_extra" "1")
11314 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11315 (set_attr "mode" "DI")])
;; pshufb over VI1_AVX2 modes, taking operands 1 and 2.
;; Alternative 0 (noavx) ties operand 1 to the destination and emits
;; pshufb; alternative 1 (avx) emits the three-operand vpshufb.
;; Both alternatives are marked "vector" for btver2_decode.
11317 (define_insn "<ssse3_avx2>_pshufb<mode>3"
11318 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11320 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11321 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")]
11325 pshufb\t{%2, %0|%0, %2}
11326 vpshufb\t{%2, %1, %0|%0, %1, %2}"
11327 [(set_attr "isa" "noavx,avx")
11328 (set_attr "type" "sselog1")
11329 (set_attr "prefix_data16" "1,*")
11330 (set_attr "prefix_extra" "1")
11331 (set_attr "prefix" "orig,vex")
11332 (set_attr "btver2_decode" "vector,vector")
11333 (set_attr "mode" "<sseinsnmode>")])
;; V8QI pshufb on "y"-constraint registers, expressed as an unspec of
;; operands 1 and 2; operand 1 is tied to the destination.
;; The ';' following the output template merely begins an empty md comment.
11335 (define_insn "ssse3_pshufbv8qi3"
11336 [(set (match_operand:V8QI 0 "register_operand" "=y")
11337 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
11338 (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
11341 "pshufb\t{%2, %0|%0, %2}";
11342 [(set_attr "type" "sselog1")
11343 (set_attr "prefix_extra" "1")
11344 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11345 (set_attr "mode" "DI")])
;; psign over VI124_AVX2 modes; the mnemonic suffix comes from
;; <ssemodesuffix>.  Alternative 0 (noavx) ties operand 1 to the
;; destination; alternative 1 (avx) emits the three-operand VEX form.
11347 (define_insn "<ssse3_avx2>_psign<mode>3"
11348 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
11350 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
11351 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
11355 psign<ssemodesuffix>\t{%2, %0|%0, %2}
11356 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11357 [(set_attr "isa" "noavx,avx")
11358 (set_attr "type" "sselog1")
11359 (set_attr "prefix_data16" "1,*")
11360 (set_attr "prefix_extra" "1")
11361 (set_attr "prefix" "orig,vex")
11362 (set_attr "mode" "<sseinsnmode>")])
;; psign over MMXMODEI modes on "y"-constraint registers; the mnemonic
;; suffix comes from <mmxvecsize>.  Operand 1 is tied to the destination.
;; The ';' following the output template merely begins an empty md comment.
11364 (define_insn "ssse3_psign<mode>3"
11365 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
11367 [(match_operand:MMXMODEI 1 "register_operand" "0")
11368 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
11371 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
11372 [(set_attr "type" "sselog1")
11373 (set_attr "prefix_extra" "1")
11374 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11375 (set_attr "mode" "DI")])
;; palignr over SSESCALARMODE modes.  Operand 3 must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before it
;; is printed as the instruction immediate.
;; The emitted form depends on which_alternative: two-operand palignr
;; (noavx, operand 1 tied to the destination) or three-operand vpalignr
;; (avx); any other alternative is unreachable.
11377 (define_insn "<ssse3_avx2>_palignr<mode>"
11378 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x")
11379 (unspec:SSESCALARMODE
11380 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x")
11381 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm")
11382 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
11386 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
11388 switch (which_alternative)
11391 return "palignr\t{%3, %2, %0|%0, %2, %3}";
11393 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11395 gcc_unreachable ();
11398 [(set_attr "isa" "noavx,avx")
11399 (set_attr "type" "sseishft")
11400 (set_attr "atom_unit" "sishuf")
11401 (set_attr "prefix_data16" "1,*")
11402 (set_attr "prefix_extra" "1")
11403 (set_attr "length_immediate" "1")
11404 (set_attr "prefix" "orig,vex")
11405 (set_attr "mode" "<sseinsnmode>")])
;; DImode palignr on "y"-constraint registers; operand 1 is tied to the
;; destination.  Operand 3 must satisfy const_0_to_255_mul_8_operand and is
;; divided by 8 before being printed as the instruction immediate.
11407 (define_insn "ssse3_palignrdi"
11408 [(set (match_operand:DI 0 "register_operand" "=y")
11409 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
11410 (match_operand:DI 2 "nonimmediate_operand" "ym")
11411 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
11415 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
11416 return "palignr\t{%3, %2, %0|%0, %2, %3}";
11418 [(set_attr "type" "sseishft")
11419 (set_attr "atom_unit" "sishuf")
11420 (set_attr "prefix_extra" "1")
11421 (set_attr "length_immediate" "1")
11422 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11423 (set_attr "mode" "DI")])
;; Vector integer absolute value (RTL abs) over VI124_AVX2_48_AVX512F
;; modes, with the <mask_name> substitution applied.  Requires TARGET_SSSE3
;; and <mask_mode512bit_condition>.  Emits %vpabs<ssemodesuffix>; the
;; destination additionally carries <mask_operand2>.
11425 (define_insn "<mask_codefor>abs<mode>2<mask_name>"
11426 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
11427 (abs:VI124_AVX2_48_AVX512F
11428 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
11429 "TARGET_SSSE3 && <mask_mode512bit_condition>"
11430 "%vpabs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11431 [(set_attr "type" "sselog1")
11432 (set_attr "prefix_data16" "1")
11433 (set_attr "prefix_extra" "1")
11434 (set_attr "prefix" "maybe_vex")
11435 (set_attr "mode" "<sseinsnmode>")])
;; Standard-named abs expander over VI124_AVX2_48_AVX512F modes.
;; Its preparation code invokes ix86_expand_sse2_abs (operands[0],
;; operands[1]).
;; NOTE(review): presumably that helper is only used when the direct pabs
;; insn is unavailable -- confirm against the guarding condition.
11437 (define_expand "abs<mode>2"
11438 [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand")
11439 (abs:VI124_AVX2_48_AVX512F
11440 (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand")))]
11445 ix86_expand_sse2_abs (operands[0], operands[1]);
;; abs insn over MMXMODEI modes on "y"-constraint registers; emits
;; pabs<mmxvecsize>.  prefix_rep is explicitly set to "0".
;; The ';' following the output template merely begins an empty md comment.
11450 (define_insn "abs<mode>2"
11451 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
11453 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
11455 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
11456 [(set_attr "type" "sselog1")
11457 (set_attr "prefix_rep" "0")
11458 (set_attr "prefix_extra" "1")
11459 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
11460 (set_attr "mode" "DI")])
11462 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11464 ;; AMD SSE4A instructions
11466 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; SSE4A movnt<ssemodesuffix>: stores the MODEF value held in register
;; operand 1 ("x") to memory operand 0.
11468 (define_insn "sse4a_movnt<mode>"
11469 [(set (match_operand:MODEF 0 "memory_operand" "=m")
11471 [(match_operand:MODEF 1 "register_operand" "x")]
11474 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
11475 [(set_attr "type" "ssemov")
11476 (set_attr "mode" "<MODE>")])
;; Vector-operand variant of the SSE4A movnt store: element 0 of the VF_128
;; register operand 1 is selected and stored to the <ssescalarmode> memory
;; operand 0 using movnt<ssescalarmodesuffix>.
11478 (define_insn "sse4a_vmmovnt<mode>"
11479 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
11480 (unspec:<ssescalarmode>
11481 [(vec_select:<ssescalarmode>
11482 (match_operand:VF_128 1 "register_operand" "x")
11483 (parallel [(const_int 0)]))]
11486 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
11487 [(set_attr "type" "ssemov")
11488 (set_attr "mode" "<ssescalarmode>")])
;; Immediate form of SSE4A extrq on V2DI: operand 1 is tied to the
;; destination and operands 2 and 3 are constants accepted by
;; const_0_to_255_operand.  length_immediate is "2" for the two immediates.
11490 (define_insn "sse4a_extrqi"
11491 [(set (match_operand:V2DI 0 "register_operand" "=x")
11492 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11493 (match_operand 2 "const_0_to_255_operand")
11494 (match_operand 3 "const_0_to_255_operand")]
11497 "extrq\t{%3, %2, %0|%0, %2, %3}"
11498 [(set_attr "type" "sse")
11499 (set_attr "prefix_data16" "1")
11500 (set_attr "length_immediate" "2")
11501 (set_attr "mode" "TI")])
;; Register form of SSE4A extrq on V2DI: operand 1 is tied to the
;; destination and operand 2 is a V16QI register.
11503 (define_insn "sse4a_extrq"
11504 [(set (match_operand:V2DI 0 "register_operand" "=x")
11505 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11506 (match_operand:V16QI 2 "register_operand" "x")]
11509 "extrq\t{%2, %0|%0, %2}"
11510 [(set_attr "type" "sse")
11511 (set_attr "prefix_data16" "1")
11512 (set_attr "mode" "TI")])
;; Immediate form of SSE4A insertq on V2DI: operand 1 is tied to the
;; destination, operand 2 is a V2DI register, and operands 3 and 4 are
;; constants accepted by const_0_to_255_operand (length_immediate "2").
;; prefix_data16 is "0" and prefix_rep is "1".
11514 (define_insn "sse4a_insertqi"
11515 [(set (match_operand:V2DI 0 "register_operand" "=x")
11516 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11517 (match_operand:V2DI 2 "register_operand" "x")
11518 (match_operand 3 "const_0_to_255_operand")
11519 (match_operand 4 "const_0_to_255_operand")]
11522 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
11523 [(set_attr "type" "sseins")
11524 (set_attr "prefix_data16" "0")
11525 (set_attr "prefix_rep" "1")
11526 (set_attr "length_immediate" "2")
11527 (set_attr "mode" "TI")])
;; Register form of SSE4A insertq on V2DI: operand 1 is tied to the
;; destination and operand 2 is a V2DI register.
;; prefix_data16 is "0" and prefix_rep is "1".
11529 (define_insn "sse4a_insertq"
11530 [(set (match_operand:V2DI 0 "register_operand" "=x")
11531 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
11532 (match_operand:V2DI 2 "register_operand" "x")]
11535 "insertq\t{%2, %0|%0, %2}"
11536 [(set_attr "type" "sseins")
11537 (set_attr "prefix_data16" "0")
11538 (set_attr "prefix_rep" "1")
11539 (set_attr "mode" "TI")])
11541 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11543 ;; Intel SSE4.1 instructions
11545 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Immediate blend over VF_128_256 modes, modelled as a vec_merge whose
;; first arm is operand 2 and whose second arm is operand 1, with the
;; selector in operand 3 (const_0_to_<blendbits>_operand).
;; Alternative 0 (noavx) ties operand 1 to the destination and emits
;; blend<ssemodesuffix>; alternative 1 (avx) emits vblend<ssemodesuffix>.
11547 (define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
11548 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11549 (vec_merge:VF_128_256
11550 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11551 (match_operand:VF_128_256 1 "register_operand" "0,x")
11552 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
11555 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11556 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11557 [(set_attr "isa" "noavx,avx")
11558 (set_attr "type" "ssemov")
11559 (set_attr "length_immediate" "1")
11560 (set_attr "prefix_data16" "1,*")
11561 (set_attr "prefix_extra" "1")
11562 (set_attr "prefix" "orig,vex")
11563 (set_attr "mode" "<MODE>")])
;; Variable blend over VF_128_256 modes; the selector is register operand 3.
;; In alternative 0 (noavx) operand 1 is tied to the destination and the
;; selector uses the "Yz" constraint; alternative 1 (avx) accepts any "x"
;; register and emits the four-operand vblendv<ssemodesuffix>.
;; NOTE(review): length_immediate is "1" for both alternatives, whereas the
;; pblendvb pattern in this file uses "*,1" -- confirm which is intended.
11565 (define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
11566 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11568 [(match_operand:VF_128_256 1 "register_operand" "0,x")
11569 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11570 (match_operand:VF_128_256 3 "register_operand" "Yz,x")]
11574 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11575 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11576 [(set_attr "isa" "noavx,avx")
11577 (set_attr "type" "ssemov")
11578 (set_attr "length_immediate" "1")
11579 (set_attr "prefix_data16" "1,*")
11580 (set_attr "prefix_extra" "1")
11581 (set_attr "prefix" "orig,vex")
11582 (set_attr "btver2_decode" "vector,vector")
11583 (set_attr "mode" "<MODE>")])
;; dp<ssemodesuffix> over VF_128_256 modes with an 8-bit immediate in
;; operand 3 (const_0_to_255_operand).  Operand 1 carries the "%"
;; commutativity marker.  Alternative 0 (noavx) ties operand 1 to the
;; destination; alternative 1 (avx) emits vdp<ssemodesuffix>.
11585 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
11586 [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
11588 [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,x")
11589 (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")
11590 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11594 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
11595 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11596 [(set_attr "isa" "noavx,avx")
11597 (set_attr "type" "ssemul")
11598 (set_attr "length_immediate" "1")
11599 (set_attr "prefix_data16" "1,*")
11600 (set_attr "prefix_extra" "1")
11601 (set_attr "prefix" "orig,vex")
11602 (set_attr "btver2_decode" "vector,vector")
11603 (set_attr "mode" "<MODE>")])
;; movntdqa load over VI8_AVX2_AVX512F modes: memory operand 1 is loaded
;; into register operand 0.  Alternative 0 uses an "x" destination with
;; prefix "maybe_vex" and prefix_extra "1"; alternative 1 uses a "v"
;; destination with prefix "evex".
11605 (define_insn "<sse4_1_avx2>_movntdqa"
11606 [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=x, v")
11607 (unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m")]
11610 "%vmovntdqa\t{%1, %0|%0, %1}"
11611 [(set_attr "type" "ssemov")
11612 (set_attr "prefix_extra" "1, *")
11613 (set_attr "prefix" "maybe_vex, evex")
11614 (set_attr "mode" "<sseinsnmode>")])
;; mpsadbw over VI1_AVX2 modes with an 8-bit immediate in operand 3
;; (const_0_to_255_operand).  Alternative 0 (noavx) ties operand 1 to the
;; destination; alternative 1 (avx) emits the four-operand vmpsadbw.
11616 (define_insn "<sse4_1_avx2>_mpsadbw"
11617 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11619 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11620 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
11621 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
11625 mpsadbw\t{%3, %2, %0|%0, %2, %3}
11626 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11627 [(set_attr "isa" "noavx,avx")
11628 (set_attr "type" "sselog1")
11629 (set_attr "length_immediate" "1")
11630 (set_attr "prefix_extra" "1")
11631 (set_attr "prefix" "orig,vex")
11632 (set_attr "btver2_decode" "vector,vector")
11633 (set_attr "mode" "<sseinsnmode>")])
;; AVX2 vpackusdw: two V8SI sources (operand 1 in a register, operand 2 in
;; a register or memory) produce a V16HI destination.
11635 (define_insn "avx2_packusdw"
11636 [(set (match_operand:V16HI 0 "register_operand" "=x")
11639 (match_operand:V8SI 1 "register_operand" "x"))
11641 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))]
11643 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
11644 [(set_attr "type" "sselog")
11645 (set_attr "prefix_extra" "1")
11646 (set_attr "prefix" "vex")
11647 (set_attr "mode" "OI")])
;; 128-bit packusdw: two V4SI sources produce a V8HI destination.
;; Alternative 0 (noavx) ties operand 1 to the destination and emits
;; packusdw; alternative 1 (avx) emits the three-operand vpackusdw.
11649 (define_insn "sse4_1_packusdw"
11650 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11653 (match_operand:V4SI 1 "register_operand" "0,x"))
11655 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))]
11658 packusdw\t{%2, %0|%0, %2}
11659 vpackusdw\t{%2, %1, %0|%0, %1, %2}"
11660 [(set_attr "isa" "noavx,avx")
11661 (set_attr "type" "sselog")
11662 (set_attr "prefix_extra" "1")
11663 (set_attr "prefix" "orig,vex")
11664 (set_attr "mode" "TI")])
;; Variable byte blend over VI1_AVX2 modes; the selector is register
;; operand 3.  Alternative 0 (noavx) ties operand 1 to the destination and
;; constrains the selector to "Yz"; alternative 1 (avx) accepts any "x"
;; register and emits the four-operand vpblendvb.
;; length_immediate is "*,1": only the avx alternative sets it explicitly.
11666 (define_insn "<sse4_1_avx2>_pblendvb"
11667 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x")
11669 [(match_operand:VI1_AVX2 1 "register_operand" "0,x")
11670 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")
11671 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")]
11675 pblendvb\t{%3, %2, %0|%0, %2, %3}
11676 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11677 [(set_attr "isa" "noavx,avx")
11678 (set_attr "type" "ssemov")
11679 (set_attr "prefix_extra" "1")
11680 (set_attr "length_immediate" "*,1")
11681 (set_attr "prefix" "orig,vex")
11682 (set_attr "btver2_decode" "vector,vector")
11683 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit pblendw on V8HI with an 8-bit immediate selector in operand 3
;; (const_0_to_255_operand).  Operand 2 is listed before operand 1 in the
;; RTL.  Alternative 0 (noavx) ties operand 1 to the destination;
;; alternative 1 (avx) emits the four-operand vpblendw.
11685 (define_insn "sse4_1_pblendw"
11686 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
11688 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
11689 (match_operand:V8HI 1 "register_operand" "0,x")
11690 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))]
11693 pblendw\t{%3, %2, %0|%0, %2, %3}
11694 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11695 [(set_attr "isa" "noavx,avx")
11696 (set_attr "type" "ssemov")
11697 (set_attr "prefix_extra" "1")
11698 (set_attr "length_immediate" "1")
11699 (set_attr "prefix" "orig,vex")
11700 (set_attr "mode" "TI")])
11702 ;; The builtin supplies an 8-bit immediate.  Widen it here: the low byte
;; is masked with 0xff and replicated into bits 8-15 (val << 8 | val), so
;; operand 3 becomes a 16-bit selector for the V16HI pattern.
11703 (define_expand "avx2_pblendw"
11704 [(set (match_operand:V16HI 0 "register_operand")
11706 (match_operand:V16HI 2 "nonimmediate_operand")
11707 (match_operand:V16HI 1 "register_operand")
11708 (match_operand:SI 3 "const_0_to_255_operand")))]
11711 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
11712 operands[3] = GEN_INT (val << 8 | val);
;; Matching insn for the 256-bit vpblendw.  Operand 3 must satisfy
;; avx2_pblendw_operand; before printing, the output code reduces it to its
;; low 8 bits (& 0xff) so that a single-byte immediate is emitted.
11715 (define_insn "*avx2_pblendw"
11716 [(set (match_operand:V16HI 0 "register_operand" "=x")
11718 (match_operand:V16HI 2 "nonimmediate_operand" "xm")
11719 (match_operand:V16HI 1 "register_operand" "x")
11720 (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
11723 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
11724 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11726 [(set_attr "type" "ssemov")
11727 (set_attr "prefix_extra" "1")
11728 (set_attr "length_immediate" "1")
11729 (set_attr "prefix" "vex")
11730 (set_attr "mode" "OI")])
;; vpblendd over VI4_AVX2 modes, modelled as a vec_merge whose first arm is
;; operand 2 and whose second arm is operand 1, with an 8-bit immediate
;; selector in operand 3 (const_0_to_255_operand).
11732 (define_insn "avx2_pblendd<mode>"
11733 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
11734 (vec_merge:VI4_AVX2
11735 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
11736 (match_operand:VI4_AVX2 1 "register_operand" "x")
11737 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
11739 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11740 [(set_attr "type" "ssemov")
11741 (set_attr "prefix_extra" "1")
11742 (set_attr "length_immediate" "1")
11743 (set_attr "prefix" "vex")
11744 (set_attr "mode" "<sseinsnmode>")])
;; phminposuw on V8HI, expressed as UNSPEC_PHMINPOSUW of the register or
;; memory operand 1.  The "%v" template prefix pairs with prefix
;; "maybe_vex".
11746 (define_insn "sse4_1_phminposuw"
11747 [(set (match_operand:V8HI 0 "register_operand" "=x")
11748 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
11749 UNSPEC_PHMINPOSUW))]
11751 "%vphminposuw\t{%1, %0|%0, %1}"
11752 [(set_attr "type" "sselog1")
11753 (set_attr "prefix_extra" "1")
11754 (set_attr "prefix" "maybe_vex")
11755 (set_attr "mode" "TI")])
;; AVX2 byte-to-word extension: the whole V16QI operand 1 is widened to
;; V16HI with vpmov<extsuffix>bw (<code>/<extsuffix> select the extension
;; kind).
11757 (define_insn "avx2_<code>v16qiv16hi2"
11758 [(set (match_operand:V16HI 0 "register_operand" "=x")
11760 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))]
11762 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}"
11763 [(set_attr "type" "ssemov")
11764 (set_attr "prefix_extra" "1")
11765 (set_attr "prefix" "vex")
11766 (set_attr "mode" "OI")])
;; SSE4.1 byte-to-word extension: elements 0-7 of the V16QI operand 1 are
;; widened to V8HI with %vpmov<extsuffix>bw.  The Intel-syntax source is
;; printed with the %q1 modifier and ssememalign is "64".
11768 (define_insn "sse4_1_<code>v8qiv8hi2"
11769 [(set (match_operand:V8HI 0 "register_operand" "=x")
11772 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11773 (parallel [(const_int 0) (const_int 1)
11774 (const_int 2) (const_int 3)
11775 (const_int 4) (const_int 5)
11776 (const_int 6) (const_int 7)]))))]
11778 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}"
11779 [(set_attr "type" "ssemov")
11780 (set_attr "ssememalign" "64")
11781 (set_attr "prefix_extra" "1")
11782 (set_attr "prefix" "maybe_vex")
11783 (set_attr "mode" "TI")])
;; AVX-512F byte-to-dword extension: the V16QI operand 1 is widened to
;; V16SI with vpmov<extsuffix>bd; the destination carries <mask_operand2>.
;; This name uses <mask_codefor> in addition to <mask_name>.
11785 (define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
11786 [(set (match_operand:V16SI 0 "register_operand" "=v")
11788 (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
11790 "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
11791 [(set_attr "type" "ssemov")
11792 (set_attr "prefix" "evex")
11793 (set_attr "mode" "XI")])
;; AVX2 byte-to-dword extension: elements 0-7 of the V16QI operand 1 are
;; widened to V8SI with vpmov<extsuffix>bd.  The Intel-syntax source is
;; printed with the %q1 modifier.
11795 (define_insn "avx2_<code>v8qiv8si2"
11796 [(set (match_operand:V8SI 0 "register_operand" "=x")
11799 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11800 (parallel [(const_int 0) (const_int 1)
11801 (const_int 2) (const_int 3)
11802 (const_int 4) (const_int 5)
11803 (const_int 6) (const_int 7)]))))]
11805 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
11806 [(set_attr "type" "ssemov")
11807 (set_attr "prefix_extra" "1")
11808 (set_attr "prefix" "vex")
11809 (set_attr "mode" "OI")])
;; SSE4.1 byte-to-dword extension: elements 0-3 of the V16QI operand 1 are
;; widened to V4SI with %vpmov<extsuffix>bd.  The Intel-syntax source is
;; printed with the %k1 modifier and ssememalign is "32".
11811 (define_insn "sse4_1_<code>v4qiv4si2"
11812 [(set (match_operand:V4SI 0 "register_operand" "=x")
11815 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11816 (parallel [(const_int 0) (const_int 1)
11817 (const_int 2) (const_int 3)]))))]
11819 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}"
11820 [(set_attr "type" "ssemov")
11821 (set_attr "ssememalign" "32")
11822 (set_attr "prefix_extra" "1")
11823 (set_attr "prefix" "maybe_vex")
11824 (set_attr "mode" "TI")])
;; AVX-512F word-to-dword extension: the whole V16HI operand 1 is widened
;; to V16SI with vpmov<extsuffix>wd; the destination carries
;; <mask_operand2>.
11826 (define_insn "avx512f_<code>v16hiv16si2<mask_name>"
11827 [(set (match_operand:V16SI 0 "register_operand" "=v")
11829 (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
11831 "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11832 [(set_attr "type" "ssemov")
11833 (set_attr "prefix" "evex")
11834 (set_attr "mode" "XI")])
;; AVX2 word-to-dword extension: the whole V8HI operand 1 is widened to
;; V8SI with vpmov<extsuffix>wd.
11836 (define_insn "avx2_<code>v8hiv8si2"
11837 [(set (match_operand:V8SI 0 "register_operand" "=x")
11839 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))]
11841 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
11842 [(set_attr "type" "ssemov")
11843 (set_attr "prefix_extra" "1")
11844 (set_attr "prefix" "vex")
11845 (set_attr "mode" "OI")])
;; SSE4.1 word-to-dword extension: elements 0-3 of the V8HI operand 1 are
;; widened to V4SI with %vpmov<extsuffix>wd.  The Intel-syntax source is
;; printed with the %q1 modifier and ssememalign is "64".
11847 (define_insn "sse4_1_<code>v4hiv4si2"
11848 [(set (match_operand:V4SI 0 "register_operand" "=x")
11851 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11852 (parallel [(const_int 0) (const_int 1)
11853 (const_int 2) (const_int 3)]))))]
11855 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}"
11856 [(set_attr "type" "ssemov")
11857 (set_attr "ssememalign" "64")
11858 (set_attr "prefix_extra" "1")
11859 (set_attr "prefix" "maybe_vex")
11860 (set_attr "mode" "TI")])
;; AVX-512F byte-to-qword extension: elements 0-7 of the V16QI operand 1
;; are widened to V8DI with vpmov<extsuffix>bq; the destination carries
;; <mask_operand2>.  The Intel-syntax source is printed with the %k1
;; modifier.
11862 (define_insn "avx512f_<code>v8qiv8di2<mask_name>"
11863 [(set (match_operand:V8DI 0 "register_operand" "=v")
11866 (match_operand:V16QI 1 "nonimmediate_operand" "vm")
11867 (parallel [(const_int 0) (const_int 1)
11868 (const_int 2) (const_int 3)
11869 (const_int 4) (const_int 5)
11870 (const_int 6) (const_int 7)]))))]
11872 "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
11873 [(set_attr "type" "ssemov")
11874 (set_attr "prefix" "evex")
11875 (set_attr "mode" "XI")])
;; AVX2 byte-to-qword extension: elements 0-3 of the V16QI operand 1 are
;; widened to V4DI with vpmov<extsuffix>bq.  The Intel-syntax source is
;; printed with the %k1 modifier.
11877 (define_insn "avx2_<code>v4qiv4di2"
11878 [(set (match_operand:V4DI 0 "register_operand" "=x")
11881 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11882 (parallel [(const_int 0) (const_int 1)
11883 (const_int 2) (const_int 3)]))))]
11885 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
11886 [(set_attr "type" "ssemov")
11887 (set_attr "prefix_extra" "1")
11888 (set_attr "prefix" "vex")
11889 (set_attr "mode" "OI")])
;; SSE4.1 byte-to-qword extension: elements 0-1 of the V16QI operand 1 are
;; widened to V2DI with %vpmov<extsuffix>bq.  The Intel-syntax source is
;; printed with the %w1 modifier and ssememalign is "16".
11891 (define_insn "sse4_1_<code>v2qiv2di2"
11892 [(set (match_operand:V2DI 0 "register_operand" "=x")
11895 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
11896 (parallel [(const_int 0) (const_int 1)]))))]
11898 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}"
11899 [(set_attr "type" "ssemov")
11900 (set_attr "ssememalign" "16")
11901 (set_attr "prefix_extra" "1")
11902 (set_attr "prefix" "maybe_vex")
11903 (set_attr "mode" "TI")])
;; AVX-512F word-to-qword extension: the whole V8HI operand 1 is widened to
;; V8DI with vpmov<extsuffix>wq; the destination carries <mask_operand2>.
;; The Intel-syntax source is printed with the %q1 modifier.
11905 (define_insn "avx512f_<code>v8hiv8di2<mask_name>"
11906 [(set (match_operand:V8DI 0 "register_operand" "=v")
11908 (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
11910 "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
11911 [(set_attr "type" "ssemov")
11912 (set_attr "prefix" "evex")
11913 (set_attr "mode" "XI")])
;; AVX2 word-to-qword extension: elements 0-3 of the V8HI operand 1 are
;; widened to V4DI with vpmov<extsuffix>wq.  The Intel-syntax source is
;; printed with the %q1 modifier.
11915 (define_insn "avx2_<code>v4hiv4di2"
11916 [(set (match_operand:V4DI 0 "register_operand" "=x")
11919 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11920 (parallel [(const_int 0) (const_int 1)
11921 (const_int 2) (const_int 3)]))))]
11923 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
11924 [(set_attr "type" "ssemov")
11925 (set_attr "prefix_extra" "1")
11926 (set_attr "prefix" "vex")
11927 (set_attr "mode" "OI")])
;; SSE4.1 word-to-qword extension: elements 0-1 of the V8HI operand 1 are
;; widened to V2DI with %vpmov<extsuffix>wq.  The Intel-syntax source is
;; printed with the %k1 modifier and ssememalign is "32".
11929 (define_insn "sse4_1_<code>v2hiv2di2"
11930 [(set (match_operand:V2DI 0 "register_operand" "=x")
11933 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
11934 (parallel [(const_int 0) (const_int 1)]))))]
11936 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}"
11937 [(set_attr "type" "ssemov")
11938 (set_attr "ssememalign" "32")
11939 (set_attr "prefix_extra" "1")
11940 (set_attr "prefix" "maybe_vex")
11941 (set_attr "mode" "TI")])
;; AVX-512F dword-to-qword extension: the whole V8SI operand 1 is widened
;; to V8DI with vpmov<extsuffix>dq; the destination carries
;; <mask_operand2>.
11943 (define_insn "avx512f_<code>v8siv8di2<mask_name>"
11944 [(set (match_operand:V8DI 0 "register_operand" "=v")
11946 (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
11948 "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
11949 [(set_attr "type" "ssemov")
11950 (set_attr "prefix" "evex")
11951 (set_attr "mode" "XI")])
;; AVX2 dword-to-qword extension: the whole V4SI operand 1 is widened to
;; V4DI with vpmov<extsuffix>dq.
;; NOTE(review): unlike the other avx2_<code>* extension patterns in this
;; file, no explicit "prefix" attribute is set here; presumably the
;; attribute's default for mode "OI" supplies it -- confirm in i386.md.
11953 (define_insn "avx2_<code>v4siv4di2"
11954 [(set (match_operand:V4DI 0 "register_operand" "=x")
11956 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
11958 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
11959 [(set_attr "type" "ssemov")
11960 (set_attr "prefix_extra" "1")
11961 (set_attr "mode" "OI")])
;; SSE4.1 dword-to-qword extension: elements 0-1 of the V4SI operand 1 are
;; widened to V2DI with %vpmov<extsuffix>dq.  The Intel-syntax source is
;; printed with the %q1 modifier and ssememalign is "64".
11963 (define_insn "sse4_1_<code>v2siv2di2"
11964 [(set (match_operand:V2DI 0 "register_operand" "=x")
11967 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
11968 (parallel [(const_int 0) (const_int 1)]))))]
11970 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}"
11971 [(set_attr "type" "ssemov")
11972 (set_attr "ssememalign" "64")
11973 (set_attr "prefix_extra" "1")
11974 (set_attr "prefix" "maybe_vex")
11975 (set_attr "mode" "TI")])
11977 ;; vtestps/vtestpd are very similar to comiss and ucomiss in that they
11978 ;; set FLAGS_REG, but they are not really compare instructions.
;; The result is modelled as a CC-mode unspec of operands 0 and 1 over
;; VF_128_256 modes; there is no vector destination.
11979 (define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
11980 [(set (reg:CC FLAGS_REG)
11981 (unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
11982 (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
11985 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
11986 [(set_attr "type" "ssecomi")
11987 (set_attr "prefix_extra" "1")
11988 (set_attr "prefix" "vex")
11989 (set_attr "mode" "<MODE>")])
11991 ;; ptest is very similar to comiss and ucomiss in that it sets FLAGS_REG,
11992 ;; but it is not really a compare instruction.
;; 256-bit form: a CC-mode unspec of the two V4DI operands, emitted as
;; vptest.
11993 (define_insn "avx_ptest256"
11994 [(set (reg:CC FLAGS_REG)
11995 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
11996 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
11999 "vptest\t{%1, %0|%0, %1}"
12000 [(set_attr "type" "ssecomi")
12001 (set_attr "prefix_extra" "1")
12002 (set_attr "prefix" "vex")
12003 (set_attr "btver2_decode" "vector")
12004 (set_attr "mode" "OI")])
;; 128-bit ptest: sets FLAGS_REG from a CC-mode unspec of the two V2DI
;; operands.  Emitted as %vptest with prefix "maybe_vex".
12006 (define_insn "sse4_1_ptest"
12007 [(set (reg:CC FLAGS_REG)
12008 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
12009 (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
12012 "%vptest\t{%1, %0|%0, %1}"
12013 [(set_attr "type" "ssecomi")
12014 (set_attr "prefix_extra" "1")
12015 (set_attr "prefix" "maybe_vex")
12016 (set_attr "mode" "TI")])
;; Packed round over VF_128_256 modes: operand 1 is rounded according to
;; the immediate in operand 2 (const_0_to_15_operand) and emitted as
;; %vround<ssemodesuffix>.  The prefix_data16 attribute is computed from a
;; TARGET_AVX test rather than given as a fixed string.
12018 (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
12019 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
12021 [(match_operand:VF_128_256 1 "nonimmediate_operand" "xm")
12022 (match_operand:SI 2 "const_0_to_15_operand" "n")]
12025 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12026 [(set_attr "type" "ssecvt")
12027 (set (attr "prefix_data16")
12029 (match_test "TARGET_AVX")
12031 (const_string "1")))
12032 (set_attr "prefix_extra" "1")
12033 (set_attr "length_immediate" "1")
12034 (set_attr "prefix" "maybe_vex")
12035 (set_attr "mode" "<MODE>")])
;; Round-then-convert expander over VF1_128_256 modes: operand 1 is rounded
;; into a fresh temporary of the same vector mode using the packed round
;; pattern, and the temporary is then converted into the <sseintvecmode>
;; operand 0 via gen_fix_trunc<mode><sseintvecmodelower>2.
12037 (define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
12038 [(match_operand:<sseintvecmode> 0 "register_operand")
12039 (match_operand:VF1_128_256 1 "nonimmediate_operand")
12040 (match_operand:SI 2 "const_0_to_15_operand")]
12043 rtx tmp = gen_reg_rtx (<MODE>mode);
12046 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
12049 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; V8DF rounding expander.  It passes its three operands (destination,
;; source, immediate 0..15) straight through to avx512f_rndscalev8df.
12053 (define_expand "avx512f_roundpd512"
12054 [(match_operand:V8DF 0 "register_operand")
12055 (match_operand:V8DF 1 "nonimmediate_operand")
12056 (match_operand:SI 2 "const_0_to_15_operand")]
12059 emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
;; Round two VF2 vectors (operands 1 and 2) using the immediate rounding
;; control in operand 3, convert to integers and pack into operand 0.
;; Two strategies are visible in the preparation code:
;;  - V2DF with TARGET_AVX and !TARGET_PREFER_AVX128: concatenate both
;;    inputs into one V4DF, then do one 256-bit round and one
;;    fix_truncv4dfv4si2.
;;  - otherwise: round each input into its own temporary and combine the
;;    pair with vec_pack_sfix_trunc_<mode>.
12063 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
12064 [(match_operand:<ssepackfltmode> 0 "register_operand")
12065 (match_operand:VF2 1 "nonimmediate_operand")
12066 (match_operand:VF2 2 "nonimmediate_operand")
12067 (match_operand:SI 3 "const_0_to_15_operand")]
12072 if (<MODE>mode == V2DFmode
12073 && TARGET_AVX && !TARGET_PREFER_AVX128)
12075 rtx tmp2 = gen_reg_rtx (V4DFmode);
12077 tmp0 = gen_reg_rtx (V4DFmode);
12078 tmp1 = force_reg (V2DFmode, operands[1]);
12080 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
12081 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
12082 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
12086 tmp0 = gen_reg_rtx (<MODE>mode);
12087 tmp1 = gen_reg_rtx (<MODE>mode);
12090 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
12093 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
12096 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
;; Scalar rounding on VF_128 operands.  Operand 2 is the value to round,
;; operand 3 the immediate rounding control, operand 1 the other vector
;; input.  Alternative 0 (isa "noavx") ties operand 1 to the destination
;; and prints the two-register form; alternative 1 (isa "avx") prints the
;; three-register "v"-prefixed form with operand 1 as a separate source.
12101 (define_insn "sse4_1_round<ssescalarmodesuffix>"
12102 [(set (match_operand:VF_128 0 "register_operand" "=x,x")
12105 [(match_operand:VF_128 2 "register_operand" "x,x")
12106 (match_operand:SI 3 "const_0_to_15_operand" "n,n")]
12108 (match_operand:VF_128 1 "register_operand" "0,x")
12112 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
12113 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12114 [(set_attr "isa" "noavx,avx")
12115 (set_attr "type" "ssecvt")
12116 (set_attr "length_immediate" "1")
12117 (set_attr "prefix_data16" "1,*")
12118 (set_attr "prefix_extra" "1")
12119 (set_attr "prefix" "orig,vex")
12120 (set_attr "mode" "<MODE>")])
;; Vector round expander for VF modes, enabled only when TARGET_ROUND is
;; set and flag_trapping_math is off.
;; The preparation code builds a constant vector whose elements are
;; nextafter (0.5, 0.0) in the element format, copies the sign of
;; operand 1 onto it with copysign (result in operands[3]), allocates
;; operands[4] as a temporary and sets operands[5] to ROUND_TRUNC.
;; The pattern then sets operands[4] first and computes operand 0 from
;; operands[4] and operands[5].
12122 (define_expand "round<mode>2"
12123 [(set (match_dup 4)
12125 (match_operand:VF 1 "register_operand")
12127 (set (match_operand:VF 0 "register_operand")
12129 [(match_dup 4) (match_dup 5)]
12131 "TARGET_ROUND && !flag_trapping_math"
12133 enum machine_mode scalar_mode;
12134 const struct real_format *fmt;
12135 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
12136 rtx half, vec_half;
12138 scalar_mode = GET_MODE_INNER (<MODE>mode);
12140 /* load nextafter (0.5, 0.0) */
12141 fmt = REAL_MODE_FORMAT (scalar_mode);
12142 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
12143 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
12144 half = const_double_from_real_value (pred_half, scalar_mode);
12146 vec_half = ix86_build_const_vector (<MODE>mode, true, half);
12147 vec_half = force_reg (<MODE>mode, vec_half);
12149 operands[3] = gen_reg_rtx (<MODE>mode);
12150 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
12152 operands[4] = gen_reg_rtx (<MODE>mode);
12153 operands[5] = GEN_INT (ROUND_TRUNC);
;; Round operand 1 with round<mode>2 into a temporary, then convert the
;; temporary to the integer vector in operand 0 with fix_trunc.
;; Same enabling condition as round<mode>2.
12156 (define_expand "round<mode>2_sfix"
12157 [(match_operand:<sseintvecmode> 0 "register_operand")
12158 (match_operand:VF1_128_256 1 "register_operand")]
12159 "TARGET_ROUND && !flag_trapping_math"
12161 rtx tmp = gen_reg_rtx (<MODE>mode);
12163 emit_insn (gen_round<mode>2 (tmp, operands[1]));
12166 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
;; Round two VF2 vectors (operands 1 and 2), convert to integers and pack
;; into operand 0.  Two strategies are visible in the preparation code:
;;  - V2DF with TARGET_AVX and !TARGET_PREFER_AVX128: concatenate both
;;    inputs into one V4DF, round it once with roundv4df2 and convert
;;    with fix_truncv4dfv4si2.
;;  - otherwise: round each input separately with round<mode>2 and
;;    combine the pair with vec_pack_sfix_trunc_<mode>.
12170 (define_expand "round<mode>2_vec_pack_sfix"
12171 [(match_operand:<ssepackfltmode> 0 "register_operand")
12172 (match_operand:VF2 1 "register_operand")
12173 (match_operand:VF2 2 "register_operand")]
12174 "TARGET_ROUND && !flag_trapping_math"
12178 if (<MODE>mode == V2DFmode
12179 && TARGET_AVX && !TARGET_PREFER_AVX128)
12181 rtx tmp2 = gen_reg_rtx (V4DFmode);
12183 tmp0 = gen_reg_rtx (V4DFmode);
12184 tmp1 = force_reg (V2DFmode, operands[1]);
12186 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
12187 emit_insn (gen_roundv4df2 (tmp2, tmp0));
12188 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
12192 tmp0 = gen_reg_rtx (<MODE>mode);
12193 tmp1 = gen_reg_rtx (<MODE>mode);
12195 emit_insn (gen_round<mode>2 (tmp0, operands[1]));
12196 emit_insn (gen_round<mode>2 (tmp1, operands[2]));
12199 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
12204 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12206 ;; Intel SSE4.2 string/text processing instructions
12208 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Combined explicit-length string compare.  The pattern has three
;; results: an SI value in operand 0 (constraint "c"), a V16QI value in
;; operand 1 (constraint "Yz") and FLAGS_REG.  Operands 2/4 are the two
;; strings, operands 3/5 their lengths (constraints "a" and "d"), and
;; operand 6 is the 8-bit control immediate.
;; The splitter reads the REG_UNUSED notes on curr_insn to see which
;; results are live.  It can emit sse4_2_pcmpestri for operand 0 and
;; sse4_2_pcmpestrm for operand 1; when only the flags are live it emits
;; the _cconly form, and when nothing is live it emits NOTE_INSN_DELETED.
12210 (define_insn_and_split "sse4_2_pcmpestr"
12211 [(set (match_operand:SI 0 "register_operand" "=c,c")
12213 [(match_operand:V16QI 2 "register_operand" "x,x")
12214 (match_operand:SI 3 "register_operand" "a,a")
12215 (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
12216 (match_operand:SI 5 "register_operand" "d,d")
12217 (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
12219 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
12227 (set (reg:CC FLAGS_REG)
12236 && can_create_pseudo_p ()"
12241 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12242 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12243 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12246 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
12247 operands[3], operands[4],
12248 operands[5], operands[6]));
12250 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
12251 operands[3], operands[4],
12252 operands[5], operands[6]));
12253 if (flags && !(ecx || xmm0))
12254 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
12255 operands[2], operands[3],
12256 operands[4], operands[5],
12258 if (!(flags || ecx || xmm0))
12259 emit_note (NOTE_INSN_DELETED);
12263 [(set_attr "type" "sselog")
12264 (set_attr "prefix_data16" "1")
12265 (set_attr "prefix_extra" "1")
12266 (set_attr "ssememalign" "8")
12267 (set_attr "length_immediate" "1")
12268 (set_attr "memory" "none,load")
12269 (set_attr "mode" "TI")])
;; Variant of sse4_2_pcmpestr whose second string (operand 4) is a memory
;; operand wrapped in UNSPEC_LOADU.  The split code follows the same
;; scheme: it checks the REG_UNUSED notes on curr_insn and emits
;; pcmpestri / pcmpestrm / the _cconly form, or NOTE_INSN_DELETED when no
;; result is live, passing the memory operand directly.
12271 (define_insn_and_split "*sse4_2_pcmpestr_unaligned"
12272 [(set (match_operand:SI 0 "register_operand" "=c")
12274 [(match_operand:V16QI 2 "register_operand" "x")
12275 (match_operand:SI 3 "register_operand" "a")
12277 [(match_operand:V16QI 4 "memory_operand" "m")]
12279 (match_operand:SI 5 "register_operand" "d")
12280 (match_operand:SI 6 "const_0_to_255_operand" "n")]
12282 (set (match_operand:V16QI 1 "register_operand" "=Yz")
12286 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
12290 (set (reg:CC FLAGS_REG)
12294 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
12299 && can_create_pseudo_p ()"
12304 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12305 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12306 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12309 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
12310 operands[3], operands[4],
12311 operands[5], operands[6]));
12313 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
12314 operands[3], operands[4],
12315 operands[5], operands[6]));
12316 if (flags && !(ecx || xmm0))
12317 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
12318 operands[2], operands[3],
12319 operands[4], operands[5],
12321 if (!(flags || ecx || xmm0))
12322 emit_note (NOTE_INSN_DELETED);
12326 [(set_attr "type" "sselog")
12327 (set_attr "prefix_data16" "1")
12328 (set_attr "prefix_extra" "1")
12329 (set_attr "ssememalign" "8")
12330 (set_attr "length_immediate" "1")
12331 (set_attr "memory" "load")
12332 (set_attr "mode" "TI")])
;; PCMPESTRI: SI result in operand 0 (constraint "c"), plus FLAGS_REG.
;; The length operands 2 and 4 (constraints "a" and "d") are not printed
;; in the template; only the two strings and the control immediate are.
12334 (define_insn "sse4_2_pcmpestri"
12335 [(set (match_operand:SI 0 "register_operand" "=c,c")
12337 [(match_operand:V16QI 1 "register_operand" "x,x")
12338 (match_operand:SI 2 "register_operand" "a,a")
12339 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12340 (match_operand:SI 4 "register_operand" "d,d")
12341 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
12343 (set (reg:CC FLAGS_REG)
12352 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
12353 [(set_attr "type" "sselog")
12354 (set_attr "prefix_data16" "1")
12355 (set_attr "prefix_extra" "1")
12356 (set_attr "prefix" "maybe_vex")
12357 (set_attr "ssememalign" "8")
12358 (set_attr "length_immediate" "1")
12359 (set_attr "btver2_decode" "vector")
12360 (set_attr "memory" "none,load")
12361 (set_attr "mode" "TI")])
;; PCMPESTRM: V16QI result in operand 0 (constraint "Yz"), plus FLAGS_REG.
;; The length operands 2 and 4 (constraints "a" and "d") are not printed
;; in the template; only the two strings and the control immediate are.
12363 (define_insn "sse4_2_pcmpestrm"
12364 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
12366 [(match_operand:V16QI 1 "register_operand" "x,x")
12367 (match_operand:SI 2 "register_operand" "a,a")
12368 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12369 (match_operand:SI 4 "register_operand" "d,d")
12370 (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
12372 (set (reg:CC FLAGS_REG)
12381 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
12382 [(set_attr "type" "sselog")
12383 (set_attr "prefix_data16" "1")
12384 (set_attr "prefix_extra" "1")
12385 (set_attr "ssememalign" "8")
12386 (set_attr "length_immediate" "1")
12387 (set_attr "prefix" "maybe_vex")
12388 (set_attr "btver2_decode" "vector")
12389 (set_attr "memory" "none,load")
12390 (set_attr "mode" "TI")])
;; Flags-only explicit-length string compare: FLAGS_REG is the only set.
;; Alternatives 0-1 print pcmpestrm and clobber a V16QI scratch with
;; constraint "Yz"; alternatives 2-3 print pcmpestri and clobber an SI
;; scratch with constraint "c".  Within each pair the second alternative
;; takes the second string from memory.
12392 (define_insn "sse4_2_pcmpestr_cconly"
12393 [(set (reg:CC FLAGS_REG)
12395 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
12396 (match_operand:SI 3 "register_operand" "a,a,a,a")
12397 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
12398 (match_operand:SI 5 "register_operand" "d,d,d,d")
12399 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
12401 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
12402 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
12405 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
12406 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
12407 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
12408 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
12409 [(set_attr "type" "sselog")
12410 (set_attr "prefix_data16" "1")
12411 (set_attr "prefix_extra" "1")
12412 (set_attr "ssememalign" "8")
12413 (set_attr "length_immediate" "1")
12414 (set_attr "memory" "none,load,none,load")
12415 (set_attr "btver2_decode" "vector,vector,vector,vector")
12416 (set_attr "prefix" "maybe_vex")
12417 (set_attr "mode" "TI")])
;; Combined implicit-length string compare.  Results: an SI value in
;; operand 0 (constraint "c"), a V16QI value in operand 1 (constraint
;; "Yz") and FLAGS_REG.  Operands 2/3 are the two strings and operand 4
;; is the 8-bit control immediate; there are no length operands.
;; The splitter reads the REG_UNUSED notes on curr_insn to see which
;; results are live.  It can emit sse4_2_pcmpistri for operand 0 and
;; sse4_2_pcmpistrm for operand 1; when only the flags are live it emits
;; the _cconly form, and when nothing is live it emits NOTE_INSN_DELETED.
12419 (define_insn_and_split "sse4_2_pcmpistr"
12420 [(set (match_operand:SI 0 "register_operand" "=c,c")
12422 [(match_operand:V16QI 2 "register_operand" "x,x")
12423 (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
12424 (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
12426 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
12432 (set (reg:CC FLAGS_REG)
12439 && can_create_pseudo_p ()"
12444 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12445 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12446 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12449 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
12450 operands[3], operands[4]));
12452 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
12453 operands[3], operands[4]));
12454 if (flags && !(ecx || xmm0))
12455 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
12456 operands[2], operands[3],
12458 if (!(flags || ecx || xmm0))
12459 emit_note (NOTE_INSN_DELETED);
12463 [(set_attr "type" "sselog")
12464 (set_attr "prefix_data16" "1")
12465 (set_attr "prefix_extra" "1")
12466 (set_attr "ssememalign" "8")
12467 (set_attr "length_immediate" "1")
12468 (set_attr "memory" "none,load")
12469 (set_attr "mode" "TI")])
;; Variant of sse4_2_pcmpistr whose second string (operand 3) is a memory
;; operand wrapped in UNSPEC_LOADU.  The split code follows the same
;; scheme: it checks the REG_UNUSED notes on curr_insn and emits
;; pcmpistri / pcmpistrm / the _cconly form, or NOTE_INSN_DELETED when no
;; result is live, passing the memory operand directly.
12471 (define_insn_and_split "*sse4_2_pcmpistr_unaligned"
12472 [(set (match_operand:SI 0 "register_operand" "=c")
12474 [(match_operand:V16QI 2 "register_operand" "x")
12476 [(match_operand:V16QI 3 "memory_operand" "m")]
12478 (match_operand:SI 4 "const_0_to_255_operand" "n")]
12480 (set (match_operand:V16QI 1 "register_operand" "=Yz")
12483 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
12486 (set (reg:CC FLAGS_REG)
12489 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
12493 && can_create_pseudo_p ()"
12498 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
12499 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
12500 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
12503 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
12504 operands[3], operands[4]));
12506 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
12507 operands[3], operands[4]));
12508 if (flags && !(ecx || xmm0))
12509 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
12510 operands[2], operands[3],
12512 if (!(flags || ecx || xmm0))
12513 emit_note (NOTE_INSN_DELETED);
12517 [(set_attr "type" "sselog")
12518 (set_attr "prefix_data16" "1")
12519 (set_attr "prefix_extra" "1")
12520 (set_attr "ssememalign" "8")
12521 (set_attr "length_immediate" "1")
12522 (set_attr "memory" "load")
12523 (set_attr "mode" "TI")])
;; PCMPISTRI: SI result in operand 0 (constraint "c"), plus FLAGS_REG.
;; Operand 2 may be a register or memory; operand 3 is the control
;; immediate.
12525 (define_insn "sse4_2_pcmpistri"
12526 [(set (match_operand:SI 0 "register_operand" "=c,c")
12528 [(match_operand:V16QI 1 "register_operand" "x,x")
12529 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
12530 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
12532 (set (reg:CC FLAGS_REG)
12539 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
12540 [(set_attr "type" "sselog")
12541 (set_attr "prefix_data16" "1")
12542 (set_attr "prefix_extra" "1")
12543 (set_attr "ssememalign" "8")
12544 (set_attr "length_immediate" "1")
12545 (set_attr "prefix" "maybe_vex")
12546 (set_attr "memory" "none,load")
12547 (set_attr "btver2_decode" "vector")
12548 (set_attr "mode" "TI")])
;; PCMPISTRM: V16QI result in operand 0 (constraint "Yz"), plus FLAGS_REG.
;; Operand 2 may be a register or memory; operand 3 is the control
;; immediate.
12550 (define_insn "sse4_2_pcmpistrm"
12551 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
12553 [(match_operand:V16QI 1 "register_operand" "x,x")
12554 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
12555 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
12557 (set (reg:CC FLAGS_REG)
12564 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
12565 [(set_attr "type" "sselog")
12566 (set_attr "prefix_data16" "1")
12567 (set_attr "prefix_extra" "1")
12568 (set_attr "ssememalign" "8")
12569 (set_attr "length_immediate" "1")
12570 (set_attr "prefix" "maybe_vex")
12571 (set_attr "memory" "none,load")
12572 (set_attr "btver2_decode" "vector")
12573 (set_attr "mode" "TI")])
;; Flags-only implicit-length string compare: FLAGS_REG is the only set.
;; Alternatives 0-1 print pcmpistrm and clobber a V16QI scratch with
;; constraint "Yz"; alternatives 2-3 print pcmpistri and clobber an SI
;; scratch with constraint "c".  Within each pair the second alternative
;; takes the second string from memory.
12575 (define_insn "sse4_2_pcmpistr_cconly"
12576 [(set (reg:CC FLAGS_REG)
12578 [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
12579 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
12580 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
12582 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
12583 (clobber (match_scratch:SI 1 "= X, X,c,c"))]
12586 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
12587 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
12588 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
12589 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
12590 [(set_attr "type" "sselog")
12591 (set_attr "prefix_data16" "1")
12592 (set_attr "prefix_extra" "1")
12593 (set_attr "ssememalign" "8")
12594 (set_attr "length_immediate" "1")
12595 (set_attr "memory" "none,load,none,load")
12596 (set_attr "prefix" "maybe_vex")
12597 (set_attr "btver2_decode" "vector,vector,vector,vector")
12598 (set_attr "mode" "TI")])
12600 ;; Packed float variants
;; Maps the index-vector mode of the "sf" gather/scatter prefetch
;; patterns to the float vector mode used for their memory operand:
;; V8DI -> V8SF, V16SI -> V16SF.
12601 (define_mode_attr GATHER_SCATTER_SF_MEM_MODE
12602 [(V8DI "V8SF") (V16SI "V16SF")])
;; Expander for the single-precision gather prefetch.  Operand 0 is the
;; mask (a register or constant -1), operand 2 the base address, operand 1
;; the VI48_512 index vector, operand 3 the scale (const1248_operand) and
;; operand 4 the hint selector (const_2_to_3_operand).  The preparation
;; code wraps operands 2, 1 and 3 in an UNSPEC_VSIBADDR address.
12604 (define_expand "avx512pf_gatherpf<mode>sf"
12606 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12607 (mem:<GATHER_SCATTER_SF_MEM_MODE>
12609 [(match_operand 2 "vsib_address_operand")
12610 (match_operand:VI48_512 1 "register_operand")
12611 (match_operand:SI 3 "const1248_operand")]))
12612 (match_operand:SI 4 "const_2_to_3_operand")]
12613 UNSPEC_GATHER_PREFETCH)]
12617 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12618 operands[3]), UNSPEC_VSIBADDR);
;; Masked single-precision gather prefetch.  The mask is a register
;; (constraint "Yk") and is printed in braces after the VSIB memory
;; operand %5.  The switch on operand 4 chooses between the vgatherpf0
;; and vgatherpf1 mnemonics.
12621 (define_insn "*avx512pf_gatherpf<mode>sf_mask"
12623 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
12624 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
12626 [(match_operand:P 2 "vsib_address_operand" "Tv")
12627 (match_operand:VI48_512 1 "register_operand" "v")
12628 (match_operand:SI 3 "const1248_operand" "n")]
12630 (match_operand:SI 4 "const_2_to_3_operand" "n")]
12631 UNSPEC_GATHER_PREFETCH)]
12634 switch (INTVAL (operands[4]))
12637 return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12639 return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12641 gcc_unreachable ();
12644 [(set_attr "type" "sse")
12645 (set_attr "prefix" "evex")
12646 (set_attr "mode" "XI")])
;; Single-precision gather prefetch with no mask register operand; only
;; the VSIB memory operand %4 is printed.  The switch on operand 3
;; chooses between the vgatherpf0 and vgatherpf1 mnemonics.
12648 (define_insn "*avx512pf_gatherpf<mode>sf"
12651 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
12653 [(match_operand:P 1 "vsib_address_operand" "Tv")
12654 (match_operand:VI48_512 0 "register_operand" "v")
12655 (match_operand:SI 2 "const1248_operand" "n")]
12657 (match_operand:SI 3 "const_2_to_3_operand" "n")]
12658 UNSPEC_GATHER_PREFETCH)]
12661 switch (INTVAL (operands[3]))
12664 return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
12666 return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
12668 gcc_unreachable ();
12671 [(set_attr "type" "sse")
12672 (set_attr "prefix" "evex")
12673 (set_attr "mode" "XI")])
12675 ;; Packed double variants
;; Expander for the double-precision gather prefetch.  Operand 0 is the
;; mask (a register or constant -1), operand 2 the base address, operand 1
;; the VI4_256_8_512 index vector, operand 3 the scale (const1248_operand)
;; and operand 4 the hint selector (const_2_to_3_operand).  The
;; preparation code wraps operands 2, 1 and 3 in an UNSPEC_VSIBADDR
;; address.
12676 (define_expand "avx512pf_gatherpf<mode>df"
12678 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12681 [(match_operand 2 "vsib_address_operand")
12682 (match_operand:VI4_256_8_512 1 "register_operand")
12683 (match_operand:SI 3 "const1248_operand")]))
12684 (match_operand:SI 4 "const_2_to_3_operand")]
12685 UNSPEC_GATHER_PREFETCH)]
12689 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12690 operands[3]), UNSPEC_VSIBADDR);
;; Masked double-precision gather prefetch over a V8DF VSIB memory
;; operand.  The mask register (constraint "Yk") is printed in braces
;; after %5.  The switch on operand 4 chooses between the vgatherpf0 and
;; vgatherpf1 mnemonics.
12693 (define_insn "*avx512pf_gatherpf<mode>df_mask"
12695 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
12696 (match_operator:V8DF 5 "vsib_mem_operator"
12698 [(match_operand:P 2 "vsib_address_operand" "Tv")
12699 (match_operand:VI4_256_8_512 1 "register_operand" "v")
12700 (match_operand:SI 3 "const1248_operand" "n")]
12702 (match_operand:SI 4 "const_2_to_3_operand" "n")]
12703 UNSPEC_GATHER_PREFETCH)]
12706 switch (INTVAL (operands[4]))
12709 return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
12711 return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
12713 gcc_unreachable ();
12716 [(set_attr "type" "sse")
12717 (set_attr "prefix" "evex")
12718 (set_attr "mode" "XI")])
;; Double-precision gather prefetch with no mask register operand; only
;; the V8DF VSIB memory operand %4 is printed.  The switch on operand 3
;; chooses between the vgatherpf0 and vgatherpf1 mnemonics.
12720 (define_insn "*avx512pf_gatherpf<mode>df"
12723 (match_operator:V8DF 4 "vsib_mem_operator"
12725 [(match_operand:P 1 "vsib_address_operand" "Tv")
12726 (match_operand:VI4_256_8_512 0 "register_operand" "v")
12727 (match_operand:SI 2 "const1248_operand" "n")]
12729 (match_operand:SI 3 "const_2_to_3_operand" "n")]
12730 UNSPEC_GATHER_PREFETCH)]
12733 switch (INTVAL (operands[3]))
12736 return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
12738 return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
12740 gcc_unreachable ();
12743 [(set_attr "type" "sse")
12744 (set_attr "prefix" "evex")
12745 (set_attr "mode" "XI")])
12747 ;; Packed float variants
;; Expander for the single-precision scatter prefetch.  Operand 0 is the
;; mask (a register or constant -1), operand 2 the base address, operand 1
;; the VI48_512 index vector, operand 3 the scale (const1248_operand) and
;; operand 4 the hint selector, which here must satisfy const2367_operand.
;; The preparation code wraps operands 2, 1 and 3 in an UNSPEC_VSIBADDR
;; address.
12748 (define_expand "avx512pf_scatterpf<mode>sf"
12750 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12751 (mem:<GATHER_SCATTER_SF_MEM_MODE>
12753 [(match_operand 2 "vsib_address_operand")
12754 (match_operand:VI48_512 1 "register_operand")
12755 (match_operand:SI 3 "const1248_operand")]))
12756 (match_operand:SI 4 "const2367_operand")]
12757 UNSPEC_SCATTER_PREFETCH)]
12761 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12762 operands[3]), UNSPEC_VSIBADDR);
;; Masked single-precision scatter prefetch.  The mask register
;; (constraint "Yk") is printed in braces after the VSIB memory operand
;; %5.  The switch on operand 4 chooses between the vscatterpf0 and
;; vscatterpf1 mnemonics.
12765 (define_insn "*avx512pf_scatterpf<mode>sf_mask"
12767 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
12768 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
12770 [(match_operand:P 2 "vsib_address_operand" "Tv")
12771 (match_operand:VI48_512 1 "register_operand" "v")
12772 (match_operand:SI 3 "const1248_operand" "n")]
12774 (match_operand:SI 4 "const2367_operand" "n")]
12775 UNSPEC_SCATTER_PREFETCH)]
12778 switch (INTVAL (operands[4]))
12782 return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12785 return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
12787 gcc_unreachable ();
12790 [(set_attr "type" "sse")
12791 (set_attr "prefix" "evex")
12792 (set_attr "mode" "XI")])
;; Single-precision scatter prefetch with no mask register operand; only
;; the VSIB memory operand %4 is printed.  The switch on operand 3
;; chooses between the vscatterpf0 and vscatterpf1 mnemonics.
12794 (define_insn "*avx512pf_scatterpf<mode>sf"
12797 (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
12799 [(match_operand:P 1 "vsib_address_operand" "Tv")
12800 (match_operand:VI48_512 0 "register_operand" "v")
12801 (match_operand:SI 2 "const1248_operand" "n")]
12803 (match_operand:SI 3 "const2367_operand" "n")]
12804 UNSPEC_SCATTER_PREFETCH)]
12807 switch (INTVAL (operands[3]))
12811 return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
12814 return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
12816 gcc_unreachable ();
12819 [(set_attr "type" "sse")
12820 (set_attr "prefix" "evex")
12821 (set_attr "mode" "XI")])
12823 ;; Packed double variants
;; Expander for the double-precision scatter prefetch.  Operand 0 is the
;; mask (a register or constant -1), operand 2 the base address, operand 1
;; the VI4_256_8_512 index vector, operand 3 the scale (const1248_operand)
;; and operand 4 the hint selector (const2367_operand).  The preparation
;; code wraps operands 2, 1 and 3 in an UNSPEC_VSIBADDR address.
12824 (define_expand "avx512pf_scatterpf<mode>df"
12826 [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
12829 [(match_operand 2 "vsib_address_operand")
12830 (match_operand:VI4_256_8_512 1 "register_operand")
12831 (match_operand:SI 3 "const1248_operand")]))
12832 (match_operand:SI 4 "const2367_operand")]
12833 UNSPEC_SCATTER_PREFETCH)]
12837 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
12838 operands[3]), UNSPEC_VSIBADDR);
;; Masked double-precision scatter prefetch over a V8DF VSIB memory
;; operand.  The mask register (constraint "Yk") is printed in braces
;; after %5.  The switch on operand 4 chooses between the vscatterpf0
;; and vscatterpf1 mnemonics.
12841 (define_insn "*avx512pf_scatterpf<mode>df_mask"
12843 [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
12844 (match_operator:V8DF 5 "vsib_mem_operator"
12846 [(match_operand:P 2 "vsib_address_operand" "Tv")
12847 (match_operand:VI4_256_8_512 1 "register_operand" "v")
12848 (match_operand:SI 3 "const1248_operand" "n")]
12850 (match_operand:SI 4 "const2367_operand" "n")]
12851 UNSPEC_SCATTER_PREFETCH)]
12854 switch (INTVAL (operands[4]))
12858 return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
12861 return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
12863 gcc_unreachable ();
12866 [(set_attr "type" "sse")
12867 (set_attr "prefix" "evex")
12868 (set_attr "mode" "XI")])
;; Double-precision scatter prefetch with no mask register operand; only
;; the V8DF VSIB memory operand %4 is printed.  The switch on operand 3
;; chooses between the vscatterpf0 and vscatterpf1 mnemonics.
12870 (define_insn "*avx512pf_scatterpf<mode>df"
12873 (match_operator:V8DF 4 "vsib_mem_operator"
12875 [(match_operand:P 1 "vsib_address_operand" "Tv")
12876 (match_operand:VI4_256_8_512 0 "register_operand" "v")
12877 (match_operand:SI 2 "const1248_operand" "n")]
12879 (match_operand:SI 3 "const2367_operand" "n")]
12880 UNSPEC_SCATTER_PREFETCH)]
12883 switch (INTVAL (operands[3]))
12887 return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
12890 return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
12892 gcc_unreachable ();
12895 [(set_attr "type" "sse")
12896 (set_attr "prefix" "evex")
12897 (set_attr "mode" "XI")])
;; vexp2ps/vexp2pd on 512-bit float vectors (VF_512).  The pattern name
;; and template carry the <mask_name> and <round_saeonly_name>
;; substitutions, so the masked and SAE-only forms are generated from
;; this one definition.
12899 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
12900 [(set (match_operand:VF_512 0 "register_operand" "=v")
12902 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
12905 "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
12906 [(set_attr "prefix" "evex")
12907 (set_attr "type" "sse")
12908 (set_attr "mode" "<MODE>")])
;; vrcp28ps/vrcp28pd on 512-bit float vectors (VF_512), with masked and
;; SAE-only forms generated through the <mask_name> and
;; <round_saeonly_name> substitutions.
12910 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
12911 [(set (match_operand:VF_512 0 "register_operand" "=v")
12913 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
12916 "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
12917 [(set_attr "prefix" "evex")
12918 (set_attr "type" "sse")
12919 (set_attr "mode" "<MODE>")])
;; Scalar vrcp28ss/vrcp28sd on VF_128 operands.  Operand 1 is the value
;; the operation is applied to; operand 2 is a second register input that
;; is printed as the middle operand of the three-operand form.
;; NOTE(review): length_immediate is "1" although the template prints no
;; immediate operand -- confirm this attribute is intended.
12921 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
12922 [(set (match_operand:VF_128 0 "register_operand" "=v")
12925 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
12927 (match_operand:VF_128 2 "register_operand" "v")
12930 "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
12931 [(set_attr "length_immediate" "1")
12932 (set_attr "prefix" "evex")
12933 (set_attr "type" "sse")
12934 (set_attr "mode" "<MODE>")])
;; vrsqrt28ps/vrsqrt28pd on 512-bit float vectors (VF_512), with masked
;; and SAE-only forms generated through the <mask_name> and
;; <round_saeonly_name> substitutions.
12936 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
12937 [(set (match_operand:VF_512 0 "register_operand" "=v")
12939 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
12942 "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
12943 [(set_attr "prefix" "evex")
12944 (set_attr "type" "sse")
12945 (set_attr "mode" "<MODE>")])
;; Scalar vrsqrt28ss/vrsqrt28sd on VF_128 operands.  Operand 1 is the
;; value the operation is applied to; operand 2 is a second register
;; input that is printed as the middle operand of the three-operand form.
;; NOTE(review): length_immediate is "1" although the template prints no
;; immediate operand -- confirm this attribute is intended.
12947 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
12948 [(set (match_operand:VF_128 0 "register_operand" "=v")
12951 [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
12953 (match_operand:VF_128 2 "register_operand" "v")
12956 "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
12957 [(set_attr "length_immediate" "1")
12958 (set_attr "type" "sse")
12959 (set_attr "prefix" "evex")
12960 (set_attr "mode" "<MODE>")])
12962 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12964 ;; XOP instructions
12966 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; xop_plus iterates over plain and signed-saturating addition.  The
;; macs/madcs attributes give the matching mnemonic stems: "macs"/"madcs"
;; for plus, "macss"/"madcss" for ss_plus.
12968 (define_code_iterator xop_plus [plus ss_plus])
12970 (define_code_attr macs [(plus "macs") (ss_plus "macss")])
12971 (define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
12973 ;; XOP parallel integer multiply/add instructions.
;; Four-operand multiply/accumulate on VI24_128 vectors where sources and
;; destination share one element size (the mode suffix appears twice in
;; the mnemonic).  Operands 1 and 2 are a commutative pair ("%"); only
;; operand 2 may come from memory.  Operand 3 is the accumulator input.
12975 (define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
12976 [(set (match_operand:VI24_128 0 "register_operand" "=x")
12979 (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
12980 (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
12981 (match_operand:VI24_128 3 "register_operand" "x")))]
12983 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
12984 [(set_attr "type" "ssemuladd")
12985 (set_attr "mode" "TI")])
;; Multiply/accumulate using elements 0 and 2 of the V4SI operands 1 and
;; 2, with a V2DI accumulator (operand 3) and a V2DI result.
12987 (define_insn "xop_p<macs>dql"
12988 [(set (match_operand:V2DI 0 "register_operand" "=x")
12993 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
12994 (parallel [(const_int 0) (const_int 2)])))
12997 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
12998 (parallel [(const_int 0) (const_int 2)]))))
12999 (match_operand:V2DI 3 "register_operand" "x")))]
13001 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13002 [(set_attr "type" "ssemuladd")
13003 (set_attr "mode" "TI")])
;; Multiply/accumulate using elements 1 and 3 of the V4SI operands 1 and
;; 2, with a V2DI accumulator (operand 3) and a V2DI result.
13005 (define_insn "xop_p<macs>dqh"
13006 [(set (match_operand:V2DI 0 "register_operand" "=x")
13011 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
13012 (parallel [(const_int 1) (const_int 3)])))
13015 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
13016 (parallel [(const_int 1) (const_int 3)]))))
13017 (match_operand:V2DI 3 "register_operand" "x")))]
13019 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13020 [(set_attr "type" "ssemuladd")
13021 (set_attr "mode" "TI")])
13023 ;; XOP parallel integer multiply/add instructions for the intrinsics
;; Multiply/accumulate using the odd elements (1, 3, 5, 7) of the V8HI
;; operands 1 and 2, with a V4SI accumulator (operand 3) and a V4SI
;; result.
13024 (define_insn "xop_p<macs>wd"
13025 [(set (match_operand:V4SI 0 "register_operand" "=x")
13030 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
13031 (parallel [(const_int 1) (const_int 3)
13032 (const_int 5) (const_int 7)])))
13035 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
13036 (parallel [(const_int 1) (const_int 3)
13037 (const_int 5) (const_int 7)]))))
13038 (match_operand:V4SI 3 "register_operand" "x")))]
13040 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13041 [(set_attr "type" "ssemuladd")
13042 (set_attr "mode" "TI")])
;; Multiply/add/accumulate that uses both the even (0, 2, 4, 6) and the
;; odd (1, 3, 5, 7) elements of the V8HI operands 1 and 2, with a V4SI
;; accumulator (operand 3) and a V4SI result.
13044 (define_insn "xop_p<madcs>wd"
13045 [(set (match_operand:V4SI 0 "register_operand" "=x")
13051 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
13052 (parallel [(const_int 0) (const_int 2)
13053 (const_int 4) (const_int 6)])))
13056 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
13057 (parallel [(const_int 0) (const_int 2)
13058 (const_int 4) (const_int 6)]))))
13063 (parallel [(const_int 1) (const_int 3)
13064 (const_int 5) (const_int 7)])))
13068 (parallel [(const_int 1) (const_int 3)
13069 (const_int 5) (const_int 7)])))))
13070 (match_operand:V4SI 3 "register_operand" "x")))]
13072 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13073 [(set_attr "type" "ssemuladd")
13074 (set_attr "mode" "TI")])
13076 ;; XOP parallel XMM conditional moves
;; vpcmov over every mode in the V iterator.  The two alternatives allow
;; a memory operand in one position at a time: operand 2 in alternative
;; 0, operand 3 in alternative 1.  Operand 1 is always a register.
13077 (define_insn "xop_pcmov_<mode><avxsizesuffix>"
13078 [(set (match_operand:V 0 "register_operand" "=x,x")
13080 (match_operand:V 3 "nonimmediate_operand" "x,m")
13081 (match_operand:V 1 "register_operand" "x,x")
13082 (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
13084 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13085 [(set_attr "type" "sse4arg")])
13087 ;; XOP horizontal add/subtract instructions
;; vphadd<u>bw: V16QI source, V8HI result.  The two selections take the
;; even-indexed bytes (0, 2, ..., 14) and the odd-indexed bytes
;; (1, 3, ..., 15) of operand 1.
13088 (define_insn "xop_phadd<u>bw"
13089 [(set (match_operand:V8HI 0 "register_operand" "=x")
13093 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13094 (parallel [(const_int 0) (const_int 2)
13095 (const_int 4) (const_int 6)
13096 (const_int 8) (const_int 10)
13097 (const_int 12) (const_int 14)])))
13101 (parallel [(const_int 1) (const_int 3)
13102 (const_int 5) (const_int 7)
13103 (const_int 9) (const_int 11)
13104 (const_int 13) (const_int 15)])))))]
13106 "vphadd<u>bw\t{%1, %0|%0, %1}"
13107 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bd: V16QI source, V4SI result.  Four selections take bytes
;; {0,4,8,12}, {1,5,9,13}, {2,6,10,14} and {3,7,11,15}, so each result
;; lane draws on four consecutive source bytes.
13109 (define_insn "xop_phadd<u>bd"
13110 [(set (match_operand:V4SI 0 "register_operand" "=x")
13115 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13116 (parallel [(const_int 0) (const_int 4)
13117 (const_int 8) (const_int 12)])))
13121 (parallel [(const_int 1) (const_int 5)
13122 (const_int 9) (const_int 13)]))))
13127 (parallel [(const_int 2) (const_int 6)
13128 (const_int 10) (const_int 14)])))
13132 (parallel [(const_int 3) (const_int 7)
13133 (const_int 11) (const_int 15)]))))))]
13135 "vphadd<u>bd\t{%1, %0|%0, %1}"
13136 [(set_attr "type" "sseiadd1")])
;; vphadd<u>bq: V16QI source, V2DI result.  Eight selections take bytes
;; {0,8}, {1,9}, ... {7,15}, so each of the two result lanes draws on
;; eight consecutive source bytes.
13138 (define_insn "xop_phadd<u>bq"
13139 [(set (match_operand:V2DI 0 "register_operand" "=x")
13145 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13146 (parallel [(const_int 0) (const_int 8)])))
13150 (parallel [(const_int 1) (const_int 9)]))))
13155 (parallel [(const_int 2) (const_int 10)])))
13159 (parallel [(const_int 3) (const_int 11)])))))
13165 (parallel [(const_int 4) (const_int 12)])))
13169 (parallel [(const_int 5) (const_int 13)]))))
13174 (parallel [(const_int 6) (const_int 14)])))
13178 (parallel [(const_int 7) (const_int 15)])))))))]
13180 "vphadd<u>bq\t{%1, %0|%0, %1}"
13181 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wd: V8HI source, V4SI result.  The two selections take the
;; even-indexed words (0, 2, 4, 6) and the odd-indexed words (1, 3, 5, 7)
;; of operand 1.
13183 (define_insn "xop_phadd<u>wd"
13184 [(set (match_operand:V4SI 0 "register_operand" "=x")
13188 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
13189 (parallel [(const_int 0) (const_int 2)
13190 (const_int 4) (const_int 6)])))
13194 (parallel [(const_int 1) (const_int 3)
13195 (const_int 5) (const_int 7)])))))]
13197 "vphadd<u>wd\t{%1, %0|%0, %1}"
13198 [(set_attr "type" "sseiadd1")])
;; vphadd<u>wq: V8HI source, V2DI result.  Four selections take words
;; {0,4}, {1,5}, {2,6} and {3,7}, so each of the two result lanes draws
;; on four consecutive source words.
13200 (define_insn "xop_phadd<u>wq"
13201 [(set (match_operand:V2DI 0 "register_operand" "=x")
13206 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
13207 (parallel [(const_int 0) (const_int 4)])))
13211 (parallel [(const_int 1) (const_int 5)]))))
13216 (parallel [(const_int 2) (const_int 6)])))
13220 (parallel [(const_int 3) (const_int 7)]))))))]
13222 "vphadd<u>wq\t{%1, %0|%0, %1}"
13223 [(set_attr "type" "sseiadd1")])
;; vphadd<u>dq: V4SI source, V2DI result.  The two selections take
;; elements {0,2} and {1,3} of operand 1.
13225 (define_insn "xop_phadd<u>dq"
13226 [(set (match_operand:V2DI 0 "register_operand" "=x")
13230 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
13231 (parallel [(const_int 0) (const_int 2)])))
13235 (parallel [(const_int 1) (const_int 3)])))))]
13237 "vphadd<u>dq\t{%1, %0|%0, %1}"
13238 [(set_attr "type" "sseiadd1")])
;; vphsubbw: V16QI source, V8HI result.  The first selection takes the
;; even-indexed bytes (0, 2, ..., 14) and the second the odd-indexed
;; bytes (1, 3, ..., 15) of operand 1.
13240 (define_insn "xop_phsubbw"
13241 [(set (match_operand:V8HI 0 "register_operand" "=x")
13245 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
13246 (parallel [(const_int 0) (const_int 2)
13247 (const_int 4) (const_int 6)
13248 (const_int 8) (const_int 10)
13249 (const_int 12) (const_int 14)])))
13253 (parallel [(const_int 1) (const_int 3)
13254 (const_int 5) (const_int 7)
13255 (const_int 9) (const_int 11)
13256 (const_int 13) (const_int 15)])))))]
13258 "vphsubbw\t{%1, %0|%0, %1}"
13259 [(set_attr "type" "sseiadd1")])
;; vphsubwd: V8HI source (operand 1, register or memory) to a V4SI
;; register (operand 0).  The selection lists pick the even (0,2,4,6) and
;; the odd (1,3,5,7) elements of operand 1.
13261 (define_insn "xop_phsubwd"
13262 [(set (match_operand:V4SI 0 "register_operand" "=x")
13266 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
13267 (parallel [(const_int 0) (const_int 2)
13268 (const_int 4) (const_int 6)])))
13272 (parallel [(const_int 1) (const_int 3)
13273 (const_int 5) (const_int 7)])))))]
13275 "vphsubwd\t{%1, %0|%0, %1}"
13276 [(set_attr "type" "sseiadd1")])
;; vphsubdq: V4SI source (operand 1, register or memory) to a V2DI
;; register (operand 0).  The selection lists pick elements (0,2) and (1,3).
13278 (define_insn "xop_phsubdq"
13279 [(set (match_operand:V2DI 0 "register_operand" "=x")
13283 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
13284 (parallel [(const_int 0) (const_int 2)])))
13288 (parallel [(const_int 1) (const_int 3)])))))]
13290 "vphsubdq\t{%1, %0|%0, %1}"
13291 [(set_attr "type" "sseiadd1")])
13293 ;; XOP permute instructions
;; vpperm on three V16QI inputs, modelled as UNSPEC_XOP_PERMUTE.
;; Operand 1 must be a register; either operand 2 or operand 3 may be in
;; memory, but the insn condition rejects both being memory at once.
13294 (define_insn "xop_pperm"
13295 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
13297 [(match_operand:V16QI 1 "register_operand" "x,x")
13298 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
13299 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
13300 UNSPEC_XOP_PERMUTE))]
13301 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13302 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13303 [(set_attr "type" "sse4arg")
13304 (set_attr "mode" "TI")])
13306 ;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI inputs (operands 1 and 2) into a V4SI result using vpperm.
;; Operand 3 is the V16QI selector, recorded with a (use ...).  At most one
;; of operands 2 and 3 may be a memory reference.
13307 (define_insn "xop_pperm_pack_v2di_v4si"
13308 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
13311 (match_operand:V2DI 1 "register_operand" "x,x"))
13313 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
13314 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
13315 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13316 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13317 [(set_attr "type" "sse4arg")
13318 (set_attr "mode" "TI")])
;; Pack two V4SI inputs (operands 1 and 2) into a V8HI result using vpperm.
;; Operand 3 is the V16QI selector, recorded with a (use ...).  At most one
;; of operands 2 and 3 may be a memory reference.
13320 (define_insn "xop_pperm_pack_v4si_v8hi"
13321 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
13324 (match_operand:V4SI 1 "register_operand" "x,x"))
13326 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
13327 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
13328 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13329 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13330 [(set_attr "type" "sse4arg")
13331 (set_attr "mode" "TI")])
;; Pack two V8HI inputs (operands 1 and 2) into a V16QI result using vpperm.
;; Operand 3 is the V16QI selector, recorded with a (use ...).  At most one
;; of operands 2 and 3 may be a memory reference.
13333 (define_insn "xop_pperm_pack_v8hi_v16qi"
13334 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
13337 (match_operand:V8HI 1 "register_operand" "x,x"))
13339 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
13340 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
13341 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
13342 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13343 [(set_attr "type" "sse4arg")
13344 (set_attr "mode" "TI")])
13346 ;; XOP packed rotate instructions
;; Rotate-left expander for the VI_128 modes with an SImode count.
;; When the count is not a constant accepted by
;; const_0_to_<sserotatemax>_operand, the count is converted to the element
;; mode if necessary, replicated <ssescalarnum> times into a vector register
;; via vec_init<mode>, and the per-element rotate xop_vrotl<mode>3 is emitted.
13347 (define_expand "rotl<mode>3"
13348 [(set (match_operand:VI_128 0 "register_operand")
13350 (match_operand:VI_128 1 "nonimmediate_operand")
13351 (match_operand:SI 2 "general_operand")))]
13354 /* If we were given a scalar, convert it to parallel */
13355 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
13357 rtvec vs = rtvec_alloc (<ssescalarnum>);
13358 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
13359 rtx reg = gen_reg_rtx (<MODE>mode);
13360 rtx op2 = operands[2];
13363 if (GET_MODE (op2) != <ssescalarmode>mode)
13365 op2 = gen_reg_rtx (<ssescalarmode>mode);
13366 convert_move (op2, operands[2], false);
13369 for (i = 0; i < <ssescalarnum>; i++)
13370 RTVEC_ELT (vs, i) = op2;
13372 emit_insn (gen_vec_init<mode> (reg, par));
13373 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; Rotate-right expander for the VI_128 modes with an SImode count.
;; When the count is not a constant accepted by
;; const_0_to_<sserotatemax>_operand, the count is broadcast into a vector
;; register, negated with neg<mode>2, and the negated vector is passed to
;; xop_vrotl<mode>3.
13378 (define_expand "rotr<mode>3"
13379 [(set (match_operand:VI_128 0 "register_operand")
13381 (match_operand:VI_128 1 "nonimmediate_operand")
13382 (match_operand:SI 2 "general_operand")))]
13385 /* If we were given a scalar, convert it to parallel */
13386 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
13388 rtvec vs = rtvec_alloc (<ssescalarnum>);
13389 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
13390 rtx neg = gen_reg_rtx (<MODE>mode);
13391 rtx reg = gen_reg_rtx (<MODE>mode);
13392 rtx op2 = operands[2];
13395 if (GET_MODE (op2) != <ssescalarmode>mode)
13397 op2 = gen_reg_rtx (<ssescalarmode>mode);
13398 convert_move (op2, operands[2], false);
13401 for (i = 0; i < <ssescalarnum>; i++)
13402 RTVEC_ELT (vs, i) = op2;
13404 emit_insn (gen_vec_init<mode> (reg, par));
13405 emit_insn (gen_neg<mode>2 (neg, reg));
13406 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
;; vprot<ssemodesuffix> with an immediate count: operand 2 is a constant
;; accepted by const_0_to_<sserotatemax>_operand and is printed directly.
13411 (define_insn "xop_rotl<mode>3"
13412 [(set (match_operand:VI_128 0 "register_operand" "=x")
13414 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
13415 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
13417 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13418 [(set_attr "type" "sseishft")
13419 (set_attr "length_immediate" "1")
13420 (set_attr "mode" "TI")])
;; Immediate-count pattern that is also output as vprot<ssemodesuffix>.
;; The output code computes (element bit size - operand 2) and the template
;; prints that value as %3 instead of the original count.
13422 (define_insn "xop_rotr<mode>3"
13423 [(set (match_operand:VI_128 0 "register_operand" "=x")
13425 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
13426 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
13430 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
13431 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
13433 [(set_attr "type" "sseishft")
13434 (set_attr "length_immediate" "1")
13435 (set_attr "mode" "TI")])
;; vrotr<mode>3 with a vector count: negates operand 2 into a fresh
;; register and emits xop_vrotl<mode>3 with the negated counts.
13437 (define_expand "vrotr<mode>3"
13438 [(match_operand:VI_128 0 "register_operand")
13439 (match_operand:VI_128 1 "register_operand")
13440 (match_operand:VI_128 2 "register_operand")]
13443 rtx reg = gen_reg_rtx (<MODE>mode);
13444 emit_insn (gen_neg<mode>2 (reg, operands[2]));
13445 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
;; vrotl<mode>3 with a vector count: forwards all three operands unchanged
;; to xop_vrotl<mode>3.
13449 (define_expand "vrotl<mode>3"
13450 [(match_operand:VI_128 0 "register_operand")
13451 (match_operand:VI_128 1 "register_operand")
13452 (match_operand:VI_128 2 "register_operand")]
13455 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
;; vprot<ssemodesuffix> with a per-element vector count (operand 2).
;; The RTL is an if_then_else, one arm of which uses the negation of
;; operand 2.  Either operand 1 or operand 2 may be in memory, but the insn
;; condition rejects both being memory at once.
13459 (define_insn "xop_vrotl<mode>3"
13460 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13461 (if_then_else:VI_128
13463 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13466 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13470 (neg:VI_128 (match_dup 2)))))]
13471 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13472 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13473 [(set_attr "type" "sseishft")
13474 (set_attr "prefix_data16" "0")
13475 (set_attr "prefix_extra" "2")
13476 (set_attr "mode" "TI")])
13478 ;; XOP packed shift instructions.
;; vlshr<mode>3 for the VI12_128 modes: negates the count vector
;; (operand 2) and emits xop_shl<mode>3 with the negated counts.
13479 (define_expand "vlshr<mode>3"
13480 [(set (match_operand:VI12_128 0 "register_operand")
13482 (match_operand:VI12_128 1 "register_operand")
13483 (match_operand:VI12_128 2 "nonimmediate_operand")))]
13486 rtx neg = gen_reg_rtx (<MODE>mode);
13487 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13488 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for the VI48_128 modes, enabled for TARGET_AVX2 or
;; TARGET_XOP.  The preparation code contains the XOP sequence: negate the
;; count vector and emit xop_shl<mode>3.
13492 (define_expand "vlshr<mode>3"
13493 [(set (match_operand:VI48_128 0 "register_operand")
13495 (match_operand:VI48_128 1 "register_operand")
13496 (match_operand:VI48_128 2 "nonimmediate_operand")))]
13497 "TARGET_AVX2 || TARGET_XOP"
13501 rtx neg = gen_reg_rtx (<MODE>mode);
13502 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13503 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
;; vlshr<mode>3 for the VI48_512 modes.  Operands 0 and 1 are registers;
;; the count vector (operand 2) may be a register or memory.
13508 (define_expand "vlshr<mode>3"
13509 [(set (match_operand:VI48_512 0 "register_operand")
13511 (match_operand:VI48_512 1 "register_operand")
13512 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vlshr<mode>3 for the VI48_256 modes.  Operands 0 and 1 are registers;
;; the count vector (operand 2) may be a register or memory.
13515 (define_expand "vlshr<mode>3"
13516 [(set (match_operand:VI48_256 0 "register_operand")
13518 (match_operand:VI48_256 1 "register_operand")
13519 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; Arithmetic right shift by a vector count for the VI128_128 modes:
;; negates the count vector (operand 2) and emits xop_sha<mode>3 with the
;; negated counts.
13522 (define_expand "vashr<mode>3"
13523 [(set (match_operand:VI128_128 0 "register_operand")
13524 (ashiftrt:VI128_128
13525 (match_operand:VI128_128 1 "register_operand")
13526 (match_operand:VI128_128 2 "nonimmediate_operand")))]
13529 rtx neg = gen_reg_rtx (<MODE>mode);
13530 emit_insn (gen_neg<mode>2 (neg, operands[2]));
13531 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
;; Arithmetic right shift of V4SI by a V4SI count, enabled for TARGET_AVX2
;; or TARGET_XOP.  The preparation code contains the XOP sequence: negate
;; the count vector and emit xop_shav4si3.
13535 (define_expand "vashrv4si3"
13536 [(set (match_operand:V4SI 0 "register_operand")
13537 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
13538 (match_operand:V4SI 2 "nonimmediate_operand")))]
13539 "TARGET_AVX2 || TARGET_XOP"
13543 rtx neg = gen_reg_rtx (V4SImode);
13544 emit_insn (gen_negv4si2 (neg, operands[2]));
13545 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
;; Arithmetic right shift of V16SI by a V16SI count; the count (operand 2)
;; may be a register or memory.
13550 (define_expand "vashrv16si3"
13551 [(set (match_operand:V16SI 0 "register_operand")
13552 (ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
13553 (match_operand:V16SI 2 "nonimmediate_operand")))]
;; Arithmetic right shift of V8SI by a V8SI count; the count (operand 2)
;; may be a register or memory.
13556 (define_expand "vashrv8si3"
13557 [(set (match_operand:V8SI 0 "register_operand")
13558 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
13559 (match_operand:V8SI 2 "nonimmediate_operand")))]
;; vashl<mode>3 for the VI12_128 modes: passes the count vector
;; (operand 2) unchanged to xop_sha<mode>3.
13562 (define_expand "vashl<mode>3"
13563 [(set (match_operand:VI12_128 0 "register_operand")
13565 (match_operand:VI12_128 1 "register_operand")
13566 (match_operand:VI12_128 2 "nonimmediate_operand")))]
13569 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for the VI48_128 modes, enabled for TARGET_AVX2 or
;; TARGET_XOP.  The preparation code contains the XOP sequence: force the
;; count vector into a register and emit xop_sha<mode>3.
13573 (define_expand "vashl<mode>3"
13574 [(set (match_operand:VI48_128 0 "register_operand")
13576 (match_operand:VI48_128 1 "register_operand")
13577 (match_operand:VI48_128 2 "nonimmediate_operand")))]
13578 "TARGET_AVX2 || TARGET_XOP"
13582 operands[2] = force_reg (<MODE>mode, operands[2]);
13583 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
;; vashl<mode>3 for the VI48_512 modes.  Operands 0 and 1 are registers;
;; the count vector (operand 2) may be a register or memory.
13588 (define_expand "vashl<mode>3"
13589 [(set (match_operand:VI48_512 0 "register_operand")
13591 (match_operand:VI48_512 1 "register_operand")
13592 (match_operand:VI48_512 2 "nonimmediate_operand")))]
;; vashl<mode>3 for the VI48_256 modes.  Operands 0 and 1 are registers;
;; the count vector (operand 2) may be a register or memory.
13595 (define_expand "vashl<mode>3"
13596 [(set (match_operand:VI48_256 0 "register_operand")
13598 (match_operand:VI48_256 1 "register_operand")
13599 (match_operand:VI48_256 2 "nonimmediate_operand")))]
;; vpsha<ssemodesuffix> with a per-element vector count (operand 2).
;; The RTL is an if_then_else, one arm of which uses the negation of
;; operand 2.  Either operand 1 or operand 2 may be in memory, but the insn
;; condition rejects both being memory at once.
13602 (define_insn "xop_sha<mode>3"
13603 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13604 (if_then_else:VI_128
13606 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13609 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13613 (neg:VI_128 (match_dup 2)))))]
13614 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13615 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13616 [(set_attr "type" "sseishft")
13617 (set_attr "prefix_data16" "0")
13618 (set_attr "prefix_extra" "2")
13619 (set_attr "mode" "TI")])
;; vpshl<ssemodesuffix> with a per-element vector count (operand 2).
;; The RTL is an if_then_else, one arm of which uses the negation of
;; operand 2.  Either operand 1 or operand 2 may be in memory, but the insn
;; condition rejects both being memory at once.
13621 (define_insn "xop_shl<mode>3"
13622 [(set (match_operand:VI_128 0 "register_operand" "=x,x")
13623 (if_then_else:VI_128
13625 (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
13628 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
13632 (neg:VI_128 (match_dup 2)))))]
13633 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13634 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13635 [(set_attr "type" "sseishft")
13636 (set_attr "prefix_data16" "0")
13637 (set_attr "prefix_extra" "2")
13638 (set_attr "mode" "TI")])
;; Byte-element shifts (any_shift) for the VI1_AVX2 modes with an SImode
;; count.  With TARGET_XOP and V16QImode the count is replicated into a
;; V16QI register via vec_initv16qi; for codes other than ASHIFT a constant
;; count is negated at expand time, and gen_negv16qi2 is available for the
;; non-constant case.  LSHIFTRT is emitted through xop_shlv16qi3, the other
;; codes through xop_shav16qi3.  The body also calls
;; ix86_expand_vecop_qihi with the same operands (presumably the non-XOP
;; path -- confirm against the full preparation code).
13640 (define_expand "<shift_insn><mode>3"
13641 [(set (match_operand:VI1_AVX2 0 "register_operand")
13642 (any_shift:VI1_AVX2
13643 (match_operand:VI1_AVX2 1 "register_operand")
13644 (match_operand:SI 2 "nonmemory_operand")))]
13647 if (TARGET_XOP && <MODE>mode == V16QImode)
13649 bool negate = false;
13650 rtx (*gen) (rtx, rtx, rtx);
13654 if (<CODE> != ASHIFT)
13656 if (CONST_INT_P (operands[2]))
13657 operands[2] = GEN_INT (-INTVAL (operands[2]));
13661 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
13662 for (i = 0; i < 16; i++)
13663 XVECEXP (par, 0, i) = operands[2];
13665 tmp = gen_reg_rtx (V16QImode);
13666 emit_insn (gen_vec_initv16qi (tmp, par));
13669 emit_insn (gen_negv16qi2 (tmp, tmp));
13671 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
13672 emit_insn (gen (operands[0], operands[1], tmp));
13675 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
;; ashrv2di3: V2DI shift with a DImode count, implemented with
;; xop_shav2di3.  A constant count is negated at expand time; the count is
;; replicated into both elements of a V2DI register via vec_initv2di, and
;; gen_negv2di2 is available for negating the vector form.
13679 (define_expand "ashrv2di3"
13680 [(set (match_operand:V2DI 0 "register_operand")
13682 (match_operand:V2DI 1 "register_operand")
13683 (match_operand:DI 2 "nonmemory_operand")))]
13686 rtx reg = gen_reg_rtx (V2DImode);
13688 bool negate = false;
13691 if (CONST_INT_P (operands[2]))
13692 operands[2] = GEN_INT (-INTVAL (operands[2]));
13696 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
13697 for (i = 0; i < 2; i++)
13698 XVECEXP (par, 0, i) = operands[2];
13700 emit_insn (gen_vec_initv2di (reg, par));
13703 emit_insn (gen_negv2di2 (reg, reg));
13705 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
13709 ;; XOP FRCZ support
;; vfrcz<ssemodesuffix> for the FMAMODE modes: one register-or-memory
;; source (operand 1), register destination (operand 0).
13710 (define_insn "xop_frcz<mode>2"
13711 [(set (match_operand:FMAMODE 0 "register_operand" "=x")
13713 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
13716 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
13717 [(set_attr "type" "ssecvt1")
13718 (set_attr "mode" "<MODE>")])
;; Expander for the VF_128 vmfrcz form: supplies operands[2] as the
;; all-zero constant vector of <MODE>mode.
13720 (define_expand "xop_vmfrcz<mode>2"
13721 [(set (match_operand:VF_128 0 "register_operand")
13724 [(match_operand:VF_128 1 "nonimmediate_operand")]
13729 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Matching insn for the VF_128 vmfrcz form: operand 2 must satisfy
;; const0_operand.  Emits vfrcz<ssescalarmodesuffix>; the Intel-syntax
;; source is printed with the <iptr> modifier.
13731 (define_insn "*xop_vmfrcz<mode>2"
13732 [(set (match_operand:VF_128 0 "register_operand" "=x")
13735 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
13737 (match_operand:VF_128 2 "const0_operand")
13740 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
13741 [(set_attr "type" "ssecvt1")
13742 (set_attr "mode" "<MODE>")])
;; vpcom<cond><ssemodesuffix>: comparison of two VI_128 vectors.  Operand 1
;; is the comparison operator (ix86_comparison_int_operator) and is printed
;; into the mnemonic with %Y1; operand 3 may be a register or memory.
13744 (define_insn "xop_maskcmp<mode>3"
13745 [(set (match_operand:VI_128 0 "register_operand" "=x")
13746 (match_operator:VI_128 1 "ix86_comparison_int_operator"
13747 [(match_operand:VI_128 2 "register_operand" "x")
13748 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
13750 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13751 [(set_attr "type" "sse4arg")
13752 (set_attr "prefix_data16" "0")
13753 (set_attr "prefix_rep" "0")
13754 (set_attr "prefix_extra" "2")
13755 (set_attr "length_immediate" "1")
13756 (set_attr "mode" "TI")])
;; vpcom<cond>u<ssemodesuffix>: comparison of two VI_128 vectors using an
;; operator accepted by ix86_comparison_uns_operator.  The operator is
;; printed into the mnemonic with %Y1, followed by the "u" suffix.
13758 (define_insn "xop_maskcmp_uns<mode>3"
13759 [(set (match_operand:VI_128 0 "register_operand" "=x")
13760 (match_operator:VI_128 1 "ix86_comparison_uns_operator"
13761 [(match_operand:VI_128 2 "register_operand" "x")
13762 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
13764 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13765 [(set_attr "type" "ssecmp")
13766 (set_attr "prefix_data16" "0")
13767 (set_attr "prefix_rep" "0")
13768 (set_attr "prefix_extra" "2")
13769 (set_attr "length_immediate" "1")
13770 (set_attr "mode" "TI")])
13772 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
13773 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
13774 ;; the exact instruction generated for the intrinsic.
;; Same operands and output template as xop_maskcmp_uns<mode>3, but the
;; comparison is wrapped in UNSPEC_XOP_UNSIGNED_CMP.
13775 (define_insn "xop_maskcmp_uns2<mode>3"
13776 [(set (match_operand:VI_128 0 "register_operand" "=x")
13778 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
13779 [(match_operand:VI_128 2 "register_operand" "x")
13780 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
13781 UNSPEC_XOP_UNSIGNED_CMP))]
13783 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
13784 [(set_attr "type" "ssecmp")
13785 (set_attr "prefix_data16" "0")
13786 (set_attr "prefix_extra" "2")
13787 (set_attr "length_immediate" "1")
13788 (set_attr "mode" "TI")])
13790 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
13791 ;; being added here to be complete.
;; Modelled as UNSPEC_XOP_TRUEFALSE.  Operand 3 is a constant integer that
;; chooses the mnemonic: nonzero emits vpcomtrue<ssemodesuffix>, zero emits
;; vpcomfalse<ssemodesuffix>.
13792 (define_insn "xop_pcom_tf<mode>3"
13793 [(set (match_operand:VI_128 0 "register_operand" "=x")
13795 [(match_operand:VI_128 1 "register_operand" "x")
13796 (match_operand:VI_128 2 "nonimmediate_operand" "xm")
13797 (match_operand:SI 3 "const_int_operand" "n")]
13798 UNSPEC_XOP_TRUEFALSE))]
13801 return ((INTVAL (operands[3]) != 0)
13802 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13803 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
13805 [(set_attr "type" "ssecmp")
13806 (set_attr "prefix_data16" "0")
13807 (set_attr "prefix_extra" "2")
13808 (set_attr "length_immediate" "1")
13809 (set_attr "mode" "TI")])
;; vpermil2<ssemodesuffix> for the VF_128_256 modes.  Operands 1 and 2 are
;; the two data vectors, operand 3 is a selector in the matching integer
;; vector mode (register or memory), and operand 4 is an immediate in the
;; range 0..3.
13811 (define_insn "xop_vpermil2<mode>3"
13812 [(set (match_operand:VF_128_256 0 "register_operand" "=x")
13814 [(match_operand:VF_128_256 1 "register_operand" "x")
13815 (match_operand:VF_128_256 2 "nonimmediate_operand" "%x")
13816 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
13817 (match_operand:SI 4 "const_0_to_3_operand" "n")]
13820 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
13821 [(set_attr "type" "sse4arg")
13822 (set_attr "length_immediate" "1")
13823 (set_attr "mode" "<MODE>")])
13825 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aesenc / vaesenc on V2DI.  Alternative 0 (isa noavx) ties operand 1 to
;; the destination and emits the two-operand form; alternative 1 (isa avx)
;; emits the three-operand VEX form.
13827 (define_insn "aesenc"
13828 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13829 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13830 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13834 aesenc\t{%2, %0|%0, %2}
13835 vaesenc\t{%2, %1, %0|%0, %1, %2}"
13836 [(set_attr "isa" "noavx,avx")
13837 (set_attr "type" "sselog1")
13838 (set_attr "prefix_extra" "1")
13839 (set_attr "prefix" "orig,vex")
13840 (set_attr "btver2_decode" "double,double")
13841 (set_attr "mode" "TI")])
;; aesenclast / vaesenclast on V2DI, modelled as UNSPEC_AESENCLAST.
;; Alternative 0 (isa noavx) ties operand 1 to the destination; alternative
;; 1 (isa avx) emits the three-operand VEX form.
13843 (define_insn "aesenclast"
13844 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13845 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13846 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13847 UNSPEC_AESENCLAST))]
13850 aesenclast\t{%2, %0|%0, %2}
13851 vaesenclast\t{%2, %1, %0|%0, %1, %2}"
13852 [(set_attr "isa" "noavx,avx")
13853 (set_attr "type" "sselog1")
13854 (set_attr "prefix_extra" "1")
13855 (set_attr "prefix" "orig,vex")
13856 (set_attr "btver2_decode" "double,double")
13857 (set_attr "mode" "TI")])
;; aesdec / vaesdec on V2DI.  Alternative 0 (isa noavx) ties operand 1 to
;; the destination and emits the two-operand form; alternative 1 (isa avx)
;; emits the three-operand VEX form.
13859 (define_insn "aesdec"
13860 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13861 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13862 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13866 aesdec\t{%2, %0|%0, %2}
13867 vaesdec\t{%2, %1, %0|%0, %1, %2}"
13868 [(set_attr "isa" "noavx,avx")
13869 (set_attr "type" "sselog1")
13870 (set_attr "prefix_extra" "1")
13871 (set_attr "prefix" "orig,vex")
13872 (set_attr "btver2_decode" "double,double")
13873 (set_attr "mode" "TI")])
;; aesdeclast / vaesdeclast on V2DI, modelled as UNSPEC_AESDECLAST.
;; Alternative 0 (isa noavx) ties operand 1 to the destination; alternative
;; 1 (isa avx) emits the three-operand VEX form.
13875 (define_insn "aesdeclast"
13876 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13877 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13878 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
13879 UNSPEC_AESDECLAST))]
13882 aesdeclast\t{%2, %0|%0, %2}
13883 vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
13884 [(set_attr "isa" "noavx,avx")
13885 (set_attr "type" "sselog1")
13886 (set_attr "prefix_extra" "1")
13887 (set_attr "prefix" "orig,vex")
13888 (set_attr "btver2_decode" "double,double")
13889 (set_attr "mode" "TI")])
;; aesimc on V2DI with a single register-or-memory source.  The template
;; starts with %v and the prefix attribute is maybe_vex, so one alternative
;; serves both encodings.
13891 (define_insn "aesimc"
13892 [(set (match_operand:V2DI 0 "register_operand" "=x")
13893 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
13896 "%vaesimc\t{%1, %0|%0, %1}"
13897 [(set_attr "type" "sselog1")
13898 (set_attr "prefix_extra" "1")
13899 (set_attr "prefix" "maybe_vex")
13900 (set_attr "mode" "TI")])
;; aeskeygenassist on V2DI, modelled as UNSPEC_AESKEYGENASSIST.  Operand 1
;; is a register-or-memory source and operand 2 an immediate in 0..255.
;; The template starts with %v and the prefix attribute is maybe_vex.
13902 (define_insn "aeskeygenassist"
13903 [(set (match_operand:V2DI 0 "register_operand" "=x")
13904 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
13905 (match_operand:SI 2 "const_0_to_255_operand" "n")]
13906 UNSPEC_AESKEYGENASSIST))]
13908 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
13909 [(set_attr "type" "sselog1")
13910 (set_attr "prefix_extra" "1")
13911 (set_attr "length_immediate" "1")
13912 (set_attr "prefix" "maybe_vex")
13913 (set_attr "mode" "TI")])
;; pclmulqdq / vpclmulqdq on V2DI with an immediate in 0..255 (operand 3).
;; Alternative 0 (isa noavx) ties operand 1 to the destination; alternative
;; 1 (isa avx) emits the three-operand VEX form.
13915 (define_insn "pclmulqdq"
13916 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
13917 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
13918 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
13919 (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
13923 pclmulqdq\t{%3, %2, %0|%0, %2, %3}
13924 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
13925 [(set_attr "isa" "noavx,avx")
13926 (set_attr "type" "sselog1")
13927 (set_attr "prefix_extra" "1")
13928 (set_attr "length_immediate" "1")
13929 (set_attr "prefix" "orig,vex")
13930 (set_attr "mode" "TI")])
;; Builds the PARALLEL used for vzeroall.  It has nregs + 1 elements, where
;; nregs is 16 for TARGET_64BIT and 8 otherwise: element 0 is an
;; UNSPEC_VOLATILE, and each following element sets one SSE register, in
;; V8SImode, to the zero vector.
13932 (define_expand "avx_vzeroall"
13933 [(match_par_dup 0 [(const_int 0)])]
13936 int nregs = TARGET_64BIT ? 16 : 8;
13939 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
13941 XVECEXP (operands[0], 0, 0)
13942 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
13945 for (regno = 0; regno < nregs; regno++)
13946 XVECEXP (operands[0], 0, regno + 1)
13947 = gen_rtx_SET (VOIDmode,
13948 gen_rtx_REG (V8SImode, SSE_REGNO (regno)),
13949 CONST0_RTX (V8SImode));
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile.  The attributes
;; mark it as having no modrm byte and no memory access.
13952 (define_insn "*avx_vzeroall"
13953 [(match_parallel 0 "vzeroall_operation"
13954 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
13957 [(set_attr "type" "sse")
13958 (set_attr "modrm" "0")
13959 (set_attr "memory" "none")
13960 (set_attr "prefix" "vex")
13961 (set_attr "btver2_decode" "vector")
13962 (set_attr "mode" "OI")])
13964 ;; Clear the upper 128bits of AVX registers, equivalent to a NOP
13965 ;; if the upper 128bits are unused.
;; Modelled as the UNSPECV_VZEROUPPER unspec_volatile with no operands.
;; The attributes mark it as having no modrm byte and no memory access.
13966 (define_insn "avx_vzeroupper"
13967 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
13970 [(set_attr "type" "sse")
13971 (set_attr "modrm" "0")
13972 (set_attr "memory" "none")
13973 (set_attr "prefix" "vex")
13974 (set_attr "btver2_decode" "vector")
13975 (set_attr "mode" "OI")])
;; vpbroadcast<ssemodesuffix> for the VI modes: element 0 of operand 1
;; (an <ssexmmmode> register or memory) is the value that is broadcast.
13977 (define_insn "avx2_pbroadcast<mode>"
13978 [(set (match_operand:VI 0 "register_operand" "=x")
13980 (vec_select:<ssescalarmode>
13981 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
13982 (parallel [(const_int 0)]))))]
13984 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
13985 [(set_attr "type" "ssemov")
13986 (set_attr "prefix_extra" "1")
13987 (set_attr "prefix" "vex")
13988 (set_attr "mode" "<sseinsnmode>")])
;; Variant of vpbroadcast whose source (operand 1) is itself a VI_256
;; vector; element 0 is duplicated.  Alternative 0 takes the source from
;; memory; alternative 1 takes it from a register and prints it with the
;; %x modifier.
13990 (define_insn "avx2_pbroadcast<mode>_1"
13991 [(set (match_operand:VI_256 0 "register_operand" "=x,x")
13992 (vec_duplicate:VI_256
13993 (vec_select:<ssescalarmode>
13994 (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
13995 (parallel [(const_int 0)]))))]
13998 vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
13999 vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
14000 [(set_attr "type" "ssemov")
14001 (set_attr "prefix_extra" "1")
14002 (set_attr "prefix" "vex")
14003 (set_attr "mode" "<sseinsnmode>")])
;; vperm<ssemodesuffix> with a variable index vector, for the
;; VI48F_256_512 modes.  Operand 1 is the data (register or memory) and
;; operand 2 the index vector in the matching integer mode (register only).
;; The <mask_name> / <mask_operand3> substitutions add the masked variant.
14005 (define_insn "<avx2_avx512f>_permvar<mode><mask_name>"
14006 [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
14007 (unspec:VI48F_256_512
14008 [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
14009 (match_operand:<sseintvecmode> 2 "register_operand" "v")]
14011 "TARGET_AVX2 && <mask_mode512bit_condition>"
14012 "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
14013 [(set_attr "type" "sselog")
14014 (set_attr "prefix" "<mask_prefix2>")
14015 (set_attr "mode" "<sseinsnmode>")])
;; Immediate-permute expander for the VI8F_256_512 modes.  Splits the
;; 8-bit immediate (operand 2) into four 2-bit fields, lowest bits first,
;; and passes them as separate constants to the ..._perm<mode>_1 pattern.
14017 (define_expand "<avx2_avx512f>_perm<mode>"
14018 [(match_operand:VI8F_256_512 0 "register_operand")
14019 (match_operand:VI8F_256_512 1 "nonimmediate_operand")
14020 (match_operand:SI 2 "const_0_to_255_operand")]
14023 int mask = INTVAL (operands[2]);
14024 emit_insn (gen_<avx2_avx512f>_perm<mode>_1 (operands[0], operands[1],
14025 GEN_INT ((mask >> 0) & 3),
14026 GEN_INT ((mask >> 2) & 3),
14027 GEN_INT ((mask >> 4) & 3),
14028 GEN_INT ((mask >> 6) & 3)));
;; Masked immediate-permute expander for the V8FI modes.  Splits the 8-bit
;; immediate (operand 2) into four 2-bit fields and passes them, together
;; with operand 3 (a vector_move_operand) and the mask register operand 4,
;; to the ..._perm<mode>_1_mask pattern.
14032 (define_expand "avx512f_perm<mode>_mask"
14033 [(match_operand:V8FI 0 "register_operand")
14034 (match_operand:V8FI 1 "nonimmediate_operand")
14035 (match_operand:SI 2 "const_0_to_255_operand")
14036 (match_operand:V8FI 3 "vector_move_operand")
14037 (match_operand:<avx512fmaskmode> 4 "register_operand")]
14040 int mask = INTVAL (operands[2]);
14041 emit_insn (gen_<avx2_avx512f>_perm<mode>_1_mask (operands[0], operands[1],
14042 GEN_INT ((mask >> 0) & 3),
14043 GEN_INT ((mask >> 2) & 3),
14044 GEN_INT ((mask >> 4) & 3),
14045 GEN_INT ((mask >> 6) & 3),
14046 operands[3], operands[4]));
;; vec_select form of the immediate permute: operands 2..5 are four
;; element indices, each in 0..3.  The output code packs them back into one
;; immediate (two bits each, operand 2 in the lowest bits), stores it in
;; operands[2] and prints vperm<ssemodesuffix>.
14050 (define_insn "<avx2_avx512f>_perm<mode>_1<mask_name>"
14051 [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
14052 (vec_select:VI8F_256_512
14053 (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
14054 (parallel [(match_operand 2 "const_0_to_3_operand")
14055 (match_operand 3 "const_0_to_3_operand")
14056 (match_operand 4 "const_0_to_3_operand")
14057 (match_operand 5 "const_0_to_3_operand")])))]
14058 "TARGET_AVX2 && <mask_mode512bit_condition>"
14061 mask |= INTVAL (operands[2]) << 0;
14062 mask |= INTVAL (operands[3]) << 2;
14063 mask |= INTVAL (operands[4]) << 4;
14064 mask |= INTVAL (operands[5]) << 6;
14065 operands[2] = GEN_INT (mask);
14066 return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14068 [(set_attr "type" "sselog")
14069 (set_attr "prefix" "<mask_prefix2>")
14070 (set_attr "mode" "<sseinsnmode>")])
;; vperm2i128 on V4DI: operand 1 is a register, operand 2 a register or
;; memory, and operand 3 an immediate in 0..255.
14072 (define_insn "avx2_permv2ti"
14073 [(set (match_operand:V4DI 0 "register_operand" "=x")
14075 [(match_operand:V4DI 1 "register_operand" "x")
14076 (match_operand:V4DI 2 "nonimmediate_operand" "xm")
14077 (match_operand:SI 3 "const_0_to_255_operand" "n")]
14080 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14081 [(set_attr "type" "sselog")
14082 (set_attr "prefix" "vex")
14083 (set_attr "mode" "OI")])
;; vbroadcastsd from a register: element 0 of the V2DF register operand 1
;; is duplicated into all elements of the V4DF destination.
14085 (define_insn "avx2_vec_dupv4df"
14086 [(set (match_operand:V4DF 0 "register_operand" "=x")
14087 (vec_duplicate:V4DF
14089 (match_operand:V2DF 1 "register_operand" "x")
14090 (parallel [(const_int 0)]))))]
14092 "vbroadcastsd\t{%1, %0|%0, %1}"
14093 [(set_attr "type" "sselog1")
14094 (set_attr "prefix" "vex")
14095 (set_attr "mode" "V4DF")])
14097 ;; Modes handled by AVX vec_dup patterns.
;; Covers V8SI, V8SF, V4DI and V4DF; used by vec_dup<mode> and by the
;; vec_duplicate split that is enabled only without AVX2.
14098 (define_mode_iterator AVX_VEC_DUP_MODE
14099 [V8SI V8SF V4DI V4DF])
;; Scalar-to-vector duplicate for AVX_VEC_DUP_MODE.  Three alternatives:
;; a memory source (any ISA), a register source enabled with AVX2 (printed
;; with %x1), and a register source enabled without AVX2.
14101 (define_insn "vec_dup<mode>"
14102 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
14103 (vec_duplicate:AVX_VEC_DUP_MODE
14104 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))]
14107 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
14108 vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1}
14110 [(set_attr "type" "ssemov")
14111 (set_attr "prefix_extra" "1")
14112 (set_attr "prefix" "vex")
14113 (set_attr "isa" "*,avx2,noavx2")
14114 (set_attr "mode" "V8SF")])
;; 512-bit broadcast for the VI48F_512 modes: element 0 of operand 1 (an
;; <ssexmmmode> register or memory) is duplicated.  <mask_name> and
;; <mask_operand2> add the masked variant.
14116 (define_insn "<mask_codefor>avx512f_vec_dup<mode><mask_name>"
14117 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14118 (vec_duplicate:VI48F_512
14119 (vec_select:<ssescalarmode>
14120 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
14121 (parallel [(const_int 0)]))))]
14123 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14124 [(set_attr "type" "ssemov")
14125 (set_attr "prefix" "evex")
14126 (set_attr "mode" "<sseinsnmode>")])
;; Duplicate an <ssexmmmode> vector across a V16FI destination.  A register
;; source is emitted as vshuf<shuffletype>32x4 with immediate 0 and the
;; source printed with %g1 for both inputs; a memory source is emitted as
;; vbroadcast<shuffletype>32x4.
14128 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
14129 [(set (match_operand:V16FI 0 "register_operand" "=v,v")
14130 (vec_duplicate:V16FI
14131 (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
14134 vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
14135 vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14136 [(set_attr "type" "ssemov")
14137 (set_attr "prefix" "evex")
14138 (set_attr "mode" "<sseinsnmode>")])
;; Duplicate an <ssehalfvecmode> vector across a V8FI destination.  A
;; register source is emitted as vshuf<shuffletype>64x2 with immediate 0x44
;; and the source printed with %g1 for both inputs; a memory source is
;; emitted as vbroadcast<shuffletype>64x4.
14140 (define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
14141 [(set (match_operand:V8FI 0 "register_operand" "=v,v")
14142 (vec_duplicate:V8FI
14143 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
14146 vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
14147 vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14148 [(set_attr "type" "ssemov")
14149 (set_attr "prefix" "evex")
14150 (set_attr "mode" "<sseinsnmode>")])
;; vpbroadcast<bcstscalarsuff> from a general register ("r") into a
;; VI48_512 vector.  The V8DImode variant is only enabled for TARGET_64BIT.
14152 (define_insn "<mask_codefor>avx512f_vec_dup_gpr<mode><mask_name>"
14153 [(set (match_operand:VI48_512 0 "register_operand" "=v")
14154 (vec_duplicate:VI48_512
14155 (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
14156 "TARGET_AVX512F && (<MODE>mode != V8DImode || TARGET_64BIT)"
14157 "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14158 [(set_attr "type" "ssemov")
14159 (set_attr "prefix" "evex")
14160 (set_attr "mode" "<sseinsnmode>")])
;; Scalar-to-vector broadcast for the VI48F_512 modes.  Operand 1 is a
;; scalar of the element mode with constraint "vm", so both a vector
;; register and memory are accepted.
14162 (define_insn "<mask_codefor>avx512f_vec_dup_mem<mode><mask_name>"
14163 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14164 (vec_duplicate:VI48F_512
14165 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
14167 "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14168 [(set_attr "type" "ssemov")
14169 (set_attr "prefix" "evex")
14170 (set_attr "mode" "<sseinsnmode>")])
;; vbroadcasti128 for the VI_256 modes.  The source (operand 1) is an
;; <ssehalfvecmode> value and must be in memory.
14172 (define_insn "avx2_vbroadcasti128_<mode>"
14173 [(set (match_operand:VI_256 0 "register_operand" "=x")
14175 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
14178 "vbroadcasti128\t{%1, %0|%0, %1}"
14179 [(set_attr "type" "ssemov")
14180 (set_attr "prefix_extra" "1")
14181 (set_attr "prefix" "vex")
14182 (set_attr "mode" "OI")])
;; Post-reload rewrite of a register-source vec_duplicate when AVX is
;; available but AVX2 is not.  operands[2] is the destination's hard
;; register viewed in <ssehalfvecmode>; the scalar is first duplicated into
;; that half-width register, which is then concatenated with itself.
14185 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
14186 (vec_duplicate:AVX_VEC_DUP_MODE
14187 (match_operand:<ssescalarmode> 1 "register_operand")))]
14188 "TARGET_AVX && !TARGET_AVX2 && reload_completed"
14189 [(set (match_dup 2)
14190 (vec_duplicate:<ssehalfvecmode> (match_dup 1)))
14192 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
14193 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
;; 128-bit to 256-bit duplicate for the V_256 modes, three alternatives:
;;   memory source            -> vbroadcast<i128>
;;   source tied to the dest  -> vinsert<i128> with immediate 1
;;   other register source    -> vperm2<i128> with immediate 0 (%t1 twice)
14195 (define_insn "avx_vbroadcastf128_<mode>"
14196 [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
14198 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
14202 vbroadcast<i128>\t{%1, %0|%0, %1}
14203 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
14204 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
14205 [(set_attr "type" "ssemov,sselog1,sselog1")
14206 (set_attr "prefix_extra" "1")
14207 (set_attr "length_immediate" "0,1,1")
14208 (set_attr "prefix" "vex")
14209 (set_attr "mode" "<sseinsnmode>")])
;; vpbroadcastmb2q: duplicates a value derived from the QImode mask
;; register operand 1 (constraint "Yk") into every element of a V8DI
;; register.
14211 (define_insn "avx512cd_maskb_vec_dupv8di"
14212 [(set (match_operand:V8DI 0 "register_operand" "=v")
14213 (vec_duplicate:V8DI
14215 (match_operand:QI 1 "register_operand" "Yk"))))]
14217 "vpbroadcastmb2q\t{%1, %0|%0, %1}"
14218 [(set_attr "type" "mskmov")
14219 (set_attr "prefix" "evex")
14220 (set_attr "mode" "XI")])
;; vpbroadcastmw2d: duplicates a value derived from the HImode mask
;; register operand 1 (constraint "Yk") into every element of a V16SI
;; register.
14222 (define_insn "avx512cd_maskw_vec_dupv16si"
14223 [(set (match_operand:V16SI 0 "register_operand" "=v")
14224 (vec_duplicate:V16SI
14226 (match_operand:HI 1 "register_operand" "Yk"))))]
14228 "vpbroadcastmw2d\t{%1, %0|%0, %1}"
14229 [(set_attr "type" "mskmov")
14230 (set_attr "prefix" "evex")
14231 (set_attr "mode" "XI")])
14233 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
14234 ;; If it so happens that the input is in memory, use vbroadcast.
14235 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF case.  Operand 3 is the index of the element being broadcast.
;; For a memory source the address is advanced by elt * 4 bytes and
;; vbroadcastss is emitted; for a register source vpermilps is emitted with
;; the immediate elt * 0x55, which repeats the 2-bit index in all four
;; fields.
14236 (define_insn "*avx_vperm_broadcast_v4sf"
14237 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
14239 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
14240 (match_parallel 2 "avx_vbroadcast_operand"
14241 [(match_operand 3 "const_int_operand" "C,n,n")])))]
14244 int elt = INTVAL (operands[3]);
14245 switch (which_alternative)
14249 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
14250 return "vbroadcastss\t{%1, %0|%0, %k1}";
14252 operands[2] = GEN_INT (elt * 0x55);
14253 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
14255 gcc_unreachable ();
14258 [(set_attr "type" "ssemov,ssemov,sselog1")
14259 (set_attr "prefix_extra" "1")
14260 (set_attr "length_immediate" "0,0,1")
14261 (set_attr "prefix" "vex")
14262 (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit (VF_256) case, split after reload except for V4DFmode when AVX2
;; is available.  Operand 3 is the index of the element being broadcast.
;; The split code uses vec_dup<mode> on the low part when TARGET_AVX2 and
;; elt == 0; it also contains a two-step sequence (vpermil within each
;; 128-bit lane, then vperm2f128 to copy the wanted lane to both halves)
;; and an adjust_address of op1 to the selected element, which feeds the
;; vec_duplicate replacement pattern.
14264 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
14265 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
14267 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
14268 (match_parallel 2 "avx_vbroadcast_operand"
14269 [(match_operand 3 "const_int_operand" "C,n,n")])))]
14272 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
14273 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
14275 rtx op0 = operands[0], op1 = operands[1];
14276 int elt = INTVAL (operands[3]);
14282 if (TARGET_AVX2 && elt == 0)
14284 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
14289 /* Shuffle element we care about into all elements of the 128-bit lane.
14290 The other lane gets shuffled too, but we don't care. */
14291 if (<MODE>mode == V4DFmode)
14292 mask = (elt & 1 ? 15 : 0);
14294 mask = (elt & 3) * 0x55;
14295 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
14297 /* Shuffle the lane we care about into both lanes of the dest. */
14298 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
14299 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
14303 operands[1] = adjust_address (op1, <ssescalarmode>mode,
14304 elt * GET_MODE_SIZE (<ssescalarmode>mode));
;; Immediate vpermil expander for the VF2 modes.  Converts the imm8
;; (operand 2) into an explicit PARALLEL of element indices: elements are
;; handled in pairs (i, i + 1); mask bit i picks index i or i + 1 for the
;; first slot and mask bit i + 1 picks index i or i + 1 for the second.
14307 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
14308 [(set (match_operand:VF2 0 "register_operand")
14310 (match_operand:VF2 1 "nonimmediate_operand")
14311 (match_operand:SI 2 "const_0_to_255_operand")))]
14312 "TARGET_AVX && <mask_mode512bit_condition>"
14314 int mask = INTVAL (operands[2]);
14315 rtx perm[<ssescalarnum>];
14318 for (i = 0; i < <ssescalarnum>; i = i + 2)
14320 perm[i] = GEN_INT (((mask >> i) & 1) + i);
14321 perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
14325 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Immediate vpermil expander for the VF1 modes.  Converts the imm8
;; (operand 2) into an explicit PARALLEL of element indices: elements are
;; handled in groups of four, and the four 2-bit fields of the mask
;; (bits 1:0, 3:2, 5:4, 7:6) each pick an index inside the group.  The same
;; mask byte is reused for every group.
14328 (define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
14329 [(set (match_operand:VF1 0 "register_operand")
14331 (match_operand:VF1 1 "nonimmediate_operand")
14332 (match_operand:SI 2 "const_0_to_255_operand")))]
14333 "TARGET_AVX && <mask_mode512bit_condition>"
14335 int mask = INTVAL (operands[2]);
14336 rtx perm[<ssescalarnum>];
14339 for (i = 0; i < <ssescalarnum>; i = i + 4)
14341 perm[i] = GEN_INT (((mask >> 0) & 3) + i);
14342 perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
14343 perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
14344 perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
14348 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
;; Immediate vpermil insn.  Matches only when avx_vpermilp_parallel accepts
;; the selector PARALLEL (operand 2) for this mode.  At output time the same
;; helper is called again and its result minus one replaces operand 2 as the
;; immediate that is printed.
14351 (define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
14352 [(set (match_operand:VF 0 "register_operand" "=v")
14354 (match_operand:VF 1 "nonimmediate_operand" "vm")
14355 (match_parallel 2 ""
14356 [(match_operand 3 "const_int_operand")])))]
14357 "TARGET_AVX && <mask_mode512bit_condition>
14358 && avx_vpermilp_parallel (operands[2], <MODE>mode)"
14360 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
14361 operands[2] = GEN_INT (mask);
14362 return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
14364 [(set_attr "type" "sselog")
14365 (set_attr "prefix_extra" "1")
14366 (set_attr "length_immediate" "1")
14367 (set_attr "prefix" "<mask_prefix>")
14368 (set_attr "mode" "<sseinsnmode>")])
;; Variable vpermil insn: the data operand 1 is a register and the selector
;; operand 2 is an integer vector (<sseintvecmode>) in a register or memory.
14370 (define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
14371 [(set (match_operand:VF 0 "register_operand" "=v")
14373 [(match_operand:VF 1 "register_operand" "v")
14374 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
14376 "TARGET_AVX && <mask_mode512bit_condition>"
14377 "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14378 [(set_attr "type" "sselog")
14379 (set_attr "prefix_extra" "1")
14380 (set_attr "btver2_decode" "vector")
14381 (set_attr "prefix" "<mask_prefix>")
14382 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking vpermi2 expander.  Forwards operands 0-3 to the _maskz_1
;; generator, inserting an all-zero vector (CONST0_RTX of the data mode)
;; ahead of the mask operand 4.
14384 (define_expand "avx512f_vpermi2var<mode>3_maskz"
14385 [(match_operand:VI48F_512 0 "register_operand" "=v")
14386 (match_operand:VI48F_512 1 "register_operand" "v")
14387 (match_operand:<sseintvecmode> 2 "register_operand" "0")
14388 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")
14389 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
14392 emit_insn (gen_avx512f_vpermi2var<mode>3_maskz_1 (
14393 operands[0], operands[1], operands[2], operands[3],
14394 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermi2 insn.  Operand 2 (integer vector mode) is tied to the destination
;; through constraint "0"; operand 1 is a register and operand 3 may be a
;; register or memory.  Any masking text comes from <sd_mask_op4>.
14398 (define_insn "avx512f_vpermi2var<mode>3<sd_maskz_name>"
14399 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14401 [(match_operand:VI48F_512 1 "register_operand" "v")
14402 (match_operand:<sseintvecmode> 2 "register_operand" "0")
14403 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14406 "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
14407 [(set_attr "type" "sselog")
14408 (set_attr "prefix" "evex")
14409 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masking vpermi2 insn: a vec_merge of the UNSPEC_VPERMI2_MASK result
;; under mask operand 4 ("Yk"), printed as {%4} after the destination.
;; Operand 2 is tied to the destination.
14411 (define_insn "avx512f_vpermi2var<mode>3_mask"
14412 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14413 (vec_merge:VI48F_512
14415 [(match_operand:VI48F_512 1 "register_operand" "v")
14416 (match_operand:<sseintvecmode> 2 "register_operand" "0")
14417 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14418 UNSPEC_VPERMI2_MASK)
14420 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
14422 "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
14423 [(set_attr "type" "sselog")
14424 (set_attr "prefix" "evex")
14425 (set_attr "mode" "<sseinsnmode>")])
;; Zero-masking vpermt2 expander.  Forwards operands 0-3 to the _maskz_1
;; generator, inserting an all-zero vector (CONST0_RTX of the data mode)
;; ahead of the mask operand 4.  Here operand 1 is the integer vector and
;; operand 2 is the data vector tied to the destination.
14427 (define_expand "avx512f_vpermt2var<mode>3_maskz"
14428 [(match_operand:VI48F_512 0 "register_operand" "=v")
14429 (match_operand:<sseintvecmode> 1 "register_operand" "v")
14430 (match_operand:VI48F_512 2 "register_operand" "0")
14431 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")
14432 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
14435 emit_insn (gen_avx512f_vpermt2var<mode>3_maskz_1 (
14436 operands[0], operands[1], operands[2], operands[3],
14437 CONST0_RTX (<MODE>mode), operands[4]));
;; vpermt2 insn.  Operand 1 is the integer vector (register); operand 2, in
;; the data mode, is tied to the destination through constraint "0";
;; operand 3 may be a register or memory.
14441 (define_insn "avx512f_vpermt2var<mode>3<sd_maskz_name>"
14442 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14444 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
14445 (match_operand:VI48F_512 2 "register_operand" "0")
14446 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14449 "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
14450 [(set_attr "type" "sselog")
14451 (set_attr "prefix" "evex")
14452 (set_attr "mode" "<sseinsnmode>")])
;; Merge-masking vpermt2 insn: a vec_merge under mask operand 4 ("Yk"),
;; printed as {%4} after the destination.  Operand 2 is tied to the
;; destination.
14454 (define_insn "avx512f_vpermt2var<mode>3_mask"
14455 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
14456 (vec_merge:VI48F_512
14458 [(match_operand:<sseintvecmode> 1 "register_operand" "v")
14459 (match_operand:VI48F_512 2 "register_operand" "0")
14460 (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
14463 (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
14465 "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
14466 [(set_attr "type" "sselog")
14467 (set_attr "prefix" "evex")
14468 (set_attr "mode" "<sseinsnmode>")])
;; vperm2f128 expander.  The default RTL is an UNSPEC_VPERMIL2F128 of
;; operands 1-3.  When neither zeroing bit of the immediate is set
;; ((mask & 0x88) == 0) the preparation code instead builds an explicit
;; vec_select over vec_concat (operand 1, operand 2): the low half of the
;; result takes the 128-bit lane numbered (mask & 3) and the high half the
;; lane numbered ((mask >> 4) & 3), each lane being nelt / 2 elements.
14470 (define_expand "avx_vperm2f128<mode>3"
14471 [(set (match_operand:AVX256MODE2P 0 "register_operand")
14472 (unspec:AVX256MODE2P
14473 [(match_operand:AVX256MODE2P 1 "register_operand")
14474 (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
14475 (match_operand:SI 3 "const_0_to_255_operand")]
14476 UNSPEC_VPERMIL2F128))]
14479 int mask = INTVAL (operands[3]);
14480 if ((mask & 0x88) == 0)
14482 rtx perm[<ssescalarnum>], t1, t2;
14483 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
14485 base = (mask & 3) * nelt2;
14486 for (i = 0; i < nelt2; ++i)
14487 perm[i] = GEN_INT (base + i);
14489 base = ((mask >> 4) & 3) * nelt2;
14490 for (i = 0; i < nelt2; ++i)
14491 perm[i + nelt2] = GEN_INT (base + i);
14493 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
14494 operands[1], operands[2]);
14495 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
14496 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
14497 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
14503 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
14504 ;; means that in order to represent this properly in rtl we'd have to
14505 ;; nest *another* vec_concat with a zero operand and do the select from
14506 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec form of vperm2<i128>: accepts any 8-bit immediate (operand 3) and
;; prints it unchanged.
14507 (define_insn "*avx_vperm2f128<mode>_full"
14508 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
14509 (unspec:AVX256MODE2P
14510 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
14511 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
14512 (match_operand:SI 3 "const_0_to_255_operand" "n")]
14513 UNSPEC_VPERMIL2F128))]
14515 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14516 [(set_attr "type" "sselog")
14517 (set_attr "prefix_extra" "1")
14518 (set_attr "length_immediate" "1")
14519 (set_attr "prefix" "vex")
14520 (set_attr "mode" "<sseinsnmode>")])
;; vec_select-over-vec_concat form of the lane permute.  Matches only when
;; avx_vperm2f128_parallel accepts the selector PARALLEL (operand 3); the
;; immediate is recovered as that helper's result minus one.  The output code
;; returns one of three templates: vinsert<i128> with immediate 0 or 1 (using
;; the %x2 form of operand 2), or the general vperm2<i128> with the recovered
;; immediate stored back into operand 3.
14522 (define_insn "*avx_vperm2f128<mode>_nozero"
14523 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
14524 (vec_select:AVX256MODE2P
14525 (vec_concat:<ssedoublevecmode>
14526 (match_operand:AVX256MODE2P 1 "register_operand" "x")
14527 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
14528 (match_parallel 3 ""
14529 [(match_operand 4 "const_int_operand")])))]
14531 && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
14533 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
14535 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
14537 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
14538 operands[3] = GEN_INT (mask);
14539 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
14541 [(set_attr "type" "sselog")
14542 (set_attr "prefix_extra" "1")
14543 (set_attr "length_immediate" "1")
14544 (set_attr "prefix" "vex")
14545 (set_attr "mode" "<sseinsnmode>")])
;; 128-bit insert expander for the V_256 modes.  Operand 3 (0 or 1 by its
;; predicate) selects between the gen_vec_set_lo_<mode> and
;; gen_vec_set_hi_<mode> generators, which is then called with
;; operands 0, 1 and 2.
14547 (define_expand "avx_vinsertf128<mode>"
14548 [(match_operand:V_256 0 "register_operand")
14549 (match_operand:V_256 1 "register_operand")
14550 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
14551 (match_operand:SI 3 "const_0_to_1_operand")]
14554 rtx (*insn)(rtx, rtx, rtx);
14556 switch (INTVAL (operands[3]))
14559 insn = gen_vec_set_lo_<mode>;
14562 insn = gen_vec_set_hi_<mode>;
14565 gcc_unreachable ();
14568 emit_insn (insn (operands[0], operands[1], operands[2]));
;; vinserti128 with immediate 0 for V4DI: operand 2 is the new V2DI half,
;; while elements 2 and 3 are the ones selected from operand 1.
14572 (define_insn "avx2_vec_set_lo_v4di"
14573 [(set (match_operand:V4DI 0 "register_operand" "=x")
14575 (match_operand:V2DI 2 "nonimmediate_operand" "xm")
14577 (match_operand:V4DI 1 "register_operand" "x")
14578 (parallel [(const_int 2) (const_int 3)]))))]
14580 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14581 [(set_attr "type" "sselog")
14582 (set_attr "prefix_extra" "1")
14583 (set_attr "length_immediate" "1")
14584 (set_attr "prefix" "vex")
14585 (set_attr "mode" "OI")])
;; vinserti128 with immediate 1 for V4DI: elements 0 and 1 are the ones
;; selected from operand 1, and operand 2 is the new V2DI half.
14587 (define_insn "avx2_vec_set_hi_v4di"
14588 [(set (match_operand:V4DI 0 "register_operand" "=x")
14591 (match_operand:V4DI 1 "register_operand" "x")
14592 (parallel [(const_int 0) (const_int 1)]))
14593 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
14595 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14596 [(set_attr "type" "sselog")
14597 (set_attr "prefix_extra" "1")
14598 (set_attr "length_immediate" "1")
14599 (set_attr "prefix" "vex")
14600 (set_attr "mode" "OI")])
;; Replace the low half of a VI8F_256 vector: the result is
;; vec_concat (operand 2, elements 2-3 of operand 1), emitted as
;; vinsert<i128> with immediate 0.
14602 (define_insn "vec_set_lo_<mode>"
14603 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
14604 (vec_concat:VI8F_256
14605 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
14606 (vec_select:<ssehalfvecmode>
14607 (match_operand:VI8F_256 1 "register_operand" "x")
14608 (parallel [(const_int 2) (const_int 3)]))))]
14610 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14611 [(set_attr "type" "sselog")
14612 (set_attr "prefix_extra" "1")
14613 (set_attr "length_immediate" "1")
14614 (set_attr "prefix" "vex")
14615 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a VI8F_256 vector: the result is
;; vec_concat (elements 0-1 of operand 1, operand 2), emitted as
;; vinsert<i128> with immediate 1.
14617 (define_insn "vec_set_hi_<mode>"
14618 [(set (match_operand:VI8F_256 0 "register_operand" "=x")
14619 (vec_concat:VI8F_256
14620 (vec_select:<ssehalfvecmode>
14621 (match_operand:VI8F_256 1 "register_operand" "x")
14622 (parallel [(const_int 0) (const_int 1)]))
14623 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
14625 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14626 [(set_attr "type" "sselog")
14627 (set_attr "prefix_extra" "1")
14628 (set_attr "length_immediate" "1")
14629 (set_attr "prefix" "vex")
14630 (set_attr "mode" "<sseinsnmode>")])
;; Replace the low half of a VI4F_256 vector: the result is
;; vec_concat (operand 2, elements 4-7 of operand 1), emitted as
;; vinsert<i128> with immediate 0.
14632 (define_insn "vec_set_lo_<mode>"
14633 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
14634 (vec_concat:VI4F_256
14635 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")
14636 (vec_select:<ssehalfvecmode>
14637 (match_operand:VI4F_256 1 "register_operand" "x")
14638 (parallel [(const_int 4) (const_int 5)
14639 (const_int 6) (const_int 7)]))))]
14641 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14642 [(set_attr "type" "sselog")
14643 (set_attr "prefix_extra" "1")
14644 (set_attr "length_immediate" "1")
14645 (set_attr "prefix" "vex")
14646 (set_attr "mode" "<sseinsnmode>")])
;; Replace the high half of a VI4F_256 vector: the result is
;; vec_concat (elements 0-3 of operand 1, operand 2), emitted as
;; vinsert<i128> with immediate 1.
14648 (define_insn "vec_set_hi_<mode>"
14649 [(set (match_operand:VI4F_256 0 "register_operand" "=x")
14650 (vec_concat:VI4F_256
14651 (vec_select:<ssehalfvecmode>
14652 (match_operand:VI4F_256 1 "register_operand" "x")
14653 (parallel [(const_int 0) (const_int 1)
14654 (const_int 2) (const_int 3)]))
14655 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))]
14657 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14658 [(set_attr "type" "sselog")
14659 (set_attr "prefix_extra" "1")
14660 (set_attr "length_immediate" "1")
14661 (set_attr "prefix" "vex")
14662 (set_attr "mode" "<sseinsnmode>")])
;; V16HI low-half replacement: operand 2 is the new V8HI half and
;; elements 8-15 are the ones selected from operand 1.  Emitted as
;; vinsert%~128 with immediate 0.
14664 (define_insn "vec_set_lo_v16hi"
14665 [(set (match_operand:V16HI 0 "register_operand" "=x")
14667 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
14669 (match_operand:V16HI 1 "register_operand" "x")
14670 (parallel [(const_int 8) (const_int 9)
14671 (const_int 10) (const_int 11)
14672 (const_int 12) (const_int 13)
14673 (const_int 14) (const_int 15)]))))]
14675 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14676 [(set_attr "type" "sselog")
14677 (set_attr "prefix_extra" "1")
14678 (set_attr "length_immediate" "1")
14679 (set_attr "prefix" "vex")
14680 (set_attr "mode" "OI")])
;; V16HI high-half replacement: elements 0-7 are the ones selected from
;; operand 1 and operand 2 is the new V8HI half.  Emitted as vinsert%~128
;; with immediate 1.
14682 (define_insn "vec_set_hi_v16hi"
14683 [(set (match_operand:V16HI 0 "register_operand" "=x")
14686 (match_operand:V16HI 1 "register_operand" "x")
14687 (parallel [(const_int 0) (const_int 1)
14688 (const_int 2) (const_int 3)
14689 (const_int 4) (const_int 5)
14690 (const_int 6) (const_int 7)]))
14691 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
14693 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14694 [(set_attr "type" "sselog")
14695 (set_attr "prefix_extra" "1")
14696 (set_attr "length_immediate" "1")
14697 (set_attr "prefix" "vex")
14698 (set_attr "mode" "OI")])
;; V32QI low-half replacement: operand 2 is the new V16QI half and
;; elements 16-31 are the ones selected from operand 1.  Emitted as
;; vinsert%~128 with immediate 0.
14700 (define_insn "vec_set_lo_v32qi"
14701 [(set (match_operand:V32QI 0 "register_operand" "=x")
14703 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
14705 (match_operand:V32QI 1 "register_operand" "x")
14706 (parallel [(const_int 16) (const_int 17)
14707 (const_int 18) (const_int 19)
14708 (const_int 20) (const_int 21)
14709 (const_int 22) (const_int 23)
14710 (const_int 24) (const_int 25)
14711 (const_int 26) (const_int 27)
14712 (const_int 28) (const_int 29)
14713 (const_int 30) (const_int 31)]))))]
14715 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
14716 [(set_attr "type" "sselog")
14717 (set_attr "prefix_extra" "1")
14718 (set_attr "length_immediate" "1")
14719 (set_attr "prefix" "vex")
14720 (set_attr "mode" "OI")])
;; V32QI high-half replacement: elements 0-15 are the ones selected from
;; operand 1 and operand 2 is the new V16QI half.  Emitted as vinsert%~128
;; with immediate 1.
14722 (define_insn "vec_set_hi_v32qi"
14723 [(set (match_operand:V32QI 0 "register_operand" "=x")
14726 (match_operand:V32QI 1 "register_operand" "x")
14727 (parallel [(const_int 0) (const_int 1)
14728 (const_int 2) (const_int 3)
14729 (const_int 4) (const_int 5)
14730 (const_int 6) (const_int 7)
14731 (const_int 8) (const_int 9)
14732 (const_int 10) (const_int 11)
14733 (const_int 12) (const_int 13)
14734 (const_int 14) (const_int 15)]))
14735 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
14737 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
14738 [(set_attr "type" "sselog")
14739 (set_attr "prefix_extra" "1")
14740 (set_attr "length_immediate" "1")
14741 (set_attr "prefix" "vex")
14742 (set_attr "mode" "OI")])
;; Masked vector load (v...maskmov...): operand 1 is the memory source and
;; operand 2 is the integer-vector mask register.  Note the mask is printed
;; between source and destination in the template.
14744 (define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
14745 [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
14747 [(match_operand:<sseintvecmode> 2 "register_operand" "x")
14748 (match_operand:V48_AVX2 1 "memory_operand" "m")]
14751 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
14752 [(set_attr "type" "sselog1")
14753 (set_attr "prefix_extra" "1")
14754 (set_attr "prefix" "vex")
14755 (set_attr "btver2_decode" "vector")
14756 (set_attr "mode" "<sseinsnmode>")])
;; Masked vector store (v...maskmov...): operand 0 is a read-write memory
;; destination ("+m"), operand 1 the integer-vector mask register and
;; operand 2 the data register.
14758 (define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
14759 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
14761 [(match_operand:<sseintvecmode> 1 "register_operand" "x")
14762 (match_operand:V48_AVX2 2 "register_operand" "x")
14766 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14767 [(set_attr "type" "sselog1")
14768 (set_attr "prefix_extra" "1")
14769 (set_attr "prefix" "vex")
14770 (set_attr "btver2_decode" "vector")
14771 (set_attr "mode" "<sseinsnmode>")])
;; maskload<mode> expander for the V48_AVX2 modes: operand 0 is the register
;; destination, operand 1 the memory source, operand 2 the integer-vector
;; mask register.
14773 (define_expand "maskload<mode>"
14774 [(set (match_operand:V48_AVX2 0 "register_operand")
14776 [(match_operand:<sseintvecmode> 2 "register_operand")
14777 (match_operand:V48_AVX2 1 "memory_operand")]
;; maskstore<mode> expander for the V48_AVX2 modes: operand 0 is the memory
;; destination, operand 1 the data register, operand 2 the integer-vector
;; mask register.
14781 (define_expand "maskstore<mode>"
14782 [(set (match_operand:V48_AVX2 0 "memory_operand")
14784 [(match_operand:<sseintvecmode> 2 "register_operand")
14785 (match_operand:V48_AVX2 1 "register_operand")
;; Cast from a half-width vector to a 256-bit vector, represented as an
;; unspec and split after reload into a plain move.  The split code retypes
;; one side by register number so both sides share a mode: either operand 0
;; becomes a half-mode register or operand 1 becomes a full-mode register.
14790 (define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
14791 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
14792 (unspec:AVX256MODE2P
14793 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
14797 "&& reload_completed"
14800 rtx op0 = operands[0];
14801 rtx op1 = operands[1];
14803 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
14805 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
14806 emit_move_insn (op0, op1);
;; vec_init<mode> for the V_256 modes: delegates to ix86_expand_vector_init
;; with its first argument set to false.
14810 (define_expand "vec_init<mode>"
14811 [(match_operand:V_256 0 "register_operand")
14815 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_init<mode> for the VI48F_512 modes: delegates to
;; ix86_expand_vector_init with its first argument set to false.
14819 (define_expand "vec_init<mode>"
14820 [(match_operand:VI48F_512 0 "register_operand")
14824 ix86_expand_vector_init (false, operands[0], operands[1]);
;; 128-bit extract from V4DI.  Operand 2 (0 or 1 by its predicate) selects
;; between gen_vec_extract_lo_v4di and gen_vec_extract_hi_v4di, which is then
;; called with operands 0 and 1.
14828 (define_expand "avx2_extracti128"
14829 [(match_operand:V2DI 0 "nonimmediate_operand")
14830 (match_operand:V4DI 1 "register_operand")
14831 (match_operand:SI 2 "const_0_to_1_operand")]
14834 rtx (*insn)(rtx, rtx);
14836 switch (INTVAL (operands[2]))
14839 insn = gen_vec_extract_lo_v4di;
14842 insn = gen_vec_extract_hi_v4di;
14845 gcc_unreachable ();
14848 emit_insn (insn (operands[0], operands[1]));
;; 128-bit insert into V4DI.  Operand 3 (0 or 1 by its predicate) selects
;; between gen_avx2_vec_set_lo_v4di and gen_avx2_vec_set_hi_v4di, which is
;; then called with operands 0, 1 and 2.
14852 (define_expand "avx2_inserti128"
14853 [(match_operand:V4DI 0 "register_operand")
14854 (match_operand:V4DI 1 "register_operand")
14855 (match_operand:V2DI 2 "nonimmediate_operand")
14856 (match_operand:SI 3 "const_0_to_1_operand")]
14859 rtx (*insn)(rtx, rtx, rtx);
14861 switch (INTVAL (operands[3]))
14864 insn = gen_avx2_vec_set_lo_v4di;
14867 insn = gen_avx2_vec_set_hi_v4di;
14870 gcc_unreachable ();
14873 emit_insn (insn (operands[0], operands[1], operands[2]));
;; Per-element variable arithmetic right shift (vpsrav): operand 1 is shifted
;; by the counts in vector operand 2, which may be in a register or memory.
14877 (define_insn "<avx2_avx512f>_ashrv<mode><mask_name>"
14878 [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
14879 (ashiftrt:VI48_AVX512F
14880 (match_operand:VI48_AVX512F 1 "register_operand" "v")
14881 (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
14882 "TARGET_AVX2 && <mask_mode512bit_condition>"
14883 "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14884 [(set_attr "type" "sseishft")
14885 (set_attr "prefix" "maybe_evex")
14886 (set_attr "mode" "<sseinsnmode>")])
;; Per-element variable logical shifts, iterated over any_lshift: operand 1
;; is shifted by the counts in vector operand 2 (register or memory).  The
;; mnemonic is built as vp<vshift>v<ssemodesuffix>.
14888 (define_insn "<avx2_avx512f>_<shift_insn>v<mode><mask_name>"
14889 [(set (match_operand:VI48_AVX2_48_AVX512F 0 "register_operand" "=v")
14890 (any_lshift:VI48_AVX2_48_AVX512F
14891 (match_operand:VI48_AVX2_48_AVX512F 1 "register_operand" "v")
14892 (match_operand:VI48_AVX2_48_AVX512F 2 "nonimmediate_operand" "vm")))]
14893 "TARGET_AVX2 && <mask_mode512bit_condition>"
14894 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14895 [(set_attr "type" "sseishft")
14896 (set_attr "prefix" "maybe_evex")
14897 (set_attr "mode" "<sseinsnmode>")])
14899 ;; For avx_vec_concat<mode> insn pattern
;; Maps each 256-bit mode to "t" and each 512-bit mode to "g".  The result is
;; spliced in front of an operand number (%<concat_tg_mode>1) in the
;; avx_vec_concat<mode> output template.
14900 (define_mode_attr concat_tg_mode
14901 [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
14902 (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
;; Concatenate two half-width vectors into a 256- or 512-bit vector.
;; Operand 2 has two alternatives: register or memory ("xm"), or a constant
;; matching "C".  Per the type and length_immediate attributes the first
;; alternative is an sselog insn with an immediate (the vinsert template) and
;; the second is a plain move.  The move templates write operand 1 into the
;; %t0 or %x0 form of the destination, with the mnemonic chosen by
;; get_attr_mode.
14904 (define_insn "avx_vec_concat<mode>"
14905 [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
14906 (vec_concat:V_256_512
14907 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
14908 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
14911 switch (which_alternative)
14914 return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
14916 switch (get_attr_mode (insn))
14919 return "vmovaps\t{%1, %t0|%t0, %1}";
14921 return "vmovapd\t{%1, %t0|%t0, %1}";
14923 return "vmovaps\t{%1, %x0|%x0, %1}";
14925 return "vmovapd\t{%1, %x0|%x0, %1}";
14927 return "vmovdqa\t{%1, %t0|%t0, %1}";
14929 return "vmovdqa\t{%1, %x0|%x0, %1}";
14931 gcc_unreachable ();
14934 gcc_unreachable ();
14937 [(set_attr "type" "sselog,ssemov")
14938 (set_attr "prefix_extra" "1,*")
14939 (set_attr "length_immediate" "1,*")
14940 (set_attr "prefix" "maybe_evex")
14941 (set_attr "mode" "<sseinsnmode>")])
;; vcvtph2ps, register source: the V4SF result is elements 0-3 selected from
;; a V8SF unspec of the V8HI register operand 1.
14943 (define_insn "vcvtph2ps"
14944 [(set (match_operand:V4SF 0 "register_operand" "=x")
14946 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")]
14948 (parallel [(const_int 0) (const_int 1)
14949 (const_int 2) (const_int 3)])))]
14951 "vcvtph2ps\t{%1, %0|%0, %1}"
14952 [(set_attr "type" "ssecvt")
14953 (set_attr "prefix" "vex")
14954 (set_attr "mode" "V4SF")])
;; vcvtph2ps, memory source: converts a V4HI memory operand directly into a
;; V4SF register.
;; NOTE(review): the "mode" attribute is V8SF although the result is V4SF,
;; whereas the register form above uses V4SF -- confirm this is intended.
14956 (define_insn "*vcvtph2ps_load"
14957 [(set (match_operand:V4SF 0 "register_operand" "=x")
14958 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
14959 UNSPEC_VCVTPH2PS))]
14961 "vcvtph2ps\t{%1, %0|%0, %1}"
14962 [(set_attr "type" "ssecvt")
14963 (set_attr "prefix" "vex")
14964 (set_attr "mode" "V8SF")])
;; 256-bit vcvtph2ps: converts a V8HI register or memory operand into a V8SF
;; register.
14966 (define_insn "vcvtph2ps256"
14967 [(set (match_operand:V8SF 0 "register_operand" "=x")
14968 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
14969 UNSPEC_VCVTPH2PS))]
14971 "vcvtph2ps\t{%1, %0|%0, %1}"
14972 [(set_attr "type" "ssecvt")
14973 (set_attr "prefix" "vex")
14974 (set_attr "btver2_decode" "double")
14975 (set_attr "mode" "V8SF")])
;; 512-bit vcvtph2ps: converts V16HI operand 1 into a V16SF register.  The
;; predicate and constraint of operand 1, and the extra template text, come
;; from the round_saeonly and mask substitution attributes.
14977 (define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
14978 [(set (match_operand:V16SF 0 "register_operand" "=v")
14980 [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
14981 UNSPEC_VCVTPH2PS))]
14983 "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
14984 [(set_attr "type" "ssecvt")
14985 (set_attr "prefix" "evex")
14986 (set_attr "mode" "V16SF")])
;; vcvtps2ph expander, V4SF source into a V8HI register.  Operand 2 is the
;; 8-bit immediate; the preparation statement supplies operand 3 as an
;; all-zero V4HI vector.
14988 (define_expand "vcvtps2ph"
14989 [(set (match_operand:V8HI 0 "register_operand")
14991 (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
14992 (match_operand:SI 2 "const_0_to_255_operand")]
14996 "operands[3] = CONST0_RTX (V4HImode);")
;; vcvtps2ph, register destination: the V4HI unspec of operand 1 and the
;; immediate operand 2 is paired with operand 3, which must be a zero V4HI
;; constant, to form the V8HI result.
14998 (define_insn "*vcvtps2ph"
14999 [(set (match_operand:V8HI 0 "register_operand" "=x")
15001 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
15002 (match_operand:SI 2 "const_0_to_255_operand" "N")]
15004 (match_operand:V4HI 3 "const0_operand")))]
15006 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
15007 [(set_attr "type" "ssecvt")
15008 (set_attr "prefix" "vex")
15009 (set_attr "mode" "V4SF")])
;; vcvtps2ph, memory destination: stores the V4HI conversion of the V4SF
;; register operand 1 directly to memory.
15011 (define_insn "*vcvtps2ph_store"
15012 [(set (match_operand:V4HI 0 "memory_operand" "=m")
15013 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
15014 (match_operand:SI 2 "const_0_to_255_operand" "N")]
15015 UNSPEC_VCVTPS2PH))]
15017 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
15018 [(set_attr "type" "ssecvt")
15019 (set_attr "prefix" "vex")
15020 (set_attr "mode" "V4SF")])
;; 256-bit vcvtps2ph: converts a V8SF register into V8HI, written to a
;; register or to memory ("=xm").
15022 (define_insn "vcvtps2ph256"
15023 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
15024 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
15025 (match_operand:SI 2 "const_0_to_255_operand" "N")]
15026 UNSPEC_VCVTPS2PH))]
15028 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
15029 [(set_attr "type" "ssecvt")
15030 (set_attr "prefix" "vex")
15031 (set_attr "btver2_decode" "vector")
15032 (set_attr "mode" "V8SF")])
;; 512-bit vcvtps2ph: converts a V16SF register into V16HI, written to a
;; register or to memory ("=vm").  Optional masking text comes from
;; <mask_operand3>.
15034 (define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
15035 [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
15037 [(match_operand:V16SF 1 "register_operand" "v")
15038 (match_operand:SI 2 "const_0_to_255_operand" "N")]
15039 UNSPEC_VCVTPS2PH))]
15041 "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15042 [(set_attr "type" "ssecvt")
15043 (set_attr "prefix" "evex")
15044 (set_attr "mode" "V16SF")])
15046 ;; For gather* insn patterns
;; Data modes iterated by the avx2_gather* patterns: the 128- and 256-bit
;; vectors with 32- or 64-bit integer or float elements.
15047 (define_mode_iterator VEC_GATHER_MODE
15048 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Mode of the index vector operand in the gathersi patterns, keyed by the
;; data mode.  Every entry is an SI-element vector.
15049 (define_mode_attr VEC_GATHER_IDXSI
15050 [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
15051 (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
15052 (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
15053 (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
;; Mode of the index vector operand in the gatherdi patterns, keyed by the
;; data mode.  Every entry is a DI-element vector.
15055 (define_mode_attr VEC_GATHER_IDXDI
15056 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
15057 (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
15058 (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
15059 (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
;; Mode of the source and mask operands in the gatherdi patterns, keyed by
;; the data mode.  It equals the data mode except for the 8- and 16-element
;; SI/SF modes, which map to the vector with half as many elements.
15061 (define_mode_attr VEC_GATHER_SRCDI
15062 [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
15063 (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
15064 (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
15065 (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
;; AVX2 gather expander with SI-element indices.  Operand 1 is the source
;; vector, operand 2 the base address, operand 3 the index vector
;; (<VEC_GATHER_IDXSI>), operand 4 the mask vector and operand 5 the scale
;; (1, 2, 4 or 8).  The preparation code wraps operands 2, 3 and 5 into a
;; Pmode UNSPEC_VSIBADDR.
;; The predicate name of operand 5 is spelled without a trailing blank so it
;; names the real const1248_operand predicate, matching the other gather
;; patterns in this file.
15067 (define_expand "avx2_gathersi<mode>"
15068 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
15069 (unspec:VEC_GATHER_MODE
15070 [(match_operand:VEC_GATHER_MODE 1 "register_operand")
15071 (mem:<ssescalarmode>
15073 [(match_operand 2 "vsib_address_operand")
15074 (match_operand:<VEC_GATHER_IDXSI>
15075 3 "register_operand")
15076 (match_operand:SI 5 "const1248_operand")]))
15077 (mem:BLK (scratch))
15078 (match_operand:VEC_GATHER_MODE 4 "register_operand")]
15080 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
15084 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15085 operands[5]), UNSPEC_VSIBADDR);
;; AVX2 gather insn with SI-element indices.  The destination is
;; earlyclobber; source operand 2 is tied to it ("0") and mask operand 5 is
;; tied to the earlyclobber scratch operand 1 ("1"), which is what the
;; template prints as the mask.  Operand 7 is the VSIB memory operator.
15088 (define_insn "*avx2_gathersi<mode>"
15089 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15090 (unspec:VEC_GATHER_MODE
15091 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
15092 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
15094 [(match_operand:P 3 "vsib_address_operand" "Tv")
15095 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
15096 (match_operand:SI 6 "const1248_operand" "n")]
15098 (mem:BLK (scratch))
15099 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
15101 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15103 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
15104 [(set_attr "type" "ssemov")
15105 (set_attr "prefix" "vex")
15106 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the SI-index gather with one fewer matched operand: address 2,
;; index 3, mask 4 (tied to the earlyclobber scratch operand 1), scale 5 and
;; VSIB memory operator 6.
15108 (define_insn "*avx2_gathersi<mode>_2"
15109 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15110 (unspec:VEC_GATHER_MODE
15112 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15114 [(match_operand:P 2 "vsib_address_operand" "Tv")
15115 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
15116 (match_operand:SI 5 "const1248_operand" "n")]
15118 (mem:BLK (scratch))
15119 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
15121 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15123 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
15124 [(set_attr "type" "ssemov")
15125 (set_attr "prefix" "vex")
15126 (set_attr "mode" "<sseinsnmode>")])
;; AVX2 gather expander with DI-element indices.  Operand 1 (source) and
;; operand 4 (mask) use <VEC_GATHER_SRCDI>, operand 2 is the base address,
;; operand 3 the index vector (<VEC_GATHER_IDXDI>) and operand 5 the scale
;; (1, 2, 4 or 8).  The preparation code wraps operands 2, 3 and 5 into a
;; Pmode UNSPEC_VSIBADDR.
;; The predicate name of operand 5 is spelled without a trailing blank so it
;; names the real const1248_operand predicate, matching the other gather
;; patterns in this file.
15128 (define_expand "avx2_gatherdi<mode>"
15129 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
15130 (unspec:VEC_GATHER_MODE
15131 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
15132 (mem:<ssescalarmode>
15134 [(match_operand 2 "vsib_address_operand")
15135 (match_operand:<VEC_GATHER_IDXDI>
15136 3 "register_operand")
15137 (match_operand:SI 5 "const1248_operand")]))
15138 (mem:BLK (scratch))
15139 (match_operand:<VEC_GATHER_SRCDI>
15140 4 "register_operand")]
15142 (clobber (match_scratch:VEC_GATHER_MODE 6))])]
15146 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15147 operands[5]), UNSPEC_VSIBADDR);
;; AVX2 gather insn with DI-element indices.  Source operand 2 (tied to the
;; destination) and mask operand 5 (tied to scratch operand 1) are both in
;; <VEC_GATHER_SRCDI> mode, and the template prints %2 as the destination and
;; %5 as the mask, so the registers are printed in that mode.
15150 (define_insn "*avx2_gatherdi<mode>"
15151 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15152 (unspec:VEC_GATHER_MODE
15153 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
15154 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
15156 [(match_operand:P 3 "vsib_address_operand" "Tv")
15157 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
15158 (match_operand:SI 6 "const1248_operand" "n")]
15160 (mem:BLK (scratch))
15161 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
15163 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15165 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
15166 [(set_attr "type" "ssemov")
15167 (set_attr "prefix" "vex")
15168 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the DI-index gather with one fewer matched operand.  When the
;; data mode differs from <VEC_GATHER_SRCDI> the destination is printed in
;; its %x0 form; otherwise it is printed as %0.  Mask operand 4 is tied to
;; scratch operand 1.
15170 (define_insn "*avx2_gatherdi<mode>_2"
15171 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
15172 (unspec:VEC_GATHER_MODE
15174 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15176 [(match_operand:P 2 "vsib_address_operand" "Tv")
15177 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
15178 (match_operand:SI 5 "const1248_operand" "n")]
15180 (mem:BLK (scratch))
15181 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
15183 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
15186 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
15187 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
15188 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
15190 [(set_attr "type" "ssemov")
15191 (set_attr "prefix" "vex")
15192 (set_attr "mode" "<sseinsnmode>")])
;; DI-index gather whose destination is in <VEC_GATHER_SRCDI> mode: the
;; result is a vec_select of elements 0-3.  The scratch operand 1 is declared
;; in the VI4F_256 mode; source operand 2 is tied to the destination and mask
;; operand 5 to the scratch.
15194 (define_insn "*avx2_gatherdi<mode>_3"
15195 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
15196 (vec_select:<VEC_GATHER_SRCDI>
15198 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
15199 (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
15201 [(match_operand:P 3 "vsib_address_operand" "Tv")
15202 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
15203 (match_operand:SI 6 "const1248_operand" "n")]
15205 (mem:BLK (scratch))
15206 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
15208 (parallel [(const_int 0) (const_int 1)
15209 (const_int 2) (const_int 3)])))
15210 (clobber (match_scratch:VI4F_256 1 "=&x"))]
15212 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
15213 [(set_attr "type" "ssemov")
15214 (set_attr "prefix" "vex")
15215 (set_attr "mode" "<sseinsnmode>")])
;; Variant of the elements-0-3 DI-index gather with one fewer matched
;; operand: address 2, index 3, mask 4 (tied to the VI4F_256 scratch
;; operand 1), scale 5 and VSIB memory operator 6.
15217 (define_insn "*avx2_gatherdi<mode>_4"
15218 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
15219 (vec_select:<VEC_GATHER_SRCDI>
15222 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15224 [(match_operand:P 2 "vsib_address_operand" "Tv")
15225 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
15226 (match_operand:SI 5 "const1248_operand" "n")]
15228 (mem:BLK (scratch))
15229 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
15231 (parallel [(const_int 0) (const_int 1)
15232 (const_int 2) (const_int 3)])))
15233 (clobber (match_scratch:VI4F_256 1 "=&x"))]
15235 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
15236 [(set_attr "type" "ssemov")
15237 (set_attr "prefix" "vex")
15238 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the gather that takes a <VEC_GATHER_IDXSI> index vector.
;; Operands:
;;   0  destination register (VI48F_512)
;;   1  register input in the same mode as the destination
;;   4  register in <avx512fmaskmode>
;;   2  address (vsib_address_operand)
;;   3  index vector register (<VEC_GATHER_IDXSI>)
;;   5  SImode constant accepted by const1248_operand
;; A scratch in <avx512fmaskmode> (operand 7) is clobbered.
;; The preparation code builds a Pmode UNSPEC_VSIBADDR rtx from operands
;; 2, 3 and 5.
15240 (define_expand "avx512f_gathersi<mode>"
15241 [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
15243 [(match_operand:VI48F_512 1 "register_operand")
15244 (match_operand:<avx512fmaskmode> 4 "register_operand")
15245 (mem:<ssescalarmode>
15247 [(match_operand 2 "vsib_address_operand")
15248 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
15249 (match_operand:SI 5 "const1248_operand")]))]
15251 (clobber (match_scratch:<avx512fmaskmode> 7))])]
15255 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15256 operands[5]), UNSPEC_VSIBADDR);
;; Gather insn with a <VEC_GATHER_IDXSI> index vector; emits
;; v<sseintprefix>gatherd<ssemodesuffix>.  Operand 0 is an early-clobber
;; "v" register.  Operand 6 is the vsib_mem_operator whose address is an
;; UNSPEC_VSIBADDR of operands 4 (address), 3 (index register) and
;; 5 (const1248_operand constant).
15259 (define_insn "*avx512f_gathersi<mode>"
15260 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
;; Operand 1 is tied to the destination ("0").
15262 [(match_operand:VI48F_512 1 "register_operand" "0")
;; Operand 7 is tied to scratch operand 2 ("2"), i.e. it must live in the
;; "Yk" register that the insn clobbers.
15263 (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
15264 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15266 [(match_operand:P 4 "vsib_address_operand" "Tv")
15267 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
15268 (match_operand:SI 5 "const1248_operand" "n")]
15269 UNSPEC_VSIBADDR)])]
15271 (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
;; The template prints the memory operand, then the destination followed
;; by operand 2 in braces.  The Intel form prints the memory operand
;; with the 'g' modifier.
15273 "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
15274 [(set_attr "type" "ssemov")
15275 (set_attr "prefix" "evex")
15276 (set_attr "mode" "<sseinsnmode>")])
;; Gather insn with a <VEC_GATHER_IDXSI> index vector; emits
;; v<sseintprefix>gatherd<ssemodesuffix>.  Operand 0 is an early-clobber
;; "v" register; none of the operands visible here is tied to it.
;; Operand 5 is the vsib_mem_operator whose address is an UNSPEC_VSIBADDR
;; of operands 3 (address), 2 (index register) and 4 (const1248_operand
;; constant).
15278 (define_insn "*avx512f_gathersi<mode>_2"
15279 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
;; Operand 6 is tied to scratch operand 1 ("1"), i.e. it must live in the
;; "Yk" register that the insn clobbers.
15282 (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
15283 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
15285 [(match_operand:P 3 "vsib_address_operand" "Tv")
15286 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
15287 (match_operand:SI 4 "const1248_operand" "n")]
15288 UNSPEC_VSIBADDR)])]
15290 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
;; The template prints the memory operand, then the destination followed
;; by operand 1 in braces.  The Intel form prints the memory operand
;; with the 'g' modifier.
15292 "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
15293 [(set_attr "type" "ssemov")
15294 (set_attr "prefix" "evex")
15295 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the gather that takes a <VEC_GATHER_IDXDI> index vector.
;; Operands:
;;   0  destination register (VI48F_512)
;;   1  register input in <VEC_GATHER_SRCDI> mode
;;   4  QImode register
;;   2  address (vsib_address_operand)
;;   3  index vector register (<VEC_GATHER_IDXDI>)
;;   5  SImode constant accepted by const1248_operand
;; A QImode scratch (operand 7) is clobbered.
;; The preparation code builds a Pmode UNSPEC_VSIBADDR rtx from operands
;; 2, 3 and 5.
15298 (define_expand "avx512f_gatherdi<mode>"
15299 [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
15301 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
15302 (match_operand:QI 4 "register_operand")
15303 (mem:<ssescalarmode>
15305 [(match_operand 2 "vsib_address_operand")
15306 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
15307 (match_operand:SI 5 "const1248_operand")]))]
15309 (clobber (match_scratch:QI 7))])]
15313 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
15314 operands[5]), UNSPEC_VSIBADDR);
;; Gather insn with a <VEC_GATHER_IDXDI> index vector; emits
;; v<sseintprefix>gatherq<ssemodesuffix>.  Operand 0 is an early-clobber
;; "v" register.  Operand 6 is the vsib_mem_operator whose address is an
;; UNSPEC_VSIBADDR of operands 4 (address), 3 (index register) and
;; 5 (const1248_operand constant).
15317 (define_insn "*avx512f_gatherdi<mode>"
15318 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
;; Operand 1 is tied to the destination ("0") but carries
;; <VEC_GATHER_SRCDI> mode rather than the destination's mode.
15320 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
;; Operand 7 is tied to scratch operand 2 ("2"), i.e. it must live in the
;; "Yk" register that the insn clobbers.
15321 (match_operand:QI 7 "register_operand" "2")
15322 (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
15324 [(match_operand:P 4 "vsib_address_operand" "Tv")
15325 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
15326 (match_operand:SI 5 "const1248_operand" "n")]
15327 UNSPEC_VSIBADDR)])]
15329 (clobber (match_scratch:QI 2 "=&Yk"))]
;; The register is printed as %1, not %0 -- presumably so that it is
;; printed in <VEC_GATHER_SRCDI> mode; TODO confirm.
15331 "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
15332 [(set_attr "type" "ssemov")
15333 (set_attr "prefix" "evex")
15334 (set_attr "mode" "<sseinsnmode>")])
;; Gather insn with a <VEC_GATHER_IDXDI> index vector; emits
;; v<sseintprefix>gatherq<ssemodesuffix>.  Operand 0 is an early-clobber
;; "v" register; none of the operands visible here is tied to it.
;; Operand 5 is the vsib_mem_operator whose address is an UNSPEC_VSIBADDR
;; of operands 3 (address), 2 (index register) and 4 (const1248_operand
;; constant).
;; The output code picks between two templates: when <MODE>mode differs
;; from <VEC_GATHER_SRCDI>mode the destination is printed with the 't'
;; modifier (%t0), otherwise as plain %0.
15336 (define_insn "*avx512f_gatherdi<mode>_2"
15337 [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
;; Operand 6 is tied to scratch operand 1 ("1"), i.e. it must live in the
;; "Yk" register that the insn clobbers.
15340 (match_operand:QI 6 "register_operand" "1")
15341 (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
15343 [(match_operand:P 3 "vsib_address_operand" "Tv")
15344 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
15345 (match_operand:SI 4 "const1248_operand" "n")]
15346 UNSPEC_VSIBADDR)])]
15348 (clobber (match_scratch:QI 1 "=&Yk"))]
15351 if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
15352 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
15353 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
15355 [(set_attr "type" "ssemov")
15356 (set_attr "prefix" "evex")
15357 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the scatter that takes a <VEC_GATHER_IDXSI> index vector.
;; Operands:
;;   0  address (vsib_address_operand)
;;   2  index vector register (<VEC_GATHER_IDXSI>)
;;   4  SImode constant accepted by const1248_operand
;;   1  register in <avx512fmaskmode>
;;   3  register holding the VI48F_512 data to store
;; A scratch in <avx512fmaskmode> (operand 6) is clobbered.
;; The preparation code builds a Pmode UNSPEC_VSIBADDR rtx from operands
;; 0, 2 and 4.
15359 (define_expand "avx512f_scattersi<mode>"
15360 [(parallel [(set (mem:VI48F_512
15362 [(match_operand 0 "vsib_address_operand")
15363 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
15364 (match_operand:SI 4 "const1248_operand")]))
15366 [(match_operand:<avx512fmaskmode> 1 "register_operand")
15367 (match_operand:VI48F_512 3 "register_operand")]
15369 (clobber (match_scratch:<avx512fmaskmode> 6))])]
15373 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
15374 operands[4]), UNSPEC_VSIBADDR);
;; Scatter insn with a <VEC_GATHER_IDXSI> index vector; emits
;; v<sseintprefix>scatterd<ssemodesuffix>.  The destination is operand 5,
;; a vsib_mem_operator built from operands 0 (address), 2 (index register)
;; and 4 (const1248_operand constant).  Operand 3 is the "v" register
;; holding the data.
15377 (define_insn "*avx512f_scattersi<mode>"
15378 [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
15380 [(match_operand:P 0 "vsib_address_operand" "Tv")
15381 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
15382 (match_operand:SI 4 "const1248_operand" "n")]
;; Operand 6 is tied to scratch operand 1 ("1"), i.e. it must live in the
;; "Yk" register that the insn clobbers.
15385 [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
15386 (match_operand:VI48F_512 3 "register_operand" "v")]
15388 (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
;; AT&T order: data register %3, then memory %5 followed by operand 1 in
;; braces; the Intel form is reversed.
15390 "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
15391 [(set_attr "type" "ssemov")
15392 (set_attr "prefix" "evex")
15393 (set_attr "mode" "<sseinsnmode>")])
;; Expander for the scatter that takes a V8DI index vector.
;; Operands:
;;   0  address (vsib_address_operand)
;;   2  index vector register (V8DI)
;;   4  SImode constant accepted by const1248_operand
;;   1  QImode register
;;   3  register holding the <VEC_GATHER_SRCDI> data to store
;; A QImode scratch (operand 6) is clobbered.
;; The preparation code builds a Pmode UNSPEC_VSIBADDR rtx from operands
;; 0, 2 and 4.
15395 (define_expand "avx512f_scatterdi<mode>"
15396 [(parallel [(set (mem:VI48F_512
15398 [(match_operand 0 "vsib_address_operand")
15399 (match_operand:V8DI 2 "register_operand")
15400 (match_operand:SI 4 "const1248_operand")]))
15402 [(match_operand:QI 1 "register_operand")
15403 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
15405 (clobber (match_scratch:QI 6))])]
15409 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
15410 operands[4]), UNSPEC_VSIBADDR);
;; Scatter insn with a V8DI index vector; emits
;; v<sseintprefix>scatterq<ssemodesuffix>.  The destination is operand 5,
;; a vsib_mem_operator built from operands 0 (address), 2 (index register)
;; and 4 (const1248_operand constant).  Operand 3 is the "v" register
;; holding the data, in <VEC_GATHER_SRCDI> mode.
15413 (define_insn "*avx512f_scatterdi<mode>"
15414 [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
15416 [(match_operand:P 0 "vsib_address_operand" "Tv")
15417 (match_operand:V8DI 2 "register_operand" "v")
15418 (match_operand:SI 4 "const1248_operand" "n")]
;; Operand 6 is tied to scratch operand 1 ("1"), i.e. it must live in the
;; "Yk" register that the insn clobbers.
15421 [(match_operand:QI 6 "register_operand" "1")
15422 (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
15424 (clobber (match_scratch:QI 1 "=&Yk"))]
;; AT&T order: data register %3, then memory %5 followed by operand 1 in
;; braces; the Intel form is reversed.
15426 "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
15427 [(set_attr "type" "ssemov")
15428 (set_attr "prefix" "evex")
15429 (set_attr "mode" "<sseinsnmode>")])
;; Masked register-to-register compress; emits
;; v<sseintprefix>compress<ssemodesuffix>.
;; Operands:
;;   0  destination "v" register
;;   1  source "v" register
;;   2  vector_move_operand, either tied to the destination ("0") or a
;;      constant matching "C"; printed through the %N2 modifier
;;   3  "Yk" register in <avx512fmaskmode>, printed in braces after the
;;      destination
15431 (define_insn "avx512f_compress<mode>_mask"
15432 [(set (match_operand:VI48F_512 0 "register_operand" "=v")
15434 [(match_operand:VI48F_512 1 "register_operand" "v")
15435 (match_operand:VI48F_512 2 "vector_move_operand" "0C")
15436 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
15439 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
15440 [(set_attr "type" "ssemov")
15441 (set_attr "prefix" "evex")
15442 (set_attr "mode" "<sseinsnmode>")])
;; Masked compress with a memory destination (UNSPEC_COMPRESS_STORE);
;; emits v<sseintprefix>compress<ssemodesuffix>.
;; Operands:
;;   0  memory destination ("=m")
;;   1  source register
;;   2  "Yk" register in <avx512fmaskmode>, printed in braces after the
;;      destination
;; The "memory" attribute is set to "store".
15444 (define_insn "avx512f_compressstore<mode>_mask"
15445 [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
;; NOTE(review): the source uses constraint "x" here, whereas
;; avx512f_compress<mode>_mask uses "v" for its source -- confirm whether
;; the narrower constraint is intentional.
15447 [(match_operand:VI48F_512 1 "register_operand" "x")
15449 (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
15450 UNSPEC_COMPRESS_STORE))]
15452 "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
15453 [(set_attr "type" "ssemov")
15454 (set_attr "prefix" "evex")
15455 (set_attr "memory" "store")
15456 (set_attr "mode" "<sseinsnmode>")])
;; Expander with the same operand layout as avx512f_expand<mode>_mask:
;; 0 destination register, 1 register-or-memory source,
;; 2 vector_move_operand, 3 register in <avx512fmaskmode>.
;; The preparation statement overwrites operand 2 with the all-zeros
;; constant of <MODE>mode, so whatever the caller passed as operand 2 is
;; ignored.
15458 (define_expand "avx512f_expand<mode>_maskz"
15459 [(set (match_operand:VI48F_512 0 "register_operand")
15461 [(match_operand:VI48F_512 1 "nonimmediate_operand")
15462 (match_operand:VI48F_512 2 "vector_move_operand")
15463 (match_operand:<avx512fmaskmode> 3 "register_operand")]
15466 "operands[2] = CONST0_RTX (<MODE>mode);")
;; Masked expand; emits v<sseintprefix>expand<ssemodesuffix>.
;; Two alternatives that differ only in the source: alternative 0 takes it
;; from a "v" register, alternative 1 from memory.  The "memory" attribute
;; is "none" and "load" respectively.
;; Operands:
;;   0  destination "v" register
;;   1  source (register or memory)
;;   2  vector_move_operand, either tied to the destination ("0") or a
;;      constant matching "C"; printed through the %N2 modifier
;;   3  "Yk" register in <avx512fmaskmode>, printed in braces after the
;;      destination
15468 (define_insn "avx512f_expand<mode>_mask"
15469 [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
15471 [(match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
15472 (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C")
15473 (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
15476 "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
15477 [(set_attr "type" "ssemov")
15478 (set_attr "prefix" "evex")
15479 (set_attr "memory" "none,load")
15480 (set_attr "mode" "<sseinsnmode>")])
;; vgetmant<ssemodesuffix> on VF_512 vectors.
;; Operands:
;;   0  destination "v" register
;;   1  source; predicate and constraint come from the
;;      <round_saeonly_nimm_predicate> / <round_saeonly_constraint>
;;      substitution attributes
;;   2  SImode constant accepted by const_0_to_15_operand
;; <mask_operand3> and <round_saeonly_mask_op3> are substituted into the
;; output template.  No "type" attribute is set in the visible attribute
;; list.
15482 (define_insn "avx512f_getmant<mode><mask_name><round_saeonly_name>"
15483 [(set (match_operand:VF_512 0 "register_operand" "=v")
15485 [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
15486 (match_operand:SI 2 "const_0_to_15_operand")]
15489 "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
15490 [(set_attr "prefix" "evex")
15491 (set_attr "mode" "<MODE>")])
;; vgetmant<ssescalarmodesuffix> on VF_128 operands; the "mode" attribute
;; is the scalar mode <ssescalarmode>.
;; Operands:
;;   0  destination "v" register
;;   1  "v" register source
;;   2  second source; predicate and constraint come from the
;;      <round_saeonly_nimm_predicate> / <round_saeonly_constraint>
;;      substitution attributes
;;   3  SImode constant accepted by const_0_to_15_operand
;; <round_saeonly_op4> is substituted into the output template.  No "type"
;; attribute is set in the visible attribute list.
15493 (define_insn "avx512f_getmant<mode><round_saeonly_name>"
15494 [(set (match_operand:VF_128 0 "register_operand" "=v")
15497 [(match_operand:VF_128 1 "register_operand" "v")
15498 (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
15499 (match_operand:SI 3 "const_0_to_15_operand")]
15504 "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}";
15505 [(set_attr "prefix" "evex")
15506 (set_attr "mode" "<ssescalarmode>")])
;; Named pattern clz<mode>2 for VI48_512 vectors; emits
;; vplzcnt<ssemodesuffix>.  Operand 0 is the destination "v" register and
;; operand 1 the source, either a "v" register or memory.
;; <mask_operand2> is substituted after the destination in the template.
15508 (define_insn "clz<mode>2<mask_name>"
15509 [(set (match_operand:VI48_512 0 "register_operand" "=v")
15511 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
15513 "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15514 [(set_attr "type" "sse")
15515 (set_attr "prefix" "evex")
15516 (set_attr "mode" "<sseinsnmode>")])
;; Conflict pattern for VI48_512 vectors; emits
;; vpconflict<ssemodesuffix>.  Operand 0 is the destination "v" register
;; and operand 1 the source, either a "v" register or memory.
;; <mask_operand2> is substituted after the destination in the template;
;; the pattern name is prefixed by the <mask_codefor> substitution.
15518 (define_insn "<mask_codefor>conflict<mode><mask_name>"
15519 [(set (match_operand:VI48_512 0 "register_operand" "=v")
15521 [(match_operand:VI48_512 1 "nonimmediate_operand" "vm")]
15524 "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
15525 [(set_attr "type" "sse")
15526 (set_attr "prefix" "evex")
15527 (set_attr "mode" "<sseinsnmode>")])
;; sha1msg1 on V4SI operands.  Two-operand form: operand 1 is tied to the
;; destination ("0"), so operand 0 is both source and result; operand 2 is
;; an "x" register or memory.
15529 (define_insn "sha1msg1"
15530 [(set (match_operand:V4SI 0 "register_operand" "=x")
15532 [(match_operand:V4SI 1 "register_operand" "0")
15533 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15536 "sha1msg1\t{%2, %0|%0, %2}"
15537 [(set_attr "type" "sselog1")
15538 (set_attr "mode" "TI")])
;; sha1msg2 on V4SI operands.  Two-operand form: operand 1 is tied to the
;; destination ("0"), so operand 0 is both source and result; operand 2 is
;; an "x" register or memory.
15540 (define_insn "sha1msg2"
15541 [(set (match_operand:V4SI 0 "register_operand" "=x")
15543 [(match_operand:V4SI 1 "register_operand" "0")
15544 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15547 "sha1msg2\t{%2, %0|%0, %2}"
15548 [(set_attr "type" "sselog1")
15549 (set_attr "mode" "TI")])
;; sha1nexte on V4SI operands, represented as UNSPEC_SHA1NEXTE.
;; Two-operand form: operand 1 is tied to the destination ("0"), so
;; operand 0 is both source and result; operand 2 is an "x" register or
;; memory.
15551 (define_insn "sha1nexte"
15552 [(set (match_operand:V4SI 0 "register_operand" "=x")
15554 [(match_operand:V4SI 1 "register_operand" "0")
15555 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15556 UNSPEC_SHA1NEXTE))]
15558 "sha1nexte\t{%2, %0|%0, %2}"
15559 [(set_attr "type" "sselog1")
15560 (set_attr "mode" "TI")])
;; sha1rnds4 on V4SI operands, represented as UNSPEC_SHA1RNDS4.
;; Operand 1 is tied to the destination ("0"); operand 2 is an "x"
;; register or memory; operand 3 is an immediate accepted by
;; const_0_to_3_operand.
15562 (define_insn "sha1rnds4"
15563 [(set (match_operand:V4SI 0 "register_operand" "=x")
15565 [(match_operand:V4SI 1 "register_operand" "0")
15566 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15567 (match_operand:SI 3 "const_0_to_3_operand" "n")]
15568 UNSPEC_SHA1RNDS4))]
15570 "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
15571 [(set_attr "type" "sselog1")
;; Operand 3 is an immediate, accounted for as one byte of insn length.
15572 (set_attr "length_immediate" "1")
15573 (set_attr "mode" "TI")])
;; sha256msg1 on V4SI operands, represented as UNSPEC_SHA256MSG1.
;; Two-operand form: operand 1 is tied to the destination ("0"), so
;; operand 0 is both source and result; operand 2 is an "x" register or
;; memory.
15575 (define_insn "sha256msg1"
15576 [(set (match_operand:V4SI 0 "register_operand" "=x")
15578 [(match_operand:V4SI 1 "register_operand" "0")
15579 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15580 UNSPEC_SHA256MSG1))]
15582 "sha256msg1\t{%2, %0|%0, %2}"
15583 [(set_attr "type" "sselog1")
15584 (set_attr "mode" "TI")])
;; sha256msg2 on V4SI operands, represented as UNSPEC_SHA256MSG2.
;; Two-operand form: operand 1 is tied to the destination ("0"), so
;; operand 0 is both source and result; operand 2 is an "x" register or
;; memory.
15586 (define_insn "sha256msg2"
15587 [(set (match_operand:V4SI 0 "register_operand" "=x")
15589 [(match_operand:V4SI 1 "register_operand" "0")
15590 (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
15591 UNSPEC_SHA256MSG2))]
15593 "sha256msg2\t{%2, %0|%0, %2}"
15594 [(set_attr "type" "sselog1")
15595 (set_attr "mode" "TI")])
;; sha256rnds2 on V4SI operands, represented as UNSPEC_SHA256RNDS2.
;; Operand 1 is tied to the destination ("0"); operand 2 is an "x"
;; register or memory; operand 3 is a V4SI register constrained to "Yz".
;; Unlike sha1rnds4, the third source is a register and not an immediate,
;; so the encoding carries no immediate byte and no "length_immediate"
;; attribute is set here.
15597 (define_insn "sha256rnds2"
15598 [(set (match_operand:V4SI 0 "register_operand" "=x")
15600 [(match_operand:V4SI 1 "register_operand" "0")
15601 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15602 (match_operand:V4SI 3 "register_operand" "Yz")]
15603 UNSPEC_SHA256RNDS2))]
15605 "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
15606 [(set_attr "type" "sselog1")
15608 (set_attr "mode" "TI")])